In [7]:
import redis

# Connect to database 0 of the Redis server reachable from inside the container.
client = redis.Redis(host='host.docker.internal', port=6379, db=0)

# Start from a clean slate: drop every key in the selected database.
client.flushdb()

True

In [8]:
import json
import numpy as np
import threading
import time
import io
from concurrent.futures import ThreadPoolExecutor
from pcomp.kafka_handlers import KafkaProducerHandler, KafkaConsumerHandler, KafkaConsumerHandlerNeuron
from pcomp.activation_functions import ACTIVATIONS, relu, softmax
from pcomp.redis_utils import RedisHandler
from pcomp.s3client import S3Client
from pcomp.utils import batch_generator
from pcomp import neuroncalc
from pcomp.neurons_accumulator import NeuronsAccumulator
from pcomp.avro_utils import avro_serialize_batch, avro_deserialize_batch
from base64 import b64encode, b64decode

In [9]:
# Kafka configuration: broker address handed to every Kafka producer/consumer
# handler created in this notebook.
KAFKA_BROKER = 'kafka:9092'

class Neuron(threading.Thread):
    """Worker thread that computes one neuron's output for every announced batch.

    The thread listens on the ``layer-<n>`` Kafka topic for messages of the form
    ``layer|batch_id|batch_size|columns_size``, reads the matching input matrix
    from Redis, computes ``input @ weights + bias`` (followed by the activation
    unless this is the final layer) and publishes the Avro-serialized result on
    ``layer-<n>-complete``.
    """

    # Seconds to sleep between Redis polls while the input batch is missing.
    INPUT_POLL_INTERVAL = 0.05

    def __init__(self, layer_id, neuron_id, weights, bias, activation, is_final_layer):
        """Store the neuron parameters; Kafka handlers are created later in ``run``.

        Args:
            layer_id: Layer name of the form ``"layer_<n>"``.
            neuron_id: Index of this neuron inside its layer.
            weights: Weight values, converted to a NumPy array.
            bias: Bias value(s), converted to a NumPy array.
            activation: Name looked up in ``ACTIVATIONS``; unknown names fall back to ``relu``.
            is_final_layer: When true the raw pre-activation value is emitted.
        """
        threading.Thread.__init__(self)
        self.layer_id = layer_id
        self.layer_id_num = int(self.layer_id.replace("layer_", ""))
        self.neuron_id = neuron_id
        self.weights = np.array(weights)
        self.bias = np.array(bias)
        self.activation = activation
        self.activation_func = ACTIVATIONS.get(activation, relu)
        self.is_final_layer = is_final_layer
        self.redis_handler = RedisHandler('host.docker.internal', 6379, 0)
        self.executor = ThreadPoolExecutor(max_workers=4)
        self.producer = None

    def fetch_input(self, batch_id, batch_size, columns_size):
        """Return the input matrix for ``batch_id`` as a ``(-1, columns_size)`` float64 array.

        Layer 0 reads ``batch:<id>:initial_data``; every other layer reads the
        aggregated output of the previous layer, ``batch:<id>:<layer_num - 1>``.
        The previous layer index is derived from the full numeric layer id, so
        layers numbered 10 and above resolve to the correct key.

        Polls Redis until the key holds a value.
        NOTE(review): assumes ``RedisHandler.get`` returns ``None`` for a missing key.
        """
        if self.layer_id_num == 0:
            key = f"batch:{batch_id}:initial_data"
        else:
            key = f"batch:{batch_id}:{self.layer_id_num - 1}"
        # Poll Redis until the data is available.
        while True:
            raw = self.redis_handler.get(key)
            if raw is not None:
                # Decode only once a payload exists; np.frombuffer(None) would raise.
                return np.frombuffer(raw, dtype=np.float64).reshape(-1, int(columns_size))
            print(f"⏳ Neuron {self.neuron_id} waiting for input data for key: {key}")
            # Back off briefly instead of hammering Redis in a tight loop.
            time.sleep(self.INPUT_POLL_INTERVAL)

    def process_and_send(self, batch_id, batch_size, columns_size):
        """Compute this neuron's output for one batch and publish it to Kafka.

        ``batch_size`` and ``columns_size`` arrive as strings from the Kafka
        message and are converted to ``int`` before being serialized.
        """
        input_data = self.fetch_input(batch_id, batch_size, columns_size)
        z = np.dot(input_data, self.weights) + self.bias
        # The final layer forwards the raw value; the coordinator takes the argmax.
        output = z if self.is_final_layer else self.activation_func(z)
        message_dict = {
            "neuron_id": self.neuron_id,
            "batch_id": batch_id,
            "batch_size": int(batch_size),
            "columns_size": int(columns_size),
            "data": output.tobytes()
        }
        msg = avro_serialize_batch(message_dict)
        self.producer.send_neuron(msg)

    def run(self):
        """Consume activation messages until the topic has been idle for 10 seconds."""
        # Instantiate Kafka consumer and producer inside the thread.
        consumer = KafkaConsumerHandler(f'layer-{self.layer_id_num}', KAFKA_BROKER, group_id=f"{self.neuron_id}_{self.layer_id_num}_group")
        self.producer = KafkaProducerHandler(KAFKA_BROKER, f'layer-{self.layer_id_num}-complete')
        last_msg_time = time.time()
        while True:
            got_message = False
            for message in consumer.consume():
                got_message = True
                last_msg_time = time.time()
                layer, batch_id_str, batch_size, columns_size = message.split('|')
                if layer == self.layer_id:
                    batch_id = int(batch_id_str)
                    self.process_and_send(batch_id, batch_size, columns_size)
            # Shut down once nothing has arrived for more than 10 seconds.
            if not got_message and (time.time() - last_msg_time > 10):
                consumer.commit()
                consumer.close()
                self.producer.close()
                break
            time.sleep(0.05)


class LayerCoordinator(threading.Thread):
    """Thread that gathers the outputs of all neurons of one layer, batch by batch.

    It consumes Avro messages from ``layer-<n>-complete``; once every neuron of
    the layer has reported for a batch, the per-neuron vectors are combined into
    one matrix. For a hidden layer the matrix is stored in Redis and the next
    layer is activated; for the final layer the arg-max predictions are written
    to the ``batch:predictions`` hash.
    """

    def __init__(self, layer_id, neuron_count, is_final_layer=False):
        """Store layer metadata; Kafka handlers are created later in ``run``.

        Args:
            layer_id: Layer name of the form ``"layer_<n>"``.
            neuron_count: Number of neurons whose outputs must be collected.
            is_final_layer: Whether this layer produces the predictions.
        """
        threading.Thread.__init__(self)
        self.layer_id = layer_id
        self.layer_id_num = int(self.layer_id.replace("layer_", ""))
        self.neuron_count = neuron_count
        self.is_final_layer = is_final_layer
        # batch_id -> NeuronsAccumulator holding the outputs received so far.
        self.accumulators = {}
        self.redis_handler = RedisHandler('host.docker.internal', 6379, 0)
        self.executor = ThreadPoolExecutor(max_workers=4)
        self.producer = None

    def run(self):
        """Collect neuron outputs until the topic has been idle for 10 seconds."""
        consumer = KafkaConsumerHandlerNeuron(f'layer-{self.layer_id_num}-complete', KAFKA_BROKER, group_id=f"{self.layer_id_num}_coord_group")
        self.producer = KafkaProducerHandler(KAFKA_BROKER, 'activate-layer')
        last_msg_time = time.time()
        while True:
            got_message = False
            for message in consumer.consume():
                got_message = True
                last_msg_time = time.time()
                message_dict = avro_deserialize_batch(message.value())
                neuron_id = message_dict["neuron_id"]
                batch_id = message_dict["batch_id"]
                data_bytes = message_dict["data"]
                output = np.frombuffer(data_bytes, dtype=np.float64)
                try:
                    acc = self.accumulators[batch_id]
                except KeyError:
                    acc = self.accumulators[batch_id] = NeuronsAccumulator(self.neuron_count)
                # Count each neuron only once, even if its message is delivered again.
                if acc.outputs[neuron_id] is None:
                    acc.outputs[neuron_id] = output
                    acc.completed += 1
                if acc.completed == self.neuron_count:
                    batch_size = message_dict["batch_size"]
                    columns_size = message_dict["columns_size"]
                    self.aggregate_neuron_outputs(batch_id, batch_size, columns_size, acc.outputs)
                    del self.accumulators[batch_id]
            # Shut down once nothing has arrived for more than 10 seconds.
            if not got_message and (time.time() - last_msg_time > 10):
                consumer.commit()
                consumer.close()
                self.producer.close()
                break
            time.sleep(0.05)

    def aggregate_neuron_outputs(self, batch_id, batch_size, columns_size, output_batch):
        """Combine per-neuron vectors into a ``(rows, neuron_count)`` matrix and publish it.

        Args:
            batch_id: Index of the batch being completed.
            batch_size: Nominal batch size; used to offset prediction indices.
            columns_size: Input width of this layer (forwarded unchanged).
            output_batch: One array per neuron, each holding that neuron's
                output for every row of the batch.
        """
        # Flatten each neuron's vector and stack them as columns. The result is
        # always two-dimensional, so a single-row batch or a single-neuron layer
        # still supports the axis=1 reduction below.
        outputs = np.stack([np.ravel(column) for column in output_batch], axis=1)
        # Store the aggregated result in Redis.
        if not self.is_final_layer:
            self.redis_handler.set(f"batch:{batch_id}:{self.layer_id_num}", outputs, True, 1000)
            self.activate_next_layer(batch_id, batch_size, columns_size)
        else:
            preds = np.argmax(outputs, axis=1)
            # Global row offset of this batch within the whole dataset.
            cnt = batch_id * int(batch_size)
            mapping = {idx + cnt: int(prediction) for idx, prediction in enumerate(preds)}
            self.redis_handler.hset_bulk("batch:predictions", mapping)
            self.redis_handler.delete_batch_keys(batch_id)

    def activate_next_layer(self, batch_id, batch_size, columns_size):
        """Announce the batch to the next layer.

        The column count sent is this layer's neuron count, because the matrix
        stored for the next layer has one column per neuron of this layer.
        """
        next_layer = f'layer_{self.layer_id_num + 1}'
        self.producer.send(f"{next_layer}|{batch_id}|{batch_size}|{self.neuron_count}")
            

class Layer(threading.Thread):
    """Thread that relays ``activate-layer`` messages to the neurons of one layer."""

    def __init__(self, layer_id, neuron_count):
        """Remember the layer identity; the Kafka producer is created in ``run``."""
        super().__init__()
        self.layer_id = layer_id
        self.neuron_count = neuron_count
        # Numeric suffix of "layer_<n>", used to build topic and group names.
        self.layer_id_num = int(self.layer_id.replace("layer_", ""))
        self.executor = ThreadPoolExecutor(max_workers=8)
        self.producer = None

    def activate_neurons(self, batch_id, batch_size, columns_size):
        """Publish one activation message for the given batch on this layer's topic."""
        payload = f"{self.layer_id}|{batch_id}|{batch_size}|{columns_size}"
        self.producer.send(payload)

    def run(self):
        """Forward messages addressed to this layer until 10 seconds pass without any."""
        source = KafkaConsumerHandler('activate-layer', KAFKA_BROKER, group_id=f"{self.layer_id_num}_group")
        self.producer = KafkaProducerHandler(KAFKA_BROKER, f'layer-{self.layer_id_num}')
        last_seen = time.time()
        while True:
            received_any = False
            for raw in source.consume():
                received_any = True
                last_seen = time.time()
                target_layer, batch_token, batch_size, columns_size = raw.split('|')
                # Messages for other layers share the topic; skip them.
                if target_layer != self.layer_id:
                    continue
                self.activate_neurons(int(batch_token), batch_size, columns_size)
            # Keep polling while traffic is flowing or the idle window is still open.
            if received_any or time.time() - last_seen <= 10:
                time.sleep(0.05)
                continue
            source.commit()
            source.close()
            self.producer.close()
            break

def predict_data(batch_size=50, max_rows=200):
    """Load the dataset from S3, stage it in Redis batch by batch and trigger layer 0.

    Every column of ``mnist.csv`` except the last is treated as a feature. Each
    batch is stored under ``batch:<idx>:initial_data`` and announced on the
    ``activate-layer`` topic as ``layer_0|<idx>|<batch_size>|<columns>``.

    Args:
        batch_size: Number of rows per batch (default 50).
        max_rows: Only the first ``max_rows`` rows are sent (default 200).
    """
    import os  # local import so this function does not depend on another cell

    producer = KafkaProducerHandler(KAFKA_BROKER, 'activate-layer')
    try:
        redis_handler = RedisHandler('host.docker.internal', 6379, 0)
        # Credentials can be overridden via the environment; the fallbacks keep
        # the previous local-development values.
        s3_client = S3Client(
            "host.docker.internal:9000",
            os.environ.get("S3_ACCESS_KEY", "admin"),
            os.environ.get("S3_SECRET_KEY", "admin123"),
        )
        buffer = s3_client.download_fileobj("my-bucket", "mnist.csv")
        content_str = buffer.getvalue().decode("utf-8")
        dataset = np.genfromtxt(io.StringIO(content_str), delimiter=',', skip_header=1)
        features = dataset[:, :-1][:max_rows]
        # Announce the real feature width so the neurons reshape the payload correctly.
        columns_size = features.shape[1]
        for idx, batch in enumerate(batch_generator(features, batch_size), start=0):
            redis_handler.set(f"batch:{idx}:initial_data", batch, True, 1000)
            producer.send(f"layer_0|{idx}|{batch_size}|{columns_size}")
    finally:
        # Release the producer even if the download or parsing fails.
        producer.close()

# Load the network description: a mapping of layer name -> {"nodes": [...]}.
# The context manager closes the file handle once parsing is done.
with open("node_based_model.json") as model_file:
    data = json.load(model_file)

neurons = []
layers = []
coordinators = []

# The last key of the model file is the output layer; computed once up front.
final_layer_name = list(data)[-1] if data else None

for layer_name, layer_info in data.items():
    nodes = layer_info['nodes']
    is_final = layer_name == final_layer_name
    neurons.extend(
        Neuron(
            layer_id=layer_name,
            neuron_id=i,
            weights=node['weights'],
            bias=node['biases'],
            activation=node['activation'],
            is_final_layer=is_final,
        )
        for i, node in enumerate(nodes)
    )
    layers.append(Layer(layer_id=layer_name, neuron_count=len(nodes)))
    coordinators.append(LayerCoordinator(layer_id=layer_name, neuron_count=len(nodes), is_final_layer=is_final))

# Start all threads
for thread in neurons + layers + coordinators:
    thread.start()

print("Threads started")

# Feed the input batches only after every consumer thread has been started.
predict_data()

Threads started


In [5]:
# Block until every neuron, layer and coordinator thread has finished.
for worker in [*neurons, *layers, *coordinators]:
    worker.join()