In [1]:
import json
import random
import time
from datetime import datetime
import numpy as np
import logging
import socket
from confluent_kafka import Producer, Consumer
import random
from joblib import dump
import numpy as np
from sklearn.ensemble import IsolationForest

In [2]:
# --- Kafka connection -------------------------------------------------------
KAFKA_BROKER = "kafka:9092"          # bootstrap server handed to the Kafka clients
TRANSACTIONS_TOPIC = "transactions"  # topic the generated records are published to

# --- Synthetic stream tuning ------------------------------------------------
DELAY = 2                              # seconds slept between two produced records
OUTLIERS_GENERATION_PROBABILITY = 0.2  # chance that a record is drawn from the wide uniform range

In [3]:
# One seeded generator drives both the synthetic data and the forest,
# so a fresh "Restart & Run All" reproduces the same model.
rng = np.random.RandomState(42)

# Training set: 500 Gaussian points (std 0.3) copied around (+2, +2) and
# (-2, -2), stacked into a (1000, 2) matrix and rounded to 3 decimals.
X = 0.3 * rng.randn(500, 2)
X_train = np.round(np.vstack((X + 2, X - 2)), 3)

# Fit the isolation forest on the two-cluster training set.
clf = IsolationForest(
    n_estimators=50,
    max_samples=500,
    contamination=0.01,
    random_state=rng,
)
clf.fit(X_train)

# Persist the fitted model; left as the last expression so the cell shows
# the list of written paths.
dump(clf, './isolation_forest.joblib')

['./isolation_forest.joblib']

Producer

In [4]:
def create_producer():
    """Build a Kafka producer pointed at ``KAFKA_BROKER``.

    Returns:
        The configured ``Producer``, or ``None`` when construction raised;
        in that case the exception is logged with its traceback.
    """
    try:
        settings = {
            "bootstrap.servers": KAFKA_BROKER,
            "client.id": socket.gethostname(),
            # Idempotent writes, acknowledged by the leader and all in-sync replicas.
            "enable.idempotence": True,
            "acks": "all",
            # Batching / compression.
            "compression.type": "lz4",
            "batch.size": 64000,
            "linger.ms": 10,
            # Retry budget: at most 5 retries, all within 1000 ms in total.
            "retries": 5,
            "delivery.timeout.ms": 1000,
        }
        return Producer(settings)
    except Exception:
        logging.exception("Couldn't create the producer")
        return None


def create_consumer(topic, group_id):
    """Create a Kafka consumer subscribed to a single topic.

    The settings are passed as one flat dictionary. The previous nested
    ``default.topic.config`` form is deprecated by the client, and
    ``enable.auto.commit`` is a consumer-level property rather than a topic
    one, so both keys now live at the top level.

    Args:
        topic: Name of the topic to subscribe to.
        group_id: Consumer group the instance joins.

    Returns:
        The subscribed ``Consumer``, or ``None`` when creation or
        subscription raised; the exception is logged with its traceback.
    """
    try:
        consumer = Consumer({
            "bootstrap.servers": KAFKA_BROKER,
            "group.id": group_id,
            "client.id": socket.gethostname(),
            "isolation.level": "read_committed",
            "auto.offset.reset": "latest",  # Only consume new messages
            # Offsets are not committed automatically by the client.
            "enable.auto.commit": False,
        })

        consumer.subscribe([topic])
    except Exception:
        logging.exception("Couldn't create the consumer")
        consumer = None

    return consumer

In [5]:
def _log_delivery(err, msg):
    """Delivery report callback: log records that were not delivered.

    Invoked from ``producer.flush()`` once per produced message; ``err`` is
    ``None`` when the delivery succeeded.
    """
    if err is not None:
        logging.error("Delivery to topic %s failed: %s", msg.topic(), err)


_id = 0
producer = create_producer()

if producer is not None:
    try:
        # Runs until the kernel is interrupted.
        while True:
            print("This code is printed by Le Nguyen Hoang Phuc - 23521198")
            # Generate some abnormal observations
            if random.random() <= OUTLIERS_GENERATION_PROBABILITY:
                X_test = np.random.uniform(low=-4, high=4, size=(1, 2))
            else:
                # Same distribution as the training clusters. The temporary is
                # named `noise` so the global training matrix `X` is not
                # overwritten by this loop.
                noise = 0.3 * np.random.randn(1, 2)
                X_test = (noise + np.random.choice(a=[2, -2], size=1, p=[0.5, 0.5]))

            X_test = np.round(X_test, 3).tolist()

            # Naive UTC timestamp in ISO-8601 form (no offset suffix).
            current_time = datetime.utcnow().isoformat()

            record = {"id": _id, "data": X_test, "current_time": current_time}
            record = json.dumps(record).encode("utf-8")
            print('produce message')
            print(record)

            # Without a delivery callback a message that times out or is
            # rejected by the broker would be dropped silently.
            producer.produce(topic=TRANSACTIONS_TOPIC,
                             value=record,
                             on_delivery=_log_delivery)
            # Block until the record is delivered or fails; this also
            # triggers the delivery callback.
            producer.flush()
            _id += 1
            time.sleep(DELAY)
    except KeyboardInterrupt:
        print("Stopped")
    finally:
        # Drain anything still queued before leaving the cell.
        producer.flush()

This code is printed by Le Nguyen Hoang Phuc - 23521198
produce message
b'{"id": 0, "data": [[-1.996, -1.442]], "current_time": "2025-11-25T08:53:13.019329"}'
This code is printed by Le Nguyen Hoang Phuc - 23521198
produce message
b'{"id": 1, "data": [[1.394, 1.879]], "current_time": "2025-11-25T08:53:16.024602"}'
This code is printed by Le Nguyen Hoang Phuc - 23521198
produce message
b'{"id": 2, "data": [[-2.026, -1.978]], "current_time": "2025-11-25T08:53:18.187146"}'
This code is printed by Le Nguyen Hoang Phuc - 23521198
produce message
b'{"id": 3, "data": [[-0.725, -2.715]], "current_time": "2025-11-25T08:53:20.192412"}'
This code is printed by Le Nguyen Hoang Phuc - 23521198
produce message
b'{"id": 4, "data": [[-3.215, 3.688]], "current_time": "2025-11-25T08:53:22.197033"}'
This code is printed by Le Nguyen Hoang Phuc - 23521198
produce message
b'{"id": 5, "data": [[-0.378, -3.177]], "current_time": "2025-11-25T08:53:24.200144"}'
This code is printed by Le Nguyen Hoang Phuc - 23