In [1]:
import logging
import socket
from confluent_kafka import Producer, Consumer
import json
import os
from joblib import load
import logging
import numpy as np

In [2]:
# Connection and topic settings shared by the Kafka helpers defined in this notebook.
NUM_PARTITIONS = 3  # presumably the partition count of the transactions topic — TODO confirm
KAFKA_BROKER = "kafka:9092"  # passed as "bootstrap.servers" to both the producer and the consumer
TRANSACTIONS_TOPIC = "transactions"  # topic that detect() subscribes to
TRANSACTIONS_CONSUMER_GROUP = "transactions"  # consumer "group.id" used by detect()
ANOMALIES_TOPIC = "anomalies"  # topic that records flagged as anomalous are produced to

In [3]:
def create_producer():
    """Build the Kafka producer used to publish records.

    Returns:
        A configured ``Producer`` connected to ``KAFKA_BROKER``, or ``None``
        when construction fails (the exception is logged, not re-raised).
    """
    try:
        settings = {
            "bootstrap.servers": KAFKA_BROKER,
            "client.id": socket.gethostname(),
            "enable.idempotence": True,  # request an idempotent producer
            "compression.type": "lz4",
            "batch.size": 64000,
            "linger.ms": 10,
            "acks": "all",  # wait for acknowledgement from all in-sync replicas
            "retries": 5,
            "delivery.timeout.ms": 1000,  # overall budget for delivery, retries included
        }
        return Producer(settings)
    except Exception:
        # Best-effort factory: report the failure and let the caller check for None.
        logging.exception("Couldn't create the producer")
        return None


def create_consumer(topic, group_id):
    """Build a Kafka consumer and subscribe it to a single topic.

    Args:
        topic: Name of the topic to subscribe to.
        group_id: Consumer group the consumer joins ("group.id").

    Returns:
        A subscribed ``Consumer``, or ``None`` when creation or subscription
        fails (the exception is logged, not re-raised).

    Notes:
        Auto commit is disabled, so offsets are only stored if the caller
        commits them explicitly.
    """
    try:
        # Settings are given at the top level: the nested "default.topic.config"
        # dictionary is deprecated, and "enable.auto.commit" is a consumer-level
        # property rather than a topic-level one.
        config = {
            "bootstrap.servers": KAFKA_BROKER,
            "group.id": group_id,
            "client.id": socket.gethostname(),
            "isolation.level": "read_committed",
            "auto.offset.reset": "latest",  # Only consume new messages
            "enable.auto.commit": False,
        }
        consumer = Consumer(config)
        consumer.subscribe([topic])
    except Exception:
        # Best-effort factory: report the failure and let the caller check for None.
        logging.exception("Couldn't create the consumer")
        consumer = None

    return consumer

# Absolute path of the serialized model file, resolved against the current working
# directory; detect() loads it with joblib's load().
model_path = os.path.abspath('./isolation_forest.joblib')


def detect(poll_timeout: float = 1.0) -> None:
    """Consume transactions, classify them with the saved model and forward anomalies.

    Each message from ``TRANSACTIONS_TOPIC`` is decoded as UTF-8 JSON, its
    ``"data"`` entry is passed to the model's ``predict``. A prediction of ``-1``
    is treated as an anomaly: the record gets a rounded ``"score"`` field and is
    published to ``ANOMALIES_TOPIC``.

    The loop never ends on its own; it stops only when an exception (for example
    ``KeyboardInterrupt``) propagates out. Offsets are not committed here, and
    ``create_consumer`` disables auto commit.

    Args:
        poll_timeout: Maximum number of seconds a single ``poll`` call may block.
            A short value keeps the loop returning to Python regularly instead of
            sitting in one long blocking call.

    Raises:
        RuntimeError: If the consumer or the producer could not be created.
    """
    consumer = create_consumer(topic=TRANSACTIONS_TOPIC, group_id=TRANSACTIONS_CONSUMER_GROUP)
    if consumer is None:
        # Fail with a clear message instead of an AttributeError on None later.
        raise RuntimeError("Kafka consumer could not be created")

    try:
        producer = create_producer()
        if producer is None:
            raise RuntimeError("Kafka producer could not be created")

        # NOTE: joblib's load() unpickles the file, so only trusted model files
        # must be placed at model_path.
        clf = load(model_path)

        while True:
            message = consumer.poll(timeout=poll_timeout)
            if message is None:
                continue
            if message.error():
                logging.error("Consumer error: {}".format(message.error()))
                continue

            # Message that came from producer
            record = json.loads(message.value().decode('utf-8'))
            data = record["data"]
            print(data)

            prediction = clf.predict(data)
            if prediction[0] == 1:
                print('Normal')
            elif prediction[0] == -1:
                # If an anomaly comes in, send it to anomalies topic
                print('Abnormal')
                score = clf.score_samples(data)
                record["score"] = np.round(score, 3).tolist()

                payload = json.dumps(record).encode("utf-8")

                producer.produce(topic=ANOMALIES_TOPIC, value=payload)
                # Block until the alert is delivered before handling the next message.
                producer.flush()
                print(payload)
                print('Alert send')
    finally:
        # Always release the consumer, including on KeyboardInterrupt or errors.
        consumer.close()
# detect() runs an endless polling loop and only leaves it by raising, so a loop
# over NUM_PARTITIONS would never get past its first call. Run one consumer here;
# to consume with several consumers, start additional copies of this process
# (presumably with the same consumer group — confirm against the deployment).
try:
    print("This code is printed by Le Nguyen Hoang Phuc - 23521198")
    detect()
except KeyboardInterrupt:
    # Interrupting the kernel is the intended way to stop the consumer.
    print("Break")

This code is printed by Le Nguyen Hoang Phuc - 23521198
[[1.983, 1.836]]
Normal
[[-2.505, -1.868]]
Normal
[[-2.339, -1.921]]
Normal
[[-2.263, -1.64]]
Normal
[[-2.026, -2.022]]
Normal
[[1.856, 2.363]]
Normal
[[-2.0, -1.742]]
Normal
[[-1.594, -2.1]]
Normal
[[-1.532, 1.505]]
Abnormal
b'{"id": 43, "data": [[-1.532, 1.505]], "current_time": "2025-11-25T08:54:40.399177", "score": [-0.674]}'
Alert send
[[-1.768, -2.491]]
Normal
[[0.985, -1.499]]
Abnormal
b'{"id": 45, "data": [[0.985, -1.499]], "current_time": "2025-11-25T08:54:44.406817", "score": [-0.709]}'
Alert send
[[1.695, 1.751]]
Normal
[[-1.759, -1.725]]
Normal
[[2.022, 2.063]]
Normal
[[-3.848, -3.488]]
Abnormal
b'{"id": 49, "data": [[-3.848, -3.488]], "current_time": "2025-11-25T08:54:52.415483", "score": [-0.735]}'
Alert send
[[2.095, 1.992]]
Normal
[[3.599, -0.291]]
Abnormal
b'{"id": 51, "data": [[3.599, -0.291]], "current_time": "2025-11-25T08:54:56.420695", "score": [-0.757]}'
Alert send
[[-2.138, -2.08]]
Normal
[[-1.923, -2.31]]
