In [8]:
import pandas as pd
from sklearn.metrics import accuracy_score
from drift_detectors import DDM, BasicWindowDDM, HDDM_W, ECDD
from kafka import KafkaConsumer, KafkaProducer
import json
from joblib import load

## Define the Kafka consumer and producer
The consumer reads the preprocessed data from the `hai-preprocessed` topic, and the producer publishes the drift detection results to the `hai-results` topic.

In [4]:
def get_kafka_instances(topic='hai-preprocessed', bootstrap_servers=None):
    """Create the Kafka consumer/producer pair used by the drift-detection cells.

    Parameters
    ----------
    topic : str, optional
        Topic the consumer subscribes to. Defaults to ``'hai-preprocessed'``.
    bootstrap_servers : str or list of str, optional
        Kafka broker address(es) used by both clients. Defaults to
        ``['localhost:9092']`` when omitted.

    Returns
    -------
    tuple
        ``(consumer, producer)`` where ``consumer`` decodes every message value
        from UTF-8 JSON and ``producer`` encodes every value to UTF-8 JSON.
    """
    servers = ['localhost:9092'] if bootstrap_servers is None else bootstrap_servers

    # NOTE(review): no group_id is configured. According to the kafka-python
    # documentation, offset commits are disabled when group_id is None, so
    # enable_auto_commit=True presumably has no effect here and every new
    # consumer starts again from the 'earliest' offset — confirm this is the
    # intended behaviour (it lets each detector replay the same stream).
    consumer = KafkaConsumer(
        topic,
        bootstrap_servers=servers,
        auto_offset_reset='earliest',
        enable_auto_commit=True,
        value_deserializer=lambda raw: json.loads(raw.decode('utf-8'))
    )

    producer = KafkaProducer(
        bootstrap_servers=servers,
        value_serializer=lambda payload: json.dumps(payload).encode('utf-8'))

    return consumer, producer

## Import the model

In [5]:
# Load the classifier from disk; `clf` is the model handed to drift_detection()
# by the detector cells further down (presumably the already-trained base model).
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code,
# so only load model files from a trusted source. Also confirm that the installed
# scikit-learn version matches the one the model was saved with.
clf = load('models/base_model.joblib')

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


## Drift detection function

In [6]:
def drift_detection(consumer, producer, clf, drift_detector):
    """Score streamed samples with ``clf`` and publish drift-detection results.

    For every message read from ``consumer`` the function builds a one-row
    DataFrame from ``msg.value['features']`` and another from
    ``msg.value['labels']``, predicts with ``clf``, computes the accuracy of
    that single prediction, feeds the accuracy to ``drift_detector`` and sends
    one result record to the ``hai-results`` topic through ``producer``.

    The loop runs until the consumer iterator ends or the user interrupts the
    kernel (``KeyboardInterrupt`` is caught and reported). On exit the
    producer is flushed, so results still buffered by the asynchronous
    ``send`` calls are delivered, and the consumer is closed.

    Parameters
    ----------
    consumer : iterable of messages
        Kafka consumer whose messages expose a ``value`` dict with the keys
        ``'features'`` and ``'labels'`` (``'labels'`` must contain ``'attack'``).
    producer : object with ``send`` and ``flush``
        Kafka producer used to publish the per-sample results.
    clf : object with ``predict``
        Classifier applied to each sample.
    drift_detector : object
        Detector exposing ``add_element``, ``detected_warning_zone``,
        ``detected_change`` and ``get_id``.

    Returns
    -------
    None
    """
    try:
        # Count from 1 so the printed iteration numbers are human-friendly.
        for iteration, msg in enumerate(consumer, start=1):
            data = msg.value

            # Each message carries exactly one sample: build one-row frames.
            X = pd.DataFrame(data['features'], index=[0])
            y = pd.DataFrame(data['labels'], index=[0])

            y_pred = clf.predict(X)
            accuracy = accuracy_score(y, y_pred)

            # NOTE(review): the detector is fed the per-sample accuracy
            # (1.0 = correct prediction). Some drift-detector APIs expect an
            # error indicator instead — confirm the convention used by the
            # `drift_detectors` module.
            drift_detector.add_element(accuracy)
            warning_detected = drift_detector.detected_warning_zone()
            drift_detected = drift_detector.detected_change()

            # Publish the result for this sample (send() is asynchronous).
            producer.send(
                'hai-results',
                value={
                    'drift_detector': drift_detector.get_id(),
                    'attack': data['labels']['attack'],
                    'accuracy': accuracy,
                    'warning_detected': int(warning_detected),
                    'drift_detected': int(drift_detected)
                })

            # Progress heartbeat every 10,000 samples, plus every drift event.
            if iteration % 10000 == 0:
                print('Iteration {}'.format(iteration), 'Accuracy {}'.format(accuracy))
            if drift_detected:
                print(f'Iteration {iteration} Accuracy: {accuracy} - drift detected!')
    except KeyboardInterrupt:
        print("Consumer interrupted. Stopping...")
    finally:
        try:
            # Deliver any results still sitting in the producer's buffer;
            # without this they can be lost when the cell finishes.
            producer.flush()
        finally:
            # Always release the consumer, even if the flush itself fails.
            consumer.close()

## Drift detection with DDM

In [None]:
# DDM run: create the detector, open a new consumer/producer pair, then
# stream predictions through the detector until the stream ends or the
# kernel is interrupted.
drift_detector = DDM()
consumer, producer = get_kafka_instances()
drift_detection(consumer=consumer, producer=producer, clf=clf, drift_detector=drift_detector)

Iteration 2704 Accuracy: 1.0 - drift detected!
Iteration 10000 Accuracy 1.0
Iteration 10789 Accuracy: 1.0 - drift detected!
Iteration 15213 Accuracy: 1.0 - drift detected!
Iteration 20000 Accuracy 1.0
Iteration 21037 Accuracy: 1.0 - drift detected!
Iteration 22134 Accuracy: 1.0 - drift detected!
Iteration 30000 Accuracy 1.0
Iteration 40000 Accuracy 1.0
Iteration 50000 Accuracy 1.0
Iteration 56253 Accuracy: 1.0 - drift detected!
Iteration 59840 Accuracy: 1.0 - drift detected!
Iteration 60000 Accuracy 1.0


## Drift detection with HDDM_W

In [None]:
# HDDM_W run: create the detector, open a new consumer/producer pair, then
# stream predictions through the detector until the stream ends or the
# kernel is interrupted.
drift_detector = HDDM_W()
consumer, producer = get_kafka_instances()
drift_detection(consumer=consumer, producer=producer, clf=clf, drift_detector=drift_detector)

Iteration 2318 Accuracy: 1.0 - drift detected!
Iteration 9004 Accuracy: 1.0 - drift detected!
Iteration 10000 Accuracy 1.0
Iteration 14556 Accuracy: 1.0 - drift detected!
Iteration 19342 Accuracy: 1.0 - drift detected!
Iteration 20000 Accuracy 1.0
Iteration 21905 Accuracy: 1.0 - drift detected!
Iteration 30000 Accuracy 1.0
Iteration 40000 Accuracy 1.0
Iteration 46120 Accuracy: 1.0 - drift detected!
Iteration 50000 Accuracy 1.0
Iteration 53118 Accuracy: 1.0 - drift detected!
Iteration 57595 Accuracy: 1.0 - drift detected!
Iteration 60000 Accuracy 1.0
Iteration 62075 Accuracy: 1.0 - drift detected!
Iteration 65839 Accuracy: 1.0 - drift detected!
Iteration 70000 Accuracy 1.0
Iteration 72392 Accuracy: 1.0 - drift detected!
Iteration 79765 Accuracy: 1.0 - drift detected!
Iteration 80000 Accuracy 1.0
Iteration 81458 Accuracy: 1.0 - drift detected!
Iteration 86806 Accuracy: 1.0 - drift detected!
Iteration 90000 Accuracy 1.0
Iteration 95777 Accuracy: 1.0 - drift detected!
Iteration 100000 Accu

## Drift detection with BasicWindowDDM

In [5]:
# BasicWindowDDM run: create the detector, open a new consumer/producer pair,
# then stream predictions through the detector until the stream ends or the
# kernel is interrupted.
drift_detector = BasicWindowDDM()
consumer, producer = get_kafka_instances()
drift_detection(consumer=consumer, producer=producer, clf=clf, drift_detector=drift_detector)

Iteration 2121 Accuracy: 0.0 - drift detected!
Iteration 2141 Accuracy: 0.0 - drift detected!
Iteration 2161 Accuracy: 0.0 - drift detected!
Iteration 2181 Accuracy: 0.0 - drift detected!
Iteration 2201 Accuracy: 0.0 - drift detected!
Iteration 2221 Accuracy: 0.0 - drift detected!
Iteration 2241 Accuracy: 0.0 - drift detected!
Iteration 2261 Accuracy: 0.0 - drift detected!
Iteration 2281 Accuracy: 0.0 - drift detected!
Iteration 2301 Accuracy: 0.0 - drift detected!
Iteration 8901 Accuracy: 0.0 - drift detected!
Iteration 8921 Accuracy: 0.0 - drift detected!
Iteration 8941 Accuracy: 0.0 - drift detected!
Iteration 8961 Accuracy: 0.0 - drift detected!
Iteration 8981 Accuracy: 0.0 - drift detected!
Iteration 10000 Accuracy 1.0
Iteration 14361 Accuracy: 0.0 - drift detected!
Iteration 14381 Accuracy: 0.0 - drift detected!
Iteration 14401 Accuracy: 0.0 - drift detected!
Iteration 14421 Accuracy: 0.0 - drift detected!
Iteration 14441 Accuracy: 0.0 - drift detected!
Iteration 14461 Accuracy: 

## Drift detection with ECDD

In [9]:
# ECDD run: create the detector, open a new consumer/producer pair, then
# stream predictions through the detector until the stream ends or the
# kernel is interrupted.
drift_detector = ECDD()
consumer, producer = get_kafka_instances()
drift_detection(consumer=consumer, producer=producer, clf=clf, drift_detector=drift_detector)

Iteration 2114 Accuracy: 0.0 - drift detected!
Iteration 2117 Accuracy: 0.0 - drift detected!
Iteration 2120 Accuracy: 0.0 - drift detected!
Iteration 2123 Accuracy: 0.0 - drift detected!
Iteration 2126 Accuracy: 0.0 - drift detected!
Iteration 2129 Accuracy: 0.0 - drift detected!
Iteration 2132 Accuracy: 0.0 - drift detected!
Iteration 2135 Accuracy: 0.0 - drift detected!
Iteration 2138 Accuracy: 0.0 - drift detected!
Iteration 2141 Accuracy: 0.0 - drift detected!
Iteration 2144 Accuracy: 0.0 - drift detected!
Iteration 2147 Accuracy: 0.0 - drift detected!
Iteration 2150 Accuracy: 0.0 - drift detected!
Iteration 2153 Accuracy: 0.0 - drift detected!
Iteration 2156 Accuracy: 0.0 - drift detected!
Iteration 2159 Accuracy: 0.0 - drift detected!
Iteration 2162 Accuracy: 0.0 - drift detected!
Iteration 2165 Accuracy: 0.0 - drift detected!
Iteration 2168 Accuracy: 0.0 - drift detected!
Iteration 2171 Accuracy: 0.0 - drift detected!
Iteration 2174 Accuracy: 0.0 - drift detected!
Iteration 217

## Other, not important things

In [None]:
# NOTE(review): this cell holds no Python logic. The string below appears to be
# a Flux (InfluxDB) query kept for reference: it selects points of the
# "hai_results" measurement whose "accuracy" field is below 0.5 within the
# selected time range. Presumably it is meant to be pasted into a dashboard —
# confirm, and consider moving it to a markdown cell.
'''
from(bucket: "mema_bucket")
  |> range(start: v.timeRangeStart, stop:v.timeRangeStop)
  |> filter(fn: (r) => r._measurement == "hai_results" and r._field == "accuracy" and r._value < 0.5)

'''