In [5]:
import pandas as pd
from sklearn.metrics import accuracy_score
from drift_detectors import DDM, BasicWindowDDM, HDDM_W, ECDD
from kafka import KafkaConsumer, KafkaProducer
import json
from joblib import load

## Define the Kafka consumer and producer
The consumer reads the preprocessed data from the `hai-preprocessed` topic, and the producer publishes the drift detection results to the `hai-results` topic.

In [11]:
# Address of the Kafka broker used by both the consumer and the producer.
KAFKA_BROKER = 'localhost:9092'


def _decode_json(raw):
    """Turn the UTF-8 encoded JSON bytes of a Kafka message into a Python object."""
    return json.loads(raw.decode('utf-8'))


def _encode_json(payload):
    """Serialize a Python object to UTF-8 encoded JSON bytes for Kafka."""
    return json.dumps(payload).encode('utf-8')


# Reads the preprocessed samples, starting from the earliest available offset.
# NOTE(review): no `group_id` is set; with kafka-python that appears to disable
# offset commits, so `enable_auto_commit=True` may have no effect — confirm.
# NOTE(review): no `consumer_timeout_ms` is set, so iterating over this consumer
# presumably blocks forever once the topic is drained — confirm this is intended.
consumer = KafkaConsumer(
    'hai-preprocessed',
    bootstrap_servers=[KAFKA_BROKER],
    auto_offset_reset='earliest',
    enable_auto_commit=True,
    value_deserializer=_decode_json,
)

# Publishes JSON-encoded result records.
producer = KafkaProducer(
    bootstrap_servers=[KAFKA_BROKER],
    value_serializer=_encode_json,
)

## Import the model

In [None]:
# Location of the serialized base classifier, relative to the notebook's working directory.
MODEL_PATH = 'models/base_model.joblib'

# joblib files are pickle-based: only load model files from a trusted source.
clf = load(MODEL_PATH)

## Drift detection function

In [None]:
def drift_detection(consumer, producer, clf, drift_detector):
    """Score streamed samples with ``clf`` and publish drift-detector results.

    For every message yielded by ``consumer`` this builds a one-row feature
    frame and a one-row label frame from ``msg.value['features']`` and
    ``msg.value['labels']``, predicts with ``clf``, computes the accuracy of
    that prediction, feeds the accuracy to ``drift_detector`` and sends one
    result record to the ``hai-results`` topic.

    Parameters
    ----------
    consumer : iterable
        Yields messages exposing a ``value`` mapping with ``'features'`` and
        ``'labels'`` entries (e.g. a ``KafkaConsumer``).
    producer : object
        Must provide ``send(topic, value=...)`` and ``flush()`` (e.g. a
        ``KafkaProducer``).
    clf : object
        Fitted model providing ``predict(X)``.
    drift_detector : object
        Must provide ``add_element``, ``detected_warning_zone``,
        ``detected_change`` and ``get_name``.

    Returns
    -------
    None
        The function returns only when ``consumer`` stops yielding messages.
        Any exception (including ``KeyboardInterrupt``) propagates to the
        caller after pending results have been flushed.
    """
    try:
        for i, msg in enumerate(consumer):
            data = msg.value

            # One-row frames: a single streamed sample and its label(s).
            X = pd.DataFrame(data['features'], index=[0])
            y = pd.DataFrame(data['labels'], index=[0])

            y_pred = clf.predict(X)
            accuracy = accuracy_score(y, y_pred)

            # NOTE(review): the detector is fed the per-sample accuracy. Some
            # DDM-style detectors expect an error indicator instead — confirm
            # against the contract of the `drift_detectors` module.
            drift_detector.add_element(accuracy)
            warning_detected = drift_detector.detected_warning_zone()
            drift_detected = drift_detector.detected_change()

            producer.send(
                'hai-results',
                value={
                    'drift_detector': drift_detector.get_name(),
                    'accuracy': accuracy,
                    'warning_detected': int(warning_detected),
                    'drift_detected': int(drift_detected)
                })

            # Periodic progress line, plus an explicit line whenever drift fires.
            if i % 1000 == 0:
                print(f'Iteration {i}', f'Accuracy {accuracy}')
            if drift_detected:
                print(f'Iteration {i} Accuracy: {accuracy} - drift detected!')
    finally:
        # `send` only queues the record; flush so results already produced are
        # delivered even when the loop ends or the cell is interrupted.
        producer.flush()

## Drift detection with DDM

In [None]:
# Do the drift detection using DDM
drift_detector = DDM()
drift_detection(consumer, producer, clf, drift_detector)

## Drift detection with HDDM_W

In [None]:
# Do the drift detection using HDDM_W
drift_detector = HDDM_W()
drift_detection(consumer, producer, clf, drift_detector)

## Drift detection with BasicWindowDDM

In [None]:
# Do the drift detection using BasicWindowDDM
drift_detector = BasicWindowDDM()
drift_detection(consumer, producer, clf, drift_detector)

## Drift detection with ECDD

In [None]:
# Do the drift detection using ECDD
drift_detector = ECDD()
drift_detection(consumer, producer, clf, drift_detector)

## Miscellaneous notes (not required for the pipeline)

In [None]:
# Scratch note kept as a bare string literal; nothing in this cell runs it as a query.
# The text looks like an InfluxDB Flux query (TODO confirm): it reads from the
# "mema_bucket" bucket over a dashboard-supplied time range and keeps only
# "hai_results" points whose "accuracy" field is below 0.5.
'''
from(bucket: "mema_bucket")
  |> range(start: v.timeRangeStart, stop:v.timeRangeStop)
  |> filter(fn: (r) => r._measurement == "hai_results" and r._field == "accuracy" and r._value < 0.5)

'''