In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score
from drift_detectors.ddm import DDM
from drift_detectors.basic_window_ddm import BasicWindowDDM
from model_handlers.model_trainer import ModelTrainerTester
from sklearn.ensemble import RandomForestClassifier
from kafka import KafkaConsumer, KafkaProducer
import json
from data_loaders.train_loader import TrainLoader

In [2]:
# import csv training data
data_folder = '../../hai_dataset/hai/hai-21.03'
data_filenames = ['train1.csv', 'train2.csv', 'train3.csv']
label_columns = ['attack', 'attack_P1', 'attack_P2', 'attack_P3']

In [3]:
data_loader = TrainLoader(data_folder=data_folder, data_filenames=data_filenames, label_columns=label_columns)
df = data_loader.get_data()
X_train, X_test, y_train, y_test = data_loader.split_data(data_frame=df)

In [4]:
clf = RandomForestClassifier()
model_handler = ModelTrainerTester(classifier=clf, X_train=X_train, y_train=y_train)
clf = model_handler.train_model()
_, tr_accuracy = model_handler.test_model(X_test=X_test, y_test=y_test)

In [9]:
tr_accuracy

1.0

In [10]:
# filter df attack == 1
df[df['attack'] > 0]
# there are no attacks in the training data -> impossible to train a model

Unnamed: 0,P1_B2004,P1_B2016,P1_B3004,P1_B3005,P1_B4002,P1_B4005,P1_B400B,P1_B4022,P1_FCV01D,P1_FCV01Z,...,P4_ST_GOV,P4_ST_LD,P4_ST_PO,P4_ST_PS,P4_ST_PT01,P4_ST_TT01,attack,attack_P1,attack_P2,attack_P3


In [5]:
consumer = KafkaConsumer(
    'hai-preprocessed',
    bootstrap_servers=['localhost:9092'],
    auto_offset_reset='earliest',
    enable_auto_commit=True,
    value_deserializer=lambda x: json.loads(x.decode('utf-8'))
)

producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],
    value_serializer=lambda x: json.dumps(x).encode('utf-8'))

In [6]:
# ddm = DDM()

In [7]:
ddm = BasicWindowDDM()

In [8]:
# consume the streamed data from kafka and detect drift
for i, msg in enumerate(consumer):
    # get the data from the message
    data = msg.value
    #print(data['features'])
    # convert the dictionary to a dataframe
    X = pd.DataFrame(data['features'], index=[0])
    # get the labels
    y = pd.DataFrame(data['labels'], index=[0])
    # predict the labels
    y_pred = clf.predict(X)
    # get the accuracy
    accuracy = accuracy_score(y, y_pred)
    # detect drift
    ddm.add_element(accuracy)
    warning_detected = ddm.detected_warning_zone()
    drift_detected = ddm.detected_change()
    
    # send the results
    producer.send('hai-results', value={'accuracy': accuracy, 'warning_detected': int(warning_detected), 'drift_detected': int(drift_detected)})
    
    # print the results
    '''if accuracy < 0.5:
        print('Iteration {}'.format(i), 'Accuracy {}'.format(accuracy))'''
    if i % 1000 == 0:
        print('Iteration {}'.format(i), 'Accuracy {}'.format(accuracy))
    if drift_detected:
        print(f'Iteration {i} - accuracy: {accuracy} - drift detected: {drift_detected}')
    i += 1

Iteration 0 Accuracy 1.0
Iteration 1000 Accuracy 1.0
Iteration 2000 Accuracy 1.0
Iteration 2120 - accuracy: 0.0 - drift detected: True
Iteration 2140 - accuracy: 0.0 - drift detected: True
Iteration 2160 - accuracy: 0.0 - drift detected: True
Iteration 2180 - accuracy: 0.0 - drift detected: True
Iteration 2200 - accuracy: 0.0 - drift detected: True
Iteration 2220 - accuracy: 0.0 - drift detected: True
Iteration 2240 - accuracy: 0.0 - drift detected: True
Iteration 2260 - accuracy: 0.0 - drift detected: True
Iteration 2280 - accuracy: 0.0 - drift detected: True
Iteration 2300 - accuracy: 0.0 - drift detected: True
Iteration 3000 Accuracy 1.0
Iteration 4000 Accuracy 1.0
Iteration 5000 Accuracy 1.0
Iteration 6000 Accuracy 1.0
Iteration 7000 Accuracy 1.0
Iteration 8000 Accuracy 1.0
Iteration 8900 - accuracy: 0.0 - drift detected: True
Iteration 8920 - accuracy: 0.0 - drift detected: True
Iteration 8940 - accuracy: 0.0 - drift detected: True
Iteration 8960 - accuracy: 0.0 - drift detected: 

KeyboardInterrupt: 

In [None]:
'''
from(bucket: "mema_bucket")
  |> range(start: v.timeRangeStart, stop:v.timeRangeStop)
  |> filter(fn: (r) => r._measurement == "hai_results" and r._field == "accuracy" and r._value < 0.5)

'''