In [51]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score, precision_score, recall_score, f1_score
import time

In [52]:
# Import the online learning metrics and algorithms from the River library
from river import metrics
from river import stream
from river import tree,neighbors,naive_bayes,ensemble,linear_model
from river.drift import ADWIN, KSWIN
from river.drift.binary import DDM, EDDM
from river import forest

In [53]:
def get_data():
    df  = pd.read_csv("dataverse/rt_8873985678962563_gradual.csv")

    X, y = [], []

    for i in range(len(df)):
        X.append([df.loc[i, "X1"], df.loc[i, "X2"]])
        y.append(df.loc[i, "class"])

    return np.array(X), np.array(y)

# Generate the dataset
X, y = get_data()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [54]:
# Define a generic adaptive learning function
# The argument "model" means an online adaptive learning algorithm
def adaptive_learning(model, drift_detector, X_train, y_train, X_test, y_test):
    metric = metrics.Accuracy() # Use accuracy as the metric
    i = 0 # count the number of evaluated data points
    t = [] # record the number of evaluated data points
    m = [] # record the real-time accuracy
    yt = [] # record all the true labels of the test set
    yp = [] # record all the predicted labels of the test set
    
    drift_detect = drift_detector
    print('drift_detector: ', drift_detector)
    
    # Learn the training set
    for xi1, yi1 in stream.iter_array(X_train, y_train):
        model.learn_one(xi1,yi1)


    # Predict the test set
    for xi, yi in stream.iter_array(X_test, y_test):
        y_pred= model.predict_one(xi)  # Predict the test sample
        model.learn_one(xi,yi) # Learn the test sample
#         print("yi: "+ str(yi))
#         print("y_pred: "+ str(y_pred))
        metric.update(yi, y_pred) # Update the real-time accuracy
        
        #drift_detection part
#         val = 0
#         if yi != y_pred:
#             val = 1 
#         in_drift = drift_detect.update(yi)
#         if in_drift:
#             print(f"Change detected at index {i}")

        drift_detect.update(float(yi))
        if drift_detect.drift_detected:
            print(f"Change detected at index {i}")
            if str(drift_detect) != 'EDDM':
                drift_detect.reset()
            
            
#         val = 0
#         if yi != y_pred:
#             val = 1 
#         drift_detect.update(float(val))
#         if drift_detector.drift_detected:
#             print(f"Change detected at index {i}")
        
        t.append(i)
#         print("metric: "+ str(metric))
#         if metric == None:
#             m.append(0*100)
#         else:
#             m.append(metric.get()*100)
            
        m.append(metric.get()*100)
            
        yt.append(yi)
        yp.append(y_pred)
        i = i+1
    print("Accuracy: "+str(round(accuracy_score(yt,yp),4)*100)+"%")
    print("Precision: "+str(round(precision_score(yt,yp),4)*100)+"%")
    print("Recall: "+str(round(recall_score(yt,yp),4)*100)+"%")
    print("F1-score: "+str(round(f1_score(yt,yp),4)*100)+"%")
    return t, m

In [55]:
# Use the Gaussian Naive Bayes as a baseline model
name_01 = "Gaussian Naive Bayes with DDM"
model_01 = naive_bayes.GaussianNB() # Define the model
drift_detector_01 = DDM() # Define the drift detector
t, m_01 = adaptive_learning(model_01, drift_detector_01, X_train, y_train, X_test, y_test) # Learn the model on the dataset
# acc_fig(t, m_01, name_01) # Draw the figure of how the real-time accuracy changes with the number of samples

drift_detector:  DDM
Accuracy: 65.89%
Precision: 67.02%
Recall: 57.98%
F1-score: 62.17%


In [56]:
# Drift detection
drift_detector = ADWIN()
# drifts = []

for i, (X, y) in enumerate(zip(X_train, y_train)):
    drift_detector.update(y)   # Here we are using the label for drift detection
    if drift_detector.drift_detected:
        print(f'Change detected at index {i}')
#         drifts.append(i)
        drift_detector.reset()   # Resetting the detector after a drift is detected