## Implementation of Concept Drift Detection and Adaptation methods and their comparison in IoT Data streams

## Importing and installing libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score, precision_score, recall_score, f1_score
import lightgbm as lgb
import time

## Reading the dataset

In [None]:
# Load the dataset CSV into a DataFrame; its 'Label' column is used as the
# classification target further down.
df = pd.read_csv(filepath_or_buffer="./data/IoT_2020_b_0.01_fs.csv")


## Training and Testing data split


In [None]:
# Hold-out split without shuffling: the first 10% of the rows form the
# training set and the remaining 90% form the test set.
y = df['Label']
X = df.drop(columns=['Label'])

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.1,
    test_size=0.9,
    shuffle=False,
    random_state=0,
)

In [None]:
## Importing the online learning metrics and algorithms from the River library

from river import metrics
from river import stream
from river import tree,neighbors,naive_bayes,ensemble,linear_model
from river.drift import DDM, ADWIN

## Adaptive Learning Function

In [None]:
# Creating a generic adaptive learning function


def adaptive_learning(model, X_train, y_train, X_test, y_test):
    """Train an online model, then evaluate it sample by sample on a test stream.

    The model first learns every sample of the training set. Each test sample
    is then predicted *before* the model learns from it (test-then-train), and
    the running accuracy is recorded after every sample. At the end, accuracy,
    precision, recall and F1-score over the whole test stream are printed
    using the scikit-learn scorers called with their default arguments.

    Parameters
    ----------
    model : object exposing ``learn_one(x, y)`` and ``predict_one(x)``.
    X_train, y_train : training features and labels, consumed through
        ``stream.iter_pandas``.
    X_test, y_test : test features and labels, consumed the same way.

    Returns
    -------
    j : list of int
        Index (0-based) of each evaluated test sample.
    k : list of float
        Running accuracy in percent after each evaluated test sample.
    """
    metric = metrics.Accuracy()  # running accuracy over the test stream
    j = []   # indices of the evaluated samples
    k = []   # running accuracy (%) after each sample
    xt = []  # true labels of the test stream
    xp = []  # predicted labels of the test stream

    # Initial training pass over the training set.
    for xi1, yi1 in stream.iter_pandas(X_train, y_train):
        model.learn_one(xi1, yi1)

    # Test-then-train pass over the test set.
    for i, (xi, yi) in enumerate(stream.iter_pandas(X_test, y_test)):
        y_pred = model.predict_one(xi)  # predict first ...
        model.learn_one(xi, yi)         # ... then learn from the sample

        # The metric is updated in place. Its return value is deliberately
        # ignored: rebinding ``metric`` to it would break the loop if
        # ``update`` returns None instead of the metric itself.
        metric.update(yi, y_pred)

        j.append(i)
        k.append(metric.get() * 100)
        xt.append(yi)
        xp.append(y_pred)

    # Scale to percent *before* rounding so the printed value does not show
    # floating-point artefacts such as 99.17999999999999.
    for label, scorer in (
        ("Accuracy", accuracy_score),
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1-score", f1_score),
    ):
        print(label + ": " + str(round(scorer(xt, xp) * 100, 2)) + "%")

    return j, k

### Accuracy changes figure

In [None]:
def acc_fig(j, k, name):
    """Plot the running accuracy of one model against the number of samples.

    Parameters
    ----------
    j : sequence of sample indices (x-axis).
    k : sequence of running accuracies in percent (y-axis); must be
        non-empty because its last value is shown in the legend.
    name : model name, used as the prefix of the figure title.
    """
    plt.rcParams.update({'font.size': 15})
    plt.figure(1, figsize=(10, 6))
    sns.set_style("darkgrid")
    plt.clf()  # figure 1 is reused across calls, so clear it first

    # The legend reports the final running accuracy, i.e. the last value of
    # ``k`` (the original referenced an undefined name here).
    plt.plot(j, k, '-b', label='Avg Accuracy: %.2f%%' % (k[-1]))

    plt.legend(loc='best')
    plt.title(name + ' on IoTID20 dataset', fontsize=15)
    plt.xlabel('Number of samples')
    plt.ylabel('Accuracy (%)')

    plt.draw()
## Drift Detection and Adaptation models

In [None]:
# %%time

# ARF + ADWIN: Adaptive Random Forest with three base models and an ADWIN
# drift detector.
name1 = "ARF model with ADWIN drift detector "
model1 = ensemble.AdaptiveRandomForestClassifier(
    n_models=3,
    drift_detector=ADWIN(),
)

# Train on the training split, evaluate on the test stream, then plot the
# running accuracy.
j, k1 = adaptive_learning(model1, X_train, y_train, X_test, y_test)
acc_fig(j, k1, name1)

In [None]:
%%time

# ARF + DDM: Adaptive Random Forest with three base models and a DDM drift
# detector.
name2 = "ARF model with DDM drift detector"
model2 = ensemble.AdaptiveRandomForestClassifier(
    n_models=3,
    drift_detector=DDM(),
)

# Train on the training split, evaluate on the test stream, then plot the
# running accuracy.
j, k2 = adaptive_learning(model2, X_train, y_train, X_test, y_test)
acc_fig(j, k2, name2)

In [None]:
%%time

# SRP + ADWIN: Streaming Random Patches with three base models and an ADWIN
# drift detector.
name3 = "SRP model with ADWIN drift detector"
model3 = ensemble.SRPClassifier(
    n_models=3,
    drift_detector=ADWIN(),
)

# Train on the training split, evaluate on the test stream, then plot the
# running accuracy.
j, k3 = adaptive_learning(model3, X_train, y_train, X_test, y_test)
acc_fig(j, k3, name3)

In [None]:
%%time

# SRP + DDM: Streaming Random Patches with three base models and a DDM drift
# detector.
name4 = "SRP model with DDM drift detector"
model4 = ensemble.SRPClassifier(
    n_models=3,
    drift_detector=DDM(),
)

# Train on the training split, evaluate on the test stream, then plot the
# running accuracy.
j, k4 = adaptive_learning(model4, X_train, y_train, X_test, y_test)
acc_fig(j, k4, name4)

In [None]:
%%time

# EFDT: Extremely Fast Decision Tree classifier with its default settings.
name5 = "EFDT model"
model5 = tree.ExtremelyFastDecisionTreeClassifier()

# Train on the training split, evaluate on the test stream, then plot the
# running accuracy.
j, k5 = adaptive_learning(model5, X_train, y_train, X_test, y_test)
acc_fig(j, k5, name5)

In [None]:
%%time
# Hoeffding Tree (HT) model

# model name
name6 = "HT model"

# defining model
model6 = tree.HoeffdingTreeClassifier()

# learning model: returns the sample indices and the running accuracy (%)
j, k6 = adaptive_learning(model6, X_train, y_train, X_test, y_test)

# accuracy change figure -- plot the values returned just above (the original
# passed the undefined names `t` and `m6`, which raised a NameError)
acc_fig(j, k6, name6)

In [None]:
%%time
# LB: Leveraging Bagging ensemble of three Hoeffding Tree classifiers.
name7 = "LB model"
model7 = ensemble.LeveragingBaggingClassifier(
    model=tree.HoeffdingTreeClassifier(),
    n_models=3,
)

# Train on the training split, evaluate on the test stream, then plot the
# running accuracy.
j, k7 = adaptive_learning(model7, X_train, y_train, X_test, y_test)
acc_fig(j, k7, name7)

### Average weighted model

In [None]:

def _class_probabilities(y_pred, y_prob):
    """Return ``(p0, p1)``, one base learner's probabilities for classes 0 and 1.

    Only the probability of the predicted class is read from ``y_prob``; the
    other class gets its complement, so the pair always sums to 1.
    """
    if y_pred == 1:
        p1 = y_prob[1]
        return 1 - p1, p1
    p0 = y_prob[0]
    return p0, 1 - p0


def AWM(X_train, y_train, X_test, y_test):
    """Evaluate a weighted-average ensemble of four online learners.

    Four base learners (two Adaptive Random Forests and two Streaming Random
    Patches ensembles, three base models each) learn the training set and are
    then evaluated test-then-train on the test stream. For every test sample
    each learner is weighted by the reciprocal of its running error rate, and
    the ensemble predicts the class (0 or 1) with the larger weighted
    probability. Accuracy, precision, recall and F1-score of the ensemble
    over the whole test stream are printed at the end.

    The labels are treated as binary with classes 0 and 1.

    Parameters
    ----------
    X_train, y_train : training features and labels, consumed through
        ``stream.iter_pandas``.
    X_test, y_test : test features and labels, consumed the same way.

    Returns
    -------
    j : list of int
        Index (0-based) of each evaluated test sample.
    k : list of float
        Running ensemble accuracy in percent after each evaluated sample.
    """
    epsilon = 0.001  # keeps the reciprocal finite when a learner has zero error

    learners = [
        # ARF with the library's default detectors (labelled ARF-ADWIN by the
        # original author -- TODO confirm the default is ADWIN)
        ensemble.AdaptiveRandomForestClassifier(n_models=3),
        # SRP with the library's default detectors (labelled SRP-ADWIN)
        ensemble.SRPClassifier(n_models=3),
        # ARF-DDM
        ensemble.AdaptiveRandomForestClassifier(
            n_models=3, drift_detector=DDM(), warning_detector=DDM()
        ),
        # SRP-DDM
        ensemble.SRPClassifier(
            n_models=3, drift_detector=DDM(), warning_detector=DDM()
        ),
    ]
    learner_metrics = [metrics.Accuracy() for _ in learners]  # one per learner
    metric = metrics.Accuracy()  # running accuracy of the ensemble

    j = []   # indices of the evaluated samples
    k = []   # running ensemble accuracy (%)
    xt = []  # true labels of the test stream
    xp = []  # ensemble predictions on the test stream

    # The four base learners learn the training set.
    for xi1, yi1 in stream.iter_pandas(X_train, y_train):
        for learner in learners:
            learner.learn_one(xi1, yi1)

    # Test-then-train pass over the test set.
    for i, (xi, yi) in enumerate(stream.iter_pandas(X_test, y_test)):
        class_probs = []
        for learner, learner_metric in zip(learners, learner_metrics):
            learner_pred = learner.predict_one(xi)
            learner_prob = learner.predict_proba_one(xi)
            learner.learn_one(xi, yi)

            # Metrics are updated in place; the return value is deliberately
            # ignored so the code does not depend on ``update`` returning
            # the metric itself.
            learner_metric.update(yi, learner_pred)
            class_probs.append(_class_probabilities(learner_pred, learner_prob))

        # Weight of each learner: reciprocal of its running error rate,
        # normalised so that the weights sum to 1.
        inverse_errors = [
            1 / ((1 - learner_metric.get()) + epsilon)
            for learner_metric in learner_metrics
        ]
        total = sum(inverse_errors)
        weights = [inverse / total for inverse in inverse_errors]

        # Weighted class probabilities; ties go to class 1.
        y_prob_0 = sum(w * p0 for w, (p0, _) in zip(weights, class_probs))
        y_prob_1 = sum(w * p1 for w, (_, p1) in zip(weights, class_probs))
        y_pred = 0 if y_prob_0 > y_prob_1 else 1

        metric.update(yi, y_pred)

        j.append(i)
        k.append(metric.get() * 100)
        xt.append(yi)
        xp.append(y_pred)

    # Report on the collected labels ``xt``/``xp`` (the original referenced
    # the undefined names ``yt``/``yp`` and raised a NameError here). Scale to
    # percent before rounding to avoid floating-point artefacts in the output.
    for label, scorer in (
        ("Accuracy", accuracy_score),
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1-score", f1_score),
    ):
        print(label + ": " + str(round(scorer(xt, xp) * 100, 2)) + "%")

    return j, k

In [None]:
%%time
# Weighted-average ensemble: evaluate it on the same split as the individual
# models and plot its running accuracy.
name = "Average weighted model"

j, k = AWM(X_train, y_train, X_test, y_test)
acc_fig(j, k, name)


### Comparison and plotting of all models

In [None]:
# One large figure overlaying the running-accuracy curves of all models.
plt.rcParams.update({'font.size': 30})
plt.figure(1, figsize=(24, 15))
sns.set_style("darkgrid")
plt.clf()

# (running accuracy, line format/colour, model name) for every learner; the
# legend entry shows the final running accuracy of each curve.
for curve, line_style, curve_name in (
    (k, '-r', name),
    (k1, '-b', name1),
    (k2, '-g', name2),
    (k3, 'orange', name3),
    (k4, 'black', name4),
    (k5, 'magenta', name5),
    (k6, 'grey', name6),
    (k7, 'brown', name7),
):
    plt.plot(j, curve, line_style,
             label=curve_name + ', Avg Accuracy: %.2f%%' % (curve[-1]))

# Drift points/time: the first entry (0) is only a placeholder, so markers
# are drawn from index 1 onwards and numbered by their index.
dr = [0, 270, 600]
for i in range(1, len(dr)):
    plt.text(dr[i] - 500, 100.8, 'Drift ' + str(i), c="red", fontsize=25)
    plt.vlines(dr[i], 0, 100, colors="red", linewidth=4, linestyles="dashed")

plt.legend(loc='lower right')
plt.ylim(85, 102)
plt.title('Comparsion of all models', fontsize=40)
plt.xlabel('Number of samples')
plt.ylabel('Accuracy (%)')

plt.draw()