In [None]:
import numpy as np
import pandas as pd
import river
import nannyml as nml
import pickle
import matplotlib.pyplot as plt

# Load river modules
from river import datasets
from river import evaluate
from river import linear_model

from river import metrics
from river import optim
from river import preprocessing
from river import drift
from river.metrics import ClassificationReport

In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Module-level history of every label / prediction passed to
# update_error_window. The lists persist across calls, so they keep growing
# until they are cleared or this cell is re-run.
labels = list()
y_preds = list()

def update_error_window(label, y_pred, window_size=100):
    """Record one (label, prediction) pair and score the most recent window.

    Parameters
    ----------
    label : value accepted by ``int()``
        Ground-truth class of the latest sample.
    y_pred : value accepted by ``int()``
        Predicted class of the latest sample.
    window_size : int, default 100
        Number of most recent pairs the metrics are computed over.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall)`` computed by scikit-learn over the
        last ``window_size`` recorded pairs (fewer while the history is still
        shorter than the window).
    """
    labels.append(int(label))
    y_preds.append(int(y_pred))

    # Slice the plain lists directly: wrapping the *entire* history in a
    # pandas Series on every call costs O(len(history)) per sample, which is
    # quadratic over a long stream. A list slice only copies the window.
    lbl_window = labels[-window_size:]
    pred_window = y_preds[-window_size:]

    accuracy = accuracy_score(lbl_window, pred_window)
    # zero_division=0 yields the same 0.0 as the default for windows without
    # positive predictions/labels, but without an UndefinedMetricWarning on
    # every such call.
    precision = precision_score(lbl_window, pred_window, zero_division=0)
    recall = recall_score(lbl_window, pred_window, zero_division=0)

    return accuracy, precision, recall

In [None]:
# Initialize dataset (synthetic Agrawal stream, seeded with 42)
dataset = datasets.synth.Agrawal(
     classification_function=0,
     seed=42
)

# Load model pickle file
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# model files that come from a trusted source.
# The context manager makes sure the file handle is closed after loading.
with open('saved_models/classification/model.pkl', 'rb') as model_file:
    LogRegmodel = pickle.load(model_file)

# Initialize drift detector
# NOTE(review): delta=1 looks unusually large for ADWIN's significance
# parameter - confirm this value is intentional.
drift_detector = drift.ADWIN(delta=1)

# Initializing lists
drifts = list()                     # stream indices where drift was flagged
correctly_classified_data = list()  # per-sample correctness flags
acc_scores = list()                 # rolling-window accuracy after each sample
y_pred = list()
y_true = list()

# The rolling-window helper stores its history in the module-level `labels`
# and `y_preds` lists; empty them so re-running this cell starts from a clean
# window instead of mixing in samples from a previous run.
labels.clear()
y_preds.clear()

# Initialize metrics
metric = river.metrics.Accuracy()

for i,data in enumerate(dataset.take(20000)):
    x_i, y_i = data
    y_pred_before = LogRegmodel.predict_one(x_i)
    y_pred.append(y_pred_before)
    y_true.append(y_i)
    # Call update() for its side effect only. Rebinding `metric` to the return
    # value breaks on River versions where update() returns None.
    metric.update(y_i, y_pred_before) # Accuracy
    correctly_classified = y_pred_before == y_i  # checking accuracy
    correctly_classified_data.append(correctly_classified)
    # The helper's signature is (label, y_pred): pass the ground truth first.
    # With the arguments swapped, precision and recall are exchanged.
    acc, pre, rec = update_error_window(y_i, y_pred_before)
    acc_scores.append(acc)
    drift_detector.update(correctly_classified)   # Data is processed one sample at a time
    if drift_detector.drift_detected:
        # The drift detector indicates after each sample if there is a drift in the data
        print(f'Change detected at index {i}')
        drifts.append(i)

In [None]:
# Stream indices at which the drift detector reported a change.
drifts

In [None]:
# Removing NaN values from accuracy scores.
# Use a numeric NaN test instead of comparing string representations, which
# only works as long as the value happens to print as 'nan'.
acc_scores_cleaned = [score for score in acc_scores if not np.isnan(score)]

In [None]:
# Line plot of the first 200 cleaned rolling-window accuracy values.
plt.plot(acc_scores_cleaned[0:200])
plt.ylabel('Accuracy Scores')
plt.xlabel('Data Stream Samples')
plt.title('Impact on Accuracy Scores due to Concept Drift')

In [None]:
# Cast every stored prediction with int() (boolean -> integer).
y_pred = list(map(int, y_pred))

In [None]:
# Feed every (true label, prediction) pair into a River ClassificationReport.
report = ClassificationReport()
for yt, yp in zip(y_true, y_pred):
    # Call update() for its side effect only. Rebinding `report` to the return
    # value turns it into None on River versions where update() returns
    # nothing, which breaks the next iteration and the display cell.
    report.update(yt, yp)

In [None]:
# Show the classification report object as this cell's output.
report