In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [9]:
# turn off worning
import warnings
warnings.filterwarnings('ignore')

In [113]:
# Notebook parameters: `dataset` is the folder the Excel files are read from and
# written to; `drift_detector` selects which monitored_<detector>.xlsx is scored.
dataset, drift_detector = 'insects', 'KS'

In [114]:
# Load the baseline drift table for the selected dataset.
baseline_path = f'{dataset}/monitored_baseline.xlsx'
drift_baseline = pd.read_excel(baseline_path)

In [115]:
# Mirror every feature pair: swap each feat_1* column with its feat_2* twin so
# that a feature listed only as feat_2 also shows up as feat_1.
# Keys that are not present as columns are simply ignored by rename().
swap_cols = {
    'feat_1': 'feat_2',
    'feat_2': 'feat_1',
    'feat_1_drift': 'feat_2_drift',
    'feat_2_drift': 'feat_1_drift',
    'feat_1_cd_score': 'feat_2_cd_score',
    'feat_2_cd_score': 'feat_1_cd_score',
}
reversed_df = drift_baseline.rename(columns=swap_cols)

# Stack the original rows on top of the mirrored rows under a fresh 0..n-1 index.
stacked_frames = [drift_baseline, reversed_df]
drift_baseline = pd.concat(stacked_frames, ignore_index=True)

In [116]:
# Keep the first row found for each feature, then retain only the feature name
# and its baseline drift flag.
baseline_first_rows = drift_baseline.drop_duplicates(subset='feat_1')
drift_baseline = baseline_first_rows.loc[:, ['feat_1', 'feat_1_drift']]

In [117]:
# Load the drift table produced by the selected detector.
# NOTE: the variable is called `drift_js` regardless of which detector
# `drift_detector` names.
detector_path = f'{dataset}/monitored_{drift_detector}.xlsx'
drift_js = pd.read_excel(detector_path)

In [118]:
# Mirror every feature pair of the detector table: swap each feat_1* column with
# its feat_2* twin so that a feature listed only as feat_2 also shows up as feat_1.
# Keys that are not present as columns are simply ignored by rename().
swap_cols = {
    'feat_1': 'feat_2',
    'feat_2': 'feat_1',
    'feat_1_drift': 'feat_2_drift',
    'feat_2_drift': 'feat_1_drift',
    'feat_1_cd_score': 'feat_2_cd_score',
    'feat_2_cd_score': 'feat_1_cd_score',
}
reversed_df = drift_js.rename(columns=swap_cols)

# Stack the original rows on top of the mirrored rows under a fresh 0..n-1 index.
stacked_frames = [drift_js, reversed_df]
drift_js = pd.concat(stacked_frames, ignore_index=True)

In [119]:
# Keep the first row found for each feature, retain only the feature name and
# its drift flag, and rename the flag so it does not collide with the baseline
# column when the two tables are merged on feat_1.
detector_first_rows = drift_js.drop_duplicates(subset='feat_1')
detector_flags = detector_first_rows.loc[:, ['feat_1', 'feat_1_drift']]
drift_js = detector_flags.rename(columns={'feat_1_drift': 'feat_1_drift_js'})

In [120]:
# Join baseline and detector flags on the feature name.
# how='inner' is pandas' default: only features present in both tables are kept.
drift_df = pd.merge(left=drift_baseline, right=drift_js, on='feat_1', how='inner')

In [121]:
# Mean of each drift-flag column (the share of flagged features when flags are 0/1).
drift_df[['feat_1_drift', 'feat_1_drift_js']].agg('mean')

feat_1_drift       0.275
feat_1_drift_js    0.330
dtype: float64

In [122]:
# Score the detector's flags against the baseline flags over all merged features.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

baseline_flags = drift_df['feat_1_drift']     # passed as y_true
detector_flags = drift_df['feat_1_drift_js']  # passed as y_pred

accuracy = accuracy_score(baseline_flags, detector_flags)
precision = precision_score(baseline_flags, detector_flags)
recall = recall_score(baseline_flags, detector_flags)
f1 = f1_score(baseline_flags, detector_flags)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1: {f1}')

Accuracy: 0.895
Precision: 0.7575757575757576
Recall: 0.9090909090909091
F1: 0.8264462809917356


In [123]:
# Subsampling experiment: for each subset size, draw random subsets of the
# merged features, score the detector against the baseline on each draw, and
# average the four metrics over the draws.
N_REPEATS = 20    # random subsets drawn per subset size
SAMPLE_SEED = 42  # fixed seed so that Restart & Run All reproduces the table

# Subset sizes to evaluate, keyed by dataset name.
step_dict = {'heartbeats': [140, 70, 35, 17, 8, 4], 'insects': [100, 50, 25, 12, 6, 3], 'cicids': [9, 4], 'covertype': [7, 3]}

# One shared generator: each sample() call advances it, so every draw is
# different while the sequence as a whole is reproducible.
rng = np.random.RandomState(SAMPLE_SEED)

# NOTE(review): a small draw can contain no drifted feature (or no flagged one).
# scikit-learn then scores the undefined precision/recall/F1 as 0 by default,
# which lowers the averages for the smallest subset sizes. The warning it emits
# is not visible when warnings are globally ignored.
results = []
for n_features in step_dict[dataset]:
    # Per-size score lists get their own names so the scalar `accuracy`,
    # `precision`, `recall` and `f1` computed on the full set are not overwritten.
    acc_runs, prec_runs, rec_runs, f1_runs = [], [], [], []
    for _ in range(N_REPEATS):
        features = drift_df.sample(n_features, random_state=rng)
        acc_runs.append(accuracy_score(features['feat_1_drift'], features['feat_1_drift_js']))
        prec_runs.append(precision_score(features['feat_1_drift'], features['feat_1_drift_js']))
        rec_runs.append(recall_score(features['feat_1_drift'], features['feat_1_drift_js']))
        f1_runs.append(f1_score(features['feat_1_drift'], features['feat_1_drift_js']))
    # Value order must match the column list below: Accuracy, Recall, Precision, F1.
    results.append([n_features, np.mean(acc_runs), np.mean(rec_runs), np.mean(prec_runs), np.mean(f1_runs)])

results_df = pd.DataFrame(results, columns=['Number of features', 'Accuracy', 'Recall', 'Precision', 'F1'])

In [124]:
# Display the averaged metrics, one row per subset size.
results_df

Unnamed: 0,Number of features,Accuracy,Recall,Precision,F1
0,100,0.8955,0.90644,0.767767,0.82992
1,50,0.898,0.918616,0.768343,0.833296
2,25,0.892,0.898689,0.756919,0.815767
3,12,0.891667,0.910833,0.769167,0.806382
4,6,0.875,0.791667,0.679167,0.712857
5,3,0.933333,0.825,0.825,0.816667


In [125]:
# Write the summary table into the dataset folder, named after the detector,
# without the row index.
results_path = f'{dataset}/results_{drift_detector}.xlsx'
results_df.to_excel(results_path, index=False)