# Concept Drift Detection

Steven Sison | April 29, 2024

## Description

This document sets up and evaluates DDM (Drift Detection Method), a concept drift detection algorithm. It covers the preliminary setup as well as the evaluations needed to check the detector's performance.


## Setting Up the Algorithm

### Steps

1. Generate the Warm-Up Dataset and Streaming Dataset
2. Set up the detector configuration.
3. Set up the metrics, such as the error scorers.
4. Start the Concept Drift Detection
    - Warm Up Phase
    - Testing Phase

### Preliminaries

In [None]:
import pandas as pd                     # For data transformation
import numpy as numpy                   # For scientific calculations
import seaborn as sns                   # For data visualizations
import matplotlib.pyplot as plt         # For plotting
import matplotlib.patches as mpatches

# For Concept Drift
from frouros.datasets.synthetic import SEA
from frouros.detectors.concept_drift import DDM, DDMConfig
from frouros.metrics.prequential_error import PrequentialError

import random
from river import drift

### Generating the Warm-Up Dataset and Streaming Dataset

#### Warm-up Dataset

In [None]:
# Load the labelled URL data and keep only its label column, exposed twice:
# once as the prediction and once as the ground truth, so that every
# prediction initially agrees with its label.
url_labels = pd.read_csv("binary_new_Bacud_unbalanced_lexical.csv")["url_type"]
warm_up_dataset = pd.DataFrame({"predicted": url_labels, "actual": url_labels})

warm_up_dataset.head()

#### Streaming Dataset

In [None]:
# Temporarily split the labelled data into three position-based divisions:
#   warm_up            - rows 0..9999, used to warm up the detector
#   temp_after_warm_up - rows 9501..10000, the window around the end of warm-up
#   temp_concept_drift - rows 10001..20000, used to induce concept drift
# NOTE(review): temp_after_warm_up holds 500 rows and shares rows 9501..9999
# with warm_up; confirm these boundaries are intended (an earlier note on this
# cell described three consecutive 10k divisions).
# Every division is an explicit copy so that column assignments made on it
# write to an independent frame rather than to a slice of warm_up_dataset.

warm_up = warm_up_dataset.head(10000).copy()
temp_after_warm_up = warm_up_dataset.iloc[9501:10001].copy()
temp_concept_drift = warm_up_dataset.iloc[10001:20001].copy()


In [None]:
# Put incorrect predictions in both post-warm-up divisions by swapping the
# 0 and 1 values of their "predicted" column.
# Each frame is flipped from its OWN column: a Series taken from the other
# frame would be aligned on the row index during assignment, and since the two
# frames hold different rows the result would be NaN instead of flipped labels.
# `assign` returns a new frame, so nothing is written through a slice.
# NOTE(review): if temp_after_warm_up is meant to keep correct predictions,
# drop its flip below.
flipped_labels = {0: 1, 1: 0}

temp_after_warm_up = temp_after_warm_up.assign(
    predicted=temp_after_warm_up["predicted"].replace(flipped_labels)
)
temp_concept_drift = temp_concept_drift.assign(
    predicted=temp_concept_drift["predicted"].replace(flipped_labels)
)

temp_concept_drift.head()

In [None]:
# Stack the warm-up rows and the rows that follow them into a single frame;
# each row keeps its original index label.
concept_drift_test = pd.concat(objs=[warm_up, temp_after_warm_up], axis=0)

concept_drift_test.head()

### Detector Configuration

In [None]:
# Load the stored prediction and label files for the warm-up and testing
# phases, in the same order as the names on the left-hand side.
(
    temp_warm_up_predicted,
    temp_warm_up_actual,
    temp_testing_predicted,
    temp_testing_up_actual,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        "warm-up-predicted.csv",
        "warm-up-actual.csv",
        "testing-predicted.csv",
        "testing-actual.csv",
    )
)

In [None]:
# Detector configuration: drift checks only start once as many instances as
# the warm-up set contains have been seen.
config = DDMConfig(
    warning_level=2.0,
    drift_level=3.0,
    min_num_instances=temp_warm_up_predicted.shape[0],
)

detector = DDM(config=config)

### Error Metrics

In [None]:
# One prequential-error metric per alpha value, each named after its alpha.
metrics = [
    PrequentialError(alpha=factor, name=f"alpha={factor}")
    for factor in (1.0, 0.9999, 0.999)
]

# Recorded values of every metric, keyed by the metric's name.
metrics_historic_detector = {str(metric.name): [] for metric in metrics}


def error_scorer(y_true, y_pred):
    """Return the error of one prediction: 0 if it matches the label, else 1.

    The score is ``1`` minus the result of ``y_true == y_pred``.
    """
    is_correct = y_true == y_pred
    return 1 - is_correct

### Concept Drift Detection Using Frouros

In [None]:
# Seeded generator so that any random draws taken from it are repeatable.
RNG_SEED = 42
rng = random.Random(RNG_SEED)

#### Warm-Up Phase

In [None]:
# Warm-up phase: feed every warm-up error to the detector and to the metrics.
print(f"Warming up detector with {len(temp_warm_up_predicted.index)} samples")

# The values are read from the first column of each frame.
warm_up_predicted = temp_warm_up_predicted.iloc[:, 0].tolist()
warm_up_actual = temp_warm_up_actual.iloc[:, 0].tolist()

for y_pred, y_actual in zip(warm_up_predicted, warm_up_actual):
    error = error_scorer(y_true=y_actual, y_pred=y_pred)
    detector.update(value=error)

    # The history dict was built from `metrics` in order, so its values line
    # up one-to-one with the metric objects.
    for metric, history in zip(metrics, metrics_historic_detector.values()):
        history.append(metric(error))

#### Testing Phase

In [None]:
# Testing phase: stream the test samples through the detector and record the
# stream positions at which it reports a warning or a drift.
idx_drift, idx_warning = [], []

test_predicted = temp_testing_predicted.iloc[:, 0].tolist()
test_actual = temp_testing_up_actual.iloc[:, 0].tolist()

# Stream position of the current sample; numbering continues after warm-up.
i = len(temp_warm_up_predicted.index)

for y_pred, y_actual in zip(test_predicted, test_actual):
    error = error_scorer(y_true=y_actual, y_pred=y_pred)
    detector.update(value=error)  # Detector's update

    # Optional bookkeeping, only needed for plotting the whole process.
    for metric, history in zip(metrics, metrics_historic_detector.values()):
        history.append(metric(error))

    status = detector.status
    if status["drift"]:
        print(f"Drift detected at index: {i}")
        idx_drift.append(i)
        # Reset the detector and the metrics, then stop the simulation.
        detector.reset()
        for metric in metrics:
            metric.reset()
        break

    if status["warning"]:
        # Warning zone
        idx_warning.append(i)
    i += 1

#### Visualization

In [None]:
# Two stacked panels sharing the x axis (stream position):
#   top    - history of every prequential-error metric
#   bottom - detector timeline: warm-up region, warnings, detected drift and
#            the real-drift marker
plt.rcParams.update({"font.size": 20})

alpha = 0.6
linewidth = 1.0

fig, ax = plt.subplots(
    nrows=2,
    ncols=1,
    figsize=(12, 8),
    sharex=True,
    dpi=300,
    gridspec_kw={"height_ratios": [3, 1]},
)

for (metric_name, metric_values), linecolor in zip(
    metrics_historic_detector.items(),
    ["#1f77b4", "#ff7f0e", "#2ca02c"],
):
    ax[0].plot(
        metric_values,
        color=linecolor,
        linewidth=linewidth,
        alpha=alpha,
        label=metric_name,
    )

drift_color = "red"
drift_linestyle = "--"
warmup_color = "grey"
warning_color = "#e8f519"
real_drift_color = "black"
# NOTE(review): stream position marked as the real drift; hard-coded, confirm
# it matches the data being streamed.
real_drift_idx = 20000

# Shade the warm-up region with a single span instead of creating one line
# artist per warm-up sample.
warm_up_size = len(temp_warm_up_predicted.index)
if warm_up_size:
    ax[1].axvspan(0, warm_up_size - 1, color=warmup_color, linewidth=linewidth)

for idx in idx_warning:
    ax[1].axvline(x=idx, color=warning_color, linewidth=linewidth)

for idx in idx_drift:
    ax[1].axvline(x=idx, color=drift_color, linestyle=drift_linestyle, linewidth=1.5)

# Draw the real-drift marker in the same colour as its legend entry.
ax[1].axvline(
    x=real_drift_idx,
    color=real_drift_color,
    linestyle=drift_linestyle,
    linewidth=1.5,
)

ax[0].set_ylabel("Prequential Error")
ax[0].legend(
    loc="lower center",
    ncol=3,
    bbox_to_anchor=(0.5, -0.175),
    fancybox=True,
)
ax[1].set_yticks([])
ax[1].set_ylabel("Detector")
drift_path = mpatches.Patch(
    color=drift_color, label="Drift detected", linestyle=drift_linestyle
)
warning_path = mpatches.Patch(color=warning_color, label="Warning")
warmup_path = mpatches.Patch(color=warmup_color, label="Warm up")
real_drift_path = mpatches.Patch(
    color=real_drift_color, label="Real drift", linestyle=drift_linestyle
)
# All four markers drawn on this panel get a legend entry.
ax[1].legend(
    handles=[warmup_path, warning_path, drift_path, real_drift_path],
    loc="upper center",
    ncol=4,
    bbox_to_anchor=(0.5, -0.2),
    fancybox=True,
)

fig.tight_layout()
plt.show()