This notebook is part of the supplementary material of the books "Online Machine Learning - Eine praxisorientierte Einführung",  
https://link.springer.com/book/9783658425043 and "Online Machine Learning - A Practical Guide with Examples in Python" https://link.springer.com/book/9789819970063
The contents are open source and published under the "BSD 3-Clause License".
This software is provided "as is" without warranty of any kind, either express or implied, including but not limited to implied warranties of merchantability and fitness for a particular purpose. The author or authors assume no liability for any damages or liability, whether in contract, tort, or otherwise, arising out of or in connection with the software or the use or other dealings with the software.

# Chapter 5: Evaluation and Performance Measurement

## Determination of the training and test data set in the 'spotRiver' package

In [None]:
from river import linear_model, datasets, preprocessing
from spotRiver.evaluation.eval_bml import eval_oml_horizon
from spotRiver.utils.data_conversion import convert_to_df
from sklearn.metrics import mean_absolute_error
# Pipeline: standard scaling followed by an incremental linear regression.
model = preprocessing.StandardScaler() | linear_model.LinearRegression()
# scikit-learn's MAE function is handed to the evaluation as the metric.
metric = mean_absolute_error

# Convert the river data set into a DataFrame with "Approve" as target column.
target_column = "Approve"
dataset = datasets.TrumpApproval()
df = convert_to_df(dataset, target_column)

# The first 500 rows form the training set, the remaining rows the test set.
train, test = df.iloc[:500], df.iloc[500:]

horizon = 10
df_eval, df_preds = eval_oml_horizon(
    model,
    train,
    test,
    target_column,
    horizon,
    metric=metric,
)

* Generate the directory "figures" if it does not exist

In [None]:
import os
# Create the output directory for the figures. exist_ok=True makes the call a
# no-op when the directory is already there, so the cell can be re-run safely
# and no separate (racy) existence check is needed.
os.makedirs("figures", exist_ok=True)

In [None]:
from spotRiver.evaluation.eval_bml import plot_bml_oml_horizon_metrics
# One label per evaluated model; the target file name points into ./figures.
df_labels = ["OML Linear"]
plot_bml_oml_horizon_metrics(
    df_eval,
    df_labels,
    metric=metric,
    filename="./figures/ch05_fig_bml_oml_horizon_metrics.pdf",
)

In [None]:
from spotRiver.evaluation.eval_bml import plot_bml_oml_horizon_predictions
# One label per evaluated model; the target file name points into ./figures.
df_labels = ["OML Linear"]
plot_bml_oml_horizon_predictions(
    df_preds,
    df_labels,
    target_column=target_column,
    filename="./figures/ch05_fig_bml_oml_horizon_predictions.pdf",
)

## Methods for OML (River)

In [None]:
from river import datasets
from spotRiver.evaluation.eval_oml import eval_oml_iter_progressive, plot_oml_iter_progressive
from river import metrics as river_metrics
from river import tree as river_tree
from river import preprocessing as river_preprocessing
dataset = datasets.TrumpApproval()
# Pipeline: standard scaling followed by a Hoeffding adaptive tree regressor (seed=1).
model = river_preprocessing.StandardScaler() | river_tree.HoeffdingAdaptiveTreeRegressor(seed=1)

# The data set is materialised as a list before it is handed to the evaluation;
# the model is registered under the label "HATR" and scored with river's MAE.
res_num = eval_oml_iter_progressive(
    dataset=list(dataset),
    step=1,
    metric=river_metrics.MAE(),
    models={"HATR": model},
)
plot_oml_iter_progressive(
    res_num,
    filename="./figures/ch05-eval_oml_iter_progressive.pdf",
)

# SEA Drift

As shown in [https://riverml.xyz/0.15.0/api/tree/HoeffdingAdaptiveTreeClassifier/](https://riverml.xyz/0.15.0/api/tree/HoeffdingAdaptiveTreeClassifier/)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from spotRiver.evaluation.eval_oml import eval_oml_iter_progressive, plot_oml_iter_progressive
from spotRiver.evaluation.eval_bml import eval_bml_horizon, eval_bml_landmark, eval_bml_window, eval_oml_horizon, plot_bml_oml_horizon_predictions, plot_bml_oml_horizon_metrics
from spotRiver.utils.data_conversion import convert_to_df
from river import metrics as river_metrics, compose, feature_extraction, linear_model, preprocessing, stats
from river import stream as river_stream
from river import preprocessing as river_preprocessing
from river.datasets import synth
from river.tree import HoeffdingAdaptiveTreeClassifier, HoeffdingTreeClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_regression
from sklearn import preprocessing as preprocessing_sklearn
from sklearn import tree as sklearn_tree
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, f1_score

In [None]:
# Metric for the drift experiments (alternative: metric = accuracy_score).
metric = f1_score
# 7 * 24 steps (presumably one week of hourly data - TODO confirm).
horizon = 7 * 24

# All stream sizes scale with k.
k = 10
n_total = k * 100_000      # total number of samples drawn from the stream
position = k * 25_000      # base drift position
width = n_total // 250     # drift width
n_train = 1_000            # number of leading samples used for training

# [a, b) covers the 50 samples right before index n_train + position.
b = n_train + position
a = b - 50

In [None]:
# Two SEA streams, each switching between two variants at `position`
# (transition width `width`) ...
gen_1 = synth.ConceptDriftStream(
    stream=synth.SEA(seed=42, variant=0),
    drift_stream=synth.SEA(seed=42, variant=1),
    seed=1, position=position, width=width)
gen_2 = synth.ConceptDriftStream(
    stream=synth.SEA(seed=42, variant=2),
    drift_stream=synth.SEA(seed=42, variant=3),
    seed=1, position=position, width=width)
# ... which are themselves combined, switching from gen_1 to gen_2 at 2*position.
dataset = synth.ConceptDriftStream(
    stream=gen_1, drift_stream=gen_2, seed=1, position=2*position, width=width)

# Collect the stream column-wise: one list per feature key (taken from the
# first sample) plus one list for the target.
data_dict = {key: [] for key in next(iter(dataset.take(1)))[0].keys()}
data_dict["y"] = []
for x, y in dataset.take(n_total):
    for key, value in x.items():
        data_dict[key].append(value)
    data_dict["y"].append(y)
df = pd.DataFrame(data_dict)

# Name the feature columns x1..xN (N = number of feature columns, three for
# the data built here) and the last column y. The number of features is
# derived from the frame instead of being hard-coded.
df.columns = [f"x{i}" for i in range(1, df.shape[1])] + ["y"]
# Convert boolean columns to int; all other columns are left unchanged.
df = df.apply(lambda col: col.astype(int) if col.dtype == bool else col)

# The first n_train rows are used for training, the rest for testing.
train = df[:n_train]
test = df[n_train:]
target_column = "y"

* Plot the y values of the samples in the df dataframe using a sliding window of 2500 samples

In [None]:
# Plot the rolling mean of the y values in df using a sliding window of 2500
# samples, with red vertical lines at position, 2*position and 3*position.
# Figure and axes are created together so every drawing call targets this
# axes explicitly instead of relying on pyplot's "current axes" state.
fig, ax = plt.subplots(figsize=(7, 3), tight_layout=True)
df["y"].rolling(2500).mean().plot(ax=ax)
for drift_at in (position, 2 * position, 3 * position):
    ax.axvline(drift_at, color="red")
fig.savefig("./figures/ch05_sea_drift.pdf", format='pdf', dpi=300)

plt.show()

# Evaluation, Performance Measurement


## Rolling Metrics Evaluation

### Demonstration of the "Rolling" Metric

* The method `Rolling` from the package `river` calculates a metric for the data of a sliding window $W$, where the parameter `window_size` specifies the window size $w$.
* In the example, the accuracy is calculated with a window size $w=3$:
* The first value is 0%, since `{True}` is compared with `{False}`.
* The second value is 50%, since `{True, False}` are compared with `{False, False}`.
* The third value is 66%, since `{True, False, True}` are compared with `{False, False, True}`.
* The fourth value is 100%, since `{False, True, True}` are compared with `{False, True, True}`. The error in the first sample does not matter anymore.

In [None]:
from river import metrics, utils

y_true = [True, False, True, True]
y_pred = [False, False, True, True]

# Accuracy computed over a sliding window of the three most recent samples.
metric = utils.Rolling(metrics.Accuracy(), window_size=3)

for yt, yp in zip(y_true, y_pred):
    # Update first, then print the metric object itself. Printing the return
    # value of `update` only works in river releases where `update` returns
    # the metric; in releases where it returns None the output would be "None".
    metric.update(yt, yp)
    print(metric)

# Progressive Validation

## First Example: Progressive Validation (without delay)

### Using  the California Housing dataset.

* First we load the California Housing dataset.
  
* Then store the independent variables as `X` and the target variable as `y`.
* The data can then be incrementally retrieved as `dataset` via an iterator that uses the `river` method `stream.iter_pandas`.

In [None]:
from sklearn.datasets import fetch_california_housing
import pandas as pd
# Load the California Housing data and wrap it in pandas objects:
# the target becomes the Series y, the feature matrix the DataFrame X
# with the data set's own feature names as column names.
california_housing = fetch_california_housing()
features = california_housing.feature_names
y = pd.Series(california_housing.target)
X = pd.DataFrame(data=california_housing.data, columns=features)

* Add a datetime column `date` to the dataframe `X`. The list `features` is left unchanged, so the model's `Select` step (see below) does not pass this column on.

In [None]:
# Attach one consecutive calendar day per row, starting at 2020-01-01, as
# column "date". The list `features` is not extended here.
X = X.assign(date=pd.date_range(start="2020-01-01", periods=len(X), freq="D"))

In [None]:
from river import stream
# Peek at the first sample through a throw-away iterator and with separate
# variable names: looping over `dataset` itself would consume its first
# sample before the evaluation, and unpacking into `y` would overwrite the
# target Series with a single scalar.
x_first, y_first = next(iter(stream.iter_pandas(X, y)))
print(x_first, y_first)

# Fresh, unconsumed iterator over all samples for the evaluation below.
dataset = stream.iter_pandas(X, y)

## Model

* A linear regression model is selected as the model.
* A pipeline is set up in which the characteristics are first selected using 'Select' and then scaled before they are passed on to the linear regression model.

In [None]:
from river import compose, preprocessing, linear_model, optim
# Pipeline: select the columns listed in `features`, standardise them and
# pass them to a linear regression that is optimised with optim.SGD(0.001).
model = (
    compose.Select(*features)
    | preprocessing.StandardScaler()
    | linear_model.LinearRegression(optimizer=optim.SGD(0.001))
)

## Calculation of the MAE (Mean Absolute Errors) using progressive validation (Test-Then-Train).

* Progressive validation is the canonical method to evaluate the performance of a model. 
* It can be used to estimate how a model would have performed in a production scenario.
* The data set is transformed into a stream of questions and answers. 
  * At each step, the model is either asked to predict an observation or is updated. 
  * The target is revealed to the model only after a certain time, which is determined by the delay parameter (`delay`). 
* By default, there is no delay, which means that the samples are processed sequentially. If there is no delay, this function performs a progressive validation (test-then-train). 
* When there is a delay, we refer to this as delayed progressive validation.
* In the first example, the MAE is displayed after `print_every=5000` steps, but is updated after each step.

In [None]:
from river import metrics, evaluate
# MAE metric for the progressive validation; an intermediate result is
# printed every 5000 samples.
metric = metrics.MAE()
evaluate.progressive_val_score(
    dataset=dataset,
    model=model,
    metric=metric,
    print_every=5_000,
)

### Second Example: Progressive Validation with Delay

* In the second example `delay=dt.timedelta(days=1)` is set and thus a delay is built in:

In [None]:
import datetime as dt

# Rebuild target and feature frame from the loaded California Housing data,
# so X again contains only the columns named in `features`.
y = pd.Series(california_housing.target)
X = pd.DataFrame(data=california_housing.data, columns=features)



* Add a datetime column `moment` to the dataframe `X`; it is passed to the evaluation as the `moment` of each sample. The list `features` is left unchanged.

In [None]:


# One consecutive calendar day per row; the column is used as the `moment`
# (timestamp) of each sample in the delayed evaluation.
X["moment"] = pd.date_range(start="2020-01-01", periods=len(X), freq="D")
dataset = stream.iter_pandas(X, y)

evaluate.progressive_val_score(
    dataset=dataset,
    model=model.clone(),
    # Evaluate with a fresh copy of the metric: the metric object is stateful,
    # and `metric` has already been updated by the evaluation without delay.
    # Reusing it would mix the errors of both runs into one MAE value.
    metric=metric.clone(),
    moment="moment",
    delay=dt.timedelta(days=1),
    print_every=5_000
)


* Increase the delay to two weeks:

In [None]:
X = pd.DataFrame(california_housing.data, columns=features)
y = pd.Series(california_housing.target)

# One consecutive calendar day per row; the column is used as the `moment`
# (timestamp) of each sample in the delayed evaluation. The list `features`
# is not extended.
X["moment"] = pd.date_range(start="2020-01-01", periods=len(X), freq="D")

dataset = stream.iter_pandas(X, y)

evaluate.progressive_val_score(
    dataset=dataset,
    model=model.clone(),
    # Evaluate with a fresh copy of the metric: the metric object is stateful,
    # so reusing `metric` from an earlier evaluation would mix the errors of
    # several runs into one MAE value.
    metric=metric.clone(),
    moment='moment',
    delay=dt.timedelta(days=14),
    print_every=5_000
)

# Drift Simulator

## Example: The synthetic SEA data set


* Implementation of the abrupt drift data stream described in [@stre01a]. 
* Each observation consists of three features. 
  * Only the first two features are relevant. 
* The target is binary and positive if the sum of the features exceeds a certain threshold. 
* There are four thresholds to choose from. 
* Concept drift can be introduced at any time during the stream by switching the threshold.

* In detail, the SEA data set was generated as follows: 
  * First, $n=60,000$ random points were generated in a three-dimensional feature space. 
  * The features have values between 0 to 10, with only the first two features ($f_1$ and $f_2$) being relevant. 
  * The $n$ points were then divided into four blocks of $15,000$ points each. 
  * In each block, the class membership of a point is determined using a threshold $\tau_i$, where $i$ indicates the block in question.
  * Thresholds $\tau_1= 8$, $\tau_2=9$, $\tau_3=7$, and $\tau_4 = 9.5$ were chosen. 
  * In addition, noise was added to the data ("We inserted about 10% class noise into each block of data.") by swapping 10% of the class memberships. 
  * Finally, a test set ($n_t = 10,000$) was determined, composed of $2,500$ data points taken from each block.
* The python package `river` provides the function `SEA` to generate the data.

## Creating the data iterator

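* First we create the iterator `dataset`, with which we create $n$ training records and $k$ test records for each of the four SEA variants and store them in the lists

* `xtrain`, `xtest` (features) and
* `ytrain`, `ytest` (labels)

with the feature values stored as `float` values. The lists are collected per variant in the dictionaries `train` and `test`.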

In [None]:
from river.datasets import synth
n = 12500  # number of training samples drawn per SEA variant (4 variants)
k = 2500   # number of test samples drawn per SEA variant
test = dict()
train = dict()

for i in range(4):
    xtrain = []
    ytrain = []
    xtest = []
    ytest = []

    # Training data of variant i: feature values as floats, labels unchanged.
    dataset = synth.SEA(variant=i, seed=2*i)
    for x, y in dataset.take(n):
        xtrain.append([float(value) for value in x.values()])
        ytrain.append(y)
    train[i] = [xtrain, ytrain]

    # Test data of variant i, drawn with a different seed than the training data.
    dataset = synth.SEA(variant=i, seed=2*i+1)
    for x, y in dataset.take(k):
        xtest.append([float(value) for value in x.values()])
        ytest.append(y)
    test[i] = [xtest, ytest]
    
#print(test[0][0])
#print(test[0][1])


## The complete SEA data set

* The four partial data sets are now combined into one overall data set.

In [None]:
# Concatenate the per-variant parts in the order 0, 1, 2, 3. Index 0 of each
# entry holds the feature rows, index 1 the labels.
Xtrain = [row for part in range(4) for row in train[part][0]]
Ytrain = [label for part in range(4) for label in train[part][1]]

Xtest = [row for part in range(4) for row in test[part][0]]
Ytest = [label for part in range(4) for label in test[part][1]]

## Creating a model (decision tree)

* A decision tree is now fitted on the complete training data set. 
* The first levels of the tree are visualized.

In [None]:
from sklearn import tree
# Fit a decision tree with default settings on the combined training data
# and draw the tree down to depth 2.
clf = tree.DecisionTreeClassifier().fit(Xtrain, Ytrain)
tree.plot_tree(clf, max_depth=2)

## graphviz

* If the library `graphviz` is available (on operating system level and then as Python interface), appealing graphics can be generated.

In [None]:
# Export the fitted tree down to depth 2 (filled nodes) to the file "sae_tree.dot".
# NOTE(review): because out_file is given, export_graphviz presumably returns
# None, so dot_data would carry no content - confirm before using it.
dot_data = tree.export_graphviz(clf, out_file="sae_tree.dot", max_depth=2, filled=True)
# IPython shell escape: convert the dot file to a PNG image. This needs the
# Graphviz `dot` executable on the system path.
! dot -Tpng sae_tree.dot -o sae_tree.png 

In [None]:
from IPython.display import Image
# Display the PNG file that the graphviz cell wrote to "sae_tree.png".
Image(filename="sae_tree.png")

## Prediction on the SEA test data

In [None]:
# Predict the labels of the combined SEA test data with the fitted tree.
Ypred = clf.predict(Xtest)

In [None]:
import pandas as pd
from sklearn import metrics
import seaborn as sns
import matplotlib.pyplot as plt

# Confusion matrix of the true versus the predicted labels of the test data.
confusion_matrix = metrics.confusion_matrix(Ytest, Ypred)
matrix_df = pd.DataFrame(confusion_matrix)

# Set the seaborn theme first so the font scale applies to the axes created
# afterwards, and create figure and axes in one call. Creating the axes before
# calling plt.figure(figsize=...) would draw the heatmap on a default-sized
# figure and additionally open a second, empty figure of the requested size.
sns.set(font_scale=1.3)
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(matrix_df, annot=True, fmt="g", ax=ax, cmap="magma")
ax.set_title('Confusion Matrix - Decision Tree')
ax.set_xlabel("Predicted label", fontsize=15)
ax.set_ylabel("True Label", fontsize=15)