This example is inspired by
https://docs.seldon.io/projects/alibi-detect/en/latest/examples/cd_online_wine.html


In [15]:
import pandas as pd
import numpy as np
import requests
from mlserver.codecs import PandasCodec

red = pd.read_csv(
    "https://storage.googleapis.com/seldon-datasets/wine_quality/winequality-red.csv", sep=';'
)
white = pd.read_csv(
    "https://storage.googleapis.com/seldon-datasets/wine_quality/winequality-white.csv", sep=';'
)
white = white.drop(["quality"], axis=1)
red = red.drop(["quality"], axis=1)
white = white[:500].astype(np.float64)
red = red[:500].astype(np.float64)



In [12]:

import requests
from mlserver.codecs import NumpyRequestCodec

detector_name = "WineDriftDetector"
params = {
    "detector_name": detector_name,
    "drift_type": "tabular",
}
training_request = PandasCodec.encode_request(white)

endpoint = "http://localhost:8080/v2/models/fit/"
response = requests.post(endpoint, json=training_request.dict(), params=params)


In [14]:

endpoint = f"http://localhost:8080/v2/repository/models/{detector_name}/load"
response = requests.post(endpoint)

In [27]:

endpoint = f"http://localhost:8080/v2/models/{detector_name}/infer"

drifts = []
for i in range(len(red)):
    test_request = PandasCodec.encode_request(red.iloc[[i], :])
    response = requests.post(endpoint, json=test_request.dict(), params=params).json()
    # print(response)
    if response["outputs"][0]["data"][0]:
        # The drift detector indicates after each sample if there is a drift in the data
        print(f'Change detected at index {i}')
        drifts.append(i)
    break


In [28]:
response

{'model_name': 'WineDriftDetector',
 'id': 'bba28a76-ddea-49b3-8568-3560056ae75b',
 'parameters': {'online': True,
  'backend': 'pytorch',
  'detector_type': 'drift',
  'name': 'MMDDriftOnlineTorch',
  'version': '0.11.0'},
 'outputs': [{'name': 'is_drift',
   'shape': [1, 1],
   'datatype': 'INT64',
   'data': [0]},
  {'name': 'distance', 'shape': [1, 1], 'datatype': 'BYTES', 'data': [None]},
  {'name': 'p_val', 'shape': [1, 1], 'datatype': 'BYTES', 'data': [None]},
  {'name': 'threshold',
   'shape': [1, 1],
   'datatype': 'FP64',
   'data': [0.09148103156293008]},
  {'name': 'time', 'shape': [1, 1], 'datatype': 'INT64', 'data': [2]},
  {'name': 'ert', 'shape': [1, 1], 'datatype': 'FP64', 'data': [50.0]},
  {'name': 'test_stat',
   'shape': [1, 1],
   'datatype': 'FP64',
   'data': [0.021385254789165864]}]}

In [None]:
def time_run(cd, X):
    n = X.shape[0]
    perm = np.random.permutation(n)
    t = 0
    cd.reset_state()
    while True:
        pred = cd.predict(X[perm[t%n]])
        if pred['data']['is_drift'] == 1:
            return t
        else:
            t += 1

In [None]:
ert = 50
n_runs = 250
times_h0 = [time_run(cd, X_h0) for _ in range(n_runs)]
print(f"Average run-time under no-drift: {np.mean(times_h0)}")
_ = scipy.stats.probplot(np.array(times_h0), dist=scipy.stats.geom, sparams=1/ert, plot=plt)

In [None]:
ert = 50
n_runs = 250
times_h1 = [time_run(cd, X_corr) for _ in range(n_runs)]
print(f"Average run-time under drift: {np.mean(times_h1)}")