In [None]:
%pip install --upgrade mldrift matplotlib

In [None]:
import numpy as np
import pandas as pd
import lightgbm as lgbm
import matplotlib.pyplot as plt

from datetime import datetime, timedelta

In [None]:
# Select matplotlib's built-in "dark_background" style sheet for the figures in this notebook.
plt.style.use("dark_background")

In [None]:
# Hourly timestamps from 2000-01-01 up to the moment this cell is executed,
# so the length of the series depends on the execution date.
# "h" is the hourly alias; the upper-case "H" spelling is deprecated since pandas 2.2.
t = pd.date_range(start="2000-01-01", end=datetime.today(), freq="h")
t

In [None]:
# Turn the timestamps into plain numbers so they can be used as the argument of
# the trigonometric functions in the following cells.
# NOTE(review): the 10**9 factor in the frequency cell suggests these values are
# nanoseconds since the epoch — confirm for the installed pandas version.
x = pd.to_numeric(t)
x

In [None]:
# 365 * 24 * 60 * 60 * 10**9 is one year expressed in nanoseconds; the extra
# factor 30 in the denominator and the trailing * 10 give 10 cycles per 30 years.
f = 1 / (365 * 24 * 60 * 60 * 30 * 10**9) * 10  # f ~= 1 cycle per 3 years (not 1/year)
f

In [None]:
# Sine wave with frequency f evaluated on the numeric time axis; the values are
# displayed as the cell output.
omega = 2 * np.pi * f  # angular frequency
y = np.sin(omega * x)
y

In [None]:
# Synthetic feature set on the hourly time grid.
# A seeded generator keeps the random terms identical across
# "Restart Kernel -> Run All" executions; change the seed to get another draw.
rng = np.random.default_rng(42)

# Argument shared by all periodic features.
phase = 2 * np.pi * f * x

data = {
    # plain sine wave
    "f1": np.sin(phase),  # *(x[-1]-x)*f
    # cosine scaled by a single random amplitude in [0, 2), plus uniform noise in [0, 0.2)
    "f2": rng.random() * 2 * np.cos(phase) + 0.2 * rng.random(x.shape),
    # np.sinc is the normalized sinc, sin(pi*u) / (pi*u), evaluated here at u = phase
    "f3": np.sinc(phase),
    # constant 1 plus uniform noise of width 0.01 and 0.2 respectively
    "f4": 1 + 0.01 * rng.random(x.shape),
    "f5": 1 + 0.2 * rng.random(x.shape),
    # Disabled features with a linear trend (rising / falling) plus noise:
    # "f6": x*f+2*rng.random(x.shape),
    # "f7": (x[-1]-x)*f+2*rng.random(x.shape)
}

# One column per feature, indexed by the timestamps.
df = pd.DataFrame(data, index=t)

In [None]:
# Draw one wide figure per feature. Iterating over the DataFrame columns avoids
# rebuilding the names from a counter, and plotting through `ax` keeps every
# call on the axes that was just created.
for column in df.columns:
    fig, ax = plt.subplots(figsize=(16, 8))

    ax.plot(df.index, df[column])

    ax.set_xlabel("time")
    ax.set_ylabel("y")
    ax.set_title(column)

In [None]:
# from src.tabular.lightgbm_diff import DataDiff
from mldrift.tabular.lightgbm_diff import DataDiff

import mlflow

# String slices on a DatetimeIndex include both endpoints, and a date-only label
# covers that whole day. The baseline therefore ends on 2000-01-31 so that it
# shares no rows with the test window, which starts on 2000-02-01.
baseline = df.loc["2000-01-01":"2000-01-31"]
test = df.loc["2000-02-01":"2000-03-01"]

with mlflow.start_run():
    # The comparison is executed inside an active MLflow run
    # (presumably DataDiff logs to MLflow — confirm against mldrift).
    diff = DataDiff(baseline, test)
    metrics = diff.run()

# The notebook only renders a trailing expression at the top level of a cell,
# so the result is displayed here rather than inside the `with` block.
metrics

In [None]:
# NOTE(review): the wildcard import hides which names enter the namespace;
# run_and_visualize, used in the cells below, is presumably provided by this
# module — consider importing it by name once confirmed.
from mldrift.tabular.utils.helper import *

In [None]:
# No baseline_df is passed: the first interval is used as the baseline automatically.
window_start = datetime(2000, 1, 1)
window_end = datetime.today()
results = run_and_visualize(df, start=window_start, end=window_end)

In [None]:
# Pass the baseline explicitly: rows labelled 2000-01-01 through 2001-01-01.
baseline = df.loc["2000-01-01":"2001-01-01"]
window_start = datetime(2000, 1, 1)
window_end = datetime.today()
results = run_and_visualize(
    df, start=window_start, end=window_end, baseline_df=baseline
)

In [None]:
# Explicit baseline (rows labelled 2000-01-01 through 2002-01-01) combined with
# a 365-day evaluation interval.
baseline = df.loc["2000-01-01":"2002-01-01"]
evaluation_interval = timedelta(days=365)
window_start = datetime(2000, 1, 1)
window_end = datetime.today()
results = run_and_visualize(
    df,
    start=window_start,
    end=window_end,
    baseline_df=baseline,
    interval=evaluation_interval,
)