In [1]:
import sys
from pathlib import Path

# Add the repository root (e.g. /home/egor/repo/mldrift) to sys.path so that
# the `src.*` imports below resolve.
# When `__file__` is defined the root is taken as the parent of this file's
# directory; in a notebook kernel `__file__` is absent, so the parent of the
# current working directory is used instead.
ROOT = Path(__file__).resolve().parents[1] if "__file__" in globals() else Path.cwd().parent
# Guard the append so re-running this cell does not pile up duplicate entries.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

from src.get_data import get_dataset
import requests
import json
from tqdm import tqdm
import pandas as pd
tqdm.pandas()

# Obtain an access token from the Airflow auth endpoint and prepare the
# Authorization header reused by the REST calls further down.
import os  # local import: only needed for the credential lookup below

# Credentials can be overridden through the environment; the defaults keep the
# previous hard-coded values so the notebook still runs unchanged locally.
_auth_response = requests.post(
    "http://localhost:8080/auth/token",
    json={
        "username": os.environ.get("AIRFLOW_USERNAME", "airflow"),
        "password": os.environ.get("AIRFLOW_PASSWORD", "airflow"),
    },
    timeout=10,
)
# Fail here with a clear HTTP error instead of a KeyError on 'access_token'.
_auth_response.raise_for_status()
_token = _auth_response.json()
token = _token['access_token']
BASE_URL = 'http://localhost:8080/api/v2'
headers = {"Authorization": f"Bearer {token}"}

In [2]:
# Draw one random row from the dataset. reset_index() turns the row label into
# an `index` column, which is used as the user id below.
random_sample = get_dataset().sample(1).reset_index().iloc[0]


# Print an example request body: the user id plus every remaining value except
# `index` and `target`, serialized by pandas as the feature map.
print(f"""{{
  "user_id": {int(random_sample['index'])},
  "features": {random_sample.drop(["index", "target"]).to_json()}
}}""")


{
  "user_id": 8172,
  "features": {"f0":-1.9908792565,"f1":-0.6448430479,"f2":2.6497640535,"f3":-0.2788650937,"f4":-0.3028653138,"f5":-0.2149043753,"f6":-2.8519554231,"f7":-1.086886348,"f8":-2.411772588,"f9":-0.6478169469}
}


In [3]:
# Send one random dataset row to the prediction service and display the
# service's JSON answer next to the row's true label as a quick sanity check.
random_sample = get_dataset().sample(1).reset_index().iloc[0]

# The cell value is a tuple: (response JSON, true target of the sampled row).
requests.post("http://localhost:8000/predict", data=f"""{{
  "user_id": {int(random_sample['index'])},
  "features": {random_sample.drop(["index", "target"]).to_json()}
}}""").json(), random_sample['target']

({'user_id': 1210,
  'assigned_group': 'A',
  'model_stage': 'Production',
  'prediction': 0},
 0.0)

In [4]:
# Load the full dataset; reset_index() exposes the row label as an `index`
# column that doubles as the user id sent to the service.
random_sample = get_dataset().reset_index()


def predict_model(b):
    """Score one dataset row through the prediction service.

    Args:
        b: One row of the dataset frame (a ``pd.Series``) holding an ``index``
            value used as the user id, a ``target`` value, and the feature
            values.

    Returns:
        ``pd.Series`` with two values: the ``prediction`` and the
        ``assigned_group`` reported by the service for this row.

    Raises:
        requests.HTTPError: If the service answers with an error status.
    """
    payload = {
        "user_id": int(b['index']),
        # Round-trip through to_json() so the feature values keep the same
        # rounding pandas applied when the body was built as a string.
        "features": json.loads(b.drop(["index", "target"]).to_json()),
    }
    # `json=` lets requests serialize the body and set the JSON Content-Type.
    response = requests.post("http://localhost:8000/predict", json=payload, timeout=30)
    # Surface HTTP failures directly instead of a KeyError on the fields below.
    response.raise_for_status()
    pred = response.json()
    return pd.Series([pred['prediction'], pred['assigned_group']])


# One request per row; progress_apply shows a tqdm progress bar.
random_sample[['prediction', 'group']] = random_sample.progress_apply(predict_model, axis=1)

100%|██████████| 10000/10000 [01:09<00:00, 144.87it/s]


In [5]:
# Count the rows whose prediction differs from the target, per assigned group.
random_sample.loc[random_sample['target'].ne(random_sample['prediction']), 'group'].value_counts()

group
A    328
Name: count, dtype: int64

In [12]:
# Look up the Airflow Variable `DRIFT_TYPE` through the REST API and show the
# decoded JSON reply.
requests.get(f"{BASE_URL}/variables/DRIFT_TYPE", headers=headers).json()

{'detail': 'The Variable with key: `DRIFT_TYPE` was not found'}

In [6]:
# Create the Airflow Variable DRIFT_TYPE through the REST API and show the
# decoded JSON reply. The body is passed as a dict via `json=` so requests
# serializes it and sets the JSON Content-Type header.
# NOTE(review): a second POST for an existing key is presumably rejected by the
# server — confirm before relying on re-running this cell.
requests.post(
    f"{BASE_URL}/variables",
    headers=headers,
    json={"key": "DRIFT_TYPE", "value": "mean_shift", "description": ""},
).json()

{'key': 'DRIFT_TYPE',
 'value': 'mean_shift',
 'description': '',
 'is_encrypted': False}

In [7]:
# Create the Airflow Variable DRIFT_MAGNITUDE through the REST API and show the
# decoded JSON reply. The body is passed as a dict via `json=` so requests
# serializes it and sets the JSON Content-Type header.
# NOTE(review): a second POST for an existing key is presumably rejected by the
# server — confirm before relying on re-running this cell.
requests.post(
    f"{BASE_URL}/variables",
    headers=headers,
    json={"key": "DRIFT_MAGNITUDE", "value": "7.0", "description": ""},
).json()

{'key': 'DRIFT_MAGNITUDE',
 'value': '7.0',
 'description': '',
 'is_encrypted': False}

In [8]:
# Update the existing Airflow Variable DRIFT_MAGNITUDE through the REST API and
# show the decoded JSON reply. The body is passed as a dict via `json=` so
# requests serializes it and sets the JSON Content-Type header.
requests.patch(
    f"{BASE_URL}/variables/DRIFT_MAGNITUDE",
    headers=headers,
    json={"key": "DRIFT_MAGNITUDE", "value": "7.0", "description": ""},
).json()

{'key': 'DRIFT_MAGNITUDE',
 'value': '7.0',
 'description': '',
 'is_encrypted': False}

In [9]:
from datetime import datetime as dt
from datetime import timezone

# Capture the current local time as a timezone-aware pandas Timestamp.
# Formatting with a seconds-resolution pattern and parsing the text back drops
# the sub-second part, so the marker sits on a whole second.
now = pd.to_datetime(dt.now().astimezone().strftime("%Y-%m-%dT%H:%M:%S%z"))

now, now.timestamp()

(Timestamp('2026-01-15 11:36:06+0500', tz='UTC+05:00'), 1768458966.0)

In [10]:
import os

# Export the drift settings as environment variables of this kernel process.
# NOTE(review): the Airflow Variable DRIFT_MAGNITUDE was set to "7.0" earlier
# while this uses '12.0' — confirm the difference is intended.
os.environ.update({'DRIFT_TYPE': 'mean_shift', 'DRIFT_MAGNITUDE': '12.0'})

In [11]:
# Score the whole dataset again after the DRIFT_* settings above were set.
# `predict_model` is the helper defined in the earlier scoring cell; it is
# reused here rather than declared a second time, so there is a single
# definition to maintain.
random_sample = get_dataset().reset_index()

# One request per row; progress_apply shows a tqdm progress bar.
random_sample[['prediction', 'group']] = random_sample.progress_apply(predict_model, axis=1)

100%|██████████| 10000/10000 [01:09<00:00, 143.80it/s]


In [12]:
# Count the rows whose prediction differs from the target, per assigned group.
random_sample.loc[random_sample['target'].ne(random_sample['prediction']), 'group'].value_counts()

group
A    3658
B     779
Name: count, dtype: int64

In [13]:
# Download the records served by the /ab/stats endpoint and load the decoded
# JSON straight into a DataFrame.
stats = pd.DataFrame(requests.get("http://localhost:8000/ab/stats").json())

In [14]:
# Preview the first rows of the downloaded stats frame.
stats.head()

Unnamed: 0,timestamp,user_id,assigned_group,model_stage,prediction,features
0,2026-01-15T06:31:30+0000,1210,A,Production,0,f0=-2.3923084098|f1=-0.6669136754|f2=2.0590830...
1,2026-01-15T06:31:34+0000,0,A,Production,2,f0=-0.1169492085|f1=-1.2008520526|f2=1.7556034...
2,2026-01-15T06:31:34+0000,1,A,Production,1,f0=-1.1937841629|f1=1.4147508807|f2=1.14391281...
3,2026-01-15T06:31:34+0000,2,A,Production,0,f0=1.4326582685|f1=-0.3790214796|f2=2.19999037...
4,2026-01-15T06:31:34+0000,3,A,Production,0,f0=0.1103507212|f1=-2.6549402816|f2=2.34092535...


In [15]:
# Parse the `timestamp` column into pandas datetimes so it can be compared
# against the `now` marker in the next step.
stats['timestamp'] = pd.to_datetime(stats['timestamp'])

In [16]:
# Keep only the records logged strictly after the `now` marker, then show the
# size of the selection.
ab_test = stats.loc[stats['timestamp'].gt(now)]
ab_test.shape

(10000, 6)

In [17]:
# Join the logged records with the locally scored dataset on the user id.
# Both frames carry a `prediction` column, so the merge suffixes them:
# `prediction_x` comes from `ab_test`, `prediction_y` from `random_sample`.
abs_test = pd.merge(ab_test, random_sample, left_on='user_id', right_on='index')
abs_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 20 columns):
 #   Column          Non-Null Count  Dtype              
---  ------          --------------  -----              
 0   timestamp       10000 non-null  datetime64[ns, UTC]
 1   user_id         10000 non-null  int64              
 2   assigned_group  10000 non-null  object             
 3   model_stage     10000 non-null  object             
 4   prediction_x    10000 non-null  int64              
 5   features        10000 non-null  object             
 6   index           10000 non-null  int64              
 7   f0              10000 non-null  float64            
 8   f1              10000 non-null  float64            
 9   f2              10000 non-null  float64            
 10  f3              10000 non-null  float64            
 11  f4              10000 non-null  float64            
 12  f5              10000 non-null  float64            
 13  f6              10000 non-null  

In [18]:
import numpy as np
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix
)
from scipy.stats import ttest_ind, chi2_contingency


def compute_ml_metrics(
    df: pd.DataFrame,
    target_col: str = "target",
    pred_col: str = "prediction",
    group_col: str = "group",
    proba_col: str | None = None,
):
    """Compute classification metrics and confusion matrices per group.

    Args:
        df: Frame holding the true labels, the predictions and the group label.
        target_col: Name of the column with the true labels.
        pred_col: Name of the column with the predicted labels.
        group_col: Name of the column the rows are grouped by.
        proba_col: Accepted for interface compatibility; currently not used.

    Returns:
        Tuple ``(ml_metrics_df, conf_matrices)``:
          * ``ml_metrics_df`` - one row per group with the columns ``group``,
            ``accuracy``, ``precision``, ``recall`` and ``f1`` (the last three
            use weighted averaging); the columns are present even when ``df``
            has no rows.
          * ``conf_matrices`` - dict mapping each group to its confusion
            matrix, all built over the same ordered label list.
    """
    # Use every label seen in either the targets or the predictions: with
    # labels taken from the targets alone, predictions of a class that never
    # occurs as a target would be silently left out of the confusion matrices.
    classes = sorted(set(df[target_col].unique()) | set(df[pred_col].unique()))

    metric_columns = ["group", "accuracy", "precision", "recall", "f1"]
    ml_metrics = []
    conf_matrices = {}

    for group, df_g in df.groupby(group_col):
        y_true = df_g[target_col]
        y_pred = df_g[pred_col]

        ml_metrics.append({
            "group": group,
            "accuracy": accuracy_score(y_true, y_pred),
            "precision": precision_score(y_true, y_pred, average="weighted"),
            "recall": recall_score(y_true, y_pred, average="weighted"),
            "f1": f1_score(y_true, y_pred, average="weighted"),
        })
        # A shared label list keeps the matrices of all groups comparable.
        conf_matrices[group] = confusion_matrix(y_true, y_pred, labels=classes)

    # Explicit columns keep the schema stable when there are no groups at all.
    ml_metrics_df = pd.DataFrame(ml_metrics, columns=metric_columns)
    return ml_metrics_df, conf_matrices


def compute_ttests_for_numeric(
    df: pd.DataFrame,
    group_col: str = "group",
    exclude_cols: list[str] | None = None,
):
    """
    T‑test по всем числовым фичам между двумя группами.
    """
    if exclude_cols is None:
        exclude_cols = [group_col, "target", "prediction", "index"]

    groups = df[group_col].dropna().unique()
    if len(groups) != 2:
        return pd.DataFrame()  # t‑test только для 2 групп

    g1, g2 = groups
    df1 = df[df[group_col] == g1]
    df2 = df[df[group_col] == g2]

    numeric_cols = [
        c for c in df.select_dtypes(include=[np.number]).columns
        if c not in exclude_cols
    ]

    results = []
    for col in numeric_cols:
        stat, p = ttest_ind(
            df1[col].dropna(),
            df2[col].dropna(),
            equal_var=False,
        )
        results.append({
            "feature": col,
            "group_1": g1,
            "group_2": g2,
            "t_stat": stat,
            "p_value": p,
            "significant_p<0.05": p < 0.05,
        })

    return pd.DataFrame(results)


def compute_chi_square(
    df: pd.DataFrame,
    cat_col: str,
    group_col: str = "group",
    alpha: float = 0.05,
):
    """Chi-square test of a categorical column against the group column.

    Args:
        df: Frame holding both columns.
        cat_col: Name of the categorical column to test.
        group_col: Name of the column that labels the groups.
        alpha: Threshold the p-value is compared against.

    Returns:
        Dict with the contingency ``table`` (groups as rows, categories as
        columns), the ``chi2`` statistic, the ``p_value``, the degrees of
        freedom ``dof`` and the flag ``significant_p<alpha``.
    """
    contingency = pd.crosstab(index=df[group_col], columns=df[cat_col])
    outcome = chi2_contingency(contingency)
    # Only the first three fields are reported; expected frequencies are unused.
    statistic, p_value, degrees = outcome[0], outcome[1], outcome[2]

    report = {"table": contingency}
    report["chi2"] = statistic
    report["p_value"] = p_value
    report["dof"] = degrees
    report["significant_p<alpha"] = p_value < alpha
    return report


def compare_groups_by_predictions(
    df: pd.DataFrame,
    pred_col: str = "prediction",
    group_col: str = "group",
    alpha: float = 0.05,
):
    """Test whether the predictions are distributed alike across the groups.

    H0: the distribution of ``pred_col`` is the same in every group.

    Args:
        df: Frame holding the prediction and group columns.
        pred_col: Name of the prediction column, treated as categorical.
        group_col: Name of the column that labels the groups.
        alpha: Threshold forwarded to the chi-square helper.

    Returns:
        Whatever ``compute_chi_square`` returns for these columns.
    """
    chi_square_report = compute_chi_square(df, pred_col, group_col, alpha)
    return chi_square_report


In [19]:
# Show how many merged rows fall into each value of the `group` column.
abs_test['group'].value_counts()

group
A    7969
B    2031
Name: count, dtype: int64

In [20]:
# Per-group quality metrics on the merged frame. `prediction_y` is the
# prediction column that came from `random_sample` in the merge, and rows are
# grouped by the `assigned_group` column that came from the stats records.
ml_metrics_df, conf_matrices = compute_ml_metrics(abs_test, pred_col='prediction_y', group_col='assigned_group')
display(ml_metrics_df)
# Last expression: the per-group confusion matrices are shown as the cell output.
conf_matrices

Unnamed: 0,group,accuracy,precision,recall,f1
0,A,0.540971,0.545552,0.540971,0.53357
1,B,0.616445,0.615759,0.616445,0.615875


{'A': array([[1071,  752,  852],
        [ 249, 1891,  501],
        [ 500,  804, 1349]]),
 'B': array([[416,  83, 167],
        [113, 470, 107],
        [166, 143, 366]])}

In [21]:
# Chi-square test of whether the predictions are distributed alike across the
# assigned groups. `prediction_x` is the prediction column that came from the
# stats records in the merge.
pred_test = compare_groups_by_predictions(
    abs_test,
    pred_col="prediction_x",
    group_col="assigned_group",
)

# Show the contingency table, then the test statistic, p-value and the flag
# computed by the helper (default alpha of 0.05 is used in the call above).
display(pred_test["table"])
print(
    "chi2:", pred_test["chi2"],
    "p_value:", pred_test["p_value"],
    "significant_p<0.05:", pred_test["significant_p<alpha"],
)

prediction_x,0,1,2
assigned_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,1820,3447,2702
B,695,696,640


chi2: 117.68190814515982 p_value: 2.790604796457559e-26 significant_p<0.05: True
