In [1]:
import pandas as pd
import numpy as np
from scipy.stats import ks_2samp

# --------------------
# Load training data
# --------------------
train_df = pd.read_csv("../data/data.csv")

# Encode gender as integer codes, numbered in order of first appearance.
# pd.factorize marks missing values with the sentinel code -1. Once encoded,
# the column no longer contains NaN, so the dropna() below would keep those
# rows and -1 would show up as a bogus extra category in the statistics.
# Filter them out explicitly instead.
gender_codes, _ = pd.factorize(train_df["gender"])
train_df["gender"] = gender_codes
train_df = train_df[gender_codes >= 0]

# "sno" and "target" are excluded from the comparison (presumably a row
# identifier and the label -- confirm against the dataset); rows with a
# missing value in any remaining column are discarded.
train_df = train_df.drop(columns=["sno", "target"]).dropna()

# --------------------
# Load generated inference data
# (same generation logic as earlier script)
# --------------------
# Seed NumPy's global generator so the synthetic batch is reproducible.
# Every draw below consumes the same global stream, so the order of the
# entries in the table is significant and must not be rearranged.
np.random.seed(42)

_N_INFER_ROWS = 100

# (column name, sampler, low, high). Both samplers draw from the half-open
# interval [low, high): randint never returns `high` itself.
_INFER_FEATURE_SPECS = (
    ("age", np.random.randint, 30, 80),
    ("gender", np.random.randint, 0, 2),
    ("cp", np.random.randint, 0, 4),
    ("trestbps", np.random.randint, 90, 180),
    ("chol", np.random.randint, 150, 300),
    ("fbs", np.random.randint, 0, 2),
    ("restecg", np.random.randint, 0, 2),
    ("thalach", np.random.randint, 70, 200),
    ("exang", np.random.randint, 0, 2),
    ("oldpeak", np.random.uniform, 0.0, 6.0),
    ("slope", np.random.randint, 0, 3),
    ("ca", np.random.randint, 0, 4),
    ("thal", np.random.randint, 0, 3),
)

# The comprehension evaluates the samplers in table order, and the dict
# keeps insertion order, so column order matches the table as well.
infer_df = pd.DataFrame({
    name: sampler(low, high, _N_INFER_ROWS)
    for name, sampler, low, high in _INFER_FEATURE_SPECS
})

# --------------------
# Drift computation
# --------------------
# For each training feature, print the mean and standard deviation of the
# training and inference samples, then run a two-sample Kolmogorov-Smirnov
# test and flag the feature when its p-value falls below DRIFT_ALPHA.
#
# NOTE(review): the KS test is defined for continuous distributions; for
# columns that take only a few integer values its p-values should be read
# with caution -- confirm whether a categorical test is more appropriate.
DRIFT_ALPHA = 0.05

print("=== Input Drift Report ===\n")

for col in train_df.columns:
    # A training feature that is absent from the inference frame would raise
    # KeyError and abort the whole report; report it and move on instead.
    if col not in infer_df.columns:
        print(f"Feature: {col}")
        print("  → Skipped: column missing from inference data")
        print()
        continue

    train_values = train_df[col]
    infer_values = infer_df[col]

    train_mean = train_values.mean()
    infer_mean = infer_values.mean()

    train_std = train_values.std()
    infer_std = infer_values.std()

    ks_stat, p_value = ks_2samp(train_values, infer_values)

    print(f"Feature: {col}")
    print(f"  Train mean/std: {train_mean:.2f} / {train_std:.2f}")
    print(f"  Infer mean/std: {infer_mean:.2f} / {infer_std:.2f}")
    print(f"  KS statistic: {ks_stat:.3f}, p-value: {p_value:.3f}")

    if p_value < DRIFT_ALPHA:
        print("  → Drift detected")
    else:
        print("  → No significant drift")

    print()


=== Input Drift Report ===

Feature: age
  Train mean/std: 54.35 / 9.18
  Infer mean/std: 54.07 / 14.45
  KS statistic: 0.202, p-value: 0.004
  → Drift detected

Feature: gender
  Train mean/std: 0.31 / 0.46
  Infer mean/std: 0.43 / 0.50
  KS statistic: 0.119, p-value: 0.216
  → No significant drift

Feature: cp
  Train mean/std: 0.97 / 1.03
  Infer mean/std: 1.50 / 1.16
  KS statistic: 0.201, p-value: 0.004
  → Drift detected

Feature: trestbps
  Train mean/std: 131.68 / 17.66
  Infer mean/std: 129.66 / 24.41
  KS statistic: 0.189, p-value: 0.008
  → Drift detected

Feature: chol
  Train mean/std: 246.18 / 51.41
  Infer mean/std: 227.84 / 42.20
  KS statistic: 0.180, p-value: 0.014
  → Drift detected

Feature: fbs
  Train mean/std: 0.15 / 0.35
  Infer mean/std: 0.53 / 0.50
  KS statistic: 0.383, p-value: 0.000
  → Drift detected

Feature: restecg
  Train mean/std: 0.52 / 0.53
  Infer mean/std: 0.43 / 0.50
  KS statistic: 0.075, p-value: 0.759
  → No significant drift

Feature: thalach