# 📊 Evidently Data Drift Detection (Clean Implementation)
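This notebook provides a clean, working implementation of data drift detection using Evidently's classic `Report` / `DataDriftPreset` API. The import paths and result keys used below are those of Evidently 0.4–0.6; in **Evidently 0.7.x** the same classic API is available under the `evidently.legacy` package.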

In [None]:
import pandas as pd
import numpy as np

# 1. Import Evidently components
#
# The cells below use the classic Report API (`Report(metrics=[...])`,
# `save_html`, `as_dict`). Evidently 0.4-0.6 exposes it at `evidently.report` /
# `evidently.metric_preset`; Evidently 0.7.x moved it under `evidently.legacy`.
# Try both locations so the notebook runs on either version line.
try:
    # Evidently 0.4 - 0.6 import paths
    from evidently.report import Report
    from evidently.metric_preset import DataDriftPreset
except ImportError:
    try:
        # Evidently 0.7.x: the classic API lives under `evidently.legacy`
        from evidently.legacy.report import Report
        from evidently.legacy.metric_preset import DataDriftPreset
    except ImportError as e:
        print(f"‚ùå Import Error: {e}")
        print("Try running: !pip install evidently --upgrade")
        # Stop here: swallowing the error would only defer the failure to a
        # confusing NameError on `Report` in a later cell.
        raise
print("‚úÖ Evidently imports successful!")

## 1️⃣ Load Data

In [None]:
# Read the pre-processed daily demand table from disk
df = pd.read_csv('../data/processed/daily_demand.csv')
print(f"Total Data: {df.shape}")

# Pick the target column: use 'demand_quantity' when the file has it,
# otherwise fall back to the second column of the frame
if 'demand_quantity' in df.columns:
    target_col = 'demand_quantity'
else:
    target_col = df.columns[1]
print(f"Target Column: {target_col}")

## 2️⃣ Create Baseline vs Current Split

In [None]:
# Rows before the midpoint form the Reference (baseline) set; rows from the
# midpoint onward form the Current (production) set. `//` rounds down, so with
# an odd row count the extra row lands in Current.
mid_point = len(df) // 2
reference_df, current_df = (
    part.reset_index(drop=True)  # give each half its own 0-based index
    for part in (df.iloc[:mid_point], df.iloc[mid_point:])
)

print(f"Reference Data: {reference_df.shape}")
print(f"Current Data:   {current_df.shape}")

## 3️⃣ Simulate Drift (Optional)

In [None]:
# Build a 'Drifted' variant of the current data for testing.
# Work on a copy so `current_df` itself is left untouched.
current_drifted = current_df.copy()
# Double the target and offset it by 100, which moves both its mean and its spread
current_drifted[target_col] = current_drifted[target_col].mul(2).add(100)

print("Created 'current_drifted' dataset with artificial drift.")

## 4️⃣ Run Drift Report

In [None]:
# Build a report whose only metric group is the Data Drift preset
drift_report = Report(metrics=[DataDriftPreset()])

# Compare the REAL current half against the reference half
drift_report.run(current_data=current_df, reference_data=reference_df)

# Leave the report as the cell's last expression so the notebook renders it
drift_report

In [None]:
# Save Report
# Writes the real-data (reference vs. current) report to an HTML file in the
# notebook's working directory, then prints a confirmation.
drift_report.save_html('drift_report_real.html')
print("‚úÖ Saved: drift_report_real.html")

## 5️⃣ Run Drift Report (With Simulated Drift)

In [None]:
# Same preset as before, this time evaluated against the DRIFTED data
drifted_report = Report(metrics=[DataDriftPreset()])

# Reference stays the baseline; current is the artificially shifted copy
drifted_report.run(current_data=current_drifted, reference_data=reference_df)

# Leave the report as the cell's last expression so the notebook renders it
drifted_report

In [None]:
# Save Report
# Writes the simulated-drift (reference vs. current_drifted) report to an HTML
# file in the notebook's working directory, then prints a confirmation.
drifted_report.save_html('drift_report_simulated.html')
print("‚úÖ Saved: drift_report_simulated.html")

## 6️⃣ Extract JSON Metrics

In [None]:
# Get results as a plain Python dictionary
results = drifted_report.as_dict()

# DataDriftPreset expands into several metrics. Select the dataset-level
# summary by name instead of trusting its position; if no entry carries that
# name, fall back to the first metric (the original positional lookup).
dataset_drift_result = next(
    (
        metric['result']
        for metric in results['metrics']
        if metric.get('metric') == 'DatasetDriftMetric'
    ),
    results['metrics'][0]['result'],
)

drift_detected = dataset_drift_result['dataset_drift']
# NOTE: 'drift_share' in this result is the configured *threshold* (the share of
# drifted columns at which the dataset is declared drifted), not the observed
# value. The observed fraction of drifted columns is 'share_of_drifted_columns'.
share_drifted = dataset_drift_result['share_of_drifted_columns']

print(f"Drift Detected: {drift_detected}")
print(f"Share of Drifted Columns: {share_drifted:.2%}")