# Class 2 - Part 3: Data Drift Detection

## Objective
In this session, you will:
1. Understand data drift and its impact on model performance
2. Create shifted datasets (brightness, contrast variations)
3. Compare input data distributions using histograms
4. Calculate KL divergence to quantify distribution shift
5. Measure accuracy degradation under data drift
6. Implement drift detection to trigger retraining

## Key Concepts
- **Data Drift**: Changes in input data distribution over time
- **Covariate Shift**: Distribution of features changes
- **Concept Drift**: Relationship between features and target changes
- **KL Divergence**: Measure of how different two distributions are
- **Drift Detection**: Automated alerts when model performance drops

## Why Data Drift Matters

**Real-world Example**: ADAS (Advanced Driver Assistance Systems)
- Model trained on daytime highway footage
- Deployed at night or in rain → Images much darker
- Model accuracy drops from 95% → 70%
- But code hasn't changed - **DATA CHANGED**

**Other Examples**:
- Economic crisis changes user spending patterns
- Camera hardware changed = different image quality
- Seasonal changes in weather/lighting
- Model degradation is silent - no alerts!

## Step 1: Setup & Load Original Dataset

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from PIL import Image, ImageEnhance
from scipy.stats import entropy
from scipy.spatial.distance import jensenshannon
from collections import defaultdict

import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.models import resnet18
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

print("‚úÖ Libraries imported")

In [None]:
# Setup
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): ImageFolder assigns class indices by sorted folder name and this
# list is not alphabetical - confirm the order before using it to decode
# predicted indices. In this notebook it is only used for its length.
class_names = ['animal', 'name_board', 'vehicle', 'pedestrian', 'pothole', 'road_sign', 'speed_breaker']
num_classes = len(class_names)

print(f"Device: {device}")
print(f"Classes: {class_names}")

# Dataset paths
# The default is a machine-specific location; set the DATASET_PATH environment
# variable to point the notebook at the dataset root on another machine.
DATASET_PATH = os.environ.get(
    "DATASET_PATH",
    r"C:\Users\Lucifer\python_workspace\BITS\AI_Quality_Engineering\dataset",
)
TEST_PATH = os.path.join(DATASET_PATH, "test")

print(f"Dataset path: {TEST_PATH}")

In [None]:
# Load model (same as Part 1)
class CNNModel(nn.Module):
    """ResNet-18 classifier whose final layer is resized to ``num_classes`` outputs."""

    def __init__(self, num_classes):
        super().__init__()
        # Build the backbone without pretrained weights, then replace its
        # fully-connected head with one sized for this task.
        backbone = resnet18(pretrained=False)
        head_inputs = backbone.fc.in_features
        backbone.fc = nn.Linear(head_inputs, num_classes)
        self.resnet = backbone

    def forward(self, x):
        """Return the wrapped network's output for the batch ``x``."""
        logits = self.resnet(x)
        return logits

# NOTE(review): no checkpoint is loaded here, so the network keeps its random
# initial weights - confirm whether trained weights should be restored with
# load_state_dict before the accuracies below are interpreted.
model = CNNModel(num_classes).to(device)
model.eval()  # inference mode: fixes batch-norm statistics and disables dropout

print("✅ Model created")

In [None]:
# Image transformations
# Shared preprocessing for the original and the drift-shifted datasets:
# resize to 128x128, convert to a tensor, then normalise each channel.
# NOTE(review): the mean/std values look like the usual ImageNet statistics -
# confirm they match what the model was trained with.
transform_base = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

print("✅ Transformations ready")

In [None]:
# Load test dataset
# shuffle=False keeps the sample order fixed between runs.
test_dataset = ImageFolder(TEST_PATH, transform=transform_base)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print(f"✅ Loaded {len(test_dataset)} test samples")

## Step 2: Create Drift-Shifted Datasets

In [None]:
class BrightnessShiftedDataset(torch.utils.data.Dataset):
    """Dataset view that rescales every image's brightness to mimic data drift."""

    def __init__(self, original_dataset, brightness_factor=0.4):
        # Reuse the base preprocessing so that brightness is the only
        # difference from the un-shifted pipeline.
        self.transform = transform_base
        self.brightness_factor = brightness_factor
        self.original_dataset = original_dataset

    def __len__(self):
        source = self.original_dataset
        return len(source)

    def __getitem__(self, idx):
        # Each entry of ``imgs`` unpacks to a file path and its label.
        path, target = self.original_dataset.imgs[idx]
        rgb = Image.open(path).convert('RGB')

        # Scale the brightness (e.g. night time or cloudy conditions) before
        # the standard transformations run.
        shifted = ImageEnhance.Brightness(rgb).enhance(self.brightness_factor)

        return self.transform(shifted), target

# Test it
# Build the brightness-shifted view of the test set and a loader over it.
try:
    brightness_shifted_dataset = BrightnessShiftedDataset(test_dataset, brightness_factor=0.4)
    brightness_shifted_loader = DataLoader(brightness_shifted_dataset, batch_size=32, shuffle=False)
    print("✅ Brightness-shifted dataset created")
except Exception as e:
    print(f"⚠️ Could not load brightness-shifted dataset: {e}")
    print("   (Dataset files may be missing)")

In [None]:
class ContrastShiftedDataset(torch.utils.data.Dataset):
    """Dataset view that rescales every image's contrast to mimic data drift."""

    def __init__(self, original_dataset, contrast_factor=0.5):
        # Reuse the base preprocessing so that contrast is the only
        # difference from the un-shifted pipeline.
        self.transform = transform_base
        self.contrast_factor = contrast_factor
        self.original_dataset = original_dataset

    def __len__(self):
        source = self.original_dataset
        return len(source)

    def __getitem__(self, idx):
        # Each entry of ``imgs`` unpacks to a file path and its label.
        path, target = self.original_dataset.imgs[idx]
        rgb = Image.open(path).convert('RGB')

        # Scale the contrast (e.g. poor camera quality or fog) before the
        # standard transformations run.
        shifted = ImageEnhance.Contrast(rgb).enhance(self.contrast_factor)

        return self.transform(shifted), target

# Test it
# Build the contrast-shifted view of the test set and a loader over it.
try:
    contrast_shifted_dataset = ContrastShiftedDataset(test_dataset, contrast_factor=0.5)
    contrast_shifted_loader = DataLoader(contrast_shifted_dataset, batch_size=32, shuffle=False)
    print("✅ Contrast-shifted dataset created")
except Exception as e:
    print(f"⚠️ Could not load contrast-shifted dataset: {e}")
    print("   (Dataset files may be missing)")

## Step 3: Evaluate Model on Different Data Distributions

In [None]:
def evaluate_dataset(model, loader, device, dataset_name):
    """
    Evaluate a classifier on every batch yielded by ``loader``.

    Args:
        model: Module mapping an image batch to per-class scores.
        loader: Iterable of ``(images, labels)`` batches.
        device: Device the image batches are moved to before the forward pass.
        dataset_name: Human-readable dataset label, used in error messages.

    Returns:
        accuracy: Top-1 accuracy as a percentage (0-100)
        predictions: List of predicted class indices, in loader order

    Raises:
        ValueError: If ``loader`` yields no samples (accuracy is undefined).
    """
    model.eval()
    correct = 0
    total = 0
    predictions = []

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            outputs = model(images)
            # Index of the highest score per row is the top-1 prediction.
            _, predicted = torch.max(outputs, 1)
            # Compare on the CPU; labels are used exactly as the loader
            # yields them (assumed to be CPU tensors).
            predicted = predicted.cpu()

            correct += (predicted == labels).sum().item()
            total += labels.size(0)
            predictions.extend(predicted.numpy())

    if total == 0:
        # Fail with a descriptive message instead of a bare ZeroDivisionError.
        raise ValueError(f"No samples to evaluate for dataset '{dataset_name}'")

    accuracy = 100 * correct / total
    return accuracy, predictions

print("✅ Evaluation function defined")

In [None]:
# Evaluate on different datasets
print("\n" + "="*70)
print("MODEL PERFORMANCE UNDER DATA DRIFT")
print("="*70)

results = {}
# Pre-bind the baseline so that a failed baseline run is reported once, instead
# of resurfacing as a NameError inside the shifted-dataset evaluations below.
acc_original = None

try:
    # Original test set
    acc_original, pred_original = evaluate_dataset(model, test_loader, device, "Original Test")
    results['Original'] = acc_original
    print(f"\n✅ Original Test Set Accuracy: {acc_original:.2f}%")
except Exception as e:
    print(f"\n❌ Error evaluating original dataset: {e}")

try:
    # Brightness-shifted
    acc_brightness, pred_brightness = evaluate_dataset(model, brightness_shifted_loader, device, "Brightness-Shifted")
    results['Brightness-Shifted (0.4x)'] = acc_brightness
    print(f"\n⚠️  Brightness-Shifted (40% brightness) Accuracy: {acc_brightness:.2f}%")
    if acc_original is not None:
        degradation_brightness = acc_original - acc_brightness
        print(f"   → Degradation: {degradation_brightness:.2f}% (▼)")
    else:
        print("   → Degradation: n/a (baseline accuracy unavailable)")
except Exception as e:
    print(f"\n⚠️ Error evaluating brightness-shifted dataset: {e}")

try:
    # Contrast-shifted
    acc_contrast, pred_contrast = evaluate_dataset(model, contrast_shifted_loader, device, "Contrast-Shifted")
    results['Contrast-Shifted (0.5x)'] = acc_contrast
    print(f"\n⚠️  Contrast-Shifted (50% contrast) Accuracy: {acc_contrast:.2f}%")
    if acc_original is not None:
        degradation_contrast = acc_original - acc_contrast
        print(f"   → Degradation: {degradation_contrast:.2f}% (▼)")
    else:
        print("   → Degradation: n/a (baseline accuracy unavailable)")
except Exception as e:
    print(f"\n⚠️ Error evaluating contrast-shifted dataset: {e}")

print("\n" + "="*70)

## Step 4: Extract Features and Compare Distributions

In [None]:
def extract_features(model, loader, device, num_samples=None):
    """
    Extract image features (tensors before classification layer).

    Runs every stage of ``model.resnet`` up to and including ``avgpool`` and
    flattens the result, skipping the final ``fc`` layer.

    Args:
        model: Module exposing a ResNet-style backbone as ``model.resnet``.
        loader: Iterable of ``(images, labels)`` batches; labels are ignored.
        device: Device the image batches are moved to before the forward pass.
        num_samples: Maximum number of samples to return. ``None`` (or 0)
            processes the whole loader.

    Returns:
        features: numpy array of shape (num_samples, feature_dim), or fewer
            rows when the loader holds fewer samples. An empty 1-D array is
            returned when the loader yields nothing.
    """
    model.eval()
    backbone = model.resnet
    features_list = []
    collected = 0  # samples gathered so far, independent of batch size

    with torch.no_grad():
        for images, _ in loader:
            images = images.to(device)
            # Get features from ResNet (average pooling output)
            x = backbone.conv1(images)
            x = backbone.bn1(x)
            x = backbone.relu(x)
            x = backbone.maxpool(x)
            x = backbone.layer1(x)
            x = backbone.layer2(x)
            x = backbone.layer3(x)
            x = backbone.layer4(x)
            x = backbone.avgpool(x)
            features = x.view(x.size(0), -1)  # Flatten

            features_list.append(features.cpu().numpy())
            collected += features.shape[0]

            # Stop as soon as enough samples have been gathered; counting
            # actual rows avoids overshooting by one or more batches.
            if num_samples and collected >= num_samples:
                break

    if not features_list:
        return np.array([])

    stacked = np.vstack(features_list)
    # The last batch may overshoot the request, so trim to the exact size.
    return stacked[:num_samples] if num_samples else stacked

print("✅ Feature extraction function defined")

In [None]:
# Extract features from different datasets (sample)
# Each variable is set to None on failure so later cells can skip that comparison.
print("\nExtracting features from datasets...")
try:
    features_original = extract_features(model, test_loader, device, num_samples=100)
    print(f"✅ Original features shape: {features_original.shape}")
except Exception as e:
    print(f"❌ Error extracting original features: {e}")
    features_original = None

try:
    features_brightness = extract_features(model, brightness_shifted_loader, device, num_samples=100)
    print(f"✅ Brightness-shifted features shape: {features_brightness.shape}")
except Exception as e:
    print(f"⚠️ Error extracting brightness features: {e}")
    features_brightness = None

try:
    features_contrast = extract_features(model, contrast_shifted_loader, device, num_samples=100)
    print(f"✅ Contrast-shifted features shape: {features_contrast.shape}")
except Exception as e:
    print(f"⚠️ Error extracting contrast features: {e}")
    features_contrast = None

## Step 5: Calculate KL Divergence (Distribution Shift Metric)

In [None]:
def compute_distribution_metrics(features_dist1, features_dist2, bin_edges=None):
    """
    Compare two feature distributions using KL divergence.

    Only the first feature dimension (column 0) of each array is compared.

    Args:
        features_dist1: np array of features from distribution 1,
            shape (num_samples, feature_dim)
        features_dist2: np array of features from distribution 2,
            shape (num_samples, feature_dim)
        bin_edges: Optional sequence of histogram bin edges. When omitted,
            50 evenly spaced edges (49 bins) spanning both samples are used.

    Returns:
        kl_divergence: KL(dist1 || dist2) in nats
        jsd: Value of scipy's ``jensenshannon`` for the two histograms, i.e.
            the Jensen-Shannon *distance* (square root of the divergence);
            symmetric in its arguments
        hist1: Smoothed, normalised histogram of distribution 1
        hist2: Smoothed, normalised histogram of distribution 2
        bins: The bin edges that were used

    Raises:
        ValueError: If either input is empty, or if ``bin_edges`` do not
            cover any sample of one of the inputs.
    """
    if len(features_dist1) == 0 or len(features_dist2) == 0:
        raise ValueError("Both feature arrays must contain at least one sample")

    # Use first feature dimension for simplicity
    feat1 = features_dist1[:, 0]
    feat2 = features_dist2[:, 0]

    # Create histograms on a shared set of edges so the bins are comparable
    if bin_edges is None:
        lowest = min(feat1.min(), feat2.min())
        highest = max(feat1.max(), feat2.max())
        bins = np.linspace(lowest, highest, 50)
    else:
        bins = bin_edges
    hist1, _ = np.histogram(feat1, bins=bins)
    hist2, _ = np.histogram(feat2, bins=bins)

    if hist1.sum() == 0 or hist2.sum() == 0:
        raise ValueError("bin_edges do not cover any samples of one of the inputs")

    # Normalize to probabilities
    hist1 = hist1 / hist1.sum()
    hist2 = hist2 / hist2.sum()

    # Add small epsilon to avoid log(0), then renormalise so both histograms
    # are proper probability vectors again (they sum to exactly 1).
    epsilon = 1e-10
    hist1 = hist1 + epsilon
    hist2 = hist2 + epsilon
    hist1 = hist1 / hist1.sum()
    hist2 = hist2 / hist2.sum()

    # KL divergence
    kl_div = np.sum(hist1 * np.log(hist1 / hist2))

    # Jensen-Shannon (symmetric); scipy returns the distance, not the divergence
    jsd = jensenshannon(hist1, hist2)

    return kl_div, jsd, hist1, hist2, bins

print("✅ Distribution metrics function defined")

In [None]:
# Calculate KL divergence
print("\n" + "="*70)
print("DISTRIBUTION SHIFT METRICS")
print("="*70)

# Per-shift metrics and histograms, keyed by 'brightness' / 'contrast';
# later cells read this dict for plotting and drift checks.
measurements = {}

if features_original is not None and features_brightness is not None:
    try:
        kl_brightness, jsd_brightness, hist_orig, hist_bright, bins = compute_distribution_metrics(
            features_original, features_brightness
        )
        measurements['brightness'] = {
            'kl_divergence': kl_brightness,
            'jsd': jsd_brightness,
            'hist_orig': hist_orig,
            'hist_shifted': hist_bright,
            'bins': bins
        }

        print(f"\n📊 Original vs Brightness-Shifted:")
        print(f"   KL Divergence: {kl_brightness:.4f}")
        print(f"   Jensen-Shannon: {jsd_brightness:.4f}")
        print(f"   Interpretation: {'⚠️  HIGH DRIFT' if jsd_brightness > 0.1 else '✅ LOW DRIFT'}")
    except Exception as e:
        print(f"Error computing brightness metrics: {e}")

if features_original is not None and features_contrast is not None:
    try:
        kl_contrast, jsd_contrast, hist_orig, hist_cont, bins = compute_distribution_metrics(
            features_original, features_contrast
        )
        measurements['contrast'] = {
            'kl_divergence': kl_contrast,
            'jsd': jsd_contrast,
            'hist_orig': hist_orig,
            'hist_shifted': hist_cont,
            'bins': bins
        }

        print(f"\n📊 Original vs Contrast-Shifted:")
        print(f"   KL Divergence: {kl_contrast:.4f}")
        print(f"   Jensen-Shannon: {jsd_contrast:.4f}")
        print(f"   Interpretation: {'⚠️  HIGH DRIFT' if jsd_contrast > 0.1 else '✅ LOW DRIFT'}")
    except Exception as e:
        print(f"Error computing contrast metrics: {e}")

print("\n" + "="*70)

## Step 6: Visualize Distribution Shifts

In [None]:
# Create comparison visualizations
# 2x2 figure: feature histograms per shift (top row), model accuracy and
# distribution-shift magnitude (bottom row). Saved to data_drift_analysis.png.
if measurements:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Brightness comparison
    if 'brightness' in measurements:
        m = measurements['brightness']
        axes[0, 0].bar(range(len(m['hist_orig'])), m['hist_orig'], alpha=0.6, label='Original', width=0.8)
        axes[0, 0].bar(range(len(m['hist_shifted'])), m['hist_shifted'], alpha=0.6, label='Brightness-Shifted', width=0.8)
        axes[0, 0].set_xlabel('Feature Bin')
        axes[0, 0].set_ylabel('Probability')
        axes[0, 0].set_title(f'Distribution Shift: Brightness (JSD={m["jsd"]:.4f})')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)

    # Contrast comparison
    if 'contrast' in measurements:
        m = measurements['contrast']
        axes[0, 1].bar(range(len(m['hist_orig'])), m['hist_orig'], alpha=0.6, label='Original', width=0.8)
        axes[0, 1].bar(range(len(m['hist_shifted'])), m['hist_shifted'], alpha=0.6, label='Contrast-Shifted', width=0.8)
        axes[0, 1].set_xlabel('Feature Bin')
        axes[0, 1].set_ylabel('Probability')
        axes[0, 1].set_title(f'Distribution Shift: Contrast (JSD={m["jsd"]:.4f})')
        axes[0, 1].legend()
        axes[0, 1].grid(True, alpha=0.3)

    # Model accuracy comparison
    if results:
        axes[1, 0].bar(list(results.keys()), list(results.values()), color=['blue', 'orange', 'red'])
        axes[1, 0].set_ylabel('Accuracy (%)')
        axes[1, 0].set_title('Model Accuracy Under Data Drift')
        axes[1, 0].grid(True, alpha=0.3, axis='y')
        axes[1, 0].set_ylim([0, 100])

        # Add value labels on bars
        for i, (k, v) in enumerate(results.items()):
            axes[1, 0].text(i, v + 2, f'{v:.1f}%', ha='center', fontweight='bold')

    # Distribution-shift magnitude. The 0.1 threshold used in this notebook is
    # applied to the Jensen-Shannon score, so that is the metric plotted
    # against the threshold line; the KL divergence is shown as a label on
    # each bar instead of sharing an axis with a threshold that is not its own.
    kl_values = {}
    jsd_values = {}
    if 'brightness' in measurements:
        kl_values['Brightness'] = measurements['brightness']['kl_divergence']
        jsd_values['Brightness'] = measurements['brightness']['jsd']
    if 'contrast' in measurements:
        kl_values['Contrast'] = measurements['contrast']['kl_divergence']
        jsd_values['Contrast'] = measurements['contrast']['jsd']

    if jsd_values:
        axes[1, 1].bar(list(jsd_values.keys()), list(jsd_values.values()), color=['orange', 'red'])
        axes[1, 1].set_ylabel('Jensen-Shannon Score')
        axes[1, 1].set_title('Distribution Shift Magnitude')
        axes[1, 1].grid(True, alpha=0.3, axis='y')
        axes[1, 1].axhline(y=0.1, color='r', linestyle='--', label='High Drift Threshold (JSD)')
        axes[1, 1].legend()

        # Annotate each bar with the corresponding KL divergence
        for i, shift_name in enumerate(jsd_values):
            axes[1, 1].text(i, jsd_values[shift_name], f'KL={kl_values[shift_name]:.4f}',
                            ha='center', va='bottom', fontweight='bold')

    plt.tight_layout()
    plt.savefig('data_drift_analysis.png', dpi=150, bbox_inches='tight')
    plt.show()

    print("✅ Drift analysis visualization saved")
else:
    print("⚠️ No measurements to visualize")

## Step 7: Drift Detection System

In [None]:
class DriftDetector:
    """
    Collects drift checks against a baseline and summarises them.

    Two kinds of check are supported: accuracy degradation relative to the
    baseline accuracy, and a distribution-shift score compared with a
    threshold. Every check is recorded in ``self.alerts``.
    """

    def __init__(self, baseline_accuracy, accuracy_threshold=5.0, jsd_threshold=0.1):
        """
        Args:
            baseline_accuracy: Accuracy the later measurements are compared with
            accuracy_threshold: A drop strictly greater than this triggers (default 5.0)
            jsd_threshold: A JSD strictly greater than this triggers (default 0.1)
        """
        self.alerts = []
        self.jsd_threshold = jsd_threshold
        self.accuracy_threshold = accuracy_threshold
        self.baseline_accuracy = baseline_accuracy

    def _record(self, alert):
        """Store ``alert`` in the history and hand it back to the caller."""
        self.alerts.append(alert)
        return alert

    def check_accuracy_drift(self, current_accuracy, dataset_name):
        """
        Record how far ``current_accuracy`` has fallen below the baseline.
        """
        drop = self.baseline_accuracy - current_accuracy
        return self._record({
            'type': 'ACCURACY_DRIFT',
            'dataset': dataset_name,
            'baseline': self.baseline_accuracy,
            'current': current_accuracy,
            'degradation': drop,
            'triggered': drop > self.accuracy_threshold,
        })

    def check_distribution_drift(self, jsd_score, dataset_name):
        """
        Record whether ``jsd_score`` exceeds the configured JSD threshold.
        """
        limit = self.jsd_threshold
        return self._record({
            'type': 'DISTRIBUTION_DRIFT',
            'dataset': dataset_name,
            'jsd_score': jsd_score,
            'threshold': limit,
            'triggered': jsd_score > limit,
        })

    def generate_report(self):
        """
        Summarise all recorded checks; recommend RETRAIN if any one triggered.
        """
        fired = []
        for entry in self.alerts:
            if entry.get('triggered', False):
                fired.append(entry)

        if fired:
            advice = 'RETRAIN'
        else:
            advice = 'MONITOR'

        return {
            'total_checks': len(self.alerts),
            'alerts_triggered': len(fired),
            'recommendation': advice,
            'details': fired,
        }

print("‚úÖ DriftDetector class defined")

In [None]:
# Run drift detection
if results:
    print("\n" + "="*70)
    print("🔍 DRIFT DETECTION SYSTEM")
    print("="*70)

    # Without a measured baseline an accuracy comparison is meaningless: the
    # fallback of 0 would give a negative "degradation" that can never trigger.
    # Accuracy checks are therefore skipped when the baseline is missing.
    has_baseline = 'Original' in results
    baseline_acc = results.get('Original', 0)
    detector = DriftDetector(
        baseline_accuracy=baseline_acc,
        accuracy_threshold=5.0,  # Alert if accuracy drops > 5%
        jsd_threshold=0.1        # Alert if JSD > 0.1
    )

    if not has_baseline:
        print("\n⚠️  Baseline accuracy unavailable - skipping accuracy drift checks")

    # Check accuracy drift
    # (key in `results`, dataset name stored in the alert, label for the heading)
    accuracy_checks = [
        ('Brightness-Shifted (0.4x)', 'Brightness-Shifted', 'Brightness'),
        ('Contrast-Shifted (0.5x)', 'Contrast-Shifted', 'Contrast'),
    ]
    for result_key, alert_dataset, heading in accuracy_checks:
        if not has_baseline or result_key not in results:
            continue
        alert_acc = detector.check_accuracy_drift(results[result_key], alert_dataset)
        status = "🔴 TRIGGERED" if alert_acc['triggered'] else "✅ NORMAL"
        print(f"\nAccuracy Drift Check ({heading}): {status}")
        print(f"  Baseline: {alert_acc['baseline']:.2f}%")
        print(f"  Current: {alert_acc['current']:.2f}%")
        print(f"  Degradation: {alert_acc['degradation']:.2f}%")
        print(f"  Threshold: {detector.accuracy_threshold}%")

    # Check distribution drift
    # (key in `measurements`, dataset name stored in the alert, label for the heading)
    distribution_checks = [
        ('brightness', 'Brightness-Shifted', 'Brightness'),
        ('contrast', 'Contrast-Shifted', 'Contrast'),
    ]
    for measure_key, alert_dataset, heading in distribution_checks:
        if measure_key not in measurements:
            continue
        alert_dist = detector.check_distribution_drift(
            measurements[measure_key]['jsd'],
            alert_dataset
        )
        status = "🔴 TRIGGERED" if alert_dist['triggered'] else "✅ NORMAL"
        print(f"\nDistribution Drift Check ({heading}): {status}")
        print(f"  JSD Score: {alert_dist['jsd_score']:.4f}")
        print(f"  Threshold: {alert_dist['threshold']}")

    # Final report
    report = detector.generate_report()
    print("\n" + "="*70)
    print("📋 DRIFT DETECTION REPORT")
    print("="*70)
    print(f"Total Checks: {report['total_checks']}")
    print(f"Alerts Triggered: {report['alerts_triggered']}")
    print(f"\n🎯 RECOMMENDATION: {report['recommendation']}")

    if report['recommendation'] == 'RETRAIN':
        print("\n⚠️  ACTION REQUIRED:")
        print("   1. Collect new training data from production")
        print("   2. Retrain model on combined dataset")
        print("   3. Validate on holdout test set")
        print("   4. Deploy new version using canary deployment")
    else:
        print("\n✅ MODEL IS STABLE - Continue monitoring")

## Step 8: Summary & Key Takeaways

In [None]:
# Static summary of the session. The text below had its emoji and box-drawing
# characters restored from a mis-decoded (UTF-8 read as Mac Roman) form.
print("""
╔════════════════════════════════════════════════════════════════════════════════╗
║                     📊 DATA DRIFT & MONITORING - KEY INSIGHTS                   ║
╚════════════════════════════════════════════════════════════════════════════════╝

🎯 WHAT WE LEARNED:

1️⃣ REAL MODELS FACE DATA DRIFT:
   ✓ Lighting changes (night driving)
   ✓ Camera quality degradation
   ✓ Seasonal variations
   ✓ User behavior shifts
   ✓ Dataset collection changes

2️⃣ DRIFT MANIFESTS AS ACCURACY LOSS:
   ✓ Brightness: -5-15% accuracy drop
   ✓ Contrast: -3-10% accuracy drop
   ✓ Multiple shifts: cascading failures

3️⃣ KL DIVERGENCE QUANTIFIES SHIFT:
   ✓ Measures change in input distribution
   ✓ Detects drift BEFORE accuracy drops
   ✓ Used for early warning systems
   ✓ Jensen-Shannon: symmetric variant

4️⃣ AUTOMATED DRIFT DETECTION:
   ✓ Monitor accuracy continuously
   ✓ Track input distribution statistics
   ✓ Set thresholds (accuracy -5%, JSD > 0.1)
   ✓ Trigger alerts for retraining

5️⃣ MITIGATION STRATEGIES:
   ✓ Continuous retraining on fresh data
   ✓ Online learning (incremental updates)
   ✓ Ensemble methods (combine models)
   ✓ Domain adaptation techniques
   ✓ Robust training (data augmentation)

⚠️  CRITICAL METRICS TO TRACK:
   • Accuracy degradation
   • Input distribution changes (KL divergence)
   • Prediction confidence shifts
   • Runtime performance changes
   • Error rate by class

🔍 PRODUCTION WORKFLOW:
   ┌─────────────────────────────────────────┐
   │ 1. Train model on baseline data         │
   │ 2. Deploy with monitoring               │
   │ 3. Detect drift (accuracy or KL div)    │
   │ 4. Alert data science team              │
   │ 5. Collect new data from production     │
   │ 6. Retrain and validate                 │
   │ 7. Deploy new version (canary)          │
   │ 8. Continue monitoring                  │
   └─────────────────────────────────────────┘

📚 FURTHER READING:
   • Concept Drift: When and How to Retrain?
   • Population Stability Index (PSI)
   • Kolmogorov-Smirnov test for distributions
   • ADWIN (Adaptive Windowing) algorithm
   • Evidently AI library for drift detection

""")

In [None]:
# Create a summary table
# One row per evaluated dataset: accuracy, Jensen-Shannon score and a drift
# label derived from the same 0.1 JSD threshold used by the drift checks.
if results or measurements:
    print("\n" + "="*70)
    print("📋 SUMMARY TABLE")
    print("="*70 + "\n")

    summary_data = []

    if 'Original' in results:
        summary_data.append({
            'Dataset': 'Original',
            'Accuracy': f"{results['Original']:.2f}%",
            'JSD': 'Baseline',
            'Drift Status': '✅ Baseline'
        })

    # (key in `results`, key in `measurements`, name shown in the table)
    shifted_rows = [
        ('Brightness-Shifted (0.4x)', 'brightness', 'Brightness-Shifted'),
        ('Contrast-Shifted (0.5x)', 'contrast', 'Contrast-Shifted'),
    ]
    for result_key, measure_key, display_name in shifted_rows:
        if result_key not in results:
            continue

        jsd_val = measurements[measure_key]['jsd'] if measure_key in measurements else 'N/A'
        if isinstance(jsd_val, float):
            jsd_str = f"{jsd_val:.4f}"
            # Same cut-off as the drift checks: above 0.1 is high, otherwise low.
            drift_status = "🔴 HIGH" if jsd_val > 0.1 else "✅ LOW"
        else:
            # No distribution measurement available, so no drift label can be given.
            jsd_str = jsd_val
            drift_status = "N/A"

        summary_data.append({
            'Dataset': display_name,
            'Accuracy': f"{results[result_key]:.2f}%",
            'JSD': jsd_str,
            'Drift Status': drift_status
        })

    if summary_data:
        df_summary = pd.DataFrame(summary_data)
        print(df_summary.to_string(index=False))
        print("\n" + "="*70)