# Distribution Shift in the AI Job Market

This notebook demonstrates how ML models trained on historical AI job market data experience significant performance degradation when deployed on future data.

## 1. Setup

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Suppress every warning category for the rest of the kernel session so the
# notebook output stays compact.
# NOTE(review): this also hides warnings raised by the project modules imported
# below; consider narrowing the filter if metric or data issues need to surface.
warnings.filterwarnings('ignore')

from config import MODELS_DIR, OUTPUTS_DIR, FIGURES_DIR
from data_processing import load_and_prepare_data, FeaturePreprocessor, get_temporal_splits, validate_no_leakage
from model import BaselineModel, train_baseline
from drift_detection import DriftDetector
from evaluation import calculate_all_metrics, MonthlyEvaluator, RollingWindowRetrainer, StrategyComparison

# Confirmation message, reached only if every import in this cell resolved.
# The check mark was previously stored as mojibake (UTF-8 bytes decoded as cp1252).
print("✓ Imports complete")

## 2. Load Data

In [None]:
# Load the dataset through the project helper, report its dimensions,
# and leave a preview of the first rows as the cell output.
df = load_and_prepare_data()
print("Dataset shape: {}".format(df.shape))
df.head()

## 3. Temporal Split

In [None]:
# Split the full frame into a training frame and `monthly_windows`; the latter is
# iterated later as (month_label, month_df) pairs.
train_df, monthly_windows = get_temporal_splits(df)
# Project-level sanity check on the split.
# NOTE(review): presumably raises or reports if any window overlaps the training
# period — confirm against data_processing.validate_no_leakage.
validate_no_leakage(train_df, monthly_windows)

## 4. Preprocess Features

In [None]:
# Fit the preprocessor on the training split only, then transform that same
# split into the feature matrix and target vector used for training.
preprocessor = FeaturePreprocessor()
preprocessor.fit(train_df)
X_train, y_train = preprocessor.transform(train_df)

print(f"Features: {X_train.shape}")

# Class counts of the training target, ordered by class label.
target_counts = pd.Series(y_train).value_counts().sort_index().to_dict()
print("Target distribution:", target_counts)

## 5. Train Baseline Model

In [None]:
# Make sure every output directory exists before anything is written to it.
MODELS_DIR.mkdir(parents=True, exist_ok=True)
OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
FIGURES_DIR.mkdir(parents=True, exist_ok=True)

# Train the baseline model on the preprocessed training split.
model = train_baseline(X_train, y_train, preprocessor.feature_columns)

# Reference metrics for later comparison against each deployment month.
# NOTE(review): these are computed on the same data the model was trained on, so
# they are in-sample. Any later "drop" relative to them mixes the ordinary
# train/test gap with genuine distribution shift; a held-out slice of the
# training period would give a cleaner baseline.
y_train_pred = model.predict(X_train)
y_train_proba = model.predict_proba(X_train)
baseline_metrics = calculate_all_metrics(y_train, y_train_pred, y_train_proba)

# The emoji was previously stored as mojibake (UTF-8 bytes decoded as cp1252).
print(f"\n📊 Baseline: Acc={baseline_metrics['accuracy']:.4f}, AUC={baseline_metrics['roc_auc']:.4f}")

## 6. Drift Detection Setup

In [None]:
# Register the training-period features and predicted probabilities as the
# reference that later months are compared against.
drift_detector = DriftDetector()
drift_detector.set_reference(
    features=X_train,
    predictions=y_train_proba,
    feature_names=preprocessor.feature_columns,
    # `.get` is used instead of indexing so a missing column does not raise.
    # NOTE(review): assumes train_df is a pandas DataFrame, in which case a
    # missing 'required_skills' column yields None — confirm the detector accepts None.
    skills_series=train_df.get('required_skills')
)
# The check mark was previously stored as mojibake (UTF-8 bytes decoded as cp1252).
print("✓ Drift detector initialized")

## 7. Monthly Evaluation

In [None]:
# Evaluate the frozen baseline model on each deployment month in turn, and run
# drift detection on the same month's features and predicted probabilities.
evaluator = MonthlyEvaluator()
evaluator.set_baseline(baseline_metrics)

# Chronological record of (label, frame) pairs, starting with the training data.
# It is only appended to in this cell.
historical_data = [('train', train_df)]

# Build the header once and size the rule from it, so the separator always
# matches the table width (the header is 52 characters; the rule used to be 50).
header = f"{'Month':<10} {'Samples':>8} {'Accuracy':>10} {'AUC':>10} {'ECE':>10}"
print(header)
print("-" * len(header))

for month_label, month_df in monthly_windows:
    # Reuse the preprocessor fitted on the training split; it is not refitted here.
    X_month, y_month = preprocessor.transform(month_df)
    y_month_proba = model.predict_proba(X_month)

    metrics = evaluator.evaluate_month(model, X_month, y_month, month_label)
    drift_detector.detect(X_month, y_month_proba, month_label, month_df.get('required_skills'))

    print(f"{month_label:<10} {metrics['n_samples']:>8} {metrics['accuracy']:>10.3f} "
          f"{metrics['roc_auc']:>10.3f} {metrics['calibration']['ece']:>10.3f}")

    historical_data.append((month_label, month_df))

## 8. Performance Visualization

In [None]:
# 2x2 dashboard of per-month metrics: accuracy, ROC-AUC, calibration error and
# prediction entropy. The figure is saved to FIGURES_DIR and then shown inline.
trends = evaluator.get_trends()

# Calibration error above this value is drawn in red; the same value is used for
# the horizontal guide line so the two can never disagree.
ECE_THRESHOLD = 0.1

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Accuracy and ROC-AUC share one layout: monthly line, dashed training baseline,
# y-axis fixed to [0, 1].
baseline_panels = [
    (axes[0, 0], 'accuracy', 'Accuracy Over Time', 'Accuracy', 'o-', '#2ecc71'),
    (axes[0, 1], 'roc_auc', 'ROC-AUC Over Time', 'ROC-AUC', 's-', '#3498db'),
]
for ax, key, title, ylabel, marker, color in baseline_panels:
    ax.plot(trends['months'], trends[key], marker, color=color, lw=2)
    ax.axhline(baseline_metrics[key], color='red', ls='--',
               label=f'Baseline {baseline_metrics[key]:.3f}')
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_ylim(0, 1)
    ax.legend()

# ECE: bars turn red when the month exceeds the threshold, green otherwise.
ece_ax = axes[1, 0]
colors = ['#e74c3c' if e > ECE_THRESHOLD else '#2ecc71' for e in trends['ece']]
ece_ax.bar(trends['months'], trends['ece'], color=colors)
ece_ax.axhline(ECE_THRESHOLD, color='orange', ls='--', label=f'Threshold {ECE_THRESHOLD}')
ece_ax.set_title('Calibration Error')
ece_ax.set_ylabel('ECE')
ece_ax.legend()

# Entropy
entropy_ax = axes[1, 1]
entropy_ax.plot(trends['months'], trends['mean_entropy'], 'D-', color='#9b59b6', lw=2)
entropy_ax.set_title('Prediction Entropy')
entropy_ax.set_ylabel('Mean entropy')

# Shared x-axis treatment: label every panel and tilt the month labels so they
# do not overlap.
for ax in axes.flat:
    ax.set_xlabel('Month')
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)

plt.tight_layout()
plt.savefig(FIGURES_DIR / 'performance_dashboard.png', dpi=150)
plt.show()

## 9. Summary

In [None]:
# Text recap of the experiment: data sizes, baseline versus final-month accuracy,
# how many periods triggered drift, and the evaluator's first failure month.
drift_summary = drift_detector.get_summary()
banner = "=" * 60

print(banner)
print("EXPERIMENT SUMMARY")
print(banner)
print(f"Training samples: {len(train_df)}")
print(f"Deployment months: {len(monthly_windows)}")
print(f"Features: {X_train.shape[1]}")

baseline_acc = baseline_metrics['accuracy']
print(f"\nBaseline accuracy: {baseline_acc:.3f}")

# Last entry of the accuracy trend, i.e. the most recent evaluated month.
final_acc = trends['accuracy'][-1]
print(f"Final month accuracy: {final_acc:.3f}")
print(f"Performance drop: {baseline_acc - final_acc:.3f}")

triggered = drift_summary['triggered_periods']
total = drift_summary['total_periods']
print(f"\nDrift triggers: {triggered} / {total}")
print(f"First failure: {evaluator.identify_failure_month()}")