# System Imbalance Prediction - Production Model Training

This notebook trains the production ML models for predicting electricity system imbalance (MWh) for 15-minute settlement periods.

## Model Overview
- **Algorithm:** HistGradientBoostingRegressor (one model per lead time)
- **Lead Times:** 15, 12, 9, 6, 3 minutes before settlement end
- **Features:** Minimal set of 3-5 features per model (optimized for performance)
- **Baseline:** IPESOFT weighted average formulas

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Fix for Windows joblib issue
import os
os.environ['LOKY_MAX_CPU_COUNT'] = '1'

## 1. Configuration

In [None]:
# Paths
# DATA_PATH is the semicolon-separated CSV consumed by load_data();
# MODELS_PATH is where the fitted models and their config are written.
DATA_PATH = Path('../Data/live3minValue.csv')
MODELS_PATH = Path('../models')
# Created as a side effect of running this cell. Only the leaf directory is
# created (no parents=True), so the parent directory must already exist.
MODELS_PATH.mkdir(exist_ok=True)

# Lead times (minutes before settlement end)
# One model is trained per entry; the values are also the keys of FEATURES.
LEAD_TIMES = [15, 12, 9, 6, 3]

# Optimized features per lead time (discovered through feature importance analysis)
# Keys are lead times in minutes; values are column names produced by
# engineer_features(). The list order is the column order passed to the model.
FEATURES = {
    15: ['roll_mean_5', 'val_curr', 'roll_mean_10', 'roll_std_5'],
    12: ['val_curr', 'roll_mean_5', 'lag_1', 'roll_mean_10', 'roll_mean_20'],
    9:  ['val_curr', 'roll_mean_5', 'qh_cumsum', 'lag_2', 'roll_mean_10'],
    6:  ['qh_cumsum', 'val_curr', 'roll_mean_5'],
    3:  ['qh_cumsum', 'val_curr', 'roll_mean_5']
}

# Model hyperparameters
# Unpacked as keyword arguments into HistGradientBoostingRegressor; the same
# settings are used for every lead time.
MODEL_PARAMS = {
    'learning_rate': 0.05,
    'max_depth': 10,
    'max_iter': 300,
    'random_state': 42
}

# Train/Test split ratio
# Fraction of each (chronologically ordered) dataset used for training; the
# remaining tail is the test set.
TRAIN_RATIO = 0.8

## 2. Load Data

In [None]:
def load_data(path):
    """Read the live3minValue CSV and return it on a regular 3-minute grid.

    The file is semicolon-separated with one header row; only the first two
    columns (timestamp, value) are used and both are read as text.
    NOTE(review): the value column is presumably the ``3MIN_RE_with_GCC``
    series - confirm against the data source.

    Args:
        path: Location of the CSV file (anything ``pd.read_csv`` accepts).

    Returns:
        DataFrame with a single float ``value`` column, indexed by
        ``timestamp`` and resampled to 3-minute bins (mean per bin, NaN for
        bins without data).
    """
    raw = pd.read_csv(
        path,
        sep=';',
        header=0,
        names=['timestamp', 'value', 'extra'],
        usecols=[0, 1],
        dtype={'timestamp': str, 'value': str}
    )

    # Keep only real data rows: a non-empty timestamp that does not match
    # the 'H.DaE' pattern (regex, so '.' matches any character).
    stamp_text = raw['timestamp']
    is_data_row = (
        stamp_text.notna()
        & (stamp_text != '')
        & ~stamp_text.str.contains('H.DaE', na=False)
    )
    rows = raw[is_data_row]

    # Strip stray quotes, then convert both columns to their real types.
    # Timestamps that do not fit the expected format become NaT.
    parsed = pd.DataFrame({
        'timestamp': pd.to_datetime(
            rows['timestamp'].str.replace('"', ''),
            format='%m/%d/%Y %I:%M:%S %p.000',
            errors='coerce',
        ),
        'value': rows['value'].str.replace('"', '').astype(float),
    })

    # Drop unparseable timestamps, order chronologically and snap to the grid.
    return (
        parsed.dropna(subset=['timestamp'])
        .set_index('timestamp')
        .sort_index()
        .resample('3min')
        .mean()
    )

# Read the raw feed and report its size and time coverage
df = load_data(DATA_PATH)
print(
    f"Loaded {len(df):,} records",
    f"Date range: {df.index.min()} to {df.index.max()}",
    f"Duration: {(df.index.max() - df.index.min()).days} days",
    sep="\n",
)

## 3. Feature Engineering

In [None]:
def engineer_features(df):
    """Derive the model input columns from the 3-minute ``value`` series.

    Args:
        df: DataFrame with a DatetimeIndex and a ``value`` column.

    Returns:
        DataFrame on the same index with, in order: ``val_curr``,
        ``lag_1`` .. ``lag_12``, ``roll_mean_w`` / ``roll_std_w`` for
        w in (5, 10, 20), ``qh_cumsum`` and ``qh_count``.
    """
    series = df['value']

    # Current value followed by the 12 previous samples
    columns = {'val_curr': series}
    columns.update({f'lag_{k}': series.shift(k) for k in range(1, 13)})

    # Trailing-window statistics (5, 10, 20 steps = 15, 30, 60 min)
    for span in (5, 10, 20):
        window = series.rolling(span)
        columns[f'roll_mean_{span}'] = window.mean()
        columns[f'roll_std_{span}'] = window.std()

    # Running sum and 1-based position of each sample inside its quarter hour
    per_qh = series.groupby(df.index.floor('15min'))
    columns['qh_cumsum'] = per_qh.cumsum()
    columns['qh_count'] = per_qh.cumcount() + 1

    return pd.DataFrame(columns, index=df.index)

# Engineer features
# Builds the full feature table once; each lead-time model later selects its
# own subset of these columns via FEATURES.
features = engineer_features(df)
print(f"Engineered {len(features.columns)} features")

## 4. Target Variable

In [None]:
def create_target(df):
    """Build the per-quarter-hour target: mean of the QH's values times 0.25.

    Args:
        df: DataFrame with a DatetimeIndex and a ``value`` column.

    Returns:
        Series named ``target`` with one entry per 15-minute period, indexed
        by the period's end time (period start + 15 minutes).
    """
    qh_start = df.index.floor('15min')
    qh_mean = df['value'].groupby(qh_start).mean()
    target = (qh_mean * 0.25).rename('target')
    # Label each quarter hour by its settlement end rather than its start
    target.index = target.index + pd.Timedelta(minutes=15)
    return target

# Build the settlement-period target and summarise its distribution
target = create_target(df)
print(
    f"Target samples: {len(target):,}",
    f"Target range: [{target.min():.2f}, {target.max():.2f}] MWh",
    f"Target mean: {target.mean():.2f} MWh",
    sep="\n",
)

## 5. Dataset Creation

In [None]:
def create_dataset(features, target, lead_time, feature_cols):
    """
    Create aligned dataset for a specific lead time.

    Timing: For prediction at (settlement_end - lead_time),
    we use features from (settlement_end - lead_time - 3min) due to transmission lag.

    The alignment is done with vectorized index operations instead of a
    per-target Python loop, and an empty result still carries the expected
    columns so callers can index ``'target'`` / ``'settlement_end'`` safely.

    Args:
        features: Feature table indexed by timestamp (unique index expected).
        target: Series of target values indexed by settlement end time.
        lead_time: Lead time in minutes before settlement end.
        feature_cols: Names of the feature columns to include.

    Returns:
        DataFrame indexed by the feature cutoff time with the columns
        ``feature_cols`` + ``['target', 'settlement_end']``. Targets whose
        cutoff time is missing from ``features`` are skipped, and rows
        containing any NaN are dropped.
    """
    offset = pd.Timedelta(minutes=lead_time + 3)  # +3 for transmission lag

    # Feature timestamp to use for every settlement period
    cutoffs = target.index - offset
    # Only settlement periods whose cutoff actually exists in the feature grid
    available = cutoffs.isin(features.index)

    dataset = features.loc[cutoffs[available], list(feature_cols)].copy()
    # The rows were looked up by an Index; drop the name it carries over so
    # the result's index is unnamed, as it was with the row-by-row build.
    dataset = dataset.rename_axis(None)
    # Assigned positionally: both arrays are filtered by the same mask
    dataset['target'] = target.to_numpy()[available]
    dataset['settlement_end'] = target.index[available]

    return dataset.dropna()

# Create datasets for each lead time
# datasets maps lead time (minutes) -> aligned frame holding that lead time's
# feature columns plus 'target' and 'settlement_end'.
datasets = {}
for lt in LEAD_TIMES:
    datasets[lt] = create_dataset(features, target, lt, FEATURES[lt])
    # Progress line is printed as soon as each dataset is built
    print(f"LT={lt:2d} min: {len(datasets[lt]):,} samples, {len(FEATURES[lt])} features")

## 6. Baseline Predictor

In [None]:
# Weights applied to lag_1, lag_2, ... for each supported lead time (minutes).
_BASELINE_LAG_WEIGHTS = {
    15: (1.0,),
    12: (1.0,),
    9: (0.8, 0.2),
    6: (0.6, 0.2, 0.2),
    3: (0.4, 0.2, 0.2, 0.2),
}


def baseline_predict(features_df, lead_time):
    """
    IPESOFT Baseline predictor (weighted average formulas).
    Output is in MWh.

    The prediction is 0.25 times a weighted sum of the lag columns, with the
    weights chosen by lead time. The computation is vectorized over all rows;
    terms are accumulated in the same left-to-right order as the written-out
    formulas, so results are numerically identical to a row-by-row evaluation.

    Args:
        features_df: DataFrame providing ``lag_1`` .. ``lag_4`` (only the lags
            needed for ``lead_time`` are read). If ``lag_1`` is absent,
            ``val_curr`` is used in its place; any other missing column is
            treated as NaN.
        lead_time: Lead time in minutes; one of 15, 12, 9, 6, 3.

    Returns:
        1-D float ``np.ndarray`` with one prediction per row of
        ``features_df``. All NaN when ``lead_time`` is not a supported value.
    """
    n_rows = len(features_df)
    weights = _BASELINE_LAG_WEIGHTS.get(lead_time)
    if weights is None:
        return np.full(n_rows, np.nan)

    def column(name, fallback=None):
        # Column as a float array; fall back to another column, then to NaN
        for candidate in (name, fallback):
            if candidate is not None and candidate in features_df.columns:
                return features_df[candidate].to_numpy(dtype=float)
        return np.full(n_rows, np.nan)

    blended = None
    for lag, weight in enumerate(weights, start=1):
        values = column(f'lag_{lag}', fallback='val_curr' if lag == 1 else None)
        term = weight * values
        blended = term if blended is None else blended + term

    return 0.25 * blended

## 7. Train Models

In [None]:
# Storage for results
# models:  lead time -> fitted regressor
# results: lead time -> dict of test-set metrics
# predictions_store: per source ('ml' / 'baseline'), settlement end ->
#                    list of (actual, predicted, lead_time) tuples
models = {}
results = {}
predictions_store = {'ml': {}, 'baseline': {}}

# The baseline needs lag columns that are not part of every model's feature
# set. The feature table does not depend on the lead time, so build it once
# instead of once per loop iteration.
baseline_cols = ['val_curr', 'lag_1', 'lag_2', 'lag_3', 'lag_4']
baseline_features = engineer_features(df)

print("Training models...\n")
print(f"{'LT':<8} {'Features':<10} {'Train':<10} {'Test':<10}")
print("-" * 40)

for lt in LEAD_TIMES:
    dataset = datasets[lt]
    feature_cols = FEATURES[lt]

    # Train/Test split (chronological)
    split_idx = int(len(dataset) * TRAIN_RATIO)
    train = dataset.iloc[:split_idx]
    test = dataset.iloc[split_idx:]

    X_train = train[feature_cols]
    y_train = train['target']
    X_test = test[feature_cols]
    y_test = test['target']

    print(f"LT={lt:2d}    {len(feature_cols):<10} {len(train):<10} {len(test):<10}")

    # Train model
    model = HistGradientBoostingRegressor(**MODEL_PARAMS)
    model.fit(X_train, y_train)
    models[lt] = model

    # Predictions
    ml_pred = model.predict(X_test)

    # Baseline on the same settlement periods as the ML test set. The baseline
    # dataset drops rows on its own NaN pattern, so it may contain fewer rows
    # than `test`. Each baseline prediction is therefore paired with the
    # target from its OWN row rather than with y_test by position.
    baseline_test = create_dataset(baseline_features, target, lt, baseline_cols)
    baseline_test = baseline_test[baseline_test['settlement_end'].isin(test['settlement_end'])]
    bl_pred = baseline_predict(baseline_test, lt)
    bl_true = baseline_test['target'].to_numpy()

    # Store predictions with settlement times
    for st, y_true, y_hat in zip(test['settlement_end'], y_test, ml_pred):
        predictions_store['ml'].setdefault(st, []).append((y_true, y_hat, lt))
        # Keep a baseline slot for every ML settlement time, even if empty
        predictions_store['baseline'].setdefault(st, [])
    for st, y_true, y_hat in zip(baseline_test['settlement_end'], bl_true, bl_pred):
        predictions_store['baseline'].setdefault(st, []).append((y_true, y_hat, lt))

    # Calculate metrics (baseline metrics use the baseline's own aligned rows)
    results[lt] = {
        'ml_mae': mean_absolute_error(y_test, ml_pred),
        'ml_rmse': np.sqrt(mean_squared_error(y_test, ml_pred)),
        'ml_r2': r2_score(y_test, ml_pred),
        'bl_mae': mean_absolute_error(bl_true, bl_pred),
        'ml_sign_acc': 100 * np.mean((y_test.values >= 0) == (ml_pred >= 0)),
        'bl_sign_acc': 100 * np.mean((bl_true >= 0) == (bl_pred >= 0))
    }

print("\nTraining complete!")

## 8. Evaluation Results

In [None]:
# Header of the per-lead-time error table
print("=" * 80, "CLASSICAL METRICS (Test Set)", "=" * 80, sep="\n")
print(f"{'LT':<10} {'ML MAE':>10} {'BL MAE':>10} {'Improvement':>12} {'ML RMSE':>10} {'ML R²':>10}")
print("-" * 80)

# One row per lead time; improvement is the relative MAE reduction vs baseline
for lt in LEAD_TIMES:
    r = results[lt]
    imp = 100 * (r['bl_mae'] - r['ml_mae']) / r['bl_mae']
    print(f"LT={lt:2d} min  {r['ml_mae']:>10.3f} {r['bl_mae']:>10.3f} {imp:>+11.1f}% {r['ml_rmse']:>10.3f} {r['ml_r2']:>10.3f}")

# Averages
# Unweighted mean over lead times; avg_imp is derived from the averaged MAEs
avg_ml_mae, avg_bl_mae = (
    np.mean([results[lt][metric] for lt in LEAD_TIMES])
    for metric in ('ml_mae', 'bl_mae')
)
avg_imp = 100 * (avg_bl_mae - avg_ml_mae) / avg_bl_mae
print("-" * 80)
print(f"{'Average':<10} {avg_ml_mae:>10.3f} {avg_bl_mae:>10.3f} {avg_imp:>+11.1f}%")

In [None]:
# Header of the per-lead-time sign accuracy table
print("\n" + "=" * 80, "SIGN ACCURACY (Test Set)", "=" * 80, sep="\n")
print(f"{'LT':<10} {'ML SignAcc':>12} {'BL SignAcc':>12} {'Improvement':>12}")
print("-" * 80)

# One row per lead time; improvement is the difference in percentage points
for lt in LEAD_TIMES:
    r = results[lt]
    imp = r['ml_sign_acc'] - r['bl_sign_acc']
    print(f"LT={lt:2d} min  {r['ml_sign_acc']:>11.1f}% {r['bl_sign_acc']:>11.1f}% {imp:>+11.1f}%")

# Unweighted mean over lead times
avg_ml_sign, avg_bl_sign = (
    np.mean([results[lt][metric] for lt in LEAD_TIMES])
    for metric in ('ml_sign_acc', 'bl_sign_acc')
)
print("-" * 80)
print(f"{'Average':<10} {avg_ml_sign:>11.1f}% {avg_bl_sign:>11.1f}% {avg_ml_sign - avg_bl_sign:>+11.1f}%")

## 9. SignAccQH - Combined Sign Accuracy per Quarter Hour

In [None]:
def calculate_signaccqh(predictions_dict, expected_count=5):
    """Count wrong-sign predictions per quarter hour (QH).

    A prediction has the wrong sign when exactly one of (actual, predicted)
    is >= 0; zero is treated as non-negative. A QH counts as a SignAccQH
    success when at least 3 of its 5 predictions have the correct sign, but
    that threshold is applied by the caller - this function only returns the
    error counts.

    Args:
        predictions_dict: Mapping of settlement end -> list of
            ``(actual, predicted, lead_time)`` tuples.
        expected_count: Number of predictions a QH must have to be included
            (defaults to 5, one per lead time). QHs with any other number of
            predictions are skipped.

    Returns:
        List with the number of sign errors for each included QH, in the
        iteration order of ``predictions_dict``.
    """
    return [
        sum(1 for y, p, _ in preds if (y >= 0) != (p >= 0))
        for preds in predictions_dict.values()
        if len(preds) == expected_count
    ]

def _error_share(errors, predicate):
    """Return the percentage of entries in *errors* for which *predicate* holds.

    Returns NaN (instead of raising ZeroDivisionError) when *errors* is empty,
    e.g. when no settlement period has predictions for every lead time.
    """
    if not errors:
        return float('nan')
    return 100 * sum(1 for x in errors if predicate(x)) / len(errors)


# Per-QH sign error counts; only QHs with predictions for all lead times count
ml_errors = calculate_signaccqh(predictions_store['ml'])
bl_errors = calculate_signaccqh(predictions_store['baseline'])

print("\n" + "=" * 70)
print("SIGN ERROR DISTRIBUTION PER QH")
print("=" * 70)
print(f"{'Errors':<10} {'ML Model':>12} {'Baseline':>12} {'Difference':>12}  Interpretation")
print("-" * 70)

# Label for each possible number of wrong signs out of 5 predictions
interpretations = {
    0: 'Perfect (5/5)',
    1: 'Excellent (4/5)',
    2: 'Good (3/5)',
    3: 'Poor (2/5)',
    4: 'Bad (1/5)',
    5: 'Worst (0/5)'
}

for e in range(6):
    ml_pct = _error_share(ml_errors, lambda x: x == e)
    bl_pct = _error_share(bl_errors, lambda x: x == e)
    diff = ml_pct - bl_pct
    # Rows with at most 2 errors count towards SignAccQH
    marker = '*' if e <= 2 else ''
    print(f"{e:<10} {ml_pct:>11.1f}% {bl_pct:>11.1f}% {diff:>+11.1f}%  {interpretations[e]} {marker}")

print("-" * 70)
# SignAccQH: share of QHs with at most 2 wrong signs (>= 3/5 correct)
ml_signaccqh = _error_share(ml_errors, lambda x: x <= 2)
bl_signaccqh = _error_share(bl_errors, lambda x: x <= 2)
print(f"{'SignAccQH':<10} {ml_signaccqh:>11.1f}% {bl_signaccqh:>11.1f}% {ml_signaccqh - bl_signaccqh:>+11.1f}%  (<=2 errors)")
print(f"\nTotal QH periods: {len(ml_errors)}")
print("\n* = Contributes to SignAccQH success")

## 10. Save Models

In [None]:
# Persist one fitted model per lead time
for lt, model in models.items():
    model_path = MODELS_PATH.joinpath(f'model_production_lt{lt}.joblib')
    joblib.dump(model, model_path)
    print(f"Saved: {model_path}")

# Persist the settings needed to rebuild the inputs for these models
config_path = MODELS_PATH.joinpath('model_config.joblib')
config = dict(
    features=FEATURES,
    model_params=MODEL_PARAMS,
    lead_times=LEAD_TIMES,
)
joblib.dump(config, config_path)
print(f"Saved: {config_path}")

## 11. Summary

In [None]:
# Title banner
print("\n" + "=" * 70, "PRODUCTION MODEL SUMMARY", "=" * 70, sep="\n")

# What was trained
print(
    "\n** Model Configuration **",
    "Algorithm: HistGradientBoostingRegressor",
    f"Lead Times: {LEAD_TIMES}",
    f"Total Features: {sum(len(f) for f in FEATURES.values())} (across all models)",
    sep="\n",
)

print("\n** Features per Lead Time **")
for lt in LEAD_TIMES:
    print(f"  LT={lt:2d}: {FEATURES[lt]}")

# Headline comparison against the baseline, using values computed in the
# evaluation cells (avg_imp, avg_*_sign, *_signaccqh, ml_errors)
print(
    "\n** Performance vs Baseline **",
    f"  Average MAE Improvement: {avg_imp:+.1f}%",
    f"  Average SignAcc Improvement: {avg_ml_sign - avg_bl_sign:+.1f}%",
    f"  SignAccQH: {ml_signaccqh:.1f}% (vs {bl_signaccqh:.1f}% baseline, {ml_signaccqh - bl_signaccqh:+.1f}%)",
    f"  Perfect QH Predictions (0 errors): {100 * sum(1 for x in ml_errors if x == 0) / len(ml_errors):.1f}%",
    sep="\n",
)

print(
    "\n** Key Results **",
    f"  - ML model achieves {ml_signaccqh:.1f}% SignAccQH (>=3/5 correct signs per QH)",
    f"  - {100 * sum(1 for x in ml_errors if x == 0) / len(ml_errors):.1f}% of QH have perfect sign prediction (all 5 lead times correct)",
    f"  - Only {100 * sum(1 for x in ml_errors if x >= 3) / len(ml_errors):.1f}% of QH have poor predictions (<=2/5 correct)",
    sep="\n",
)

# Footer with the output location
print("\n" + "=" * 70)
print("Models saved to:", MODELS_PATH)
print("=" * 70)