# Production Model Inference Demo

제주도 전력 수요 예측 Production 모델 사용 데모

**Author**: Hybrid Agent Pipeline (Claude + Gemini)  
**Date**: 2024-12

---

## Contents

1. [Setup](#1-setup)
2. [Load Production Models](#2-load-production-models)
3. [Single Prediction](#3-single-prediction)
4. [Conditional Prediction](#4-conditional-prediction)
5. [Batch Prediction](#5-batch-prediction)
6. [Visualization](#6-visualization)
7. [Performance Comparison](#7-performance-comparison)
8. [Quick Start Guide](#8-quick-start-guide)
9. [Summary](#summary)

## 1. Setup

In [None]:
import sys
from pathlib import Path

# Make the project's "src" directory importable.
# The project root is taken to be the parent of the current working directory,
# so this cell expects to be run from a direct sub-folder of the project.
PROJECT_ROOT = Path.cwd().parent
sys.path[:0] = [str(PROJECT_ROOT.joinpath("src"))]

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings

# Silence every warning category for the rest of the session so the demo
# output stays uncluttered.
warnings.filterwarnings('ignore')

# Plot defaults shared by all figures below: white-grid style, wide canvas,
# slightly larger text.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 11,
})

print(f"Project Root: {PROJECT_ROOT}")

In [None]:
# Read the merged hourly dataset; the 'datetime' column is parsed into
# timestamps while loading.
data_path = PROJECT_ROOT.joinpath("data", "processed", "jeju_hourly_merged.csv")
df = pd.read_csv(data_path, parse_dates=['datetime'])

# Report the table size and the period it covers.
timestamps = df['datetime']
print(f"Data shape: {df.shape}")
print(f"Date range: {timestamps.min()} ~ {timestamps.max()}")

# Last expression of the cell: renders the first rows as the cell output.
df.head()

## 2. Load Production Models

In [None]:
from inference import ProductionPredictor, predict, predict_batch

# Initialize predictor
# The predictor is built with its default arguments, and load_models() is
# called here, before any predict_* method is used in the cells below.
predictor = ProductionPredictor()
predictor.load_models()

In [None]:
# Print the same three configuration entries for both production models.
# Each config attribute is looked up only after its header has been printed,
# so the output order matches the order of the lookups.
for header, config_attr in (
    ("[demand_only Model]", "config_demand"),
    ("\n[weather_full Model]", "config_weather"),
):
    print(header)
    config = getattr(predictor, config_attr)
    print(f"  Features: {config['n_features']}")
    print(f"  Hidden size: {config['model_config']['hidden_size']}")
    print(f"  Sequence length: {config['training_config']['seq_length']}")

## 3. Single Prediction

단일 시점 예측 예제

In [None]:
# Use the 500 most recent rows as model input (at least 168 rows are needed,
# plus some buffer for lag features).
test_data = df.tail(500).copy()

# Point forecast from the demand_only model
pred_demand = predictor.predict_demand_only(test_data)
print(f"demand_only prediction: {pred_demand:.2f} MW")

# Point forecast from the weather_full model
pred_weather = predictor.predict_weather_full(test_data)
print(f"weather_full prediction: {pred_weather:.2f} MW")

# Reference value: the last observed demand in the input window.
# NOTE(review): this is the final row of the same frame that was passed to the
# models - confirm that the forecast target corresponds to this timestamp.
actual = test_data['power_demand'].iloc[-1]
print(f"\nActual demand: {actual:.2f} MW")

# Absolute and relative error of each model against the reference value.
for model_name, prediction in (
    ("demand_only", pred_demand),
    ("weather_full", pred_weather),
):
    abs_err = abs(prediction - actual)
    print(f"{model_name} error: {abs_err:.2f} MW ({abs_err / actual * 100:.2f}%)")

## 4. Conditional Prediction

계절에 따른 자동 모델 선택 (겨울철 기상변수 가중치 적용)

In [None]:
# Conditional prediction (soft mode - recommended)
# Reuses the test_data window prepared in the single-prediction cell.
result = predictor.predict_conditional(test_data, mode='soft')

# The result object is read through four attributes below: timestamp,
# predicted_demand, model_used, and a context mapping that holds the
# 'is_winter' and 'weather_weight' entries.
print("[Conditional Prediction Result]")
print(f"  Timestamp: {result.timestamp}")
print(f"  Predicted: {result.predicted_demand:.2f} MW")
print(f"  Model used: {result.model_used}")
print(f"  Is winter: {result.context['is_winter']}")
print(f"  Weather weight: {result.context['weather_weight']:.2f}")

In [None]:
# Compare predictions across different seasons
def get_seasonal_data(df, month, n_rows=500):
    """Return the most recent rows of ``df`` that fall in a calendar month.

    Args:
        df: DataFrame with a datetime-typed ``'datetime'`` column.
        month: Calendar month number to select (1 = January ... 12 = December).
        n_rows: Maximum number of trailing rows to keep. Defaults to 500.

    Returns:
        An independent copy holding at most ``n_rows`` of the last matching
        rows, in their original order. The frame is empty when no row
        matches ``month``.

    Note:
        Rows are matched on the month only, so every year present in ``df``
        qualifies. When the latest occurrence of the month has fewer than
        ``n_rows`` rows, the result also contains rows from the same month
        of earlier years and is therefore not contiguous in time.
    """
    in_month = df['datetime'].dt.month == month
    # tail() already returns every row when fewer than n_rows match, so no
    # explicit length check is needed; copying after tail() avoids duplicating
    # rows that are discarded anyway.
    return df.loc[in_month].tail(n_rows).copy()

# One representative month per season.
seasons = {
    'Winter (Jan)': 1,
    'Spring (Apr)': 4,
    'Summer (Jul)': 7,
    'Fall (Oct)': 10
}

# Seasons with fewer rows than this are not predicted.
# NOTE(review): 300 presumably covers the model's sequence length plus the
# lag-feature warm-up - confirm against the inference code.
MIN_SEASONAL_ROWS = 300

print("Seasonal Predictions Comparison")
print("=" * 60)

for season_name, month in seasons.items():
    seasonal_data = get_seasonal_data(df, month)
    if len(seasonal_data) < MIN_SEASONAL_ROWS:
        # Report the skip instead of silently omitting the season, so a
        # missing entry in the output is not mistaken for a model problem.
        print(f"{season_name}: skipped "
              f"({len(seasonal_data)} rows available, {MIN_SEASONAL_ROWS} required)")
        print()
        continue

    result = predictor.predict_conditional(seasonal_data, mode='soft')
    print(f"{season_name}:")
    print(f"  Predicted: {result.predicted_demand:.2f} MW")
    print(f"  Model: {result.model_used}")
    print(f"  Weather weight: {result.context['weather_weight']:.2f}")
    print()

## 5. Batch Prediction

연속 예측 (슬라이딩 윈도우)

In [None]:
# Run the demand_only model over the 1000 most recent rows, producing one
# prediction per step (step=1: predict every hour).
batch_data = df.tail(1000).copy()
batch_result = predictor.predict_batch(batch_data, model='demand_only', step=1)

# Summary statistics of the predicted values.
predictions = batch_result.predictions
print("Batch Prediction Results")
print(f"  Total predictions: {len(predictions)}")
for label, stat in (("Mean", "mean"), ("Std", "std"), ("Min", "min"), ("Max", "max")):
    print(f"  {label}: {getattr(predictions, stat)():.2f} MW")

In [None]:
# Predictions indexed by their timestamps.
pred_df = pd.DataFrame({
    'timestamp': batch_result.timestamps,
    'predicted': batch_result.predictions,
}).set_index('timestamp')

# Observed demand indexed by datetime, with the column renamed for comparison.
actual_df = (
    batch_data.set_index('datetime')[['power_demand']]
    .rename(columns={'power_demand': 'actual'})
)

# Keep only timestamps present in both frames, then derive the signed,
# absolute and percentage errors (each column builds on the previous one).
comparison_df = pred_df.join(actual_df, how='inner').assign(
    error=lambda frame: frame['predicted'] - frame['actual'],
    abs_error=lambda frame: frame['error'].abs(),
    pct_error=lambda frame: frame['abs_error'] / frame['actual'] * 100,
)

# Last expression of the cell: renders the first ten rows as the cell output.
comparison_df.head(10)

## 6. Visualization

In [None]:
# Two stacked panels: demand curves on top, signed prediction error below.
fig, axes = plt.subplots(2, 1, figsize=(14, 10))
ax1, ax2 = axes
time_index = comparison_df.index
signed_error = comparison_df['error']

# Panel 1: actual and predicted demand over time
for column, label, color in (
    ('actual', 'Actual', '#2c3e50'),
    ('predicted', 'Predicted', '#e74c3c'),
):
    ax1.plot(time_index, comparison_df[column],
             label=label, color=color, linewidth=1.5, alpha=0.8)
ax1.set_xlabel('Time')
ax1.set_ylabel('Power Demand (MW)')
ax1.set_title('Production Model: Predictions vs Actual', fontsize=14, fontweight='bold')
ax1.legend(loc='upper right')
ax1.grid(True, alpha=0.3)

# Panel 2: signed error over time, shaded by sign
# (error = predicted - actual, so positive values are over-predictions)
for mask, color, label in (
    (signed_error >= 0, '#e74c3c', 'Over-prediction'),
    (signed_error < 0, '#3498db', 'Under-prediction'),
):
    ax2.fill_between(time_index, signed_error, 0,
                     where=mask, color=color, alpha=0.5, label=label)
ax2.axhline(y=0, color='black', linestyle='-', linewidth=1)
ax2.set_xlabel('Time')
ax2.set_ylabel('Prediction Error (MW)')
ax2.set_title('Prediction Error Over Time', fontsize=14, fontweight='bold')
ax2.legend(loc='upper right')
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Aggregate error metrics over the joined predictions:
# mean percentage error, mean absolute error, root mean squared error,
# mean (signed) bias error and the largest absolute error.
print("Prediction Error Statistics")
print("=" * 40)
error_stats = (
    ("MAPE", comparison_df['pct_error'].mean(), "%"),
    ("MAE", comparison_df['abs_error'].mean(), " MW"),
    ("RMSE", np.sqrt((comparison_df['error'] ** 2).mean()), " MW"),
    ("MBE", comparison_df['error'].mean(), " MW"),
    ("Max Error", comparison_df['abs_error'].max(), " MW"),
)
for name, value, unit in error_stats:
    print(f"{name}: {value:.2f}{unit}")

In [None]:
# Side-by-side panels: percentage-error histogram and actual-vs-predicted scatter.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax1, ax2 = axes

# Left panel: histogram of percentage errors with the mean marked
pct_error = comparison_df['pct_error']
mean_pct_error = pct_error.mean()
ax1.hist(pct_error, bins=50, color='#3498db', edgecolor='white', alpha=0.7)
ax1.axvline(x=mean_pct_error, color='#e74c3c', linestyle='--', linewidth=2,
            label=f'Mean: {mean_pct_error:.2f}%')
ax1.set_xlabel('Percentage Error (%)')
ax1.set_ylabel('Frequency')
ax1.set_title('Error Distribution', fontsize=14, fontweight='bold')
ax1.legend()

# Right panel: scatter of actual vs predicted with the y = x reference line
actual_values = comparison_df['actual']
predicted_values = comparison_df['predicted']
ax2.scatter(actual_values, predicted_values, alpha=0.5, s=10, c='#3498db')
# The diagonal spans the combined range of both series.
min_val = min(actual_values.min(), predicted_values.min())
max_val = max(actual_values.max(), predicted_values.max())
diagonal = [min_val, max_val]
ax2.plot(diagonal, diagonal, 'r--', linewidth=2, label='Perfect prediction')
ax2.set_xlabel('Actual Demand (MW)')
ax2.set_ylabel('Predicted Demand (MW)')
ax2.set_title('Actual vs Predicted', fontsize=14, fontweight='bold')
ax2.legend()

plt.tight_layout()
plt.show()

## 7. Performance Comparison

demand_only vs weather_full vs conditional 비교

In [None]:
from evaluation.metrics import compute_all_metrics

# Compare models on test data
# Note: this rebinds test_data (previously the 500-row window) to the
# 2000 most recent rows; later cells that read test_data see this frame.
test_data = df.tail(2000).copy()

models = ['demand_only', 'weather_full']
results = {}

# The ground truth is identical for every model, so index it once here
# instead of rebuilding it on each loop iteration.
actual_df = test_data.set_index('datetime')[['power_demand']]

for model in models:
    print(f"Evaluating {model}...", end=" ")
    batch_result = predictor.predict_batch(test_data, model=model, step=1)

    # Align predictions with observed demand on their timestamps; the inner
    # join keeps only timestamps present on both sides.
    pred_df = pd.DataFrame({
        'timestamp': batch_result.timestamps,
        'predicted': batch_result.predictions
    }).set_index('timestamp')
    merged = pred_df.join(actual_df, how='inner')

    # Observed values are passed first, predictions second.
    metrics = compute_all_metrics(
        merged['power_demand'].values,
        merged['predicted'].values
    )
    results[model] = metrics
    print(f"MAPE: {metrics['MAPE']:.2f}%")

print("\nDone!")

In [None]:
# Build a models-by-metrics table: one row per model, restricted to the four
# headline metrics and relabelled with units for display.
metric_labels = {
    'MAPE': 'MAPE (%)',
    'RMSE': 'RMSE (MW)',
    'MAE': 'MAE (MW)',
    'R2': 'R²',
}
comparison_table = (
    pd.DataFrame(results).T[list(metric_labels)]
    .rename(columns=metric_labels)
)

print("Model Performance Comparison")
print("=" * 60)
# display() is not imported in this notebook; it is expected to be supplied by
# the interactive (IPython/Jupyter) environment.
display(comparison_table.round(4))

In [None]:
# Visualize model comparison: MAPE (left) and R² (right), one bar per model.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
colors = ['#2ecc71', '#3498db']

# MAPE comparison
ax1 = axes[0]
mape_values = [results[m]['MAPE'] for m in models]
bars = ax1.bar(models, mape_values, color=colors, edgecolor='black')
ax1.set_ylabel('MAPE (%)')
ax1.set_title('MAPE Comparison', fontsize=14, fontweight='bold')
for bar, val in zip(bars, mape_values):
    # Value label centred just above each bar.
    ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
             f'{val:.2f}%', ha='center', fontweight='bold')

# R² comparison
ax2 = axes[1]
r2_values = [results[m]['R2'] for m in models]
bars = ax2.bar(models, r2_values, color=colors, edgecolor='black')
ax2.set_ylabel('R²')
ax2.set_title('R² Comparison', fontsize=14, fontweight='bold')

# Zoom the y-axis around the observed scores so small differences stay
# visible. The window is derived from the data rather than fixed, so bars are
# not clipped or hidden when the scores fall outside a preset range.
r2_pad = 0.01
if np.all(np.isfinite(r2_values)):
    r2_min, r2_max = min(r2_values), max(r2_values)
    r2_pad = max(0.5 * (r2_max - r2_min), 0.01)
    ax2.set_ylim(r2_min - r2_pad, r2_max + r2_pad)
# Otherwise (NaN/inf scores) keep matplotlib's automatic limits.

for bar, val in zip(bars, r2_values):
    # Label offset scales with the visible range so it stays close to the bar.
    ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.2 * r2_pad,
             f'{val:.4f}', ha='center', fontweight='bold')

plt.tight_layout()
plt.show()

## 8. Quick Start Guide

Production 모델 사용을 위한 빠른 시작 가이드

In [None]:
# Quick Start: Method 1 - Using convenience functions
from inference import predict, predict_batch

# Single prediction (recommended: conditional mode)
# test_data is whatever earlier cells left bound to that name; when the
# notebook is run top to bottom this is the 2000-row tail assigned in the
# Performance Comparison section.
result = predict(test_data, model='conditional', mode='soft')
print(f"Quick prediction: {result.predicted_demand:.2f} MW")

In [None]:
# Quick Start: Method 2 - Using ProductionPredictor class
from inference import ProductionPredictor

# A fresh predictor is created and loaded here so this cell reads as a
# self-contained usage example; it rebinds the notebook-level `predictor`.
predictor = ProductionPredictor()
predictor.load_models()

# demand_only (fastest)
pred = predictor.predict_demand_only(test_data)
print(f"demand_only: {pred:.2f} MW")

# conditional (recommended for production)
result = predictor.predict_conditional(test_data, mode='soft')
# The weather weight is printed with two decimals, the same precision used
# for this value everywhere else in the notebook.
print(f"conditional: {result.predicted_demand:.2f} MW (weight={result.context['weather_weight']:.2f})")

## Summary

### Model Recommendations

| Scenario | Recommended Model | Expected MAPE |
|----------|-------------------|---------------|
| Real-time (h=1) | demand_only | ~6.5% |
| Winter operation | conditional_soft | ~4.5% |
| General use | conditional_soft | ~6.3% |

### Key Findings

1. **demand_only** is the primary model (17 features)
2. **conditional_soft** automatically adjusts weather weight in winter
3. Winter months (Dec, Jan, Feb) benefit from weather features
4. Lag variables dominate predictions (corr=0.974)

In [None]:
# Final cell: confirms that the notebook ran through to the end.
print("Demo completed!")