# ⚡ Day 3 — Baseline Statistical Models
## Energy Consumption Forecasting | Claysys AI Hackathon 2026

**Date:** February 21, 2026  
**Objective:** Establish performance baselines using Naive, Holt-Winters, and ARIMA/SARIMA models.

---

In [None]:
import sys
sys.path.insert(0, '..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

from src.models.baseline import NaiveForecaster, HoltWintersModel, ARIMAModel
from src.evaluation import compute_metrics, plot_predictions

# Global plotting defaults for the notebook.
# The seaborn style sheets were renamed to 'seaborn-v0_8-*' in matplotlib 3.6;
# fall back to the legacy name so the cell also runs on older installs.
plot_style = 'seaborn-v0_8-darkgrid'
if plot_style not in plt.style.available:
    plot_style = 'seaborn-darkgrid'
plt.style.use(plot_style)
plt.rcParams.update({'figure.dpi': 120})
print('✅ Day 3 Setup complete')

## 1. Load Processed Data

In [None]:
# Load the processed train/test splits, using the 'Datetime' column as a
# parsed datetime index.
train_df = pd.read_csv('../data/processed/train.csv', index_col='Datetime', parse_dates=True)
test_df  = pd.read_csv('../data/processed/test.csv',  index_col='Datetime', parse_dates=True)

# Use only the target column for statistical models
train_series = train_df['Global_active_power']
test_series  = test_df['Global_active_power']

# Report the time span and size of each split (arrow character repaired from
# mis-decoded UTF-8 in the original strings).
print(f'Train: {train_series.index.min()} → {train_series.index.max()} ({len(train_series):,} hourly records)')
print(f'Test : {test_series.index.min()} → {test_series.index.max()} ({len(test_series):,} hourly records)')

## 2. Model 1 — Naive Seasonal Baseline

In [None]:
# Seasonal-naive baseline: repeat yesterday's 24-hour pattern, score the
# forecast over the whole test set, then plot its first week.
forecast_horizon = len(test_series)
first_week = slice(None, 168)   # first 168 test records
actual_values = test_series.values

naive_model = NaiveForecaster(seasonality=24)
naive_model.fit(train_series)
naive_preds = naive_model.predict(forecast_horizon)

metrics_naive = compute_metrics(actual_values, naive_preds, model_name='Naive Seasonal')

plot_predictions(
    actual_values[first_week],
    naive_preds[first_week],
    index=test_series.index[first_week],
    model_name='Naive Seasonal (First Week of Test)',
    filename='naive_predictions.png',
)

## 3. Model 2 — Holt-Winters Exponential Smoothing

In [None]:
# Holt-Winters with an additive 24-period season. Fitting is restricted to
# the most recent 2000 hours of training data to keep it fast.
hw_train = train_series.tail(2000)
forecast_horizon = len(test_series)
first_week = slice(None, 168)   # first 168 test records
actual_values = test_series.values

hw_model = HoltWintersModel(seasonal='add', seasonal_periods=24)
hw_model.fit(hw_train)
hw_preds = hw_model.predict(forecast_horizon)

metrics_hw = compute_metrics(actual_values, hw_preds, model_name='Holt-Winters')

plot_predictions(
    actual_values[first_week],
    hw_preds[first_week],
    index=test_series.index[first_week],
    model_name='Holt-Winters (First Week of Test)',
    filename='holtwinters_predictions.png',
)

## 4. Model 3 — ARIMA

In [None]:
# ARIMA is computationally expensive at full scale, so only the last 1000
# training points are used, and the non-seasonal variant is chosen for speed.
arima_train = train_series.tail(1000)
forecast_horizon = len(test_series)
first_week = slice(None, 168)   # first 168 test records
actual_values = test_series.values

arima_model = ARIMAModel(seasonal=False)
arima_model.fit(arima_train)
arima_preds = arima_model.predict(forecast_horizon)

metrics_arima = compute_metrics(actual_values, arima_preds, model_name='ARIMA')

plot_predictions(
    actual_values[first_week],
    arima_preds[first_week],
    index=test_series.index[first_week],
    model_name='ARIMA (First Week of Test)',
    filename='arima_predictions.png',
)

## 5. Baseline Comparison

In [None]:
from pathlib import Path

from src.evaluation import compare_models, plot_model_comparison

# Gather the metrics from the three baseline cells into one comparison table.
results_day3 = [metrics_naive, metrics_hw, metrics_arima]
comparison_df = compare_models(results_day3)

# Save for Day 7 final report.
# Create the reports directory first: to_csv raises OSError when the target
# directory does not exist (e.g. on a fresh checkout).
baseline_report = Path('../reports/baseline_results.csv')
baseline_report.parent.mkdir(parents=True, exist_ok=True)
comparison_df.to_csv(baseline_report)

plot_model_comparison(comparison_df, metric='RMSE')
plot_model_comparison(comparison_df, metric='MAE')

print('\n🎉 Day 3 Complete! Baselines established.')
# NOTE(review): index[0] is the best model only if compare_models returns its
# rows sorted best-first — confirm against src.evaluation.
print('   Best so far:', comparison_df.index[0])
print('   Ready for Day 4: Classical ML Models (Random Forest, XGBoost)')