# ⚡ Day 5 — Deep Learning: LSTM & GRU with PyTorch
## Energy Consumption Forecasting | Claysys AI Hackathon 2026

**Date:** February 23, 2026  
**Objective:** Build and train LSTM and GRU sequence models to capture temporal dependencies in energy consumption.

---

In [None]:
import sys
sys.path.insert(0, '..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import warnings
warnings.filterwarnings('ignore')

from src.preprocessing import create_sequences, normalize
from src.models.lstm_model import DeepLearningForecaster
from src.evaluation import compute_metrics, plot_predictions, compare_models

# Plot styling applied to the figures created in this notebook.
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({'figure.dpi': 120})

# Report the runtime environment: the installed PyTorch build and whether a
# CUDA device is visible to it.
print(f'✅ PyTorch version: {torch.__version__}')
print(f'   CUDA available: {torch.cuda.is_available()}')
print(f'   Device: {"GPU" if torch.cuda.is_available() else "CPU"}')

## 1. Load & Prepare Sequence Data

In [None]:
# Load the pre-split train/test frames, indexed by the parsed 'Datetime' column.
train_df = pd.read_csv('../data/processed/train.csv', index_col='Datetime', parse_dates=True)
test_df  = pd.read_csv('../data/processed/test.csv',  index_col='Datetime', parse_dates=True)

# Forecast target. The inverse-scaling step later in this notebook reads the
# target back from column 0, so it must stay first in FEATURES.
TARGET_COL = 'Global_active_power'

# For LSTM: use target + key features as multivariate input
FEATURES = [
    TARGET_COL,               # Target (always first column)
    'Global_reactive_power',
    'Voltage',
    'Global_intensity',
    'Sub_metering_1',
    'Sub_metering_2',
    'Sub_metering_3',
    'hour_sin', 'hour_cos',
    'month_sin', 'month_cos',
    'dow_sin', 'dow_cos',
    'is_weekend',
]

# Keep only the features available in BOTH splits; a column present in train
# but absent from test would otherwise fail at the test_df selection below.
FEATURES = [f for f in FEATURES if f in train_df.columns and f in test_df.columns]

# Fail loudly if the target was filtered out: another column would silently
# slide into position 0 and be treated as the target downstream.
if not FEATURES or FEATURES[0] != TARGET_COL:
    raise KeyError(
        f"Target column '{TARGET_COL}' must be present in both train.csv and test.csv"
    )

# Drop rows with a missing value in any selected feature.
train_data = train_df[FEATURES].dropna()
test_data  = test_df[FEATURES].dropna()

print(f'Train data: {train_data.shape}')
print(f'Test data : {test_data.shape}')
print(f'Features  : {FEATURES}')

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Sliding-window configuration
SEQ_LENGTH = 24    # Look back 24 hours
HORIZON    = 1     # Predict 1 step ahead

# Fit the min-max scaler on the training split only, then apply that same
# mapping to both splits (training features land in [0,1]; test values are
# mapped with the training min/max).
scaler = MinMaxScaler().fit(train_data.to_numpy())
train_scaled = scaler.transform(train_data.to_numpy())
test_scaled  = scaler.transform(test_data.to_numpy())

# Turn each scaled array into (input window, target) pairs
X_train, y_train = create_sequences(
    train_scaled,
    seq_length=SEQ_LENGTH,
    horizon=HORIZON,
)
X_test, y_test = create_sequences(
    test_scaled,
    seq_length=SEQ_LENGTH,
    horizon=HORIZON,
)

print(f'X_train shape: {X_train.shape}   (samples, timesteps, features)')
print(f'X_test  shape: {X_test.shape}')
print(f'y_train shape: {y_train.shape}')

## 2. Train LSTM Model

In [None]:
# Split off validation set from end of training (last ~10% of the sequences).
# The split uses an explicit index rather than negative slicing: with
# val_size == 0, `X_train[:-0]` is empty and `X_train[-0:]` is the whole array,
# which would swap the roles of the two sets on small datasets.
n_train_sequences = len(X_train)
if n_train_sequences < 2:
    raise ValueError(
        'Need at least 2 training sequences to hold out a validation set, '
        f'got {n_train_sequences}'
    )
val_size = max(1, int(n_train_sequences * 0.1))  # always keep >= 1 validation sample
split_at = n_train_sequences - val_size
X_tr, X_val = X_train[:split_at], X_train[split_at:]
y_tr, y_val = y_train[:split_at], y_train[split_at:]

# LSTM forecaster. The hyperparameters match the GRU configured later in this
# notebook, so the two runs differ only in model_type.
lstm_model = DeepLearningForecaster(
    model_type='LSTM',
    # Input geometry
    input_size=len(FEATURES),
    seq_length=SEQ_LENGTH,
    # Network size and regularisation
    hidden_size=128,
    num_layers=2,
    dropout=0.2,
    # Optimisation
    batch_size=64,
    learning_rate=1e-3,
)

# Train on the earlier sequences and validate on the held-out tail.
# NOTE(review): `patience` presumably controls early stopping — confirm in
# DeepLearningForecaster.fit.
lstm_model.fit(
    X_tr, y_tr,
    X_val=X_val, y_val=y_val,
    epochs=50,
    patience=10,
)

In [None]:
# Plot the LSTM's training history.
# NOTE(review): save=True presumably also writes the figure to disk — confirm
# in DeepLearningForecaster.plot_training_history.
lstm_model.plot_training_history(save=True)

# Predict on the test windows; the result is inverse-scaled further down.
lstm_preds_scaled = lstm_model.predict(X_test)

def inverse_scale_target(preds_scaled, scaler, n_features):
    """Inverse-transform scaled target values (column 0 only).

    ``scaler.inverse_transform`` is called on an ``n_features``-wide array, so
    the values are placed in column 0 of a zero-filled array of that width,
    the whole array is inverse-transformed, and only column 0 is returned.

    Args:
        preds_scaled: Scaled target values as any array-like. Both a flat
            ``(n,)`` vector and a column ``(n, 1)`` are accepted; the input
            is flattened to 1-D before use.
        scaler: Object exposing ``inverse_transform`` for arrays with
            ``n_features`` columns.
        n_features: Width of the array passed to ``inverse_transform``.

    Returns:
        1-D ``numpy.ndarray`` holding column 0 of the inverse-transformed
        array, one entry per input value.
    """
    # Flatten first: assigning an (n, 1) array into the 1-D slice dummy[:, 0]
    # raises a broadcasting error.
    flat = np.asarray(preds_scaled, dtype=float).reshape(-1)
    dummy = np.zeros((flat.shape[0], n_features))
    dummy[:, 0] = flat
    return scaler.inverse_transform(dummy)[:, 0]

# Inverse-scale both the ground truth and the LSTM predictions so the metrics
# and plots use the target's original scale.
y_test_actual = inverse_scale_target(y_test, scaler, len(FEATURES))
lstm_preds = inverse_scale_target(lstm_preds_scaled, scaler, len(FEATURES))

metrics_lstm = compute_metrics(y_test_actual, lstm_preds, model_name='LSTM')

# Plot only the first 168 test points (the "first week" named in the title).
plot_predictions(
    y_test_actual[:168],
    lstm_preds[:168],
    model_name='LSTM (First Week of Test)',
    filename='lstm_predictions.png',
)

## 3. Train GRU Model

In [None]:
# GRU forecaster, configured with the same hyperparameters as the LSTM above
# so that the two runs differ only in model_type.
gru_model = DeepLearningForecaster(
    model_type='GRU',
    # Input geometry
    input_size=len(FEATURES),
    seq_length=SEQ_LENGTH,
    # Network size and regularisation
    hidden_size=128,
    num_layers=2,
    dropout=0.2,
    # Optimisation
    batch_size=64,
    learning_rate=1e-3,
)

# Same train/validation split and training budget as the LSTM run.
gru_model.fit(
    X_tr, y_tr,
    X_val=X_val, y_val=y_val,
    epochs=50,
    patience=10,
)
# Training history for the GRU run.
gru_model.plot_training_history(save=True)

# Predict on the test windows, then inverse-scale the predictions.
gru_preds_scaled = gru_model.predict(X_test)
gru_preds = inverse_scale_target(gru_preds_scaled, scaler, len(FEATURES))

# Score against the same inverse-scaled ground truth used for the LSTM.
metrics_gru = compute_metrics(y_test_actual, gru_preds, model_name='GRU')

# Plot only the first 168 test points (the "first week" named in the title).
plot_predictions(
    y_test_actual[:168],
    gru_preds[:168],
    model_name='GRU (First Week of Test)',
    filename='gru_predictions.png',
)

## 4. LSTM vs GRU Comparison

In [None]:
from pathlib import Path

# Collect both models' metrics into one comparison table.
results_day5 = [metrics_lstm, metrics_gru]
comparison_df = compare_models(results_day5)

# to_csv does not create missing parent directories; make sure the reports
# folder exists so the write cannot fail after the long training runs.
Path('../reports').mkdir(parents=True, exist_ok=True)
comparison_df.to_csv('../reports/dl_results.csv')

from pathlib import Path

# Overlay both models' predictions on the actual series for the first
# n test points.
fig, ax = plt.subplots(figsize=(14, 5))
n = 168
ax.plot(y_test_actual[:n], label='Actual', color='black', linewidth=1.5, zorder=5)
ax.plot(lstm_preds[:n], label='LSTM', color='#9C27B0', linewidth=1.2, linestyle='--')
ax.plot(gru_preds[:n],  label='GRU',  color='#00BCD4', linewidth=1.2, linestyle='--')
ax.set_title('LSTM vs GRU — 1 Week Forecast', fontweight='bold')
ax.set_ylabel('Global Active Power (kW)')
ax.legend()
plt.tight_layout()

# savefig does not create missing directories; ensure the target folder exists.
Path('../reports/figures').mkdir(parents=True, exist_ok=True)
plt.savefig('../reports/figures/lstm_gru_comparison.png', bbox_inches='tight')
plt.show()

# Closing status banner for the notebook run.
print('\n🎉 Day 5 Complete! Deep learning models trained.')
print('   Ready for Day 6: Prophet + Ensemble Stacking')