# Improved LSTM Training (Capturing Traffic Spikes)
This notebook implements advanced techniques to solve the "flat-line" prediction problem common in sparse traffic data:
1. **Feature Engineering:** Adds 5-minute and 15-minute rolling averages to provide the model with local density context.
2. **Weighted MSE Loss:** Penalizes errors on non-zero traffic counts more heavily to prevent the model from simply predicting the mean.
3. **Cyclical Time Features:** Encodes the time of day using Sine/Cosine transforms to help the LSTM understand the 24-hour cycle.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import joblib

## 1. Feature Engineering
Adding rolling averages and cyclical time features.

In [None]:
df = pd.read_csv('lstm_training_data.csv')

# Approach directions; every per-direction column name below is derived from
# this single list so the feature/target lists cannot drift out of sync.
DIRECTIONS = ['North', 'South', 'East', 'West']

# 1. Rolling Averages (capture local density)
# Windows are counted in rows (5 and 15 rows; "minutes" assumes one row per
# minute -- TODO confirm against the CSV). min_periods=1 lets the first rows
# average over whatever history exists; without it they come out NaN and get
# filled with 0, which tells the model "no traffic" even when counts are
# non-zero. fillna(0) is kept for windows that contain no valid value at all.
for d in DIRECTIONS:
    for window in (5, 15):
        df[f'{d}_{window}m_avg'] = (
            df[d].rolling(window=window, min_periods=1).mean().fillna(0)
        )

# 2. Cyclical Time Features (minute of day 0-1439)
# Sine/cosine of the position in a 1440-step cycle, so the end of one cycle
# and the start of the next map to neighbouring values.
day_angle = 2 * np.pi * df['time_index'] / 1440
df['hour_sin'] = np.sin(day_angle)
df['hour_cos'] = np.cos(day_angle)

# Column order matters: the fitted scalers and the model's input/output
# layers are tied to it.
# features: raw counts, 5m averages, 15m averages, then the time features.
features = (
    DIRECTIONS
    + [f'{d}_5m_avg' for d in DIRECTIONS]
    + [f'{d}_15m_avg' for d in DIRECTIONS]
    + ['hour_sin', 'hour_cos', 'day_of_week']
)
# targets: the four count targets followed by the four score targets.
targets = (
    [f'target_count_{d}' for d in DIRECTIONS]
    + [f'target_score_{d}' for d in DIRECTIONS]
)

# Split by row position without shuffling: first 80% of rows for training,
# remaining 20% for testing.
split_idx = int(len(df) * 0.8)
train_df = df.iloc[:split_idx].copy()
test_df = df.iloc[split_idx:].copy()

## 2. Preprocessing and Scaling

In [None]:
# Fit both scalers on the training split only, so no statistics from the test
# split influence the scaling.
scaler_features = MinMaxScaler().fit(train_df[features])
scaler_targets = MinMaxScaler().fit(train_df[targets])

# Apply the train-fitted scalers to both splits.
train_features_scaled = scaler_features.transform(train_df[features])
test_features_scaled = scaler_features.transform(test_df[features])
train_targets_scaled = scaler_targets.transform(train_df[targets])
test_targets_scaled = scaler_targets.transform(test_df[targets])

# Persist the fitted scalers so the same scaling can be reloaded later.
joblib.dump(scaler_features, 'scaler_features.pkl')
joblib.dump(scaler_targets, 'scaler_targets.pkl')

In [None]:
class TrafficDataset(Dataset):
    """Sliding-window dataset pairing a feature window with one target row.

    Sample ``idx`` is the ``lookback`` consecutive feature rows starting at
    row ``idx`` together with the target row aligned with the *last* row of
    that window (``idx + lookback - 1``).

    Args:
        features: 2-D array-like of feature rows; converted to a float tensor.
        targets: 2-D array-like of target rows, one per feature row;
            converted to a float tensor.
        lookback: Number of consecutive rows in each input window.
    """

    def __init__(self, features, targets, lookback=60):
        self.features = torch.FloatTensor(features)
        self.targets = torch.FloatTensor(targets)
        self.lookback = lookback

    def __len__(self):
        # The label sits on the window's last row, so the final window may end
        # on the final row: that gives n - lookback + 1 windows. Clamp at 0 so
        # an input shorter than one window is an empty dataset rather than a
        # negative length (which makes len() raise).
        return max(0, len(self.features) - self.lookback + 1)

    def __getitem__(self, idx):
        """Return ``(window, target)`` for sample ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_windows = len(self)
        if idx < 0:
            idx += n_windows
        # Reject out-of-range indices explicitly; plain slicing would silently
        # hand back a truncated or empty window.
        if not 0 <= idx < n_windows:
            raise IndexError(
                f"index {idx} out of range for dataset with {n_windows} windows"
            )
        end = idx + self.lookback
        x = self.features[idx:end]
        y = self.targets[end - 1]
        return x, y

# Number of consecutive rows in each input window handed to the model.
LOOKBACK = 60

# One sliding-window dataset per split, built from the scaled arrays.
train_dataset = TrafficDataset(
    features=train_features_scaled,
    targets=train_targets_scaled,
    lookback=LOOKBACK,
)
test_dataset = TrafficDataset(
    features=test_features_scaled,
    targets=test_targets_scaled,
    lookback=LOOKBACK,
)

# Training windows are shuffled each epoch; test windows keep their original
# order so consecutive samples stay consecutive.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

## 3. Weighted Loss Function
We penalize errors on samples where traffic is present more heavily than errors on empty intervals.

In [None]:
def weighted_mse_loss(inputs, targets, weight=5.0, threshold=0.05):
    """Mean squared error that up-weights elements with a non-trivial target.

    Each element's squared error is multiplied by ``weight`` when the
    corresponding target is strictly greater than ``threshold``, and left
    unchanged otherwise. The result is the mean over all elements.

    Args:
        inputs: Predicted values (tensor broadcastable against ``targets``).
        targets: Ground-truth values in the same (normalized) scale.
        weight: Multiplier applied to the squared error where
            ``targets > threshold``.
        threshold: Target value above which an element counts as "traffic
            present". Defaults to 0.05, the cut-off previously hard-coded.

    Returns:
        A scalar tensor holding the weighted mean squared error.
    """
    squared_error = (inputs - targets) ** 2
    # Scale the error in place of building weight tensors: this avoids
    # allocating two scalar tensors and moving them to the device on every
    # call, and keeps the result in the dtype/device of the inputs.
    weighted = torch.where(targets > threshold, squared_error * weight, squared_error)
    return weighted.mean()

## 4. Model and Training

In [None]:
class TrafficLSTM(nn.Module):
    """Stacked LSTM followed by a small MLP head.

    The LSTM reads a batch-first sequence; only the output at the final time
    step is passed through the head to produce the prediction.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of values predicted per sequence.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability between stacked LSTM layers. Ignored
            (set to 0) when ``num_layers`` is 1, since there is no
            inter-layer connection to apply it to and PyTorch warns about it.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=3, dropout=0.3):
        super().__init__()
        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )
        # Attribute names and layer order are kept as-is so previously saved
        # state_dicts still load.
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim, 32),
            nn.ReLU(),
            nn.Linear(32, output_dim)
        )

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_dim) to (batch, output_dim)."""
        out, _ = self.lstm(x)
        # Keep only the last time step's hidden output for the prediction head.
        return self.fc(out[:, -1, :])

# Train on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Input width follows the feature list and output width follows the target
# list; the hidden state has 128 units.
model = TrafficLSTM(
    input_dim=len(features),
    hidden_dim=128,
    output_dim=len(targets),
).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0005)

epochs = 30
for epoch in range(epochs):
    model.train()
    running_loss = 0
    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # Standard step: clear gradients, forward, weighted loss, backward, update.
        optimizer.zero_grad()
        batch_loss = weighted_mse_loss(model(batch_x), batch_y)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # Report the mean per-batch loss on every fifth epoch (0, 5, 10, ...).
    if not epoch % 5:
        mean_loss = running_loss / len(train_loader)
        print(f"Epoch {epoch} Loss: {mean_loss:.6f}")

# Save only the learned parameters (state_dict), not the whole module.
torch.save(model.state_dict(), 'traffic_lstm_model.pth')

## 5. Visualization
Verify if the model now captures the spikes.

In [None]:
# Number of consecutive test windows to show in the plot.
N_PLOT = 200

model.eval()

# One batch holds only 64 samples, fewer than the N_PLOT points the plot is
# meant to show, so gather consecutive batches until enough are collected.
# test_loader is not shuffled, so concatenated batches stay in original order.
x_chunks, y_chunks, pred_chunks = [], [], []
n_collected = 0
with torch.no_grad():
    for xb, yb in test_loader:
        pred_chunks.append(model(xb.to(device)).cpu())
        x_chunks.append(xb)
        y_chunks.append(yb)
        n_collected += len(yb)
        if n_collected >= N_PLOT:
            break

x_sample = torch.cat(x_chunks)[:N_PLOT]
y_sample = torch.cat(y_chunks)[:N_PLOT]
preds = torch.cat(pred_chunks)[:N_PLOT].numpy()

# Inverse transform to original scale for plotting
preds_rescaled = scaler_targets.inverse_transform(preds)
actual_rescaled = scaler_targets.inverse_transform(y_sample.numpy())

# Column 0 is the first entry of the target list (target_count_North).
plt.figure(figsize=(15, 5))
plt.plot(actual_rescaled[:, 0], label='Actual (North)')
plt.plot(preds_rescaled[:, 0], label='Predicted (North)', alpha=0.8)
plt.title('Actual vs Predicted Traffic Count (North)')
plt.legend()
plt.show()