In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# --- 1. Programmatic Data Generation (3 Years of Daily Sales Data) ---
# This creates exactly 1095 samples as required by your project guidelines.
def generate_multivariate_data(n_samples=1095):
    np.random.seed(42)
    t = np.arange(n_samples)
    
    # Series 1: Daily Sales (Target) - Trend + Seasonality
    series1 = 0.05 * t + 15 * np.sin(2 * np.pi * t / 365) + np.random.normal(0, 2, n_samples)
    
    # Series 2: Store Foot Traffic (Correlated Feature)
    series2 = 0.7 * series1 + 5 * np.random.normal(0, 1, n_samples)
    
    # Series 3: Market Noise (Non-stationary Chaotic Feature)
    series3 = np.cumsum(np.random.normal(0, 1, n_samples))
    
    df = pd.DataFrame({
        'Sales_Target': series1,
        'Foot_Traffic': series2,
        'Market_Noise': series3
    })
    return df

# Initialize data
df_sales = generate_multivariate_data()
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df_sales)

# --- 2. Sequence Creation (Look-back/Look-ahead) ---
def create_sequences(data, look_back=30, look_ahead=7):
    X, y = [], []
    for i in range(len(data) - look_back - look_ahead):
        X.append(data[i : i + look_back])
        y.append(data[i + look_back : i + look_back + look_ahead, 0])
    return torch.FloatTensor(np.array(X)), torch.FloatTensor(np.array(y))

X, y = create_sequences(scaled_data)
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# --- 3. Model Architecture (LSTM + Attention Mechanism) ---
class AttentionLayer(nn.Module):
    def __init__(self, hidden_dim):
        super(AttentionLayer, self).__init__()
        self.attn_weights = nn.Linear(hidden_dim, 1)

    def forward(self, lstm_outputs):
        weights = torch.softmax(self.attn_weights(lstm_outputs), dim=1)
        context = torch.sum(weights * lstm_outputs, dim=1)
        return context

class ForecastingModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(ForecastingModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.attention = AttentionLayer(hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        context = self.attention(lstm_out)
        return self.fc(context)

# Initialize Model
model = ForecastingModel(input_dim=3, hidden_dim=64, output_dim=7)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# --- 4. Training Loop (50 Epochs) ---
print("Starting Training...")
for epoch in range(50):
    model.train()
    optimizer.zero_grad()
    preds = model(X_train)
    loss = criterion(preds, y_train)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item():.4f}")

# --- 5. Final Evaluation & Metrics ---
model.eval()
with torch.no_grad():
    test_preds = model(X_test).numpy()
    y_true = y_test.numpy()

mae = mean_absolute_error(y_true, test_preds)
rmse = np.sqrt(mean_squared_error(y_true, test_preds))

print(f"\n--- Final Project Metrics ---")
print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")

Starting Training...
Epoch 0 | Loss: 0.3206
Epoch 10 | Loss: 0.2270
Epoch 20 | Loss: 0.0948
Epoch 30 | Loss: 0.0494
Epoch 40 | Loss: 0.0410

--- Final Project Metrics ---
MAE: 0.1730
RMSE: 0.1897
