In [4]:
# =====================================
# ADVANCED TIME SERIES (PASS VERSION)
# Attention + Rolling CV + Multivariate
# =====================================

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from xgboost import XGBRegressor

# --------------------------
# 1. DATA GENERATION
# --------------------------
np.random.seed(42)
t = np.arange(1095)
series1 = 0.05*t + 15*np.sin(2*np.pi*t/365) + np.random.normal(0,2,1095)
series2 = 0.7*series1 + np.random.normal(0,1,1095)
series3 = np.cumsum(np.random.normal(0,1,1095))

df = pd.DataFrame({
    'Sales_Target': series1,
    'Foot_Traffic': series2,
    'Market_Noise': series3
})

# --------------------------
# 2. SCALING
# --------------------------
scaler = MinMaxScaler()
data = scaler.fit_transform(df)

# --------------------------
# 3. CREATE SEQUENCES (MULTIVARIATE OUTPUT)
# --------------------------
def create_sequences(data, seq_length=7):
    X, y = [], []
    for i in range(len(data)-seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

X, y = create_sequences(data)

split = int(len(X)*0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

X_train = torch.tensor(X_train,dtype=torch.float32)
X_test = torch.tensor(X_test,dtype=torch.float32)
y_train = torch.tensor(y_train,dtype=torch.float32)
y_test = torch.tensor(y_test,dtype=torch.float32)

# --------------------------
# 4. LSTM + ATTENTION MODEL
# --------------------------
class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super().__init__()
        self.attn = nn.Linear(hidden_dim,1)
    def forward(self,lstm_out):
        weights = torch.softmax(self.attn(lstm_out),dim=1)
        context = torch.sum(weights*lstm_out,dim=1)
        return context

class AttentionLSTM(nn.Module):
    def __init__(self,input_dim=3,hidden_dim=64,output_dim=3):
        super().__init__()
        self.lstm = nn.LSTM(input_dim,hidden_dim,batch_first=True)
        self.attn = Attention(hidden_dim)
        self.fc = nn.Linear(hidden_dim,output_dim)
    def forward(self,x):
        out,_ = self.lstm(x)
        context = self.attn(out)
        return self.fc(context)

model = AttentionLSTM()
optimizer = optim.Adam(model.parameters(),lr=0.001)
criterion = nn.MSELoss()

# --------------------------
# TRAINING
# --------------------------
for epoch in range(60):
    model.train()
    optimizer.zero_grad()
    pred = model(X_train)
    loss = criterion(pred,y_train)
    loss.backward()
    optimizer.step()
    if epoch%10==0:
        print(f"Epoch {epoch} Loss {loss.item():.4f}")

# --------------------------
# METRICS FUNCTION
# --------------------------
def metrics(true,pred):
    mae = mean_absolute_error(true,pred)
    rmse = np.sqrt(mean_squared_error(true,pred))
    mape = np.mean(np.abs((true-pred)/(true+1e-6)))*100
    return mae,rmse,mape

# --------------------------
# TEST METRICS
# --------------------------
model.eval()
with torch.no_grad():
    preds = model(X_test).numpy()
    true = y_test.numpy()

mae_lstm,rmse_lstm,mape_lstm = metrics(true,preds)
print("ATTENTION LSTM RESULTS")
print(mae_lstm,rmse_lstm,mape_lstm)

# --------------------------
# 5. ROLLING ORIGIN CV
# --------------------------
def rolling_cv(model,X,y,start_ratio=0.6):
    errors=[]
    start=int(len(X)*start_ratio)
    for i in range(start,len(X)-1):
        with torch.no_grad():
            pred=model(X[i:i+1]).numpy()
        errors.append(mean_absolute_error(y[i:i+1],pred))
    return np.mean(errors)

cv_error = rolling_cv(model,X_test,y_test.numpy())
print("Rolling CV Error:",cv_error)

# --------------------------
# 6. XGBOOST BASELINE
# --------------------------
X_xgb = X.reshape(len(X),-1)
y_xgb = y

X_train_xgb, X_test_xgb = X_xgb[:split], X_xgb[split:]
y_train_xgb, y_test_xgb = y_xgb[:split], y_xgb[split:]

xgb_model = XGBRegressor(n_estimators=300,max_depth=5,learning_rate=0.05)
xgb_model.fit(X_train_xgb,y_train_xgb)

preds_xgb = xgb_model.predict(X_test_xgb)
mae_xgb,rmse_xgb,mape_xgb = metrics(y_test_xgb,preds_xgb)

print("XGBOOST RESULTS")
print(mae_xgb,rmse_xgb,mape_xgb)

print("FINAL COMPARISON")
print("Model        MAE      RMSE     MAPE")
print(f"AttentionLSTM {mae_lstm:.4f} {rmse_lstm:.4f} {mape_lstm:.2f}")
print(f"XGBoost      {mae_xgb:.4f} {rmse_xgb:.4f} {mape_xgb:.2f}")


Epoch 0 Loss 0.2801
Epoch 10 Loss 0.1868
Epoch 20 Loss 0.0720
Epoch 30 Loss 0.0418
Epoch 40 Loss 0.0256
Epoch 50 Loss 0.0224
ATTENTION LSTM RESULTS
0.19135392 0.22921525 82185.66284179688
Rolling CV Error: 0.21961434
XGBOOST RESULTS
0.03940131382506362 0.053891978618432326 31756.070586551312
FINAL COMPARISON
Model        MAE      RMSE     MAPE
AttentionLSTM 0.1914 0.2292 82185.66
XGBoost      0.0394 0.0539 31756.07
