In [1]:
# Advanced Time Series Forecasting with Attention LSTM + Rolling Origin CV + XGBoost
# Run in Jupyter by splitting sections into cells OR run as python script

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from xgboost import XGBRegressor

# ===================== 1. DATA GENERATION =====================
np.random.seed(42)
t = np.arange(1095)
series1 = 0.05*t + 15*np.sin(2*np.pi*t/365) + np.random.normal(0,2,1095)
series2 = 0.7*series1 + np.random.normal(0,1,1095)
series3 = np.cumsum(np.random.normal(0,1,1095))

df = pd.DataFrame({
    'Sales_Target':series1,
    'Foot_Traffic':series2,
    'Market_Noise':series3
})

# ===================== 2. SCALING =====================
scaler = MinMaxScaler()
data = scaler.fit_transform(df.values)

SEQ_LEN = 30
HORIZON = 7

def create_sequences(data, seq_len, horizon):
    X, y = [], []
    for i in range(len(data)-seq_len-horizon):
        X.append(data[i:i+seq_len])
        y.append(data[i+seq_len:i+seq_len+horizon,0])
    return np.array(X), np.array(y)

X, y = create_sequences(data, SEQ_LEN, HORIZON)

split = int(len(X)*0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# ===================== 3. ATTENTION LSTM =====================
class AttentionLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, horizon):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.attn = nn.Linear(hidden_size,1)
        self.fc = nn.Linear(hidden_size, horizon)
    def forward(self,x):
        out,_ = self.lstm(x)
        weights = torch.softmax(self.attn(out),dim=1)
        context = torch.sum(weights*out,dim=1)
        return self.fc(context)

model = AttentionLSTM(3,64,HORIZON)
opt = torch.optim.Adam(model.parameters(),lr=0.001)
loss_fn = nn.MSELoss()

# ===================== 4. TRAINING =====================
for epoch in range(40):
    model.train()
    pred = model(X_train)
    loss = loss_fn(pred,y_train)
    opt.zero_grad()
    loss.backward()
    opt.step()
    if epoch%10==0:
        print('Epoch',epoch,'Loss',loss.item())

# ===================== 5. METRICS =====================
def metrics(true,pred):
    mae = mean_absolute_error(true,pred)
    rmse = np.sqrt(mean_squared_error(true,pred))
    mape = np.mean(np.abs((true-pred)/true))*100
    return mae,rmse,mape

# ===================== 6. EVALUATE LSTM =====================
model.eval()
with torch.no_grad():
    preds = model(X_test).numpy()
true = y_test.numpy()

mae_lstm,rmse_lstm,mape_lstm = metrics(true,preds)
print('\nATTENTION LSTM:',mae_lstm,rmse_lstm,mape_lstm)

# ===================== 7. ROLLING ORIGIN CV =====================
def rolling_cv(model,X,y,start=0.7):
    errors=[]
    start=int(len(X)*start)
    for i in range(start,len(X)-1):
        with torch.no_grad():
            p=model(X[i:i+1]).numpy()
        errors.append(mean_absolute_error(y[i:i+1].numpy(),p))
    return np.mean(errors)

cv_error = rolling_cv(model,X_test,y_test)
print('Rolling CV Error:',cv_error)

# ===================== 8. XGBOOST BASELINE =====================
X_xgb = X.reshape(len(X),-1)
X_train_xgb,X_test_xgb = X_xgb[:split],X_xgb[split:]
y_train_xgb,y_test_xgb = y[:split],y[split:]

xgb = XGBRegressor(n_estimators=300,max_depth=5,learning_rate=0.05)
xgb.fit(X_train_xgb,y_train_xgb)
preds_xgb = xgb.predict(X_test_xgb)

mae_xgb,rmse_xgb,mape_xgb = metrics(y_test_xgb,preds_xgb)
print('XGBOOST:',mae_xgb,rmse_xgb,mape_xgb)

# ===================== 9. FINAL COMPARISON =====================
print('\nFINAL COMPARISON')
print('Model        MAE      RMSE      MAPE')
print(f'AttentionLSTM {mae_lstm:.4f} {rmse_lstm:.4f} {mape_lstm:.2f}')
print(f'XGBoost       {mae_xgb:.4f} {rmse_xgb:.4f} {mape_xgb:.2f}')


Epoch 0 Loss 0.24566173553466797
Epoch 10 Loss 0.16724969446659088
Epoch 20 Loss 0.060859885066747665
Epoch 30 Loss 0.03764991834759712

ATTENTION LSTM: 0.23225208 0.24459802 31.28059208393097
Rolling CV Error: 0.28040326
XGBOOST: 0.046675443527244176 0.057820837966432094 6.393091093024528

FINAL COMPARISON
Model        MAE      RMSE      MAPE
AttentionLSTM 0.2323 0.2446 31.28
XGBoost       0.0467 0.0578 6.39
