# Advanced Time Series Forecasting with LSTM + Attention
## Jupyter Notebook Version
---
This notebook contains:
- Data preprocessing for forecasting
- LSTM + Self-Attention model
- Baseline LSTM model
- Backtesting pipeline
- Classification model (LightGBM + SHAP)

⚠️ *Note: The full pipeline code is placed below in organized sections.*

## 📌 Import Libraries

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import optuna
import lightgbm as lgb
import shap
import matplotlib.pyplot as plt

## 📌 Self-Attention Layer Implementation

In [None]:
from tensorflow.keras import backend as K

class SelfAttention(layers.Layer):
    """Additive self-attention over the time axis of a 3-D sequence tensor.

    Each timestep ``x_t`` of the input is scored with
    ``u . tanh(x_t @ W + b)``; the scores are normalised with a softmax
    across timesteps and used to weight the input.

    Args:
        return_sequences: If False (default), the weighted timesteps are
            summed over axis 1 into a single context vector per sample.
            If True, the weighted sequence is returned without summing,
            so the output keeps the input's shape.
        **kwargs: Forwarded unchanged to ``layers.Layer``.
    """

    def __init__(self, return_sequences=False, **kwargs):
        super().__init__(**kwargs)
        self.return_sequences = return_sequences

    def build(self, input_shape):
        """Create W (features x features), b (features,) and u (features,)."""
        feature_dim = input_shape[-1]
        if feature_dim is None:
            raise ValueError(
                'SelfAttention requires a known size for the last input axis.')
        feature_dim = int(feature_dim)

        self.W = self.add_weight(name='att_weight',
                                 shape=(feature_dim, feature_dim),
                                 initializer='glorot_uniform', trainable=True)
        self.b = self.add_weight(name='att_bias', shape=(feature_dim,),
                                 initializer='zeros', trainable=True)
        self.u = self.add_weight(name='att_u', shape=(feature_dim,),
                                 initializer='glorot_uniform', trainable=True)

    def call(self, inputs):
        """Apply attention weights to ``inputs`` and optionally pool them."""
        # tensordot(axes=1) contracts the last axis of the rank-3 input with
        # the first axis of the weight. It is used instead of the backend
        # `dot` because `u` is rank-1, which the backend's N-D code path
        # does not accept as a right-hand operand.
        hidden = tf.tanh(tf.tensordot(inputs, self.W, axes=1) + self.b)
        scores = tf.tensordot(hidden, self.u, axes=1)

        # After contracting the feature axis, the last axis of `scores` is
        # the time axis, so the weights sum to 1 across timesteps.
        alphas = tf.nn.softmax(scores, axis=-1)
        weighted = inputs * tf.expand_dims(alphas, -1)

        if self.return_sequences:
            return weighted
        return tf.reduce_sum(weighted, axis=1)

    def get_config(self):
        """Return the layer config, including ``return_sequences``."""
        config = super().get_config()
        config.update({'return_sequences': self.return_sequences})
        return config


## 📌 Attention-LSTM Model

In [None]:
def build_attention_lstm(input_shape, units=64, dropout=0.2, lr=1e-3):
    """Build and compile the LSTM + self-attention regression model.

    The network is: Input -> LSTM (returning the full sequence) -> Dropout
    -> SelfAttention -> Dense(units // 2, relu) -> Dense(1).

    Args:
        input_shape: Per-sample shape handed to ``layers.Input``
            (e.g. ``(timesteps, features)``).
        units: Number of LSTM units; the hidden dense layer uses
            ``units // 2``.
        dropout: Rate passed to the ``Dropout`` layer.
        lr: Learning rate for the Adam optimizer.

    Returns:
        A Keras model compiled with MSE loss and an MAE metric.
    """
    seq_in = layers.Input(shape=input_shape)

    # Layers are instantiated in forward order so that Keras assigns the
    # same auto-generated layer names as a straight-line definition would.
    stack = (
        layers.LSTM(units, return_sequences=True),
        layers.Dropout(dropout),
        SelfAttention(),
        layers.Dense(units // 2, activation='relu'),
        layers.Dense(1),
    )

    features = seq_in
    for layer in stack:
        features = layer(features)

    net = models.Model(inputs=seq_in, outputs=features)
    net.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        loss='mse',
        metrics=['mae'],
    )
    return net

## 📌 Notebook Ready – Add Data Loading, Backtesting, and SHAP Sections Next