<a href="https://colab.research.google.com/github/sabeenasulthan/neural-network-project/blob/main/neural_networkproject_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
#Data generation
# data_generation.py
import numpy as np
import pandas as pd

def generate_time_series(n_steps=1500, seed=42):
    np.random.seed(seed)

    t = np.arange(n_steps)

    trend = 0.01 * t
    seasonal = np.sin(2 * np.pi * t / 30)
    volatility = np.random.normal(0, 0.5 + 0.005 * t)

    exog1 = np.sin(2 * np.pi * t / 7)
    exog2 = np.random.normal(0, 1, n_steps)

    y = trend + seasonal + 0.5 * exog1 + 0.2 * exog2 + volatility

    df = pd.DataFrame({
        "y": y,
        "exog1": exog1,
        "exog2": exog2
    })

    return df

    # preprocessing.py
import numpy as np
from sklearn.preprocessing import StandardScaler

def create_sequences(data, target_col, window):
    X, y = [], []
    for i in range(len(data) - window):
        X.append(data.iloc[i:i+window].values)
        y.append(data.iloc[i+window][target_col])
    return np.array(X), np.array(y)

def scale_data(train, test):
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train)
    test_scaled = scaler.transform(test)
    return train_scaled, test_scaled, scaler

    # deep_model.py
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, n_features, hidden_size=64):
        super().__init__()
        self.lstm = nn.LSTM(n_features, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :])

def train_model(model, X, y, epochs=20, lr=0.001):
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        preds = model(X)
        loss = loss_fn(preds.squeeze(), y)
        loss.backward()
        optimizer.step()

    return model

    # baseline_model.py
from statsmodels.tsa.statespace.sarimax import SARIMAX

def train_sarimax(train_y, train_exog):
    model = SARIMAX(
        train_y,
        exog=train_exog,
        order=(1,1,1),
        seasonal_order=(1,1,1,12)
    )
    return model.fit(disp=False)

    # evaluation.py
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

def evaluate(y_true, y_pred):
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    return rmse, mae

    # explainability.py
import shap
import torch

def explain_lstm(model, background, samples, window_size, n_features):
    model.eval()

    # Reshape background and samples to 2D for KernelExplainer
    background_reshaped = background.reshape(background.shape[0], -1)
    samples_reshaped = samples.reshape(samples.shape[0], -1)

    def f(x):
        # x from KernelExplainer will be (batch_size, window_size * n_features)
        # Reshape it back to (batch_size, window_size, n_features) for the LSTM model
        x_reshaped_for_model = x.reshape(-1, window_size, n_features)
        x_torch = torch.tensor(x_reshaped_for_model, dtype=torch.float32)
        with torch.no_grad():
            return model(x_torch).numpy()

    explainer = shap.KernelExplainer(f, background_reshaped)
    shap_values = explainer.shap_values(samples_reshaped)
    return shap_values

    # main.py
import torch
import pandas as pd

# Generate data
df = generate_time_series()

# Train-test split
train_size = int(0.8 * len(df))
train_df = df.iloc[:train_size]
test_df = df.iloc[train_size:]

# Scale
train_scaled, test_scaled, scaler = scale_data(train_df, test_df)

train_scaled = pd.DataFrame(train_scaled, columns=df.columns)
test_scaled = pd.DataFrame(test_scaled, columns=df.columns)

# Windowing
WINDOW = 20
X_train, y_train = create_sequences(train_scaled, "y", WINDOW)
X_test, y_test = create_sequences(test_scaled, "y", WINDOW)

X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32)
X_test_t = torch.tensor(X_test, dtype=torch.float32)
y_test_t = torch.tensor(y_test, dtype=torch.float32)

# Deep model
model = LSTMModel(n_features=X_train.shape[2])
model = train_model(model, X_train_t, y_train_t)

# Predictions
model.eval()
with torch.no_grad():
    dl_preds = model(X_test_t).squeeze().numpy()

dl_rmse, dl_mae = evaluate(y_test, dl_preds)

# Baseline
sarimax = train_sarimax(train_df["y"], train_df[["exog1","exog2"]])
sarimax_preds = sarimax.forecast(
    steps=len(test_df),
    exog=test_df[["exog1","exog2"]]
)

base_rmse, base_mae = evaluate(test_df["y"], sarimax_preds)

print("Deep Learning RMSE:", dl_rmse, "MAE:", dl_mae)
print("SARIMAX RMSE:", base_rmse, "MAE:", base_mae)

# Explainability
n_features = X_train.shape[2] # Get n_features from X_train
shap_vals = explain_lstm(
    model,
    X_train[:50],
    X_test[:10],
    WINDOW,
    n_features
)

Deep Learning RMSE: 1.7486859987305992 MAE: 1.407262060984579
SARIMAX RMSE: 7.333321377181396 MAE: 5.821312485292824


  0%|          | 0/10 [00:00<?, ?it/s]