<a href="https://colab.research.google.com/github/sharon504/ml-learning/blob/main/LSTM_model_for_d3code_hackathon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import joblib

# ------------------------------
# Step 1: Prepare the Data
# ------------------------------

# Load the raw dataset and index it by its 'Month' column.
# set_index returns a new frame, so `data` keeps the untouched CSV contents.
data = pd.read_csv("dataset-1.csv")
df = data.set_index('Month')

# Forward-fill missing values. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill') spelling, which emits a FutureWarning on recent pandas.
if df.isnull().values.any():
    print("Data contains NaNs. Filling NaNs with forward fill.")
    df = df.ffill()
# Treat +/-inf as missing too, then fill them the same way.
if np.isinf(df.values).any():
    print("Data contains infinite values. Replacing Infs with NaNs and forward filling.")
    df = df.replace([np.inf, -np.inf], np.nan).ffill()

# Min-max scale every column of the cleaned frame.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# further down — confirm that is intended, since test rows influence the scaling.
scaler = MinMaxScaler().fit(df)
scaled_data = scaler.transform(df)

# Persist the fitted scaler so the same scaling can be reapplied later.
scaler_save_path = 'scaler.save'
joblib.dump(scaler, scaler_save_path)
print(f"Scaler saved to {scaler_save_path}")

# float32 tensor copy of the scaled values
data_tensor = torch.tensor(scaled_data, dtype=torch.float32)

# Create sequences
def create_sequences(data, seq_length):
    """Slice a tensor into sliding windows and their next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is the row that
    immediately follows it, ``data[i + seq_length]``.

    Args:
        data: Tensor indexed along its first dimension (one row per step).
        seq_length: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(X, y)`` of stacked tensors, where ``X`` has shape
        ``(len(data) - seq_length, seq_length, *data.shape[1:])`` and ``y`` has
        shape ``(len(data) - seq_length, *data.shape[1:])``.

    Raises:
        ValueError: If ``seq_length`` is not positive, or ``data`` has too few
            rows to form at least one window plus its target.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be a positive integer, got {seq_length}.")

    num_windows = len(data) - seq_length
    if num_windows < 1:
        # Without this guard torch.stack fails on an empty list with an
        # unhelpful message.
        raise ValueError(
            f"Need more than {seq_length} rows to build sequences of length "
            f"{seq_length}, got {len(data)}."
        )

    xs = [data[i:i + seq_length] for i in range(num_windows)]
    ys = [data[i + seq_length] for i in range(num_windows)]
    return torch.stack(xs), torch.stack(ys)

# Parameters
seq_length = 3  # number of consecutive rows in each input window
X, y = create_sequences(data_tensor, seq_length)

# Fail fast if any NaN survived preprocessing
if torch.isnan(X).any() or torch.isnan(y).any():
    raise ValueError("Sequences contain NaN values. Please check the data preprocessing steps.")

# Split into training and testing sets without shuffling: the first 80% of the
# windows train the model and the last 20% are held out. Consecutive windows
# overlap, so a shuffled split would put near-duplicate rows on both sides.
# Assumes the rows of the dataset are in chronological order — confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# ------------------------------
# Step 2: Define the LSTM Model
# ------------------------------
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a two-layer fully connected head.

    The LSTM runs batch-first over the input sequence; only the output at the
    last time step is passed through ``fc1 -> ReLU -> fc2``.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        output_size: Number of values produced per sample.
        num_layers: Number of stacked LSTM layers.
        fc_size: Width of the intermediate fully connected layer. Defaults to
            64, the value that was previously hard-coded.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=2, fc_size=64):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, fc_size)  # First fully connected layer
        self.fc2 = nn.Linear(fc_size, output_size)  # Second fully connected layer
        self.relu = nn.ReLU()  # Activation function
        self.init_weights()

    def init_weights(self):
        """Xavier-uniform initialise every weight matrix and zero every bias."""
        # nn.init functions already run without gradient tracking, so the
        # parameters are passed directly instead of through `.data`.
        for name, param in self.lstm.named_parameters():
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.zeros_(param)
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.zeros_(self.fc1.bias)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.zeros_(self.fc2.bias)

    def forward(self, x):
        """Map a batch-first ``(batch, seq, input_size)`` input to ``(batch, output_size)``."""
        out, _ = self.lstm(x)
        out = out[:, -1, :]  # Keep only the last time step
        out = self.fc1(out)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# ------------------------------
# Step 3: Initialize and Train the Model
# ------------------------------

# Model parameters
input_size = X.shape[2]  # Number of features per time step
hidden_size = 64
output_size = y.shape[1]  # Number of values predicted per sample

# Seed torch so weight initialisation (and therefore the run) is repeatable
torch.manual_seed(42)

# Initialize the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LSTMModel(input_size, hidden_size, output_size, num_layers=2).to(device)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)

# Training is full-batch and the tensors never change, so move them to the
# device once instead of on every epoch.
X_train_batch = X_train.to(device)
y_train_batch = y_train.to(device)

# Training loop with gradient clipping
num_epochs = 100000
log_every = 1000  # logging every 10 epochs produced ~10k lines and got truncated

model.train()
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train_batch)
    loss = criterion(outputs, y_train_batch)

    # Abort early if training has diverged
    if torch.isnan(outputs).any():
        raise ValueError(f"Model outputs contain NaNs at epoch {epoch+1}.")
    if torch.isnan(loss):
        raise ValueError(f"Loss became NaN at epoch {epoch+1}.")

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()

    # Gradient clipping
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

    optimizer.step()

    if (epoch+1) % log_every == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Report the loss on the held-out split, which was otherwise never used
model.eval()
with torch.no_grad():
    test_loss = criterion(model(X_test.to(device)), y_test.to(device))
print(f'Test Loss: {test_loss.item():.4f}')

# Save the model weights and biases
model_save_path = 'lstm_model.pth'
torch.save(model.state_dict(), model_save_path)
print(f"Model weights and biases saved to {model_save_path}")

Data contains NaNs. Filling NaNs with forward fill.
Scaler saved to scaler.save
Epoch [10/100000], Loss: 0.1811
Epoch [20/100000], Loss: 0.1805
Epoch [30/100000], Loss: 0.1799


  df = df.fillna(method='ffill')  # Example: forward fill


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch [50020/100000], Loss: 0.0008
Epoch [50030/100000], Loss: 0.0008
Epoch [50040/100000], Loss: 0.0008
Epoch [50050/100000], Loss: 0.0008
Epoch [50060/100000], Loss: 0.0008
Epoch [50070/100000], Loss: 0.0008
Epoch [50080/100000], Loss: 0.0008
Epoch [50090/100000], Loss: 0.0008
Epoch [50100/100000], Loss: 0.0008
Epoch [50110/100000], Loss: 0.0008
Epoch [50120/100000], Loss: 0.0008
Epoch [50130/100000], Loss: 0.0008
Epoch [50140/100000], Loss: 0.0008
Epoch [50150/100000], Loss: 0.0008
Epoch [50160/100000], Loss: 0.0008
Epoch [50170/100000], Loss: 0.0008
Epoch [50180/100000], Loss: 0.0008
Epoch [50190/100000], Loss: 0.0008
Epoch [50200/100000], Loss: 0.0008
Epoch [50210/100000], Loss: 0.0008
Epoch [50220/100000], Loss: 0.0008
Epoch [50230/100000], Loss: 0.0008
Epoch [50240/100000], Loss: 0.0008
Epoch [50250/100000], Loss: 0.0008
Epoch [50260/100000], Loss: 0.0008
Epoch [50270/100000], Loss: 0.0008
Epoch [50280/100000], Los

In [None]:
# Inspect how many values the model predicts per sample (set from y.shape[1]).
output_size

10

In [None]:
# Inspect how many features the model reads per time step (set from X.shape[2]).
input_size

10