In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np



In [2]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the hidden state of every LSTM layer.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per input sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # batch_first=True: the LSTM consumes and produces (batch, seq, feature)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

        # Output layer mapping one hidden vector to the prediction
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the LSTM over ``x`` and project the final time step.

        Args:
            x: Tensor of shape (batch_size, seq_length, input_size).

        Returns:
            Tensor of shape (batch_size, output_size).
        """
        # Zero initial hidden/cell states, created directly with the dtype and
        # device of the input. Building them with torch.zeros(...) would always
        # give the default dtype and break models cast with .double()/.half().
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = x.new_zeros(state_shape)
        c0 = x.new_zeros(state_shape)

        # seq_out: (batch_size, seq_length, hidden_size); final states are unused
        seq_out, _ = self.lstm(x, (h0, c0))

        # Only the last time step feeds the output layer
        return self.fc(seq_out[:, -1, :])


In [3]:
# Generate synthetic sequential data for regression
# Seed both NumPy's and PyTorch's global RNGs with the same value
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Create dummy time-series data
def generate_data(seq_length=10, num_samples=100):
    """Build consecutive-integer sequences and their sums as regression targets.

    Sample ``i`` is the sequence ``i, i+1, ..., i+seq_length-1`` and its target
    is the sum of that sequence.

    Args:
        seq_length: Number of time steps in every sequence.
        num_samples: Number of sequences to generate (0 is allowed).

    Returns:
        Tuple ``(X, y)`` of float32 tensors with shapes
        ``(num_samples, seq_length, 1)`` and ``(num_samples, 1)``.
    """
    # Broadcasting a column of start values against a row of offsets yields a
    # 2-D array even when num_samples == 0, so the axis=1 sum below never fails.
    starts = np.arange(num_samples).reshape(-1, 1)
    offsets = np.arange(seq_length)
    X = starts + offsets
    y = X.sum(axis=1)  # target: sum of each sequence

    features = torch.tensor(X, dtype=torch.float32).unsqueeze(-1)
    targets = torch.tensor(y, dtype=torch.float32).unsqueeze(-1)
    return features, targets

# Build the full dataset of fixed-length sequences
seq_length = 10
X, y = generate_data(seq_length=seq_length, num_samples=1000)

# Ordered 80/20 split: the first 80% of samples train, the remainder tests
train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]


In [7]:
# Hyperparameters
input_size = 1
hidden_size = 64
num_layers = 2
output_size = 1
learning_rate = 0.01
num_epochs = 20000

# Logging / early-stop settings used by the loop below
log_every = 10   # print the loss once every this many epochs
stop_below = 50  # leave the loop as soon as the training loss drops under this

# Build the model on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LSTMModel(input_size, hidden_size, num_layers, output_size).to(device)

# Mean-squared-error objective optimised with Adam
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Put every data tensor on the same device as the model
X_train, X_test = X_train.to(device), X_test.to(device)
y_train, y_test = y_train.to(device), y_test.to(device)

# NOTE(review): no scaling is applied to the inputs or targets in this cell.
# In the saved output the MSE stays around 2e7, so the early-stop threshold is
# not reached there; consider standardising X and y before training.

# Full-batch training: one optimiser step per epoch on the whole training set
model.train()
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Periodic progress report
    if (epoch+1) % log_every == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    # Early stop once the training loss is small enough
    if loss.item() < stop_below:
        break


Epoch [10/20000], Loss: 21597192.0000
Epoch [20/20000], Loss: 21543478.0000
Epoch [30/20000], Loss: 21489342.0000
Epoch [40/20000], Loss: 21435322.0000
Epoch [50/20000], Loss: 21381890.0000
Epoch [60/20000], Loss: 21329040.0000
Epoch [70/20000], Loss: 21276700.0000
Epoch [80/20000], Loss: 21224788.0000
Epoch [90/20000], Loss: 21173236.0000
Epoch [100/20000], Loss: 21121992.0000
Epoch [110/20000], Loss: 21071020.0000
Epoch [120/20000], Loss: 21020290.0000
Epoch [130/20000], Loss: 20969782.0000
Epoch [140/20000], Loss: 20919482.0000
Epoch [150/20000], Loss: 20869370.0000
Epoch [160/20000], Loss: 20819444.0000
Epoch [170/20000], Loss: 20769690.0000
Epoch [180/20000], Loss: 20720100.0000
Epoch [190/20000], Loss: 20670672.0000
Epoch [200/20000], Loss: 20621402.0000
Epoch [210/20000], Loss: 20572276.0000
Epoch [220/20000], Loss: 20523304.0000
Epoch [230/20000], Loss: 20474470.0000
Epoch [240/20000], Loss: 20425780.0000
Epoch [250/20000], Loss: 20377226.0000
Epoch [260/20000], Loss: 20328808.

In [5]:
# Score the trained model on the held-out split
model.eval()  # switch the module to evaluation mode

# Gradients are not needed for evaluation, so disable autograd tracking
with torch.no_grad():
    predictions = model(X_test)
    test_loss = criterion(predictions, y_test)

# Same criterion as training, so the value is comparable to the training loss
print(f'Test Loss: {test_loss.item():.4f}')


Test Loss: 75069224.0000


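This notebook is just for understanding how an LSTM works! The very large losses above come from feeding unscaled inputs and targets to the model, not from the LSTM itself.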