<a href="https://colab.research.google.com/github/myllanes/Introduction-to-Deep-Learning/blob/main/H3_2_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import requests
import time
import torch.optim as optim

# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from silently becoming the corpus.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text  # This is the entire text data

# Step 2: Prepare the dataset
sequence_length = 50
# Create a character mapping to integers (sorted so the ids are deterministic)
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = dict(enumerate(chars))

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]
if len(encoded_text) <= sequence_length:
    raise ValueError(
        f"Corpus has {len(encoded_text)} characters; more than "
        f"{sequence_length} are needed to build one (sequence, target) pair."
    )

# Create sequences and targets
encoded_tensor = torch.tensor(encoded_text, dtype=torch.long)
# unfold() exposes every stride-1 window of `sequence_length` characters as a
# view of `encoded_tensor`, so no per-window Python list is built. The final
# window is dropped because no character follows it to serve as its target.
# NOTE: the windows share storage with `encoded_tensor`; do not modify them in place.
sequences = encoded_tensor.unfold(0, sequence_length, 1)[:-1]
# The target of window i is the character right after it: index i + sequence_length.
targets = encoded_tensor[sequence_length:]

# Step 3: Create a dataset class
class CharDataset(Dataset):
    """Pairs each input sequence with the target stored at the same index."""

    def __init__(self, sequences, targets):
        # Both containers are kept as given (no copy) and indexed in parallel.
        self.targets = targets
        self.sequences = sequences

    def __getitem__(self, index):
        """Return the ``(sequence, target)`` pair found at ``index``."""
        sample = self.sequences[index]
        label = self.targets[index]
        return sample, label

    def __len__(self):
        """Return how many sequences are stored."""
        return len(self.sequences)

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Step 4: Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
# The windows start at consecutive offsets, so neighbouring samples share
# sequence_length - 1 characters. A random split would scatter near-duplicates
# of every test window into the training set and inflate validation accuracy.
# Split contiguously instead (first 80% of the text for training), and skip
# `sequence_length` windows at the boundary so that no test window shares any
# character with a training window or its target.
test_start = min(train_size + sequence_length, len(dataset))
test_size = len(dataset) - test_start
train_dataset = torch.utils.data.Subset(dataset, range(train_size))
test_dataset = torch.utils.data.Subset(dataset, range(test_start, len(dataset)))

# Shuffling inside the training split is still fine; only the split boundary matters.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# LSTM model
class CharLSTM(nn.Module):
    """Two stacked LSTM layers over character embeddings, predicting the next character.

    Args:
        input_size: Number of distinct token ids accepted by the embedding.
        hidden_size: Width of the embedding and of both LSTM layers.
        output_size: Number of logits produced for each input sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm1 = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.dropout1 = nn.Dropout(p=0.5)
        self.lstm2 = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return logits computed from the last time step of each sequence.

        Args:
            x: Integer token ids laid out as (batch, seq_len); both LSTMs are
                built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, output_size).
        """
        embedded = self.embedding(x)
        # No explicit (h0, c0): nn.LSTM starts from zero states by default and
        # creates them with the input's device and dtype. Hand-built float32
        # zeros break half/double models and cost a CPU allocation plus a
        # device copy on every forward pass.
        lstm_out, _ = self.lstm1(embedded)
        lstm_out = self.dropout1(lstm_out)
        lstm_out, _ = self.lstm2(lstm_out)
        # Only the last time step feeds the classifier, so select it before
        # dropout rather than masking the whole sequence and discarding the rest.
        last_step = lstm_out[:, -1, :]
        return self.fc(self.dropout2(last_step))

# Hyperparameters
epochs = 10
learning_rate = 0.002
hidden_size = 108
# One value serves both ends of the network: the vocabulary size is the
# embedding's input range and also the number of output logits.
input_size = output_size = len(chars)

# Model, loss, and optimizer
criterion = nn.CrossEntropyLoss()
model = CharLSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Calculate model size
def count_parameters(model):
    """Return the total element count of all parameters with ``requires_grad`` set."""
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable

# Count the trainable parameters once and keep the figure in `total_params`
# so it can be printed again after training.
total_params = count_parameters(model)
print(f"Total trainable parameters: {total_params}")

# Step 6: Training loop
def train(model, train_loader, criterion, optimizer, epochs):
    """Train ``model`` for ``epochs`` passes over ``train_loader``, printing progress.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        train_loader: Iterable yielding ``(data, target)`` tensor batches.
        criterion: Callable returning a scalar loss from ``(output, target)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        epochs: Number of full passes over ``train_loader``.

    Returns:
        A list with one ``(average_loss, accuracy)`` tuple per epoch. Both
        values are NaN for an epoch in which the loader yielded no batches.
    """
    # Batches are moved to wherever the model lives, so the loop also works
    # once the model has been placed on an accelerator.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    history = []
    start_time = time.time()
    for epoch in range(epochs):
        # Re-assert training mode each epoch in case an evaluation pass ran in between.
        model.train()
        epoch_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            epoch_loss += batch_loss
            num_batches += 1

            predicted = output.argmax(dim=1)
            correct += (predicted == target).sum().item()
            total += target.size(0)

            if batch_idx % 100 == 0:
                print(f"Epoch {epoch+1}, Batch {batch_idx}, Loss: {batch_loss:.4f}")

        # Count batches and samples actually seen rather than trusting len(),
        # and report NaN instead of dividing by zero when the loader is empty.
        avg_loss = epoch_loss / num_batches if num_batches else float("nan")
        accuracy = correct / total if total else float("nan")
        print(f"Epoch {epoch+1}, Average Training Loss: {avg_loss:.4f}, Training Accuracy: {accuracy:.4f}")
        history.append((avg_loss, accuracy))

    training_time = time.time() - start_time
    print(f"Training time: {training_time:.2f} seconds")
    return history

# Step 7: Evaluation
def evaluate(model, test_loader, criterion):
    """Score ``model`` on ``test_loader`` without gradients and print the result.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        test_loader: Iterable yielding ``(data, target)`` tensor batches.
        criterion: Callable returning a scalar loss from ``(output, target)``.

    Returns:
        ``(average_loss, accuracy)``: the mean of the per-batch losses and the
        fraction of correctly predicted samples. Both are NaN when the loader
        yields no batches. The model is left in evaluation mode.
    """
    # Batches are moved to wherever the model lives.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            total_loss += criterion(output, target).item()
            num_batches += 1
            correct += (output.argmax(dim=1) == target).sum().item()
            total += target.size(0)

    # Divide by the samples actually evaluated: len(test_loader.dataset) is
    # wrong when a sampler or drop_last skips items, and it does not exist on
    # plain iterables of batches.
    avg_loss = total_loss / num_batches if num_batches else float("nan")
    accuracy = correct / total if total else float("nan")
    print(f"Validation Loss: {avg_loss:.4f}, Validation Accuracy: {accuracy:.4f}")
    return avg_loss, accuracy

# Train the model (train() prints per-epoch metrics and the total training time)
train(model, train_loader, criterion, optimizer, epochs)

# Evaluate the model on test_loader (evaluate() prints validation loss and accuracy)
evaluate(model, test_loader, criterion)

# Report model size again; the execution time was already printed at the end of train()
print(f"Total trainable parameters: {total_params}")


Total trainable parameters: 202457
Epoch 1, Batch 0, Loss: 4.1781
Epoch 1, Batch 100, Loss: 2.8763
Epoch 1, Batch 200, Loss: 2.7952
Epoch 1, Batch 300, Loss: 2.6637
Epoch 1, Batch 400, Loss: 2.4456
Epoch 1, Batch 500, Loss: 2.3883
Epoch 1, Batch 600, Loss: 2.3291
Epoch 1, Batch 700, Loss: 2.3152
Epoch 1, Batch 800, Loss: 2.3480
Epoch 1, Batch 900, Loss: 2.1869
Epoch 1, Batch 1000, Loss: 2.1859
Epoch 1, Batch 1100, Loss: 2.2481
Epoch 1, Batch 1200, Loss: 2.1379
Epoch 1, Batch 1300, Loss: 2.2436
Epoch 1, Batch 1400, Loss: 2.2986
Epoch 1, Batch 1500, Loss: 2.2792
Epoch 1, Batch 1600, Loss: 2.2307
Epoch 1, Batch 1700, Loss: 2.1393
Epoch 1, Batch 1800, Loss: 2.0327
Epoch 1, Batch 1900, Loss: 2.0777
Epoch 1, Batch 2000, Loss: 2.1244
Epoch 1, Batch 2100, Loss: 2.0284
Epoch 1, Batch 2200, Loss: 2.0780
Epoch 1, Batch 2300, Loss: 1.9897
Epoch 1, Batch 2400, Loss: 2.3778
Epoch 1, Batch 2500, Loss: 1.9482
Epoch 1, Batch 2600, Loss: 2.3155
Epoch 1, Batch 2700, Loss: 2.2034
Epoch 1, Batch 2800, Loss