In [1]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    confusion_matrix,
)
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
from torchvision import datasets, transforms

#Problem 2
from torch.utils.data import Dataset, DataLoader, random_split
import requests

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Using device: {device}")

'''devNumber = torch.cuda.current_device()
devName = torch.cuda.get_device_name(devNumber)

print(f"Current device number is: {devNumber}")
print(f"GPU name is: {devName}")'''



Using device: cuda


'devNumber = torch.cuda.current_device()\ndevName = torch.cuda.get_device_name(devNumber)\n\nprint(f"Current device number is: {devNumber}")\nprint(f"GPU name is: {devName}")'

In [21]:
'''
Problem 1 (40pts)
Inspired by the course example, train and validate rnn.RNN, rnn.LSTM and rnn.GRU for learning the above sequence. Use 
sequence lengths of 10, 20, and 30 for your training. Feel free to adjust other network parameters. Report and compare
training loss, validation accuracy, execution time for training, and computational and mode size complexities across the 
three models over various lengths of sequence.
'''

# Sample text for training
text = """Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text.
At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model.
One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks.
Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time.
Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants.
In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."""

chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}
chars = sorted(list(set(text)))

#Preparing the dataset
def prepare_data(sequence_length):
    X = []
    y = []
    for i in range(len(text) - sequence_length):
        sequence = text[i:i + sequence_length]
        label = text[i + sequence_length]
        X.append([char_to_ix[char] for char in sequence])
        y.append(char_to_ix[label])
    return np.array(X), np.array(y)

# Define RNN, LSTM, and GRU models
class CharModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, model_type="RNN"):
        super(CharModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        if model_type == "RNN":
            self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        elif model_type == "LSTM":
            self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        else:
            self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        return output

# Training function/loop
def train_and_evaluate(model, X_train, y_train, X_val, y_val, epochs, learning_rate):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    start_time = time.time()
    
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        output = model(X_train)
        loss = criterion(output, y_train)
        loss.backward()
        optimizer.step()    
        model.eval()
        with torch.no_grad():
            val_output = model(X_val)
            val_loss = criterion(val_output, y_val)
            _, predicted = torch.max(val_output, 1)
            val_accuracy = (predicted == y_val).float().mean()
            
        if (epoch+1) % 10 == 0:
            print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}, Validation Loss: {val_loss.item():.4f}, Validation Accuracy: {val_accuracy.item():.4f}')
    execution_time = time.time() - start_time
    return loss.item(), val_accuracy.item(), execution_time, sum(p.numel() for p in model.parameters())

# Metrics for Model
sequence_lengths = [10, 20, 30]
model_types = ["RNN", "LSTM", "GRU"]
hidden_size = 128
epochs = 100
learning_rate = 0.005
results = []

#Training the Model
for model_type in model_types:
        print(f"\nModel Type: {model_type}")
        for seq_length in sequence_lengths:
            # Splitting the dataset into training and validation sets 
            X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
            #Converting data to PyTorch tensors
            X_train, y_train = torch.tensor(X_train, dtype=torch.long), torch.tensor(y_train, dtype=torch.long)
            X_val, y_val = torch.tensor(X_val, dtype=torch.long), torch.tensor(y_val, dtype=torch.long)

            print(f"\nTraining with sequence length: {seq_length} for {model_type}\n")
            X, y = prepare_data(seq_length)
            model = CharModel(len(chars), hidden_size, len(chars), model_type)
            loss, val_acc, exec_time, model_size = train_and_evaluate(model, X_train, y_train, X_val, y_val,epochs, learning_rate)
            results.append((model_type, seq_length, loss, val_acc, exec_time, model_size))
            
# Display results
print("\nModel Comparison:")
for result in results:
    print(f"{result[0]} | Seq Len: {result[1]} | Loss: {result[2]:.4f} | Val Acc: {result[3]:.4f} | Time: {result[4]:.2f}s | Model Size: {result[5]}")



Model Type: RNN

Training with sequence length: 10 for RNN

Epoch 10, Loss: 2.2405, Validation Loss: 2.3848, Validation Accuracy: 0.3242
Epoch 20, Loss: 1.7632, Validation Loss: 2.1252, Validation Accuracy: 0.4258
Epoch 30, Loss: 1.4085, Validation Loss: 2.0321, Validation Accuracy: 0.4280
Epoch 40, Loss: 1.0951, Validation Loss: 2.0049, Validation Accuracy: 0.4640
Epoch 50, Loss: 0.8055, Validation Loss: 2.0564, Validation Accuracy: 0.4788
Epoch 60, Loss: 0.5730, Validation Loss: 2.1916, Validation Accuracy: 0.4936
Epoch 70, Loss: 0.3755, Validation Loss: 2.3229, Validation Accuracy: 0.4936
Epoch 80, Loss: 0.2336, Validation Loss: 2.4898, Validation Accuracy: 0.5064
Epoch 90, Loss: 0.1482, Validation Loss: 2.6295, Validation Accuracy: 0.4915
Epoch 100, Loss: 0.0914, Validation Loss: 2.7821, Validation Accuracy: 0.5000

Training with sequence length: 20 for RNN

Epoch 10, Loss: 2.2620, Validation Loss: 2.2925, Validation Accuracy: 0.3929
Epoch 20, Loss: 1.7936, Validation Loss: 2.0487

In [None]:
'''Problem 2 (60pts)
Build the model for.LSTM and rnn.GRU for the tiny Shakespeare dataset, the data loader code is already provided.
Train the models for the sequence of 20 and 30, report and compare training loss, validation accuracy, execution time for training, and computational and mode size complexities across the two models.
Adjust the hyperparameters (fully connected network, number of hidden layers, and the number of hidden states) and compare your results (training and validation loss, computation complexity, model size, training and inference time, and the output sequence). Analyze their influence on accuracy, running time, and computational perplexity.
What if we increase the sequence length to 50. Perform the training and report the accuracy and model complexity results.'''

from torch.utils.data import Dataset, DataLoader, random_split
import requests

# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text  # This is the entire text data

# Step 2: Prepare the dataset
def process_data(sequence_length):
    # Create a character mapping to integers
    chars = sorted(list(set(text)))
    char_to_int = {ch: i for i, ch in enumerate(chars)}
    int_to_char = {i: ch for i, ch in enumerate(chars)}
    
    # Encode the text into integers
    encoded_text = [char_to_int[ch] for ch in text]

    # Create sequences and targets
    sequences = []
    targets = []
    for i in range(len(encoded_text) - sequence_length):
        seq = encoded_text[i:i + sequence_length]
        target = encoded_text[i + sequence_length]
        sequences.append(seq)
        targets.append(target)
    # Convert lists to PyTorch tensors
    return torch.tensor(sequences, dtype=torch.long), torch.tensor(targets, dtype=torch.long), char_to_int, int_to_char

# Step 3: Create a dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Define RNN, LSTM, and GRU models
class CharModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, model_type="RNN"):
        super(CharModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        if model_type == "LSTM":
            self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        else:
            self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        return output

# Training function/loop
def train_and_evaluate(model, train_loader, test_loader, epochs, learning_rate, device):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    model.to(device)
    start_time = time.time()
    
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_train_loss = total_loss / len(train_loader)

        # Evaluate on test set
        model.eval()
        total_val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                val_output = model(X_batch)
                val_loss = criterion(val_output, y_batch)
                total_val_loss += val_loss.item()
                _, predicted = torch.max(val_output, 1)
                correct += (predicted == y_batch).sum().item()
                total += y_batch.size(0)

        avg_val_loss = total_val_loss / len(test_loader)
        val_accuracy = correct / total

        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

    execution_time = time.time() - start_time
    model_size = sum(p.numel() for p in model.parameters())

    return avg_train_loss, val_accuracy, execution_time, model_size

# Metrics for Model
sequence_lengths = [20, 30]
model_types = ["LSTM", "GRU"]
hidden_size = 128
epochs = 50
batch_size = 128
learning_rate = 0.005

results = []

# Training the models for different sequence lengths
for seq_length in sequence_lengths:
    print(f"\nProcessing sequence length: {seq_length}")
    
    sequences, targets, char_to_int, int_to_char = process_data(seq_length)
    dataset = CharDataset(sequences, targets)

    # Split into training and validation
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

    for model_type in model_types:
        print(f"\nTraining {model_type} with sequence length {seq_length}...\n")
        model = CharModel(len(char_to_int), hidden_size, len(char_to_int), model_type)
        train_loss, val_acc, exec_time, model_size = train_and_evaluate(model, train_loader, test_loader, epochs, learning_rate, device)
        
        results.append((model_type, seq_length, train_loss, val_acc, exec_time, model_size))

# Display results
print("\nFinal Model Comparison:")
for result in results:
    print(f"{result[0]} | Seq Len: {result[1]} | Loss: {result[2]:.4f} | Val Acc: {result[3]:.4f} | Time: {result[4]:.2f}s | Model Size: {result[5]}")



Processing sequence length: 20

Training LSTM with sequence length 20...

Epoch 10, Train Loss: 1.5574, Val Loss: 1.5744, Val Accuracy: 0.5252
Epoch 20, Train Loss: 1.6072, Val Loss: 1.6267, Val Accuracy: 0.5097
Epoch 30, Train Loss: 1.6753, Val Loss: 1.6971, Val Accuracy: 0.4931
Epoch 40, Train Loss: 1.6907, Val Loss: 1.7150, Val Accuracy: 0.4906
Epoch 50, Train Loss: 1.6932, Val Loss: 1.7121, Val Accuracy: 0.4880

Training GRU with sequence length 20...

Epoch 10, Train Loss: 1.8145, Val Loss: 1.8225, Val Accuracy: 0.4652
Epoch 20, Train Loss: 1.8362, Val Loss: 1.8440, Val Accuracy: 0.4541
Epoch 30, Train Loss: 1.8472, Val Loss: 1.8748, Val Accuracy: 0.4456
Epoch 40, Train Loss: 1.8429, Val Loss: 1.8535, Val Accuracy: 0.4560
Epoch 50, Train Loss: 1.8412, Val Loss: 1.8705, Val Accuracy: 0.4564

Processing sequence length: 30

Training LSTM with sequence length 30...

Epoch 10, Train Loss: 1.5316, Val Loss: 1.5562, Val Accuracy: 0.5309
Epoch 20, Train Loss: 1.5781, Val Loss: 1.5925, V

In [None]:
'''Problem 2 Adjust hyperparameters( fully connected network, number of hidden layers, and the number of hidden states)
and compare results (training and validation loss, computation complexity, model size, training and inference time, and the ouptut sequence)
'''

# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text  # This is the entire text data

# Step 2: Prepare the dataset
def process_data(sequence_length):
    # Create a character mapping to integers
    chars = sorted(list(set(text)))
    char_to_int = {ch: i for i, ch in enumerate(chars)}
    int_to_char = {i: ch for i, ch in enumerate(chars)}
    
    # Encode the text into integers
    encoded_text = [char_to_int[ch] for ch in text]

    # Create sequences and targets
    sequences = []
    targets = []
    for i in range(len(encoded_text) - sequence_length):
        seq = encoded_text[i:i + sequence_length]
        target = encoded_text[i + sequence_length]
        sequences.append(seq)
        targets.append(target)
    # Convert lists to PyTorch tensors
    return torch.tensor(sequences, dtype=torch.long), torch.tensor(targets, dtype=torch.long), char_to_int, int_to_char

# Step 3: Create a dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Define RNN, LSTM, and GRU models
class CharModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, model_type="RNN"):
        super(CharModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        if model_type == "LSTM":
            self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        else:
            self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        return output

# Training function/loop
def train_and_evaluate(model, train_loader, test_loader, epochs, learning_rate, device):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    model.to(device)
    start_time = time.time()
    
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_train_loss = total_loss / len(train_loader)

        # Evaluate on test set
        model.eval()
        total_val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                val_output = model(X_batch)
                val_loss = criterion(val_output, y_batch)
                total_val_loss += val_loss.item()
                _, predicted = torch.max(val_output, 1)
                correct += (predicted == y_batch).sum().item()
                total += y_batch.size(0)

        avg_val_loss = total_val_loss / len(test_loader)
        val_accuracy = correct / total

        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

    execution_time = time.time() - start_time
    model_size = sum(p.numel() for p in model.parameters())

    return avg_train_loss, val_accuracy, execution_time, model_size

# Metrics for Model
sequence_lengths = [20, 30]
model_types = ["LSTM", "GRU"]
hidden_size = 64
epochs = 50 
learning_rate = 0.01
batch_size = 128
results = []

# Training the models for different sequence lengths
for seq_length in sequence_lengths:
    print(f"\nProcessing sequence length: {seq_length}")
    
    sequences, targets, char_to_int, int_to_char = process_data(seq_length)
    dataset = CharDataset(sequences, targets)

    # Split into training and validation
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

    for model_type in model_types:
        print(f"\nTraining {model_type} with sequence length {seq_length}...\n")
        model = CharModel(len(char_to_int), hidden_size, len(char_to_int), model_type)
        train_loss, val_acc, exec_time, model_size = train_and_evaluate(model, train_loader, test_loader, epochs, learning_rate, device)
        
        results.append((model_type, seq_length, train_loss, val_acc, exec_time, model_size))

# Display results
print("\nFinal Model Comparison:")
for result in results:
    print(f"{result[0]} | Seq Len: {result[1]} | Loss: {result[2]:.4f} | Val Acc: {result[3]:.4f} | Time: {result[4]:.2f}s | Model Size: {result[5]}")


Processing sequence length: 20

Training LSTM with sequence length 20...

Epoch 10, Train Loss: 1.7888, Val Loss: 1.7981, Val Accuracy: 0.4690
Epoch 20, Train Loss: 1.8584, Val Loss: 1.8633, Val Accuracy: 0.4543
Epoch 30, Train Loss: 1.8644, Val Loss: 1.8599, Val Accuracy: 0.4508
Epoch 40, Train Loss: 1.8878, Val Loss: 1.8857, Val Accuracy: 0.4471
Epoch 50, Train Loss: 1.8990, Val Loss: 1.8983, Val Accuracy: 0.4403

Training GRU with sequence length 20...

Epoch 10, Train Loss: 2.0367, Val Loss: 2.0228, Val Accuracy: 0.4126
Epoch 20, Train Loss: 2.0751, Val Loss: 2.0781, Val Accuracy: 0.4066
Epoch 30, Train Loss: 2.1056, Val Loss: 2.1021, Val Accuracy: 0.3950
Epoch 40, Train Loss: 2.1374, Val Loss: 2.1485, Val Accuracy: 0.3820
Epoch 50, Train Loss: 2.1437, Val Loss: 2.1421, Val Accuracy: 0.3804

Processing sequence length: 30

Training LSTM with sequence length 30...

Epoch 10, Train Loss: 1.7788, Val Loss: 1.7961, Val Accuracy: 0.4703
Epoch 20, Train Loss: 1.8397, Val Loss: 1.8317, V

In [2]:
'''What if we increase the sequence length to 50? Perform the training and report the accuracy and model complexity results.'''

# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text  # This is the entire text data

# Step 2: Prepare the dataset
def process_data(sequence_length):
    # Create a character mapping to integers
    chars = sorted(list(set(text)))
    char_to_int = {ch: i for i, ch in enumerate(chars)}
    int_to_char = {i: ch for i, ch in enumerate(chars)}
    
    # Encode the text into integers
    encoded_text = [char_to_int[ch] for ch in text]

    # Create sequences and targets
    sequences = []
    targets = []
    for i in range(len(encoded_text) - sequence_length):
        seq = encoded_text[i:i + sequence_length]
        target = encoded_text[i + sequence_length]
        sequences.append(seq)
        targets.append(target)
    # Convert lists to PyTorch tensors
    return torch.tensor(sequences, dtype=torch.long), torch.tensor(targets, dtype=torch.long), char_to_int, int_to_char

# Step 3: Create a dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Define RNN, LSTM, and GRU models
class CharModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, model_type="RNN"):
        super(CharModel, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        if model_type == "LSTM":
            self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        else:
            self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        return output

# Training function/loop
def train_and_evaluate(model, train_loader, test_loader, epochs, learning_rate, device):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    model.to(device)
    start_time = time.time()
    
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_train_loss = total_loss / len(train_loader)

        # Evaluate on test set
        model.eval()
        total_val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                val_output = model(X_batch)
                val_loss = criterion(val_output, y_batch)
                total_val_loss += val_loss.item()
                _, predicted = torch.max(val_output, 1)
                correct += (predicted == y_batch).sum().item()
                total += y_batch.size(0)

        avg_val_loss = total_val_loss / len(test_loader)
        val_accuracy = correct / total

        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch+1}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

    execution_time = time.time() - start_time
    model_size = sum(p.numel() for p in model.parameters())

    return avg_train_loss, val_accuracy, execution_time, model_size

#Changed Parameters
sequence_lengths = [50]
model_types = ["LSTM", "GRU"]
hidden_size = 64
epochs = 1
batch_size = 128
learning_rate = 0.01
results = []

# Training the models for different sequence lengths
for seq_length in sequence_lengths:
    print(f"\nProcessing sequence length: {seq_length}")
    
    sequences, targets, char_to_int, int_to_char = process_data(seq_length)
    dataset = CharDataset(sequences, targets)

    # Split into training and validation
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

    for model_type in model_types:
        print(f"\nTraining {model_type} with sequence length {seq_length}...\n")
        model = CharModel(len(char_to_int), hidden_size, len(char_to_int), model_type)
        train_loss, val_acc, exec_time, model_size = train_and_evaluate(model, train_loader, test_loader, epochs, learning_rate, device)
        
        results.append((model_type, seq_length, train_loss, val_acc, exec_time, model_size))
# Display results
print("\nFinal Model Comparison:")
for result in results:
    print(f"{result[0]} | Seq Len: {result[1]} | Loss: {result[2]:.4f} | Val Acc: {result[3]:.4f} | Time: {result[4]:.2f}s | Model Size: {result[5]}")


Processing sequence length: 50

Training LSTM with sequence length 50...


Training GRU with sequence length 50...


Final Model Comparison:
LSTM | Seq Len: 50 | Loss: 1.8636 | Val Acc: 0.4793 | Time: 89.27s | Model Size: 41665
GRU | Seq Len: 50 | Loss: 1.9679 | Val Acc: 0.4442 | Time: 87.57s | Model Size: 33345
