<a href="https://colab.research.google.com/github/sesmael/Real-Time-ML-/blob/main/Homework3_4106.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split

# Check CUDA availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Given text sequence
text = ("Next character prediction is a fundamental task in the field of natural language processing (NLP) "
        "that involves predicting the next character in a sequence of text based on the characters that precede it. "
        "This task is essential for various applications, including text auto-completion, spell checking, "
        "and even in the development of sophisticated AI models capable of generating human-like text.")

# Creating character vocabulary
chars = sorted(list(set(text)))
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

# Function to prepare datasets
def prepare_data(sequence_length):
    X, y = [], []
    for i in range(len(text) - sequence_length):
        sequence = text[i:i + sequence_length]
        label = text[i + sequence_length]
        X.append([char_to_ix[char] for char in sequence])
        y.append(char_to_ix[label])
    X = np.array(X)
    y = np.array(y)
    return train_test_split(torch.tensor(X, dtype=torch.long),
                            torch.tensor(y, dtype=torch.long),
                            test_size=0.2, random_state=42)

# RNN Model Definition
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, model_type="RNN"):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        if model_type == "RNN":
            self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        elif model_type == "LSTM":
            self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        elif model_type == "GRU":
            self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        return output

# Training function
def train_model(model_type, sequence_length):
    X_train, X_val, y_train, y_val = prepare_data(sequence_length)
    model = CharRNN(len(chars), 128, len(chars), model_type).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.005)

    start_time = time.time()
    for epoch in range(50):
        model.train()
        optimizer.zero_grad()
        output = model(X_train.to(device))
        loss = criterion(output, y_train.to(device))
        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            val_output = model(X_val.to(device))
            val_loss = criterion(val_output, y_val.to(device))
            val_accuracy = (torch.argmax(val_output, 1) == y_val.to(device)).float().mean()

        if (epoch + 1) % 10 == 0:
            print(f'{model_type} - Seq Length {sequence_length} | Epoch {epoch+1}: Loss {loss.item():.4f}, Val Acc {val_accuracy.item():.4f}')

    exec_time = time.time() - start_time
    return loss.item(), val_accuracy.item(), exec_time, sum(p.numel() for p in model.parameters())

# Running experiments
sequence_lengths = [10, 20, 30]
models = ["RNN", "LSTM", "GRU"]
results = []
for seq_len in sequence_lengths:
    for model in models:
        loss, accuracy, exec_time, model_size = train_model(model, seq_len)
        results.append((model, seq_len, loss, accuracy, exec_time, model_size))

# Displaying results
import pandas as pd

# Convert results to a DataFrame
results_df = pd.DataFrame(results, columns=["Model", "Sequence Length", "Training Loss", "Validation Accuracy", "Execution Time (s)", "Model Size"])

# Print results as a table
print(results_df)

# Save results to a CSV file (optional)
results_df.to_csv("rnn_lstm_gru_comparison.csv", index=False)

print("Results saved to 'rnn_lstm_gru_comparison.csv'")


RNN - Seq Length 10 | Epoch 10: Loss 1.6465, Val Acc 0.2597
RNN - Seq Length 10 | Epoch 20: Loss 0.7395, Val Acc 0.2857
RNN - Seq Length 10 | Epoch 30: Loss 0.2374, Val Acc 0.2468
RNN - Seq Length 10 | Epoch 40: Loss 0.0659, Val Acc 0.2597
RNN - Seq Length 10 | Epoch 50: Loss 0.0233, Val Acc 0.2727
LSTM - Seq Length 10 | Epoch 10: Loss 2.1638, Val Acc 0.2078
LSTM - Seq Length 10 | Epoch 20: Loss 1.2374, Val Acc 0.2468
LSTM - Seq Length 10 | Epoch 30: Loss 0.4831, Val Acc 0.2468
LSTM - Seq Length 10 | Epoch 40: Loss 0.1209, Val Acc 0.2338
LSTM - Seq Length 10 | Epoch 50: Loss 0.0349, Val Acc 0.2208
GRU - Seq Length 10 | Epoch 10: Loss 1.9211, Val Acc 0.2208
GRU - Seq Length 10 | Epoch 20: Loss 0.9489, Val Acc 0.2208
GRU - Seq Length 10 | Epoch 30: Loss 0.2961, Val Acc 0.2727
GRU - Seq Length 10 | Epoch 40: Loss 0.0710, Val Acc 0.2727
GRU - Seq Length 10 | Epoch 50: Loss 0.0225, Val Acc 0.2597
RNN - Seq Length 20 | Epoch 10: Loss 1.6347, Val Acc 0.2000
RNN - Seq Length 20 | Epoch 20: Los

In [1]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import requests

# Check CUDA availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text  # This is the entire text data

# Step 2: Prepare the dataset
sequence_length = 20
# Create a character mapping to integers
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]

# Create sequences and targets
sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

# Convert lists to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Step 3: Create a dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences.to(device)
        self.targets = targets.to(device)

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Step 4: Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Model Definition
class CharLSTMGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, model_type="LSTM"):
        super(CharLSTMGRU, self).__init__()
        self.embedding = nn.Embedding(input_size, hidden_size).to(device)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        if model_type == "LSTM":
            self.rnn = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True).to(device)
        elif model_type == "GRU":
            self.rnn = nn.GRU(hidden_size, hidden_size, num_layers, batch_first=True).to(device)
        self.fc = nn.Linear(hidden_size, output_size).to(device)

    def forward(self, x):
        x = x.to(device)
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        return output

# Training function
def train_model(model_type, sequence_length):
    model = CharLSTMGRU(len(chars), 256, len(chars), 2, model_type).to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.003)

    start_time = time.time()
    for epoch in range(30):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Validation
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for X_val, y_val in test_loader:
                X_val, y_val = X_val.to(device), y_val.to(device)
                val_output = model(X_val)
                predicted = torch.argmax(val_output, 1)
                correct += (predicted == y_val).sum().item()
                total += y_val.size(0)
        val_accuracy = correct / total

        if (epoch + 1) % 10 == 0:
            print(f'{model_type} - Seq Length {sequence_length} | Epoch {epoch+1}: Loss {total_loss:.4f}, Val Acc {val_accuracy:.4f}')

    exec_time = time.time() - start_time
    return total_loss, val_accuracy, exec_time, sum(p.numel() for p in model.parameters())

# Running experiments
sequence_lengths = [20, 30]
results = []
for seq_len in sequence_lengths:
    for model in ["LSTM", "GRU"]:
        loss, accuracy, exec_time, model_size = train_model(model, seq_len)
        results.append((model, seq_len, loss, accuracy, exec_time, model_size))

# Display results
results_df = pd.DataFrame(results, columns=["Model", "Sequence Length", "Training Loss", "Validation Accuracy", "Execution Time (s)", "Model Size"])
print(results_df)
results_df.to_csv("lstm_gru_comparison.csv", index=False)
print("Results saved to 'lstm_gru_comparison.csv'")

Using device: cuda
LSTM - Seq Length 20 | Epoch 10: Loss 9955.0330, Val Acc 0.5535
LSTM - Seq Length 20 | Epoch 20: Loss 10480.6142, Val Acc 0.5328
LSTM - Seq Length 20 | Epoch 30: Loss 11838.6115, Val Acc 0.4921
GRU - Seq Length 20 | Epoch 10: Loss 15247.3753, Val Acc 0.3660
GRU - Seq Length 20 | Epoch 20: Loss 14008.0638, Val Acc 0.4125
GRU - Seq Length 20 | Epoch 30: Loss 13157.8531, Val Acc 0.4477
LSTM - Seq Length 30 | Epoch 10: Loss 9778.4563, Val Acc 0.5598
LSTM - Seq Length 30 | Epoch 20: Loss 9960.5545, Val Acc 0.5531
LSTM - Seq Length 30 | Epoch 30: Loss 10331.1304, Val Acc 0.5394
GRU - Seq Length 30 | Epoch 10: Loss 14822.9504, Val Acc 0.3823
GRU - Seq Length 30 | Epoch 20: Loss 14651.3563, Val Acc 0.3889
GRU - Seq Length 30 | Epoch 30: Loss 12830.0405, Val Acc 0.4397
  Model  Sequence Length  Training Loss  Validation Accuracy  \
0  LSTM               20   11838.611453             0.492099   
1   GRU               20   13157.853111             0.447715   
2  LSTM           

In [11]:
# Optimized Google Colab LSTM vs GRU Training (20 Epochs)

import time
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import requests
from google.colab import files

# Check CUDA availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text

# Step 2: Prepare the dataset
sequence_length = 50
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = np.array([char_to_int[ch] for ch in text], dtype=np.int32)

# Create sequences and targets using NumPy
num_samples = len(encoded_text) - sequence_length
sequences = np.zeros((num_samples, sequence_length), dtype=np.int32)
targets = np.zeros(num_samples, dtype=np.int32)

for i in range(num_samples):
    sequences[i] = encoded_text[i:i + sequence_length]
    targets[i] = encoded_text[i + sequence_length]

# Convert to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Step 3: Create Dataset Class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

dataset = CharDataset(sequences, targets)
del sequences, targets, text, encoded_text  # Free memory

# Step 4: Create DataLoaders
batch_size = 64
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size, pin_memory=True)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size, pin_memory=True)

# Model Definition
class CharLSTMGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, model_type="LSTM"):
        super(CharLSTMGRU, self).__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        if model_type == "LSTM":
            self.rnn = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        elif model_type == "GRU":
            self.rnn = nn.GRU(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.rnn(embedded)
        output = self.fc(output[:, -1, :])
        return output

# Training function (Train for 20 Epochs)
def train_model(model_type):
    model = CharLSTMGRU(len(chars), 128, len(chars), 1, model_type).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.005)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.5)  # Adjusted for 20 epochs

    start_time = time.time()
    epoch_results = []

    for epoch in range(20):  # Train for 20 epochs
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()

            output = model(X_batch)
            loss = criterion(output, y_batch)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            total_loss += loss.item()

        scheduler.step()

        # Validation
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for X_val, y_val in test_loader:
                X_val, y_val = X_val.to(device), y_val.to(device)
                val_output = model(X_val)
                predicted = torch.argmax(val_output, 1)
                correct += (predicted == y_val).sum().item()
                total += y_val.size(0)
        val_accuracy = correct / total

        # Print every epoch result
        print(f'{model_type} | Epoch {epoch+1}: Loss {total_loss:.4f}, Val Acc {val_accuracy:.4f}')
        epoch_results.append((epoch+1, model_type, total_loss, val_accuracy))

    exec_time = time.time() - start_time
    return epoch_results, exec_time, sum(p.numel() for p in model.parameters())

# Running experiments
results = []
epoch_logs = []

for model in ["LSTM", "GRU"]:
    epoch_results, exec_time, model_size = train_model(model)
    results.append((model, sequence_length, exec_time, model_size))
    epoch_logs.extend(epoch_results)

# Save and display epoch-wise results
epoch_df = pd.DataFrame(epoch_logs, columns=["Epoch", "Model", "Training Loss", "Validation Accuracy"])
epoch_df.to_csv("epoch_logs.csv", index=False)
files.download("epoch_logs.csv")

# Save and display model-wise results
results_df = pd.DataFrame(results, columns=["Model", "Sequence Length", "Execution Time (s)", "Model Size"])
results_df.to_csv("lstm_gru_comparison.csv", index=False)
files.download("lstm_gru_comparison.csv")

# Print final results
print(results_df)


Using device: cuda
LSTM | Epoch 1: Loss 24807.0632, Val Acc 0.4987
LSTM | Epoch 2: Loss 22972.0424, Val Acc 0.5096
LSTM | Epoch 3: Loss 22690.8442, Val Acc 0.5125
LSTM | Epoch 4: Loss 22640.9099, Val Acc 0.5114
LSTM | Epoch 5: Loss 22692.5390, Val Acc 0.5153
LSTM | Epoch 6: Loss 22738.0605, Val Acc 0.5110
LSTM | Epoch 7: Loss 22850.8748, Val Acc 0.5078
LSTM | Epoch 8: Loss 21727.7429, Val Acc 0.5334
LSTM | Epoch 9: Loss 21296.8081, Val Acc 0.5370
LSTM | Epoch 10: Loss 21123.7980, Val Acc 0.5405
LSTM | Epoch 11: Loss 21000.2590, Val Acc 0.5428
LSTM | Epoch 12: Loss 20914.7041, Val Acc 0.5415
LSTM | Epoch 13: Loss 20834.1538, Val Acc 0.5438
LSTM | Epoch 14: Loss 20787.0868, Val Acc 0.5423
LSTM | Epoch 15: Loss 20227.3076, Val Acc 0.5552
LSTM | Epoch 16: Loss 20025.8245, Val Acc 0.5584
LSTM | Epoch 17: Loss 19917.1863, Val Acc 0.5587
LSTM | Epoch 18: Loss 19839.5604, Val Acc 0.5603
LSTM | Epoch 19: Loss 19787.8821, Val Acc 0.5593
LSTM | Epoch 20: Loss 19729.9911, Val Acc 0.5590
GRU | Epoc

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

  Model  Sequence Length  Execution Time (s)  Model Size
0  LSTM               50         1084.554183      148801
1   GRU               50         1059.254674      115777
