# Import Libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import requests
import time

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Problem 2

Build the model for.LSTM and rnn.GRU for the tiny Shakespeare dataset, the data loader code is already provided.

Train the models for the sequence of 20 and 30, report and compare training loss, validation accuracy, execution time for training, and computational and mode size complexities across the two models.
Adjust the hyperparameters (fully connected network, number of hidden layers, and the number of hidden states) and compare your results (training and validation loss, computation complexity, model size, training and inference time, and the output sequence). Analyze their influence on accuracy, running time, and computational perplexity.
What if we increase the sequence length to 50? Perform the training and report the accuracy and model complexity results.

LSTM for Sequences of 20

In [3]:
# Step 1: Download and prepare the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text

sequence_length = 20

chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

encoded_text = [char_to_int[ch] for ch in text]

sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

dataset = CharDataset(sequences, targets)

batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)

# Step 2: Define the LSTM model
class CharLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.lstm(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return (torch.zeros(1, batch_size, self.hidden_size, device=device),
                torch.zeros(1, batch_size, self.hidden_size, device=device))

# Step 3: Instantiate the model, loss function, and optimizer
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
learning_rate = 0.001

model = CharLSTM(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Step 4: Training the model
epochs = 20
start_time = time.time()
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        hidden = model.init_hidden(data.size(0))
        output, hidden = model(data, hidden)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {running_loss / len(train_loader)}")

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

# Step 5: Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        hidden = model.init_hidden(data.size(0))
        output, hidden = model(data, hidden)
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")

Epoch 1/20, Training Loss: 1.7044667768635697
Epoch 2/20, Training Loss: 1.4805794546676654
Epoch 3/20, Training Loss: 1.4196120505694063
Epoch 4/20, Training Loss: 1.383898568175609
Epoch 5/20, Training Loss: 1.3588185905726287
Epoch 6/20, Training Loss: 1.3396095018139258
Epoch 7/20, Training Loss: 1.32466255495142
Epoch 8/20, Training Loss: 1.3111787827464265
Epoch 9/20, Training Loss: 1.300564376636271
Epoch 10/20, Training Loss: 1.2909448598214968
Epoch 11/20, Training Loss: 1.2828321612161668
Epoch 12/20, Training Loss: 1.275565198036493
Epoch 13/20, Training Loss: 1.2688569088503334
Epoch 14/20, Training Loss: 1.2631857866999479
Epoch 15/20, Training Loss: 1.257508506027023
Epoch 16/20, Training Loss: 1.2534969425105666
Epoch 17/20, Training Loss: 1.2484862255899645
Epoch 18/20, Training Loss: 1.2452146334769187
Epoch 19/20, Training Loss: 1.241536570045022
Epoch 20/20, Training Loss: 1.2385825407843583
Total execution time for training: 368.2469711303711 seconds
Accuracy on tes

LSTM for Sequences of 30

In [4]:
# Step 1: Download and prepare the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text

sequence_length = 30

chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

encoded_text = [char_to_int[ch] for ch in text]

sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

dataset = CharDataset(sequences, targets)

batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)

# Step 2: Define the LSTM model
class CharLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.lstm(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return (torch.zeros(1, batch_size, self.hidden_size, device=device),
                torch.zeros(1, batch_size, self.hidden_size, device=device))

# Step 3: Instantiate the model, loss function, and optimizer
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
learning_rate = 0.001

model = CharLSTM(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Step 4: Training the model
epochs = 20
start_time = time.time()
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        hidden = model.init_hidden(data.size(0))
        output, hidden = model(data, hidden)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {running_loss / len(train_loader)}")

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

# Step 5: Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        hidden = model.init_hidden(data.size(0))
        output, hidden = model(data, hidden)
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")

Epoch 1/20, Training Loss: 1.6950380157719929
Epoch 2/20, Training Loss: 1.4720642535903983
Epoch 3/20, Training Loss: 1.411861192732109
Epoch 4/20, Training Loss: 1.3755707422242518
Epoch 5/20, Training Loss: 1.3514177188644858
Epoch 6/20, Training Loss: 1.3314216484326915
Epoch 7/20, Training Loss: 1.316760920000979
Epoch 8/20, Training Loss: 1.303829388860752
Epoch 9/20, Training Loss: 1.2932665293509429
Epoch 10/20, Training Loss: 1.2840459199212984
Epoch 11/20, Training Loss: 1.27549685058208
Epoch 12/20, Training Loss: 1.2689115162714417
Epoch 13/20, Training Loss: 1.2621861946049326
Epoch 14/20, Training Loss: 1.2566690268919376
Epoch 15/20, Training Loss: 1.251834972042909
Epoch 16/20, Training Loss: 1.2464262265466235
Epoch 17/20, Training Loss: 1.242239686166221
Epoch 18/20, Training Loss: 1.2380418947080873
Epoch 19/20, Training Loss: 1.2343832001611548
Epoch 20/20, Training Loss: 1.2304845427772606
Total execution time for training: 486.49959897994995 seconds
Accuracy on te

LSTM for Sequences of 50

In [5]:
# Step 1: Download and prepare the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text

sequence_length = 50

chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

encoded_text = [char_to_int[ch] for ch in text]

sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

dataset = CharDataset(sequences, targets)

batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Step 2: Define the LSTM model
class CharLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.lstm(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return (torch.zeros(1, batch_size, self.hidden_size, device=device),
                torch.zeros(1, batch_size, self.hidden_size, device=device))

# Step 3: Instantiate the model, loss function, and optimizer
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
learning_rate = 0.001

model = CharLSTM(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Step 4: Training the model
epochs = 20
start_time = time.time()
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        hidden = model.init_hidden(data.size(0))
        output, hidden = model(data, hidden)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {running_loss / len(train_loader)}")

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

# Step 5: Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        hidden = model.init_hidden(data.size(0))
        output, hidden = model(data, hidden)
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")

Epoch 1/20, Training Loss: 1.704880822818123
Epoch 2/20, Training Loss: 1.4727437397343
Epoch 3/20, Training Loss: 1.409704635969588
Epoch 4/20, Training Loss: 1.3736175929024237
Epoch 5/20, Training Loss: 1.3474052431011898
Epoch 6/20, Training Loss: 1.3275314313475122
Epoch 7/20, Training Loss: 1.3117519244797482
Epoch 8/20, Training Loss: 1.2983366263389862
Epoch 9/20, Training Loss: 1.286451895949044
Epoch 10/20, Training Loss: 1.2770698783541663
Epoch 11/20, Training Loss: 1.2686877055466388
Epoch 12/20, Training Loss: 1.2611189105900267
Epoch 13/20, Training Loss: 1.2536283979677878
Epoch 14/20, Training Loss: 1.2471858707293089
Epoch 15/20, Training Loss: 1.2413178995783916
Epoch 16/20, Training Loss: 1.2363641978746642
Epoch 17/20, Training Loss: 1.2315996338332371
Epoch 18/20, Training Loss: 1.2276254321953912
Epoch 19/20, Training Loss: 1.223973639861974
Epoch 20/20, Training Loss: 1.2205282230661754
Total execution time for training: 806.8999671936035 seconds
Accuracy on tes

GRU for Sequences of 20

In [9]:
# Step 1: Download and prepare the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text

sequence_length = 20

chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

encoded_text = [char_to_int[ch] for ch in text]

sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

dataset = CharDataset(sequences, targets)

batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Step 2: Define the GRU model
class CharGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.gru(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size, device=device)

# Step 3: Instantiate the model, loss function, and optimizer
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
learning_rate = 0.001

model = CharGRU(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Step 4: Training the model
epochs = 20
start_time = time.time()
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        hidden = model.init_hidden(data.size(0))
        output, hidden = model(data, hidden)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {running_loss / len(train_loader)}")

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

# Step 5: Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        hidden = model.init_hidden(data.size(0))
        output, hidden = model(data, hidden)
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")

Epoch 1/20, Training Loss: 1.693705230181886
Epoch 2/20, Training Loss: 1.5016383575045782
Epoch 3/20, Training Loss: 1.4563810346595734
Epoch 4/20, Training Loss: 1.4295546779910244
Epoch 5/20, Training Loss: 1.4137790482309585
Epoch 6/20, Training Loss: 1.402241639874136
Epoch 7/20, Training Loss: 1.392108776423347
Epoch 8/20, Training Loss: 1.387681982150313
Epoch 9/20, Training Loss: 1.3829168865838684
Epoch 10/20, Training Loss: 1.3788068379503655
Epoch 11/20, Training Loss: 1.3765765258842546
Epoch 12/20, Training Loss: 1.373994446123939
Epoch 13/20, Training Loss: 1.3747096779834793
Epoch 14/20, Training Loss: 1.3736125485297257
Epoch 15/20, Training Loss: 1.372895422829203
Epoch 16/20, Training Loss: 1.3750756927351806
Epoch 17/20, Training Loss: 1.3765832916440734
Epoch 18/20, Training Loss: 1.3774741507438293
Epoch 19/20, Training Loss: 1.378143287917286
Epoch 20/20, Training Loss: 1.3799517101261443
Total execution time for training: 471.5668182373047 seconds
Accuracy on tes

GRU for Sequences of 30

In [11]:
# Step 1: Download and prepare the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text

sequence_length = 30

chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

encoded_text = [char_to_int[ch] for ch in text]

sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

dataset = CharDataset(sequences, targets)

batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Step 2: Define the GRU model
class CharGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.gru(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size, device=device)

# Step 3: Instantiate the model, loss function, and optimizer
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
learning_rate = 0.001

model = CharGRU(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Step 4: Training the model
epochs = 20
start_time = time.time()
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        hidden = model.init_hidden(data.size(0))
        output, hidden = model(data, hidden)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {running_loss / len(train_loader)}")

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

# Step 5: Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        hidden = model.init_hidden(data.size(0))
        output, hidden = model(data, hidden)
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")

Epoch 1/20, Training Loss: 1.6845021229696835
Epoch 2/20, Training Loss: 1.4907634529107203
Epoch 3/20, Training Loss: 1.4462939730438773
Epoch 4/20, Training Loss: 1.4209519840405032
Epoch 5/20, Training Loss: 1.4030264049845391
Epoch 6/20, Training Loss: 1.3909033857760453
Epoch 7/20, Training Loss: 1.3831645669314072
Epoch 8/20, Training Loss: 1.376318691006626
Epoch 9/20, Training Loss: 1.3696787143430693
Epoch 10/20, Training Loss: 1.3671412442138122
Epoch 11/20, Training Loss: 1.363532286776391
Epoch 12/20, Training Loss: 1.362036306695287
Epoch 13/20, Training Loss: 1.3613632699315465
Epoch 14/20, Training Loss: 1.3603778393341766
Epoch 15/20, Training Loss: 1.3611623440931668
Epoch 16/20, Training Loss: 1.3613169246699983
Epoch 17/20, Training Loss: 1.362958763241426
Epoch 18/20, Training Loss: 1.3644108500030283
Epoch 19/20, Training Loss: 1.368361405566617
Epoch 20/20, Training Loss: 1.3688058824424285
Total execution time for training: 577.7683596611023 seconds
Accuracy on t

GRU for Sequences of 50

In [12]:
# Step 1: Download and prepare the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text

sequence_length = 50

chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

encoded_text = [char_to_int[ch] for ch in text]

sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

dataset = CharDataset(sequences, targets)

batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Step 2: Define the GRU model
class CharGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.gru(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return (torch.zeros(1, batch_size, self.hidden_size, device=device))
        
# Step 3: Instantiate the model, loss function, and optimizer
input_size = len(chars)
hidden_size = 256
output_size = len(chars)
learning_rate = 0.001

model = CharGRU(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Step 4: Training the model
epochs = 20
start_time = time.time()
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        hidden = model.init_hidden(data.size(0))
        output, hidden = model(data, hidden)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {running_loss / len(train_loader)}")

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

# Step 5: Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        hidden = model.init_hidden(data.size(0))
        output, hidden = model(data, hidden)
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")

Epoch 1/20, Training Loss: 1.6898421757430877
Epoch 2/20, Training Loss: 1.488524606555867
Epoch 3/20, Training Loss: 1.4408102502543791
Epoch 4/20, Training Loss: 1.4152128369731927
Epoch 5/20, Training Loss: 1.3974233954661786
Epoch 6/20, Training Loss: 1.3845649699093605
Epoch 7/20, Training Loss: 1.3758783903496112
Epoch 8/20, Training Loss: 1.3680664905857414
Epoch 9/20, Training Loss: 1.3630427946029957
Epoch 10/20, Training Loss: 1.3593863217359703
Epoch 11/20, Training Loss: 1.3564262528139726
Epoch 12/20, Training Loss: 1.3537577885822831
Epoch 13/20, Training Loss: 1.3524633977765794
Epoch 14/20, Training Loss: 1.350524461324101
Epoch 15/20, Training Loss: 1.3530915290514856
Epoch 16/20, Training Loss: 1.353417439476829
Epoch 17/20, Training Loss: 1.3531882635251877
Epoch 18/20, Training Loss: 1.3544715554689235
Epoch 19/20, Training Loss: 1.3554600750146508
Epoch 20/20, Training Loss: 1.357957244594852
Total execution time for training: 783.5927467346191 seconds
Accuracy on 