<a href="https://colab.research.google.com/github/karrin10/Introduction-to-Deep-Learning/blob/Homework-5/hw5_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import requests
import time
import math

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Problem 2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# sequence of 20:

# download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of tokenising an HTTP error page.
response.raise_for_status()
text = response.text

# prepare the dataset
sequence_length = 20
# Trim the corpus to a whole multiple of the window length.
text = text[:sequence_length * (len(text)//sequence_length)]

# create a character mapping to integers
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = dict(enumerate(chars))

# encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]
if len(encoded_text) <= sequence_length:
    raise ValueError(
        f"Corpus has {len(encoded_text)} characters; need more than "
        f"{sequence_length} to build at least one (window, target) pair."
    )

# Build every stride-1 window as a strided view of one tensor instead of
# materialising one Python list per window.
encoded_tensor = torch.tensor(encoded_text, dtype=torch.long)
# unfold() also yields the final window, which has no following character to
# predict, so it is dropped to keep sequences and targets the same length.
sequences = encoded_tensor.unfold(0, sequence_length, 1)[:-1]
# targets[i] is the character that immediately follows sequences[i].
targets = encoded_tensor[sequence_length:]

# create dataset class
class CharDataset(Dataset):
    """Map-style dataset pairing ``sequences[i]`` with ``targets[i]``."""

    def __init__(self, sequences, targets):
        # Keep references to both parallel containers; nothing is copied.
        self.sequences, self.targets = sequences, targets

    def __len__(self):
        """Return the number of stored sequences."""
        n_items = len(self.sequences)
        return n_items

    def __getitem__(self, index):
        """Return the ``(sequence, target)`` pair stored at ``index``."""
        window = self.sequences[index]
        label = self.targets[index]
        return window, label

dataset = CharDataset(sequences, targets)

# 80/20 random split, then wrap each subset in a batched loader.
# NOTE(review): the windows are built with stride 1, so a purely random split
# puts heavily overlapping windows in both subsets - confirm this is intended.
batch_size = 128
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
split_lengths = [train_size, test_size]
train_dataset, test_dataset = torch.utils.data.random_split(dataset, split_lengths)

# Only the training loader reshuffles every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class CharModel(nn.Module):
    """Character-level classifier built on ``nn.TransformerEncoder``.

    Token ids are embedded, sinusoidal position information is added, the
    result is run through the encoder, and the encoding of the last sequence
    position is projected to ``output_size`` logits.

    Args:
        input_size: Vocabulary size (number of embedding rows).
        hidden_size: Embedding width, also used as the encoder ``d_model``.
        output_size: Number of logits produced per sample.
        model_type: Must be ``'Transformer'``.
        num_layers: Number of stacked encoder layers.
        num_heads: Attention heads per encoder layer.
        dim_feedforward: Width of each layer's feed-forward sublayer.
        dropout: Dropout probability inside the encoder layers.

    Raises:
        ValueError: If ``model_type`` is not ``'Transformer'``.
    """

    def __init__(self, input_size, hidden_size, output_size, model_type='Transformer', num_layers=2, num_heads=2, dim_feedforward=256, dropout=0.1):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        if model_type == 'Transformer':
            # forward() feeds (batch, seq, feature) tensors. With the default
            # batch_first=False the encoder would treat the batch axis as the
            # sequence and attend across unrelated samples.
            encoder_layer = nn.TransformerEncoderLayer(
                d_model=hidden_size,
                nhead=num_heads,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                batch_first=True,
            )
            self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        else:
            raise ValueError("Invalid model type. Choose 'Transformer'.")
        self.fc = nn.Linear(hidden_size, output_size)

    def _positional_encoding(self, seq_len, like):
        """Return a ``(seq_len, hidden_size)`` sinusoidal table on ``like``'s device/dtype."""
        positions = torch.arange(seq_len, device=like.device, dtype=like.dtype).unsqueeze(1)
        frequencies = torch.exp(
            torch.arange(0, self.hidden_size, 2, device=like.device, dtype=like.dtype)
            * (-math.log(10000.0) / self.hidden_size)
        )
        angles = positions * frequencies
        table = torch.zeros(seq_len, self.hidden_size, device=like.device, dtype=like.dtype)
        table[:, 0::2] = torch.sin(angles)
        # Slice so an odd hidden_size (one fewer cosine slot) still fits.
        table[:, 1::2] = torch.cos(angles)[:, : self.hidden_size // 2]
        return table

    def forward(self, x):
        """Map a ``(batch, seq)`` tensor of token ids to ``(batch, output_size)`` logits."""
        embedded = self.embedding(x)
        # Self-attention is order-agnostic on its own; inject token positions.
        embedded = embedded + self._positional_encoding(embedded.size(1), embedded)
        transformer_output = self.transformer_encoder(embedded)
        # Only the final position's encoding is used for the prediction.
        output = self.fc(transformer_output[:, -1, :])
        return output

# train and evaluate function
def train_evaluate(model_type, train_loader, val_loader, device):
    """Train a fresh ``CharModel`` and evaluate it after every epoch.

    Configuration is read from module-level globals: ``chars``,
    ``hidden_size``, ``num_layers``, ``num_heads``, ``dim_feedforward``,
    ``dropout``, ``learning_rate`` and ``epochs``.

    Args:
        model_type: Forwarded to ``CharModel`` as its ``model_type``.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        device: Device the model and every batch are moved to.

    Returns:
        ``(train_loss, val_loss, val_accuracy, execution_time)`` for the last
        epoch. Losses are per-sample means, accuracy is a fraction in
        ``[0, 1]`` and ``execution_time`` is wall-clock seconds. The three
        metrics are NaN when no epoch ran or a loader produced no samples.
    """
    # Pass the configured architecture explicitly; relying on CharModel's
    # defaults would silently ignore the module-level settings.
    model = CharModel(
        len(chars),
        hidden_size,
        len(chars),
        model_type,
        num_layers=num_layers,
        num_heads=num_heads,
        dim_feedforward=dim_feedforward,
        dropout=dropout,
    ).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Defined up front so the return statement is valid even if epochs == 0.
    epoch_train_loss = epoch_val_loss = epoch_val_accuracy = float('nan')

    start_time = time.time()
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        train_seen = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            output = model(inputs)
            loss = criterion(output, targets)
            loss.backward()
            optimizer.step()
            # criterion returns the batch mean; re-weight by batch size so the
            # epoch figure is a true per-sample average.
            train_loss += loss.item() * inputs.size(0)
            train_seen += inputs.size(0)

        # Normalise by the samples actually seen (robust to drop_last/empty loaders).
        epoch_train_loss = train_loss / train_seen if train_seen else float('nan')

        # validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                val_output = model(inputs)
                loss = criterion(val_output, targets)
                val_loss += loss.item() * inputs.size(0)
                predicted = val_output.argmax(dim=1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        epoch_val_loss = val_loss / total if total else float('nan')
        epoch_val_accuracy = correct / total if total else float('nan')

        print(f'Epoch {epoch+1}, Train Loss: {epoch_train_loss}, Validation Loss: {epoch_val_loss}, Validation Accuracy: {epoch_val_accuracy}')

    execution_time = time.time() - start_time

    return epoch_train_loss, epoch_val_loss, epoch_val_accuracy, execution_time

# def. parameters
# hidden_size, learning_rate and epochs are read as globals by train_evaluate;
# the architecture values below equal CharModel's own defaults.
hidden_size = 512
num_layers, num_heads = 2, 2
dim_feedforward = 256
dropout = 0.1
learning_rate = 0.0001
epochs = 20
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# train and evaluate models for sequence length 20
print("\nTraining models for sequence length: 20")
results = {}
for model_type in ['Transformer']:
    print(f"\nTraining {model_type} model...")
    loss, val_loss, val_accuracy, execution_time = train_evaluate(
        model_type, train_loader, test_loader, device
    )
    # Keep the final-epoch metrics under the model's name.
    results[model_type] = dict(
        loss=loss,
        val_loss=val_loss,
        val_accuracy=val_accuracy,
        execution_time=execution_time,
    )

# print results
print("\nResults for sequence length: 20")
for model_type, data in results.items():
    summary_lines = (
        f"\n{model_type} Model:",
        f"Training Loss: {data['loss']}",
        f"Validation Loss: {data['val_loss']}",
        f"Validation Accuracy: {data['val_accuracy']}",
        f"Execution Time: {data['execution_time']} seconds",
    )
    for line in summary_lines:
        print(line)


Training models for sequence length: 20

Training Transformer model...




In [None]:
# sequence of 30:

# repeat steps from sequence of 20:
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of tokenising an HTTP error page.
response.raise_for_status()
text = response.text

sequence_length = 30
# Trim the corpus to a whole multiple of the window length.
text = text[:sequence_length * (len(text)//sequence_length)]

# Character <-> integer id lookup tables over the sorted vocabulary.
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = dict(enumerate(chars))

encoded_text = [char_to_int[ch] for ch in text]
if len(encoded_text) <= sequence_length:
    raise ValueError(
        f"Corpus has {len(encoded_text)} characters; need more than "
        f"{sequence_length} to build at least one (window, target) pair."
    )

# Build every stride-1 window as a strided view of one tensor instead of
# materialising one Python list per window.
encoded_tensor = torch.tensor(encoded_text, dtype=torch.long)
# unfold() also yields the final window, which has no following character to
# predict, so it is dropped to keep sequences and targets the same length.
sequences = encoded_tensor.unfold(0, sequence_length, 1)[:-1]
# targets[i] is the character that immediately follows sequences[i].
targets = encoded_tensor[sequence_length:]

class CharDataset(Dataset):
    """Map-style dataset pairing ``sequences[i]`` with ``targets[i]``."""

    def __init__(self, sequences, targets):
        # Keep references to both parallel containers; nothing is copied.
        self.sequences, self.targets = sequences, targets

    def __len__(self):
        """Return the number of stored sequences."""
        n_items = len(self.sequences)
        return n_items

    def __getitem__(self, index):
        """Return the ``(sequence, target)`` pair stored at ``index``."""
        window = self.sequences[index]
        label = self.targets[index]
        return window, label

dataset = CharDataset(sequences, targets)

# 80/20 random split, then wrap each subset in a batched loader.
batch_size = 128
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
split_lengths = [train_size, test_size]
train_dataset, test_dataset = torch.utils.data.random_split(dataset, split_lengths)

# Only the training loader reshuffles every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class CharModel(nn.Module):
    """Character-level classifier built on ``nn.TransformerEncoder``.

    Token ids are embedded, sinusoidal position information is added, the
    result is run through the encoder, and the encoding of the last sequence
    position is projected to ``output_size`` logits.

    Args:
        input_size: Vocabulary size (number of embedding rows).
        hidden_size: Embedding width, also used as the encoder ``d_model``.
        output_size: Number of logits produced per sample.
        model_type: Must be ``'Transformer'``.
        num_layers: Number of stacked encoder layers.
        num_heads: Attention heads per encoder layer.
        dim_feedforward: Width of each layer's feed-forward sublayer.
        dropout: Dropout probability inside the encoder layers.

    Raises:
        ValueError: If ``model_type`` is not ``'Transformer'``.
    """

    def __init__(self, input_size, hidden_size, output_size, model_type='Transformer', num_layers=2, num_heads=2, dim_feedforward=256, dropout=0.1):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        if model_type == 'Transformer':
            # forward() feeds (batch, seq, feature) tensors. With the default
            # batch_first=False the encoder would treat the batch axis as the
            # sequence and attend across unrelated samples.
            encoder_layer = nn.TransformerEncoderLayer(
                d_model=hidden_size,
                nhead=num_heads,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                batch_first=True,
            )
            self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        else:
            raise ValueError("Invalid model type. Choose 'Transformer'.")
        self.fc = nn.Linear(hidden_size, output_size)

    def _positional_encoding(self, seq_len, like):
        """Return a ``(seq_len, hidden_size)`` sinusoidal table on ``like``'s device/dtype."""
        positions = torch.arange(seq_len, device=like.device, dtype=like.dtype).unsqueeze(1)
        frequencies = torch.exp(
            torch.arange(0, self.hidden_size, 2, device=like.device, dtype=like.dtype)
            * (-math.log(10000.0) / self.hidden_size)
        )
        angles = positions * frequencies
        table = torch.zeros(seq_len, self.hidden_size, device=like.device, dtype=like.dtype)
        table[:, 0::2] = torch.sin(angles)
        # Slice so an odd hidden_size (one fewer cosine slot) still fits.
        table[:, 1::2] = torch.cos(angles)[:, : self.hidden_size // 2]
        return table

    def forward(self, x):
        """Map a ``(batch, seq)`` tensor of token ids to ``(batch, output_size)`` logits."""
        embedded = self.embedding(x)
        # Self-attention is order-agnostic on its own; inject token positions.
        embedded = embedded + self._positional_encoding(embedded.size(1), embedded)
        transformer_output = self.transformer_encoder(embedded)
        # Only the final position's encoding is used for the prediction.
        output = self.fc(transformer_output[:, -1, :])
        return output

def train_evaluate(model_type, train_loader, val_loader, device):
    """Train a fresh ``CharModel`` and evaluate it after every epoch.

    Configuration is read from module-level globals: ``chars``,
    ``hidden_size``, ``num_layers``, ``num_heads``, ``dim_feedforward``,
    ``dropout``, ``learning_rate`` and ``epochs``.

    Args:
        model_type: Forwarded to ``CharModel`` as its ``model_type``.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        device: Device the model and every batch are moved to.

    Returns:
        ``(train_loss, val_loss, val_accuracy, execution_time)`` for the last
        epoch. Losses are per-sample means, accuracy is a fraction in
        ``[0, 1]`` and ``execution_time`` is wall-clock seconds. The three
        metrics are NaN when no epoch ran or a loader produced no samples.
    """
    # Pass the configured architecture explicitly; relying on CharModel's
    # defaults would silently ignore the module-level settings.
    model = CharModel(
        len(chars),
        hidden_size,
        len(chars),
        model_type,
        num_layers=num_layers,
        num_heads=num_heads,
        dim_feedforward=dim_feedforward,
        dropout=dropout,
    ).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Defined up front so the return statement is valid even if epochs == 0.
    epoch_train_loss = epoch_val_loss = epoch_val_accuracy = float('nan')

    start_time = time.time()
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        train_seen = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            output = model(inputs)
            loss = criterion(output, targets)
            loss.backward()
            optimizer.step()
            # criterion returns the batch mean; re-weight by batch size so the
            # epoch figure is a true per-sample average.
            train_loss += loss.item() * inputs.size(0)
            train_seen += inputs.size(0)

        # Normalise by the samples actually seen (robust to drop_last/empty loaders).
        epoch_train_loss = train_loss / train_seen if train_seen else float('nan')

        # Evaluation pass: no gradient tracking, dropout disabled by eval().
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                val_output = model(inputs)
                loss = criterion(val_output, targets)
                val_loss += loss.item() * inputs.size(0)
                predicted = val_output.argmax(dim=1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        epoch_val_loss = val_loss / total if total else float('nan')
        epoch_val_accuracy = correct / total if total else float('nan')

        print(f'Epoch {epoch+1}, Train Loss: {epoch_train_loss}, Validation Loss: {epoch_val_loss}, Validation Accuracy: {epoch_val_accuracy}')

    execution_time = time.time() - start_time

    return epoch_train_loss, epoch_val_loss, epoch_val_accuracy, execution_time

# hidden_size, learning_rate and epochs are read as globals by train_evaluate;
# the architecture values below equal CharModel's own defaults.
hidden_size = 512
num_layers = 2
num_heads = 2
dim_feedforward = 256
dropout = 0.1
learning_rate = 0.0001
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
epochs = 20

# Report the sequence length actually configured for this cell rather than a
# hard-coded "20" copied from the first experiment.
print(f"\nTraining models for sequence length: {sequence_length}")
results = {}
for model_type in ['Transformer']:
    print(f"\nTraining {model_type} model...")
    loss, val_loss, val_accuracy, execution_time = train_evaluate(model_type, train_loader, test_loader, device)
    # Keep the final-epoch metrics under the model's name.
    results[model_type] = {
        'loss': loss,
        'val_loss': val_loss,
        'val_accuracy': val_accuracy,
        'execution_time': execution_time
    }

print(f"\nResults for sequence length: {sequence_length}")
for model_type, data in results.items():
    print(f"\n{model_type} Model:")
    print(f"Training Loss: {data['loss']}")
    print(f"Validation Loss: {data['val_loss']}")
    print(f"Validation Accuracy: {data['val_accuracy']}")
    print(f"Execution Time: {data['execution_time']} seconds")

In [None]:
# sequence of 50:

# repeat steps:
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of tokenising an HTTP error page.
response.raise_for_status()
text = response.text

sequence_length = 50
# Trim the corpus to a whole multiple of the window length.
text = text[:sequence_length * (len(text)//sequence_length)]

# Character <-> integer id lookup tables over the sorted vocabulary.
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = dict(enumerate(chars))

encoded_text = [char_to_int[ch] for ch in text]
if len(encoded_text) <= sequence_length:
    raise ValueError(
        f"Corpus has {len(encoded_text)} characters; need more than "
        f"{sequence_length} to build at least one (window, target) pair."
    )

# Build every stride-1 window as a strided view of one tensor instead of
# materialising one Python list per window.
encoded_tensor = torch.tensor(encoded_text, dtype=torch.long)
# unfold() also yields the final window, which has no following character to
# predict, so it is dropped to keep sequences and targets the same length.
sequences = encoded_tensor.unfold(0, sequence_length, 1)[:-1]
# targets[i] is the character that immediately follows sequences[i].
targets = encoded_tensor[sequence_length:]

class CharDataset(Dataset):
    """Map-style dataset pairing ``sequences[i]`` with ``targets[i]``."""

    def __init__(self, sequences, targets):
        # Keep references to both parallel containers; nothing is copied.
        self.sequences, self.targets = sequences, targets

    def __len__(self):
        """Return the number of stored sequences."""
        n_items = len(self.sequences)
        return n_items

    def __getitem__(self, index):
        """Return the ``(sequence, target)`` pair stored at ``index``."""
        window = self.sequences[index]
        label = self.targets[index]
        return window, label

dataset = CharDataset(sequences, targets)

# 80/20 random split, then wrap each subset in a batched loader.
batch_size = 128
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
split_lengths = [train_size, test_size]
train_dataset, test_dataset = torch.utils.data.random_split(dataset, split_lengths)

# Only the training loader reshuffles every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class CharModel(nn.Module):
    """Character-level classifier built on ``nn.TransformerEncoder``.

    Token ids are embedded, sinusoidal position information is added, the
    result is run through the encoder, and the encoding of the last sequence
    position is projected to ``output_size`` logits.

    Args:
        input_size: Vocabulary size (number of embedding rows).
        hidden_size: Embedding width, also used as the encoder ``d_model``.
        output_size: Number of logits produced per sample.
        model_type: Must be ``'Transformer'``.
        num_layers: Number of stacked encoder layers.
        num_heads: Attention heads per encoder layer.
        dim_feedforward: Width of each layer's feed-forward sublayer.
        dropout: Dropout probability inside the encoder layers.

    Raises:
        ValueError: If ``model_type`` is not ``'Transformer'``.
    """

    def __init__(self, input_size, hidden_size, output_size, model_type='Transformer', num_layers=2, num_heads=2, dim_feedforward=256, dropout=0.1):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        if model_type == 'Transformer':
            # forward() feeds (batch, seq, feature) tensors. With the default
            # batch_first=False the encoder would treat the batch axis as the
            # sequence and attend across unrelated samples.
            encoder_layer = nn.TransformerEncoderLayer(
                d_model=hidden_size,
                nhead=num_heads,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                batch_first=True,
            )
            self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        else:
            raise ValueError("Invalid model type. Choose 'Transformer'.")
        self.fc = nn.Linear(hidden_size, output_size)

    def _positional_encoding(self, seq_len, like):
        """Return a ``(seq_len, hidden_size)`` sinusoidal table on ``like``'s device/dtype."""
        positions = torch.arange(seq_len, device=like.device, dtype=like.dtype).unsqueeze(1)
        frequencies = torch.exp(
            torch.arange(0, self.hidden_size, 2, device=like.device, dtype=like.dtype)
            * (-math.log(10000.0) / self.hidden_size)
        )
        angles = positions * frequencies
        table = torch.zeros(seq_len, self.hidden_size, device=like.device, dtype=like.dtype)
        table[:, 0::2] = torch.sin(angles)
        # Slice so an odd hidden_size (one fewer cosine slot) still fits.
        table[:, 1::2] = torch.cos(angles)[:, : self.hidden_size // 2]
        return table

    def forward(self, x):
        """Map a ``(batch, seq)`` tensor of token ids to ``(batch, output_size)`` logits."""
        embedded = self.embedding(x)
        # Self-attention is order-agnostic on its own; inject token positions.
        embedded = embedded + self._positional_encoding(embedded.size(1), embedded)
        transformer_output = self.transformer_encoder(embedded)
        # Only the final position's encoding is used for the prediction.
        output = self.fc(transformer_output[:, -1, :])
        return output

def train_evaluate(model_type, train_loader, val_loader, device):
    """Train a fresh ``CharModel`` and evaluate it after every epoch.

    Configuration is read from module-level globals: ``chars``,
    ``hidden_size``, ``num_layers``, ``num_heads``, ``dim_feedforward``,
    ``dropout``, ``learning_rate`` and ``epochs``.

    Args:
        model_type: Forwarded to ``CharModel`` as its ``model_type``.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        device: Device the model and every batch are moved to.

    Returns:
        ``(train_loss, val_loss, val_accuracy, execution_time)`` for the last
        epoch. Losses are per-sample means, accuracy is a fraction in
        ``[0, 1]`` and ``execution_time`` is wall-clock seconds. The three
        metrics are NaN when no epoch ran or a loader produced no samples.
    """
    # Pass the configured architecture explicitly; relying on CharModel's
    # defaults would silently ignore the module-level settings.
    model = CharModel(
        len(chars),
        hidden_size,
        len(chars),
        model_type,
        num_layers=num_layers,
        num_heads=num_heads,
        dim_feedforward=dim_feedforward,
        dropout=dropout,
    ).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Defined up front so the return statement is valid even if epochs == 0.
    epoch_train_loss = epoch_val_loss = epoch_val_accuracy = float('nan')

    start_time = time.time()
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        train_seen = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            output = model(inputs)
            loss = criterion(output, targets)
            loss.backward()
            optimizer.step()
            # criterion returns the batch mean; re-weight by batch size so the
            # epoch figure is a true per-sample average.
            train_loss += loss.item() * inputs.size(0)
            train_seen += inputs.size(0)

        # Normalise by the samples actually seen (robust to drop_last/empty loaders).
        epoch_train_loss = train_loss / train_seen if train_seen else float('nan')

        # Evaluation pass: no gradient tracking, dropout disabled by eval().
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                val_output = model(inputs)
                loss = criterion(val_output, targets)
                val_loss += loss.item() * inputs.size(0)
                predicted = val_output.argmax(dim=1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        epoch_val_loss = val_loss / total if total else float('nan')
        epoch_val_accuracy = correct / total if total else float('nan')

        print(f'Epoch {epoch+1}, Train Loss: {epoch_train_loss}, Validation Loss: {epoch_val_loss}, Validation Accuracy: {epoch_val_accuracy}')

    execution_time = time.time() - start_time

    return epoch_train_loss, epoch_val_loss, epoch_val_accuracy, execution_time

# hidden_size, learning_rate and epochs are read as globals by train_evaluate;
# the architecture values below equal CharModel's own defaults.
hidden_size = 512
num_layers = 2
num_heads = 2
dim_feedforward = 256
dropout = 0.1
learning_rate = 0.0001
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
epochs = 20

# Report the sequence length actually configured for this cell rather than a
# hard-coded "20" copied from the first experiment.
print(f"\nTraining models for sequence length: {sequence_length}")
results = {}
for model_type in ['Transformer']:
    print(f"\nTraining {model_type} model...")
    loss, val_loss, val_accuracy, execution_time = train_evaluate(model_type, train_loader, test_loader, device)
    # Keep the final-epoch metrics under the model's name.
    results[model_type] = {
        'loss': loss,
        'val_loss': val_loss,
        'val_accuracy': val_accuracy,
        'execution_time': execution_time
    }

print(f"\nResults for sequence length: {sequence_length}")
for model_type, data in results.items():
    print(f"\n{model_type} Model:")
    print(f"Training Loss: {data['loss']}")
    print(f"Validation Loss: {data['val_loss']}")
    print(f"Validation Accuracy: {data['val_accuracy']}")
    print(f"Execution Time: {data['execution_time']} seconds")