In [1]:
from pytorch_optim_training_manager import train_manager
import torch
import torchvision
from datasets import load_dataset
import datasets
import torchvision.transforms as transforms
import models
import os
import re
from collections import Counter
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
os.environ["WANDB_DISABLED"] = "true"
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [4]:
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

P1

In [39]:
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))])

In [40]:
training_set = torchvision.datasets.FashionMNIST('./data', train=True, transform=transform, download=True)
validation_set = torchvision.datasets.FashionMNIST('./data', train=False, transform=transform, download=True)

In [41]:
training_loader = torch.utils.data.DataLoader(training_set, batch_size=128, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=128, shuffle=False)

In [42]:
model = models.CNN_Simple()

In [10]:
torch.save(model.state_dict(), 'models/CNN_Simple.pt')

In [62]:
loss_fn = torch.nn.CrossEntropyLoss()

start again here if evaluating for multiple optimizers

In [61]:
model.load_state_dict(torch.load('models/CNN_Simple.pt'))

<All keys matched successfully>

In [63]:
optimizer = torch.optim.RMSprop(model.parameters())

In [64]:
manager = train_manager(model, loss_fn, optimizer,training_loader,validation_loader,device=device)

In [65]:
losses = manager.train(20, verbose=True, eval_all_epochs=True, eval_mode='accuracy')

epoch0: train_loss: 0.7445
eval_loss: 0.7654
epoch1: train_loss: 0.4870
eval_loss: 0.8421
epoch2: train_loss: 0.4218
eval_loss: 0.8280
epoch3: train_loss: 0.3985
eval_loss: 0.8498
epoch4: train_loss: 0.3834
eval_loss: 0.8499
epoch5: train_loss: 0.3681
eval_loss: 0.8615
epoch6: train_loss: 0.3534
eval_loss: 0.8421
epoch7: train_loss: 0.3487
eval_loss: 0.8549
epoch8: train_loss: 0.3395
eval_loss: 0.8593
epoch9: train_loss: 0.3377
eval_loss: 0.8275
epoch10: train_loss: 0.3334
eval_loss: 0.8585
epoch11: train_loss: 0.3271
eval_loss: 0.8700
epoch12: train_loss: 0.3228
eval_loss: 0.8732
epoch13: train_loss: 0.3229
eval_loss: 0.8624
epoch14: train_loss: 0.3187
eval_loss: 0.8546
epoch15: train_loss: 0.3144
eval_loss: 0.8699
epoch16: train_loss: 0.3065
eval_loss: 0.8671
epoch17: train_loss: 0.3106
eval_loss: 0.8713
epoch18: train_loss: 0.3165
eval_loss: 0.8644
epoch19: train_loss: 0.2994
eval_loss: 0.8545


In [66]:
print(torch.max(torch.tensor(losses[1])))

tensor(0.8732)


In [67]:
torch.save(torch.tensor(losses[1]), 'results/RMSprop_P1.pt')

P2

In [5]:
transform = transforms.Compose(
    [transforms.ToTensor()])

In [6]:
training_set = torchvision.datasets.MNIST('./data', train=True, transform=transform, download=True)
validation_set = torchvision.datasets.MNIST('./data', train=False, transform=transform, download=True)

In [7]:
training_loader = torch.utils.data.DataLoader(training_set, batch_size=64, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=64, shuffle=False)

In [8]:
model = models.VAE()

In [9]:
torch.save(model.state_dict(), 'models/VAE.pt')

In [62]:
class VAE_loss(torch.nn.Module):
    def __init__(self):
        super(VAE_loss, self).__init__()
        self.loss_fn = torch.nn.CrossEntropyLoss()
    def forward(self, preds, labels):
        x, mean, logvar = preds
        reproduction_loss = self.loss_fn(x, labels)
        # KLD = -0.5 * torch.sum(1 + logvar - mean**2 - logvar.exp())
        # (reproduction_loss + KLD) / len(labels)
        return reproduction_loss

In [63]:
loss_fn = VAE_loss()

restart here for a different optimizer

In [94]:
model.load_state_dict(torch.load('models/VAE.pt'))

<All keys matched successfully>

In [95]:
optimizer = torch.optim.Adam(model.parameters())

In [96]:
manager = train_manager(model, loss_fn, optimizer,training_loader,validation_loader,device=device)

In [97]:
losses = manager.train(20, verbose=True, eval_all_epochs=True, eval_mode='loss')

epoch0: train_loss: 5.7042
eval_loss: 5.6861
epoch1: train_loss: 5.6861
eval_loss: 5.6861
epoch2: train_loss: 5.6861
eval_loss: 5.6861
epoch3: train_loss: 5.6861
eval_loss: 5.6861
epoch4: train_loss: 5.6861
eval_loss: 5.6861
epoch5: train_loss: 5.6861
eval_loss: 5.6861
epoch6: train_loss: 5.6861
eval_loss: 5.6861
epoch7: train_loss: 5.6861
eval_loss: 5.6861
epoch8: train_loss: 5.6861
eval_loss: 5.6861
epoch9: train_loss: 5.6861
eval_loss: 5.6861
epoch10: train_loss: 5.6861
eval_loss: 5.6861
epoch11: train_loss: 5.6861
eval_loss: 5.6861
epoch12: train_loss: 5.6861
eval_loss: 5.6861
epoch13: train_loss: 5.6861
eval_loss: 5.6861
epoch14: train_loss: 5.6861
eval_loss: 5.6861
epoch15: train_loss: 5.6861
eval_loss: 5.6861
epoch16: train_loss: 5.6861
eval_loss: 5.6861
epoch17: train_loss: 5.6861
eval_loss: 5.6861
epoch18: train_loss: 5.6861
eval_loss: 5.6861
epoch19: train_loss: 5.6861
eval_loss: 5.6861


In [98]:
print(torch.min(torch.tensor(losses[1])))

tensor(5.6861)


In [99]:
torch.save(torch.tensor(losses[1]), 'results/AdaGrad_P2.pt')

P3

In [100]:
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))])

In [101]:
training_set = torchvision.datasets.FashionMNIST('./data', train=True, transform=transform, download=True)
validation_set = torchvision.datasets.FashionMNIST('./data', train=False, transform=transform, download=True)
training_loader = torch.utils.data.DataLoader(training_set, batch_size=128, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=128, shuffle=False)

In [102]:
model = models.VAE()

In [103]:
loss_fn = VAE_loss()

In [104]:
torch.save(model.state_dict(), 'models/VAE.pt')

restart here if evaluating for multiple optimizers

In [131]:
model.load_state_dict(torch.load('models/VAE.pt'))

<All keys matched successfully>

In [132]:
optimizer = torch.optim.RMSprop(model.parameters())

In [133]:
manager = train_manager(model, loss_fn, optimizer, training_loader, validation_loader, device=device)

In [134]:
losses = manager.train(20, verbose=True, eval_all_epochs=True, eval_mode='loss')

epoch0: train_loss: 5.6933
eval_loss: 5.6861
epoch1: train_loss: 5.6861
eval_loss: 5.6861
epoch2: train_loss: 5.6861
eval_loss: 5.6861
epoch3: train_loss: 5.6861
eval_loss: 5.6861
epoch4: train_loss: 5.8317
eval_loss: 5.9984
epoch5: train_loss: 6.0267
eval_loss: 6.0671
epoch6: train_loss: 6.1017
eval_loss: 6.0813
epoch7: train_loss: 6.0685
eval_loss: 6.1842
epoch8: train_loss: 6.1787
eval_loss: 6.1613
epoch9: train_loss: 6.2528
eval_loss: 6.2936
epoch10: train_loss: 6.4205
eval_loss: 6.5108
epoch11: train_loss: 6.5214
eval_loss: 6.5335
epoch12: train_loss: 6.5845
eval_loss: 6.6530
epoch13: train_loss: 6.6453
eval_loss: 6.5651
epoch14: train_loss: 6.5402
eval_loss: 6.5377
epoch15: train_loss: 6.5074
eval_loss: 6.4810
epoch16: train_loss: 6.4605
eval_loss: 6.4335
epoch17: train_loss: 6.4326
eval_loss: 6.4359
epoch18: train_loss: 6.4298
eval_loss: 6.4354
epoch19: train_loss: 6.4394
eval_loss: 6.4798


In [135]:
print(torch.min(torch.tensor(losses[1])))

tensor(5.6861)


In [136]:
torch.save(torch.tensor(losses[1]), 'results/RMSprop_P3.pt')

P4

In [5]:
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize((0.5074,0.4867,0.4411),(0.2011,0.1987,0.2025))]
)

In [6]:
training_set = torchvision.datasets.CIFAR100('./data', train=True, transform=transform, download=True)
validation_set = torchvision.datasets.CIFAR100('./data', train=False, transform=transform, download=True)
training_loader = torch.utils.data.DataLoader(training_set, batch_size=256, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=256, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [7]:
model = models.All_CNN_C()

In [8]:
loss_fn = torch.nn.CrossEntropyLoss()

In [9]:
torch.save(model.state_dict(), 'models/All_CNN_C.pt')

restart here if evaluating multiple optimizers

In [10]:
model.load_state_dict(torch.load('models/All_CNN_C.pt'))

<All keys matched successfully>

In [11]:
optimizer = torch.optim.SGD(model.parameters())

In [12]:
manager = train_manager(model, loss_fn, optimizer, training_loader, validation_loader, device=device)

In [13]:
losses = manager.train(20, verbose=True, eval_all_epochs=True, eval_mode='accuracy')

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


epoch0: train_loss: 4.6056
eval_loss: 0.0100
epoch1: train_loss: 4.6056
eval_loss: 0.0100
epoch2: train_loss: 4.6056
eval_loss: 0.0100
epoch3: train_loss: 4.6056
eval_loss: 0.0100
epoch4: train_loss: 4.6056
eval_loss: 0.0100
epoch5: train_loss: 4.6056
eval_loss: 0.0100
epoch6: train_loss: 4.6056
eval_loss: 0.0100
epoch7: train_loss: 4.6056
eval_loss: 0.0100


KeyboardInterrupt: 

P5

In [5]:
dataset = load_dataset("wikitext", 'wikitext-2-v1', split='train')

Data cleaning

In [6]:
def isEnglish(sample):
    try:
        sample.encode(encoding='utf-8').decode('ascii')
    except UnicodeDecodeError:
        return False
    else:
        return True
def lowerCase(sample):
    return {"text": sample["text"].lower()}    

In [7]:
dataset = dataset.filter(lambda x: 100 <= len(x['text'].split()) <= 128)
dataset = dataset.filter(lambda x: not re.match(" = .* = \n", x['text']))
dataset = dataset.filter(lambda x: isEnglish(x['text']))
dataset = dataset.map(lambda x: lowerCase(x))

In [8]:
def count_tokens(dataset):
    """Counts the frequency of each token in the dataset.
    You should return a dict with token as keys, frequency as values.
    Hint: you can use Counter() class to help."""

    token_freq_dict = Counter(" ".join((x['text'] for x in dataset)).split())
    return token_freq_dict

def replaceRare(sample, rare_tokens, unk_token):
    text = sample["text"]
    modified_tokens = [(token if token not in rare_tokens else unk_token)
                       for token in text.split()]
    return {"text": " ".join(modified_tokens)}

def isUnkSeq(sample, unk_token, unk_thred=0.1):
    sample_tokens = sample["text"].split()
    if sample_tokens.count(unk_token)/len(sample_tokens) > unk_thred:
        return True
    else:
        return False


def build_vocabulary(dataset, min_freq=5, unk_token='<unk>'):
    """Builds a vocabulary dict for the given dataset."""
    # 1- Get unique tokens and their frequencies.
    token_freq_dict = count_tokens(dataset)

    # 2- Find a set of rare tokens with frequency lower than `min_freq`.
    #    Replace them with `unk_token`.
    rare_tokens_set = set()
    low_freq = [x[0] for x in token_freq_dict.items() if x[1] <= min_freq]
    rare_tokens_set.update(low_freq)
    dataset = dataset.map(replaceRare, fn_kwargs={"rare_tokens": rare_tokens_set,
                                                  "unk_token": unk_token})

    # 3- Filter out sequences with more than 15% rare tokens.
    dataset = dataset.filter(lambda x: not isUnkSeq(x, unk_token, unk_thred=0.15))

    # 4- Recompute the token frequency to get final vocabulary dict.
    token_freq_dict = count_tokens(dataset)
    return dataset, token_freq_dict


In [9]:
wikitext_dataset, token_freq_dict = build_vocabulary(dataset, min_freq=5, unk_token='<unk>')

In [10]:
class RNNDataset(Dataset):
    def __init__(self,
                 dataset: datasets.arrow_dataset.Dataset,
                 max_seq_length: int, ):
        self.train_data = self.prepare_rnn_lm_dataset(dataset)
        self.max_seq_length = max_seq_length + 2  # as <start> and <stop> will be added
        self.dataset_vocab = self.get_dataset_vocabulary(dataset)
        self.token2idx = {element: index for index, element in enumerate(self.dataset_vocab)}
        self.idx2token = dict(enumerate(self.dataset_vocab))
        self.pad_idx = self.token2idx["<pad>"]

    def __len__(self):
        return len(self.train_data)

    def __getitem__(self, idx):
        # Get a list of tokens of the given sequence. Represent each token with its index in `self.token2idx`.
        token_list = self.train_data[idx].split()
        # having a fallback to <unk> token if an unseen word is encoded.
        token_ids = [self.token2idx.get(t, self.token2idx['<unk>']) for t in token_list]

        # Add padding token to the sequence to reach the max_seq_length. 
        token_ids += [self.token2idx['<pad>']] * (self.max_seq_length - len(token_ids))

        return torch.tensor(token_ids)

    def get_dataset_vocabulary(self, dataset: datasets.arrow_dataset.Dataset):
        vocab = set()
        print("Getting the vocabulary for the train dataset")
        for sample in tqdm(dataset):
            vocab.update(set(sample["text"].split()))
        vocab.update(set(["<start>", "<stop>", "<pad>"]))
        vocab = sorted(vocab)
        return vocab

    @staticmethod
    def prepare_rnn_lm_dataset(target_dataset: datasets.arrow_dataset.Dataset):
        """
        A "<start>" token has to be added before every sentence and a <stop> afterwards.
        
        :param args: target_dataset: the target dataset to extract samples
        return: a list of strings each containing 'window_size' tokens.
        """
        prepared_dataset = []
        for sample in target_dataset:
            prepared_dataset.append(f"<start> {sample['text']} <stop>")
        return prepared_dataset


In [11]:
MAX_SEQ_LENGTH = 128
rnn_dataset = RNNDataset(dataset=wikitext_dataset,
                         max_seq_length=MAX_SEQ_LENGTH)

Getting the vocabulary for the train dataset


100%|██████████| 1158/1158 [00:00<00:00, 7778.08it/s]


In [12]:
def get_dataloader(rnn_dataset, test_ratio=0.1):
    # split train/test dataset.
    rnn_train_dataset, rnn_test_dataset = torch.utils.data.random_split(rnn_dataset, [1-test_ratio, test_ratio])
    # get pytorch DataLoader
    train_dataloader = DataLoader(rnn_train_dataset, batch_size=8, shuffle=True)
    test_dataloader = DataLoader(rnn_test_dataset, batch_size=8, shuffle=False)
    return train_dataloader, test_dataloader

In [13]:
train_loader, val_loader = get_dataloader(rnn_dataset, test_ratio=0.1)

In [14]:
vocab_size = len(rnn_dataset.token2idx)
embedding_dim = 100
hidden_dim = 100
dropout_rate = 0.15

In [15]:
model = models.LSTM(vocab_size=vocab_size,input_dim=embedding_dim,hidden_dim=hidden_dim,dropout_rate=dropout_rate)

In [16]:
loss_fn = torch.nn.CrossEntropyLoss(ignore_index=rnn_dataset.pad_idx)

In [17]:
torch.save(model.state_dict(), 'models/LSTM.pt')

Restart here for evaluating multiple optimizers

In [42]:
model.load_state_dict(torch.load('models/LSTM.pt'))

<All keys matched successfully>

In [43]:
optimizer = torch.optim.RMSprop(model.parameters())

In [44]:
manager = train_manager(model, loss_fn, optimizer, train_loader, val_loader, device=device)

In [45]:
losses = manager.train(20, verbose=True, eval_all_epochs=True, eval_mode='accuracy', is_LSTM=True)

epoch0: train_loss: 5.8608
eval_loss: 0.1387
epoch1: train_loss: 5.2657
eval_loss: 0.1600
epoch2: train_loss: 5.0421
eval_loss: 0.1631
epoch3: train_loss: 4.8698
eval_loss: 0.1678
epoch4: train_loss: 4.7286
eval_loss: 0.1720
epoch5: train_loss: 4.6126
eval_loss: 0.1722
epoch6: train_loss: 4.5109
eval_loss: 0.1736
epoch7: train_loss: 4.4287
eval_loss: 0.1685
epoch8: train_loss: 4.3619
eval_loss: 0.1712
epoch9: train_loss: 4.3080
eval_loss: 0.1701
epoch10: train_loss: 4.2633
eval_loss: 0.1656
epoch11: train_loss: 4.2266
eval_loss: 0.1693
epoch12: train_loss: 4.1948
eval_loss: 0.1688
epoch13: train_loss: 4.1677
eval_loss: 0.1695
epoch14: train_loss: 4.1414
eval_loss: 0.1700
epoch15: train_loss: 4.1230
eval_loss: 0.1712
epoch16: train_loss: 4.1018
eval_loss: 0.1694
epoch17: train_loss: 4.0821
eval_loss: 0.1699
epoch18: train_loss: 4.0726
eval_loss: 0.1674
epoch19: train_loss: 4.0547
eval_loss: 0.1652


In [46]:
print(torch.max(torch.tensor(losses[1])))

tensor(0.1736)


In [47]:
torch.save(torch.tensor(losses[1]), 'results/RMSprop_P5.pt')