In [1]:
from pytorch_optim_training_manager import train_manager
import torch
import torchvision
from datasets import load_dataset
import datasets
import torchvision.transforms as transforms
import models
import os
import re
from collections import Counter
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from ucimlrepo import fetch_ucirepo 
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# Process-wide environment switches for this notebook.
# NOTE(review): these are assigned after `import torch` and after the earlier cell
# that calls torch.cuda.is_available(); CUDA device-selection variables are normally
# expected to be in place before CUDA is first queried — confirm they take effect here.
# Presumably disables Weights & Biases logging — confirm against the training manager.
os.environ["WANDB_DISABLED"] = "true"
# Order GPUs by PCI bus id and expose only device "0" to this process.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [4]:
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)

P0

In [5]:
# fetch dataset 
wine_quality = fetch_ucirepo(id=186) 
  
# data (as pandas dataframes) 
x = wine_quality.data.features 
y = wine_quality.data.targets 
  
# metadata 
# print(wine_quality.metadata) 
  
# variable information 
# print(wine_quality.variables) 

In [6]:
data_train, data_validation = train_test_split(x.join(y), random_state=104, test_size=0.25, shuffle=True)

In [29]:
# Min-max scale every column of the joined frame. `data_train` was built from
# x.join(y), so the column list below covers the target column(s) as well as the features.
scaler = MinMaxScaler()
columns_to_normalize = data_train.columns.tolist()

# Fit on the training split only, then reuse the fitted scaler on the validation split.
# Both frames are overwritten in place.
data_train[columns_to_normalize] = scaler.fit_transform(data_train[columns_to_normalize])
data_validation[columns_to_normalize] = scaler.transform(data_validation[columns_to_normalize])

In [31]:
class create_dataset(Dataset):
    """Map-style dataset over a tabular frame whose last column is the target.

    ``data`` must support positional row access through ``.iloc`` and ``len()``;
    the notebook passes a pandas DataFrame.
    """

    def __init__(self, data,):
        self.data = data

    def __len__(self):
        """Number of rows in the wrapped frame."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(features, target)`` tensors for the row at position ``index``.

        ``features`` holds every column except the last one; ``target`` is a
        one-element tensor holding the last column.
        """
        record = self.data.iloc[index]
        features = record[:-1].to_numpy()
        target = record[-1:].to_numpy()
        return torch.from_numpy(features), torch.from_numpy(target)

# Wrap both splits in `create_dataset` and build the loaders: the training loader
# shuffles and serves mini-batches of 128 rows, the validation loader serves one
# row at a time in fixed order.
dataset_train = create_dataset(data_train)
dataset_validation = create_dataset(data_validation)
training_loader = DataLoader(dataset_train,batch_size=128,shuffle=True)
validation_loader = DataLoader(dataset_validation,batch_size=1,shuffle=False)

In [32]:
model = models.Simple_Net()

In [33]:
torch.save(model.state_dict(), 'models/Simple_Net.pt')

In [34]:
loss_fn = torch.nn.MSELoss()

Restart here when evaluating multiple optimizers: reloading the saved initial weights gives every optimizer the same starting point.

In [35]:
model.load_state_dict(torch.load('models/Simple_Net.pt'))

<All keys matched successfully>

In [36]:
optimizer = torch.optim.Adam(model.parameters())

In [37]:
manager = train_manager(model, loss_fn, optimizer,training_loader,validation_loader,device=device)

In [38]:
losses = manager.train(20, verbose=True, eval_all_epochs=True, eval_mode='accuracy', is_wine=True)

epoch0: train_loss: 0.1685
eval_loss: 0.9428
epoch1: train_loss: 0.0565
eval_loss: 0.9945
epoch2: train_loss: 0.0261
eval_loss: 0.9975
epoch3: train_loss: 0.0239
eval_loss: 0.9975
epoch4: train_loss: 0.0222
eval_loss: 0.9988
epoch5: train_loss: 0.0189
eval_loss: 0.9994
epoch6: train_loss: 0.0174
eval_loss: 0.9994
epoch7: train_loss: 0.0165
eval_loss: 0.9994
epoch8: train_loss: 0.0166
eval_loss: 0.9994
epoch9: train_loss: 0.0164
eval_loss: 0.9994
epoch10: train_loss: 0.0155
eval_loss: 0.9994
epoch11: train_loss: 0.0154
eval_loss: 0.9994
epoch12: train_loss: 0.0155
eval_loss: 0.9994
epoch13: train_loss: 0.0155
eval_loss: 0.9994
epoch14: train_loss: 0.0154
eval_loss: 0.9994
epoch15: train_loss: 0.0155
eval_loss: 0.9994
epoch16: train_loss: 0.0154
eval_loss: 0.9994
epoch17: train_loss: 0.0152
eval_loss: 0.9988
epoch18: train_loss: 0.0156
eval_loss: 0.9988
epoch19: train_loss: 0.0157
eval_loss: 0.9994


In [39]:
print(torch.max(torch.tensor(losses[1])))

tensor(0.9994)


In [40]:
manager.test_ol

[(tensor([[0.2062]], device='cuda:0', dtype=torch.float64),
  tensor([[0.6667]], device='cuda:0', dtype=torch.float64)),
 (tensor([[0.2194]], device='cuda:0', dtype=torch.float64),
  tensor([[0.3333]], device='cuda:0', dtype=torch.float64)),
 (tensor([[0.2127]], device='cuda:0', dtype=torch.float64),
  tensor([[0.5000]], device='cuda:0', dtype=torch.float64)),
 (tensor([[0.2142]], device='cuda:0', dtype=torch.float64),
  tensor([[0.5000]], device='cuda:0', dtype=torch.float64)),
 (tensor([[0.1533]], device='cuda:0', dtype=torch.float64),
  tensor([[0.6667]], device='cuda:0', dtype=torch.float64)),
 (tensor([[0.2259]], device='cuda:0', dtype=torch.float64),
  tensor([[0.6667]], device='cuda:0', dtype=torch.float64)),
 (tensor([[0.2004]], device='cuda:0', dtype=torch.float64),
  tensor([[0.3333]], device='cuda:0', dtype=torch.float64)),
 (tensor([[0.2045]], device='cuda:0', dtype=torch.float64),
  tensor([[0.8333]], device='cuda:0', dtype=torch.float64)),
 (tensor([[0.2160]], device='cud

In [28]:
# Persist `losses[1]` (presumably the per-epoch evaluation metric — confirm) for later comparison.
# NOTE(review): the optimizer cell in this section builds torch.optim.Adam, but the
# file name says "Adagrad" and this cell's execution count (28) predates the training
# cell (38) — confirm the saved file matches the optimizer that produced `losses`.
torch.save(torch.tensor(losses[1]), 'results/Adagrad_P0.pt')

P1

In [40]:
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))])

In [41]:
training_set = torchvision.datasets.FashionMNIST('./data', train=True, transform=transform, download=True)
validation_set = torchvision.datasets.FashionMNIST('./data', train=False, transform=transform, download=True)

In [42]:
training_loader = torch.utils.data.DataLoader(training_set, batch_size=128, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=128, shuffle=False)

In [43]:
model = models.CNN_Simple()

In [44]:
torch.save(model.state_dict(), 'models/CNN_Simple.pt')

In [45]:
loss_fn = torch.nn.CrossEntropyLoss()

Restart here when evaluating multiple optimizers: reloading the saved initial weights gives every optimizer the same starting point.

In [46]:
model.load_state_dict(torch.load('models/CNN_Simple.pt'))

<All keys matched successfully>

In [47]:
optimizer = torch.optim.RMSprop(model.parameters())

In [48]:
manager = train_manager(model, loss_fn, optimizer,training_loader,validation_loader,device=device)

In [49]:
losses = manager.train(20, verbose=True, eval_all_epochs=True, eval_mode='accuracy')

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


epoch0: train_loss: 1.1561
eval_loss: 0.7774
epoch1: train_loss: 0.5578
eval_loss: 0.7893
epoch2: train_loss: 0.4751
eval_loss: 0.8360
epoch3: train_loss: 0.4355
eval_loss: 0.8465
epoch4: train_loss: 0.4009
eval_loss: 0.8421
epoch5: train_loss: 0.3849
eval_loss: 0.7981
epoch6: train_loss: 0.3771
eval_loss: 0.8542
epoch7: train_loss: 0.3633
eval_loss: 0.8564
epoch8: train_loss: 0.3545
eval_loss: 0.8640
epoch9: train_loss: 0.3481
eval_loss: 0.8664
epoch10: train_loss: 0.3402
eval_loss: 0.8668
epoch11: train_loss: 0.3391
eval_loss: 0.8480
epoch12: train_loss: 0.3344
eval_loss: 0.8649
epoch13: train_loss: 0.3317
eval_loss: 0.8620
epoch14: train_loss: 0.3271
eval_loss: 0.8716
epoch15: train_loss: 0.3265
eval_loss: 0.8585
epoch16: train_loss: 0.3303
eval_loss: 0.8709
epoch17: train_loss: 0.3169
eval_loss: 0.8615
epoch18: train_loss: 0.3186
eval_loss: 0.8578
epoch19: train_loss: 0.3149
eval_loss: 0.8617


In [66]:
print(torch.max(torch.tensor(losses[1])))

tensor(0.8732)


In [67]:
torch.save(torch.tensor(losses[1]), 'results/RMSprop_P1.pt')

P2

In [8]:
transform = transforms.Compose(
    [transforms.ToTensor()])

In [9]:
training_set = torchvision.datasets.MNIST('./data', train=True, transform=transform, download=True)
validation_set = torchvision.datasets.MNIST('./data', train=False, transform=transform, download=True)

In [10]:
training_loader = torch.utils.data.DataLoader(training_set, batch_size=64, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=64, shuffle=False)

In [11]:
model = models.VAE()

In [12]:
torch.save(model.state_dict(), 'models/VAE.pt')

In [13]:
class VAE_loss(torch.nn.Module):
    """Loss module used for the VAE experiments.

    Only the reconstruction term is active. The KL-divergence term is disabled,
    so the ``mean`` and ``logvar`` entries of ``preds`` are unpacked but unused.
    """

    def __init__(self):
        super().__init__()
        self.loss_fn = torch.nn.CrossEntropyLoss()

    def forward(self, preds, labels):
        """Return the cross-entropy between the first entry of ``preds`` and ``labels``.

        ``preds`` must unpack into exactly three items: ``(x, mean, logvar)``.
        """
        reconstruction, _mean, _logvar = preds
        # Disabled KL term, kept for reference:
        # KLD = -0.5 * torch.sum(1 + logvar - mean**2 - logvar.exp())
        # (reproduction_loss + KLD) / len(labels)
        return self.loss_fn(reconstruction, labels)

In [14]:
loss_fn = VAE_loss()

restart here for a different optimizer

In [22]:
model.load_state_dict(torch.load('models/VAE.pt'))

<All keys matched successfully>

In [23]:
optimizer = torch.optim.Adadelta(model.parameters())

In [24]:
manager = train_manager(model, loss_fn, optimizer,training_loader,validation_loader,device=device)

In [25]:
losses = manager.train(20, verbose=True, eval_all_epochs=True, eval_mode='loss')

epoch0: train_loss: 5.7037
eval_loss: 5.6861
epoch1: train_loss: 5.6861
eval_loss: 5.6861
epoch2: train_loss: 5.6861
eval_loss: 5.6861
epoch3: train_loss: 5.6861
eval_loss: 5.6861
epoch4: train_loss: 5.6861
eval_loss: 5.6861
epoch5: train_loss: 5.6861
eval_loss: 5.6861
epoch6: train_loss: 5.6861
eval_loss: 5.6861
epoch7: train_loss: 5.6861
eval_loss: 5.6861
epoch8: train_loss: 5.6861
eval_loss: 5.6861
epoch9: train_loss: 5.6861
eval_loss: 5.6861
epoch10: train_loss: 5.6861
eval_loss: 5.6861
epoch11: train_loss: 5.6861
eval_loss: 5.6861
epoch12: train_loss: 5.6861
eval_loss: 5.6861
epoch13: train_loss: 5.6861
eval_loss: 5.6861
epoch14: train_loss: 5.6861
eval_loss: 5.6861
epoch15: train_loss: 5.6861
eval_loss: 5.6861
epoch16: train_loss: 5.6861
eval_loss: 5.6861
epoch17: train_loss: 5.6861
eval_loss: 5.6861
epoch18: train_loss: 5.6861
eval_loss: 5.6861
epoch19: train_loss: 5.6861
eval_loss: 5.6861


In [26]:
print(torch.tensor(losses[1]))

tensor([5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861,
        5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861,
        5.6861, 5.6861])


In [27]:
print(torch.load('results/AdaDelta_P2.pt'))

tensor([5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861,
        5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861,
        5.6861, 5.6861])


In [98]:
print(torch.min(torch.tensor(losses[1])))

tensor(5.6861)


In [99]:
# Persist `losses[1]` (presumably the per-epoch evaluation metric — confirm) for later comparison.
# NOTE(review): the optimizer cell in this section builds torch.optim.Adadelta and an
# earlier cell reads 'results/AdaDelta_P2.pt', but this cell writes 'results/AdaGrad_P2.pt'
# and its execution count (99) comes from a different run — confirm the file name
# matches the optimizer that produced `losses`.
torch.save(torch.tensor(losses[1]), 'results/AdaGrad_P2.pt')

P3

In [28]:
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))])

In [29]:
training_set = torchvision.datasets.FashionMNIST('./data', train=True, transform=transform, download=True)
validation_set = torchvision.datasets.FashionMNIST('./data', train=False, transform=transform, download=True)
training_loader = torch.utils.data.DataLoader(training_set, batch_size=128, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=128, shuffle=False)

In [30]:
model = models.VAE()

In [31]:
loss_fn = VAE_loss()

In [32]:
torch.save(model.state_dict(), 'models/VAE.pt')

Restart here when evaluating multiple optimizers: reloading the saved initial weights gives every optimizer the same starting point.

In [33]:
model.load_state_dict(torch.load('models/VAE.pt'))

<All keys matched successfully>

In [34]:
optimizer = torch.optim.RMSprop(model.parameters())

In [35]:
manager = train_manager(model, loss_fn, optimizer, training_loader, validation_loader, device=device)

In [36]:
losses = manager.train(20, verbose=True, eval_all_epochs=True, eval_mode='loss')

epoch0: train_loss: 5.6923
eval_loss: 5.6861
epoch1: train_loss: 5.6861
eval_loss: 5.6861
epoch2: train_loss: 5.6861
eval_loss: 5.6861
epoch3: train_loss: 5.6861
eval_loss: 5.6861
epoch4: train_loss: 5.6861
eval_loss: 5.6861
epoch5: train_loss: 5.6861
eval_loss: 5.6861
epoch6: train_loss: 5.6861
eval_loss: 5.6861
epoch7: train_loss: 5.6861
eval_loss: 5.6861
epoch8: train_loss: 5.6861
eval_loss: 5.6861
epoch9: train_loss: 5.6861
eval_loss: 5.6861
epoch10: train_loss: 5.6861
eval_loss: 5.6861
epoch11: train_loss: 5.6861
eval_loss: 5.6861
epoch12: train_loss: 5.6861
eval_loss: 5.6861
epoch13: train_loss: 5.6861
eval_loss: 5.6861
epoch14: train_loss: 5.6861
eval_loss: 5.6861
epoch15: train_loss: 5.6861
eval_loss: 5.6861
epoch16: train_loss: 5.6861
eval_loss: 5.6861
epoch17: train_loss: 5.6861
eval_loss: 5.6861
epoch18: train_loss: 5.6861
eval_loss: 5.6861
epoch19: train_loss: 5.6861
eval_loss: 5.6861


In [37]:
print(torch.tensor(losses[1]))
print(torch.load('results/RMSprop_P3.pt'))

tensor([5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861,
        5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861, 5.6861,
        5.6861, 5.6861])
tensor([5.6861, 5.6861, 5.6861, 5.6861, 5.9984, 6.0671, 6.0813, 6.1842, 6.1613,
        6.2936, 6.5108, 6.5335, 6.6530, 6.5651, 6.5377, 6.4810, 6.4335, 6.4359,
        6.4354, 6.4798])


In [135]:
print(torch.min(torch.tensor(losses[1])))

tensor(5.6861)


In [38]:
torch.save(torch.tensor(losses[1]), 'results/RMSprop_P3.pt')

P4

In [5]:
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize((0.5074,0.4867,0.4411),(0.2011,0.1987,0.2025))]
)

In [6]:
training_set = torchvision.datasets.CIFAR100('./data', train=True, transform=transform, download=True)
validation_set = torchvision.datasets.CIFAR100('./data', train=False, transform=transform, download=True)
training_loader = torch.utils.data.DataLoader(training_set, batch_size=256, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=256, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [7]:
model = models.All_CNN_C()

In [8]:
loss_fn = torch.nn.CrossEntropyLoss()

In [9]:
torch.save(model.state_dict(), 'models/All_CNN_C.pt')

restart here if evaluating multiple optimizers

In [10]:
model.load_state_dict(torch.load('models/All_CNN_C.pt'))

<All keys matched successfully>

In [11]:
optimizer = torch.optim.SGD(model.parameters())

In [12]:
manager = train_manager(model, loss_fn, optimizer, training_loader, validation_loader, device=device)

In [13]:
losses = manager.train(20, verbose=True, eval_all_epochs=True, eval_mode='accuracy')

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


epoch0: train_loss: 4.6056
eval_loss: 0.0100
epoch1: train_loss: 4.6056
eval_loss: 0.0100
epoch2: train_loss: 4.6056
eval_loss: 0.0100
epoch3: train_loss: 4.6056
eval_loss: 0.0100
epoch4: train_loss: 4.6056
eval_loss: 0.0100
epoch5: train_loss: 4.6056
eval_loss: 0.0100
epoch6: train_loss: 4.6056
eval_loss: 0.0100
epoch7: train_loss: 4.6056
eval_loss: 0.0100


KeyboardInterrupt: 

P5

In [52]:
dataset = load_dataset("wikitext", 'wikitext-2-v1', split='train')

Data cleaning

In [53]:
def isEnglish(sample):
    """Return True when ``sample`` consists solely of ASCII characters.

    ASCII-only text is used here as a cheap proxy for "English". The empty
    string counts as ASCII. Relies on ``str.isascii`` instead of an
    encode/decode round trip, so strings containing lone surrogates yield
    False rather than raising ``UnicodeEncodeError``.

    :param sample: the text to check.
    :return: True if every character is ASCII, False otherwise.
    """
    return sample.isascii()
def lowerCase(sample):
    """Return a new ``{"text": ...}`` mapping holding the lower-cased text of ``sample``."""
    lowered = sample["text"].lower()
    return {"text": lowered}

In [54]:
# Keep samples with between 100 and 128 whitespace-separated tokens (inclusive).
dataset = dataset.filter(lambda x: 100 <= len(x['text'].split()) <= 128)
# Drop samples whose text starts with a " = ... = \n" pattern
# (presumably wikitext section headings — confirm).
dataset = dataset.filter(lambda x: not re.match(" = .* = \n", x['text']))
# Keep only samples accepted by isEnglish.
dataset = dataset.filter(lambda x: isEnglish(x['text']))
# Lower-case the text of every remaining sample.
dataset = dataset.map(lambda x: lowerCase(x))

In [55]:
def count_tokens(dataset):
    """Tally how often each whitespace-separated token occurs across ``dataset``.

    ``dataset`` is any iterable of samples exposing a ``'text'`` entry.
    Returns a ``Counter`` mapping token -> frequency.
    """
    token_freq_dict = Counter()
    for sample in dataset:
        token_freq_dict.update(sample['text'].split())
    return token_freq_dict

def replace_rare_tokens(sample, rare_tokens, unk_token):
    """Return ``{"text": ...}`` with every token found in ``rare_tokens`` swapped for ``unk_token``.

    The text is split on whitespace and re-joined with single spaces.
    """
    rewritten = []
    for token in sample["text"].split():
        rewritten.append(unk_token if token in rare_tokens else token)
    return {"text": " ".join(rewritten)}

def is_unknown_sequence(sample, unk_token, unk_threshold=0.1):
    """Tell whether the share of ``unk_token`` in ``sample["text"]`` exceeds ``unk_threshold``.

    :param sample: mapping with a ``"text"`` entry; the text is split on whitespace.
    :param unk_token: the token counted as "unknown".
    :param unk_threshold: fraction that must be strictly exceeded to return True.
    :return: True if ``count(unk_token) / number_of_tokens > unk_threshold``;
             False otherwise, including for text without any token.
    """
    sample_tokens = sample["text"].split()
    if not sample_tokens:
        # No tokens at all: nothing is unknown, and this avoids dividing by zero.
        return False
    return sample_tokens.count(unk_token) / len(sample_tokens) > unk_threshold


def build_vocabulary(dataset, min_freq=5, unk_token='<unk>', unk_threshold=0.15):
    """Replace rare tokens, drop mostly-unknown samples and count the remaining tokens.

    :param dataset: object providing ``map`` and ``filter`` and yielding samples
        with a ``"text"`` entry (the notebook passes a Hugging Face dataset).
    :param min_freq: tokens occurring ``min_freq`` times or fewer are treated as rare.
    :param unk_token: token substituted for every rare token.
    :param unk_threshold: samples whose fraction of ``unk_token`` exceeds this value
        are removed; defaults to 0.15, the value that used to be hard-coded.
    :return: ``(dataset, token_freq_dict)`` — the processed dataset and the token
        frequencies recomputed on it.
    """
    # Get unique tokens and their frequencies.
    token_freq_dict = count_tokens(dataset)

    # Collect rare tokens (frequency at or below `min_freq`) and replace them with `unk_token`.
    rare_tokens_set = {token for token, freq in token_freq_dict.items() if freq <= min_freq}
    dataset = dataset.map(replace_rare_tokens, fn_kwargs={"rare_tokens": rare_tokens_set,
                                                  "unk_token": unk_token})

    # Filter out sequences whose share of `unk_token` exceeds `unk_threshold`.
    dataset = dataset.filter(lambda x: not is_unknown_sequence(x, unk_token, unk_threshold=unk_threshold))

    # Recompute the token frequency to get final vocabulary dict.
    token_freq_dict = count_tokens(dataset)
    return dataset, token_freq_dict


In [56]:
wikitext_dataset, token_freq_dict = build_vocabulary(dataset, min_freq=5, unk_token='<unk>')

Map:   0%|          | 0/1972 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1972 [00:00<?, ? examples/s]

In [57]:
class LSTMDataset(Dataset):
    """Token-id dataset for the LSTM experiment.

    Each sample's text is wrapped in ``<start>`` / ``<stop>``, mapped to
    vocabulary indices and right-padded with ``<pad>`` up to
    ``max_seq_length + 2`` ids.
    """

    # Tokens that are always part of the vocabulary. "<unk>" is included so the
    # unseen-token fallback in __getitem__ cannot raise KeyError when the data
    # itself contains no "<unk>" token.
    SPECIAL_TOKENS = ("<start>", "<stop>", "<pad>", "<unk>")

    def __init__(self,
                 dataset: datasets.arrow_dataset.Dataset,
                 max_seq_length: int, ):
        """
        :param dataset: iterable of samples exposing a ``"text"`` entry.
        :param max_seq_length: maximum number of text tokens per sample, not
            counting the added ``<start>`` and ``<stop>``.
        """
        self.train_data = self.prepare_dataset(dataset)
        self.max_seq_length = max_seq_length + 2  # as <start> and <stop> will be added
        self.dataset_vocab = self.get_vocabulary(dataset)
        self.token2idx = {element: index for index, element in enumerate(self.dataset_vocab)}
        self.idx2token = dict(enumerate(self.dataset_vocab))
        self.pad_idx = self.token2idx["<pad>"]
        self.unk_idx = self.token2idx["<unk>"]

    def __len__(self):
        """Number of prepared sequences."""
        return len(self.train_data)

    def __getitem__(self, idx):
        """Return the padded token-id tensor of the sequence at position ``idx``."""
        # Get a list of tokens of the given sequence. Represent each token with its index in `self.token2idx`.
        token_list = self.train_data[idx].split()
        # Fall back to the <unk> index if an unseen word is encoded.
        token_ids = [self.token2idx.get(t, self.unk_idx) for t in token_list]

        # Right-pad up to max_seq_length. A non-positive count adds nothing, so a
        # sequence already at or above that length is returned unpadded and untruncated.
        token_ids += [self.token2idx['<pad>']] * (self.max_seq_length - len(token_ids))

        return torch.tensor(token_ids)

    def get_vocabulary(self, dataset: datasets.arrow_dataset.Dataset):
        """Return the sorted list of all whitespace-separated tokens in ``dataset``
        plus the special tokens in ``SPECIAL_TOKENS``."""
        vocab = set()
        print("Getting dataset's vocabulary")
        for sample in tqdm(dataset):
            vocab.update(sample["text"].split())
        vocab.update(self.SPECIAL_TOKENS)
        vocab = sorted(vocab)
        return vocab

    @staticmethod
    def prepare_dataset(target_dataset: datasets.arrow_dataset.Dataset):
        """
        Encapsulate sequences between <start> and <stop>.
        
        :param: target_dataset: the target dataset to extract samples
        return: a list of encapsulated samples.
        """
        return [f"<start> {sample['text']} <stop>" for sample in target_dataset]


In [58]:
MAX_SEQ_LENGTH = 128
lstm_dataset = LSTMDataset(dataset=wikitext_dataset,
                         max_seq_length=MAX_SEQ_LENGTH)

Getting dataset's vocabulary


100%|██████████| 1158/1158 [00:00<00:00, 2650.39it/s]


In [59]:
def get_dataloader(lstm_dataset, test_ratio=0.1, batch_size=8):
    """Randomly split ``lstm_dataset`` and wrap both parts in DataLoaders.

    :param lstm_dataset: sized, indexable dataset to split.
    :param test_ratio: fraction of samples assigned to the test split; the
        remaining ``1 - test_ratio`` goes to the training split.
    :param batch_size: batch size used by both loaders; defaults to 8, the value
        that used to be hard-coded.
    :return: ``(train_dataloader, test_dataloader)``; only the training loader shuffles.
    """
    # split train/test dataset. No generator is passed, so the split follows
    # torch's global random state.
    lstm_train_dataset, lstm_test_dataset = torch.utils.data.random_split(
        lstm_dataset, [1-test_ratio, test_ratio])
    # get pytorch DataLoader
    train_dataloader = DataLoader(lstm_train_dataset, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(lstm_test_dataset, batch_size=batch_size, shuffle=False)
    return train_dataloader, test_dataloader

In [60]:
train_loader, val_loader = get_dataloader(lstm_dataset, test_ratio=0.1)

In [61]:
vocab_size = len(lstm_dataset.token2idx)
embedding_dim = 100
hidden_dim = 100
dropout_rate = 0.15

In [62]:
model = models.LSTM(vocab_size=vocab_size,input_dim=embedding_dim,hidden_dim=hidden_dim,dropout_rate=dropout_rate)

In [63]:
loss_fn = torch.nn.CrossEntropyLoss(ignore_index=lstm_dataset.pad_idx)

In [17]:
torch.save(model.state_dict(), 'models/LSTM.pt')

Restart here when evaluating multiple optimizers: reloading the saved initial weights gives every optimizer the same starting point.

In [64]:
model.load_state_dict(torch.load('models/LSTM.pt'))

<All keys matched successfully>

In [65]:
optimizer = torch.optim.RMSprop(model.parameters())

In [66]:
manager = train_manager(model, loss_fn, optimizer, train_loader, val_loader, device=device)

In [67]:
losses = manager.train(20, verbose=True, eval_all_epochs=True, eval_mode='accuracy', is_LSTM=True)

epoch0: train_loss: 5.8699
eval_loss: 0.1382
epoch1: train_loss: 5.3145
eval_loss: 0.1538
epoch2: train_loss: 5.0982
eval_loss: 0.1656
epoch3: train_loss: 4.9310
eval_loss: 0.1682
epoch4: train_loss: 4.7907
eval_loss: 0.1647
epoch5: train_loss: 4.6697
eval_loss: 0.1736
epoch6: train_loss: 4.5629
eval_loss: 0.1736
epoch7: train_loss: 4.4713
eval_loss: 0.1742
epoch8: train_loss: 4.3943
eval_loss: 0.1726
epoch9: train_loss: 4.3320
eval_loss: 0.1744
epoch10: train_loss: 4.2764
eval_loss: 0.1734
epoch11: train_loss: 4.2327
eval_loss: 0.1733
epoch12: train_loss: 4.1919
eval_loss: 0.1703
epoch13: train_loss: 4.1596
eval_loss: 0.1727
epoch14: train_loss: 4.1307
eval_loss: 0.1701
epoch15: train_loss: 4.1076
eval_loss: 0.1703
epoch16: train_loss: 4.0847
eval_loss: 0.1699
epoch17: train_loss: 4.0638
eval_loss: 0.1705
epoch18: train_loss: 4.0477
eval_loss: 0.1699
epoch19: train_loss: 4.0279
eval_loss: 0.1665


In [46]:
print(torch.max(torch.tensor(losses[1])))

tensor(0.1736)


In [47]:
torch.save(torch.tensor(losses[1]), 'results/RMSprop_P5.pt')

In [69]:
len(lstm_dataset.dataset_vocab)

4262