# 0. Import

In [265]:
import numpy as np
import sys
import os
import time
import re
from collections import Counter, OrderedDict
from tqdm import tqdm

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import tensorflow as tf

import nltk
nltk.download('stopwords')
nltk.download('punkt')

import torchtext
from torchtext.data import get_tokenizer
from torchtext.vocab import vocab
from torchtext.data.functional import to_map_style_dataset

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.utils.data.dataset import random_split
from torch.optim.lr_scheduler import StepLR

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# 1. Data preparetion

In [266]:
# Read dataset
path = '/content/dataset.txt'
with open(path, "r") as f:
  text = f.read().lower()

In [267]:
text = re.sub("\n", " EOS SOS ", text)
text = re.sub("\.", "", text)
text = "SOS "+text+" EOS"

In [268]:
text[:100]

"SOS i can't say how every time i ever put my arms around you i felt that i was home EOS SOS since i "

In [269]:
# with open("dataset.txt", "w") as f:
#   f.write(text)

In [270]:
# Tokenize words and create a vocabulary
tokens = word_tokenize(text)
counter = Counter(tokens)
sorted_by_freq_tuples = sorted(counter.items(), key=lambda x: x[1], reverse=True)
ordered_dict = OrderedDict(sorted_by_freq_tuples)
vocabulary = vocab(ordered_dict)
unk_token = '<unk>'
vocabulary.insert_token(unk_token, 0)
vocabulary.set_default_index(0)

In [271]:
print("Total number of words", len(tokens))
print("Number of unique words", len(vocabulary))

Total number of words 10763
Number of unique words 1284


In [272]:
# Create embeddings
# Reference: "https://towardsdatascience.com/multiclass-text-classification-using-lstm-in-pytorch-eac56baed8df"

glove_path = '/content/drive/MyDrive/Study/ML/glove.6B.100d.txt'

# read GloVe data
def load_glove_vectors(glove_file):
    word_vectors = {}
    with open(glove_file) as f:
        for line in f:
            split = line.split()
            word_vectors[split[0]] = np.array([float(x) for x in split[1:]])
    return word_vectors

# create embedding matrix
def get_emb_matrix(pretrained, vocab, emb_size = 100):
    vocab_size = len(vocab)
    W = np.zeros((vocab_size, emb_size), dtype="float32")
    for word in vocab.get_itos():
        if word in pretrained:
            W[vocab[word]] = pretrained[word]
        else:
            W[vocab[word]] = np.random.uniform(-0.25,0.25, emb_size)  
    return W

word_vecs = load_glove_vectors(glove_path)
pretrained_weights = get_emb_matrix(word_vecs, vocabulary)

# 2. Create data loaders

In [273]:
WORD_FOR_PREDICT = 2

In [274]:
def create_dataset(text_list, pred_words=3):
    labels, texts = [], []
    stop_token = vocabulary["EOS"]
    for i in range(len(text_list)-pred_words):
        word_tokens = []
        for j in range(pred_words+1): 
          word_tokens.append(vocabulary[text_list[i+j]])
        if stop_token in word_tokens[:pred_words]:
          continue
        else:
          texts.append(word_tokens[:pred_words])
          labels.append(word_tokens[-1])
    return TensorDataset(torch.from_numpy(np.array(texts)), torch.from_numpy(np.array(labels)))

In [275]:
train_split = int(len(tokens)*0.85)
test_split = len(tokens)-train_split
train_dataset = create_dataset(tokens[:train_split], WORD_FOR_PREDICT)
test_dataset = create_dataset(tokens[-test_split:], WORD_FOR_PREDICT)
whole_dataset = create_dataset(tokens, WORD_FOR_PREDICT)

In [276]:
BATCH_SIZE = 150
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)
whole_loader = DataLoader(whole_dataset, batch_size=BATCH_SIZE, shuffle=True)

# 3. Training functions

In [277]:
def acc(prediction, label):
    pred = torch.argmax(prediction, dim=1)
    return torch.sum(pred == label.squeeze()).item()

In [278]:
def train_step(model, loader, loss_fn, optimizer, scheduler, device):
    ''' Calculate loss and accuracy for one epoch'''
    train_losses = []
    perpls = []
    total_acc, total_count = 0, 0
    for batch_idx, (text, label) in enumerate(loader):

        text = text.to(device)
        label = label.to(device)

        prediction = model(text)

        loss_value = loss_fn(prediction, label)
        loss_value.backward()
        train_losses.append(loss_value.item())
        perpls.append(torch.exp(loss_value).item())
        
        optimizer.step()
        optimizer.zero_grad()
        total_acc += acc(prediction, label)
        total_count += label.size(0)
    #scheduler.step()
    return np.mean(train_losses), total_acc/total_count, np.mean(perpls)

In [279]:
def val_step(model, loader, loss_fn, device):
    ''' Calculate accuracy for one epoch'''
    total_acc, total_count = 0, 0
    perpls = []
    with torch.no_grad():
      y_pred, y_true = [], []
      for batch_idx, (text, label) in enumerate(loader):          
          text = text.to(device) 
          label = label.to(device)
          prediction = model(text)

          loss_value = loss_fn(prediction, label)
          perpls.append(torch.exp(loss_value).item())

          total_acc += acc(prediction, label)
          total_count += label.size(0)
    return total_acc/total_count, np.mean(perpls)

# 4. Create model

In [280]:
class LSTM_Glove(nn.Module):
    ''' LSTM model with GloVe embeddings'''

    def __init__(self, vocab_size, embedding_dim, hidden_dim, no_layers, glove_weights):
        super().__init__()

        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.no_layers = no_layers

        self.embedding = nn.Embedding.from_pretrained(torch.FloatTensor(glove_weights), padding_idx=0, freeze=False)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, no_layers, batch_first=True)
        self.linear = nn.Linear(hidden_dim, vocab_size)
        self.dropout = nn.Dropout(0.2)
        self.softmax = nn.Softmax()

    def forward(self, x):
        x = self.embedding(x)
        x = self.dropout(x)
        x, (ht, ct) = self.lstm(x)
        x = self.dropout(ht[-1])
        x = self.linear(x)
        x = self.softmax(x)
        return x

In [281]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [282]:
# define parameters
VOCAB_SIZE = len(vocabulary)
NO_LAYERS = 1
EMBED_DIM = 100
HIDDEN_DIM = 300
EPOCHS = 100

In [283]:
# initialize a model with trainable embeddings
model_emb = LSTM_Glove(VOCAB_SIZE, EMBED_DIM, HIDDEN_DIM, NO_LAYERS, pretrained_weights).to(device)
lr=0.005
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(model_emb.parameters(), lr=lr)
scheduler = StepLR(optimizer, step_size=70, gamma=0.1)

In [284]:
model_emb

LSTM_Glove(
  (embedding): Embedding(1284, 100, padding_idx=0)
  (lstm): LSTM(100, 300, batch_first=True)
  (linear): Linear(in_features=300, out_features=1284, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (softmax): Softmax(dim=None)
)

# 5. Training and evaluation

In [285]:
# train with training data and validate with validation data
EPOCHS = 250
for epoch in tqdm(range(EPOCHS)):
    model_emb.train()
    train_loss, train_acc, p = train_step(model_emb, whole_loader, criterion, optimizer, scheduler, device)
    print(f"Epochs = {epoch}, Training Loss : {train_loss}; Training Accuracy: {train_acc}; Peprplexity: {p}.\n")
    #model_emb.eval()
    #val_acc, p = val_step(model_emb, valid_loader, criterion, device)
    #print(f"Epochs = {epoch}, Validation Accuracy : {val_acc}; Peprplexity: {p}\n")
    #scheduler.step()

  1%|          | 2/250 [00:00<00:43,  5.64it/s]

Epochs = 0, Training Loss : 7.08818176814488; Training Accuracy: 0.07205679771113702; Peprplexity: 1198.0077795603918.

Epochs = 1, Training Loss : 7.085540748777843; Training Accuracy: 0.07354032001695454; Peprplexity: 1194.8969067770338.



  2%|▏         | 4/250 [00:00<00:42,  5.80it/s]

Epochs = 2, Training Loss : 7.085320639231848; Training Accuracy: 0.07375225177492847; Peprplexity: 1194.5829864986358.

Epochs = 3, Training Loss : 7.085349907950749; Training Accuracy: 0.07375225177492847; Peprplexity: 1194.6504642547122.



  2%|▏         | 6/250 [00:01<00:41,  5.87it/s]

Epochs = 4, Training Loss : 7.085329608311729; Training Accuracy: 0.07375225177492847; Peprplexity: 1194.5985572451636.

Epochs = 5, Training Loss : 7.085278306688581; Training Accuracy: 0.07385821765391544; Peprplexity: 1194.5775611514136.



  3%|▎         | 8/250 [00:01<00:41,  5.81it/s]

Epochs = 6, Training Loss : 7.085429827372233; Training Accuracy: 0.07385821765391544; Peprplexity: 1194.6645081535219.

Epochs = 7, Training Loss : 7.083803169311039; Training Accuracy: 0.07534173995973296; Peprplexity: 1192.721218532986.



  4%|▍         | 10/250 [00:01<00:41,  5.75it/s]

Epochs = 8, Training Loss : 7.082185919322665; Training Accuracy: 0.07693122814453746; Peprplexity: 1190.7309027777778.

Epochs = 9, Training Loss : 7.082055886586507; Training Accuracy: 0.07703719402352442; Peprplexity: 1190.6745314825148.



  5%|▍         | 12/250 [00:02<00:40,  5.81it/s]

Epochs = 10, Training Loss : 7.07630729675293; Training Accuracy: 0.08297128324679454; Peprplexity: 1183.8972807384673.

Epochs = 11, Training Loss : 7.048802792079865; Training Accuracy: 0.10999258238847091; Peprplexity: 1151.7807907831102.



  6%|▌         | 14/250 [00:02<00:42,  5.55it/s]

Epochs = 12, Training Loss : 7.038933549608503; Training Accuracy: 0.12048320440818057; Peprplexity: 1140.4552563864088.

Epochs = 13, Training Loss : 7.036173835633293; Training Accuracy: 0.12323831726184169; Peprplexity: 1137.5447978670634.



  6%|▋         | 16/250 [00:02<00:41,  5.66it/s]

Epochs = 14, Training Loss : 7.033918365599617; Training Accuracy: 0.1256755324785419; Peprplexity: 1134.8480883401537.

Epochs = 15, Training Loss : 7.0289950673542325; Training Accuracy: 0.13065592879092933; Peprplexity: 1129.4217180524554.



  7%|▋         | 18/250 [00:03<00:40,  5.74it/s]

Epochs = 16, Training Loss : 7.024613304743691; Training Accuracy: 0.13531842746635583; Peprplexity: 1124.4398639012898.

Epochs = 17, Training Loss : 7.016930156283909; Training Accuracy: 0.14252410723746953; Peprplexity: 1115.9009757874503.



  8%|▊         | 20/250 [00:03<00:40,  5.72it/s]

Epochs = 18, Training Loss : 6.998504661378407; Training Accuracy: 0.1621277948500583; Peprplexity: 1095.4490676153273.

Epochs = 19, Training Loss : 6.987846518319751; Training Accuracy: 0.172406485111794; Peprplexity: 1083.914530436198.



  9%|▉         | 22/250 [00:03<00:39,  5.82it/s]

Epochs = 20, Training Loss : 6.97815121544732; Training Accuracy: 0.1834269365264385; Peprplexity: 1073.3716227213542.

Epochs = 21, Training Loss : 6.969073113941011; Training Accuracy: 0.19264596799830455; Peprplexity: 1063.731157575335.



 10%|▉         | 24/250 [00:04<00:39,  5.79it/s]

Epochs = 22, Training Loss : 6.964155280400837; Training Accuracy: 0.19635477376284835; Peprplexity: 1058.5137251596602.

Epochs = 23, Training Loss : 6.957778741442968; Training Accuracy: 0.20324255589700116; Peprplexity: 1051.7434818328372.



 10%|█         | 26/250 [00:04<00:38,  5.75it/s]

Epochs = 24, Training Loss : 6.951547675662571; Training Accuracy: 0.20949454275723217; Peprplexity: 1045.338606577071.

Epochs = 25, Training Loss : 6.946739582788377; Training Accuracy: 0.21394510967468475; Peprplexity: 1040.440659295945.



 11%|█         | 28/250 [00:04<00:39,  5.59it/s]

Epochs = 26, Training Loss : 6.93517063534449; Training Accuracy: 0.22591925400021193; Peprplexity: 1028.2723621186756.

Epochs = 27, Training Loss : 6.929929892222087; Training Accuracy: 0.23121754794956023; Peprplexity: 1023.0578274197048.



 12%|█▏        | 30/250 [00:05<00:39,  5.52it/s]

Epochs = 28, Training Loss : 6.925120436956012; Training Accuracy: 0.23492635371410406; Peprplexity: 1018.1754605732267.

Epochs = 29, Training Loss : 6.92023956208002; Training Accuracy: 0.24086044293737416; Peprplexity: 1013.3896300300719.



 13%|█▎        | 32/250 [00:05<00:38,  5.71it/s]

Epochs = 30, Training Loss : 6.916260113791814; Training Accuracy: 0.24456924870191799; Peprplexity: 1008.9845212906126.

Epochs = 31, Training Loss : 6.909073867495098; Training Accuracy: 0.2525166896259404; Peprplexity: 1001.8082846989707.



 14%|█▎        | 34/250 [00:05<00:37,  5.79it/s]

Epochs = 32, Training Loss : 6.902357434469556; Training Accuracy: 0.25802691533326266; Peprplexity: 995.3677368164062.

Epochs = 33, Training Loss : 6.897947773100838; Training Accuracy: 0.263431175161598; Peprplexity: 990.7339119078621.



 14%|█▍        | 36/250 [00:06<00:37,  5.72it/s]

Epochs = 34, Training Loss : 6.895485620650034; Training Accuracy: 0.26533856098336334; Peprplexity: 988.3918369838169.

Epochs = 35, Training Loss : 6.891629309881301; Training Accuracy: 0.26872946911094625; Peprplexity: 984.5226004464286.



 15%|█▌        | 38/250 [00:06<00:37,  5.64it/s]

Epochs = 36, Training Loss : 6.889773792690701; Training Accuracy: 0.27074282081169865; Peprplexity: 982.7446860661582.

Epochs = 37, Training Loss : 6.884168473501054; Training Accuracy: 0.27635901239800786; Peprplexity: 977.3596898639013.



 16%|█▌        | 40/250 [00:07<00:38,  5.45it/s]

Epochs = 38, Training Loss : 6.881682524605403; Training Accuracy: 0.27890219349369505; Peprplexity: 975.0182650127108.

Epochs = 39, Training Loss : 6.8770901210724364; Training Accuracy: 0.2833527604111476; Peprplexity: 970.4468315972222.



 17%|█▋        | 42/250 [00:07<00:37,  5.52it/s]

Epochs = 40, Training Loss : 6.874941530681792; Training Accuracy: 0.2854720779908869; Peprplexity: 968.2710832868304.

Epochs = 41, Training Loss : 6.872897602262951; Training Accuracy: 0.2878033273286002; Peprplexity: 966.2566005161831.



 18%|█▊        | 44/250 [00:07<00:36,  5.63it/s]

Epochs = 42, Training Loss : 6.870869681948707; Training Accuracy: 0.28981667902935254; Peprplexity: 964.4335530598959.

Epochs = 43, Training Loss : 6.868340484679691; Training Accuracy: 0.29225389424605275; Peprplexity: 962.0148528568328.



 18%|█▊        | 46/250 [00:08<00:37,  5.46it/s]

Epochs = 44, Training Loss : 6.865208693913051; Training Accuracy: 0.29564480237363566; Peprplexity: 958.9934740823413.

Epochs = 45, Training Loss : 6.863393238612583; Training Accuracy: 0.2980820175903359; Peprplexity: 957.0668703109499.



 19%|█▉        | 48/250 [00:08<00:36,  5.50it/s]

Epochs = 46, Training Loss : 6.860033126104446; Training Accuracy: 0.3007311645650101; Peprplexity: 954.1368098183284.

Epochs = 47, Training Loss : 6.856564514220707; Training Accuracy: 0.30391014093461904; Peprplexity: 950.828859359499.



 20%|██        | 50/250 [00:08<00:35,  5.61it/s]

Epochs = 48, Training Loss : 6.8516297037639315; Training Accuracy: 0.30963229839991524; Peprplexity: 946.1529531327504.

Epochs = 49, Training Loss : 6.848782652900333; Training Accuracy: 0.31196354773762847; Peprplexity: 943.1668052067832.



 21%|██        | 52/250 [00:09<00:35,  5.62it/s]

Epochs = 50, Training Loss : 6.846598398117792; Training Accuracy: 0.31429479707534175; Peprplexity: 941.1359485444568.

Epochs = 51, Training Loss : 6.842350248306516; Training Accuracy: 0.31916922750874216; Peprplexity: 937.2417544410342.



 22%|██▏       | 54/250 [00:09<00:36,  5.31it/s]

Epochs = 52, Training Loss : 6.840083167666481; Training Accuracy: 0.32128854508848154; Peprplexity: 935.0241592649429.

Epochs = 53, Training Loss : 6.838123041485983; Training Accuracy: 0.322983999152273; Peprplexity: 933.3684547061011.



 22%|██▏       | 56/250 [00:10<00:38,  5.06it/s]

Epochs = 54, Training Loss : 6.835363032325866; Training Accuracy: 0.3251033167320123; Peprplexity: 930.5621822296627.

Epochs = 55, Training Loss : 6.832999017503527; Training Accuracy: 0.32711666843276466; Peprplexity: 928.7931014772446.



 23%|██▎       | 58/250 [00:10<00:36,  5.31it/s]

Epochs = 56, Training Loss : 6.8289248754107765; Training Accuracy: 0.33220303062413903; Peprplexity: 924.8610452318949.

Epochs = 57, Training Loss : 6.825835061451746; Training Accuracy: 0.33464024584083923; Peprplexity: 922.1190282428075.



 24%|██▍       | 60/250 [00:10<00:34,  5.46it/s]

Epochs = 58, Training Loss : 6.825690284607902; Training Accuracy: 0.3350641093567871; Peprplexity: 921.8278256370908.

Epochs = 59, Training Loss : 6.824266388302758; Training Accuracy: 0.3363356999046307; Peprplexity: 920.7595738002232.



 25%|██▍       | 62/250 [00:11<00:33,  5.56it/s]

Epochs = 60, Training Loss : 6.8213857014973955; Training Accuracy: 0.3391967786372788; Peprplexity: 917.7729598756821.

Epochs = 61, Training Loss : 6.820039665888226; Training Accuracy: 0.3406803009430963; Peprplexity: 916.7587425595239.



 26%|██▌       | 64/250 [00:11<00:33,  5.54it/s]

Epochs = 62, Training Loss : 6.818039969792442; Training Accuracy: 0.34216382324891387; Peprplexity: 914.7513524615575.

Epochs = 63, Training Loss : 6.817020446535141; Training Accuracy: 0.34343541379675746; Peprplexity: 913.8045615544395.



 26%|██▋       | 66/250 [00:11<00:32,  5.59it/s]

Epochs = 64, Training Loss : 6.8160605354914585; Training Accuracy: 0.3441771749496662; Peprplexity: 912.9981340680804.

Epochs = 65, Training Loss : 6.8143846496703135; Training Accuracy: 0.3456606972554837; Peprplexity: 911.4808659629216.



 27%|██▋       | 68/250 [00:12<00:31,  5.72it/s]

Epochs = 66, Training Loss : 6.812353505028619; Training Accuracy: 0.3477800148352231; Peprplexity: 909.5406561957466.

Epochs = 67, Training Loss : 6.812170323871431; Training Accuracy: 0.34820387835117095; Peprplexity: 909.4666331457713.



 28%|██▊       | 70/250 [00:12<00:31,  5.77it/s]

Epochs = 68, Training Loss : 6.811476752871559; Training Accuracy: 0.34947546889901454; Peprplexity: 909.1070760091146.

Epochs = 69, Training Loss : 6.809415045238676; Training Accuracy: 0.3512768888417929; Peprplexity: 906.9694582015749.



 29%|██▉       | 72/250 [00:12<00:31,  5.71it/s]

Epochs = 70, Training Loss : 6.808216942681207; Training Accuracy: 0.3525484793896365; Peprplexity: 906.1728292798239.

Epochs = 71, Training Loss : 6.806149407038613; Training Accuracy: 0.35445586521140193; Peprplexity: 904.0387195405506.



 30%|██▉       | 74/250 [00:13<00:30,  5.78it/s]

Epochs = 72, Training Loss : 6.804615626259456; Training Accuracy: 0.35540955812228464; Peprplexity: 902.6176409040179.

Epochs = 73, Training Loss : 6.803183638860309; Training Accuracy: 0.357210978065063; Peprplexity: 901.1949724469866.



 30%|███       | 76/250 [00:13<00:30,  5.64it/s]

Epochs = 74, Training Loss : 6.8016631414019875; Training Accuracy: 0.3586945003708806; Peprplexity: 899.9782133556548.

Epochs = 75, Training Loss : 6.80022314616612; Training Accuracy: 0.35996609091872417; Peprplexity: 898.8253299773686.



 31%|███       | 78/250 [00:13<00:30,  5.73it/s]

Epochs = 76, Training Loss : 6.798921479119195; Training Accuracy: 0.36123768146656776; Peprplexity: 897.4431840200273.

Epochs = 77, Training Loss : 6.798535808684334; Training Accuracy: 0.3618734767404896; Peprplexity: 897.1559855143229.



 32%|███▏      | 80/250 [00:14<00:29,  5.82it/s]

Epochs = 78, Training Loss : 6.797795363834926; Training Accuracy: 0.3629331355303592; Peprplexity: 896.3370419456845.

Epochs = 79, Training Loss : 6.796484424954369; Training Accuracy: 0.36356893080428104; Peprplexity: 895.5358673580109.



 33%|███▎      | 82/250 [00:14<00:29,  5.67it/s]

Epochs = 80, Training Loss : 6.795662758842347; Training Accuracy: 0.3645226237151637; Peprplexity: 894.4485221741692.

Epochs = 81, Training Loss : 6.79425555183774; Training Accuracy: 0.36600614602098125; Peprplexity: 893.4707748170883.



 34%|███▎      | 84/250 [00:14<00:28,  5.75it/s]

Epochs = 82, Training Loss : 6.793308522966173; Training Accuracy: 0.366853873052877; Peprplexity: 892.6102789015997.

Epochs = 83, Training Loss : 6.791204308706616; Training Accuracy: 0.3690791565116033; Peprplexity: 890.6011391291543.



 34%|███▍      | 86/250 [00:15<00:29,  5.61it/s]

Epochs = 84, Training Loss : 6.789694067031618; Training Accuracy: 0.37035074705944687; Peprplexity: 889.2713458348834.

Epochs = 85, Training Loss : 6.789351054600307; Training Accuracy: 0.37066864469640776; Peprplexity: 888.9865519205729.



 35%|███▌      | 88/250 [00:15<00:28,  5.68it/s]

Epochs = 86, Training Loss : 6.787493667905292; Training Accuracy: 0.37289392815513406; Peprplexity: 887.2646029033358.

Epochs = 87, Training Loss : 6.785772210075741; Training Accuracy: 0.37416551870297765; Peprplexity: 885.9964376782614.



 36%|███▌      | 90/250 [00:15<00:28,  5.71it/s]

Epochs = 88, Training Loss : 6.786028301905072; Training Accuracy: 0.37416551870297765; Peprplexity: 886.1172756618923.

Epochs = 89, Training Loss : 6.784854086618575; Training Accuracy: 0.3750132457348734; Peprplexity: 885.0687527126736.



 37%|███▋      | 92/250 [00:16<00:27,  5.75it/s]

Epochs = 90, Training Loss : 6.783912514883374; Training Accuracy: 0.37639080216170395; Peprplexity: 884.2884686182416.

Epochs = 91, Training Loss : 6.780419243706597; Training Accuracy: 0.3794638126523259; Peprplexity: 881.1189352368551.



 38%|███▊      | 94/250 [00:16<00:26,  5.82it/s]

Epochs = 92, Training Loss : 6.779475302923293; Training Accuracy: 0.3809473349581435; Peprplexity: 880.2767731197297.

Epochs = 93, Training Loss : 6.777969617692251; Training Accuracy: 0.3822189255059871; Peprplexity: 878.9484495132689.



 38%|███▊      | 96/250 [00:17<00:26,  5.82it/s]

Epochs = 94, Training Loss : 6.776223818461101; Training Accuracy: 0.38380841369079155; Peprplexity: 877.6411355639261.

Epochs = 95, Training Loss : 6.775590344080849; Training Accuracy: 0.3844442089647134; Peprplexity: 876.7574191623264.



 39%|███▉      | 98/250 [00:17<00:27,  5.62it/s]

Epochs = 96, Training Loss : 6.774963855743408; Training Accuracy: 0.38486807248066124; Peprplexity: 876.2543247767857.

Epochs = 97, Training Loss : 6.773093942611936; Training Accuracy: 0.3864575606654657; Peprplexity: 874.5624786861359.



 40%|████      | 100/250 [00:17<00:27,  5.55it/s]

Epochs = 98, Training Loss : 6.7737277197459385; Training Accuracy: 0.3865635265444527; Peprplexity: 875.1923682803199.

Epochs = 99, Training Loss : 6.77169408495464; Training Accuracy: 0.3881530147292572; Peprplexity: 873.4641142345611.



 41%|████      | 102/250 [00:18<00:26,  5.63it/s]

Epochs = 100, Training Loss : 6.7709789805942115; Training Accuracy: 0.388788810003179; Peprplexity: 873.0996345641121.

Epochs = 101, Training Loss : 6.770402204422724; Training Accuracy: 0.3900604005510226; Peprplexity: 872.3250112382192.



 42%|████▏     | 104/250 [00:18<00:25,  5.76it/s]

Epochs = 102, Training Loss : 6.769752600836376; Training Accuracy: 0.3903782981879835; Peprplexity: 871.5094371977307.

Epochs = 103, Training Loss : 6.768361742534335; Training Accuracy: 0.3921797181307619; Peprplexity: 870.4900377061632.



 42%|████▏     | 106/250 [00:18<00:25,  5.71it/s]

Epochs = 104, Training Loss : 6.766380461435469; Training Accuracy: 0.39313341104164456; Peprplexity: 868.9247407459077.

Epochs = 105, Training Loss : 6.766163084242079; Training Accuracy: 0.3939811380735403; Peprplexity: 868.323503766741.



 43%|████▎     | 108/250 [00:19<00:25,  5.67it/s]

Epochs = 106, Training Loss : 6.763555647834899; Training Accuracy: 0.39663028504821446; Peprplexity: 866.4417327396453.

Epochs = 107, Training Loss : 6.762830492049929; Training Accuracy: 0.3974780120801102; Peprplexity: 865.573224748884.



 44%|████▍     | 110/250 [00:19<00:24,  5.75it/s]

Epochs = 108, Training Loss : 6.761930617075118; Training Accuracy: 0.3984317049909929; Peprplexity: 864.9005223834325.

Epochs = 109, Training Loss : 6.7611089661007835; Training Accuracy: 0.3993853979018756; Peprplexity: 864.1264435298859.



 45%|████▍     | 112/250 [00:19<00:24,  5.59it/s]

Epochs = 110, Training Loss : 6.760331744239444; Training Accuracy: 0.3992794320228886; Peprplexity: 863.5373167007689.

Epochs = 111, Training Loss : 6.758098526606484; Training Accuracy: 0.4021405107555367; Peprplexity: 861.6423921130952.



 46%|████▌     | 114/250 [00:20<00:23,  5.67it/s]

Epochs = 112, Training Loss : 6.758470701792883; Training Accuracy: 0.4015047154816149; Peprplexity: 861.9231945219494.

Epochs = 113, Training Loss : 6.757898323119632; Training Accuracy: 0.40235244251351066; Peprplexity: 861.4829692537822.



 46%|████▋     | 116/250 [00:20<00:25,  5.33it/s]

Epochs = 114, Training Loss : 6.758018448239281; Training Accuracy: 0.40203454487654977; Peprplexity: 861.6558925083706.

Epochs = 115, Training Loss : 6.7579558160569935; Training Accuracy: 0.4022464766345237; Peprplexity: 861.4057210286459.



 47%|████▋     | 118/250 [00:20<00:23,  5.57it/s]

Epochs = 116, Training Loss : 6.756908901154049; Training Accuracy: 0.4030942036664194; Peprplexity: 860.5185973152281.

Epochs = 117, Training Loss : 6.75668642255995; Training Accuracy: 0.40383596481932815; Peprplexity: 860.5596526615203.



 48%|████▊     | 120/250 [00:21<00:23,  5.51it/s]

Epochs = 118, Training Loss : 6.755892322176979; Training Accuracy: 0.4040478965773021; Peprplexity: 859.8067975725446.

Epochs = 119, Training Loss : 6.755029383159819; Training Accuracy: 0.4046836918512239; Peprplexity: 859.293701171875.



 49%|████▉     | 122/250 [00:21<00:22,  5.64it/s]

Epochs = 120, Training Loss : 6.753491182175893; Training Accuracy: 0.4065910776729893; Peprplexity: 857.7046150328621.

Epochs = 121, Training Loss : 6.753616840120346; Training Accuracy: 0.40584931652008055; Peprplexity: 857.8667747860864.



 50%|████▉     | 124/250 [00:22<00:22,  5.69it/s]

Epochs = 122, Training Loss : 6.752512946961418; Training Accuracy: 0.40722687294691107; Peprplexity: 856.8186161101811.

Epochs = 123, Training Loss : 6.751261635432168; Training Accuracy: 0.4089223270107026; Peprplexity: 855.9804261222719.



 50%|█████     | 126/250 [00:22<00:21,  5.74it/s]

Epochs = 124, Training Loss : 6.750295290871272; Training Accuracy: 0.40987601992158523; Peprplexity: 854.8928271096851.

Epochs = 125, Training Loss : 6.751201538812547; Training Accuracy: 0.4088163611317156; Peprplexity: 855.7308194599455.



 51%|█████     | 128/250 [00:22<00:21,  5.64it/s]

Epochs = 126, Training Loss : 6.749999886467343; Training Accuracy: 0.4097700540425983; Peprplexity: 854.7931983584449.

Epochs = 127, Training Loss : 6.750119201720707; Training Accuracy: 0.40924022464766346; Peprplexity: 854.9486403692337.



 52%|█████▏    | 130/250 [00:23<00:21,  5.70it/s]

Epochs = 128, Training Loss : 6.748052430531335; Training Accuracy: 0.41167743986436367; Peprplexity: 852.9551285032242.

Epochs = 129, Training Loss : 6.747185268099346; Training Accuracy: 0.41273709865423336; Peprplexity: 852.3438623821925.



 53%|█████▎    | 132/250 [00:23<00:20,  5.65it/s]

Epochs = 130, Training Loss : 6.746931893484933; Training Accuracy: 0.41305499629119424; Peprplexity: 852.2538171192956.

Epochs = 131, Training Loss : 6.747047886015877; Training Accuracy: 0.41294903041220726; Peprplexity: 852.5235198490203.



 54%|█████▎    | 134/250 [00:23<00:20,  5.76it/s]

Epochs = 132, Training Loss : 6.745524928683326; Training Accuracy: 0.41432658683903784; Peprplexity: 850.8636445545014.

Epochs = 133, Training Loss : 6.743421758924212; Training Accuracy: 0.41644590441877716; Peprplexity: 849.2702249193949.



 54%|█████▍    | 136/250 [00:24<00:19,  5.74it/s]

Epochs = 134, Training Loss : 6.743155403742715; Training Accuracy: 0.416869767934725; Peprplexity: 849.0818985227554.

Epochs = 135, Training Loss : 6.742222044203016; Training Accuracy: 0.41761152908763377; Peprplexity: 848.028322250124.



 55%|█████▌    | 138/250 [00:24<00:20,  5.57it/s]

Epochs = 136, Training Loss : 6.740942546299526; Training Accuracy: 0.41898908551446434; Peprplexity: 847.0400729709202.

Epochs = 137, Training Loss : 6.740248324379088; Training Accuracy: 0.4203666419412949; Peprplexity: 846.3804301912822.



 56%|█████▌    | 140/250 [00:24<00:19,  5.64it/s]

Epochs = 138, Training Loss : 6.73880734519353; Training Accuracy: 0.42121436897319064; Peprplexity: 845.2551957387773.

Epochs = 139, Training Loss : 6.7385360475570435; Training Accuracy: 0.4210024372152167; Peprplexity: 844.9822019546751.



 57%|█████▋    | 142/250 [00:25<00:19,  5.60it/s]

Epochs = 140, Training Loss : 6.736961160387311; Training Accuracy: 0.423121754794956; Peprplexity: 843.6489868164062.

Epochs = 141, Training Loss : 6.734797666943263; Training Accuracy: 0.4253470382536823; Peprplexity: 841.754652235243.



 58%|█████▊    | 144/250 [00:25<00:19,  5.43it/s]

Epochs = 142, Training Loss : 6.733098120916457; Training Accuracy: 0.4269365264384868; Peprplexity: 840.3548981197297.

Epochs = 143, Training Loss : 6.733358118269178; Training Accuracy: 0.4266186288015259; Peprplexity: 840.6870669410342.



 58%|█████▊    | 146/250 [00:25<00:18,  5.60it/s]

Epochs = 144, Training Loss : 6.733001936049688; Training Accuracy: 0.42736038995443465; Peprplexity: 840.1999870179192.

Epochs = 145, Training Loss : 6.731340809473916; Training Accuracy: 0.4289498781392392; Peprplexity: 838.7730112227183.



 59%|█████▉    | 148/250 [00:26<00:18,  5.66it/s]

Epochs = 146, Training Loss : 6.729864529200962; Training Accuracy: 0.4304334004450567; Peprplexity: 837.6896149166047.

Epochs = 147, Training Loss : 6.72821737471081; Training Accuracy: 0.4322348203878351; Peprplexity: 836.1109367249504.



 60%|██████    | 150/250 [00:26<00:17,  5.60it/s]

Epochs = 148, Training Loss : 6.726322370862204; Training Accuracy: 0.43435413796757444; Peprplexity: 834.7489846850199.

Epochs = 149, Training Loss : 6.724677600557842; Training Accuracy: 0.43562572851541803; Peprplexity: 833.3537684849331.



 61%|██████    | 152/250 [00:27<00:17,  5.54it/s]

Epochs = 150, Training Loss : 6.724252307225788; Training Accuracy: 0.4361555579103529; Peprplexity: 832.9986426943824.

Epochs = 151, Training Loss : 6.722408673119923; Training Accuracy: 0.4379569778531313; Peprplexity: 831.6357654389881.



 62%|██████▏   | 154/250 [00:27<00:17,  5.53it/s]

Epochs = 152, Training Loss : 6.72046432797871; Training Accuracy: 0.44018226131185756; Peprplexity: 829.9333835177952.

Epochs = 153, Training Loss : 6.721023362780374; Training Accuracy: 0.4392285684009749; Peprplexity: 830.4501943436879.



 62%|██████▏   | 156/250 [00:27<00:16,  5.66it/s]

Epochs = 154, Training Loss : 6.71961520210145; Training Accuracy: 0.4405001589488185; Peprplexity: 829.2610870845734.

Epochs = 155, Training Loss : 6.71879445938837; Training Accuracy: 0.4413478859807142; Peprplexity: 828.4684496682788.



 63%|██████▎   | 158/250 [00:28<00:16,  5.69it/s]

Epochs = 156, Training Loss : 6.717549392155239; Training Accuracy: 0.4431493059234926; Peprplexity: 827.1745334201389.

Epochs = 157, Training Loss : 6.716829337770977; Training Accuracy: 0.4430433400445057; Peprplexity: 826.9394075908358.



 64%|██████▎   | 159/250 [00:28<00:16,  5.61it/s]

Epochs = 158, Training Loss : 6.715008463178362; Training Accuracy: 0.445268623503232; Peprplexity: 825.3109692770337.



 64%|██████▍   | 160/250 [00:28<00:16,  5.39it/s]

Epochs = 159, Training Loss : 6.7146506233820835; Training Accuracy: 0.4453745893822189; Peprplexity: 825.2109607514881.



 65%|██████▍   | 162/250 [00:28<00:16,  5.39it/s]

Epochs = 160, Training Loss : 6.713981840345594; Training Accuracy: 0.4457984528981668; Peprplexity: 824.6538192506821.

Epochs = 161, Training Loss : 6.713780191209581; Training Accuracy: 0.4461163505351277; Peprplexity: 824.3355189732143.



 66%|██████▌   | 164/250 [00:29<00:15,  5.58it/s]

Epochs = 162, Training Loss : 6.712717851003011; Training Accuracy: 0.44781180459891917; Peprplexity: 823.4124988374256.

Epochs = 163, Training Loss : 6.711091109684536; Training Accuracy: 0.4489774292677758; Peprplexity: 821.9655713278149.



 66%|██████▋   | 166/250 [00:29<00:15,  5.50it/s]

Epochs = 164, Training Loss : 6.710421721140544; Training Accuracy: 0.4499311221786585; Peprplexity: 821.6321042984251.

Epochs = 165, Training Loss : 6.710693548596095; Training Accuracy: 0.4499311221786585; Peprplexity: 821.7835625542534.



 67%|██████▋   | 168/250 [00:29<00:14,  5.59it/s]

Epochs = 166, Training Loss : 6.70918433628385; Training Accuracy: 0.45088481508954115; Peprplexity: 820.6363573831226.

Epochs = 167, Training Loss : 6.707180787646581; Training Accuracy: 0.4526862350323196; Peprplexity: 818.9745405893477.



 68%|██████▊   | 170/250 [00:30<00:14,  5.39it/s]

Epochs = 168, Training Loss : 6.706596313960969; Training Accuracy: 0.45342799618522833; Peprplexity: 818.7831653413318.

Epochs = 169, Training Loss : 6.706491387079632; Training Accuracy: 0.4537458938221893; Peprplexity: 818.4161066933284.



 69%|██████▉   | 172/250 [00:30<00:14,  5.44it/s]

Epochs = 170, Training Loss : 6.70499461037772; Training Accuracy: 0.4549115184910459; Peprplexity: 817.2247779482886.

Epochs = 171, Training Loss : 6.705310624743265; Training Accuracy: 0.454593620854085; Peprplexity: 817.4254489474827.



 70%|██████▉   | 174/250 [00:31<00:13,  5.62it/s]

Epochs = 172, Training Loss : 6.703163298349532; Training Accuracy: 0.4568189043128113; Peprplexity: 815.6887817382812.

Epochs = 173, Training Loss : 6.701749453468929; Training Accuracy: 0.4583024266186288; Peprplexity: 814.5663277762277.



 70%|███████   | 176/250 [00:31<00:13,  5.59it/s]

Epochs = 174, Training Loss : 6.701213791256859; Training Accuracy: 0.4589382218925506; Peprplexity: 814.0135333348834.

Epochs = 175, Training Loss : 6.701312148381794; Training Accuracy: 0.45883225601356364; Peprplexity: 814.2335040380084.



 71%|███████   | 178/250 [00:31<00:12,  5.73it/s]

Epochs = 176, Training Loss : 6.699059335012285; Training Accuracy: 0.46095157359330297; Peprplexity: 812.4140954396081.

Epochs = 177, Training Loss : 6.698262169247582; Training Accuracy: 0.4620112323831726; Peprplexity: 811.5991637214781.



 72%|███████▏  | 180/250 [00:32<00:12,  5.71it/s]

Epochs = 178, Training Loss : 6.697208245595296; Training Accuracy: 0.4630708911730423; Peprplexity: 811.028322250124.

Epochs = 179, Training Loss : 6.696805946410648; Training Accuracy: 0.46338878881000317; Peprplexity: 810.5546235584077.



 73%|███████▎  | 182/250 [00:32<00:11,  5.69it/s]

Epochs = 180, Training Loss : 6.696131411052885; Training Accuracy: 0.4643424817208859; Peprplexity: 810.2158920045883.

Epochs = 181, Training Loss : 6.6945651220896885; Training Accuracy: 0.46593196990569036; Peprplexity: 808.564947219122.



 74%|███████▎  | 184/250 [00:32<00:11,  5.79it/s]

Epochs = 182, Training Loss : 6.693763392312186; Training Accuracy: 0.4663558334216382; Peprplexity: 807.9633314344618.

Epochs = 183, Training Loss : 6.692343628595745; Training Accuracy: 0.46815725336441666; Peprplexity: 806.8986419193328.



 74%|███████▍  | 186/250 [00:33<00:11,  5.47it/s]

Epochs = 184, Training Loss : 6.691843555087135; Training Accuracy: 0.4690049803963124; Peprplexity: 806.5090012323288.

Epochs = 185, Training Loss : 6.689890377105228; Training Accuracy: 0.47059446858111686; Peprplexity: 804.8065476190476.



 75%|███████▌  | 188/250 [00:33<00:11,  5.61it/s]

Epochs = 186, Training Loss : 6.689660776229132; Training Accuracy: 0.47059446858111686; Peprplexity: 804.9206814236111.

Epochs = 187, Training Loss : 6.6876414314148915; Training Accuracy: 0.47260782028186926; Peprplexity: 802.9730834960938.



 76%|███████▌  | 190/250 [00:33<00:10,  5.79it/s]

Epochs = 188, Training Loss : 6.687790757133847; Training Accuracy: 0.4721839567659214; Peprplexity: 803.2502141074528.

Epochs = 189, Training Loss : 6.685876588972788; Training Accuracy: 0.4737734449507259; Peprplexity: 801.5284472268726.



 77%|███████▋  | 192/250 [00:34<00:10,  5.72it/s]

Epochs = 190, Training Loss : 6.684870931837294; Training Accuracy: 0.4755748648935043; Peprplexity: 801.0770825582837.

Epochs = 191, Training Loss : 6.6836122785295755; Training Accuracy: 0.4770583871993218; Peprplexity: 799.8635341099331.



 78%|███████▊  | 194/250 [00:34<00:09,  5.81it/s]

Epochs = 192, Training Loss : 6.6824672184293235; Training Accuracy: 0.4774822507152697; Peprplexity: 798.7644546750992.

Epochs = 193, Training Loss : 6.681244850158691; Training Accuracy: 0.479495602416022; Peprplexity: 797.8841543046254.



 78%|███████▊  | 196/250 [00:34<00:09,  5.84it/s]

Epochs = 194, Training Loss : 6.681045138646686; Training Accuracy: 0.47917770477906113; Peprplexity: 797.8749321831597.

Epochs = 195, Training Loss : 6.678822963956803; Training Accuracy: 0.48129702235880045; Peprplexity: 795.9187534877232.



 79%|███████▉  | 198/250 [00:35<00:08,  5.79it/s]

Epochs = 196, Training Loss : 6.679368185618567; Training Accuracy: 0.4807671929638656; Peprplexity: 796.5348481677827.

Epochs = 197, Training Loss : 6.6786105822003075; Training Accuracy: 0.4819328176327223; Peprplexity: 795.6317080543155.



 80%|████████  | 200/250 [00:35<00:08,  5.74it/s]

Epochs = 198, Training Loss : 6.676801128992959; Training Accuracy: 0.48362827169651373; Peprplexity: 794.3043232266865.

Epochs = 199, Training Loss : 6.676172907390292; Training Accuracy: 0.48384020345448764; Peprplexity: 793.9258791000124.



 81%|████████  | 202/250 [00:35<00:08,  5.84it/s]

Epochs = 200, Training Loss : 6.674909024011521; Training Accuracy: 0.4852177598813182; Peprplexity: 792.8534071180555.

Epochs = 201, Training Loss : 6.675546850476946; Training Accuracy: 0.48468793048638337; Peprplexity: 793.2409125434028.



 82%|████████▏ | 204/250 [00:36<00:08,  5.68it/s]

Epochs = 202, Training Loss : 6.674839451199486; Training Accuracy: 0.4852177598813182; Peprplexity: 792.8983948722719.

Epochs = 203, Training Loss : 6.6733730104234485; Training Accuracy: 0.48691321394510967; Peprplexity: 791.7573406885541.



 82%|████████▏ | 206/250 [00:36<00:07,  5.63it/s]

Epochs = 204, Training Loss : 6.672328699202764; Training Accuracy: 0.4878669068559924; Peprplexity: 791.0230480375744.

Epochs = 205, Training Loss : 6.672050627451094; Training Accuracy: 0.4880788386139663; Peprplexity: 790.6431070963541.



 83%|████████▎ | 208/250 [00:36<00:07,  5.75it/s]

Epochs = 206, Training Loss : 6.670887651897612; Training Accuracy: 0.48945639504079685; Peprplexity: 789.7120535714286.

Epochs = 207, Training Loss : 6.670335436624194; Training Accuracy: 0.4893504291618099; Peprplexity: 789.1973799448165.



 84%|████████▍ | 210/250 [00:37<00:07,  5.67it/s]

Epochs = 208, Training Loss : 6.670016636924138; Training Accuracy: 0.49072798558864045; Peprplexity: 789.0758250403026.

Epochs = 209, Training Loss : 6.668431304749989; Training Accuracy: 0.4915757126205362; Peprplexity: 787.9175482855903.



 85%|████████▍ | 212/250 [00:37<00:06,  5.72it/s]

Epochs = 210, Training Loss : 6.668356834896027; Training Accuracy: 0.49178764437851014; Peprplexity: 787.6306045774429.

Epochs = 211, Training Loss : 6.6677262518141; Training Accuracy: 0.4925294055314189; Peprplexity: 787.3572494264633.



 86%|████████▌ | 214/250 [00:38<00:06,  5.52it/s]

Epochs = 212, Training Loss : 6.666652588617234; Training Accuracy: 0.4936950302002755; Peprplexity: 786.373794797867.

Epochs = 213, Training Loss : 6.665187964363704; Training Accuracy: 0.4947546889901452; Peprplexity: 785.3494049556672.



 86%|████████▋ | 216/250 [00:38<00:06,  5.50it/s]

Epochs = 214, Training Loss : 6.665368049863785; Training Accuracy: 0.49517855250609305; Peprplexity: 785.2440175858754.

Epochs = 215, Training Loss : 6.663938961331806; Training Accuracy: 0.4962382112959627; Peprplexity: 784.3791155133929.



 87%|████████▋ | 218/250 [00:38<00:05,  5.62it/s]

Epochs = 216, Training Loss : 6.663534066033741; Training Accuracy: 0.49666207481191055; Peprplexity: 783.8805367606027.

Epochs = 217, Training Loss : 6.663252883487278; Training Accuracy: 0.49729787008583237; Peprplexity: 783.9207192072793.



 88%|████████▊ | 220/250 [00:39<00:05,  5.48it/s]

Epochs = 218, Training Loss : 6.662172968425448; Training Accuracy: 0.4980396312387411; Peprplexity: 782.9772135416666.

Epochs = 219, Training Loss : 6.661804895552378; Training Accuracy: 0.49878139239164987; Peprplexity: 782.6328560965402.



 89%|████████▉ | 222/250 [00:39<00:05,  5.55it/s]

Epochs = 220, Training Loss : 6.66075340906779; Training Accuracy: 0.4990992900286108; Peprplexity: 781.8346247597347.

Epochs = 221, Training Loss : 6.659747645968483; Training Accuracy: 0.5005828123344284; Peprplexity: 780.9088280087426.



 90%|████████▉ | 224/250 [00:39<00:04,  5.57it/s]

Epochs = 222, Training Loss : 6.6592526284475175; Training Accuracy: 0.5010066758503762; Peprplexity: 780.5958513532366.

Epochs = 223, Training Loss : 6.658514802418058; Training Accuracy: 0.501748437003285; Peprplexity: 779.9338068886409.



 90%|█████████ | 226/250 [00:40<00:04,  5.45it/s]

Epochs = 224, Training Loss : 6.658040985228523; Training Accuracy: 0.501748437003285; Peprplexity: 779.5454973493304.

Epochs = 225, Training Loss : 6.658313410622733; Training Accuracy: 0.501748437003285; Peprplexity: 779.772966657366.



 91%|█████████ | 228/250 [00:40<00:04,  5.49it/s]

Epochs = 226, Training Loss : 6.657376993270147; Training Accuracy: 0.5025961640351807; Peprplexity: 778.8858477880084.

Epochs = 227, Training Loss : 6.656237481132386; Training Accuracy: 0.5039737204620113; Peprplexity: 778.1535838293651.



 92%|█████████▏| 230/250 [00:40<00:03,  5.63it/s]

Epochs = 228, Training Loss : 6.656813250647651; Training Accuracy: 0.5032319593091025; Peprplexity: 778.6879204644097.

Epochs = 229, Training Loss : 6.65582123256865; Training Accuracy: 0.5040796863409982; Peprplexity: 777.9225124782986.



 93%|█████████▎| 232/250 [00:41<00:03,  5.40it/s]

Epochs = 230, Training Loss : 6.655021788581969; Training Accuracy: 0.504821447493907; Peprplexity: 777.3730555943081.

Epochs = 231, Training Loss : 6.654933558570014; Training Accuracy: 0.5051393451308679; Peprplexity: 777.3305237785219.



 94%|█████████▎| 234/250 [00:41<00:02,  5.46it/s]

Epochs = 232, Training Loss : 6.655303796132405; Training Accuracy: 0.5055632086468157; Peprplexity: 777.5021633572048.

Epochs = 233, Training Loss : 6.6537299156188965; Training Accuracy: 0.5067288333156723; Peprplexity: 776.4983791775173.



 94%|█████████▍| 236/250 [00:42<00:02,  5.67it/s]

Epochs = 234, Training Loss : 6.652736951434423; Training Accuracy: 0.5073646285895942; Peprplexity: 775.6349322606646.

Epochs = 235, Training Loss : 6.651493693154956; Training Accuracy: 0.5086362191374377; Peprplexity: 774.633541046627.



 95%|█████████▌| 238/250 [00:42<00:02,  5.50it/s]

Epochs = 236, Training Loss : 6.650799304719955; Training Accuracy: 0.5094839461693335; Peprplexity: 774.040769546751.

Epochs = 237, Training Loss : 6.650175382220556; Training Accuracy: 0.5102257073222423; Peprplexity: 773.7254183330233.



 96%|█████████▌| 240/250 [00:42<00:01,  5.58it/s]

Epochs = 238, Training Loss : 6.650053955259777; Training Accuracy: 0.5096958779273074; Peprplexity: 773.3705153692337.

Epochs = 239, Training Loss : 6.649900519658649; Training Accuracy: 0.5100137755642683; Peprplexity: 773.3927699497768.



 97%|█████████▋| 242/250 [00:43<00:01,  5.58it/s]

Epochs = 240, Training Loss : 6.648770839448959; Training Accuracy: 0.5116032637490728; Peprplexity: 772.4769316173736.

Epochs = 241, Training Loss : 6.648437908717564; Training Accuracy: 0.5114972978700858; Peprplexity: 772.1070944165426.



 98%|█████████▊| 244/250 [00:43<00:01,  5.51it/s]

Epochs = 242, Training Loss : 6.6485618561033215; Training Accuracy: 0.5113913319910989; Peprplexity: 772.1492144872271.

Epochs = 243, Training Loss : 6.648573799738808; Training Accuracy: 0.5117092296280598; Peprplexity: 772.5852758014013.



 98%|█████████▊| 246/250 [00:43<00:00,  5.63it/s]

Epochs = 244, Training Loss : 6.647709816221207; Training Accuracy: 0.5123450249019815; Peprplexity: 771.6923053075396.

Epochs = 245, Training Loss : 6.646436842661055; Training Accuracy: 0.5132987178128643; Peprplexity: 770.6211131262401.



 99%|█████████▉| 248/250 [00:44<00:00,  5.64it/s]

Epochs = 246, Training Loss : 6.6464888784620495; Training Accuracy: 0.5131927519338773; Peprplexity: 770.7349882579986.

Epochs = 247, Training Loss : 6.644553706759498; Training Accuracy: 0.5151001377556427; Peprplexity: 769.3047601609003.



100%|██████████| 250/250 [00:44<00:00,  5.61it/s]

Epochs = 248, Training Loss : 6.644396910591731; Training Accuracy: 0.5158418989085515; Peprplexity: 769.2342141772074.

Epochs = 249, Training Loss : 6.644499884711371; Training Accuracy: 0.5154180353926036; Peprplexity: 769.0108148484003.






In [286]:
def continue_seq(tokens, pred):
    w = []
    for i in range(pred, 0, -1):
        w.append(vocabulary[tokens[-pred+1]])
    t = torch.IntTensor([w]).to(device)
    pred = model_emb.forward(torch.reshape(t, (1, pred)))
    
    #a = torch.argmax(pred, dim=1)
    #rand_idx = 0
    a = torch.topk(pred.flatten(), 2).indices
    rand_idx = torch.randint(low=0, high=2, size=(1, 1))[0][0].item()
    new_word = vocabulary.lookup_token(a[rand_idx].item())
    return new_word


In [287]:
starts = text.split("EOS")

In [288]:
def beautify(word_list):
  word_list = word_list[1:]
  word_list[-1] = "."
  text = re.sub(r'\s+([?.,!"])', r'\1', " ".join(word_list))
  return text.capitalize()


In [290]:
for count in range(20):
  i = np.random.randint(0, len(starts))
  start = starts[i].split()[:WORD_FOR_PREDICT]
  while start[-1] != 'EOS':
    start.append(continue_seq(start, WORD_FOR_PREDICT))
  if len(start) <= 5 or len(start) > 25:
    continue
  print(f'{count+1}. {beautify(start)}')



1. So much i am must have you are great.
2. We to you are the world.
5. You are when i so this hard.
7. As my heart, i am so, you know i am so, and i am by my joy me.
8. You know i am by, i to hold you are the most thing i am so for the world.
10. You are so much i am by, i am by, i if anything.
11. Love, i am so difficult to you know i am by my heart.
12. But 's the world.
14. I am so much.
16. Never saw you know i so much.
17. You know that i am so, i to you are a part of nights other.
20. I and special in world.


# Epochs experiment

RSMprop  
HID_DIM = 110  
LR = 0.005  
Train on the whole dataset.  

50 epochs, peprlexity = 920
* What i feel can that not my not life have, the i best can that not you the, way i.
* You have touched to am be you that.
* Your name is for that my i life have, to my be, the i way can that be i.
* Ever since you the up most life,, i i can can not with have i that of the life best, of i you have, a most, i i can can to to you you life up, a i of can my be, with i you have up the the way most.
* I want to and you me.
* You have the the most of i my have you that up you.
* You continue to with you and.
* Your positivity is that to i be can a not with the i of you my.
* That thing you that.
* You have such the that way i that have my that life i with have you to.

100 epochs, perplexity = 869
* I get down to and you that are i and can that to i you have.
* You are better.
* When i am be with the me way for.
* Looking back to and you i are have.
* Everything would be be a.
* The day you in are my.
* Your hair looks up that.
* You know you that are i the can way be.
* You are everything.
* You bring out to.

150 epochs, perplexity = 859
* I bet you you.
* You are my a for with my and many me in.
* Let your eyes for and you i are to.
* You are simply a and of me my, life i with can me for, me i the have way me you with.
* That thing you that.
* I need you be to.
* How did you i are can the for way me.
* The love you is.
* In the beginning, most 's, you be are a the you most are in.
* I always knew my that and you that are the.

200 epochs, perplexity = 859
* You inspire me to in you a are of.
* You blew my can life be with.
* I am yours in are my a many of.
* Your ability to and you me.
* Your heart must.
* You are a.
* It is not that be me the, most the in way a a with way just.
* Today i have have and me i, have be a a of with my and for i my have life the in way my you and.
* You always make, me the, most my in and the that way you as are my a can you for are me.
* Though we were to my be and in i my can life not and the i most can, have can me for are me the, way i as have you a.



# Optimizer experiment

With RSMprop

Epochs = 219, Training Loss : 6.870341967387372; Training Accuracy: 0.3340213695968917; Peprplexity: 964.6982421875.

Epochs = 219, Validation Accuracy : 0.12114656571119524; Peprplexity: 1192.6570595189144

Sentences:
The feelings, have for the of a day and the world.

* You've got you when of the of my heart and i i have for me of the day and you smile and i i have for my love, have the world, i used in the day world and you.
* Having met you smile and i i used to of a of my heart.
* And i i i have the world, and i have for the of a of my love and the world and you.
* I send of a world.
* My happiness than i i have the of a of the of a of a of a world.
* Who raised, have to of my heart in the world, i have the day and you.
* Gazing into day and i have for the day and i have the of a day world and i used to you smile than i used in my love and you.
* Your kind.
* If my heart and the day.

With Adam

Epochs = 219, Training Loss : 6.9535920016736865; Training Accuracy: 0.251699854298203; Peprplexity: 1048.0043783532567.

Epochs = 219, Validation Accuracy : 0.11898323418063818; Peprplexity: 1194.3838597347863

* I am thank and me, me, and in love in the me that you, you my.
* Dearest — the me that me, you, me that me that you that you that the me my love and the in love and in me my of me my.
* Your name implies for the you, you, and in love and the you, and in me, and me my love and me my love you my love in the you, and the you that me that the you, you that you that you that me that you my.
* When we of you, you my of the me that me my.
* Thank you my of you my of you that me that me, me my of me my of you, and me that you, me that you, you my of you, and me, you my.
* Through this in love and in love and in the me that you that you that the in me, you, me, and in the in the you that the in the in love in me, you, you that me my love you that the you, you my of me that you, me, me my love and in the me, and the in the in the in the in love and the in the me that me my of you that you my love in me that me my love and in love you, me, and in me, me my of me that the in love you that the me, and the me that the me, you my of you, and the in me my love and me that me that me, me, you, you, you, and in me that me my of the me my.
* You have for you that you that you my love in the you that me that the you that the in love you my love you that you that me that me, and the in me that the in love you, and in love in love you, me that you my love you, and the me my.
* Thank you, me, and in me, me my.
* If i not thank and in love you, you, you my.
* Being with in love you, and in love in the you my.