<a href="https://colab.research.google.com/github/martinpius/PYTORCH/blob/main/Neural_Machine_Translation_With_Attention_Germany_English_Translator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount("/content/drive", force_remount = True)
try:
  COLAB = True
  import torch
  print(f"You are on CoLab with torch version {torch.__version__}")
except Exception as e:
  print(f"{type(e)}: {e}\n>>>>please correct {type(e)} and re-load")
  COLAB = False
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
def time_fmt(t: float = 123.817)->float:
  h = int(t / (60 * 60))
  m = int(t % (60 * 60) / 60)
  s = int(t % 60)
  return f"hrs: {h}: min: {m:>02}: sec: {s:>05.2f}"
print(f">>>>time formating\ttesting the function.....\n>>>>time elapsed\t{time_fmt()}")

Mounted at /content/drive
You are on CoLab with torch version 1.8.1+cu101
>>>>time formating	testing the function.....
>>>>time elapsed	hrs: 0: min: 02: sec: 03.00


In [2]:
#In this notebook we will build a simple google translater application that translates
#germany language to english: The model is a typical autoencoder architecture that is
#built under rnn with attention in Pytorch.
#For demonstration we will apply the data from torchtext.

In [3]:
#Importing all necessary modules
import torch, spacy, time, math, random,sys
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torchtext.legacy.datasets import Multi30k
from torchtext.legacy.data import Field, BucketIterator
from tqdm import tqdm


In [4]:
#downloading and loading the pre-trained tokenizers for both languges from Spacy 
!python -m spacy download de_core_news_sm
!python -m spacy download en_core_web_sm
spacy_de = spacy.load("de_core_news_sm")
spacy_en = spacy.load("en_core_web_sm")

[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('de_core_news_sm')
[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')


In [5]:
#We can now defines our tokenizer functions as follows
def de_tokenizer(text):
  '''
  takes a germany text and returns a list of tokens
  '''
  return [tok.text for tok in spacy_de.tokenizer(text)]

def en_tokenizer(text):
  '''
  take an english text and return a list of tokens
  '''
  return [tok.text for tok in spacy_en.tokenizer(text)]

In [6]:
#We defines fields object to be used to preprocess the texts with the aid of Field module:
germany = Field(tokenize = de_tokenizer, init_token = "<sos>", eos_token = "<eos>", lower = True)
english = Field(tokenize = en_tokenizer, init_token = "<sos>", eos_token = "<eos>", lower = True)


In [7]:
#We finally load and preprocess the data automatically using the above field
#This may take time depending with the internet speed and device(gpu/cpu)
tic = time.time()
train_data, validation_data, test_data = Multi30k.splits(exts = ('.de', '.en'), fields = (germany, english))
toc = time.time()
print(f"\n>>>>Total loading and preprocessing time: {time_fmt(toc - tic)}")


>>>>Total loading and preprocessing time: hrs: 0: min: 25: sec: 07.00


In [8]:
#print total number of examples on each dataset to see if we have downloaded the correct files:
print(f">>>>Total train examples: {len(train_data.examples)}\n>>>>Total validation examples: {len(validation_data.examples)}\n>>>>Total test examples: {len(test_data.examples)}")

>>>>Total train examples: 29000
>>>>Total validation examples: 1014
>>>>Total test examples: 1000


In [9]:
#Print out the first line of data in the training set to see if the pre-process went correctly:
print(f"First train tuple: {vars(train_data.examples[0])}")

First train tuple: {'src': ['zwei', 'junge', 'weiße', 'männer', 'sind', 'im', 'freien', 'in', 'der', 'nähe', 'vieler', 'büsche', '.'], 'trg': ['two', 'young', ',', 'white', 'males', 'are', 'outside', 'near', 'many', 'bushes', '.']}


In [10]:
#We now build vocabulary (List of indices from the tokens) only from using the training dataset
#Pick indices for those tokens which occured at least twice in the list
germany.build_vocab(train_data, min_freq = 2) 
english.build_vocab(train_data, min_freq = 2)

In [11]:
#We prepare the iterator (to stream in data one sample at a time) during training and validation stage:
batch_size = 128
train_iter, validation_iter, test_iter = BucketIterator.splits(
    (train_data, validation_data, test_data),
    batch_size = batch_size,
    sort_within_batch = True,
    device = device
)

In [56]:
#Model Building stage: We start with the ENCODER NETWORK
class ENCODER(nn.Module):
  def __init__(self, input_dim, enc_embd, enc_hidden, dec_hidden, dropout):
    super(ENCODER, self).__init__()
    self.dropout = nn.Dropout(dropout)
    self.embedding = nn.Embedding(input_dim, enc_embd)
    self.rnn = nn.GRU(enc_embd, enc_hidden, bidirectional = True)
    self.fc = nn.Linear((2* enc_hidden), dec_hidden)
  
  def forward(self, enc_input):
    '''
    enc_input shape: [enc_input_len, batch_size]
    embeded shape:  [enc_input_len, batch_size, enc_embd]
    enc_output: [enc_input_len, batch_size, enc_hidden *2 = num_directions]
    hidden shape: [2 * num_layers, batch_size, enc_hidden]
    forward hidden subseting: hidden[-2,:, :]
    backward hidden subseting: hidden[-1,:,:]
    hidden shape (new): [batch_size, dec_hidden]
    '''
    embeded = self.dropout(self.embedding(enc_input))
    enc_output, hidden = self.rnn(embeded)
    #concatenating forward and backward rnn hidden states and pass it through the fc layer
    hidden = torch.tanh(self.fc(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1)))
    return enc_output, hidden
    


In [57]:
#The Attention Block:
class Attention(nn.Module):
  def __init__(self, enc_hidden, dec_hidden):
    super(Attention, self).__init__()
    self.attn = nn.Linear((2*enc_hidden) + dec_hidden, dec_hidden)
    self.v = nn.Linear(dec_hidden, 1, bias = False)
  def forward(self,hidden, enc_output):
    '''
    hidden shape: [batch_size, hidden_dim]
    enc_output shape: [enc_inp_len, batch_size, enc_hidden *2]
    -We need to add more dim to the hidden dimension and repeat it several times
    -We need to reshape the decoder output to allow concatenation to take place accross hidden_dim
    -we return information weights ranging between 0,1 and add up to 1 (the softmax transformation)
    '''
    batch_size = enc_output.shape[1]
    seq_len = enc_output.shape[0]
    #Expand hidden dim and repeat seq_len times to match the dec_output
    hidden = hidden.unsqueeze(1).repeat(1, seq_len, 1)
    enc_output = enc_output.permute(1,0,2) #new shape [batch_size, seq_len, enc_hidden *2]
    E_values = torch.tanh(self.attn(torch.cat((hidden, enc_output), dim = 2))) #shape = [batch_size, seq_len, hidden_dim]
    attention = self.v(E_values).squeeze(2) # shape: [batch_size, seq_len]
    return nn.functional.softmax(attention, dim = 1)


In [58]:
#The decoder Class: This do all computation for the prediction:
class DECODER(nn.Module):
  def __init__(self, output_dim, dec_embd, enc_hidden, dec_hidden, dropout, attention):
    super(DECODER, self).__init__()
    self.dropout = nn.Dropout(dropout)
    self.output_dim = output_dim
    self.attention = attention
    self.embedding = nn.Embedding(output_dim, dec_embd)
    self.rnn = nn.GRU((2*enc_hidden) + dec_embd, enc_hidden)
    self.fc = nn.Linear((2*enc_hidden) + dec_embd + dec_hidden, output_dim)

  def forward(self, input, hidden, enc_output):
    '''
    input shape: [batch_size] we are taking one token at a time
    hidden shape: [batch_size, dec_hidden]
    enc_output shape [input_len, batch_size, dec_hidden *2]
    -input need to be expanded
    -compute attention vector a
    -compute weghted quantity
    '''
    input = input.unsqueeze(0) #shape [1, batch_size]
    embeded = self.dropout(self.embedding(input)) # shape [1, batch_size, dec_embed_dim]
    a = self.attention(hidden, enc_output) #shape [batch_size, enc_input_len]
    a = a.unsqueeze(1) #shape [batch_size, 1, enc_input_len]
    enc_output = enc_output.permute(1,0,2) #shape [batch_size, input_len, dec_hidden *2]
    weighted = torch.bmm(a, enc_output) #shape [batch_size,1, enc_hidden_dim*2]
    weighted = weighted.permute(1,0,2) #shape = [1, batch_size, enc_hidden *2]
    rnn_input = torch.cat((embeded, weighted), dim = 2) # shape [1, batch_size, enc_hidden*2 + dec_embed_dim]
    dec_output, hidden = self.rnn(rnn_input, hidden.unsqueeze(0))
    #dec_output shape : [dec_inp_len, batch_size, 2*enc_hidden], hidden shape: [2*num_layers, batch_size, dec_hidden_dim]
    weighted = weighted.squeeze(0) #shape = [batch_size, enc_hidden *2]
    embeded = embeded.squeeze(0) # shape = [batch_size, dec_embeded_dim]
    dec_output = dec_output.squeeze(0) #shape = [batch_size, 2*enc_hidden]
    preds = torch.tanh(self.fc(torch.cat((dec_output, weighted, embeded), dim = 1))) #shape [batch_size, output_dim]
    return preds, hidden.squeeze(0)

    



In [59]:
#The Model Class : We combine the Encoder, decoder and attention classes
class MTL_Model(nn.Module):
  def __init__(self, encoder, decoder, device):
    super(MTL_Model, self).__init__()
    self.encoder = encoder
    self.decoder = decoder
    self.device = device
  
  def forward(self, input, target, teacher_force_ratio = 0.5):
    '''
    This method take input from encoder and attention to yield predictions
    input shape = [input_len, batch_size]
    target shape = [target_len, batch_size]
    if teacher force = 0.5 then 50% of times we use ground truth for the next prediction else
    we use the best guess/previeous prediction as input
    -we create container to store the predicted tokens
    '''
    target_voc_size = self.decoder.output_dim
    target_len = target.shape[0]
    batch_size = input.shape[1]
    outputs = torch.zeros(target_len, batch_size, target_voc_size).to(device)
    enc_output, enc_hidden = self.encoder(input) #run the encoder
    #grab the first token ("<sos>") of the decoder
    first_input = target[0,:] 
    for t in range(1, target_len):
      output, hidden = self.decoder(first_input, enc_hidden, enc_output)
      #store the output of the decoder at every time step
      outputs[t] = output
      teacher_force = random.random() < teacher_force_ratio
      best_guess = output.argmax(1) #grab max proba value
      first_input = target[t] if teacher_force else best_guess
    return outputs
    


In [60]:
#Instantiating the model class

In [61]:
#HyperParameters
enc_input = len(germany.vocab)
dec_input = len(english.vocab)
enc_embd = 300
dec_embd = 300
enc_hidden = 512
dec_hidden = 512
enc_dropout = 0.5
dec_dropout = 0.5
attention = Attention(enc_hidden, dec_hidden)
encoder = ENCODER(enc_input, enc_embd, enc_hidden, enc_hidden, enc_dropout)
decoder = DECODER(dec_input, dec_embd, enc_hidden, dec_hidden, dec_dropout,attention)
model = MTL_Model(encoder, decoder, device).to(device)

#Initialize the parameters to random normal
def param_initializer(model):
  for name, param in model.named_parameters():
    if 'weight' in name:
      torch.nn.init.normal_(param.data, mean = 0, std = 0.01)
    else:
      torch.nn.init.constant_(param.data, 0)
model.apply(param_initializer)

MTL_Model(
  (encoder): ENCODER(
    (dropout): Dropout(p=0.5, inplace=False)
    (embedding): Embedding(7855, 300)
    (rnn): GRU(300, 512, bidirectional=True)
    (fc): Linear(in_features=1024, out_features=512, bias=True)
  )
  (decoder): DECODER(
    (dropout): Dropout(p=0.5, inplace=False)
    (attention): Attention(
      (attn): Linear(in_features=1536, out_features=512, bias=True)
      (v): Linear(in_features=512, out_features=1, bias=False)
    )
    (embedding): Embedding(5893, 300)
    (rnn): GRU(1324, 512)
    (fc): Linear(in_features=1836, out_features=5893, bias=True)
  )
)

In [62]:
#Counting the number of trainable parameters in the network:
def param_counter(model):
  return sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f">>>>This model has the total of {param_counter(model):,} trainable parameters")

>>>>This model has the total of 21,585,873 trainable parameters


In [63]:
#Get the loss and the optimizer objects
padded_idx = english.vocab.stoi[english.pad_token]
loss_obj = nn.CrossEntropyLoss(ignore_index = padded_idx)
optimizer = optim.Adam(params = model.parameters(), lr = 1e-3)


In [64]:
#The training loop:
def training_loop(model, iterator, optimizer, loss_obj, clip):
  model.train() #switch on the regularizers
  loss_per_epoch = 0
  for idx, batch in enumerate(tqdm(train_iter)):
    input = batch.src
    target = batch.trg
    optimizer.zero_grad()
    output = model(input, target) #forward pass
    output_dim = output.shape[-1] #grab the last pred dim, [target_len, batch_size, output_dim]
    output = output[1:].view(-1, output_dim) #reshape output: [target_len - 1 * batch_size, output_dim]
    target = target[1:].view(-1) # shape [target_len-1 * batch_size]
    train_loss = loss_obj(output, target)
    train_loss.backward() #backward pass
    torch.nn.utils.clip_grad_norm_(model.parameters(), clip) #clipping
    optimizer.step() #gradient descent
    loss_per_epoch += train_loss.item()
  
  return float(loss_per_epoch / len(iterator))



In [67]:
#The validation Loop:
def validation_loop(model, iterator, loss_obj):
  model.eval() # shut-down the regularizers
  loss_per_epoch = 0
  #No need to compute the gradients
  with torch.no_grad():
    for idx, batch in enumerate(tqdm(iterator)):
      input = batch.src # shape [input_len, batch_size]
      target = batch.trg # shape [target_len, batch_size]
      #We turn-off teacher force technique by substitute  zero
      output = model(input, target, 0) #shape [dec_inp_len, batch_size, output_dim]
      output_dim = output.shape[-1] #grab the output shape
      output = output[1:].view(-1, output_dim) #new shape: [(dec_input_len-1) * batch_size, output_dim]
      target = target[1:].view(-1) # new shape = [(target_len-1) * batch_size]
      valid_loss = loss_obj(output, target)
      loss_per_epoch += valid_loss.item()
  return float(loss_per_epoch / len(iterator))


In [68]:
#We finally train our model using the following codes snippest
#We return both loss and exponentiation of losses (PPL) for better vissualization
#We also save best weights at each epoch
EPOCHS = 100
tic1 = time.time()
best_valid_loss = float('inf')
clip = 1 #For gradient clipping
for epoch in range(EPOCHS):
  tic = time.time()
  print(f"\n>>>>training begins for epoch {epoch + 1}:\n>>>>please wait while the model is training......")
  train_loss = training_loop(model, train_iter, optimizer,loss_obj, clip)
  valid_loss = validation_loop(model,validation_iter,loss_obj)
  if valid_loss < best_valid_loss:
    best_valid_loss = valid_loss
    torch.save(model.state_dict(), 'nmt-new.pt')
  toc = time.time()
  print(f"\n>>>>time elapsed for epoch {epoch + 1}: {time_fmt(toc - tic)}")
  print(f">>>>train loss: {train_loss:.4f}\ttrain PPL: {math.exp(train_loss):7.4f}")
  print(f">>>>validation loss: {valid_loss:.4f}\tvalidation PPL: {math.exp(valid_loss):7.4f}")
toc1 = time.time()
print(f"\n>>>>Total time for training and evaluation for this model is: {time_fmt(toc1 - tic1)}")

  0%|          | 0/227 [00:00<?, ?it/s]


>>>>training begins for epoch 1:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.11it/s]
100%|██████████| 8/8 [00:00<00:00, 13.66it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 1: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.3306	train PPL: 1526.2329
>>>>validation loss: 7.3074	validation PPL: 1491.2518

>>>>training begins for epoch 2:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:36<00:00,  6.19it/s]
100%|██████████| 8/8 [00:00<00:00, 13.90it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 2: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.3430	train PPL: 1545.3768
>>>>validation loss: 7.3238	validation PPL: 1515.9851

>>>>training begins for epoch 3:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:36<00:00,  6.15it/s]
100%|██████████| 8/8 [00:00<00:00, 13.81it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 3: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.3372	train PPL: 1536.3621
>>>>validation loss: 7.3132	validation PPL: 1500.0224

>>>>training begins for epoch 4:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.13it/s]
100%|██████████| 8/8 [00:00<00:00, 13.78it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 4: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.3346	train PPL: 1532.4185
>>>>validation loss: 7.3084	validation PPL: 1492.8209

>>>>training begins for epoch 5:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:36<00:00,  6.15it/s]
100%|██████████| 8/8 [00:00<00:00, 13.71it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 5: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.3329	train PPL: 1529.7964
>>>>validation loss: 7.3057	validation PPL: 1488.6930

>>>>training begins for epoch 6:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.10it/s]
100%|██████████| 8/8 [00:00<00:00, 13.82it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 6: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.3322	train PPL: 1528.7504
>>>>validation loss: 7.3042	validation PPL: 1486.4961

>>>>training begins for epoch 7:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:36<00:00,  6.16it/s]
100%|██████████| 8/8 [00:00<00:00, 13.77it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 7: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.3244	train PPL: 1516.8363
>>>>validation loss: 7.3000	validation PPL: 1480.2378

>>>>training begins for epoch 8:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.12it/s]
100%|██████████| 8/8 [00:00<00:00, 13.67it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 8: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.3188	train PPL: 1508.4073
>>>>validation loss: 7.2950	validation PPL: 1472.8900

>>>>training begins for epoch 9:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.12it/s]
100%|██████████| 8/8 [00:00<00:00, 13.67it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 9: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.3148	train PPL: 1502.4004
>>>>validation loss: 7.2963	validation PPL: 1474.8997

>>>>training begins for epoch 10:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:36<00:00,  6.14it/s]
100%|██████████| 8/8 [00:00<00:00, 13.76it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 10: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.3076	train PPL: 1491.5791
>>>>validation loss: 7.2882	validation PPL: 1462.9960

>>>>training begins for epoch 11:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:36<00:00,  6.14it/s]
100%|██████████| 8/8 [00:00<00:00, 13.69it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 11: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.2998	train PPL: 1479.9836
>>>>validation loss: 7.2787	validation PPL: 1449.0984

>>>>training begins for epoch 12:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.12it/s]
100%|██████████| 8/8 [00:00<00:00, 13.75it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 12: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.2914	train PPL: 1467.6451
>>>>validation loss: 7.2724	validation PPL: 1439.9417

>>>>training begins for epoch 13:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:36<00:00,  6.18it/s]
100%|██████████| 8/8 [00:00<00:00, 13.59it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 13: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.2810	train PPL: 1452.4638
>>>>validation loss: 7.2673	validation PPL: 1432.7095

>>>>training begins for epoch 14:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:36<00:00,  6.14it/s]
100%|██████████| 8/8 [00:00<00:00, 13.65it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 14: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.2750	train PPL: 1443.7073
>>>>validation loss: 7.2605	validation PPL: 1422.9478

>>>>training begins for epoch 15:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:36<00:00,  6.16it/s]
100%|██████████| 8/8 [00:00<00:00, 13.63it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 15: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.2659	train PPL: 1430.6036
>>>>validation loss: 7.2548	validation PPL: 1414.8221

>>>>training begins for epoch 16:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.13it/s]
100%|██████████| 8/8 [00:00<00:00, 13.64it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 16: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.2555	train PPL: 1415.8040
>>>>validation loss: 7.2446	validation PPL: 1400.5661

>>>>training begins for epoch 17:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.12it/s]
100%|██████████| 8/8 [00:00<00:00, 13.62it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 17: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.2396	train PPL: 1393.5398
>>>>validation loss: 7.2334	validation PPL: 1384.8967

>>>>training begins for epoch 18:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.65it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 18: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 7.2186	train PPL: 1364.5326
>>>>validation loss: 7.2272	validation PPL: 1376.4103

>>>>training begins for epoch 19:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.59it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 19: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 7.1973	train PPL: 1335.7994
>>>>validation loss: 7.1941	validation PPL: 1331.5566

>>>>training begins for epoch 20:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.51it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 20: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 7.1692	train PPL: 1298.8295
>>>>validation loss: 7.1726	validation PPL: 1303.1846

>>>>training begins for epoch 21:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.63it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 21: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 7.1451	train PPL: 1267.8216
>>>>validation loss: 7.1569	validation PPL: 1282.8711

>>>>training begins for epoch 22:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.57it/s]
  0%|          | 1/227 [00:00<00:42,  5.31it/s]


>>>>time elapsed for epoch 22: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 7.1247	train PPL: 1242.2589
>>>>validation loss: 7.1382	validation PPL: 1259.1153

>>>>training begins for epoch 23:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.56it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 23: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 7.1064	train PPL: 1219.7160
>>>>validation loss: 7.1272	validation PPL: 1245.3491

>>>>training begins for epoch 24:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.43it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 24: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 7.0921	train PPL: 1202.3943
>>>>validation loss: 7.1167	validation PPL: 1232.3765

>>>>training begins for epoch 25:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.59it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 25: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 7.0789	train PPL: 1186.6472
>>>>validation loss: 7.1219	validation PPL: 1238.8046

>>>>training begins for epoch 26:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.06it/s]
100%|██████████| 8/8 [00:00<00:00, 13.48it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 26: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 7.0687	train PPL: 1174.6365
>>>>validation loss: 7.1047	validation PPL: 1217.6681

>>>>training begins for epoch 27:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.58it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 27: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 7.0545	train PPL: 1158.0979
>>>>validation loss: 7.0895	validation PPL: 1199.2761

>>>>training begins for epoch 28:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.03it/s]
100%|██████████| 8/8 [00:00<00:00, 13.59it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 28: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 7.0408	train PPL: 1142.2585
>>>>validation loss: 7.0852	validation PPL: 1194.2067

>>>>training begins for epoch 29:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.10it/s]
100%|██████████| 8/8 [00:00<00:00, 13.74it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 29: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 7.0306	train PPL: 1130.6616
>>>>validation loss: 7.0755	validation PPL: 1182.6121

>>>>training begins for epoch 30:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.57it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 30: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 7.0186	train PPL: 1117.2140
>>>>validation loss: 7.0751	validation PPL: 1182.1748

>>>>training begins for epoch 31:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.06it/s]
100%|██████████| 8/8 [00:00<00:00, 13.67it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 31: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 7.0082	train PPL: 1105.7082
>>>>validation loss: 7.0709	validation PPL: 1177.1837

>>>>training begins for epoch 32:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.05it/s]
100%|██████████| 8/8 [00:00<00:00, 13.58it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 32: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.9997	train PPL: 1096.2682
>>>>validation loss: 7.0688	validation PPL: 1174.7961

>>>>training begins for epoch 33:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.55it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 33: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.9922	train PPL: 1088.0783
>>>>validation loss: 7.0623	validation PPL: 1167.0925

>>>>training begins for epoch 34:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.54it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 34: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.9835	train PPL: 1078.7132
>>>>validation loss: 7.0603	validation PPL: 1164.7575

>>>>training begins for epoch 35:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.11it/s]
100%|██████████| 8/8 [00:00<00:00, 13.59it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 35: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.9761	train PPL: 1070.7156
>>>>validation loss: 7.0587	validation PPL: 1162.9242

>>>>training begins for epoch 36:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.63it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 36: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.9678	train PPL: 1061.9062
>>>>validation loss: 7.0547	validation PPL: 1158.2569

>>>>training begins for epoch 37:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.60it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 37: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.9611	train PPL: 1054.7573
>>>>validation loss: 7.0528	validation PPL: 1156.1011

>>>>training begins for epoch 38:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.66it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 38: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.9546	train PPL: 1048.0083
>>>>validation loss: 7.0509	validation PPL: 1153.9252

>>>>training begins for epoch 39:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.10it/s]
100%|██████████| 8/8 [00:00<00:00, 13.60it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 39: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.9488	train PPL: 1041.8794
>>>>validation loss: 7.0513	validation PPL: 1154.3886

>>>>training begins for epoch 40:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.06it/s]
100%|██████████| 8/8 [00:00<00:00, 13.63it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 40: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.9429	train PPL: 1035.7428
>>>>validation loss: 7.0496	validation PPL: 1152.3424

>>>>training begins for epoch 41:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.64it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 41: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.9389	train PPL: 1031.6350
>>>>validation loss: 7.0491	validation PPL: 1151.8502

>>>>training begins for epoch 42:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.10it/s]
100%|██████████| 8/8 [00:00<00:00, 13.64it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 42: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.9337	train PPL: 1026.2720
>>>>validation loss: 7.0455	validation PPL: 1147.6907

>>>>training begins for epoch 43:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.56it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 43: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.9300	train PPL: 1022.5173
>>>>validation loss: 7.0488	validation PPL: 1151.4350

>>>>training begins for epoch 44:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.06it/s]
100%|██████████| 8/8 [00:00<00:00, 13.64it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 44: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.9256	train PPL: 1018.0214
>>>>validation loss: 7.0452	validation PPL: 1147.3866

>>>>training begins for epoch 45:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.63it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 45: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.9227	train PPL: 1015.0723
>>>>validation loss: 7.0454	validation PPL: 1147.5320

>>>>training begins for epoch 46:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.69it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 46: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.9200	train PPL: 1012.2838
>>>>validation loss: 7.0452	validation PPL: 1147.2810

>>>>training begins for epoch 47:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.73it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 47: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.9157	train PPL: 1008.0167
>>>>validation loss: 7.0420	validation PPL: 1143.6695

>>>>training begins for epoch 48:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.55it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 48: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.9130	train PPL: 1005.2473
>>>>validation loss: 7.0430	validation PPL: 1144.8196

>>>>training begins for epoch 49:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.64it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 49: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.9104	train PPL: 1002.6455
>>>>validation loss: 7.0400	validation PPL: 1141.3327

>>>>training begins for epoch 50:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.05it/s]
100%|██████████| 8/8 [00:00<00:00, 13.48it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 50: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.9070	train PPL: 999.2422
>>>>validation loss: 7.0423	validation PPL: 1144.0616

>>>>training begins for epoch 51:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.10it/s]
100%|██████████| 8/8 [00:00<00:00, 13.56it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 51: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.9049	train PPL: 997.1093
>>>>validation loss: 7.0402	validation PPL: 1141.6606

>>>>training begins for epoch 52:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.38it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 52: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.9021	train PPL: 994.3391
>>>>validation loss: 7.0431	validation PPL: 1144.9667

>>>>training begins for epoch 53:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.57it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 53: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.9001	train PPL: 992.3565
>>>>validation loss: 7.0448	validation PPL: 1146.8714

>>>>training begins for epoch 54:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.61it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 54: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8978	train PPL: 990.1349
>>>>validation loss: 7.0450	validation PPL: 1147.0993

>>>>training begins for epoch 55:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.10it/s]
100%|██████████| 8/8 [00:00<00:00, 13.48it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 55: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8955	train PPL: 987.8209
>>>>validation loss: 7.0445	validation PPL: 1146.5073

>>>>training begins for epoch 56:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.56it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 56: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.8943	train PPL: 986.5938
>>>>validation loss: 7.0390	validation PPL: 1140.2733

>>>>training begins for epoch 57:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.52it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 57: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8919	train PPL: 984.2585
>>>>validation loss: 7.0429	validation PPL: 1144.7201

>>>>training begins for epoch 58:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.61it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 58: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8899	train PPL: 982.3374
>>>>validation loss: 7.0405	validation PPL: 1141.9550

>>>>training begins for epoch 59:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.50it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 59: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8881	train PPL: 980.5160
>>>>validation loss: 7.0428	validation PPL: 1144.5671

>>>>training begins for epoch 60:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.62it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 60: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8868	train PPL: 979.2237
>>>>validation loss: 7.0432	validation PPL: 1145.0700

>>>>training begins for epoch 61:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.64it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 61: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8850	train PPL: 977.4836
>>>>validation loss: 7.0406	validation PPL: 1142.0209

>>>>training begins for epoch 62:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.58it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 62: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.8837	train PPL: 976.2012
>>>>validation loss: 7.0383	validation PPL: 1139.4593

>>>>training begins for epoch 63:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.10it/s]
100%|██████████| 8/8 [00:00<00:00, 13.56it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 63: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8821	train PPL: 974.6710
>>>>validation loss: 7.0433	validation PPL: 1145.2105

>>>>training begins for epoch 64:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.56it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 64: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8815	train PPL: 974.0656
>>>>validation loss: 7.0389	validation PPL: 1140.0932

>>>>training begins for epoch 65:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.61it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 65: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.8802	train PPL: 972.8143
>>>>validation loss: 7.0382	validation PPL: 1139.3121

>>>>training begins for epoch 66:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.66it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 66: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8784	train PPL: 971.0461
>>>>validation loss: 7.0413	validation PPL: 1142.8669

>>>>training begins for epoch 67:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.57it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 67: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8769	train PPL: 969.6455
>>>>validation loss: 7.0404	validation PPL: 1141.8959

>>>>training begins for epoch 68:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.10it/s]
100%|██████████| 8/8 [00:00<00:00, 13.65it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 68: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8761	train PPL: 968.8178
>>>>validation loss: 7.0388	validation PPL: 1140.0214

>>>>training begins for epoch 69:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.06it/s]
100%|██████████| 8/8 [00:00<00:00, 13.58it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 69: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.8753	train PPL: 968.0656
>>>>validation loss: 7.0423	validation PPL: 1144.0171

>>>>training begins for epoch 70:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.65it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 70: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8741	train PPL: 966.9375
>>>>validation loss: 7.0409	validation PPL: 1142.4414

>>>>training begins for epoch 71:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.13it/s]
100%|██████████| 8/8 [00:00<00:00, 13.65it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 71: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8731	train PPL: 965.9163
>>>>validation loss: 7.0406	validation PPL: 1142.1255

>>>>training begins for epoch 72:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.44it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 72: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8724	train PPL: 965.2935
>>>>validation loss: 7.0440	validation PPL: 1146.0194

>>>>training begins for epoch 73:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.42it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 73: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8712	train PPL: 964.0717
>>>>validation loss: 7.0418	validation PPL: 1143.3919

>>>>training begins for epoch 74:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.07it/s]
100%|██████████| 8/8 [00:00<00:00, 13.62it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 74: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8706	train PPL: 963.5508
>>>>validation loss: 7.0436	validation PPL: 1145.4730

>>>>training begins for epoch 75:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.10it/s]
100%|██████████| 8/8 [00:00<00:00, 13.68it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 75: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8698	train PPL: 962.7902
>>>>validation loss: 7.0416	validation PPL: 1143.2459

>>>>training begins for epoch 76:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.62it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 76: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8691	train PPL: 962.0851
>>>>validation loss: 7.0433	validation PPL: 1145.1465

>>>>training begins for epoch 77:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.59it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 77: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8683	train PPL: 961.3587
>>>>validation loss: 7.0396	validation PPL: 1140.8805

>>>>training begins for epoch 78:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.61it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 78: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8673	train PPL: 960.3999
>>>>validation loss: 7.0394	validation PPL: 1140.6772

>>>>training begins for epoch 79:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.13it/s]
100%|██████████| 8/8 [00:00<00:00, 13.52it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 79: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8662	train PPL: 959.3171
>>>>validation loss: 7.0381	validation PPL: 1139.2064

>>>>training begins for epoch 80:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.05it/s]
100%|██████████| 8/8 [00:00<00:00, 13.58it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 80: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.8655	train PPL: 958.5781
>>>>validation loss: 7.0435	validation PPL: 1145.4393

>>>>training begins for epoch 81:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.11it/s]
100%|██████████| 8/8 [00:00<00:00, 13.56it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 81: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8649	train PPL: 958.0554
>>>>validation loss: 7.0439	validation PPL: 1145.9041

>>>>training begins for epoch 82:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.06it/s]
100%|██████████| 8/8 [00:00<00:00, 13.63it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 82: hrs: 0: min: 00: sec: 38.00
>>>>train loss: 6.8639	train PPL: 957.0956
>>>>validation loss: 7.0390	validation PPL: 1140.2762

>>>>training begins for epoch 83:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.10it/s]
100%|██████████| 8/8 [00:00<00:00, 13.64it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 83: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8636	train PPL: 956.8355
>>>>validation loss: 7.0432	validation PPL: 1145.0479

>>>>training begins for epoch 84:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.38it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 84: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8631	train PPL: 956.2982
>>>>validation loss: 7.0443	validation PPL: 1146.3366

>>>>training begins for epoch 85:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.71it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 85: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8621	train PPL: 955.3547
>>>>validation loss: 7.0438	validation PPL: 1145.6795

>>>>training begins for epoch 86:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.44it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 86: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8615	train PPL: 954.8070
>>>>validation loss: 7.0450	validation PPL: 1147.1648

>>>>training begins for epoch 87:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.75it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 87: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8607	train PPL: 954.0095
>>>>validation loss: 7.0427	validation PPL: 1144.4273

>>>>training begins for epoch 88:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.65it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 88: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8607	train PPL: 954.0109
>>>>validation loss: 7.0443	validation PPL: 1146.2508

>>>>training begins for epoch 89:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.46it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 89: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8592	train PPL: 952.6482
>>>>validation loss: 7.0420	validation PPL: 1143.7292

>>>>training begins for epoch 90:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.12it/s]
100%|██████████| 8/8 [00:00<00:00, 13.66it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 90: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8590	train PPL: 952.3781
>>>>validation loss: 7.0416	validation PPL: 1143.2363

>>>>training begins for epoch 91:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.10it/s]
100%|██████████| 8/8 [00:00<00:00, 13.65it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 91: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8587	train PPL: 952.1698
>>>>validation loss: 7.0423	validation PPL: 1144.0060

>>>>training begins for epoch 92:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.10it/s]
100%|██████████| 8/8 [00:00<00:00, 13.56it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 92: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8585	train PPL: 951.9650
>>>>validation loss: 7.0408	validation PPL: 1142.3215

>>>>training begins for epoch 93:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.67it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 93: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8568	train PPL: 950.2863
>>>>validation loss: 7.0437	validation PPL: 1145.6162

>>>>training begins for epoch 94:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.12it/s]
100%|██████████| 8/8 [00:00<00:00, 13.64it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 94: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8565	train PPL: 950.0710
>>>>validation loss: 7.0374	validation PPL: 1138.4189

>>>>training begins for epoch 95:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.65it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 95: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8563	train PPL: 949.8262
>>>>validation loss: 7.0381	validation PPL: 1139.2201

>>>>training begins for epoch 96:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.08it/s]
100%|██████████| 8/8 [00:00<00:00, 13.66it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 96: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8555	train PPL: 949.1105
>>>>validation loss: 7.0387	validation PPL: 1139.9412

>>>>training begins for epoch 97:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.73it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 97: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8552	train PPL: 948.8190
>>>>validation loss: 7.0393	validation PPL: 1140.5778

>>>>training begins for epoch 98:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.11it/s]
100%|██████████| 8/8 [00:00<00:00, 13.59it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 98: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8549	train PPL: 948.5586
>>>>validation loss: 7.0397	validation PPL: 1141.0243

>>>>training begins for epoch 99:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.09it/s]
100%|██████████| 8/8 [00:00<00:00, 13.51it/s]
  0%|          | 0/227 [00:00<?, ?it/s]


>>>>time elapsed for epoch 99: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8541	train PPL: 947.7246
>>>>validation loss: 7.0422	validation PPL: 1143.9245

>>>>training begins for epoch 100:
>>>>please wait while the model is training......


100%|██████████| 227/227 [00:37<00:00,  6.10it/s]
100%|██████████| 8/8 [00:00<00:00, 13.26it/s]


>>>>time elapsed for epoch 100: hrs: 0: min: 00: sec: 37.00
>>>>train loss: 6.8538	train PPL: 947.4376
>>>>validation loss: 7.0412	validation PPL: 1142.7562

>>>>Total time for training and evaluation for this model is: hrs: 1: min: 03: sec: 12.00



