<a href="https://colab.research.google.com/github/martinpius/RNN-ALIENS/blob/main/RNN_from_Scratch_in_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount("/content/drive/", force_remount = True)
try:
  COLAB = True
  import torch
  print(f">>>> You are on Google CoLaB with torch version {torch.__version__}")
except Exception as e:
  print(f">>>> {type(e)} {e}\n>>>> please correct {type(e)} and reload your drive")
  COLAB = False

def time_fmt(t: float = 123.93)->float:
  h = int(t / (60 * 60))
  m = int(t % (60 * 60) / 60)
  s = int(t / 60)
  return f"hrs: {h} min: {m:>02} sec: {s:>05.2f}"

if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
print(f">>>> testing the time formating function.........\n>>>> time elapsed\t{time_fmt()}")

Mounted at /content/drive/
>>>> You are on Google CoLaB with torch version 1.9.0+cu102
>>>> testing the time formating function.........
>>>> time elapsed	hrs: 0 min: 02 sec: 02.00


In [None]:
import torch
from torch import nn
import math, random
import numpy as np


In [None]:
seed = 1234
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

In [None]:
# The Single neuron RNN with single layer-Pytorch Implementantion:
class SingleLayerSingleNeuronRNN(nn.Module):
  def __init__(self, input_dim, n_neurons):
    '''
    Wx == weights for the inputs -> dimension == [input_dim, n_neurons = hidden_dim]
    Wy == Weights for the prev_hidden -> dimension == [n_neurons, n_neurons] == [hidden, hidden]
    b == bias : shape == [batch_size = 1, n_neurons] == [1, hidden]
    '''
    super(SingleLayerSingleNeuronRNN, self).__init__()
    self.Wx = torch.randn(size = (input_dim, n_neurons)).to(device = device) # shape == [input_dim, n_neurons == batch_size]
    self.Wy = torch.randn(size = (n_neurons, n_neurons)).to(device = device) 
    self.b = torch.randn(size = (1, n_neurons)).to(device = device)
  
  def forward(self, X0, X1):
    '''
    X0 == current input for the rnn at current time-stamp t
    X1 == previous hidden state for the rnn (at time t-1)
    Y0 == output at the current time stamp (t)
    Y1 == output at the next time stamp (t+1)
    We use tanh activation function
    '''
    self.Y0 = torch.tanh(torch.mm(X0, self.Wx) + self.b)
    self.Y1 = torch.tanh(torch.mm(self.Y0, self.Wy)) + torch.tanh(torch.mm(X1, self.Wx) + self.b)
    return self.Y0.to(device = device), self.Y1.to(device = device) # shape for each outputs == [batch_size, hidden=n_neurons] == [4, 1]

In [None]:
# Testing the class with the input tensor of size 4, assuming a sequence with 2 timestamps
X0 = [[1,3,4,0],[2,8,1,3],[4,8,9,3],[4,3,0,1]] # a batch of X0, batch_size = 4 ------> time-stamp 1
X1 = [[0,2,3,4], [2,1,4,2],[0,2,1,9],[1,3,2,9]] # a batch of X1, batch_size = 4-------> time-stamp 2
X0 = torch.tensor(data = X0, dtype = torch.float32, requires_grad = True).to(device = device)
X1 = torch.tensor(data = X1, dtype = torch.float32, requires_grad = True).to(device = device)
inputs = 4
neurons = 1
model = SingleLayerSingleNeuronRNN(input_dim = inputs, n_neurons = neurons).to(device = device)
print(f">>>> The desired output: {model(X0, X1)}")


>>>> The desired output: (tensor([[-0.9797],
        [ 0.9980],
        [-0.9998],
        [ 0.9711]], device='cuda:0', grad_fn=<TanhBackward>), tensor([[-0.1589],
        [-1.5342],
        [ 1.5235],
        [ 0.4002]], device='cuda:0', grad_fn=<AddBackward0>))


In [None]:
## Adding more neurons to the single layer RNN.
class SingleLayerMultiNeuronsRNN(nn.Module):
  '''
  In this class we define a sigle layer rnn architecture
  with many neurons: An extension to the above class
  Computations remain similar, only output shapes changes
  We initialize the weights to random normal distribution
  We use num_neurons = 16
  '''
  def __init__(self, input_dim, num_neurons):
    super(SingleLayerMultiNeuronsRNN, self).__init__()
    self.Wx = torch.randn(size = (input_dim, num_neurons)).to(device = device) # shape == [4, 16]
    self.Wy = torch.randn(size = (num_neurons, num_neurons)).to(device = device) # shape = [16, 16]
    self.b = torch.randn(size = (1, num_neurons)).to(device = device) # shape = [1, 16]
  
  def forward(self, X0, X1):
    '''
    X0 = input batch for the first time step # shape == [4, 4] == [input_dim, batch_size]
    X1 = input batch for the second time step # shape == [4, 4] == [input_dim, batch_size]
    '''
    self.Y0 = torch.tanh(torch.mm(X0, self.Wx) + self.b)
    self.Y1 = torch.tanh(torch.mm(self.Y0, self.Wy)) + torch.tanh(torch.mm(X1, self.Wx) + self.b)
    return self.Y0.to(device = device), self.Y1.to(device = device)


In [None]:
# Instantiating and testing the class for the desired output
input_dim = 4
num_neurons = 16
model = SingleLayerMultiNeuronsRNN(input_dim = input_dim, num_neurons = num_neurons).to(device = device)
print(f">>>>> the desired output: {model(X0, X1)}")

>>>>> the desired output: (tensor([[ 0.8486, -0.9880,  1.0000,  0.4009,  0.9667,  0.0428, -1.0000, -1.0000,
         -1.0000, -0.9997, -0.9988,  0.6447,  0.9999,  1.0000,  0.9800,  0.7880],
        [ 0.9999, -1.0000,  1.0000,  0.9962, -1.0000,  1.0000, -1.0000, -1.0000,
         -1.0000,  1.0000, -0.1601,  0.9766,  1.0000, -0.4992, -1.0000, -0.9994],
        [-0.5690, -1.0000,  1.0000, -0.8670,  1.0000,  1.0000, -1.0000, -1.0000,
         -1.0000, -0.9964, -0.9925,  0.9429,  1.0000,  1.0000, -0.5357,  0.9785],
        [-0.9523, -1.0000,  0.9939, -0.9551, -0.7750,  0.7055, -1.0000, -1.0000,
         -1.0000,  1.0000,  0.9559, -0.9954, -0.5176, -0.9998, -0.9157, -0.2164]],
       device='cuda:0', grad_fn=<TanhBackward>), tensor([[ 1.6159e+00, -1.9858e+00,  1.7436e+00, -1.4998e+00,  2.4500e-03,
          8.0577e-01, -1.2330e-02, -1.9979e+00, -1.0578e+00,  9.6324e-02,
         -3.7570e-01,  1.9894e+00,  5.9020e-02,  1.1804e-01, -2.6625e-01,
          8.9957e-03],
        [ 9.1924e-03,  5.1

In [None]:
x_batch = torch.tensor([[[0,1,2,0], [3,4,5,2], 
                         [6,7,8,1], [9,0,1,8]],
                        [[9,8,7,2], [1,0,0,0], 
                         [9,6,5,4], [3,3,2,1]]
                       ], dtype = torch.float).to(device = device)

In [None]:
print(x_batch.shape)

torch.Size([2, 4, 4])


In [None]:
# Using RnnCell from torch we can compute similar network as above in a very simple way as follow
inputs_dim = 4
num_neurons = 5
model = nn.RNNCell(input_size = input_dim, hidden_size = num_neurons).to(device = device)
h0 = torch.randn(size = (4, 5)).to(device = device)
# since we have multiple layers, we stack together using the for-loop
# for the above x_batch we have two time stamps = seq-LEN
outputs = [] # place-holder 
for i in range(2):
  hx = model(x_batch[i], h0)
  outputs.append(hx)
print(f">>>> the desired output: {outputs}") # shape == [input_dim, hidden_size]


>>>> the desired output: [tensor([[-0.9463, -0.6893, -0.2524, -0.9637, -0.2208],
        [-0.9883, -0.9644,  0.7246, -0.6490, -0.9979],
        [-0.9997, -0.9820,  0.4870, -0.9534, -0.9969],
        [-0.9551,  0.5289,  0.5171,  0.1352, -1.0000]], device='cuda:0',
       grad_fn=<TanhBackward>), tensor([[-0.9999, -0.9841, -0.2931, -0.9975, -0.9998],
        [-0.7840, -0.3831,  0.4368, -0.1192, -0.8170],
        [-0.9977, -0.5888,  0.1649, -0.8125, -0.9999],
        [-0.9883, -0.9474,  0.8088, -0.7885, -0.9750]], device='cuda:0',
       grad_fn=<TanhBackward>)]


In [None]:
# Using the aid of RNNCell-- we implement the clean version of the above in the following class

In [None]:
class RNNCellClean(nn.Module):
  def __init__(self, input_dim, hidden_dim, batch_size):
    super(RNNCellClean, self).__init__()
    self.rnn_cell = nn.RNNCell(input_size = input_dim, hidden_size = hidden_dim, device = device)
    self.h0 = torch.randn(size = (batch_size, hidden_dim)).to(device = device) # initialize the hidden state to random normal
  
  def forward(self, input_tensor):
    '''
    the input_tensor has the shape of [seq_len, batch_size, input_dim]
    '''
    seq_len = input_tensor.size(0) # grab the sequence length
    outputs = []
    # iterate over the sequence length
    for i in range(seq_len):
      self.hx = self.rnn_cell(input_tensor[i], self.h0)
      outputs.append(self.hx)
    return outputs, self.hx # desired shape: [input_dim, hidden_dim] == [4, 8]


In [None]:
# Instantiating and testing the class for the desired output: 
input_dim = 4
hidden_dim = 8
batch_size = 2
seq_len = 4
input_tensor = torch.randn(size = (seq_len, batch_size, input_dim)).to(device = device)
model = RNNCellClean(input_dim, hidden_dim, batch_size).to(device = device)
output, hidden = model(input_tensor)
print(f">>>> The desired output \n{output}\n\n\n>>>> desired hidden state\n{hidden}")


>>>> The desired output 
[tensor([[ 0.1019, -0.3485,  0.7331,  0.2111, -0.0780, -0.3872,  0.2489,  0.8312],
        [-0.2096,  0.5396,  0.5288, -0.9294, -0.3804,  0.8696, -0.7815, -0.4226]],
       device='cuda:0', grad_fn=<TanhBackward>), tensor([[ 0.2276, -0.5669,  0.6586,  0.5156,  0.0895, -0.2938,  0.5078,  0.7417],
        [-0.1119,  0.4649,  0.6467, -0.8613, -0.0180,  0.9424, -0.7608, -0.3338]],
       device='cuda:0', grad_fn=<TanhBackward>), tensor([[ 0.1276, -0.5777,  0.4672,  0.1322,  0.2423, -0.5699,  0.2902,  0.7489],
        [ 0.1024,  0.1505,  0.5441, -0.6217, -0.0139,  0.9574, -0.4218, -0.6276]],
       device='cuda:0', grad_fn=<TanhBackward>), tensor([[ 0.2517, -0.4415,  0.5477,  0.0918, -0.0569, -0.4595,  0.3082,  0.7920],
        [ 0.1919,  0.7753,  0.7772, -0.9423, -0.0631,  0.9815, -0.9053,  0.3382]],
       device='cuda:0', grad_fn=<TanhBackward>)]


>>>> desired hidden state
tensor([[ 0.2517, -0.4415,  0.5477,  0.0918, -0.0569, -0.4595,  0.3082,  0.7920],
        

In [None]:
## A simple RNN--->Project..Demo on Mnist data.

import torchvision
from torch import optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
from tensorflow import summary
from tqdm import tqdm
import time, datetime
%load_ext tensorboard

In [None]:
# Hyperparameters 
batch_size = 128
EPOCHS = 30
hidden_dim = 512
input_dim = 28
seq_len = 28
num_layers = 2
num_classes = 10
learning_rate = 1e-3

In [None]:
# The model class: We create a 2-layers simple-rnn with an fc layer for the out
class MyRNN(nn.Module):
  def __init__(self,input_dim, hidden_dim, num_layers, num_classes):
    super(MyRNN, self).__init__()
    self.hidden_dim = hidden_dim
    self.num_layers = num_layers
    self.rnn = nn.RNN(input_size = input_dim, 
                         hidden_size = hidden_dim,
                         num_layers = 2,
                         batch_first = True,
                         dropout = 0.5)
    
    self.fc_out = nn.Linear(in_features = hidden_dim * seq_len, out_features = num_classes)
  
  def forward(self, input_tensor, hidden):
    '''
    input_tensor: == mnist images: shape == [128, 28, 28] ==> [batch_size, seq_len, input_dim]
    # we need to squeeze the channel dimension to suit the input shape of [batch, 28, 28]
    hidden == initial hidden state: shape == [num_layers = 1, batch_size, hidden_dim]
    '''
    # initialize the hidden state
    batch_size = input_tensor.size(0)
    num_layers = 2
    hidden = torch.randn(size = (self.num_layers, batch_size, self.hidden_dim)).to(device = device)
    # run the rnn with 2 layers
    out, _ = self.rnn(input_tensor, hidden)
    # out shape == [batch_size, input_dim, hidden]
    out = out.reshape(out.shape[0], -1)
    return self.fc_out(out)


In [None]:
# Hyperparameters and model's instantiation:
batch_size = 128
EPOCHS = 30
hidden_dim = 512
input_dim = 28
seq_len = 28
num_layers = 2
num_classes = 10
learning_rate = 1e-3
h0 = torch.randn(size = (num_layers, batch_size, hidden_dim)).to(device = device)
input_tensor = torch.randn(size = (batch_size, seq_len, input_dim)).to(device = device)
model = MyRNN(input_dim, hidden_dim, num_layers, num_classes).to(device = device)
print(f">>>> The desired output shape:{model(input_tensor, hidden_dim).shape}")
print(model)

>>>> The desired output shape:torch.Size([128, 10])
MyRNN(
  (rnn): RNN(28, 512, num_layers=2, batch_first=True, dropout=0.5)
  (fc_out): Linear(in_features=14336, out_features=10, bias=True)
)


In [None]:
# Get and preprocess the data from torchvission
train_data = datasets.MNIST(root = "mnist_rnn/train", train = True, transform = transforms.ToTensor(), download = True)
valid_data = datasets.MNIST(root = "mnist_rnn/test", train = False, transform = transforms.ToTensor(), download = True)
train_loader = DataLoader(dataset = train_data, batch_size = batch_size, shuffle = True)
valid_loader = DataLoader(dataset = valid_data, batch_size = batch_size, shuffle = False)
x_train_batch, y_train_batch = next(iter(train_loader))
print(f">>>> x_train_batch_shape: {x_train_batch.shape}\t>>>> t_train_batch_shape: {y_train_batch.shape}")


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist_rnn/train/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting mnist_rnn/train/MNIST/raw/train-images-idx3-ubyte.gz to mnist_rnn/train/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist_rnn/train/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting mnist_rnn/train/MNIST/raw/train-labels-idx1-ubyte.gz to mnist_rnn/train/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist_rnn/train/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting mnist_rnn/train/MNIST/raw/t10k-images-idx3-ubyte.gz to mnist_rnn/train/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to mnist_rnn/train/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting mnist_rnn/train/MNIST/raw/t10k-labels-idx1-ubyte.gz to mnist_rnn/train/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist_rnn/test/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting mnist_rnn/test/MNIST/raw/train-images-idx3-ubyte.gz to mnist_rnn/test/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist_rnn/test/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting mnist_rnn/test/MNIST/raw/train-labels-idx1-ubyte.gz to mnist_rnn/test/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist_rnn/test/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting mnist_rnn/test/MNIST/raw/t10k-images-idx3-ubyte.gz to mnist_rnn/test/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to mnist_rnn/test/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting mnist_rnn/test/MNIST/raw/t10k-labels-idx1-ubyte.gz to mnist_rnn/test/MNIST/raw

>>>> x_train_batch_shape: torch.Size([128, 1, 28, 28])	>>>> t_train_batch_shape: torch.Size([128])


In [None]:
# get the loss and the optimizer's objects
loss_obj = nn.CrossEntropyLoss()
optimizer = optim.Adam(params = model.parameters(), lr = learning_rate)


In [None]:
# tensorboard env
current_time = datetime.datetime.now().timestamp()
train_dir = "logs/tensorboard/mnist_rnn/train/" + str(current_time)
valid_dir = "logs/tensorboard/mnist_rnn/valid/" + str(current_time)
acc_dir = "logs/tensorboard/mnist_rnn/accuracy/" + str(current_time)
train_writer = summary.create_file_writer(logdir = train_dir)
valid_writer = summary.create_file_writer(logdir = valid_dir)
acc_writer = summary.create_file_writer(logdir = acc_dir)

In [None]:
def __train__(model, iterator, optimizer,loss_obj, clip):
  model.train()
  loss_per_epoch = 0
  step = 0
  for (idx, (data, label)) in enumerate(tqdm(iterator)):
    data = data.squeeze(1).to(device = device)
    label = label.to(device = device)
    h0 = torch.randn(size = (num_layers, batch_size, hidden_dim)).to(device = device)
    
    optimizer.zero_grad()
    preds = model(data, h0)
    train_loss = loss_obj(preds, label)
    train_loss.backward()
    torch.nn.utils.clip_grad_norm_(parameters = model.parameters(), max_norm = clip)
    loss_per_epoch += train_loss.item()
    optimizer.step()
    if idx % 10 == 0:
      with train_writer.as_default():
        summary.scalar("train_loss", train_loss.item(), step = step)
        step += 1
  return loss_per_epoch / len(iterator)


In [None]:
def __valid__(model, iterator, loss_obj):
  model.eval()
  with torch.no_grad():
    step = 0
    loss_per_epoch = 0
    for (idx, (data, label)) in enumerate(tqdm(iterator)):
      data = data.squeeze(1).to(device = device)
      label = label.to(device = device)
      h0 = torch.randn(size = (num_layers, batch_size, hidden_dim)).to(device = device)
      optimizer.zero_grad()
      preds = model(data, h0)
      valid_loss = loss_obj(preds, label)
      loss_per_epoch += valid_loss.item()
      if idx % 10 == 0:
        with valid_writer.as_default():
          summary.scalar("valid_loss", valid_loss.item(), step = step)
          step += 1
  return loss_per_epoch / len(iterator)

def __accuracy__(model, iterator):
  if iterator.dataset.train:
    print(f"\n>>> checking the accuracy for the training dataset\n>>>> please wait............")
  else:
    print(f"\n>>>> checking the accuracy for the validation dataset\n>>>> please wait............")
  num_examples = 0
  num_correct = 0
  step = 0
  model.eval()
  with torch.no_grad():
    for (idx, (data, label)) in enumerate(tqdm(iterator)):
      data = data.squeeze(1).to(device = device)
      label = label.to(device = device)
      h0 = torch.randn(size = (num_layers, batch_size, hidden_dim)).to(device = device)
      preds = model(data, h0)
      _, predictions = preds.max(1) # grab the maximum proba-value
      num_correct += (predictions == label).sum() # sum all correct predictions examples
      num_examples += predictions.size(0)
      accuracy = num_correct / num_examples
      if idx % 10 == 0:
        with acc_writer.as_default():
          summary.scalar("Accuracy", accuracy.cpu().numpy(), step = step)
          step+=1
  model.train()
  return num_correct / num_examples


In [None]:
clip = 1
best_val_loss = float("inf")
tic = time.time()
for epoch in range(EPOCHS):
  print(f"\n>>>> train starts for epoch {epoch + 1}\n>>>> please wait while the model is training.......................")
  train_loss = __train__(model, train_loader, optimizer, loss_obj, clip)
  valid_loss = __valid__(model, valid_loader, loss_obj)
  train_accuracy = __accuracy__(model, train_loader)
  valid_accuracy = __accuracy__(model, valid_loader)
  if valid_loss < best_val_loss:
    best_val_loss = valid_loss
    torch.save(model.state_dict(), "rnn_simple.pt")
  
  print(f"\n>>>> end of epoch {epoch + 1}, train_loss: {train_loss:.4f}, train_PPL: {math.exp(train_loss):7.4f}")
  print(f"\n>>>> end of epoch {epoch + 1}, valid_loss: {valid_loss:.4f}, valid_PPL: {math.exp(valid_loss):7.4f}")
  print(f"\n>>>> end of epoch {epoch + 1}, train_accuracy: {train_accuracy:.4f}, valid_accuracy: {valid_accuracy:.4f}")
toc = time.time()
print(f">>>> time elapsed for training the model for {EPOCHS} epochs is {time_fmt(toc - tic)}")

  1%|          | 4/469 [00:00<00:14, 33.01it/s]


>>>> train starts for epoch 1
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 42.86it/s]
100%|██████████| 79/79 [00:01<00:00, 70.08it/s]
  2%|▏         | 8/469 [00:00<00:06, 75.15it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 73.00it/s]
 10%|█         | 8/79 [00:00<00:00, 73.84it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 72.90it/s]
  1%|          | 4/469 [00:00<00:13, 34.76it/s]


>>>> end of epoch 1, train_loss: 0.2089, train_PPL:  1.2323

>>>> end of epoch 1, valid_loss: 0.1868, valid_PPL:  1.2053

>>>> end of epoch 1, train_accuracy: 0.9615, valid_accuracy: 0.9569

>>>> train starts for epoch 2
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:11<00:00, 42.45it/s]
100%|██████████| 79/79 [00:01<00:00, 64.32it/s]
  1%|▏         | 7/469 [00:00<00:07, 65.00it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 72.17it/s]
 10%|█         | 8/79 [00:00<00:00, 74.39it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 71.91it/s]
  1%|          | 4/469 [00:00<00:14, 32.93it/s]


>>>> end of epoch 2, train_loss: 0.2282, train_PPL:  1.2564

>>>> end of epoch 2, valid_loss: 0.2346, valid_PPL:  1.2645

>>>> end of epoch 2, train_accuracy: 0.9521, valid_accuracy: 0.9535

>>>> train starts for epoch 3
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 43.98it/s]
100%|██████████| 79/79 [00:01<00:00, 66.68it/s]
  2%|▏         | 8/469 [00:00<00:05, 78.71it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 75.76it/s]
 10%|█         | 8/79 [00:00<00:00, 74.28it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 78.19it/s]
  1%|          | 4/469 [00:00<00:12, 37.57it/s]


>>>> end of epoch 3, train_loss: 0.2817, train_PPL:  1.3253

>>>> end of epoch 3, valid_loss: 0.2322, valid_PPL:  1.2613

>>>> end of epoch 3, train_accuracy: 0.9592, valid_accuracy: 0.9558

>>>> train starts for epoch 4
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 45.03it/s]
100%|██████████| 79/79 [00:01<00:00, 71.18it/s]
  2%|▏         | 8/469 [00:00<00:06, 72.73it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 77.90it/s]
 11%|█▏        | 9/79 [00:00<00:00, 80.71it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 78.01it/s]
  1%|          | 4/469 [00:00<00:12, 37.02it/s]


>>>> end of epoch 4, train_loss: 0.3675, train_PPL:  1.4441

>>>> end of epoch 4, valid_loss: 0.3189, valid_PPL:  1.3756

>>>> end of epoch 4, train_accuracy: 0.9480, valid_accuracy: 0.9478

>>>> train starts for epoch 5
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:11<00:00, 42.53it/s]
100%|██████████| 79/79 [00:01<00:00, 65.18it/s]
  2%|▏         | 8/469 [00:00<00:06, 74.66it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 71.30it/s]
 10%|█         | 8/79 [00:00<00:00, 72.47it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 70.70it/s]
  1%|          | 4/469 [00:00<00:12, 35.96it/s]


>>>> end of epoch 5, train_loss: 0.3941, train_PPL:  1.4831

>>>> end of epoch 5, valid_loss: 0.3992, valid_PPL:  1.4907

>>>> end of epoch 5, train_accuracy: 0.9423, valid_accuracy: 0.9433

>>>> train starts for epoch 6
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:11<00:00, 42.27it/s]
100%|██████████| 79/79 [00:01<00:00, 67.16it/s]
  2%|▏         | 8/469 [00:00<00:05, 78.40it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 77.94it/s]
  9%|▉         | 7/79 [00:00<00:01, 67.88it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 76.50it/s]
  1%|          | 4/469 [00:00<00:12, 37.97it/s]


>>>> end of epoch 6, train_loss: 0.4160, train_PPL:  1.5160

>>>> end of epoch 6, valid_loss: 0.3728, valid_PPL:  1.4518

>>>> end of epoch 6, train_accuracy: 0.9499, valid_accuracy: 0.9475

>>>> train starts for epoch 7
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 44.72it/s]
100%|██████████| 79/79 [00:01<00:00, 69.48it/s]
  2%|▏         | 8/469 [00:00<00:06, 75.45it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 78.09it/s]
 11%|█▏        | 9/79 [00:00<00:00, 81.49it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:00<00:00, 79.94it/s]
  1%|          | 4/469 [00:00<00:12, 36.94it/s]


>>>> end of epoch 7, train_loss: 0.3944, train_PPL:  1.4835

>>>> end of epoch 7, valid_loss: 0.3566, valid_PPL:  1.4285

>>>> end of epoch 7, train_accuracy: 0.9540, valid_accuracy: 0.9528

>>>> train starts for epoch 8
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 45.48it/s]
100%|██████████| 79/79 [00:01<00:00, 67.55it/s]
  1%|▏         | 7/469 [00:00<00:06, 66.06it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 72.51it/s]
  9%|▉         | 7/79 [00:00<00:01, 69.08it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 71.86it/s]
  1%|          | 4/469 [00:00<00:12, 36.36it/s]


>>>> end of epoch 8, train_loss: 0.3892, train_PPL:  1.4759

>>>> end of epoch 8, valid_loss: 0.3264, valid_PPL:  1.3859

>>>> end of epoch 8, train_accuracy: 0.9600, valid_accuracy: 0.9555

>>>> train starts for epoch 9
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:11<00:00, 42.44it/s]
100%|██████████| 79/79 [00:01<00:00, 64.02it/s]
  2%|▏         | 8/469 [00:00<00:06, 74.82it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 72.94it/s]
 10%|█         | 8/79 [00:00<00:00, 77.70it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 78.84it/s]
  1%|          | 4/469 [00:00<00:12, 37.66it/s]


>>>> end of epoch 9, train_loss: 0.3780, train_PPL:  1.4593

>>>> end of epoch 9, valid_loss: 0.3499, valid_PPL:  1.4189

>>>> end of epoch 9, train_accuracy: 0.9554, valid_accuracy: 0.9537

>>>> train starts for epoch 10
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 45.29it/s]
100%|██████████| 79/79 [00:01<00:00, 70.69it/s]
  2%|▏         | 9/469 [00:00<00:05, 81.94it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 77.25it/s]
 10%|█         | 8/79 [00:00<00:00, 76.57it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 75.08it/s]
  1%|          | 4/469 [00:00<00:12, 38.36it/s]


>>>> end of epoch 10, train_loss: 0.3823, train_PPL:  1.4657

>>>> end of epoch 10, valid_loss: 0.3651, valid_PPL:  1.4406

>>>> end of epoch 10, train_accuracy: 0.9541, valid_accuracy: 0.9556

>>>> train starts for epoch 11
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 45.68it/s]
100%|██████████| 79/79 [00:01<00:00, 71.34it/s]
  2%|▏         | 8/469 [00:00<00:05, 79.20it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 77.11it/s]
 11%|█▏        | 9/79 [00:00<00:00, 78.41it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:00<00:00, 79.59it/s]
  1%|          | 4/469 [00:00<00:12, 36.28it/s]


>>>> end of epoch 11, train_loss: 0.3718, train_PPL:  1.4504

>>>> end of epoch 11, valid_loss: 0.3715, valid_PPL:  1.4499

>>>> end of epoch 11, train_accuracy: 0.9565, valid_accuracy: 0.9556

>>>> train starts for epoch 12
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:11<00:00, 42.46it/s]
100%|██████████| 79/79 [00:01<00:00, 64.27it/s]
  2%|▏         | 8/469 [00:00<00:06, 73.61it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 72.11it/s]
 10%|█         | 8/79 [00:00<00:00, 74.54it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 72.95it/s]
  1%|          | 4/469 [00:00<00:12, 35.79it/s]


>>>> end of epoch 12, train_loss: 0.3614, train_PPL:  1.4353

>>>> end of epoch 12, valid_loss: 0.4203, valid_PPL:  1.5224

>>>> end of epoch 12, train_accuracy: 0.9553, valid_accuracy: 0.9491

>>>> train starts for epoch 13
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:11<00:00, 42.36it/s]
100%|██████████| 79/79 [00:01<00:00, 70.07it/s]
  2%|▏         | 9/469 [00:00<00:05, 79.79it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 77.41it/s]
 11%|█▏        | 9/79 [00:00<00:00, 81.65it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 77.56it/s]
  1%|          | 4/469 [00:00<00:11, 38.80it/s]


>>>> end of epoch 13, train_loss: 0.3506, train_PPL:  1.4200

>>>> end of epoch 13, valid_loss: 0.2932, valid_PPL:  1.3408

>>>> end of epoch 13, train_accuracy: 0.9637, valid_accuracy: 0.9637

>>>> train starts for epoch 14
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 45.24it/s]
100%|██████████| 79/79 [00:01<00:00, 68.30it/s]
  2%|▏         | 8/469 [00:00<00:06, 76.26it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 76.81it/s]
 10%|█         | 8/79 [00:00<00:00, 79.62it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 78.44it/s]
  1%|          | 4/469 [00:00<00:12, 36.56it/s]


>>>> end of epoch 14, train_loss: 0.3607, train_PPL:  1.4344

>>>> end of epoch 14, valid_loss: 0.3115, valid_PPL:  1.3654

>>>> end of epoch 14, train_accuracy: 0.9673, valid_accuracy: 0.9661

>>>> train starts for epoch 15
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 45.65it/s]
100%|██████████| 79/79 [00:01<00:00, 65.43it/s]
  2%|▏         | 8/469 [00:00<00:06, 72.10it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 72.05it/s]
  9%|▉         | 7/79 [00:00<00:01, 60.84it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 70.13it/s]
  1%|          | 4/469 [00:00<00:12, 36.05it/s]


>>>> end of epoch 15, train_loss: 0.3364, train_PPL:  1.3999

>>>> end of epoch 15, valid_loss: 0.2307, valid_PPL:  1.2595

>>>> end of epoch 15, train_accuracy: 0.9717, valid_accuracy: 0.9722

>>>> train starts for epoch 16
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:11<00:00, 42.42it/s]
100%|██████████| 79/79 [00:01<00:00, 64.65it/s]
  2%|▏         | 8/469 [00:00<00:06, 72.23it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 74.48it/s]
 10%|█         | 8/79 [00:00<00:00, 77.98it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 77.13it/s]
  1%|          | 4/469 [00:00<00:12, 37.34it/s]


>>>> end of epoch 16, train_loss: 0.3386, train_PPL:  1.4030

>>>> end of epoch 16, valid_loss: 0.2915, valid_PPL:  1.3384

>>>> end of epoch 16, train_accuracy: 0.9702, valid_accuracy: 0.9685

>>>> train starts for epoch 17
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 43.38it/s]
100%|██████████| 79/79 [00:01<00:00, 63.43it/s]
  2%|▏         | 8/469 [00:00<00:06, 72.02it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 72.19it/s]
 10%|█         | 8/79 [00:00<00:00, 73.91it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 71.89it/s]
  1%|          | 4/469 [00:00<00:13, 34.04it/s]


>>>> end of epoch 17, train_loss: 0.3253, train_PPL:  1.3844

>>>> end of epoch 17, valid_loss: 0.2540, valid_PPL:  1.2892

>>>> end of epoch 17, train_accuracy: 0.9749, valid_accuracy: 0.9714

>>>> train starts for epoch 18
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:11<00:00, 42.37it/s]
100%|██████████| 79/79 [00:01<00:00, 69.17it/s]
  2%|▏         | 8/469 [00:00<00:05, 78.55it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 76.00it/s]
 10%|█         | 8/79 [00:00<00:00, 75.41it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 78.91it/s]
  1%|          | 4/469 [00:00<00:13, 34.81it/s]


>>>> end of epoch 18, train_loss: 0.3237, train_PPL:  1.3822

>>>> end of epoch 18, valid_loss: 0.3049, valid_PPL:  1.3565

>>>> end of epoch 18, train_accuracy: 0.9697, valid_accuracy: 0.9648

>>>> train starts for epoch 19
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 45.52it/s]
100%|██████████| 79/79 [00:01<00:00, 71.38it/s]
  2%|▏         | 8/469 [00:00<00:05, 77.25it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 77.95it/s]
 11%|█▏        | 9/79 [00:00<00:00, 81.54it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:00<00:00, 79.60it/s]
  1%|          | 4/469 [00:00<00:12, 37.20it/s]


>>>> end of epoch 19, train_loss: 0.3175, train_PPL:  1.3737

>>>> end of epoch 19, valid_loss: 0.2590, valid_PPL:  1.2957

>>>> end of epoch 19, train_accuracy: 0.9724, valid_accuracy: 0.9713

>>>> train starts for epoch 20
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 45.96it/s]
100%|██████████| 79/79 [00:01<00:00, 68.90it/s]
  2%|▏         | 9/469 [00:00<00:05, 81.80it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 72.49it/s]
 10%|█         | 8/79 [00:00<00:00, 73.70it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 72.57it/s]
  1%|          | 4/469 [00:00<00:12, 36.25it/s]


>>>> end of epoch 20, train_loss: 0.3097, train_PPL:  1.3631

>>>> end of epoch 20, valid_loss: 0.2648, valid_PPL:  1.3032

>>>> end of epoch 20, train_accuracy: 0.9727, valid_accuracy: 0.9715

>>>> train starts for epoch 21
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:11<00:00, 42.38it/s]
100%|██████████| 79/79 [00:01<00:00, 64.19it/s]
  1%|▏         | 7/469 [00:00<00:06, 69.62it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 70.15it/s]
 10%|█         | 8/79 [00:00<00:00, 79.49it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 78.21it/s]
  1%|          | 4/469 [00:00<00:12, 37.86it/s]


>>>> end of epoch 21, train_loss: 0.3246, train_PPL:  1.3835

>>>> end of epoch 21, valid_loss: 0.2482, valid_PPL:  1.2817

>>>> end of epoch 21, train_accuracy: 0.9766, valid_accuracy: 0.9753

>>>> train starts for epoch 22
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 44.63it/s]
100%|██████████| 79/79 [00:01<00:00, 68.49it/s]
  1%|▏         | 7/469 [00:00<00:06, 69.24it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 75.27it/s]
 10%|█         | 8/79 [00:00<00:00, 78.54it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 73.66it/s]
  1%|          | 4/469 [00:00<00:12, 38.06it/s]


>>>> end of epoch 22, train_loss: 0.3131, train_PPL:  1.3677

>>>> end of epoch 22, valid_loss: 0.2980, valid_PPL:  1.3471

>>>> end of epoch 22, train_accuracy: 0.9726, valid_accuracy: 0.9709

>>>> train starts for epoch 23
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 43.61it/s]
100%|██████████| 79/79 [00:01<00:00, 63.51it/s]
  2%|▏         | 9/469 [00:00<00:05, 80.94it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 78.10it/s]
 11%|█▏        | 9/79 [00:00<00:00, 81.75it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 78.69it/s]
  1%|          | 4/469 [00:00<00:12, 37.72it/s]


>>>> end of epoch 23, train_loss: 0.3253, train_PPL:  1.3844

>>>> end of epoch 23, valid_loss: 0.2354, valid_PPL:  1.2654

>>>> end of epoch 23, train_accuracy: 0.9742, valid_accuracy: 0.9733

>>>> train starts for epoch 24
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 42.65it/s]
100%|██████████| 79/79 [00:01<00:00, 63.69it/s]
  2%|▏         | 8/469 [00:00<00:06, 72.16it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 70.54it/s]
 10%|█         | 8/79 [00:00<00:00, 71.77it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 70.72it/s]
  1%|          | 4/469 [00:00<00:12, 36.66it/s]


>>>> end of epoch 24, train_loss: 0.3262, train_PPL:  1.3857

>>>> end of epoch 24, valid_loss: 0.2970, valid_PPL:  1.3458

>>>> end of epoch 24, train_accuracy: 0.9746, valid_accuracy: 0.9728

>>>> train starts for epoch 25
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:11<00:00, 42.35it/s]
100%|██████████| 79/79 [00:01<00:00, 69.28it/s]
  2%|▏         | 8/469 [00:00<00:05, 77.83it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 76.56it/s]
 10%|█         | 8/79 [00:00<00:00, 74.79it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 77.89it/s]
  1%|          | 4/469 [00:00<00:12, 36.50it/s]


>>>> end of epoch 25, train_loss: 0.3234, train_PPL:  1.3818

>>>> end of epoch 25, valid_loss: 0.2275, valid_PPL:  1.2555

>>>> end of epoch 25, train_accuracy: 0.9779, valid_accuracy: 0.9754

>>>> train starts for epoch 26
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 43.55it/s]
100%|██████████| 79/79 [00:01<00:00, 66.64it/s]
  2%|▏         | 8/469 [00:00<00:06, 71.44it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 76.01it/s]
 10%|█         | 8/79 [00:00<00:00, 75.16it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 76.94it/s]
  1%|          | 4/469 [00:00<00:12, 35.80it/s]


>>>> end of epoch 26, train_loss: 0.3168, train_PPL:  1.3728

>>>> end of epoch 26, valid_loss: 0.2875, valid_PPL:  1.3331

>>>> end of epoch 26, train_accuracy: 0.9761, valid_accuracy: 0.9740

>>>> train starts for epoch 27
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 43.24it/s]
100%|██████████| 79/79 [00:01<00:00, 61.89it/s]
  2%|▏         | 8/469 [00:00<00:06, 73.07it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 70.70it/s]
 10%|█         | 8/79 [00:00<00:00, 75.05it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 72.00it/s]
  1%|          | 4/469 [00:00<00:13, 35.69it/s]


>>>> end of epoch 27, train_loss: 0.3022, train_PPL:  1.3528

>>>> end of epoch 27, valid_loss: 0.2740, valid_PPL:  1.3152

>>>> end of epoch 27, train_accuracy: 0.9769, valid_accuracy: 0.9726

>>>> train starts for epoch 28
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:11<00:00, 42.00it/s]
100%|██████████| 79/79 [00:01<00:00, 64.57it/s]
  1%|▏         | 7/469 [00:00<00:07, 63.69it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 74.96it/s]
 10%|█         | 8/79 [00:00<00:00, 78.79it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 72.24it/s]
  1%|          | 4/469 [00:00<00:12, 36.44it/s]


>>>> end of epoch 28, train_loss: 0.3025, train_PPL:  1.3533

>>>> end of epoch 28, valid_loss: 0.2647, valid_PPL:  1.3030

>>>> end of epoch 28, train_accuracy: 0.9774, valid_accuracy: 0.9739

>>>> train starts for epoch 29
>>>> please wait while the model is training.......................


100%|██████████| 469/469 [00:10<00:00, 43.88it/s]
100%|██████████| 79/79 [00:01<00:00, 67.79it/s]
  1%|▏         | 7/469 [00:00<00:06, 69.44it/s]


>>> checking the accuracy for the training dataset
>>>> please wait............


100%|██████████| 469/469 [00:06<00:00, 76.96it/s]
 10%|█         | 8/79 [00:00<00:00, 79.82it/s]


>>>> checking the accuracy for the validation dataset
>>>> please wait............


100%|██████████| 79/79 [00:01<00:00, 77.67it/s]
  1%|          | 4/469 [00:00<00:12, 37.32it/s]


>>>> end of epoch 29, train_loss: 0.3171, train_PPL:  1.3731

>>>> end of epoch 29, valid_loss: 0.3454, valid_PPL:  1.4126

>>>> end of epoch 29, train_accuracy: 0.9732, valid_accuracy: 0.9678

>>>> train starts for epoch 30
>>>> please wait while the model is training.......................


 34%|███▎      | 158/469 [00:03<00:07, 44.07it/s]

In [None]:
%tensorboard --logdir logs/tensorboard