In [1]:
import numpy as np
from collections import Counter
from tqdm import tqdm_notebook

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

from utils import *
from data_utils import Vocabulary
from train_utils import train
import random
import pickle

In [2]:
def tokenize_corpus(corpus_path):
    num_lines = len(open(corpus_path, encoding='utf8').read().split('\n'))
    
    # tokenize corpus
    output = ''
    with open(corpus_path, encoding='utf8') as f:
        for line in tqdm_notebook(f, desc='Tokenizing', total=num_lines):
            tokens = tokenizer(line.strip()) + ['\n']
            output += ' '.join(tokens)
    
    # save tokenized corpus
    tok_corpus_path = corpus_path[:-4] + '_tok.txt'
    with open(tok_corpus_path, 'w', encoding='utf8') as f:
        f.write(output)

In [3]:
train_data = 'moulavi_norm.txt'
train_data_tok = train_data[:-4] + '_tok.txt'

In [4]:
tokenize_corpus(train_data)

text = open(train_data, encoding='utf8').read().split('\n')[:10]
print(text)

print('\n\nAfter Tokenizing:\n\n')
print(tokenizer('\n'.join(text)))

HBox(children=(IntProgress(value=0, description='Tokenizing', max=53313, style=ProgressStyle(description_width…


['', '  \t', 'بشنو این نی چون شکایت می کند', 'از جداییها حکایت می کند', 'کز نیستان تا مرا ببریده اند', 'در نفیرم مرد و زن نالیده اند', 'سینه خواهم شرحه شرحه از فراق', 'تا بگویم شرح درد اشتیاق', 'هر کسی کو دور ماند از اصل خویش', 'باز جوید روزگار وصل خویش']


After Tokenizing:


[' \t ', 'بشنو', 'این', 'نی', 'چون', 'شکایت', 'می', 'کند', 'از', 'جداییها', 'حکایت', 'می', 'کند', 'کز', 'نیستان', 'تا', 'مرا', 'ببریده', 'اند', 'در', 'نفیرم', 'مرد', 'و', 'زن', 'نالیده', 'اند', 'سینه', 'خواهم', 'شرحه', 'شرحه', 'از', 'فراق', 'تا', 'بگویم', 'شرح', 'درد', 'اشتیاق', 'هر', 'کسی', 'کو', 'دور', 'ماند', 'از', 'اصل', 'خویش', 'باز', 'جوید', 'روزگار', 'وصل', 'خویش']


In [5]:
class Corpus(object):
    
    def __init__(self, corpus_path):
        self.vocabulary = Vocabulary()
        self.corpus_path = corpus_path
        self.num_sentences = len([line for line in open(corpus_path, encoding='utf8')])
    
    def get_data(self, max_vocab=30000, min_count=3, batch_size=20, split_ratio=0.2):
        
        # First pass: add words to the vocabulary
        trn_tokens, val_tokens = [], []
        with open(self.corpus_path, encoding='utf8') as f:
            for line in tqdm_notebook(f, desc='Building Vocab...', total=self.num_sentences):
                tokens = line.split() + ['<EOS>']
                if len(line) <= 10: continue
                if random.random() < split_ratio:
                    val_tokens += tokens
                else:
                    trn_tokens += tokens
        
        counter = Counter(trn_tokens + val_tokens)
        
        vocabs = [(w, c) for (w, c) in counter.most_common(max_vocab) if c >= min_count]
        
        for i, (word, count) in enumerate(vocabs):
            self.vocabulary.word2index[word] = i
            self.vocabulary.word2count[word] = count
            self.vocabulary.index2word[i] = word
            self.vocabulary.num_words += 1
        self.vocabulary.add_word('<UNK>')
        
        
        UNK_TOKEN = self.vocabulary.word2index['<UNK>']
        
        # train ids
        trn_ids = torch.LongTensor(len(trn_tokens))
        for idx, token in enumerate(trn_tokens):
            if token in self.vocabulary.word2index:
                trn_ids[idx] = self.vocabulary.word2index[token] 
            else:
                trn_ids[idx] = UNK_TOKEN
        
        val_ids = torch.LongTensor(len(val_tokens))
        for idx, token in enumerate(val_tokens):
            if token in self.vocabulary.word2index:
                val_ids[idx] = self.vocabulary.word2index[token] 
            else:
                val_ids[idx] = UNK_TOKEN
        
        num_batches = trn_ids.size(0) // batch_size
        trn_ids = trn_ids[: num_batches * batch_size]
        
        num_batches = val_ids.size(0) // batch_size
        val_ids = trn_ids[: num_batches * batch_size]

        return trn_ids.view(batch_size, -1), val_ids.view(batch_size, -1)

In [6]:
max_vocab = 30000
min_count = 1

# LSTM hyper-parameters
embed_size = 1500
hidden_size = 1500
num_layers = 2

# Training hyper-parameters
num_epochs = 40
batch_size = 50
seq_length = 60
learning_rate = 0.001

In [7]:
corpus = Corpus(train_data_tok)
trn_ids, val_ids = corpus.get_data(max_vocab, min_count, batch_size)
vocab_size = len(corpus.vocabulary)

# save vocabs and ids
pickle.dump(corpus.vocabulary, open('vocab.pkl', 'wb'))
np.save('trn_ids.npy', trn_ids.view(-1).numpy())
np.save('val_ids.npy', val_ids.view(-1).numpy())

HBox(children=(IntProgress(value=0, description='Building Vocab...', max=53313, style=ProgressStyle(descriptio…




In [8]:
vocab_size = len(corpus.vocabulary)
print(vocab_size)
print(trn_ids.size())
print(val_ids.size())

25175
torch.Size([50, 6341])
torch.Size([50, 1571])


In [9]:
most_commons = [(w, c) for (w, c) in corpus.vocabulary.word2count.items()][:20]

for w, c in most_commons:
    print(w, c)

<EOS> 52956
و 12871
از 7709
را 6764
در 6362
که 6355
آن 5799
تو 4578
می 4078
او 3878
بر 3873
ای 3650
این 3496
چون 3398
ز 3286
به 2848
تا 2847
من 2821
بود 2163
هر 2092


In [10]:
class LSTM_LM(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers=1, drop=0.35, tie=True):
        super(LSTM_LM, self).__init__()
        
        if tie:
            embed_size = hidden_size
        
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
            
        self.dropout = nn.Dropout(drop)
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True, dropout=0.35)
        self.fc = nn.Linear(hidden_size, vocab_size)
        
        if tie:
            # Use the same weights both for embedding and classification
            self.fc.weight.data = self.embedding.weight.data
            
        self.init_weights()
        
    def init_weights(self):
        self.embedding.weight.data.uniform_(-0.1, 0.1)
        self.fc.weight.data.uniform_(-0.1, 0.1)
        self.fc.bias.data.fill_(0)
        
    def init_hidden(self, batch_size):
        return (to_var(torch.zeros(self.num_layers, batch_size, self.hidden_size)),
                to_var(torch.zeros(self.num_layers, batch_size, self.hidden_size)))
        
    def forward(self, x, hidden):
        # embed word ids to vectors
        x = self.embedding(x)
        x = self.dropout(x)  # DROPOUT
        
        # forward RNN step
        x, hidden = self.lstm(x, hidden)
        x = self.dropout(x)  # DROPOUT
        
        # reshape output to (bs * seq_length, hidden_size)
        x = x.contiguous().view(x.size(0) * x.size(1), x.size(2))
        
        # decode hidden states of all time steps
        x = self.fc(x)
        
        return x, hidden
    
    def save(self, epoch, loss):
        filename = 'lm-masnavi-epoch-{}-em-{}-hi-{}-nl-{}-{:.2f}-{:.2f}.pth'.format(
            epoch, self.embed_size, self.hidden_size, self.num_layers, loss, np.exp(loss))
        torch.save(self.state_dict(), filename)

In [11]:
torch.cuda.init()
# # model
model = LSTM_LM(vocab_size, embed_size, hidden_size, num_layers, drop=0.65).cuda()

# loss function
criterion = nn.CrossEntropyLoss().cuda()

    
# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9)

In [12]:
hist = train(model, trn_ids, val_ids, 
             criterion, optimizer, scheduler, 
             num_epochs, batch_size, seq_length)

  return Variable(x, volatile=volatile)


here
here


  torch.nn.utils.clip_grad_norm(model.parameters(), 0.3)


Training: Epoch [1/40], Step [1/105], Loss: 10.131, Perp: 25106.65, Acc: 0.00           here
here
Training: Epoch [1/40], Step [2/105], Loss: 10.128, Perp: 25027.48, Acc: 0.00           here
here
Training: Epoch [1/40], Step [3/105], Loss: 10.159, Perp: 25811.83, Acc: 0.00           here
here
Training: Epoch [1/40], Step [4/105], Loss: 10.155, Perp: 25719.59, Acc: 0.00           here
here
Training: Epoch [1/40], Step [5/105], Loss: 10.151, Perp: 25607.64, Acc: 0.00           here
here
Training: Epoch [1/40], Step [6/105], Loss: 10.145, Perp: 25462.66, Acc: 0.00           here
here
Training: Epoch [1/40], Step [7/105], Loss: 10.139, Perp: 25311.25, Acc: 0.01           here
here
Training: Epoch [1/40], Step [8/105], Loss: 10.134, Perp: 25183.54, Acc: 0.01           here
here
Training: Epoch [1/40], Step [9/105], Loss: 10.129, Perp: 25067.94, Acc: 0.01           here
here
Training: Epoch [1/40], Step [10/105], Loss: 10.125, Perp: 24954.01, Acc: 0.01           here
here
Training: Epoch [1/

Training: Epoch [1/40], Step [84/105], Loss: 10.008, Perp: 22198.44, Acc: 0.01           here
here
Training: Epoch [1/40], Step [85/105], Loss: 10.007, Perp: 22182.97, Acc: 0.01           here
here
Training: Epoch [1/40], Step [86/105], Loss: 10.006, Perp: 22167.47, Acc: 0.01           here
here
Training: Epoch [1/40], Step [87/105], Loss: 10.006, Perp: 22151.81, Acc: 0.01           here
here
Training: Epoch [1/40], Step [88/105], Loss: 10.005, Perp: 22137.17, Acc: 0.01           here
here
Training: Epoch [1/40], Step [89/105], Loss: 10.004, Perp: 22122.55, Acc: 0.01           here
here
Training: Epoch [1/40], Step [90/105], Loss: 10.004, Perp: 22107.59, Acc: 0.01           here
here
Training: Epoch [1/40], Step [91/105], Loss: 10.003, Perp: 22093.02, Acc: 0.01           here
here
Training: Epoch [1/40], Step [92/105], Loss: 10.002, Perp: 22078.53, Acc: 0.01           here
here
Training: Epoch [1/40], Step [93/105], Loss: 10.002, Perp: 22064.41, Acc: 0.01           here
here
Training: 

Training: Epoch [2/40], Step [38/105], Loss: 6.481, Perp: 652.64, Acc: 0.16           here
here
Training: Epoch [2/40], Step [39/105], Loss: 6.481, Perp: 652.58, Acc: 0.16           here
here
Training: Epoch [2/40], Step [40/105], Loss: 6.481, Perp: 652.52, Acc: 0.16           here
here
Training: Epoch [2/40], Step [41/105], Loss: 6.481, Perp: 652.49, Acc: 0.16           here
here
Training: Epoch [2/40], Step [42/105], Loss: 6.481, Perp: 652.46, Acc: 0.16           here
here
Training: Epoch [2/40], Step [43/105], Loss: 6.481, Perp: 652.41, Acc: 0.16           here
here
Training: Epoch [2/40], Step [44/105], Loss: 6.481, Perp: 652.36, Acc: 0.16           here
here
Training: Epoch [2/40], Step [45/105], Loss: 6.481, Perp: 652.32, Acc: 0.16           here
here
Training: Epoch [2/40], Step [46/105], Loss: 6.481, Perp: 652.30, Acc: 0.16           here
here
Training: Epoch [2/40], Step [47/105], Loss: 6.480, Perp: 652.24, Acc: 0.16           here
here
Training: Epoch [2/40], Step [48/105], L

Validation: Epoch [2/40], Step [19/26], Loss: 6.035, Perp: 417.91, Acc: 0.18           here
here
Validation: Epoch [2/40], Step [20/26], Loss: 6.035, Perp: 417.87, Acc: 0.18           here
here
Validation: Epoch [2/40], Step [21/26], Loss: 6.035, Perp: 417.86, Acc: 0.18           here
here
Validation: Epoch [2/40], Step [22/26], Loss: 6.035, Perp: 417.82, Acc: 0.18           here
here
Validation: Epoch [2/40], Step [23/26], Loss: 6.035, Perp: 417.84, Acc: 0.18           here
here
Validation: Epoch [2/40], Step [24/26], Loss: 6.035, Perp: 417.82, Acc: 0.18           here
here
Validation: Epoch [2/40], Step [25/26], Loss: 6.035, Perp: 417.81, Acc: 0.18           here
here
Validation: Epoch [2/40], Step [26/26], Loss: 6.035, Perp: 417.80, Acc: 0.18           here
here
Training: Epoch [3/40], Step [1/105], Loss: 6.261, Perp: 523.84, Acc: 0.17           here
here
Training: Epoch [3/40], Step [2/105], Loss: 6.259, Perp: 522.80, Acc: 0.17           here
here
Training: Epoch [3/40], Step [3/10

Training: Epoch [3/40], Step [79/105], Loss: 6.252, Perp: 518.81, Acc: 0.17           here
here
Training: Epoch [3/40], Step [80/105], Loss: 6.251, Perp: 518.78, Acc: 0.17           here
here
Training: Epoch [3/40], Step [81/105], Loss: 6.251, Perp: 518.75, Acc: 0.17           here
here
Training: Epoch [3/40], Step [82/105], Loss: 6.251, Perp: 518.73, Acc: 0.17           here
here
Training: Epoch [3/40], Step [83/105], Loss: 6.251, Perp: 518.71, Acc: 0.17           here
here
Training: Epoch [3/40], Step [84/105], Loss: 6.251, Perp: 518.69, Acc: 0.17           here
here
Training: Epoch [3/40], Step [85/105], Loss: 6.251, Perp: 518.67, Acc: 0.17           here
here
Training: Epoch [3/40], Step [86/105], Loss: 6.251, Perp: 518.65, Acc: 0.17           here
here
Training: Epoch [3/40], Step [87/105], Loss: 6.251, Perp: 518.62, Acc: 0.17           here
here
Training: Epoch [3/40], Step [88/105], Loss: 6.251, Perp: 518.60, Acc: 0.17           here
here
Training: Epoch [3/40], Step [89/105], L

Training: Epoch [4/40], Step [34/105], Loss: 6.076, Perp: 435.40, Acc: 0.18           here
here
Training: Epoch [4/40], Step [35/105], Loss: 6.076, Perp: 435.36, Acc: 0.18           here
here
Training: Epoch [4/40], Step [36/105], Loss: 6.076, Perp: 435.33, Acc: 0.18           here
here
Training: Epoch [4/40], Step [37/105], Loss: 6.076, Perp: 435.29, Acc: 0.18           here
here
Training: Epoch [4/40], Step [38/105], Loss: 6.076, Perp: 435.25, Acc: 0.18           here
here
Training: Epoch [4/40], Step [39/105], Loss: 6.076, Perp: 435.22, Acc: 0.18           here
here
Training: Epoch [4/40], Step [40/105], Loss: 6.076, Perp: 435.18, Acc: 0.18           here
here
Training: Epoch [4/40], Step [41/105], Loss: 6.076, Perp: 435.17, Acc: 0.18           here
here
Training: Epoch [4/40], Step [42/105], Loss: 6.076, Perp: 435.15, Acc: 0.18           here
here
Training: Epoch [4/40], Step [43/105], Loss: 6.076, Perp: 435.12, Acc: 0.18           here
here
Training: Epoch [4/40], Step [44/105], L

Validation: Epoch [4/40], Step [15/26], Loss: 5.709, Perp: 301.51, Acc: 0.20           here
here
Validation: Epoch [4/40], Step [16/26], Loss: 5.709, Perp: 301.44, Acc: 0.20           here
here
Validation: Epoch [4/40], Step [17/26], Loss: 5.708, Perp: 301.39, Acc: 0.20           here
here
Validation: Epoch [4/40], Step [18/26], Loss: 5.708, Perp: 301.36, Acc: 0.20           here
here
Validation: Epoch [4/40], Step [19/26], Loss: 5.708, Perp: 301.34, Acc: 0.20           here
here
Validation: Epoch [4/40], Step [20/26], Loss: 5.708, Perp: 301.29, Acc: 0.20           here
here
Validation: Epoch [4/40], Step [21/26], Loss: 5.708, Perp: 301.27, Acc: 0.20           here
here
Validation: Epoch [4/40], Step [22/26], Loss: 5.708, Perp: 301.22, Acc: 0.20           here
here
Validation: Epoch [4/40], Step [23/26], Loss: 5.708, Perp: 301.22, Acc: 0.20           here
here
Validation: Epoch [4/40], Step [24/26], Loss: 5.708, Perp: 301.19, Acc: 0.20           here
here
Validation: Epoch [4/40], Step

Training: Epoch [5/40], Step [75/105], Loss: 5.881, Perp: 358.23, Acc: 0.19           here
here
Training: Epoch [5/40], Step [76/105], Loss: 5.881, Perp: 358.21, Acc: 0.19           here
here
Training: Epoch [5/40], Step [77/105], Loss: 5.881, Perp: 358.19, Acc: 0.19           here
here
Training: Epoch [5/40], Step [78/105], Loss: 5.881, Perp: 358.17, Acc: 0.19           here
here
Training: Epoch [5/40], Step [79/105], Loss: 5.881, Perp: 358.15, Acc: 0.19           here
here
Training: Epoch [5/40], Step [80/105], Loss: 5.881, Perp: 358.13, Acc: 0.19           here
here
Training: Epoch [5/40], Step [81/105], Loss: 5.881, Perp: 358.12, Acc: 0.19           here
here
Training: Epoch [5/40], Step [82/105], Loss: 5.881, Perp: 358.10, Acc: 0.19           here
here
Training: Epoch [5/40], Step [83/105], Loss: 5.881, Perp: 358.08, Acc: 0.19           here
here
Training: Epoch [5/40], Step [84/105], Loss: 5.881, Perp: 358.07, Acc: 0.19           here
here
Training: Epoch [5/40], Step [85/105], L

Training: Epoch [6/40], Step [30/105], Loss: 5.724, Perp: 306.15, Acc: 0.20           here
here
Training: Epoch [6/40], Step [31/105], Loss: 5.724, Perp: 306.14, Acc: 0.20           here
here
Training: Epoch [6/40], Step [32/105], Loss: 5.724, Perp: 306.11, Acc: 0.20           here
here
Training: Epoch [6/40], Step [33/105], Loss: 5.724, Perp: 306.09, Acc: 0.20           here
here
Training: Epoch [6/40], Step [34/105], Loss: 5.724, Perp: 306.07, Acc: 0.20           here
here
Training: Epoch [6/40], Step [35/105], Loss: 5.724, Perp: 306.04, Acc: 0.20           here
here
Training: Epoch [6/40], Step [36/105], Loss: 5.724, Perp: 306.03, Acc: 0.20           here
here
Training: Epoch [6/40], Step [37/105], Loss: 5.724, Perp: 306.00, Acc: 0.20           here
here
Training: Epoch [6/40], Step [38/105], Loss: 5.724, Perp: 305.98, Acc: 0.20           here
here
Training: Epoch [6/40], Step [39/105], Loss: 5.723, Perp: 305.96, Acc: 0.20           here
here
Training: Epoch [6/40], Step [40/105], L

Validation: Epoch [6/40], Step [11/26], Loss: 5.304, Perp: 201.05, Acc: 0.22           here
here
Validation: Epoch [6/40], Step [12/26], Loss: 5.303, Perp: 201.00, Acc: 0.22           here
here
Validation: Epoch [6/40], Step [13/26], Loss: 5.303, Perp: 200.97, Acc: 0.22           here
here
Validation: Epoch [6/40], Step [14/26], Loss: 5.303, Perp: 200.92, Acc: 0.22           here
here
Validation: Epoch [6/40], Step [15/26], Loss: 5.303, Perp: 200.91, Acc: 0.22           here
here
Validation: Epoch [6/40], Step [16/26], Loss: 5.303, Perp: 200.86, Acc: 0.22           here
here
Validation: Epoch [6/40], Step [17/26], Loss: 5.302, Perp: 200.82, Acc: 0.22           here
here
Validation: Epoch [6/40], Step [18/26], Loss: 5.302, Perp: 200.80, Acc: 0.22           here
here
Validation: Epoch [6/40], Step [19/26], Loss: 5.302, Perp: 200.78, Acc: 0.22           here
here
Validation: Epoch [6/40], Step [20/26], Loss: 5.302, Perp: 200.75, Acc: 0.22           here
here
Validation: Epoch [6/40], Step

Training: Epoch [7/40], Step [71/105], Loss: 5.553, Perp: 258.00, Acc: 0.20           here
here
Training: Epoch [7/40], Step [72/105], Loss: 5.553, Perp: 257.98, Acc: 0.20           here
here
Training: Epoch [7/40], Step [73/105], Loss: 5.553, Perp: 257.97, Acc: 0.20           here
here
Training: Epoch [7/40], Step [74/105], Loss: 5.553, Perp: 257.96, Acc: 0.20           here
here
Training: Epoch [7/40], Step [75/105], Loss: 5.553, Perp: 257.95, Acc: 0.20           here
here
Training: Epoch [7/40], Step [76/105], Loss: 5.553, Perp: 257.94, Acc: 0.20           here
here
Training: Epoch [7/40], Step [77/105], Loss: 5.553, Perp: 257.92, Acc: 0.20           here
here
Training: Epoch [7/40], Step [78/105], Loss: 5.553, Perp: 257.91, Acc: 0.20           here
here
Training: Epoch [7/40], Step [79/105], Loss: 5.553, Perp: 257.89, Acc: 0.20           here
here
Training: Epoch [7/40], Step [80/105], Loss: 5.552, Perp: 257.88, Acc: 0.20           here
here
Training: Epoch [7/40], Step [81/105], L

Training: Epoch [8/40], Step [26/105], Loss: 5.327, Perp: 205.81, Acc: 0.21           here
here
Training: Epoch [8/40], Step [27/105], Loss: 5.327, Perp: 205.80, Acc: 0.21           here
here
Training: Epoch [8/40], Step [28/105], Loss: 5.327, Perp: 205.79, Acc: 0.21           here
here
Training: Epoch [8/40], Step [29/105], Loss: 5.327, Perp: 205.79, Acc: 0.21           here
here
Training: Epoch [8/40], Step [30/105], Loss: 5.327, Perp: 205.78, Acc: 0.21           here
here
Training: Epoch [8/40], Step [31/105], Loss: 5.327, Perp: 205.78, Acc: 0.21           here
here
Training: Epoch [8/40], Step [32/105], Loss: 5.327, Perp: 205.76, Acc: 0.21           here
here
Training: Epoch [8/40], Step [33/105], Loss: 5.327, Perp: 205.75, Acc: 0.21           here
here
Training: Epoch [8/40], Step [34/105], Loss: 5.327, Perp: 205.74, Acc: 0.21           here
here
Training: Epoch [8/40], Step [35/105], Loss: 5.327, Perp: 205.72, Acc: 0.21           here
here
Training: Epoch [8/40], Step [36/105], L

Validation: Epoch [8/40], Step [7/26], Loss: 4.927, Perp: 138.00, Acc: 0.24           here
here
Validation: Epoch [8/40], Step [8/26], Loss: 4.927, Perp: 137.94, Acc: 0.24           here
here
Validation: Epoch [8/40], Step [9/26], Loss: 4.927, Perp: 137.90, Acc: 0.24           here
here
Validation: Epoch [8/40], Step [10/26], Loss: 4.926, Perp: 137.87, Acc: 0.24           here
here
Validation: Epoch [8/40], Step [11/26], Loss: 4.926, Perp: 137.84, Acc: 0.24           here
here
Validation: Epoch [8/40], Step [12/26], Loss: 4.926, Perp: 137.81, Acc: 0.24           here
here
Validation: Epoch [8/40], Step [13/26], Loss: 4.926, Perp: 137.78, Acc: 0.24           here
here
Validation: Epoch [8/40], Step [14/26], Loss: 4.925, Perp: 137.74, Acc: 0.24           here
here
Validation: Epoch [8/40], Step [15/26], Loss: 4.925, Perp: 137.72, Acc: 0.24           here
here
Validation: Epoch [8/40], Step [16/26], Loss: 4.925, Perp: 137.68, Acc: 0.24           here
here
Validation: Epoch [8/40], Step [1

Training: Epoch [9/40], Step [67/105], Loss: 5.166, Perp: 175.23, Acc: 0.21           here
here
Training: Epoch [9/40], Step [68/105], Loss: 5.166, Perp: 175.23, Acc: 0.21           here
here
Training: Epoch [9/40], Step [69/105], Loss: 5.166, Perp: 175.22, Acc: 0.21           here
here
Training: Epoch [9/40], Step [70/105], Loss: 5.166, Perp: 175.22, Acc: 0.21           here
here
Training: Epoch [9/40], Step [71/105], Loss: 5.166, Perp: 175.21, Acc: 0.21           here
here
Training: Epoch [9/40], Step [72/105], Loss: 5.166, Perp: 175.20, Acc: 0.21           here
here
Training: Epoch [9/40], Step [73/105], Loss: 5.166, Perp: 175.19, Acc: 0.21           here
here
Training: Epoch [9/40], Step [74/105], Loss: 5.166, Perp: 175.19, Acc: 0.21           here
here
Training: Epoch [9/40], Step [75/105], Loss: 5.166, Perp: 175.18, Acc: 0.21           here
here
Training: Epoch [9/40], Step [76/105], Loss: 5.166, Perp: 175.17, Acc: 0.21           here
here
Training: Epoch [9/40], Step [77/105], L

Training: Epoch [10/40], Step [21/105], Loss: 4.995, Perp: 147.72, Acc: 0.22           here
here
Training: Epoch [10/40], Step [22/105], Loss: 4.995, Perp: 147.72, Acc: 0.22           here
here
Training: Epoch [10/40], Step [23/105], Loss: 4.995, Perp: 147.71, Acc: 0.22           here
here
Training: Epoch [10/40], Step [24/105], Loss: 4.995, Perp: 147.71, Acc: 0.22           here
here
Training: Epoch [10/40], Step [25/105], Loss: 4.995, Perp: 147.70, Acc: 0.22           here
here
Training: Epoch [10/40], Step [26/105], Loss: 4.995, Perp: 147.70, Acc: 0.22           here
here
Training: Epoch [10/40], Step [27/105], Loss: 4.995, Perp: 147.69, Acc: 0.22           here
here
Training: Epoch [10/40], Step [28/105], Loss: 4.995, Perp: 147.68, Acc: 0.22           here
here
Training: Epoch [10/40], Step [29/105], Loss: 4.995, Perp: 147.68, Acc: 0.22           here
here
Training: Epoch [10/40], Step [30/105], Loss: 4.995, Perp: 147.67, Acc: 0.22           here
here
Training: Epoch [10/40], Step 

Validation: Epoch [10/40], Step [1/26], Loss: 4.564, Perp: 95.97, Acc: 0.26           here
here
Validation: Epoch [10/40], Step [2/26], Loss: 4.561, Perp: 95.67, Acc: 0.26           here
here
Validation: Epoch [10/40], Step [3/26], Loss: 4.559, Perp: 95.51, Acc: 0.26           here
here
Validation: Epoch [10/40], Step [4/26], Loss: 4.558, Perp: 95.41, Acc: 0.26           here
here
Validation: Epoch [10/40], Step [5/26], Loss: 4.557, Perp: 95.32, Acc: 0.26           here
here
Validation: Epoch [10/40], Step [6/26], Loss: 4.556, Perp: 95.24, Acc: 0.26           here
here
Validation: Epoch [10/40], Step [7/26], Loss: 4.556, Perp: 95.19, Acc: 0.26           here
here
Validation: Epoch [10/40], Step [8/26], Loss: 4.556, Perp: 95.15, Acc: 0.26           here
here
Validation: Epoch [10/40], Step [9/26], Loss: 4.555, Perp: 95.12, Acc: 0.26           here
here
Validation: Epoch [10/40], Step [10/26], Loss: 4.555, Perp: 95.10, Acc: 0.26           here
here
Validation: Epoch [10/40], Step [11/26]

Training: Epoch [11/40], Step [60/105], Loss: 4.785, Perp: 119.71, Acc: 0.24           here
here
Training: Epoch [11/40], Step [61/105], Loss: 4.785, Perp: 119.71, Acc: 0.24           here
here
Training: Epoch [11/40], Step [62/105], Loss: 4.785, Perp: 119.70, Acc: 0.24           here
here
Training: Epoch [11/40], Step [63/105], Loss: 4.785, Perp: 119.70, Acc: 0.24           here
here
Training: Epoch [11/40], Step [64/105], Loss: 4.785, Perp: 119.70, Acc: 0.24           here
here
Training: Epoch [11/40], Step [65/105], Loss: 4.785, Perp: 119.69, Acc: 0.24           here
here
Training: Epoch [11/40], Step [66/105], Loss: 4.785, Perp: 119.69, Acc: 0.24           here
here
Training: Epoch [11/40], Step [67/105], Loss: 4.785, Perp: 119.69, Acc: 0.24           here
here
Training: Epoch [11/40], Step [68/105], Loss: 4.785, Perp: 119.68, Acc: 0.24           here
here
Training: Epoch [11/40], Step [69/105], Loss: 4.785, Perp: 119.68, Acc: 0.24           here
here
Training: Epoch [11/40], Step 

Training: Epoch [12/40], Step [14/105], Loss: 4.610, Perp: 100.46, Acc: 0.24           here
here
Training: Epoch [12/40], Step [15/105], Loss: 4.610, Perp: 100.45, Acc: 0.24           here
here
Training: Epoch [12/40], Step [16/105], Loss: 4.610, Perp: 100.45, Acc: 0.24           here
here
Training: Epoch [12/40], Step [17/105], Loss: 4.610, Perp: 100.45, Acc: 0.24           here
here
Training: Epoch [12/40], Step [18/105], Loss: 4.610, Perp: 100.45, Acc: 0.24           here
here
Training: Epoch [12/40], Step [19/105], Loss: 4.610, Perp: 100.45, Acc: 0.24           here
here
Training: Epoch [12/40], Step [20/105], Loss: 4.610, Perp: 100.45, Acc: 0.24           here
here
Training: Epoch [12/40], Step [21/105], Loss: 4.610, Perp: 100.45, Acc: 0.24           here
here
Training: Epoch [12/40], Step [22/105], Loss: 4.610, Perp: 100.45, Acc: 0.24           here
here
Training: Epoch [12/40], Step [23/105], Loss: 4.610, Perp: 100.45, Acc: 0.24           here
here
Training: Epoch [12/40], Step 

Training: Epoch [12/40], Step [99/105], Loss: 4.607, Perp: 100.13, Acc: 0.24           here
here
Training: Epoch [12/40], Step [100/105], Loss: 4.606, Perp: 100.13, Acc: 0.24           here
here
Training: Epoch [12/40], Step [101/105], Loss: 4.606, Perp: 100.13, Acc: 0.24           here
here
Training: Epoch [12/40], Step [102/105], Loss: 4.606, Perp: 100.12, Acc: 0.24           here
here
Training: Epoch [12/40], Step [103/105], Loss: 4.606, Perp: 100.12, Acc: 0.24           here
here
Training: Epoch [12/40], Step [104/105], Loss: 4.606, Perp: 100.12, Acc: 0.24           here
here
Training: Epoch [12/40], Step [105/105], Loss: 4.606, Perp: 100.11, Acc: 0.24           here
here
Validation: Epoch [12/40], Step [1/26], Loss: 4.330, Perp: 75.95, Acc: 0.28           here
here
Validation: Epoch [12/40], Step [2/26], Loss: 4.327, Perp: 75.72, Acc: 0.28           here
here
Validation: Epoch [12/40], Step [3/26], Loss: 4.325, Perp: 75.59, Acc: 0.28           here
here
Validation: Epoch [12/40], 

Training: Epoch [13/40], Step [54/105], Loss: 4.463, Perp: 86.72, Acc: 0.25           here
here
Training: Epoch [13/40], Step [55/105], Loss: 4.463, Perp: 86.71, Acc: 0.25           here
here
Training: Epoch [13/40], Step [56/105], Loss: 4.463, Perp: 86.71, Acc: 0.25           here
here
Training: Epoch [13/40], Step [57/105], Loss: 4.463, Perp: 86.70, Acc: 0.25           here
here
Training: Epoch [13/40], Step [58/105], Loss: 4.462, Perp: 86.70, Acc: 0.25           here
here
Training: Epoch [13/40], Step [59/105], Loss: 4.462, Perp: 86.69, Acc: 0.25           here
here
Training: Epoch [13/40], Step [60/105], Loss: 4.462, Perp: 86.69, Acc: 0.25           here
here
Training: Epoch [13/40], Step [61/105], Loss: 4.462, Perp: 86.69, Acc: 0.25           here
here
Training: Epoch [13/40], Step [62/105], Loss: 4.462, Perp: 86.68, Acc: 0.25           here
here
Training: Epoch [13/40], Step [63/105], Loss: 4.462, Perp: 86.68, Acc: 0.25           here
here
Training: Epoch [13/40], Step [64/105], 

Training: Epoch [14/40], Step [9/105], Loss: 4.279, Perp: 72.16, Acc: 0.27           here
here
Training: Epoch [14/40], Step [10/105], Loss: 4.279, Perp: 72.15, Acc: 0.27           here
here
Training: Epoch [14/40], Step [11/105], Loss: 4.279, Perp: 72.15, Acc: 0.27           here
here
Training: Epoch [14/40], Step [12/105], Loss: 4.279, Perp: 72.14, Acc: 0.27           here
here
Training: Epoch [14/40], Step [13/105], Loss: 4.279, Perp: 72.14, Acc: 0.27           here
here
Training: Epoch [14/40], Step [14/105], Loss: 4.279, Perp: 72.13, Acc: 0.27           here
here
Training: Epoch [14/40], Step [15/105], Loss: 4.278, Perp: 72.13, Acc: 0.27           here
here
Training: Epoch [14/40], Step [16/105], Loss: 4.278, Perp: 72.12, Acc: 0.27           here
here
Training: Epoch [14/40], Step [17/105], Loss: 4.278, Perp: 72.11, Acc: 0.27           here
here
Training: Epoch [14/40], Step [18/105], Loss: 4.278, Perp: 72.11, Acc: 0.27           here
here
Training: Epoch [14/40], Step [19/105], L

Training: Epoch [14/40], Step [95/105], Loss: 4.275, Perp: 71.88, Acc: 0.27           here
here
Training: Epoch [14/40], Step [96/105], Loss: 4.275, Perp: 71.88, Acc: 0.27           here
here
Training: Epoch [14/40], Step [97/105], Loss: 4.275, Perp: 71.88, Acc: 0.27           here
here
Training: Epoch [14/40], Step [98/105], Loss: 4.275, Perp: 71.87, Acc: 0.27           here
here
Training: Epoch [14/40], Step [99/105], Loss: 4.275, Perp: 71.87, Acc: 0.27           here
here
Training: Epoch [14/40], Step [100/105], Loss: 4.275, Perp: 71.87, Acc: 0.27           here
here
Training: Epoch [14/40], Step [101/105], Loss: 4.275, Perp: 71.86, Acc: 0.27           here
here
Training: Epoch [14/40], Step [102/105], Loss: 4.275, Perp: 71.86, Acc: 0.27           here
here
Training: Epoch [14/40], Step [103/105], Loss: 4.275, Perp: 71.86, Acc: 0.27           here
here
Training: Epoch [14/40], Step [104/105], Loss: 4.275, Perp: 71.85, Acc: 0.27           here
here
Training: Epoch [14/40], Step [105/

Training: Epoch [15/40], Step [50/105], Loss: 4.105, Perp: 60.62, Acc: 0.28           here
here
Training: Epoch [15/40], Step [51/105], Loss: 4.105, Perp: 60.62, Acc: 0.28           here
here
Training: Epoch [15/40], Step [52/105], Loss: 4.105, Perp: 60.62, Acc: 0.28           here
here
Training: Epoch [15/40], Step [53/105], Loss: 4.105, Perp: 60.62, Acc: 0.28           here
here
Training: Epoch [15/40], Step [54/105], Loss: 4.105, Perp: 60.61, Acc: 0.28           here
here
Training: Epoch [15/40], Step [55/105], Loss: 4.104, Perp: 60.61, Acc: 0.28           here
here
Training: Epoch [15/40], Step [56/105], Loss: 4.104, Perp: 60.61, Acc: 0.28           here
here
Training: Epoch [15/40], Step [57/105], Loss: 4.104, Perp: 60.60, Acc: 0.28           here
here
Training: Epoch [15/40], Step [58/105], Loss: 4.104, Perp: 60.60, Acc: 0.28           here
here
Training: Epoch [15/40], Step [59/105], Loss: 4.104, Perp: 60.60, Acc: 0.28           here
here
Training: Epoch [15/40], Step [60/105], 

Training: Epoch [16/40], Step [5/105], Loss: 4.012, Perp: 55.25, Acc: 0.29           here
here
Training: Epoch [16/40], Step [6/105], Loss: 4.011, Perp: 55.22, Acc: 0.29           here
here
Training: Epoch [16/40], Step [7/105], Loss: 4.011, Perp: 55.20, Acc: 0.29           here
here
Training: Epoch [16/40], Step [8/105], Loss: 4.011, Perp: 55.18, Acc: 0.29           here
here
Training: Epoch [16/40], Step [9/105], Loss: 4.010, Perp: 55.17, Acc: 0.29           here
here
Training: Epoch [16/40], Step [10/105], Loss: 4.010, Perp: 55.16, Acc: 0.29           here
here
Training: Epoch [16/40], Step [11/105], Loss: 4.010, Perp: 55.15, Acc: 0.29           here
here
Training: Epoch [16/40], Step [12/105], Loss: 4.010, Perp: 55.14, Acc: 0.29           here
here
Training: Epoch [16/40], Step [13/105], Loss: 4.010, Perp: 55.13, Acc: 0.29           here
here
Training: Epoch [16/40], Step [14/105], Loss: 4.010, Perp: 55.13, Acc: 0.29           here
here
Training: Epoch [16/40], Step [15/105], Loss:

Training: Epoch [16/40], Step [91/105], Loss: 4.004, Perp: 54.83, Acc: 0.29           here
here
Training: Epoch [16/40], Step [92/105], Loss: 4.004, Perp: 54.83, Acc: 0.29           here
here
Training: Epoch [16/40], Step [93/105], Loss: 4.004, Perp: 54.83, Acc: 0.29           here
here
Training: Epoch [16/40], Step [94/105], Loss: 4.004, Perp: 54.83, Acc: 0.29           here
here
Training: Epoch [16/40], Step [95/105], Loss: 4.004, Perp: 54.83, Acc: 0.29           here
here
Training: Epoch [16/40], Step [96/105], Loss: 4.004, Perp: 54.82, Acc: 0.29           here
here
Training: Epoch [16/40], Step [97/105], Loss: 4.004, Perp: 54.82, Acc: 0.29           here
here
Training: Epoch [16/40], Step [98/105], Loss: 4.004, Perp: 54.82, Acc: 0.29           here
here
Training: Epoch [16/40], Step [99/105], Loss: 4.004, Perp: 54.82, Acc: 0.29           here
here
Training: Epoch [16/40], Step [100/105], Loss: 4.004, Perp: 54.82, Acc: 0.29           here
here
Training: Epoch [16/40], Step [101/105]

Training: Epoch [17/40], Step [46/105], Loss: 3.899, Perp: 49.34, Acc: 0.30           here
here
Training: Epoch [17/40], Step [47/105], Loss: 3.899, Perp: 49.34, Acc: 0.30           here
here
Training: Epoch [17/40], Step [48/105], Loss: 3.899, Perp: 49.34, Acc: 0.30           here
here
Training: Epoch [17/40], Step [49/105], Loss: 3.899, Perp: 49.33, Acc: 0.30           here
here
Training: Epoch [17/40], Step [50/105], Loss: 3.899, Perp: 49.33, Acc: 0.30           here
here
Training: Epoch [17/40], Step [51/105], Loss: 3.898, Perp: 49.33, Acc: 0.30           here
here
Training: Epoch [17/40], Step [52/105], Loss: 3.898, Perp: 49.32, Acc: 0.30           here
here
Training: Epoch [17/40], Step [53/105], Loss: 3.898, Perp: 49.32, Acc: 0.30           here
here
Training: Epoch [17/40], Step [54/105], Loss: 3.898, Perp: 49.32, Acc: 0.30           here
here
Training: Epoch [17/40], Step [55/105], Loss: 3.898, Perp: 49.31, Acc: 0.30           here
here
Training: Epoch [17/40], Step [56/105], 

Validation: Epoch [17/40], Step [26/26], Loss: 3.636, Perp: 37.95, Acc: 0.35           here
here
Training: Epoch [18/40], Step [1/105], Loss: 3.770, Perp: 43.36, Acc: 0.31           here
here
Training: Epoch [18/40], Step [2/105], Loss: 3.767, Perp: 43.23, Acc: 0.31           here
here
Training: Epoch [18/40], Step [3/105], Loss: 3.766, Perp: 43.19, Acc: 0.31           here
here
Training: Epoch [18/40], Step [4/105], Loss: 3.765, Perp: 43.16, Acc: 0.31           here
here
Training: Epoch [18/40], Step [5/105], Loss: 3.764, Perp: 43.14, Acc: 0.31           here
here
Training: Epoch [18/40], Step [6/105], Loss: 3.764, Perp: 43.11, Acc: 0.31           here
here
Training: Epoch [18/40], Step [7/105], Loss: 3.763, Perp: 43.09, Acc: 0.31           here
here
Training: Epoch [18/40], Step [8/105], Loss: 3.763, Perp: 43.07, Acc: 0.31           here
here
Training: Epoch [18/40], Step [9/105], Loss: 3.762, Perp: 43.05, Acc: 0.31           here
here
Training: Epoch [18/40], Step [10/105], Loss: 3.

Training: Epoch [18/40], Step [86/105], Loss: 3.755, Perp: 42.73, Acc: 0.31           here
here
Training: Epoch [18/40], Step [87/105], Loss: 3.755, Perp: 42.72, Acc: 0.31           here
here
Training: Epoch [18/40], Step [88/105], Loss: 3.755, Perp: 42.72, Acc: 0.31           here
here
Training: Epoch [18/40], Step [89/105], Loss: 3.755, Perp: 42.72, Acc: 0.31           here
here
Training: Epoch [18/40], Step [90/105], Loss: 3.755, Perp: 42.72, Acc: 0.31           here
here
Training: Epoch [18/40], Step [91/105], Loss: 3.755, Perp: 42.72, Acc: 0.31           here
here
Training: Epoch [18/40], Step [92/105], Loss: 3.755, Perp: 42.71, Acc: 0.31           here
here
Training: Epoch [18/40], Step [93/105], Loss: 3.754, Perp: 42.71, Acc: 0.31           here
here
Training: Epoch [18/40], Step [94/105], Loss: 3.754, Perp: 42.71, Acc: 0.31           here
here
Training: Epoch [18/40], Step [95/105], Loss: 3.754, Perp: 42.71, Acc: 0.31           here
here
Training: Epoch [18/40], Step [96/105], 

Training: Epoch [19/40], Step [41/105], Loss: 3.613, Perp: 37.09, Acc: 0.33           here
here
Training: Epoch [19/40], Step [42/105], Loss: 3.613, Perp: 37.08, Acc: 0.33           here
here
Training: Epoch [19/40], Step [43/105], Loss: 3.613, Perp: 37.08, Acc: 0.33           here
here
Training: Epoch [19/40], Step [44/105], Loss: 3.613, Perp: 37.07, Acc: 0.33           here
here
Training: Epoch [19/40], Step [45/105], Loss: 3.613, Perp: 37.07, Acc: 0.33           here
here
Training: Epoch [19/40], Step [46/105], Loss: 3.613, Perp: 37.07, Acc: 0.33           here
here
Training: Epoch [19/40], Step [47/105], Loss: 3.613, Perp: 37.06, Acc: 0.33           here
here
Training: Epoch [19/40], Step [48/105], Loss: 3.613, Perp: 37.06, Acc: 0.33           here
here
Training: Epoch [19/40], Step [49/105], Loss: 3.613, Perp: 37.06, Acc: 0.33           here
here
Training: Epoch [19/40], Step [50/105], Loss: 3.613, Perp: 37.06, Acc: 0.33           here
here
Training: Epoch [19/40], Step [51/105], 

Validation: Epoch [19/40], Step [22/26], Loss: 3.369, Perp: 29.05, Acc: 0.38           here
here
Validation: Epoch [19/40], Step [23/26], Loss: 3.369, Perp: 29.04, Acc: 0.38           here
here
Validation: Epoch [19/40], Step [24/26], Loss: 3.368, Perp: 29.03, Acc: 0.38           here
here
Validation: Epoch [19/40], Step [25/26], Loss: 3.368, Perp: 29.02, Acc: 0.38           here
here
Validation: Epoch [19/40], Step [26/26], Loss: 3.367, Perp: 29.00, Acc: 0.38           here
here
Training: Epoch [20/40], Step [1/105], Loss: 3.515, Perp: 33.62, Acc: 0.35           here
here
Training: Epoch [20/40], Step [2/105], Loss: 3.513, Perp: 33.55, Acc: 0.35           here
here
Training: Epoch [20/40], Step [3/105], Loss: 3.512, Perp: 33.51, Acc: 0.35           here
here
Training: Epoch [20/40], Step [4/105], Loss: 3.511, Perp: 33.49, Acc: 0.35           here
here
Training: Epoch [20/40], Step [5/105], Loss: 3.510, Perp: 33.46, Acc: 0.35           here
here
Training: Epoch [20/40], Step [6/105], L

Training: Epoch [20/40], Step [82/105], Loss: 3.501, Perp: 33.14, Acc: 0.35           here
here
Training: Epoch [20/40], Step [83/105], Loss: 3.501, Perp: 33.14, Acc: 0.35           here
here
Training: Epoch [20/40], Step [84/105], Loss: 3.501, Perp: 33.14, Acc: 0.35           here
here
Training: Epoch [20/40], Step [85/105], Loss: 3.501, Perp: 33.14, Acc: 0.35           here
here
Training: Epoch [20/40], Step [86/105], Loss: 3.501, Perp: 33.14, Acc: 0.35           here
here
Training: Epoch [20/40], Step [87/105], Loss: 3.501, Perp: 33.13, Acc: 0.35           here
here
Training: Epoch [20/40], Step [88/105], Loss: 3.501, Perp: 33.13, Acc: 0.35           here
here
Training: Epoch [20/40], Step [89/105], Loss: 3.500, Perp: 33.13, Acc: 0.35           here
here
Training: Epoch [20/40], Step [90/105], Loss: 3.500, Perp: 33.13, Acc: 0.35           here
here
Training: Epoch [20/40], Step [91/105], Loss: 3.500, Perp: 33.13, Acc: 0.35           here
here
Training: Epoch [20/40], Step [92/105], 

Training: Epoch [21/40], Step [37/105], Loss: 3.433, Perp: 30.97, Acc: 0.35           here
here
Training: Epoch [21/40], Step [38/105], Loss: 3.433, Perp: 30.97, Acc: 0.35           here
here
Training: Epoch [21/40], Step [39/105], Loss: 3.433, Perp: 30.96, Acc: 0.35           here
here
Training: Epoch [21/40], Step [40/105], Loss: 3.433, Perp: 30.96, Acc: 0.35           here
here
Training: Epoch [21/40], Step [41/105], Loss: 3.433, Perp: 30.96, Acc: 0.35           here
here
Training: Epoch [21/40], Step [42/105], Loss: 3.432, Perp: 30.95, Acc: 0.35           here
here
Training: Epoch [21/40], Step [43/105], Loss: 3.432, Perp: 30.95, Acc: 0.35           here
here
Training: Epoch [21/40], Step [44/105], Loss: 3.432, Perp: 30.94, Acc: 0.35           here
here
Training: Epoch [21/40], Step [45/105], Loss: 3.432, Perp: 30.94, Acc: 0.35           here
here
Training: Epoch [21/40], Step [46/105], Loss: 3.432, Perp: 30.94, Acc: 0.35           here
here
Training: Epoch [21/40], Step [47/105], 

Validation: Epoch [21/40], Step [18/26], Loss: 3.001, Perp: 20.10, Acc: 0.42           here
here
Validation: Epoch [21/40], Step [19/26], Loss: 3.000, Perp: 20.09, Acc: 0.42           here
here
Validation: Epoch [21/40], Step [20/26], Loss: 3.000, Perp: 20.08, Acc: 0.42           here
here
Validation: Epoch [21/40], Step [21/26], Loss: 2.999, Perp: 20.07, Acc: 0.42           here
here
Validation: Epoch [21/40], Step [22/26], Loss: 2.999, Perp: 20.06, Acc: 0.42           here
here
Validation: Epoch [21/40], Step [23/26], Loss: 2.998, Perp: 20.05, Acc: 0.42           here
here
Validation: Epoch [21/40], Step [24/26], Loss: 2.998, Perp: 20.05, Acc: 0.42           here
here
Validation: Epoch [21/40], Step [25/26], Loss: 2.998, Perp: 20.04, Acc: 0.42           here
here
Validation: Epoch [21/40], Step [26/26], Loss: 2.997, Perp: 20.03, Acc: 0.42           here
here
Training: Epoch [22/40], Step [1/105], Loss: 3.288, Perp: 26.78, Acc: 0.36           here
here
Training: Epoch [22/40], Step [2

Training: Epoch [22/40], Step [78/105], Loss: 3.272, Perp: 26.35, Acc: 0.37           here
here
Training: Epoch [22/40], Step [79/105], Loss: 3.271, Perp: 26.35, Acc: 0.37           here
here
Training: Epoch [22/40], Step [80/105], Loss: 3.271, Perp: 26.35, Acc: 0.37           here
here
Training: Epoch [22/40], Step [81/105], Loss: 3.271, Perp: 26.35, Acc: 0.37           here
here
Training: Epoch [22/40], Step [82/105], Loss: 3.271, Perp: 26.34, Acc: 0.37           here
here
Training: Epoch [22/40], Step [83/105], Loss: 3.271, Perp: 26.34, Acc: 0.37           here
here
Training: Epoch [22/40], Step [84/105], Loss: 3.271, Perp: 26.34, Acc: 0.37           here
here
Training: Epoch [22/40], Step [85/105], Loss: 3.271, Perp: 26.34, Acc: 0.37           here
here
Training: Epoch [22/40], Step [86/105], Loss: 3.271, Perp: 26.34, Acc: 0.37           here
here
Training: Epoch [22/40], Step [87/105], Loss: 3.271, Perp: 26.34, Acc: 0.37           here
here
Training: Epoch [22/40], Step [88/105], 

Training: Epoch [23/40], Step [33/105], Loss: 3.064, Perp: 21.42, Acc: 0.39           here
here
Training: Epoch [23/40], Step [34/105], Loss: 3.064, Perp: 21.42, Acc: 0.39           here
here
Training: Epoch [23/40], Step [35/105], Loss: 3.064, Perp: 21.41, Acc: 0.39           here
here
Training: Epoch [23/40], Step [36/105], Loss: 3.064, Perp: 21.41, Acc: 0.39           here
here
Training: Epoch [23/40], Step [37/105], Loss: 3.064, Perp: 21.41, Acc: 0.39           here
here
Training: Epoch [23/40], Step [38/105], Loss: 3.064, Perp: 21.41, Acc: 0.39           here
here
Training: Epoch [23/40], Step [39/105], Loss: 3.064, Perp: 21.41, Acc: 0.39           here
here
Training: Epoch [23/40], Step [40/105], Loss: 3.064, Perp: 21.41, Acc: 0.39           here
here
Training: Epoch [23/40], Step [41/105], Loss: 3.064, Perp: 21.41, Acc: 0.39           here
here
Training: Epoch [23/40], Step [42/105], Loss: 3.064, Perp: 21.40, Acc: 0.39           here
here
Training: Epoch [23/40], Step [43/105], 

Validation: Epoch [23/40], Step [14/26], Loss: 2.695, Perp: 14.81, Acc: 0.47           here
here
Validation: Epoch [23/40], Step [15/26], Loss: 2.695, Perp: 14.80, Acc: 0.47           here
here
Validation: Epoch [23/40], Step [16/26], Loss: 2.694, Perp: 14.80, Acc: 0.47           here
here
Validation: Epoch [23/40], Step [17/26], Loss: 2.694, Perp: 14.79, Acc: 0.47           here
here
Validation: Epoch [23/40], Step [18/26], Loss: 2.693, Perp: 14.78, Acc: 0.47           here
here
Validation: Epoch [23/40], Step [19/26], Loss: 2.693, Perp: 14.78, Acc: 0.47           here
here
Validation: Epoch [23/40], Step [20/26], Loss: 2.693, Perp: 14.77, Acc: 0.47           here
here
Validation: Epoch [23/40], Step [21/26], Loss: 2.692, Perp: 14.76, Acc: 0.47           here
here
Validation: Epoch [23/40], Step [22/26], Loss: 2.692, Perp: 14.76, Acc: 0.47           here
here
Validation: Epoch [23/40], Step [23/26], Loss: 2.692, Perp: 14.75, Acc: 0.47           here
here
Validation: Epoch [23/40], Ste

Training: Epoch [24/40], Step [74/105], Loss: 2.995, Perp: 19.98, Acc: 0.41           here
here
Training: Epoch [24/40], Step [75/105], Loss: 2.995, Perp: 19.98, Acc: 0.41           here
here
Training: Epoch [24/40], Step [76/105], Loss: 2.994, Perp: 19.97, Acc: 0.41           here
here
Training: Epoch [24/40], Step [77/105], Loss: 2.994, Perp: 19.97, Acc: 0.41           here
here
Training: Epoch [24/40], Step [78/105], Loss: 2.994, Perp: 19.97, Acc: 0.41           here
here
Training: Epoch [24/40], Step [79/105], Loss: 2.994, Perp: 19.97, Acc: 0.41           here
here
Training: Epoch [24/40], Step [80/105], Loss: 2.994, Perp: 19.97, Acc: 0.41           here
here
Training: Epoch [24/40], Step [81/105], Loss: 2.994, Perp: 19.97, Acc: 0.41           here
here
Training: Epoch [24/40], Step [82/105], Loss: 2.994, Perp: 19.97, Acc: 0.41           here
here
Training: Epoch [24/40], Step [83/105], Loss: 2.994, Perp: 19.97, Acc: 0.41           here
here
Training: Epoch [24/40], Step [84/105], 

Training: Epoch [25/40], Step [29/105], Loss: 2.862, Perp: 17.50, Acc: 0.42           here
here
Training: Epoch [25/40], Step [30/105], Loss: 2.862, Perp: 17.50, Acc: 0.42           here
here
Training: Epoch [25/40], Step [31/105], Loss: 2.862, Perp: 17.50, Acc: 0.42           here
here
Training: Epoch [25/40], Step [32/105], Loss: 2.862, Perp: 17.50, Acc: 0.42           here
here
Training: Epoch [25/40], Step [33/105], Loss: 2.862, Perp: 17.50, Acc: 0.42           here
here
Training: Epoch [25/40], Step [34/105], Loss: 2.862, Perp: 17.49, Acc: 0.42           here
here
Training: Epoch [25/40], Step [35/105], Loss: 2.862, Perp: 17.49, Acc: 0.42           here
here
Training: Epoch [25/40], Step [36/105], Loss: 2.862, Perp: 17.49, Acc: 0.42           here
here
Training: Epoch [25/40], Step [37/105], Loss: 2.862, Perp: 17.49, Acc: 0.42           here
here
Training: Epoch [25/40], Step [38/105], Loss: 2.862, Perp: 17.49, Acc: 0.42           here
here
Training: Epoch [25/40], Step [39/105], 

Validation: Epoch [25/40], Step [10/26], Loss: 2.549, Perp: 12.80, Acc: 0.50           here
here
Validation: Epoch [25/40], Step [11/26], Loss: 2.549, Perp: 12.79, Acc: 0.50           here
here
Validation: Epoch [25/40], Step [12/26], Loss: 2.548, Perp: 12.79, Acc: 0.50           here
here
Validation: Epoch [25/40], Step [13/26], Loss: 2.548, Perp: 12.78, Acc: 0.50           here
here
Validation: Epoch [25/40], Step [14/26], Loss: 2.547, Perp: 12.77, Acc: 0.50           here
here
Validation: Epoch [25/40], Step [15/26], Loss: 2.547, Perp: 12.76, Acc: 0.50           here
here
Validation: Epoch [25/40], Step [16/26], Loss: 2.546, Perp: 12.76, Acc: 0.50           here
here
Validation: Epoch [25/40], Step [17/26], Loss: 2.546, Perp: 12.75, Acc: 0.50           here
here
Validation: Epoch [25/40], Step [18/26], Loss: 2.545, Perp: 12.75, Acc: 0.50           here
here
Validation: Epoch [25/40], Step [19/26], Loss: 2.545, Perp: 12.74, Acc: 0.50           here
here
Validation: Epoch [25/40], Ste

Training: Epoch [26/40], Step [70/105], Loss: 2.724, Perp: 15.24, Acc: 0.45           here
here
Training: Epoch [26/40], Step [71/105], Loss: 2.724, Perp: 15.24, Acc: 0.45           here
here
Training: Epoch [26/40], Step [72/105], Loss: 2.724, Perp: 15.24, Acc: 0.45           here
here
Training: Epoch [26/40], Step [73/105], Loss: 2.724, Perp: 15.24, Acc: 0.45           here
here
Training: Epoch [26/40], Step [74/105], Loss: 2.724, Perp: 15.24, Acc: 0.45           here
here
Training: Epoch [26/40], Step [75/105], Loss: 2.724, Perp: 15.24, Acc: 0.45           here
here
Training: Epoch [26/40], Step [76/105], Loss: 2.724, Perp: 15.24, Acc: 0.45           here
here
Training: Epoch [26/40], Step [77/105], Loss: 2.724, Perp: 15.24, Acc: 0.45           here
here
Training: Epoch [26/40], Step [78/105], Loss: 2.724, Perp: 15.24, Acc: 0.45           here
here
Training: Epoch [26/40], Step [79/105], Loss: 2.724, Perp: 15.24, Acc: 0.45           here
here
Training: Epoch [26/40], Step [80/105], 

Training: Epoch [27/40], Step [25/105], Loss: 2.713, Perp: 15.07, Acc: 0.44           here
here
Training: Epoch [27/40], Step [26/105], Loss: 2.712, Perp: 15.06, Acc: 0.44           here
here
Training: Epoch [27/40], Step [27/105], Loss: 2.712, Perp: 15.06, Acc: 0.44           here
here
Training: Epoch [27/40], Step [28/105], Loss: 2.712, Perp: 15.06, Acc: 0.44           here
here
Training: Epoch [27/40], Step [29/105], Loss: 2.712, Perp: 15.06, Acc: 0.44           here
here
Training: Epoch [27/40], Step [30/105], Loss: 2.712, Perp: 15.06, Acc: 0.44           here
here
Training: Epoch [27/40], Step [31/105], Loss: 2.712, Perp: 15.06, Acc: 0.44           here
here
Training: Epoch [27/40], Step [32/105], Loss: 2.712, Perp: 15.05, Acc: 0.44           here
here
Training: Epoch [27/40], Step [33/105], Loss: 2.711, Perp: 15.05, Acc: 0.44           here
here
Training: Epoch [27/40], Step [34/105], Loss: 2.711, Perp: 15.05, Acc: 0.44           here
here
Training: Epoch [27/40], Step [35/105], 

Validation: Epoch [27/40], Step [6/26], Loss: 2.258, Perp: 9.56, Acc: 0.55           here
here
Validation: Epoch [27/40], Step [7/26], Loss: 2.257, Perp: 9.55, Acc: 0.55           here
here
Validation: Epoch [27/40], Step [8/26], Loss: 2.256, Perp: 9.55, Acc: 0.55           here
here
Validation: Epoch [27/40], Step [9/26], Loss: 2.256, Perp: 9.54, Acc: 0.55           here
here
Validation: Epoch [27/40], Step [10/26], Loss: 2.255, Perp: 9.53, Acc: 0.55           here
here
Validation: Epoch [27/40], Step [11/26], Loss: 2.254, Perp: 9.53, Acc: 0.55           here
here
Validation: Epoch [27/40], Step [12/26], Loss: 2.254, Perp: 9.52, Acc: 0.55           here
here
Validation: Epoch [27/40], Step [13/26], Loss: 2.253, Perp: 9.52, Acc: 0.55           here
here
Validation: Epoch [27/40], Step [14/26], Loss: 2.253, Perp: 9.51, Acc: 0.55           here
here
Validation: Epoch [27/40], Step [15/26], Loss: 2.252, Perp: 9.51, Acc: 0.55           here
here
Validation: Epoch [27/40], Step [16/26], Los

Training: Epoch [28/40], Step [66/105], Loss: 2.595, Perp: 13.40, Acc: 0.46           here
here
Training: Epoch [28/40], Step [67/105], Loss: 2.595, Perp: 13.40, Acc: 0.46           here
here
Training: Epoch [28/40], Step [68/105], Loss: 2.595, Perp: 13.40, Acc: 0.46           here
here
Training: Epoch [28/40], Step [69/105], Loss: 2.595, Perp: 13.40, Acc: 0.46           here
here
Training: Epoch [28/40], Step [70/105], Loss: 2.595, Perp: 13.40, Acc: 0.46           here
here
Training: Epoch [28/40], Step [71/105], Loss: 2.595, Perp: 13.40, Acc: 0.46           here
here
Training: Epoch [28/40], Step [72/105], Loss: 2.595, Perp: 13.40, Acc: 0.46           here
here
Training: Epoch [28/40], Step [73/105], Loss: 2.595, Perp: 13.40, Acc: 0.46           here
here
Training: Epoch [28/40], Step [74/105], Loss: 2.595, Perp: 13.40, Acc: 0.46           here
here
Training: Epoch [28/40], Step [75/105], Loss: 2.595, Perp: 13.40, Acc: 0.46           here
here
Training: Epoch [28/40], Step [76/105], 

Training: Epoch [29/40], Step [21/105], Loss: 2.723, Perp: 15.23, Acc: 0.45           here
here
Training: Epoch [29/40], Step [22/105], Loss: 2.723, Perp: 15.22, Acc: 0.45           here
here
Training: Epoch [29/40], Step [23/105], Loss: 2.723, Perp: 15.22, Acc: 0.45           here
here
Training: Epoch [29/40], Step [24/105], Loss: 2.722, Perp: 15.22, Acc: 0.45           here
here
Training: Epoch [29/40], Step [25/105], Loss: 2.722, Perp: 15.21, Acc: 0.45           here
here
Training: Epoch [29/40], Step [26/105], Loss: 2.722, Perp: 15.21, Acc: 0.45           here
here
Training: Epoch [29/40], Step [27/105], Loss: 2.722, Perp: 15.21, Acc: 0.45           here
here
Training: Epoch [29/40], Step [28/105], Loss: 2.721, Perp: 15.20, Acc: 0.45           here
here
Training: Epoch [29/40], Step [29/105], Loss: 2.721, Perp: 15.20, Acc: 0.45           here
here
Training: Epoch [29/40], Step [30/105], Loss: 2.721, Perp: 15.19, Acc: 0.45           here
here
Training: Epoch [29/40], Step [31/105], 

Validation: Epoch [29/40], Step [2/26], Loss: 2.241, Perp: 9.40, Acc: 0.57           here
here
Validation: Epoch [29/40], Step [3/26], Loss: 2.238, Perp: 9.37, Acc: 0.57           here
here
Validation: Epoch [29/40], Step [4/26], Loss: 2.235, Perp: 9.35, Acc: 0.57           here
here
Validation: Epoch [29/40], Step [5/26], Loss: 2.233, Perp: 9.33, Acc: 0.57           here
here
Validation: Epoch [29/40], Step [6/26], Loss: 2.232, Perp: 9.31, Acc: 0.57           here
here
Validation: Epoch [29/40], Step [7/26], Loss: 2.230, Perp: 9.30, Acc: 0.57           here
here
Validation: Epoch [29/40], Step [8/26], Loss: 2.229, Perp: 9.29, Acc: 0.57           here
here
Validation: Epoch [29/40], Step [9/26], Loss: 2.228, Perp: 9.29, Acc: 0.57           here
here
Validation: Epoch [29/40], Step [10/26], Loss: 2.228, Perp: 9.28, Acc: 0.57           here
here
Validation: Epoch [29/40], Step [11/26], Loss: 2.227, Perp: 9.27, Acc: 0.57           here
here
Validation: Epoch [29/40], Step [12/26], Loss: 2

Training: Epoch [30/40], Step [62/105], Loss: 2.603, Perp: 13.51, Acc: 0.45           here
here
Training: Epoch [30/40], Step [63/105], Loss: 2.603, Perp: 13.51, Acc: 0.45           here
here
Training: Epoch [30/40], Step [64/105], Loss: 2.603, Perp: 13.51, Acc: 0.45           here
here
Training: Epoch [30/40], Step [65/105], Loss: 2.603, Perp: 13.50, Acc: 0.45           here
here
Training: Epoch [30/40], Step [66/105], Loss: 2.603, Perp: 13.50, Acc: 0.45           here
here
Training: Epoch [30/40], Step [67/105], Loss: 2.603, Perp: 13.50, Acc: 0.45           here
here
Training: Epoch [30/40], Step [68/105], Loss: 2.603, Perp: 13.50, Acc: 0.45           here
here
Training: Epoch [30/40], Step [69/105], Loss: 2.603, Perp: 13.50, Acc: 0.45           here
here
Training: Epoch [30/40], Step [70/105], Loss: 2.603, Perp: 13.50, Acc: 0.45           here
here
Training: Epoch [30/40], Step [71/105], Loss: 2.602, Perp: 13.50, Acc: 0.45           here
here
Training: Epoch [30/40], Step [72/105], 

Training: Epoch [31/40], Step [17/105], Loss: 2.592, Perp: 13.36, Acc: 0.46           here
here
Training: Epoch [31/40], Step [18/105], Loss: 2.592, Perp: 13.35, Acc: 0.46           here
here
Training: Epoch [31/40], Step [19/105], Loss: 2.592, Perp: 13.35, Acc: 0.46           here
here
Training: Epoch [31/40], Step [20/105], Loss: 2.591, Perp: 13.35, Acc: 0.46           here
here
Training: Epoch [31/40], Step [21/105], Loss: 2.591, Perp: 13.34, Acc: 0.46           here
here
Training: Epoch [31/40], Step [22/105], Loss: 2.591, Perp: 13.34, Acc: 0.46           here
here
Training: Epoch [31/40], Step [23/105], Loss: 2.590, Perp: 13.33, Acc: 0.46           here
here
Training: Epoch [31/40], Step [24/105], Loss: 2.590, Perp: 13.33, Acc: 0.46           here
here
Training: Epoch [31/40], Step [25/105], Loss: 2.590, Perp: 13.33, Acc: 0.46           here
here
Training: Epoch [31/40], Step [26/105], Loss: 2.590, Perp: 13.32, Acc: 0.46           here
here
Training: Epoch [31/40], Step [27/105], 

Training: Epoch [31/40], Step [103/105], Loss: 2.581, Perp: 13.21, Acc: 0.46           here
here
Training: Epoch [31/40], Step [104/105], Loss: 2.581, Perp: 13.21, Acc: 0.46           here
here
Training: Epoch [31/40], Step [105/105], Loss: 2.581, Perp: 13.21, Acc: 0.46           here
here
Validation: Epoch [31/40], Step [1/26], Loss: 2.142, Perp: 8.51, Acc: 0.58           here
here
Validation: Epoch [31/40], Step [2/26], Loss: 2.136, Perp: 8.47, Acc: 0.58           here
here
Validation: Epoch [31/40], Step [3/26], Loss: 2.133, Perp: 8.44, Acc: 0.58           here
here
Validation: Epoch [31/40], Step [4/26], Loss: 2.130, Perp: 8.42, Acc: 0.59           here
here
Validation: Epoch [31/40], Step [5/26], Loss: 2.129, Perp: 8.40, Acc: 0.59           here
here
Validation: Epoch [31/40], Step [6/26], Loss: 2.127, Perp: 8.39, Acc: 0.59           here
here
Validation: Epoch [31/40], Step [7/26], Loss: 2.126, Perp: 8.38, Acc: 0.59           here
here
Validation: Epoch [31/40], Step [8/26], Loss

Training: Epoch [32/40], Step [58/105], Loss: 2.425, Perp: 11.30, Acc: 0.49           here
here
Training: Epoch [32/40], Step [59/105], Loss: 2.425, Perp: 11.30, Acc: 0.49           here
here
Training: Epoch [32/40], Step [60/105], Loss: 2.425, Perp: 11.30, Acc: 0.49           here
here
Training: Epoch [32/40], Step [61/105], Loss: 2.424, Perp: 11.30, Acc: 0.49           here
here
Training: Epoch [32/40], Step [62/105], Loss: 2.424, Perp: 11.30, Acc: 0.49           here
here
Training: Epoch [32/40], Step [63/105], Loss: 2.424, Perp: 11.29, Acc: 0.49           here
here
Training: Epoch [32/40], Step [64/105], Loss: 2.424, Perp: 11.29, Acc: 0.49           here
here
Training: Epoch [32/40], Step [65/105], Loss: 2.424, Perp: 11.29, Acc: 0.49           here
here
Training: Epoch [32/40], Step [66/105], Loss: 2.424, Perp: 11.29, Acc: 0.49           here
here
Training: Epoch [32/40], Step [67/105], Loss: 2.424, Perp: 11.29, Acc: 0.49           here
here
Training: Epoch [32/40], Step [68/105], 

Training: Epoch [33/40], Step [13/105], Loss: 2.283, Perp: 9.81, Acc: 0.51           here
here
Training: Epoch [33/40], Step [14/105], Loss: 2.283, Perp: 9.81, Acc: 0.51           here
here
Training: Epoch [33/40], Step [15/105], Loss: 2.283, Perp: 9.81, Acc: 0.51           here
here
Training: Epoch [33/40], Step [16/105], Loss: 2.283, Perp: 9.80, Acc: 0.51           here
here
Training: Epoch [33/40], Step [17/105], Loss: 2.283, Perp: 9.80, Acc: 0.51           here
here
Training: Epoch [33/40], Step [18/105], Loss: 2.282, Perp: 9.80, Acc: 0.51           here
here
Training: Epoch [33/40], Step [19/105], Loss: 2.282, Perp: 9.80, Acc: 0.51           here
here
Training: Epoch [33/40], Step [20/105], Loss: 2.282, Perp: 9.80, Acc: 0.51           here
here
Training: Epoch [33/40], Step [21/105], Loss: 2.282, Perp: 9.80, Acc: 0.51           here
here
Training: Epoch [33/40], Step [22/105], Loss: 2.282, Perp: 9.80, Acc: 0.51           here
here
Training: Epoch [33/40], Step [23/105], Loss: 2.28

Training: Epoch [33/40], Step [100/105], Loss: 2.276, Perp: 9.74, Acc: 0.51           here
here
Training: Epoch [33/40], Step [101/105], Loss: 2.276, Perp: 9.74, Acc: 0.51           here
here
Training: Epoch [33/40], Step [102/105], Loss: 2.276, Perp: 9.74, Acc: 0.51           here
here
Training: Epoch [33/40], Step [103/105], Loss: 2.276, Perp: 9.74, Acc: 0.51           here
here
Training: Epoch [33/40], Step [104/105], Loss: 2.276, Perp: 9.74, Acc: 0.51           here
here
Training: Epoch [33/40], Step [105/105], Loss: 2.276, Perp: 9.74, Acc: 0.51           here
here
Validation: Epoch [33/40], Step [1/26], Loss: 1.889, Perp: 6.61, Acc: 0.63           here
here
Validation: Epoch [33/40], Step [2/26], Loss: 1.883, Perp: 6.57, Acc: 0.63           here
here
Validation: Epoch [33/40], Step [3/26], Loss: 1.880, Perp: 6.55, Acc: 0.63           here
here
Validation: Epoch [33/40], Step [4/26], Loss: 1.877, Perp: 6.54, Acc: 0.64           here
here
Validation: Epoch [33/40], Step [5/26], Loss

Training: Epoch [34/40], Step [56/105], Loss: 2.250, Perp: 9.48, Acc: 0.52           here
here
Training: Epoch [34/40], Step [57/105], Loss: 2.249, Perp: 9.48, Acc: 0.52           here
here
Training: Epoch [34/40], Step [58/105], Loss: 2.249, Perp: 9.48, Acc: 0.52           here
here
Training: Epoch [34/40], Step [59/105], Loss: 2.249, Perp: 9.48, Acc: 0.52           here
here
Training: Epoch [34/40], Step [60/105], Loss: 2.249, Perp: 9.48, Acc: 0.52           here
here
Training: Epoch [34/40], Step [61/105], Loss: 2.249, Perp: 9.48, Acc: 0.52           here
here
Training: Epoch [34/40], Step [62/105], Loss: 2.249, Perp: 9.48, Acc: 0.52           here
here
Training: Epoch [34/40], Step [63/105], Loss: 2.249, Perp: 9.48, Acc: 0.52           here
here
Training: Epoch [34/40], Step [64/105], Loss: 2.249, Perp: 9.48, Acc: 0.52           here
here
Training: Epoch [34/40], Step [65/105], Loss: 2.249, Perp: 9.48, Acc: 0.52           here
here
Training: Epoch [34/40], Step [66/105], Loss: 2.24

Training: Epoch [35/40], Step [12/105], Loss: 2.182, Perp: 8.87, Acc: 0.52           here
here
Training: Epoch [35/40], Step [13/105], Loss: 2.182, Perp: 8.86, Acc: 0.52           here
here
Training: Epoch [35/40], Step [14/105], Loss: 2.182, Perp: 8.86, Acc: 0.52           here
here
Training: Epoch [35/40], Step [15/105], Loss: 2.181, Perp: 8.86, Acc: 0.52           here
here
Training: Epoch [35/40], Step [16/105], Loss: 2.181, Perp: 8.86, Acc: 0.52           here
here
Training: Epoch [35/40], Step [17/105], Loss: 2.181, Perp: 8.86, Acc: 0.52           here
here
Training: Epoch [35/40], Step [18/105], Loss: 2.181, Perp: 8.85, Acc: 0.52           here
here
Training: Epoch [35/40], Step [19/105], Loss: 2.181, Perp: 8.85, Acc: 0.52           here
here
Training: Epoch [35/40], Step [20/105], Loss: 2.180, Perp: 8.85, Acc: 0.52           here
here
Training: Epoch [35/40], Step [21/105], Loss: 2.180, Perp: 8.85, Acc: 0.52           here
here
Training: Epoch [35/40], Step [22/105], Loss: 2.18

Training: Epoch [35/40], Step [99/105], Loss: 2.174, Perp: 8.79, Acc: 0.52           here
here
Training: Epoch [35/40], Step [100/105], Loss: 2.174, Perp: 8.79, Acc: 0.52           here
here
Training: Epoch [35/40], Step [101/105], Loss: 2.174, Perp: 8.79, Acc: 0.52           here
here
Training: Epoch [35/40], Step [102/105], Loss: 2.174, Perp: 8.79, Acc: 0.52           here
here
Training: Epoch [35/40], Step [103/105], Loss: 2.174, Perp: 8.79, Acc: 0.52           here
here
Training: Epoch [35/40], Step [104/105], Loss: 2.174, Perp: 8.79, Acc: 0.52           here
here
Training: Epoch [35/40], Step [105/105], Loss: 2.174, Perp: 8.79, Acc: 0.52           here
here
Validation: Epoch [35/40], Step [1/26], Loss: 1.855, Perp: 6.39, Acc: 0.64           here
here
Validation: Epoch [35/40], Step [2/26], Loss: 1.849, Perp: 6.35, Acc: 0.64           here
here
Validation: Epoch [35/40], Step [3/26], Loss: 1.846, Perp: 6.33, Acc: 0.64           here
here
Validation: Epoch [35/40], Step [4/26], Loss

Training: Epoch [36/40], Step [55/105], Loss: 2.095, Perp: 8.12, Acc: 0.54           here
here
Training: Epoch [36/40], Step [56/105], Loss: 2.094, Perp: 8.12, Acc: 0.54           here
here
Training: Epoch [36/40], Step [57/105], Loss: 2.094, Perp: 8.12, Acc: 0.54           here
here
Training: Epoch [36/40], Step [58/105], Loss: 2.094, Perp: 8.12, Acc: 0.54           here
here
Training: Epoch [36/40], Step [59/105], Loss: 2.094, Perp: 8.12, Acc: 0.54           here
here
Training: Epoch [36/40], Step [60/105], Loss: 2.094, Perp: 8.12, Acc: 0.54           here
here
Training: Epoch [36/40], Step [61/105], Loss: 2.094, Perp: 8.12, Acc: 0.54           here
here
Training: Epoch [36/40], Step [62/105], Loss: 2.094, Perp: 8.12, Acc: 0.54           here
here
Training: Epoch [36/40], Step [63/105], Loss: 2.094, Perp: 8.12, Acc: 0.54           here
here
Training: Epoch [36/40], Step [64/105], Loss: 2.094, Perp: 8.12, Acc: 0.54           here
here
Training: Epoch [36/40], Step [65/105], Loss: 2.09

Training: Epoch [37/40], Step [11/105], Loss: 2.014, Perp: 7.49, Acc: 0.56           here
here
Training: Epoch [37/40], Step [12/105], Loss: 2.014, Perp: 7.49, Acc: 0.56           here
here
Training: Epoch [37/40], Step [13/105], Loss: 2.014, Perp: 7.49, Acc: 0.56           here
here
Training: Epoch [37/40], Step [14/105], Loss: 2.013, Perp: 7.49, Acc: 0.56           here
here
Training: Epoch [37/40], Step [15/105], Loss: 2.013, Perp: 7.49, Acc: 0.56           here
here
Training: Epoch [37/40], Step [16/105], Loss: 2.013, Perp: 7.49, Acc: 0.56           here
here
Training: Epoch [37/40], Step [17/105], Loss: 2.013, Perp: 7.48, Acc: 0.56           here
here
Training: Epoch [37/40], Step [18/105], Loss: 2.013, Perp: 7.48, Acc: 0.56           here
here
Training: Epoch [37/40], Step [19/105], Loss: 2.012, Perp: 7.48, Acc: 0.56           here
here
Training: Epoch [37/40], Step [20/105], Loss: 2.012, Perp: 7.48, Acc: 0.56           here
here
Training: Epoch [37/40], Step [21/105], Loss: 2.01

Training: Epoch [37/40], Step [98/105], Loss: 2.007, Perp: 7.44, Acc: 0.56           here
here
Training: Epoch [37/40], Step [99/105], Loss: 2.007, Perp: 7.44, Acc: 0.56           here
here
Training: Epoch [37/40], Step [100/105], Loss: 2.007, Perp: 7.44, Acc: 0.56           here
here
Training: Epoch [37/40], Step [101/105], Loss: 2.007, Perp: 7.44, Acc: 0.56           here
here
Training: Epoch [37/40], Step [102/105], Loss: 2.007, Perp: 7.44, Acc: 0.56           here
here
Training: Epoch [37/40], Step [103/105], Loss: 2.007, Perp: 7.44, Acc: 0.56           here
here
Training: Epoch [37/40], Step [104/105], Loss: 2.007, Perp: 7.44, Acc: 0.56           here
here
Training: Epoch [37/40], Step [105/105], Loss: 2.007, Perp: 7.44, Acc: 0.56           here
here
Validation: Epoch [37/40], Step [1/26], Loss: 1.712, Perp: 5.54, Acc: 0.67           here
here
Validation: Epoch [37/40], Step [2/26], Loss: 1.705, Perp: 5.50, Acc: 0.67           here
here
Validation: Epoch [37/40], Step [3/26], Loss

Training: Epoch [38/40], Step [54/105], Loss: 1.953, Perp: 7.05, Acc: 0.56           here
here
Training: Epoch [38/40], Step [55/105], Loss: 1.953, Perp: 7.05, Acc: 0.56           here
here
Training: Epoch [38/40], Step [56/105], Loss: 1.953, Perp: 7.05, Acc: 0.56           here
here
Training: Epoch [38/40], Step [57/105], Loss: 1.953, Perp: 7.05, Acc: 0.56           here
here
Training: Epoch [38/40], Step [58/105], Loss: 1.953, Perp: 7.05, Acc: 0.56           here
here
Training: Epoch [38/40], Step [59/105], Loss: 1.953, Perp: 7.05, Acc: 0.56           here
here
Training: Epoch [38/40], Step [60/105], Loss: 1.953, Perp: 7.05, Acc: 0.56           here
here
Training: Epoch [38/40], Step [61/105], Loss: 1.953, Perp: 7.05, Acc: 0.56           here
here
Training: Epoch [38/40], Step [62/105], Loss: 1.953, Perp: 7.05, Acc: 0.56           here
here
Training: Epoch [38/40], Step [63/105], Loss: 1.953, Perp: 7.05, Acc: 0.56           here
here
Training: Epoch [38/40], Step [64/105], Loss: 1.95

Training: Epoch [39/40], Step [10/105], Loss: 1.893, Perp: 6.64, Acc: 0.58           here
here
Training: Epoch [39/40], Step [11/105], Loss: 1.892, Perp: 6.64, Acc: 0.58           here
here
Training: Epoch [39/40], Step [12/105], Loss: 1.892, Perp: 6.63, Acc: 0.58           here
here
Training: Epoch [39/40], Step [13/105], Loss: 1.892, Perp: 6.63, Acc: 0.58           here
here
Training: Epoch [39/40], Step [14/105], Loss: 1.892, Perp: 6.63, Acc: 0.58           here
here
Training: Epoch [39/40], Step [15/105], Loss: 1.892, Perp: 6.63, Acc: 0.58           here
here
Training: Epoch [39/40], Step [16/105], Loss: 1.892, Perp: 6.63, Acc: 0.58           here
here
Training: Epoch [39/40], Step [17/105], Loss: 1.891, Perp: 6.63, Acc: 0.58           here
here
Training: Epoch [39/40], Step [18/105], Loss: 1.891, Perp: 6.63, Acc: 0.58           here
here
Training: Epoch [39/40], Step [19/105], Loss: 1.891, Perp: 6.63, Acc: 0.58           here
here
Training: Epoch [39/40], Step [20/105], Loss: 1.89

Training: Epoch [39/40], Step [97/105], Loss: 1.887, Perp: 6.60, Acc: 0.58           here
here
Training: Epoch [39/40], Step [98/105], Loss: 1.887, Perp: 6.60, Acc: 0.58           here
here
Training: Epoch [39/40], Step [99/105], Loss: 1.886, Perp: 6.60, Acc: 0.58           here
here
Training: Epoch [39/40], Step [100/105], Loss: 1.886, Perp: 6.60, Acc: 0.58           here
here
Training: Epoch [39/40], Step [101/105], Loss: 1.886, Perp: 6.60, Acc: 0.58           here
here
Training: Epoch [39/40], Step [102/105], Loss: 1.886, Perp: 6.60, Acc: 0.58           here
here
Training: Epoch [39/40], Step [103/105], Loss: 1.886, Perp: 6.60, Acc: 0.58           here
here
Training: Epoch [39/40], Step [104/105], Loss: 1.886, Perp: 6.59, Acc: 0.58           here
here
Training: Epoch [39/40], Step [105/105], Loss: 1.886, Perp: 6.59, Acc: 0.58           here
here
Validation: Epoch [39/40], Step [1/26], Loss: 1.565, Perp: 4.78, Acc: 0.72           here
here
Validation: Epoch [39/40], Step [2/26], Loss

Training: Epoch [40/40], Step [53/105], Loss: 1.843, Perp: 6.32, Acc: 0.59           here
here
Training: Epoch [40/40], Step [54/105], Loss: 1.843, Perp: 6.32, Acc: 0.59           here
here
Training: Epoch [40/40], Step [55/105], Loss: 1.843, Perp: 6.32, Acc: 0.59           here
here
Training: Epoch [40/40], Step [56/105], Loss: 1.843, Perp: 6.32, Acc: 0.59           here
here
Training: Epoch [40/40], Step [57/105], Loss: 1.843, Perp: 6.32, Acc: 0.59           here
here
Training: Epoch [40/40], Step [58/105], Loss: 1.843, Perp: 6.32, Acc: 0.59           here
here
Training: Epoch [40/40], Step [59/105], Loss: 1.843, Perp: 6.32, Acc: 0.59           here
here
Training: Epoch [40/40], Step [60/105], Loss: 1.843, Perp: 6.32, Acc: 0.59           here
here
Training: Epoch [40/40], Step [61/105], Loss: 1.843, Perp: 6.32, Acc: 0.59           here
here
Training: Epoch [40/40], Step [62/105], Loss: 1.843, Perp: 6.32, Acc: 0.59           here
here
Training: Epoch [40/40], Step [63/105], Loss: 1.84

In [14]:
torch.save(model.state_dict(), 'masnavi-lm.pth')

In [43]:
N = 1000
p = torch.FloatTensor([0.5, 0.25, 0.15, 0.10])
p = torch.FloatTensor([50, 25, 15, 10])

counter = Counter()
# Draw N samples
for _ in range(N):
    sample = torch.multinomial(p, num_samples=1, replacement=True).numpy()[0]
    counter[sample] += 1

for sample, count in counter.most_common():
    print("{:d}: {:2d}".format(sample, count))

0: 494
1: 240
2: 170
3: 96


In [44]:
def get_sample(model, sample_len):
    model.eval()
    sample = ''
    state = model.init_hidden(1)

    # select a random word id to start sampling
    probs = torch.ones(vocab_size)
    inp = to_var(torch.multinomial(probs, num_samples=1).unsqueeze(1), volatile=True)
    print (inp)
    for i in tqdm_notebook(range(sample_len)):
        output, state = model(inp, state)

        # Sample an id
        probs = output.squeeze().data.exp().cpu()
        word_id = torch.multinomial(probs, 1).numpy()[0]

        # Feed sampled word id to next time step
        inp.data.fill_(word_id)

        # write to file
        word = corpus.vocabulary.index2word[word_id]
        if word == '<EOS>':
            sample += '\n'
        else:
            sample += ' ' + word
    
    return sample

In [45]:
model = LSTM_LM(vocab_size, embed_size=1500, hidden_size=1500, num_layers=2)
model.load_state_dict(torch.load('masnavi-lm.pth'))
model = model.cuda()
sample = get_sample(model, 200)
print(sample)

tensor([[23264]], device='cuda:0')


HBox(children=(IntProgress(value=0, max=200), HTML(value='')))



 کان به نیست بی جوهری
 نیست سر مرگ ما شرع
 مختلف هم حدث از ضلال
 باز سله و بی ندید
 خانه خونابه و زیر نیز
 که آسمانهاست و مردمان
 کای آسمانها از راه
 تا فرو دست دیو
 وقت بازرگان و پهلوان
 نقش یک فرو گشته
 یا فجر و خشک ها
 چون عقلند و می شنید
 تا مگیر و نبیه
 گرچه دیگر سازد چون حیات
 از حدوث و لا سقیم
 از رحمت و در نالیده
 شه خر ز لطف القلوب
 چونک در نیارد در بیم
 زانک اینست بتر سر حق
 کو شدی تا جود و جو
 ای زیان ز سایه و ما
 لقمه بحر اگر یافت
 آدمی چندان چو در نهاد
 پیش اژدرها و سوز تنگ
 کای یزدان بی باغ
 دردسر و شراب کان
 کرده پیمودن ز قهر خویش
 که نعیم و کم ساز
 گاو مرده چه می نمود
 ای صاحب چو می خوردمی
 از قصه هر می آمدی
 این مخنث و می نجست
 معده و شهسواران آن ندید
 در حی که در یافتند
 زندگی در


In [36]:
model = LSTM_LM(vocab_size, embed_size=1500, hidden_size=1500, num_layers=2)
model.load_state_dict(torch.load('lm-masnavi-epoch-38-em-1500-hi-1500-nl-2-1.53-4.63.pth'))
model = model.cuda()

In [37]:
sample = get_sample(model, 300)
print(sample)

tensor([[18976]], device='cuda:0')


HBox(children=(IntProgress(value=0, max=300), HTML(value='')))



 هر دو پیش دین
 تا العارفین بر ترس
 زان به زخم می
 تا شهواتنا آن شرع
 تا اسباب و زیر و کشند
 چار تقوی یک مرد سخت
 وا زراد و ملک مرد
 بود دیگر و دیو و چفت
 گرگ را دهد یک بدان
 ای قلاووزم این مشکل
 از کف خوار چون بهشت
 تا استانید و خانه بود
 همچو دانیش سوی خواب
 که حمال و بانگ گرفت
 دست ندیده می خانه رسید
 صدقوهم و کف و بود
 در حسد و دار و سخن
 که جنبیکم و حیلت و
 ای گبری و ره و
 هست دوزانند از خانه تنگ
 ای اذان از سر به
 در درویشی و تا خاص
 چنگ بگریزد بر گردون
 جوش بگریزد بر بحر
 کز گردونست و پاکی
 بی هوای باز چرخ
 کو حیات رفته ساز
 هاویه و نموده و
 می درمانده بی حال
 پرده ساله و داد
 گر تغب و طاووس
 کای عیسی شده پیش بشیر
 قطره گشته آراست و ریش
 پیش صد ندیدی بی یقین
 بس دیگر گشته از قبور
 لیک رب بود بر زلتست
 لیک یک خفتن و انتقام
 زهر زن پیش ای ستیر
 هرچه چه نگردد چون عزیز
 چون گشادش و هر چه
 باز برگیرم بهر بحر و
 همچو اینجا چو ناری
 وآن حیوان و عار
 کرد هر حالهای در کریم
 ز بحرم و در میگذرد
 سر دیگر کاندرین در غرف
 لیک ما و فعل کاسدست
 قوت و ترکیب و سماع
 سست دان بر آتش حلق
 تا فزونی