In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.autograd import Variable
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

import pandas as pd
from bs4 import BeautifulSoup
import itertools
import more_itertools
import numpy as np
import pickle


In [2]:
df=pd.read_csv('imdb.csv')

In [3]:
train = df[df['testOtrain'] == 'train']
test = df[df['testOtrain'] == 'test']

In [4]:
max_sent_len = 20
max_seq_len = 200

In [5]:
def clean_str(string, max_seq_len):
    """
    adapted from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
    """
    string = BeautifulSoup(string, "lxml").text
    string = re.sub(r"[^A-Za-z0-9(),!?\"\`]", " ", string)
    string = re.sub(r"\"s", " \"s", string)
    string = re.sub(r"\"ve", " \"ve", string)
    string = re.sub(r"n\"t", " n\"t", string)
    string = re.sub(r"\"re", " \"re", string)
    string = re.sub(r"\"d", " \"d", string)
    string = re.sub(r"\"ll", " \"ll", string)
    string = re.sub(r",", " , ", string)
    string = re.sub(r"!", " ! ", string)
    string = re.sub(r"\(", " \( ", string)
    string = re.sub(r"\)", " \) ", string)
    string = re.sub(r"\?", " \? ", string)
    string = re.sub(r"\s{2,}", " ", string)
    s =string.strip().lower().split(" ")
    if len(s) > max_seq_len:
        return s[0:max_seq_len] 
    return s

In [6]:
def create3DList(df, max_sent_len,max_seq_len):
    x=[]
    for docs in df["content"].as_matrix():
        x1=[]
        idx = 0
        for seq in "|||".join(re.split("[.?!]", docs)).split("|||"):
            x1.append(clean_str(seq,max_seq_len))
            if(idx>max_sent_len):
                break
            idx= idx+1
        x.append(x1)
    return x

In [7]:
#divides review in sentences and sentences into word creating a 3DList

x_train = create3DList(train, max_sent_len,max_seq_len)
x_test = create3DList(test, max_sent_len,max_seq_len)
print("x_train: {}".format(len(x_train)))
print("x_test: {}".format(len(x_test)))

  '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
  '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)


x_train: 25000
x_test: 25000


In [8]:
def index(data):
    def apply_index(data):
        indexed = [[[unqVoc_LookUp[char] for char in seq] for seq in doc] for doc in data]
        return indexed
    # end

    x_train, x_test = data

    unqVoc = set(list(more_itertools.collapse(x_train[:] + x_test[:])))
    unqVoc_LookUp = {k: v+1 for v, k in enumerate(unqVoc)}
    vocab_size = len(list(unqVoc_LookUp))

    # save lookup table
    pickle._dump(unqVoc_LookUp, open("unqVoc_Lookup.p","wb"))

    x_train = apply_index(data=x_train)
    x_test = apply_index(data=x_test)

    return [x_train, x_test,vocab_size,unqVoc_LookUp]

In [9]:
x_train, x_test,vocab_size,lookup = index(data=[x_train[:], x_test[:]])

In [10]:
y_train = train["label"].tolist()
y_test = test["label"].tolist()

In [11]:
def attention_mul(rnn_outputs, att_weights):
    attn_vectors = None
    for i in range(rnn_outputs.size(0)):
        h_i = rnn_outputs[i]
        a_i = att_weights[i]
        h_i = a_i * h_i
        h_i = h_i.unsqueeze(0)
        if(attn_vectors is None):
            attn_vectors = h_i
        else:
            attn_vectors = torch.cat((attn_vectors,h_i),0)
    return torch.sum(attn_vectors, 0).unsqueeze(0)

In [126]:
class WordRNN(nn.Module):
    def __init__(self, vocab_size,embedsize, batch_size, hid_size):
        super(WordRNN, self).__init__()
        self.batch_size = batch_size
        self.embedsize = embedsize
        self.hid_size = hid_size
        ## Word Encoder
        self.embed = nn.Embedding(vocab_size, embedsize)
        self.wordRNN = nn.GRU(embedsize, hid_size, bidirectional=True)
        ## Word Attention
        self.wordattn = nn.Linear(2*hid_size, 2*hid_size)
        self.attn_combine = nn.Linear(2*hid_size, 2*hid_size,bias=False)
    def forward(self,inp, hid_state):
        try:
            emb_out  = self.embed(inp)
        
            out_state, hid_state = self.wordRNN(emb_out, hid_state)
#         print(out_state.shape)
#         print(hid_state.view(1,self.batch_size,2*self.hid_size).shape)
            word_annotation = self.wordattn(out_state)
            attn = F.softmax(self.attn_combine(word_annotation),dim=1)
#         print(attn.shape)
            sent = attention_mul(out_state,attn)
            return sent, hid_state
        except:
            print(inp)
            print(hid_state)
    def init_hidden(self):
            return Variable(torch.zeros(2, self.batch_size, self.hid_size))


In [127]:
class SentenceRNN(nn.Module):
    def __init__(self,embedsize, batch_size, hid_size,c):
        super(SentenceRNN, self).__init__()
        self.batch_size = batch_size
        self.embedsize = embedsize
        self.hid_size = hid_size
        self.cls = c
        ## Sentence Encoder
        self.sentRNN = nn.GRU(embedsize, hid_size, bidirectional=True)
        ## Sentence Attention
        self.sentattn = nn.Linear(2*hid_size, 2*hid_size)
        self.attn_combine = nn.Linear(2*hid_size, 2*hid_size,bias=False)
        self.doc_linear = nn.Linear(2*hid_size, c)
    
    def forward(self,inp, hid_state):        
        out_state, hid_state = self.sentRNN(inp, hid_state)
#         print(out_state.shape)
#         print(hid_state.view(1,self.batch_size,2*self.hid_size).shape)
        sent_annotation = self.sentattn(out_state)
        attn = F.softmax(self.attn_combine(sent_annotation),dim=1)
#         print(attn.shape)
        doc = attention_mul(out_state,attn)
        d = self.doc_linear(doc)
#         print(d)
        cls = F.softmax(d.view(self.cls,-1),dim=0)
        return cls, hid_state
    
    def init_hidden(self):
            return Variable(torch.zeros(2, self.batch_size, self.hid_size))


In [128]:
y_train_one =  [torch.LongTensor([int(label)]) for label in y_train]

In [129]:
max_seq_len = max([len(seq) for seq in itertools.chain.from_iterable(x_train + x_test)])
max_sent_len = max([len(sent) for sent in (x_train+x_test)])

In [130]:
max_seq_len

200

In [131]:
max_sent_len

22

In [132]:
X1 = [sub_list + [[0]] * (max_sent_len - len(sub_list)) for sub_list in x_train]

In [133]:
batch_size = 1

In [134]:
def train_data(batch_size, review, targets, word_attn_model, sent_attn_model, word_optimizer, sent_optimizer, criterion):

#     max_sents, batch_size, max_tokens = mini_batch.size()
    state_word = word_attn_model.init_hidden()
    state_sent = sent_attn_model.init_hidden()
    word_optimizer.zero_grad()
    sent_optimizer.zero_grad()
#     print(review)
    s = None
    for i in range(len(review[0])):
        r = None
        for j in range(len(review)):
            if(r is None):
                r = [review[j][i]]
            else:
                r.append(review[j][i])
        r1 = np.asarray([sub_list + [0] * (max_seq_len - len(sub_list)) for sub_list in r])
#         print(r1.shape)
        _s, state_word = word_attn_model(torch.tensor(r1).view(-1,batch_size), state_word)
        if(s is None):
            s = _s
        else:
            s = torch.cat((s,_s),0)            
    y_pred, state_sent = sent_attn_model(s, state_sent)
#     print(y_pred)
#     print(targets)
    
    loss = criterion(y_pred.view(-1,2), torch.tensor(targets)) 
    max_index = y_pred.view(-1,2).max(dim = 1)[1]
    correct = (max_index == torch.tensor(targets)).sum()
    acc = correct/batch_size
    loss.backward()
    
    word_optimizer.step()
    sent_optimizer.step()
    
    return loss.data[0],acc

In [135]:
word_attn = WordRNN(vocab_size,100,batch_size,100)

In [136]:
sent_attn = SentenceRNN(200,batch_size,200,2)

In [137]:
learning_rate = 1e-3
momentum = 0.5
word_optmizer = torch.optim.SGD(word_attn.parameters(), lr=learning_rate, momentum= momentum)
sent_optimizer = torch.optim.SGD(sent_attn.parameters(), lr=learning_rate, momentum= momentum)

criterion = nn.NLLLoss()

In [138]:
import time
import math

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [139]:
def gen_batch(x,y,batch_size,max_sent_len):
    k = random.sample(range(len(x)-1),batch_size)
    x_batch=[]
    y_batch=[]
#     print(x_batch)
    for t in k:
        x_batch.append(x[t])
        y_batch.append(y[t])
    x_batch = [sub_list + [[0]] * (max_sent_len - len(sub_list)) for sub_list in x_batch]
    return [x_batch,y_batch]

In [140]:
def train_early_stopping(batch_size, X_train, y_train, word_attn_model, sent_attn_model, 
                         word_attn_optimiser, sent_attn_optimiser, loss_criterion, num_epoch,max_sent_len,
                         print_val_loss_every = 1000, print_loss_every = 50):
    start = time.time()
    loss_full = []
    loss_epoch = []
    acc_epoch = []
    acc_full = []
    epoch_counter = 0
    train_length = len(x_train)
    for i in range(1, num_epoch + 1):
        try:
            loss_epoch = []
            acc_epoch = []
            for j in range(int(train_length/batch_size)):
                x,y = gen_batch(X_train,y_train,batch_size,max_sent_len)
                loss,acc = train_data(batch_size, x, y, word_attn_model, sent_attn_model, word_attn_optimiser, sent_attn_optimiser, loss_criterion)
                loss_full.append(loss)
                loss_epoch.append(loss)
                acc_full.append(acc)
                acc_epoch.append(acc)
                if j % print_loss_every == 0:
                    print ('Loss at %d paragraphs, %d epoch,(%s) is %f' %(j, i, timeSince(start), np.mean(loss_epoch)))
                    print ('Accuracy at %d paragraphs, %d epoch,(%s) is %f' %(j, i, timeSince(start), np.mean(acc_epoch)))
            torch.save(word_attn_model.state_dict(), 'word_attn_model' + str(i+1)+'.pth')
            torch.save(sent_attn_model.state_dict(), 'sent_attn_model' + str(i+1)+'.pth')
        except StopIteration:
            epoch_counter += 1
            print ('Reached %d epocs' % epoch_counter)
            print ('i %d' % i)
            g = gen_minibatch(X_train, y_train, mini_batch_size)
            loss_epoch = []
            accuracy_epoch = []
    return loss_full, state_word, state_sent


In [None]:
 

loss_full, state_word_train, state_sent_train = train_early_stopping(batch_size, x_train, y_train_one, word_attn, sent_attn, word_optmizer, sent_optimizer, 
                            criterion, 10,max_sent_len)





Loss at 0 paragraphs, 1 epoch,(0m 3s) is -0.928255
Accuracy at 0 paragraphs, 1 epoch,(0m 3s) is 1.000000
Loss at 50 paragraphs, 1 epoch,(3m 9s) is -0.469181
Accuracy at 50 paragraphs, 1 epoch,(3m 9s) is 0.470588
Loss at 100 paragraphs, 1 epoch,(6m 14s) is -0.484438
Accuracy at 100 paragraphs, 1 epoch,(6m 14s) is 0.485149
Loss at 150 paragraphs, 1 epoch,(9m 20s) is -0.535949
Accuracy at 150 paragraphs, 1 epoch,(9m 20s) is 0.536424
Loss at 200 paragraphs, 1 epoch,(12m 24s) is -0.527006
Accuracy at 200 paragraphs, 1 epoch,(12m 24s) is 0.527363
Loss at 250 paragraphs, 1 epoch,(15m 29s) is -0.493738
Accuracy at 250 paragraphs, 1 epoch,(15m 29s) is 0.494024
Loss at 300 paragraphs, 1 epoch,(18m 33s) is -0.491456
Accuracy at 300 paragraphs, 1 epoch,(18m 33s) is 0.491694
Loss at 350 paragraphs, 1 epoch,(21m 37s) is -0.489824
Accuracy at 350 paragraphs, 1 epoch,(21m 37s) is 0.490028
Loss at 400 paragraphs, 1 epoch,(24m 42s) is -0.518524
Accuracy at 400 paragraphs, 1 epoch,(24m 42s) is 0.518703
L

Loss at 3600 paragraphs, 1 epoch,(222m 21s) is -0.498730
Accuracy at 3600 paragraphs, 1 epoch,(222m 21s) is 0.498750
Loss at 3650 paragraphs, 1 epoch,(225m 27s) is -0.499296
Accuracy at 3650 paragraphs, 1 epoch,(225m 27s) is 0.499315
Loss at 3700 paragraphs, 1 epoch,(228m 33s) is -0.500926
Accuracy at 3700 paragraphs, 1 epoch,(228m 33s) is 0.500946
Loss at 3750 paragraphs, 1 epoch,(231m 39s) is -0.500914
Accuracy at 3750 paragraphs, 1 epoch,(231m 39s) is 0.500933
Loss at 3800 paragraphs, 1 epoch,(234m 44s) is -0.501428
Accuracy at 3800 paragraphs, 1 epoch,(234m 44s) is 0.501447
Loss at 3850 paragraphs, 1 epoch,(237m 50s) is -0.501150
Accuracy at 3850 paragraphs, 1 epoch,(237m 50s) is 0.501169
Loss at 3900 paragraphs, 1 epoch,(240m 56s) is -0.500110
Accuracy at 3900 paragraphs, 1 epoch,(240m 56s) is 0.500128
Loss at 3950 paragraphs, 1 epoch,(244m 2s) is -0.498337
Accuracy at 3950 paragraphs, 1 epoch,(244m 2s) is 0.498355
Loss at 4000 paragraphs, 1 epoch,(247m 8s) is -0.500107
Accuracy a

Loss at 7150 paragraphs, 1 epoch,(446m 41s) is -0.504535
Accuracy at 7150 paragraphs, 1 epoch,(446m 42s) is 0.504545
Loss at 7200 paragraphs, 1 epoch,(449m 53s) is -0.505475
Accuracy at 7200 paragraphs, 1 epoch,(449m 53s) is 0.505485
Loss at 7250 paragraphs, 1 epoch,(453m 5s) is -0.505438
Accuracy at 7250 paragraphs, 1 epoch,(453m 6s) is 0.505448
Loss at 7300 paragraphs, 1 epoch,(456m 17s) is -0.504990
Accuracy at 7300 paragraphs, 1 epoch,(456m 17s) is 0.504999
Loss at 7350 paragraphs, 1 epoch,(459m 29s) is -0.505092
Accuracy at 7350 paragraphs, 1 epoch,(459m 29s) is 0.505101
Loss at 7400 paragraphs, 1 epoch,(462m 42s) is -0.504922
Accuracy at 7400 paragraphs, 1 epoch,(462m 42s) is 0.504932
Loss at 7450 paragraphs, 1 epoch,(465m 54s) is -0.504889
Accuracy at 7450 paragraphs, 1 epoch,(465m 54s) is 0.504899
Loss at 7500 paragraphs, 1 epoch,(469m 6s) is -0.505256
Accuracy at 7500 paragraphs, 1 epoch,(469m 6s) is 0.505266
Loss at 7550 paragraphs, 1 epoch,(472m 19s) is -0.505222
Accuracy at

Loss at 10700 paragraphs, 1 epoch,(672m 23s) is -0.505554
Accuracy at 10700 paragraphs, 1 epoch,(672m 23s) is 0.505560
Loss at 10750 paragraphs, 1 epoch,(675m 29s) is -0.504970
Accuracy at 10750 paragraphs, 1 epoch,(675m 29s) is 0.504976
Loss at 10800 paragraphs, 1 epoch,(678m 36s) is -0.504391
Accuracy at 10800 paragraphs, 1 epoch,(678m 36s) is 0.504398
Loss at 10850 paragraphs, 1 epoch,(681m 43s) is -0.504094
Accuracy at 10850 paragraphs, 1 epoch,(681m 43s) is 0.504101
Loss at 10900 paragraphs, 1 epoch,(684m 50s) is -0.504167
Accuracy at 10900 paragraphs, 1 epoch,(684m 50s) is 0.504174
Loss at 10950 paragraphs, 1 epoch,(687m 57s) is -0.504514
Accuracy at 10950 paragraphs, 1 epoch,(687m 57s) is 0.504520
Loss at 11000 paragraphs, 1 epoch,(691m 4s) is -0.504402
Accuracy at 11000 paragraphs, 1 epoch,(691m 4s) is 0.504409
Loss at 11050 paragraphs, 1 epoch,(694m 11s) is -0.504382
Accuracy at 11050 paragraphs, 1 epoch,(694m 11s) is 0.504389
Loss at 11100 paragraphs, 1 epoch,(697m 18s) is -0

Loss at 14150 paragraphs, 1 epoch,(888m 0s) is -0.505825
Accuracy at 14150 paragraphs, 1 epoch,(888m 0s) is 0.505830
Loss at 14200 paragraphs, 1 epoch,(891m 8s) is -0.506086
Accuracy at 14200 paragraphs, 1 epoch,(891m 8s) is 0.506091
Loss at 14250 paragraphs, 1 epoch,(894m 16s) is -0.506135
Accuracy at 14250 paragraphs, 1 epoch,(894m 16s) is 0.506140
Loss at 14300 paragraphs, 1 epoch,(897m 24s) is -0.506533
Accuracy at 14300 paragraphs, 1 epoch,(897m 25s) is 0.506538
Loss at 14350 paragraphs, 1 epoch,(900m 34s) is -0.506301
Accuracy at 14350 paragraphs, 1 epoch,(900m 34s) is 0.506306
Loss at 14400 paragraphs, 1 epoch,(903m 43s) is -0.506418
Accuracy at 14400 paragraphs, 1 epoch,(903m 43s) is 0.506423
Loss at 14450 paragraphs, 1 epoch,(906m 51s) is -0.506258
Accuracy at 14450 paragraphs, 1 epoch,(906m 51s) is 0.506263
Loss at 14500 paragraphs, 1 epoch,(910m 0s) is -0.506098
Accuracy at 14500 paragraphs, 1 epoch,(910m 0s) is 0.506103
Loss at 14550 paragraphs, 1 epoch,(913m 10s) is -0.506

Loss at 17600 paragraphs, 1 epoch,(1105m 27s) is -0.503149
Accuracy at 17600 paragraphs, 1 epoch,(1105m 27s) is 0.503153
Loss at 17650 paragraphs, 1 epoch,(1108m 36s) is -0.503084
Accuracy at 17650 paragraphs, 1 epoch,(1108m 36s) is 0.503088
Loss at 17700 paragraphs, 1 epoch,(1111m 46s) is -0.502962
Accuracy at 17700 paragraphs, 1 epoch,(1111m 46s) is 0.502966
Loss at 17750 paragraphs, 1 epoch,(1114m 54s) is -0.502897
Accuracy at 17750 paragraphs, 1 epoch,(1114m 54s) is 0.502901
Loss at 17800 paragraphs, 1 epoch,(1118m 3s) is -0.502945
Accuracy at 17800 paragraphs, 1 epoch,(1118m 3s) is 0.502949
Loss at 17850 paragraphs, 1 epoch,(1121m 12s) is -0.502881
Accuracy at 17850 paragraphs, 1 epoch,(1121m 12s) is 0.502885
Loss at 17900 paragraphs, 1 epoch,(1124m 21s) is -0.502985
Accuracy at 17900 paragraphs, 1 epoch,(1124m 21s) is 0.502989
Loss at 17950 paragraphs, 1 epoch,(1127m 31s) is -0.503032
Accuracy at 17950 paragraphs, 1 epoch,(1127m 31s) is 0.503036
Loss at 18000 paragraphs, 1 epoch,

Loss at 21000 paragraphs, 1 epoch,(1320m 3s) is -0.501973
Accuracy at 21000 paragraphs, 1 epoch,(1320m 3s) is 0.501976
Loss at 21050 paragraphs, 1 epoch,(1323m 13s) is -0.501968
Accuracy at 21050 paragraphs, 1 epoch,(1323m 13s) is 0.501971
Loss at 21100 paragraphs, 1 epoch,(1326m 24s) is -0.501632
Accuracy at 21100 paragraphs, 1 epoch,(1326m 24s) is 0.501635
Loss at 21150 paragraphs, 1 epoch,(1329m 36s) is -0.501722
Accuracy at 21150 paragraphs, 1 epoch,(1329m 36s) is 0.501726
Loss at 21200 paragraphs, 1 epoch,(1332m 47s) is -0.501907
Accuracy at 21200 paragraphs, 1 epoch,(1332m 47s) is 0.501910
Loss at 21250 paragraphs, 1 epoch,(1335m 58s) is -0.501855
Accuracy at 21250 paragraphs, 1 epoch,(1335m 58s) is 0.501859
Loss at 21300 paragraphs, 1 epoch,(1339m 9s) is -0.501663
Accuracy at 21300 paragraphs, 1 epoch,(1339m 9s) is 0.501667
Loss at 21350 paragraphs, 1 epoch,(1342m 20s) is -0.501612
Accuracy at 21350 paragraphs, 1 epoch,(1342m 20s) is 0.501616
Loss at 21400 paragraphs, 1 epoch,(1

Loss at 24400 paragraphs, 1 epoch,(1536m 18s) is -0.500181
Accuracy at 24400 paragraphs, 1 epoch,(1536m 18s) is 0.500184
Loss at 24450 paragraphs, 1 epoch,(1539m 28s) is -0.500181
Accuracy at 24450 paragraphs, 1 epoch,(1539m 28s) is 0.500184
Loss at 24500 paragraphs, 1 epoch,(1542m 38s) is -0.500344
Accuracy at 24500 paragraphs, 1 epoch,(1542m 38s) is 0.500347
Loss at 24550 paragraphs, 1 epoch,(1545m 49s) is -0.500262
Accuracy at 24550 paragraphs, 1 epoch,(1545m 49s) is 0.500265
Loss at 24600 paragraphs, 1 epoch,(1549m 0s) is -0.500261
Accuracy at 24600 paragraphs, 1 epoch,(1549m 0s) is 0.500264
Loss at 24650 paragraphs, 1 epoch,(1552m 14s) is -0.500301
Accuracy at 24650 paragraphs, 1 epoch,(1552m 14s) is 0.500304
Loss at 24700 paragraphs, 1 epoch,(1555m 26s) is -0.500382
Accuracy at 24700 paragraphs, 1 epoch,(1555m 26s) is 0.500385
Loss at 24750 paragraphs, 1 epoch,(1558m 37s) is -0.500341
Accuracy at 24750 paragraphs, 1 epoch,(1558m 38s) is 0.500343
Loss at 24800 paragraphs, 1 epoch,

Loss at 2900 paragraphs, 2 epoch,(1758m 37s) is -0.498449
Accuracy at 2900 paragraphs, 2 epoch,(1758m 37s) is 0.498449
Loss at 2950 paragraphs, 2 epoch,(1761m 48s) is -0.500169
Accuracy at 2950 paragraphs, 2 epoch,(1761m 48s) is 0.500169
Loss at 3000 paragraphs, 2 epoch,(1764m 58s) is -0.502166
Accuracy at 3000 paragraphs, 2 epoch,(1764m 58s) is 0.502166
Loss at 3050 paragraphs, 2 epoch,(1768m 8s) is -0.501803
Accuracy at 3050 paragraphs, 2 epoch,(1768m 8s) is 0.501803
Loss at 3100 paragraphs, 2 epoch,(1771m 19s) is -0.502741
Accuracy at 3100 paragraphs, 2 epoch,(1771m 19s) is 0.502741
Loss at 3150 paragraphs, 2 epoch,(1774m 30s) is -0.502698
Accuracy at 3150 paragraphs, 2 epoch,(1774m 30s) is 0.502698
Loss at 3200 paragraphs, 2 epoch,(1777m 40s) is -0.503905
Accuracy at 3200 paragraphs, 2 epoch,(1777m 40s) is 0.503905
Loss at 3250 paragraphs, 2 epoch,(1780m 50s) is -0.504768
Accuracy at 3250 paragraphs, 2 epoch,(1780m 50s) is 0.504768
Loss at 3300 paragraphs, 2 epoch,(1784m 1s) is -0.

Loss at 6400 paragraphs, 2 epoch,(1982m 17s) is -0.503203
Accuracy at 6400 paragraphs, 2 epoch,(1982m 17s) is 0.503203
Loss at 6450 paragraphs, 2 epoch,(1985m 28s) is -0.503023
Accuracy at 6450 paragraphs, 2 epoch,(1985m 28s) is 0.503023
Loss at 6500 paragraphs, 2 epoch,(1988m 40s) is -0.502692
Accuracy at 6500 paragraphs, 2 epoch,(1988m 40s) is 0.502692
Loss at 6550 paragraphs, 2 epoch,(1991m 52s) is -0.501145
Accuracy at 6550 paragraphs, 2 epoch,(1991m 52s) is 0.501145
Loss at 6600 paragraphs, 2 epoch,(1995m 4s) is -0.500682
Accuracy at 6600 paragraphs, 2 epoch,(1995m 4s) is 0.500682
Loss at 6650 paragraphs, 2 epoch,(1998m 16s) is -0.500376
Accuracy at 6650 paragraphs, 2 epoch,(1998m 16s) is 0.500376
Loss at 6700 paragraphs, 2 epoch,(2001m 28s) is -0.499776
Accuracy at 6700 paragraphs, 2 epoch,(2001m 28s) is 0.499776
Loss at 6750 paragraphs, 2 epoch,(2004m 40s) is -0.499926
Accuracy at 6750 paragraphs, 2 epoch,(2004m 40s) is 0.499926
Loss at 6800 paragraphs, 2 epoch,(2007m 51s) is -0

Loss at 9900 paragraphs, 2 epoch,(2206m 22s) is -0.499243
Accuracy at 9900 paragraphs, 2 epoch,(2206m 22s) is 0.499243
Loss at 9950 paragraphs, 2 epoch,(2209m 34s) is -0.499246
Accuracy at 9950 paragraphs, 2 epoch,(2209m 34s) is 0.499246
Loss at 10000 paragraphs, 2 epoch,(2212m 46s) is -0.500050
Accuracy at 10000 paragraphs, 2 epoch,(2212m 46s) is 0.500050
Loss at 10050 paragraphs, 2 epoch,(2215m 58s) is -0.500149
Accuracy at 10050 paragraphs, 2 epoch,(2215m 58s) is 0.500149
Loss at 10100 paragraphs, 2 epoch,(2219m 9s) is -0.500148
Accuracy at 10100 paragraphs, 2 epoch,(2219m 9s) is 0.500149
Loss at 10150 paragraphs, 2 epoch,(2222m 21s) is -0.500148
Accuracy at 10150 paragraphs, 2 epoch,(2222m 21s) is 0.500148
Loss at 10200 paragraphs, 2 epoch,(2225m 33s) is -0.500245
Accuracy at 10200 paragraphs, 2 epoch,(2225m 33s) is 0.500245
Loss at 10250 paragraphs, 2 epoch,(2228m 45s) is -0.500439
Accuracy at 10250 paragraphs, 2 epoch,(2228m 45s) is 0.500439
Loss at 10300 paragraphs, 2 epoch,(223

Loss at 13300 paragraphs, 2 epoch,(2428m 30s) is -0.499286
Accuracy at 13300 paragraphs, 2 epoch,(2428m 30s) is 0.499286
Loss at 13350 paragraphs, 2 epoch,(2431m 49s) is -0.499588
Accuracy at 13350 paragraphs, 2 epoch,(2431m 49s) is 0.499588
Loss at 13400 paragraphs, 2 epoch,(2435m 9s) is -0.499515
Accuracy at 13400 paragraphs, 2 epoch,(2435m 9s) is 0.499515
Loss at 13450 paragraphs, 2 epoch,(2438m 29s) is -0.499740
Accuracy at 13450 paragraphs, 2 epoch,(2438m 29s) is 0.499740
Loss at 13500 paragraphs, 2 epoch,(2441m 48s) is -0.499444
Accuracy at 13500 paragraphs, 2 epoch,(2441m 48s) is 0.499444
Loss at 13550 paragraphs, 2 epoch,(2445m 9s) is -0.499299
Accuracy at 13550 paragraphs, 2 epoch,(2445m 9s) is 0.499299
Loss at 13600 paragraphs, 2 epoch,(2448m 28s) is -0.499007
Accuracy at 13600 paragraphs, 2 epoch,(2448m 28s) is 0.499007
Loss at 13650 paragraphs, 2 epoch,(2451m 47s) is -0.499084
Accuracy at 13650 paragraphs, 2 epoch,(2451m 47s) is 0.499084
Loss at 13700 paragraphs, 2 epoch,(2

Loss at 16700 paragraphs, 2 epoch,(2653m 46s) is -0.500928
Accuracy at 16700 paragraphs, 2 epoch,(2653m 46s) is 0.500928
Loss at 16750 paragraphs, 2 epoch,(2657m 0s) is -0.500985
Accuracy at 16750 paragraphs, 2 epoch,(2657m 0s) is 0.500985
Loss at 16800 paragraphs, 2 epoch,(2660m 14s) is -0.501339
Accuracy at 16800 paragraphs, 2 epoch,(2660m 14s) is 0.501339
Loss at 16850 paragraphs, 2 epoch,(2663m 27s) is -0.501454
Accuracy at 16850 paragraphs, 2 epoch,(2663m 27s) is 0.501454
Loss at 16900 paragraphs, 2 epoch,(2666m 39s) is -0.501745
Accuracy at 16900 paragraphs, 2 epoch,(2666m 40s) is 0.501745
Loss at 16950 paragraphs, 2 epoch,(2669m 52s) is -0.502094
Accuracy at 16950 paragraphs, 2 epoch,(2669m 52s) is 0.502094
Loss at 17000 paragraphs, 2 epoch,(2673m 5s) is -0.502206
Accuracy at 17000 paragraphs, 2 epoch,(2673m 5s) is 0.502206
Loss at 17050 paragraphs, 2 epoch,(2676m 19s) is -0.502317
Accuracy at 17050 paragraphs, 2 epoch,(2676m 19s) is 0.502317
Loss at 17100 paragraphs, 2 epoch,(2

Loss at 20100 paragraphs, 2 epoch,(2872m 21s) is -0.502164
Accuracy at 20100 paragraphs, 2 epoch,(2872m 21s) is 0.502164
Loss at 20150 paragraphs, 2 epoch,(2875m 34s) is -0.501911
Accuracy at 20150 paragraphs, 2 epoch,(2875m 34s) is 0.501911
Loss at 20200 paragraphs, 2 epoch,(2878m 47s) is -0.502153
Accuracy at 20200 paragraphs, 2 epoch,(2878m 47s) is 0.502153
Loss at 20250 paragraphs, 2 epoch,(2881m 59s) is -0.502395
Accuracy at 20250 paragraphs, 2 epoch,(2881m 59s) is 0.502395
Loss at 20300 paragraphs, 2 epoch,(2885m 12s) is -0.502241
Accuracy at 20300 paragraphs, 2 epoch,(2885m 12s) is 0.502241
Loss at 20350 paragraphs, 2 epoch,(2888m 25s) is -0.502187
Accuracy at 20350 paragraphs, 2 epoch,(2888m 25s) is 0.502187
Loss at 20400 paragraphs, 2 epoch,(2891m 37s) is -0.501838
Accuracy at 20400 paragraphs, 2 epoch,(2891m 38s) is 0.501838
Loss at 20450 paragraphs, 2 epoch,(2894m 50s) is -0.501736
Accuracy at 20450 paragraphs, 2 epoch,(2894m 51s) is 0.501736
Loss at 20500 paragraphs, 2 epoc