In [12]:
from torch.utils.data import Dataset, DataLoader
from nltk.corpus import treebank
from abc import abstractmethod
from nltk import sent_tokenize
import torch.nn as nn
import pandas as pd
import numpy as np
import collections
import itertools
import argparse
import easydict
import random
import pickle
import MeCab
import torch
import json
import sys
import os
import re

In [47]:
collections.Counter(itertools.chain.from_iterable(treebank.sents()))

Counter({'Pierre': 1,
         'Vinken': 2,
         ',': 4885,
         '61': 5,
         'years': 115,
         'old': 24,
         'will': 281,
         'join': 4,
         'the': 4045,
         'board': 30,
         'as': 385,
         'a': 1878,
         'nonexecutive': 5,
         'director': 32,
         'Nov.': 24,
         '29': 5,
         '.': 3828,
         'Mr.': 375,
         'is': 671,
         'chairman': 45,
         'of': 2319,
         'Elsevier': 1,
         'N.V.': 3,
         'Dutch': 3,
         'publishing': 13,
         'group': 43,
         'Rudolph': 3,
         'Agnew': 1,
         '55': 10,
         'and': 1511,
         'former': 19,
         'Consolidated': 2,
         'Gold': 2,
         'Fields': 2,
         'PLC': 13,
         'was': 367,
         'named': 22,
         '*-1': 1123,
         'this': 184,
         'British': 11,
         'industrial': 18,
         'conglomerate': 3,
         'A': 110,
         'form': 17,
         'asbestos': 11,
       

In [2]:
def config_parser(args):
    print('file path is ' + str(args.file_path))
    with open(args.config_path, 'rb') as f:
        config = easydict.EasyDict(json.load(f))
    config.model = args.model
    config.file_path = args.file_path
    config.dataset_path = args.dataset_path
    config.device = torch.device(args.device)
    return config

args = argparse.ArgumentParser(description='nlp embedding')
args.add_argument('-m', '--model', default='neg-sampling', type=str,
                  help='which model to use')
args.add_argument('-cp', '--config-path', default='config.json', type=str,
                  help='config file path (default: None)')
args.add_argument('-fp', '--file-path', default='D:\\data\\text\\news-articles\\kbanker_articles_subtitles.csv', type=str,
                  help='path to latest checkpoint (default: None)')
args.add_argument('-dp', '--dataset-path', default='data\\nlp_dataset.pkl', type=str,
                  help='if there is a pickled dataset')
args.add_argument('-d', '--device', default='cuda:0', type=str,
                  help='indices of GPUs to enable (default: all)')
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname('__file__'))))
config = config_parser(args.parse_args())
config['neg_sample_size'] = 5
config['file_path'] = 'D:\\data\\text\\torch-dataset\\kbanker_articles_processed.pkl'
config['file_path'] = 'E:\\data\\text\\news-articles\\kbanker_articles_subtitles.csv'
config['file_path'] = 'treebank'
config['model'] = 'fast-text'

file path is C:\Users\sylim2357\AppData\Roaming\jupyter\runtime\kernel-5eba494d-85f6-492a-8d9c-86555979d69c.json


In [3]:
config

{'batch_size': 128,
 'epoch': 20,
 'gpu': 0,
 'window_size': 2,
 'embed_dim': 20,
 'h_dim': 500,
 'neg_sample_size': 5,
 'model': 'fast-text',
 'file_path': 'treebank',
 'dataset_path': 'data\\nlp_dataset.pkl',
 'device': device(type='cuda', index=0)}

In [4]:
#pre-processing shit
def pre_process_raw_article(article):
    """Pre-processing news articles.
    
    Args
        article (str): article text
    
    """
    replacements = [
        ('[“”]', '"'),
        ('[‘’]', '\''),
        ('\([^)]*\)', ''),
        ('[^가-힣\'"A-Za-z0-9.\s\?\!]', ' '),
        ('(?=[^0-9])\.(?=[^0-9])', '. '),
        ('\s\s+', ' ')
    ]
    
    for old, new in replacements:
        article = re.sub(old, new, article)
        
    return article

def mecab_tokenize(sentence):
    t = MeCab.Tagger()
    return [re.split(',', re.sub('\t', ',', s))[0] for s in t.parse(sentence).split('\n') if (s!='') & ('EOS' not in s)]

In [6]:
def pre_process(articles):
    print('preprocessing the corpus')
    articles = [pre_process_raw_article(article) for article in articles]
    sentences = itertools.chain.from_iterable([sent_tokenize(article) for article in articles])
    corpus = [mecab_tokenize(s) for s in list(sentences)]
    return corpus

kbanker_processed = pre_process(pd.read_csv(config.file_path, encoding='utf-8')['article'].dropna().values)

preprocessing the corpus


In [15]:
with open('kbanker_articles_processed.pkl', 'wb') as f:
    pickle.dump(kbanker_processed, f)

In [18]:
def pre_process(articles):
    print('preprocessing the corpus')
    articles = [pre_process_raw_article(article) for article in articles]
    sentences = itertools.chain.from_iterable([sent_tokenize(article) for article in articles])
    corpus = [mecab_tokenize(s) for s in list(sentences)]
    corpus = [[ngram(w, 3) for w in s] for s in corpus]

    return corpus

def ngram(w, n):
    word = '<' + w + '>'
    if len(word) <= 3:
        return [word]
    else:
        ngram = []
        for i in range(n, len(word)+1):
            ngram += [word[i-n:i]]

        return ngram + [word]

articles = pd.read_csv(config.file_path, encoding='utf-8')['article'].dropna().values
corpus = pre_process(articles)

preprocessing the corpus


In [19]:
with open('D:\\data\\text\\torch-dataset\\kbanker_articles_processed_fasttext.pkl', 'wb') as f:
    pickle.dump(corpus, f)

In [5]:
class W2VDataset(Dataset):
    """W2V Dataset

    Args:
        config (dict): hyperparameters

    Attributes:
        root_dir (str): root
        word_to_idx (dict): word_to_idx mapping
        idx_to_word (dict): idx_to_word mapping
        x (list): train data (5-gram)
        y (list): label

    """

    def __init__(self, config):
        if 'pkl' in config.file_path:
            with open(config.file_path, 'rb') as f:
                corpus = pickle.load(f)[:1000]
        elif config.file_path == 'treebank':
            corpus = treebank.sents()
        else:
            articles = pd.read_csv(config.file_path, encoding='utf-8')['article'].dropna().values

            #pre process
            corpus = self.pre_process(articles)

        #construct word matrix
        self.word_to_idx, self.idx_to_word = self.construct_word_idx(corpus)
        corpus = [[self.word_to_idx[word] for word in sentence] for sentence in corpus]
        #make dataset
        self.x, self.y = self.construct_dataset(corpus, config)

    def pre_process(self, articles):
        print('preprocessing the corpus')
        articles = [pre_process_raw_article(article) for article in articles]
        sentences = itertools.chain.from_iterable([sent_tokenize(article) for article in articles])
        corpus = [mecab_tokenize(s) for s in list(sentences)]
        
        return corpus

    def construct_word_idx(self, corpus):
        print('constructing word matrix')
        word_set = set(itertools.chain.from_iterable(corpus))
        word_to_idx = {word : idx for idx, word in enumerate(word_set)}
        idx_to_word = {word_to_idx[word] : word for word in word_to_idx}
        
        return word_to_idx, idx_to_word

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        if torch.is_tensor(idx): idx = idx.tolist()
        
        return self.x[idx], self.y[idx]

    def fast_text_pre_process(self, corpus):
        pass
    
    @abstractmethod
    def construct_dataset(self, corpus, config):
        raise NotImplementedError
        
class CBOWDataset(W2VDataset):
    """CBOW Dataset"""

    def __init__(self, config):
        super().__init__(config)

    def construct_dataset(self, corpus, config):
        print('constructing training dataset')
        x, y = [], []
        for sentence in corpus:
            for i in range(config.window_size, len(sentence) - config.window_size):
                x += [sentence[i-config.window_size:i] + sentence[i+1:i+condig.window_size+1]]
                y += [sentence[i]]
        
        return x, y

class SkipGramDataset(W2VDataset):
    """Skip-Gram Dataset"""

    def __init__(self, config):
        super().__init__(config)

    def construct_dataset(self, corpus, config):
        print('constructing training dataset')
        x, y = [], []
        for sentence in corpus:
            for i in range(config.window_size, len(sentence) - config.window_size):
                x += [sentence[i]] * (config.window_size*2)
                y += sentence[i-config.window_size:i] + sentence[i+1:i+config.window_size+1]
        
        return x, y

class NegSamplingDataset(W2VDataset):
    """Negative Sampling Dataset.

    Args:
        config (dict): hyperparameters
        word_frequency (dict): word index - word frequency map for negative sampling
        
    """

    def __init__(self, config):
        super().__init__(config)

    def construct_word_idx(self, corpus):
        print('constructing word matrix')
        word_frequency = collections.Counter(itertools.chain.from_iterable(corpus))
        word_frequency = {word: word_frequency[word]**(3/4) for idx, word in enumerate(word_frequency)}
        word_to_idx = {word: idx for idx, word in enumerate(word_frequency)}
        idx_to_word = {word_to_idx[word]: word for word in word_to_idx}
        self.word_frequency = {word_to_idx[word]: word_frequency[word] for word in word_frequency}
        
        return word_to_idx, idx_to_word
    
    def construct_dataset(self, corpus, config):
        print('constructing training dataset')
        target, pos, neg = [], [], []
        for sentence in corpus:
            for i in range(config.window_size, len(sentence) - config.window_size):
                target += [sentence[i]] * (config.window_size*2)
                pos += sentence[i-config.window_size:i] + sentence[i+1:i+config.window_size+1]
                neg.append(self.neg_sample(sentence[i-config.window_size:i+config.window_size+1], config))
        neg = np.array(neg).reshape(-1, config.window_size*2)
        
        return (pos, neg), target
    
    def neg_sample(self, word_contxt, config):
        word_universe = self.idx_to_word.keys() - set(word_contxt)
        word_distn = np.array([self.word_frequency[idx] for idx in word_universe])
        word_distn = word_distn / word_distn.sum()
        
        return np.random.choice(a=list(word_universe), size=config.neg_sample_size*config.window_size*2, p=word_distn)

    def __getitem__(self, idx):
        if torch.is_tensor(idx): idx = idx.tolist()
            
        return [self.x[0][idx], self.y[idx], 1], [self.x[1][idx], self.y[idx], 0]

In [5]:
neg_sam_dataset = NegSamplingDataset(config)

constructing word matrix
constructing training dataset


In [5]:
import pickle
# with open('kbanker_nlp_dataset.pkl', 'wb') as f:
#     pickle.dump(nlp_dataset, f)
    
with open('D:\\data\\text\\torch-dataset\\kbanker_nlp_dataset.pkl', 'rb') as f:
    nlp_dataset = pickle.load(f)

In [None]:
def collate_fn(data):
    seqs, labels = zip(*data)
    return seqs, labels

dataloader = DataLoader(neg_sam_dataset, batch_size=config.batch_size, \
                        shuffle=False, num_workers=0)

target_emb = nn.Embedding(len(neg_sam_dataset), config.embed_dim).float().to(config.device)
context_emb = nn.Embedding(len(neg_sam_dataset), config.embed_dim).float().to(config.device)
target_emb.weight.data.uniform_(-1,1)
context_emb.weight.data.uniform_(-1,1)
sigmoid = nn.Sigmoid()
similar = nn.CosineSimilarity()

criterion = nn.BCELoss()
optimizer = torch.optim.SGD(list(target_emb.parameters()) + list(context_emb.parameters()), lr=5e-3, momentum=0.9)

for epoch in range(500):
    print('Epoch ' + str(epoch))
    for i, sample in enumerate(dataloader):
        pos_idx = sample[0][0].long().to(config.device)
        neg_idx = sample[1][0].long().to(config.device)
        target_idx = sample[0][1].long().to(config.device)
        pos_label = sample[0][2].float().to(config.device)
        neg_label = sample[1][2].float().to(config.device)
        
        pos = context_emb(pos_idx)
        neg = context_emb(neg_idx)
        target = target_emb(target_idx)
        
        pred_pos = sigmoid(similar(pos, target))
        pred_neg = sigmoid(similar(neg, target.unsqueeze(1).expand_as(neg)))

        pos_loss = criterion(pred_pos, pos_label)
        neg_loss = torch.sum(criterion(pred_neg, neg_label.unsqueeze(1).expand_as(pred_neg)))

        pos_loss.retain_grad()
        neg_loss.retain_grad()
        
        loss = pos_loss + neg_loss
        loss.retain_grad()
        
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()
        
        if i % 100 == 99:
            print(i, loss.item())
            if i % 100000 == 99:
                with open('./checkpoints/neg_sample_checkpoint_epoch' + str(epoch) + '_' + str(i) + '.pkl', 'wb') as f:
                    pickle.dump(target_emb, f)
                print('./checkpoints/neg_sample_checkpoint_epoch' + str(epoch) + '_' + str(i) + '.pkl saved')

Epoch 0
99 1.4274840354919434
./checkpoints/neg_sample_checkpoint_epoch0_99.pkl saved
199 1.4266541004180908
299 1.4117457866668701
399 1.4178102016448975
499 1.4213440418243408
599 1.422420859336853
699 1.410521388053894
799 1.4329781532287598
899 1.4238032102584839
999 1.4325814247131348
1099 1.427785873413086
1199 1.418020486831665
1299 1.4091848134994507
1399 1.4011914730072021
1499 1.4018125534057617
1599 1.4198497533798218
1699 1.4153056144714355
1799 1.4285775423049927
1899 1.4180374145507812
1999 1.4312057495117188
2099 1.4218358993530273
2199 1.442793369293213
2299 1.4347388744354248
2399 1.4156382083892822
2499 1.4087920188903809
2599 1.4141075611114502
Epoch 1
99 1.426992416381836
./checkpoints/neg_sample_checkpoint_epoch1_99.pkl saved
199 1.4265809059143066
299 1.411516547203064
399 1.417083740234375
499 1.4206494092941284
599 1.4223313331604004
699 1.408756971359253
799 1.4311903715133667
899 1.4232122898101807
999 1.4319427013397217
1099 1.4277105331420898
1199 1.41714417

499 1.411481261253357
599 1.4230128526687622
699 1.401421070098877
799 1.4225894212722778
899 1.4124460220336914
999 1.426813006401062
1099 1.4283559322357178
1199 1.4030455350875854
1299 1.4003541469573975
1399 1.3928070068359375
1499 1.4007227420806885
1599 1.4203131198883057
1699 1.4159891605377197
1799 1.425146222114563
1899 1.4122073650360107
1999 1.4290112257003784
2099 1.410962462425232
2199 1.4258168935775757
2299 1.4335510730743408
2399 1.4107117652893066
2499 1.398393154144287
2599 1.4064035415649414
Epoch 13
99 1.4235801696777344
./checkpoints/neg_sample_checkpoint_epoch13_99.pkl saved
199 1.4245445728302002
299 1.4107062816619873
399 1.4192402362823486
499 1.4108277559280396
599 1.4225919246673584
699 1.4003541469573975
799 1.4218244552612305
899 1.4114875793457031
999 1.4260783195495605
1099 1.4279133081436157
1199 1.402204155921936
1299 1.3995816707611084
1399 1.3923684358596802
1499 1.4004777669906616
1599 1.4201717376708984
1699 1.4159451723098755
1799 1.424801588058471

1099 1.4239299297332764
1199 1.3934950828552246
1299 1.3950456380844116
1399 1.3919074535369873
1499 1.3999524116516113
1599 1.418320894241333
1699 1.4160966873168945
1799 1.419672966003418
1899 1.409827709197998
1999 1.4232912063598633
2099 1.4046841859817505
2199 1.4205100536346436
2299 1.4274259805679321
2399 1.40207839012146
2499 1.388907551765442
2599 1.4037381410598755
Epoch 25
99 1.421613335609436
./checkpoints/neg_sample_checkpoint_epoch25_99.pkl saved
199 1.420454502105713
299 1.406466007232666
399 1.4137810468673706
499 1.4030784368515015
599 1.4181790351867676
699 1.3903248310089111
799 1.4138500690460205
899 1.3965990543365479
999 1.4166507720947266
1099 1.4240434169769287
1199 1.3930941820144653
1299 1.3945589065551758
1399 1.3914291858673096
1499 1.3996973037719727
1599 1.4181259870529175
1699 1.4159162044525146
1799 1.4199070930480957
1899 1.4099854230880737
1999 1.4228609800338745
2099 1.4041752815246582
2199 1.4203684329986572
2299 1.427168607711792
2399 1.400967955589

1599 1.414900541305542
1699 1.4147396087646484
1799 1.4201767444610596
1899 1.4082609415054321
1999 1.4186935424804688
2099 1.3983937501907349
2199 1.4150291681289673
2299 1.4257317781448364
2399 1.3953149318695068
2499 1.3821896314620972
2599 1.4033031463623047
Epoch 37
99 1.4206249713897705
./checkpoints/neg_sample_checkpoint_epoch37_99.pkl saved
199 1.4154770374298096
299 1.4006407260894775
399 1.4065568447113037
499 1.3990633487701416
599 1.41661536693573
699 1.383568525314331
799 1.404003620147705
899 1.3828493356704712
999 1.40566086769104
1099 1.4181911945343018
1199 1.3872089385986328
1299 1.387812614440918
1399 1.385753870010376
1499 1.3975651264190674
1599 1.41412353515625
1699 1.4144560098648071
1799 1.419750690460205
1899 1.4079875946044922
1999 1.4186134338378906
2099 1.3976044654846191
2199 1.4145652055740356
2299 1.425318956375122
2399 1.3948884010314941
2499 1.3816217184066772
2599 1.4030910730361938
Epoch 38
99 1.4202053546905518
./checkpoints/neg_sample_checkpoint_epo

2199 1.4114980697631836
2299 1.4208877086639404
2399 1.3972787857055664
2499 1.375667929649353
2599 1.405508041381836
Epoch 49
99 1.4225778579711914
./checkpoints/neg_sample_checkpoint_epoch49_99.pkl saved
199 1.4105374813079834
299 1.3964884281158447
399 1.4015612602233887
499 1.394449234008789
599 1.4144253730773926
699 1.3779879808425903
799 1.4000033140182495
899 1.383259892463684
999 1.3981573581695557
1099 1.414625644683838
1199 1.3822894096374512
1299 1.401282548904419
1399 1.380066990852356
1499 1.40195894241333
1599 1.4194676876068115
1699 1.4123680591583252
1799 1.414267659187317
1899 1.4033931493759155
1999 1.4181458950042725
2099 1.3873755931854248
2199 1.4109652042388916
2299 1.4204926490783691
2399 1.3974597454071045
2499 1.37516450881958
2599 1.40618896484375
Epoch 50
99 1.4219553470611572
./checkpoints/neg_sample_checkpoint_epoch50_99.pkl saved
199 1.4099621772766113
299 1.3961021900177002
399 1.4017460346221924
499 1.3938307762145996
599 1.4138331413269043
699 1.377071

99 1.4169681072235107
./checkpoints/neg_sample_checkpoint_epoch61_99.pkl saved
199 1.4034409523010254
299 1.390337347984314
399 1.398298740386963
499 1.3889451026916504
599 1.4074901342391968
699 1.3683714866638184
799 1.3948100805282593
899 1.3740323781967163
999 1.385892629623413
1099 1.4120193719863892
1199 1.3739880323410034
1299 1.395032525062561
1399 1.3757147789001465
1499 1.3994271755218506
1599 1.4166865348815918
1699 1.4089299440383911
1799 1.4095759391784668
1899 1.3983136415481567
1999 1.4158101081848145
2099 1.3786507844924927
2199 1.4039943218231201
2299 1.4188852310180664
2399 1.3937993049621582
2499 1.370390772819519
2599 1.4070930480957031
Epoch 62
99 1.4163732528686523
./checkpoints/neg_sample_checkpoint_epoch62_99.pkl saved
199 1.4028687477111816
299 1.389768362045288
399 1.3978941440582275
499 1.3883920907974243
599 1.4068670272827148
699 1.3674876689910889
799 1.3944576978683472
899 1.3730833530426025
999 1.3848278522491455
1099 1.4116623401641846
1199 1.3735499382

399 1.3949518203735352
499 1.3860034942626953
599 1.4026546478271484
699 1.3695317506790161
799 1.3968186378479004
899 1.361940622329712
999 1.374861717224121
1099 1.4069610834121704
1199 1.3705353736877441
1299 1.3926328420639038
1399 1.3698208332061768
1499 1.3954529762268066
1599 1.4147260189056396
1699 1.4039802551269531
1799 1.4027631282806396
1899 1.395398736000061
1999 1.4141627550125122
2099 1.378661870956421
2199 1.3980497121810913
2299 1.421278953552246
2399 1.3913944959640503
2499 1.3672879934310913
2599 1.4092336893081665
Epoch 74
99 1.410707712173462
./checkpoints/neg_sample_checkpoint_epoch74_99.pkl saved
199 1.4044511318206787
299 1.3909664154052734
399 1.3935589790344238
499 1.3861160278320312
599 1.4022228717803955
699 1.3685383796691895
799 1.3964295387268066
899 1.3611795902252197
999 1.3743207454681396
1099 1.4069050550460815
1199 1.3701119422912598
1299 1.3921003341674805
1399 1.3692729473114014
1499 1.3948907852172852
1599 1.414453387260437
1699 1.403568148612976


899 1.3505682945251465
999 1.3627560138702393
1099 1.4024837017059326
1199 1.3659143447875977
1299 1.3863468170166016
1399 1.3646275997161865
1499 1.3908476829528809
1599 1.4131187200546265
1699 1.4000790119171143
1799 1.3938848972320557
1899 1.394275426864624
1999 1.4126200675964355
2099 1.3726590871810913
2199 1.3901605606079102
2299 1.4160431623458862
2399 1.3892598152160645
2499 1.3611741065979004
2599 1.4049510955810547
Epoch 86
99 1.4049649238586426
./checkpoints/neg_sample_checkpoint_epoch86_99.pkl saved
199 1.3987305164337158
299 1.3844997882843018
399 1.389517068862915
499 1.37980318069458
599 1.3934032917022705
699 1.3586663007736206
799 1.3929336071014404
899 1.3497514724731445
999 1.3622419834136963
1099 1.4026832580566406
1199 1.3655235767364502
1299 1.3858039379119873
1399 1.3640248775482178
1499 1.3903696537017822
1599 1.4128379821777344
1699 1.3996591567993164
1799 1.3933284282684326
1899 1.393933892250061
1999 1.4124387502670288
2099 1.3727773427963257
2199 1.389607667

1499 1.3854680061340332
1599 1.4103935956954956
1699 1.396897792816162
1799 1.3868355751037598
1899 1.3914612531661987
1999 1.4100542068481445
2099 1.3671321868896484
2199 1.38387131690979
2299 1.410680890083313
2399 1.3866229057312012
2499 1.356022596359253
2599 1.402057409286499
Epoch 98
99 1.3999340534210205
./checkpoints/neg_sample_checkpoint_epoch98_99.pkl saved
199 1.3956806659698486
299 1.375588059425354
399 1.3860487937927246
499 1.3753836154937744
599 1.3884615898132324
699 1.3487277030944824
799 1.3906419277191162
899 1.3420053720474243
999 1.3503406047821045
1099 1.397647738456726
1199 1.3608603477478027
1299 1.3797757625579834
1399 1.3566277027130127
1499 1.3854353427886963
1599 1.4105262756347656
1699 1.396350383758545
1799 1.3855721950531006
1899 1.3906867504119873
1999 1.4095752239227295
2099 1.3666030168533325
2199 1.3836008310317993
2299 1.4102451801300049
2399 1.3865761756896973
2499 1.355633020401001
2599 1.4015250205993652
Epoch 99
99 1.399375081062317
./checkpoints

1999 1.4068522453308105
2099 1.3621139526367188
2199 1.3776628971099854
2299 1.4074029922485352
2399 1.383603811264038
2499 1.3522133827209473
2599 1.4009251594543457
Epoch 110
99 1.3964831829071045
./checkpoints/neg_sample_checkpoint_epoch110_99.pkl saved
199 1.3912956714630127
299 1.3700675964355469
399 1.3824355602264404
499 1.3705191612243652
599 1.3830630779266357
699 1.3388080596923828
799 1.3874297142028809
899 1.334328532218933
999 1.338128685951233
1099 1.3935363292694092
1199 1.355223298072815
1299 1.3780999183654785
1399 1.3507128953933716
1499 1.3819859027862549
1599 1.4066277742385864
1699 1.3921959400177002
1799 1.377455711364746
1899 1.3869998455047607
1999 1.4065580368041992
2099 1.3617430925369263
2199 1.3767478466033936
2299 1.4069308042526245
2399 1.383852481842041
2499 1.3534095287322998
2599 1.4006052017211914
Epoch 111
99 1.3960905075073242
./checkpoints/neg_sample_checkpoint_epoch111_99.pkl saved
199 1.390847086906433
299 1.3693338632583618
399 1.3820650577545166

2499 1.3499821424484253
2599 1.3960893154144287
Epoch 122
99 1.3922375440597534
./checkpoints/neg_sample_checkpoint_epoch122_99.pkl saved
199 1.386121392250061
299 1.3625508546829224
399 1.3776135444641113
499 1.3655729293823242
599 1.376435399055481
699 1.3300031423568726
799 1.383713960647583
899 1.325951099395752
999 1.3268060684204102
1099 1.3903403282165527
1199 1.3512024879455566
1299 1.3730499744415283
1399 1.3447303771972656
1499 1.3775813579559326
1599 1.4035544395446777
1699 1.3871216773986816
1799 1.3718905448913574
1899 1.3827396631240845
1999 1.401978850364685
2099 1.358266830444336
2199 1.3715384006500244
2299 1.4042713642120361
2399 1.3812317848205566
2499 1.3496140241622925
2599 1.3958020210266113
Epoch 123
99 1.3919343948364258
./checkpoints/neg_sample_checkpoint_epoch123_99.pkl saved
199 1.3857512474060059
299 1.3619917631149292
399 1.3772706985473633
499 1.3651752471923828
599 1.3759620189666748
699 1.3293871879577637
799 1.3834123611450195
899 1.325394630432129
999 

199 1.3831918239593506
299 1.3573174476623535
399 1.3731848001480103
499 1.3618056774139404
599 1.3712859153747559
699 1.3237203359603882
799 1.380091905593872
899 1.3203537464141846
999 1.3171403408050537
1099 1.3857759237289429
1199 1.3476463556289673
1299 1.3678979873657227
1399 1.3386576175689697
1499 1.371446132659912
1599 1.4015032052993774
1699 1.3840796947479248
1799 1.3669021129608154
1899 1.3790931701660156
1999 1.398662805557251
2099 1.354149580001831
2199 1.3657290935516357
2299 1.3992143869400024
2399 1.378987193107605
2499 1.3448381423950195
2599 1.3928494453430176
Epoch 135
99 1.3883816003799438
./checkpoints/neg_sample_checkpoint_epoch135_99.pkl saved
199 1.383527398109436
299 1.3568553924560547
399 1.3728625774383545
499 1.36138117313385
599 1.3709053993225098
699 1.323216199874878
799 1.3799680471420288
899 1.3198578357696533
999 1.3164565563201904
1099 1.3854281902313232
1199 1.3472542762756348
1299 1.3674982786178589
1399 1.3381714820861816
1499 1.3710532188415527
1

699 1.3186275959014893
799 1.3784661293029785
899 1.3138294219970703
999 1.3073842525482178
1099 1.381402850151062
1199 1.3434481620788574
1299 1.3631057739257812
1399 1.3343197107315063
1499 1.366528034210205
1599 1.4003264904022217
1699 1.380976676940918
1799 1.3612067699432373
1899 1.37424898147583
1999 1.3961057662963867
2099 1.3505616188049316
2199 1.3589204549789429
2299 1.394282579421997
2399 1.3769457340240479
2499 1.3407471179962158
2599 1.389006495475769
Epoch 147
99 1.3844516277313232
./checkpoints/neg_sample_checkpoint_epoch147_99.pkl saved
199 1.3811817169189453
299 1.3514997959136963
399 1.3688629865646362
499 1.356935977935791
599 1.3678226470947266
699 1.3185625076293945
799 1.378204345703125
899 1.3133726119995117
999 1.3065651655197144
1099 1.3811454772949219
1199 1.3430471420288086
1299 1.3627164363861084
1399 1.3334836959838867
1499 1.366146445274353
1599 1.400069236755371
1699 1.3806285858154297
1799 1.3607356548309326
1899 1.3738734722137451
1999 1.395857095718383

1199 1.3398382663726807
1299 1.3599607944488525
1399 1.329218864440918
1499 1.3625946044921875
1599 1.3964643478393555
1699 1.3781384229660034
1799 1.3554186820983887
1899 1.370206356048584
1999 1.3936467170715332
2099 1.3469345569610596
2199 1.3519397974014282
2299 1.388282299041748
2399 1.3740955591201782
2499 1.334472894668579
2599 1.3857135772705078
Epoch 159
99 1.3807908296585083
./checkpoints/neg_sample_checkpoint_epoch159_99.pkl saved
199 1.3771638870239258
299 1.3455710411071777
399 1.3648074865341187
499 1.3508665561676025
599 1.363759994506836
699 1.312575101852417
799 1.3739131689071655
899 1.3071987628936768
999 1.2969119548797607
1099 1.377264142036438
1199 1.3394229412078857
1299 1.3595707416534424
1399 1.3287602663040161
1499 1.3622238636016846
1599 1.3961951732635498
1699 1.3777916431427002
1799 1.3549476861953735
1899 1.3698304891586304
1999 1.393513798713684
2099 1.3466873168945312
2199 1.3517000675201416
2299 1.387820839881897
2399 1.3737988471984863
2499 1.334043502

1699 1.3735036849975586
1799 1.3497254848480225
1899 1.3654332160949707
1999 1.3906090259552002
2099 1.3445011377334595
2199 1.346189260482788
2299 1.3824024200439453
2399 1.3726537227630615
2499 1.3299205303192139
2599 1.3836960792541504
Epoch 171
99 1.3762305974960327
./checkpoints/neg_sample_checkpoint_epoch171_99.pkl saved
199 1.3739595413208008
299 1.3404371738433838
399 1.3607449531555176
499 1.3449465036392212
599 1.359465479850769
699 1.306123971939087
799 1.370281457901001
899 1.3019638061523438
999 1.2876060009002686
1099 1.3740301132202148
1199 1.3330178260803223
1299 1.3547358512878418
1399 1.3221404552459717
1499 1.357956886291504
1599 1.3929486274719238
1699 1.3731482028961182
1799 1.3492083549499512
1899 1.36505126953125
1999 1.3903342485427856
2099 1.344299077987671
2199 1.345693588256836
2299 1.3817334175109863
2399 1.3725690841674805
2499 1.3295233249664307
2599 1.3834136724472046
Epoch 172
99 1.375910758972168
./checkpoints/neg_sample_checkpoint_epoch172_99.pkl saved

2199 1.347900152206421
2299 1.3822821378707886
2399 1.3773550987243652
2499 1.3474526405334473
2599 1.390615463256836
Epoch 183
99 1.3845829963684082
./checkpoints/neg_sample_checkpoint_epoch183_99.pkl saved
199 1.3778414726257324
299 1.3385978937149048
399 1.3576747179031372
499 1.3422763347625732
599 1.3563873767852783
699 1.3093739748001099
799 1.368511438369751
899 1.2995193004608154
999 1.2798690795898438
1099 1.3732597827911377
1199 1.3448314666748047
1299 1.3541929721832275
1399 1.332688808441162
1499 1.353721261024475
1599 1.3885622024536133
1699 1.374069333076477
1799 1.349906325340271
1899 1.3636878728866577
1999 1.3918776512145996
2099 1.3515315055847168
2199 1.3474256992340088
2299 1.3818821907043457
2399 1.3771815299987793
2499 1.3469836711883545
2599 1.390343189239502
Epoch 184
99 1.3840901851654053
./checkpoints/neg_sample_checkpoint_epoch184_99.pkl saved
199 1.3782920837402344
299 1.3382165431976318
399 1.357375144958496
499 1.3418684005737305
599 1.3560459613800049
699

Epoch 195
99 1.380115032196045
./checkpoints/neg_sample_checkpoint_epoch195_99.pkl saved
199 1.3748605251312256
299 1.3345212936401367
399 1.354502558708191
499 1.3375376462936401
599 1.3532590866088867
699 1.305431604385376
799 1.366335391998291
899 1.2968891859054565
999 1.2728923559188843
1099 1.369779348373413
1199 1.3406697511672974
1299 1.3505940437316895
1399 1.3304011821746826
1499 1.3503304719924927
1599 1.384864091873169
1699 1.36997652053833
1799 1.344944715499878
1899 1.3579180240631104
1999 1.3887932300567627
2099 1.3479702472686768
2199 1.3419179916381836
2299 1.3779438734054565
2399 1.3748650550842285
2499 1.3437002897262573
2599 1.38675856590271
Epoch 196
99 1.3797491788864136
./checkpoints/neg_sample_checkpoint_epoch196_99.pkl saved
199 1.3745596408843994
299 1.3344924449920654
399 1.3542156219482422
499 1.3371297121047974
599 1.353161334991455
699 1.3050724267959595
799 1.366072416305542
899 1.2963883876800537
999 1.272256851196289
1099 1.3696067333221436
1199 1.34030

299 1.3291833400726318
399 1.3505054712295532
499 1.3323273658752441
599 1.3484160900115967
699 1.299978256225586
799 1.363276481628418
899 1.2940497398376465
999 1.2657558917999268
1099 1.367370367050171
1199 1.3355770111083984
1299 1.3476674556732178
1399 1.3258557319641113
1499 1.3459879159927368
1599 1.3828762769699097
1699 1.365525722503662
1799 1.3393046855926514
1899 1.3561313152313232
1999 1.38472580909729
2099 1.3469984531402588
2199 1.3363356590270996
2299 1.3734686374664307
2399 1.3731615543365479
2499 1.3390628099441528
2599 1.382371425628662
Epoch 208
99 1.3751864433288574
./checkpoints/neg_sample_checkpoint_epoch208_99.pkl saved
199 1.3711726665496826
299 1.3287800550460815
399 1.3502130508422852
499 1.3318884372711182
599 1.3479883670806885
699 1.2996747493743896
799 1.363194227218628
899 1.2937722206115723
999 1.265189290046692
1099 1.3671424388885498
1199 1.3352289199829102
1299 1.3473756313323975
1399 1.3254706859588623
1499 1.345984697341919
1599 1.3825875520706177
1

799 1.3616440296173096
899 1.290507435798645
999 1.2577331066131592
1099 1.3675625324249268
1199 1.3348913192749023
1299 1.3477938175201416
1399 1.321234107017517
1499 1.343112587928772
1599 1.3802235126495361
1699 1.362115740776062
1799 1.3348376750946045
1899 1.3525292873382568
1999 1.3818652629852295
2099 1.3455153703689575
2199 1.3328912258148193
2299 1.3706146478652954
2399 1.3850985765457153
2499 1.3369933366775513
2599 1.3902904987335205
Epoch 220
99 1.3735392093658447
./checkpoints/neg_sample_checkpoint_epoch220_99.pkl saved
199 1.3689970970153809
299 1.3315212726593018
399 1.3478267192840576
499 1.3284711837768555
599 1.3450559377670288
699 1.2985435724258423
799 1.3614444732666016
899 1.290224552154541
999 1.2572119235992432
1099 1.3672609329223633
1199 1.3351161479949951
1299 1.347550868988037
1399 1.3208972215652466
1499 1.3429102897644043
1599 1.3799784183502197
1699 1.3617784976959229
1799 1.334441065788269
1899 1.352224349975586
1999 1.3816016912460327
2099 1.34535002708

1299 1.3449666500091553
1399 1.3167747259140015
1499 1.3404823541641235
1599 1.378225564956665
1699 1.358349323272705
1799 1.330463171005249
1899 1.348095417022705
1999 1.3788472414016724
2099 1.342727780342102
2199 1.3278248310089111
2299 1.3646461963653564
2399 1.3817058801651
2499 1.3328959941864014
2599 1.3885616064071655
Epoch 232
99 1.3697192668914795
./checkpoints/neg_sample_checkpoint_epoch232_99.pkl saved
199 1.366868019104004
299 1.3273581266403198
399 1.3456541299819946
499 1.3245537281036377
599 1.3417208194732666
699 1.2949974536895752
799 1.3588459491729736
899 1.289471983909607
999 1.2510087490081787
1099 1.3640903234481812
1199 1.3317934274673462
1299 1.3446769714355469
1399 1.3170362710952759
1499 1.3405735492706299
1599 1.377995491027832
1699 1.3573628664016724
1799 1.330058217048645
1899 1.3478124141693115
1999 1.3790481090545654
2099 1.3425812721252441
2199 1.3274272680282593
2299 1.3642706871032715
2399 1.3814773559570312
2499 1.332590103149414
2599 1.3889949321746

1799 1.3264813423156738
1899 1.3448636531829834
1999 1.3733913898468018
2099 1.344501256942749
2199 1.3238914012908936
2299 1.356119155883789
2399 1.3774819374084473
2499 1.322458267211914
2599 1.3933161497116089
Epoch 244
99 1.3680729866027832
./checkpoints/neg_sample_checkpoint_epoch244_99.pkl saved
199 1.3672082424163818
299 1.324019193649292
399 1.3439083099365234
499 1.319939374923706
599 1.3375145196914673
699 1.2948565483093262
799 1.3560899496078491
899 1.2821965217590332
999 1.2463772296905518
1099 1.361074686050415
1199 1.3280407190322876
1299 1.343066930770874
1399 1.3147008419036865
1499 1.3385697603225708
1599 1.3726606369018555
1699 1.3514151573181152
1799 1.326070785522461
1899 1.3445725440979004
1999 1.3737566471099854
2099 1.3444795608520508
2199 1.3236993551254272
2299 1.3556325435638428
2399 1.3783490657806396
2499 1.3221745491027832
2599 1.3931100368499756
Epoch 245
99 1.3687522411346436
./checkpoints/neg_sample_checkpoint_epoch245_99.pkl saved
199 1.366410017013549

2299 1.3494372367858887
2399 1.3756344318389893
2499 1.3195552825927734
2599 1.3913012742996216
Epoch 256
99 1.3659908771514893
./checkpoints/neg_sample_checkpoint_epoch256_99.pkl saved
199 1.3642477989196777
299 1.3194239139556885
399 1.3407447338104248
499 1.314400553703308
599 1.334686279296875
699 1.2910881042480469
799 1.3552675247192383
899 1.2822163105010986
999 1.2414870262145996
1099 1.3599262237548828
1199 1.3240711688995361
1299 1.339310884475708
1399 1.3124306201934814
1499 1.3367578983306885
1599 1.3709220886230469
1699 1.3461910486221313
1799 1.3214173316955566
1899 1.3391287326812744
1999 1.371288776397705
2099 1.3423385620117188
2199 1.32021164894104
2299 1.3488235473632812
2399 1.3753902912139893
2499 1.3198246955871582
2599 1.3910644054412842
Epoch 257
99 1.365630865097046
./checkpoints/neg_sample_checkpoint_epoch257_99.pkl saved
199 1.3640376329421997
299 1.3190040588378906
399 1.3404810428619385
499 1.3140143156051636
599 1.334409236907959
699 1.2908076047897339
799

199 1.3609367609024048
299 1.3155851364135742
399 1.3376948833465576
499 1.3102624416351318
599 1.3328866958618164
699 1.2887791395187378
799 1.3539612293243408
899 1.2794082164764404
999 1.2382590770721436
1099 1.3581883907318115
1199 1.3208122253417969
1299 1.3362419605255127
1399 1.3099277019500732
1499 1.3336095809936523
1599 1.3684438467025757
1699 1.342862606048584
1799 1.3161554336547852
1899 1.3362147808074951
1999 1.3683173656463623
2099 1.3408986330032349
2199 1.3156185150146484
2299 1.3459386825561523
2399 1.3736231327056885
2499 1.3163583278656006
2599 1.3888466358184814
Epoch 269
99 1.3617053031921387
./checkpoints/neg_sample_checkpoint_epoch269_99.pkl saved
199 1.360695481300354
299 1.3152592182159424
399 1.337449073791504
499 1.3099309206008911
599 1.3326244354248047
699 1.2885581254959106
799 1.3537970781326294
899 1.2787294387817383
999 1.2378920316696167
1099 1.3579802513122559
1199 1.3209501504898071
1299 1.335989236831665
1399 1.309699296951294
1499 1.33343327045440

699 1.287023663520813
799 1.3528984785079956
899 1.2770791053771973
999 1.2331383228302002
1099 1.3548409938812256
1199 1.3173457384109497
1299 1.3334290981292725
1399 1.3073058128356934
1499 1.332585334777832
1599 1.364895224571228
1699 1.3384332656860352
1799 1.3128628730773926
1899 1.3327666521072388
1999 1.3665523529052734
2099 1.339421033859253
2199 1.3119373321533203
2299 1.3429877758026123
2399 1.3709628582000732
2499 1.3123540878295898
2599 1.387235164642334
Epoch 281
99 1.3581902980804443
./checkpoints/neg_sample_checkpoint_epoch281_99.pkl saved
199 1.3568650484085083
299 1.312192440032959
399 1.3363460302352905
499 1.305457353591919
599 1.3295612335205078
699 1.2868225574493408
799 1.3527374267578125
899 1.2768514156341553
999 1.2327919006347656
1099 1.3542616367340088
1199 1.3170981407165527
1299 1.3339641094207764
1399 1.3072519302368164
1499 1.332410454750061
1599 1.364600658416748
1699 1.3381708860397339
1799 1.3127844333648682
1899 1.3324952125549316
1999 1.3669788837432

1199 1.3144190311431885
1299 1.331411600112915
1399 1.3026962280273438
1499 1.3316959142684937
1599 1.3622093200683594
1699 1.33620285987854
1799 1.3079721927642822
