In [None]:
import pandas as pd
import numpy as np
import re
from tqdm import tqdm

from nltk.corpus import stopwords

from gensim.models import Word2Vec
from gensim.models import Phrases
from gensim.models.phrases import Phraser

from sklearn.manifold import TSNE
import matplotlib.pyplot as plt


import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

%matplotlib inline

In [None]:
data = pd.read_csv('../HTML-extraction/output.csv')
data.describe()

Unnamed: 0.1,Unnamed: 0
count,115.0
mean,216.286957
std,107.237298
min,1.0
25%,143.0
50%,263.0
75%,306.5
max,355.0


In [16]:
stopWords = stopwords.words('dutch')
# pre processing data
def cleanData(sentence):
    processedList = ""
    
    # convert to lowercase, ignore all special characters - keep only alpha-numericals and spaces (not removing full-stop here)
    sentence = re.sub(r'[^A-Za-z0-9\s.]',r'',str(sentence).lower())
    sentence = re.sub(r'\n',r' ',sentence)
    
    # remove stop words
    sentence = " ".join([word for word in sentence.split() if word not in stopWords])
    
    return sentence

In [18]:
print(cleanData(data['body'][0]))

woensdag 21 maart gemeente heerenveen raadsverkiezingen. uitkomst bepaalt komende jaren koers heerenveen. kiezer goed bereiden 5 maart 14 maart 19 maart politieke debatten georganiseerd. hierbij vertegenwoordigers alle gemeentelijke partijen aanwezig. gaan elkaar debat zaken publiek plaatselijke belangen ingebracht. uiteraard gelegenheid avonden vragen stellen.


In [22]:
data['body'] = data['body'].map(lambda x: cleanData(x))

In [28]:
tmp_corpus = data['body'].map(lambda x: x.split('.'))
tmp_corpus

0      [woensdag 21 maart gemeente heerenveen raadsve...
1      [alle stemmen geteld,  ongeveer 60, 000 mensen...
2      [afgelopen zaterdag onze raadsleden lia aris l...
3      [carla dikfaber minister weten heerenveense co...
4      [juist toekomst bijzonder onderwijs druk staat...
5      [woensdag 21 maart gemeente heerenveen raadsve...
6      [intensieve campagneperiode spannende verkiezi...
7      [maandag 19 maart vindt heerenveen politiek de...
8      [woensdag 14 maart vindt akkrum politiek debat...
9      [maandag 5 maart vindt jubbega politiek debat ...
10     [aris fervent hardloper maakte tijdens trainin...
11     [prostitutieprobleem heerenveen groter wij had...
12     [zaterdag ontving boerenblij liefst 14 oudere ...
13     [christenunie cda heerenveen geschrokken advie...
14     [christenunie heerenveen klaar herindelingsver...
15     [aris leijendekker lijsttrekker christenunie h...
16     [naam lia heij getrouwd gerard samen we 5 kind...
17     [lijsttrekker aris leije

In [29]:
# corpus [[w1,w2,w3..],[..]]
corpus = []
for i in tqdm(range(len(tmp_corpus))):
    for line in tmp_corpus[i]:
        words = [x for x in line.split()]
        corpus.append(words)

  0%|          | 0/115 [00:00<?, ?it/s]

100%|██████████| 115/115 [00:00<00:00, 12988.26it/s]




In [31]:
num_of_sentences = len(corpus)
num_of_words = 0
for line in corpus:
    num_of_words += len(line)

print('Num of sentences - %s'%(num_of_sentences))
print('Num of words - %s'%(num_of_words))

Num of sentences - 1865
Num of words - 16108


In [33]:
phrases = Phrases(sentences=corpus,min_count=25,threshold=50)
bigram = Phraser(phrases)

2019-02-22 11:52:31,137 : INFO : collecting all words and their counts


2019-02-22 11:52:31,138 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types


2019-02-22 11:52:31,164 : INFO : collected 8994 word types from a corpus of 16108 words (unigram + bigrams) and 1865 sentences


2019-02-22 11:52:31,165 : INFO : using 8994 counts as vocab in Phrases<0 vocab, min_count=25, threshold=50, max_vocab_size=40000000>


2019-02-22 11:52:31,166 : INFO : source_vocab length 8994


2019-02-22 11:52:31,228 : INFO : Phraser built with 0 phrasegrams


In [35]:
for index,sentence in enumerate(corpus):
    corpus[index] = bigram[sentence]

In [36]:
# shuffle corpus
def shuffle_corpus(sentences):
    shuffled = list(sentences)
    random.shuffle(shuffled)
    return shuffled

In [38]:
# sg - skip gram |  window = size of the window | size = vector dimension
size = 100
window_size = 2 # sentences weren't too long, so
epochs = 100
min_count = 2
workers = 4

# train word2vec model using gensim
model = Word2Vec(corpus, sg=1,window=window_size,size=size,
                 min_count=min_count,workers=workers,iter=epochs,sample=0.01)

2019-02-22 11:54:34,593 : INFO : collecting all words and their counts


2019-02-22 11:54:34,593 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types


2019-02-22 11:54:34,597 : INFO : collected 2929 word types from a corpus of 16108 raw words and 1865 sentences


2019-02-22 11:54:34,598 : INFO : Loading a fresh vocabulary


2019-02-22 11:54:34,603 : INFO : effective_min_count=2 retains 2916 unique words (99% of original 2929, drops 13)


2019-02-22 11:54:34,604 : INFO : effective_min_count=2 leaves 16095 word corpus (99% of original 16108, drops 13)


2019-02-22 11:54:34,611 : INFO : deleting the raw counts dictionary of 2929 items


2019-02-22 11:54:34,612 : INFO : sample=0.01 downsamples 0 most-common words


2019-02-22 11:54:34,613 : INFO : downsampling leaves estimated 16095 word corpus (100.0% of prior 16095)


2019-02-22 11:54:34,619 : INFO : estimated required memory for 2916 words and 100 dimensions: 3790800 bytes


2019-02-22 11:54:34,620 : INFO : resetting layer weights


2019-02-22 11:54:34,652 : INFO : training model with 4 workers on 2916 vocabulary and 100 features, using sg=1 hs=0 sample=0.01 negative=5 window=2


2019-02-22 11:54:34,661 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:34,662 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:34,678 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:34,684 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:34,685 : INFO : EPOCH - 1 : training on 16108 raw words (16095 effective words) took 0.0s, 565647 effective words/s


2019-02-22 11:54:34,693 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:34,695 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:34,711 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:34,719 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:34,720 : INFO : EPOCH - 2 : training on 16108 raw words (16095 effective words) took 0.0s, 569852 effective words/s


2019-02-22 11:54:34,728 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:34,730 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:34,746 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:34,752 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:34,753 : INFO : EPOCH - 3 : training on 16108 raw words (16095 effective words) took 0.0s, 639953 effective words/s


2019-02-22 11:54:34,760 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:34,763 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:34,778 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:34,784 : INFO : worker thread finished; awaiting finish of 0 more threads




2019-02-22 11:54:34,786 : INFO : EPOCH - 4 : training on 16108 raw words (16095 effective words) took 0.0s, 607988 effective words/s


2019-02-22 11:54:34,792 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:34,798 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:34,812 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:34,825 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:34,825 : INFO : EPOCH - 5 : training on 16108 raw words (16095 effective words) took 0.0s, 488833 effective words/s


2019-02-22 11:54:34,832 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:34,836 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:34,853 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:34,865 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:34,866 : INFO : EPOCH - 6 : training on 16108 raw words (16095 effective words) took 0.0s, 460286 effective words/s


2019-02-22 11:54:34,872 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:34,875 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:34,889 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:34,897 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:34,898 : INFO : EPOCH - 7 : training on 16108 raw words (16095 effective words) took 0.0s, 602240 effective words/s


2019-02-22 11:54:34,904 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:34,907 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:34,923 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:34,930 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:34,930 : INFO : EPOCH - 8 : training on 16108 raw words (16095 effective words) took 0.0s, 556492 effective words/s


2019-02-22 11:54:34,938 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:34,942 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:34,956 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:34,963 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:34,964 : INFO : EPOCH - 9 : training on 16108 raw words (16095 effective words) took 0.0s, 559097 effective words/s


2019-02-22 11:54:34,969 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:34,973 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:34,994 : INFO : worker thread finished; awaiting finish of 1 more threads




2019-02-22 11:54:35,000 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,001 : INFO : EPOCH - 10 : training on 16108 raw words (16095 effective words) took 0.0s, 509008 effective words/s


2019-02-22 11:54:35,009 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,012 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,027 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,033 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,034 : INFO : EPOCH - 11 : training on 16108 raw words (16095 effective words) took 0.0s, 556979 effective words/s


2019-02-22 11:54:35,042 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,046 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,066 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,072 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,073 : INFO : EPOCH - 12 : training on 16108 raw words (16095 effective words) took 0.0s, 478150 effective words/s


2019-02-22 11:54:35,080 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,083 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,098 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,105 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,106 : INFO : EPOCH - 13 : training on 16108 raw words (16095 effective words) took 0.0s, 619449 effective words/s


2019-02-22 11:54:35,115 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,118 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,133 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,139 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,140 : INFO : EPOCH - 14 : training on 16108 raw words (16095 effective words) took 0.0s, 591099 effective words/s


2019-02-22 11:54:35,146 : INFO : worker thread finished; awaiting finish of 3 more threads




2019-02-22 11:54:35,151 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,169 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,172 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,172 : INFO : EPOCH - 15 : training on 16108 raw words (16095 effective words) took 0.0s, 561358 effective words/s


2019-02-22 11:54:35,178 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,181 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,196 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,203 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,204 : INFO : EPOCH - 16 : training on 16108 raw words (16095 effective words) took 0.0s, 573880 effective words/s


2019-02-22 11:54:35,211 : INFO : worker thread finished; awaiting finish of 3 more threads




2019-02-22 11:54:35,214 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,235 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,241 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,242 : INFO : EPOCH - 17 : training on 16108 raw words (16095 effective words) took 0.0s, 498280 effective words/s


2019-02-22 11:54:35,247 : INFO : worker thread finished; awaiting finish of 3 more threads




2019-02-22 11:54:35,251 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,271 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,280 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,281 : INFO : EPOCH - 18 : training on 16108 raw words (16095 effective words) took 0.0s, 451938 effective words/s


2019-02-22 11:54:35,291 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,291 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,308 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,315 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,316 : INFO : EPOCH - 19 : training on 16108 raw words (16095 effective words) took 0.0s, 510091 effective words/s


2019-02-22 11:54:35,322 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,323 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,344 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,350 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,351 : INFO : EPOCH - 20 : training on 16108 raw words (16095 effective words) took 0.0s, 525229 effective words/s


2019-02-22 11:54:35,359 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,361 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,380 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,387 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,388 : INFO : EPOCH - 21 : training on 16108 raw words (16095 effective words) took 0.0s, 506863 effective words/s


2019-02-22 11:54:35,395 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,398 : INFO : worker thread finished; awaiting finish of 2 more threads




2019-02-22 11:54:35,418 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,425 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,426 : INFO : EPOCH - 22 : training on 16108 raw words (16095 effective words) took 0.0s, 490911 effective words/s


2019-02-22 11:54:35,432 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,435 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,451 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,458 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,459 : INFO : EPOCH - 23 : training on 16108 raw words (16095 effective words) took 0.0s, 552931 effective words/s


2019-02-22 11:54:35,468 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,470 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,485 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,500 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,500 : INFO : EPOCH - 24 : training on 16108 raw words (16095 effective words) took 0.0s, 506874 effective words/s


2019-02-22 11:54:35,509 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,513 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,534 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,541 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,542 : INFO : EPOCH - 25 : training on 16108 raw words (16095 effective words) took 0.0s, 452116 effective words/s


2019-02-22 11:54:35,549 : INFO : worker thread finished; awaiting finish of 3 more threads




2019-02-22 11:54:35,553 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,579 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,590 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,591 : INFO : EPOCH - 26 : training on 16108 raw words (16095 effective words) took 0.0s, 356008 effective words/s


2019-02-22 11:54:35,638 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,639 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,653 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,667 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,667 : INFO : EPOCH - 27 : training on 16108 raw words (16095 effective words) took 0.0s, 328879 effective words/s


2019-02-22 11:54:35,675 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,678 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,694 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,699 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,700 : INFO : EPOCH - 28 : training on 16108 raw words (16095 effective words) took 0.0s, 578413 effective words/s


2019-02-22 11:54:35,705 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,710 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,729 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,736 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,736 : INFO : EPOCH - 29 : training on 16108 raw words (16095 effective words) took 0.0s, 494097 effective words/s


2019-02-22 11:54:35,742 : INFO : worker thread finished; awaiting finish of 3 more threads




2019-02-22 11:54:35,746 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,761 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,770 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,771 : INFO : EPOCH - 30 : training on 16108 raw words (16095 effective words) took 0.0s, 551538 effective words/s


2019-02-22 11:54:35,778 : INFO : worker thread finished; awaiting finish of 3 more threads




2019-02-22 11:54:35,783 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,800 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,803 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,803 : INFO : EPOCH - 31 : training on 16108 raw words (16095 effective words) took 0.0s, 609065 effective words/s


2019-02-22 11:54:35,810 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,814 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,832 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,835 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,836 : INFO : EPOCH - 32 : training on 16108 raw words (16095 effective words) took 0.0s, 560117 effective words/s


2019-02-22 11:54:35,844 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,845 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,859 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,866 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,867 : INFO : EPOCH - 33 : training on 16108 raw words (16095 effective words) took 0.0s, 620708 effective words/s


2019-02-22 11:54:35,873 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,877 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,894 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,905 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,906 : INFO : EPOCH - 34 : training on 16108 raw words (16095 effective words) took 0.0s, 458354 effective words/s


2019-02-22 11:54:35,913 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,915 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,929 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,939 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,942 : INFO : EPOCH - 35 : training on 16108 raw words (16095 effective words) took 0.0s, 534281 effective words/s


2019-02-22 11:54:35,950 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:35,952 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:35,968 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:35,972 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:35,973 : INFO : EPOCH - 36 : training on 16108 raw words (16095 effective words) took 0.0s, 620900 effective words/s


2019-02-22 11:54:35,982 : INFO : worker thread finished; awaiting finish of 3 more threads




2019-02-22 11:54:35,984 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,001 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,007 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,008 : INFO : EPOCH - 37 : training on 16108 raw words (16095 effective words) took 0.0s, 571190 effective words/s


2019-02-22 11:54:36,013 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,016 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,032 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,038 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,038 : INFO : EPOCH - 38 : training on 16108 raw words (16095 effective words) took 0.0s, 640734 effective words/s


2019-02-22 11:54:36,046 : INFO : worker thread finished; awaiting finish of 3 more threads




2019-02-22 11:54:36,049 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,063 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,070 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,071 : INFO : EPOCH - 39 : training on 16108 raw words (16095 effective words) took 0.0s, 658161 effective words/s


2019-02-22 11:54:36,076 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,080 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,095 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,102 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,102 : INFO : EPOCH - 40 : training on 16108 raw words (16095 effective words) took 0.0s, 618017 effective words/s


2019-02-22 11:54:36,110 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,111 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,125 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,133 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,133 : INFO : EPOCH - 41 : training on 16108 raw words (16095 effective words) took 0.0s, 627616 effective words/s


2019-02-22 11:54:36,141 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,143 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,158 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,164 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,165 : INFO : EPOCH - 42 : training on 16108 raw words (16095 effective words) took 0.0s, 594403 effective words/s


2019-02-22 11:54:36,173 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,175 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,191 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,196 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,196 : INFO : EPOCH - 43 : training on 16108 raw words (16095 effective words) took 0.0s, 610430 effective words/s


2019-02-22 11:54:36,205 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,207 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,222 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,228 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,229 : INFO : EPOCH - 44 : training on 16108 raw words (16095 effective words) took 0.0s, 654544 effective words/s


2019-02-22 11:54:36,236 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,239 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,254 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,262 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,263 : INFO : EPOCH - 45 : training on 16108 raw words (16095 effective words) took 0.0s, 588834 effective words/s


2019-02-22 11:54:36,270 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,272 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,286 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,294 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,295 : INFO : EPOCH - 46 : training on 16108 raw words (16095 effective words) took 0.0s, 581526 effective words/s


2019-02-22 11:54:36,302 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,306 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,320 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,328 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,328 : INFO : EPOCH - 47 : training on 16108 raw words (16095 effective words) took 0.0s, 605512 effective words/s


2019-02-22 11:54:36,335 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,336 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,352 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,358 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,359 : INFO : EPOCH - 48 : training on 16108 raw words (16095 effective words) took 0.0s, 589456 effective words/s


2019-02-22 11:54:36,366 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,368 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,382 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,389 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,390 : INFO : EPOCH - 49 : training on 16108 raw words (16095 effective words) took 0.0s, 604711 effective words/s


2019-02-22 11:54:36,397 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,401 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,420 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,421 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,421 : INFO : EPOCH - 50 : training on 16108 raw words (16095 effective words) took 0.0s, 632218 effective words/s


2019-02-22 11:54:36,427 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,430 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,444 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,452 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,453 : INFO : EPOCH - 51 : training on 16108 raw words (16095 effective words) took 0.0s, 621713 effective words/s


2019-02-22 11:54:36,459 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,461 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,475 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,482 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,483 : INFO : EPOCH - 52 : training on 16108 raw words (16095 effective words) took 0.0s, 656285 effective words/s


2019-02-22 11:54:36,489 : INFO : worker thread finished; awaiting finish of 3 more threads




2019-02-22 11:54:36,494 : INFO : worker thread finished; awaiting finish of 2 more threads




2019-02-22 11:54:36,513 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,519 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,520 : INFO : EPOCH - 53 : training on 16108 raw words (16095 effective words) took 0.0s, 507153 effective words/s


2019-02-22 11:54:36,527 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,530 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,544 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,551 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,552 : INFO : EPOCH - 54 : training on 16108 raw words (16095 effective words) took 0.0s, 664063 effective words/s


2019-02-22 11:54:36,558 : INFO : worker thread finished; awaiting finish of 3 more threads




2019-02-22 11:54:36,563 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,581 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,581 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,582 : INFO : EPOCH - 55 : training on 16108 raw words (16095 effective words) took 0.0s, 607719 effective words/s


2019-02-22 11:54:36,588 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,592 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,607 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,620 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,621 : INFO : EPOCH - 56 : training on 16108 raw words (16095 effective words) took 0.0s, 487481 effective words/s


2019-02-22 11:54:36,628 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,631 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,645 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,658 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,659 : INFO : EPOCH - 57 : training on 16108 raw words (16095 effective words) took 0.0s, 472750 effective words/s


2019-02-22 11:54:36,665 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,667 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,685 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,692 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,692 : INFO : EPOCH - 58 : training on 16108 raw words (16095 effective words) took 0.0s, 583518 effective words/s


2019-02-22 11:54:36,700 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,702 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,717 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,724 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,725 : INFO : EPOCH - 59 : training on 16108 raw words (16095 effective words) took 0.0s, 601362 effective words/s


2019-02-22 11:54:36,732 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,734 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,748 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,754 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,754 : INFO : EPOCH - 60 : training on 16108 raw words (16095 effective words) took 0.0s, 680123 effective words/s


2019-02-22 11:54:36,762 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,766 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,781 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,784 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,785 : INFO : EPOCH - 61 : training on 16108 raw words (16095 effective words) took 0.0s, 604742 effective words/s


2019-02-22 11:54:36,791 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,794 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,808 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,815 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,815 : INFO : EPOCH - 62 : training on 16108 raw words (16095 effective words) took 0.0s, 658925 effective words/s


2019-02-22 11:54:36,823 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,825 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,839 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,846 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,847 : INFO : EPOCH - 63 : training on 16108 raw words (16095 effective words) took 0.0s, 611005 effective words/s


2019-02-22 11:54:36,853 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,854 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,872 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,878 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,879 : INFO : EPOCH - 64 : training on 16108 raw words (16095 effective words) took 0.0s, 612596 effective words/s


2019-02-22 11:54:36,886 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,889 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,902 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,909 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,910 : INFO : EPOCH - 65 : training on 16108 raw words (16095 effective words) took 0.0s, 673994 effective words/s


2019-02-22 11:54:36,919 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,921 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,938 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,944 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,945 : INFO : EPOCH - 66 : training on 16108 raw words (16095 effective words) took 0.0s, 520817 effective words/s


2019-02-22 11:54:36,951 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,957 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:36,974 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:36,980 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:36,981 : INFO : EPOCH - 67 : training on 16108 raw words (16095 effective words) took 0.0s, 552061 effective words/s


2019-02-22 11:54:36,987 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:36,991 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,004 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,011 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,012 : INFO : EPOCH - 68 : training on 16108 raw words (16095 effective words) took 0.0s, 583368 effective words/s


2019-02-22 11:54:37,025 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,026 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,042 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,048 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,049 : INFO : EPOCH - 69 : training on 16108 raw words (16095 effective words) took 0.0s, 687567 effective words/s


2019-02-22 11:54:37,059 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,061 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,079 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,081 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,081 : INFO : EPOCH - 70 : training on 16108 raw words (16095 effective words) took 0.0s, 560919 effective words/s


2019-02-22 11:54:37,087 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,089 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,105 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,111 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,112 : INFO : EPOCH - 71 : training on 16108 raw words (16095 effective words) took 0.0s, 625292 effective words/s


2019-02-22 11:54:37,119 : INFO : worker thread finished; awaiting finish of 3 more threads




2019-02-22 11:54:37,122 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,141 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,147 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,147 : INFO : EPOCH - 72 : training on 16108 raw words (16095 effective words) took 0.0s, 540992 effective words/s


2019-02-22 11:54:37,153 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,156 : INFO : worker thread finished; awaiting finish of 2 more threads




2019-02-22 11:54:37,170 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,177 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,179 : INFO : EPOCH - 73 : training on 16108 raw words (16095 effective words) took 0.0s, 576758 effective words/s


2019-02-22 11:54:37,187 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,191 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,204 : INFO : worker thread finished; awaiting finish of 1 more threads




2019-02-22 11:54:37,213 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,214 : INFO : EPOCH - 74 : training on 16108 raw words (16095 effective words) took 0.0s, 601215 effective words/s


2019-02-22 11:54:37,222 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,223 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,241 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,247 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,248 : INFO : EPOCH - 75 : training on 16108 raw words (16095 effective words) took 0.0s, 547931 effective words/s


2019-02-22 11:54:37,255 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,257 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,270 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,277 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,277 : INFO : EPOCH - 76 : training on 16108 raw words (16095 effective words) took 0.0s, 638636 effective words/s


2019-02-22 11:54:37,282 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,284 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,300 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,306 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,307 : INFO : EPOCH - 77 : training on 16108 raw words (16095 effective words) took 0.0s, 644248 effective words/s


2019-02-22 11:54:37,314 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,316 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,331 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,335 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,336 : INFO : EPOCH - 78 : training on 16108 raw words (16095 effective words) took 0.0s, 632190 effective words/s


2019-02-22 11:54:37,346 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,347 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,361 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,366 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,366 : INFO : EPOCH - 79 : training on 16108 raw words (16095 effective words) took 0.0s, 637236 effective words/s


2019-02-22 11:54:37,373 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,374 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,392 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,400 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,401 : INFO : EPOCH - 80 : training on 16108 raw words (16095 effective words) took 0.0s, 575286 effective words/s


2019-02-22 11:54:37,409 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,412 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,425 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,432 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,433 : INFO : EPOCH - 81 : training on 16108 raw words (16095 effective words) took 0.0s, 665276 effective words/s


2019-02-22 11:54:37,440 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,442 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,455 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,461 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,462 : INFO : EPOCH - 82 : training on 16108 raw words (16095 effective words) took 0.0s, 648841 effective words/s


2019-02-22 11:54:37,468 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,472 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,486 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,492 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,493 : INFO : EPOCH - 83 : training on 16108 raw words (16095 effective words) took 0.0s, 607374 effective words/s


2019-02-22 11:54:37,501 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,504 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,517 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,524 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,525 : INFO : EPOCH - 84 : training on 16108 raw words (16095 effective words) took 0.0s, 607683 effective words/s


2019-02-22 11:54:37,532 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,536 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,552 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,555 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,556 : INFO : EPOCH - 85 : training on 16108 raw words (16095 effective words) took 0.0s, 634736 effective words/s


2019-02-22 11:54:37,564 : INFO : worker thread finished; awaiting finish of 3 more threads




2019-02-22 11:54:37,568 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,580 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,591 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,592 : INFO : EPOCH - 86 : training on 16108 raw words (16095 effective words) took 0.0s, 507559 effective words/s


2019-02-22 11:54:37,600 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,603 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,617 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,622 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,622 : INFO : EPOCH - 87 : training on 16108 raw words (16095 effective words) took 0.0s, 622399 effective words/s


2019-02-22 11:54:37,628 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,632 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,648 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,653 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,654 : INFO : EPOCH - 88 : training on 16108 raw words (16095 effective words) took 0.0s, 569280 effective words/s


2019-02-22 11:54:37,662 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,666 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,680 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,685 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,686 : INFO : EPOCH - 89 : training on 16108 raw words (16095 effective words) took 0.0s, 667593 effective words/s


2019-02-22 11:54:37,692 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,696 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,710 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,717 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,718 : INFO : EPOCH - 90 : training on 16108 raw words (16095 effective words) took 0.0s, 629110 effective words/s


2019-02-22 11:54:37,724 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,725 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,740 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,747 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,748 : INFO : EPOCH - 91 : training on 16108 raw words (16095 effective words) took 0.0s, 640235 effective words/s


2019-02-22 11:54:37,753 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,756 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,769 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,777 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,778 : INFO : EPOCH - 92 : training on 16108 raw words (16095 effective words) took 0.0s, 642638 effective words/s


2019-02-22 11:54:37,783 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,788 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,805 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,810 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,811 : INFO : EPOCH - 93 : training on 16108 raw words (16095 effective words) took 0.0s, 542929 effective words/s


2019-02-22 11:54:37,818 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,820 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,834 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,839 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,840 : INFO : EPOCH - 94 : training on 16108 raw words (16095 effective words) took 0.0s, 658583 effective words/s


2019-02-22 11:54:37,849 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,852 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,865 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,869 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,870 : INFO : EPOCH - 95 : training on 16108 raw words (16095 effective words) took 0.0s, 631653 effective words/s


2019-02-22 11:54:37,876 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,879 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,893 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,899 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,900 : INFO : EPOCH - 96 : training on 16108 raw words (16095 effective words) took 0.0s, 650558 effective words/s


2019-02-22 11:54:37,906 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,909 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,921 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,929 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,930 : INFO : EPOCH - 97 : training on 16108 raw words (16095 effective words) took 0.0s, 612885 effective words/s


2019-02-22 11:54:37,936 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,940 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,956 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,962 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,963 : INFO : EPOCH - 98 : training on 16108 raw words (16095 effective words) took 0.0s, 542469 effective words/s


2019-02-22 11:54:37,971 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:37,973 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:37,985 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:37,992 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:37,993 : INFO : EPOCH - 99 : training on 16108 raw words (16095 effective words) took 0.0s, 637068 effective words/s


2019-02-22 11:54:38,005 : INFO : worker thread finished; awaiting finish of 3 more threads


2019-02-22 11:54:38,007 : INFO : worker thread finished; awaiting finish of 2 more threads


2019-02-22 11:54:38,020 : INFO : worker thread finished; awaiting finish of 1 more threads


2019-02-22 11:54:38,026 : INFO : worker thread finished; awaiting finish of 0 more threads


2019-02-22 11:54:38,026 : INFO : EPOCH - 100 : training on 16108 raw words (16095 effective words) took 0.0s, 600388 effective words/s


2019-02-22 11:54:38,027 : INFO : training on a 1610800 raw words (1609500 effective words) took 3.4s, 477078 effective words/s


In [39]:
model.build_vocab(sentences=shuffle_corpus(corpus),update=True)

for i in range(5):
    model.train(sentences=shuffle_corpus(corpus),epochs=50,total_examples=model.corpus_count)

NameError: name 'random' is not defined

In [41]:
# save model
model.save('w2v_model')

2019-02-22 11:57:00,601 : INFO : saving Word2Vec object under w2v_model, separately None


2019-02-22 11:57:00,602 : INFO : not storing attribute vectors_norm


2019-02-22 11:57:00,603 : INFO : not storing attribute cum_table


2019-02-22 11:57:00,636 : INFO : saved w2v_model


In [42]:
# load word2vec model
model = Word2Vec.load('w2v_model')

2019-02-22 11:57:09,709 : INFO : loading Word2Vec object from w2v_model


2019-02-22 11:57:09,791 : INFO : loading wv recursively from w2v_model.wv.* with mmap=None


2019-02-22 11:57:09,794 : INFO : setting ignored attribute vectors_norm to None


2019-02-22 11:57:09,796 : INFO : loading vocabulary recursively from w2v_model.vocabulary.* with mmap=None


2019-02-22 11:57:09,799 : INFO : loading trainables recursively from w2v_model.trainables.* with mmap=None


2019-02-22 11:57:09,802 : INFO : setting ignored attribute cum_table to None


2019-02-22 11:57:09,804 : INFO : loaded w2v_model


In [54]:
model.most_similar(positive=['christenunie', 'lijsttrekker'])


  """Entry point for launching an IPython kernel.


[('49', 0.754084050655365),
 ('hardlopende', 0.7387628555297852),
 ('ondersteunde', 0.7354644536972046),
 ('brug', 0.7350203990936279),
 ('wens', 0.7188327312469482),
 ('pleit', 0.7175378799438477),
 ('raadsverkiezingen', 0.7173354625701904),
 ('namens', 0.7112743854522705),
 ('jaartje', 0.7042461633682251),
 ('verzet', 0.695603609085083)]

In [None]:
vector = model.wv['computer']  # numpy vector of a word
vector

  """Entry point for launching an IPython kernel.


[('49', 0.754084050655365),
 ('hardlopende', 0.7387628555297852),
 ('ondersteunde', 0.7354644536972046),
 ('brug', 0.7350203990936279),
 ('wens', 0.7188327312469482),
 ('pleit', 0.7175378799438477),
 ('raadsverkiezingen', 0.7173354625701904),
 ('namens', 0.7112743854522705),
 ('jaartje', 0.7042461633682251),
 ('verzet', 0.695603609085083)]

In [None]:
if t