In [18]:
# Importation des modules

import pandas as pd
import re  # For preprocessing
import pandas as pd  # For data handling
from time import time  # To time our operations
from collections import defaultdict  # For word frequency

import spacy  # For preprocessing

import logging  # Setting up the loggings to monitor gensim

logging.basicConfig(format="%(levelname)s - %(asctime)s: %(message)s", datefmt= '%H:%M:%S', level=logging.INFO)

from gensim.models.phrases import Phrases, Phraser

import multiprocessing

from gensim.models import Word2Vec
from gensim.test.utils import common_texts, get_tmpfile
from gensim.models.callbacks import CallbackAny2Vec

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
 
import seaborn as sns
sns.set_style("darkgrid")

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

#Affichage de toutes les colonnes

pd.set_option('display.max_columns', 500)

INFO - 21:02:18: adding document #0 to Dictionary(0 unique tokens: [])
INFO - 21:02:18: built Dictionary(12 unique tokens: ['computer', 'human', 'interface', 'response', 'survey']...) from 9 documents (total 29 corpus positions)


In [2]:
df0 = pd.read_csv('data/data_cleaned_NLP.csv', sep = ',', encoding = 'latin-1')

In [3]:
df1 = df0[['Réplique', 'Groupe', 'tokenized_replique']].copy()

df2 = df1[['Groupe', 'tokenized_replique']].copy()

df_novice = df2[df2['Groupe'] == 'Novice'].copy()
df_exp = df2[df2['Groupe'] == 'Exp'].copy()

In [4]:
df_novice = df_novice.dropna()

df_exp = df_exp.dropna()

In [5]:
sent_novice = [row.split() for row in df_novice['tokenized_replique']]

sent_exp = [row.split() for row in df_exp['tokenized_replique']]

In [6]:
phrases_novice = Phrases(sent_novice, min_count = 3, progress_per = 20000)

phrases_exp = Phrases(sent_exp, min_count = 3, progress_per = 20000)


# min_count : Ignore all words and bigrams with total collected count lower than this value.

INFO - 20:52:11: collecting all words and their counts
INFO - 20:52:11: PROGRESS: at sentence #0, processed 0 words and 0 word types
INFO - 20:52:12: PROGRESS: at sentence #20000, processed 544399 words and 367942 word types
INFO - 20:52:14: PROGRESS: at sentence #40000, processed 1061844 words and 645502 word types
INFO - 20:52:14: collected 773338 word types from a corpus of 1326670 words (unigram + bigrams) and 50191 sentences
INFO - 20:52:14: using 773338 counts as vocab in Phrases<0 vocab, min_count=3, threshold=10.0, max_vocab_size=40000000>
INFO - 20:52:14: collecting all words and their counts
INFO - 20:52:14: PROGRESS: at sentence #0, processed 0 words and 0 word types
INFO - 20:52:15: PROGRESS: at sentence #20000, processed 518787 words and 365570 word types
INFO - 20:52:17: PROGRESS: at sentence #40000, processed 1056472 words and 658662 word types
INFO - 20:52:18: PROGRESS: at sentence #60000, processed 1530225 words and 884119 word types
INFO - 20:52:19: PROGRESS: at sente

In [7]:
bigram_novice = Phraser(phrases_novice)

bigram_exp = Phraser(phrases_exp)

INFO - 20:52:39: source_vocab length 773338
INFO - 20:52:49: Phraser built with 16435 phrasegrams
INFO - 20:52:49: source_vocab length 3813404
INFO - 20:52:59: Phraser added 50000 phrasegrams
INFO - 20:53:37: Phraser built with 73757 phrasegrams


In [8]:
sentences_novice = bigram_novice[sent_novice]

sentences_exp = bigram_exp[sent_exp]

In [9]:
word_freq_novice = defaultdict(int)
word_freq_exp = defaultdict(int)

for sent in sentences_novice:
    for i in sent:
        word_freq_novice[i] += 1
        
for sent in sentences_exp:
    for i in sent:
        word_freq_exp[i] += 1


In [10]:
cores = multiprocessing.cpu_count()

### Choix des hyperparamètres

In [68]:
# Choix de la fenêtre, on aimerait que le modèle aprenne vite, on alongera ensuite le nombre d'epochs

liste_modeles = [Word2Vec(
                     window = i,
                     size = 300,
                     sample = 6e-5, 
                     alpha = 0.03, 
                     min_alpha = 0.0007, 
                     negative = 20,
                     workers = cores - 1,
                     compute_loss = True)
                 
                 for i in range(1, 10)
                ]

tests_fenetre = []

for i in range(len(liste_modeles)):
    
    model = liste_modeles[i]
    model.build_vocab(sentences_novice, progress_per = 10000)


    model.train(sentences_novice, total_examples = model.corpus_count, epochs = 5, report_delay = 1)

    tests_fenetre.append([
            (model.wv.most_similar(positive=['droite'])[i][0],
             model.wv.most_similar(positive=['vitesse'])[i][0],
             model.wv.most_similar(positive=['donc'])[i][0],
            )
         for i in range(10)
         ])

INFO - 00:14:18: collecting all words and their counts
INFO - 00:14:18: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO - 00:14:19: PROGRESS: at sentence #10000, processed 233965 words, keeping 30761 word types
INFO - 00:14:20: PROGRESS: at sentence #20000, processed 450032 words, keeping 39850 word types
INFO - 00:14:21: PROGRESS: at sentence #30000, processed 677772 words, keeping 48410 word types
INFO - 00:14:22: PROGRESS: at sentence #40000, processed 880381 words, keeping 51772 word types
INFO - 00:14:22: PROGRESS: at sentence #50000, processed 1095690 words, keeping 55266 word types
INFO - 00:14:22: collected 55316 word types from a corpus of 1099932 raw words and 50191 sentences
INFO - 00:14:22: Loading a fresh vocabulary
INFO - 00:14:25: effective_min_count=5 retains 26248 unique words (47% of original 55316, drops 29068)
INFO - 00:14:25: effective_min_count=5 leaves 1041812 word corpus (94% of original 1099932, drops 58120)
INFO - 00:14:25: deleting the 

INFO - 00:15:24: EPOCH 1 - PROGRESS: at 80.86% examples, 116687 words/s, in_qsize 0, out_qsize 0
INFO - 00:15:25: EPOCH 1 - PROGRESS: at 97.94% examples, 118390 words/s, in_qsize 2, out_qsize 1
INFO - 00:15:25: worker thread finished; awaiting finish of 2 more threads
INFO - 00:15:26: worker thread finished; awaiting finish of 1 more threads
INFO - 00:15:26: worker thread finished; awaiting finish of 0 more threads
INFO - 00:15:26: EPOCH - 1 : training on 1099932 raw words (749440 effective words) took 6.3s, 119339 effective words/s
INFO - 00:15:27: EPOCH 2 - PROGRESS: at 13.68% examples, 111637 words/s, in_qsize 0, out_qsize 1
INFO - 00:15:28: EPOCH 2 - PROGRESS: at 30.72% examples, 116361 words/s, in_qsize 0, out_qsize 0
INFO - 00:15:29: EPOCH 2 - PROGRESS: at 44.53% examples, 113176 words/s, in_qsize 0, out_qsize 0
INFO - 00:15:30: EPOCH 2 - PROGRESS: at 59.03% examples, 110460 words/s, in_qsize 0, out_qsize 0
INFO - 00:15:31: EPOCH 2 - PROGRESS: at 72.66% examples, 105978 words/s, 

INFO - 00:16:22: EPOCH 3 - PROGRESS: at 89.34% examples, 109034 words/s, in_qsize 0, out_qsize 0
INFO - 00:16:23: worker thread finished; awaiting finish of 2 more threads
INFO - 00:16:23: worker thread finished; awaiting finish of 1 more threads
INFO - 00:16:23: worker thread finished; awaiting finish of 0 more threads
INFO - 00:16:23: EPOCH - 3 : training on 1099932 raw words (749271 effective words) took 6.7s, 111363 effective words/s
INFO - 00:16:24: EPOCH 4 - PROGRESS: at 14.59% examples, 121419 words/s, in_qsize 0, out_qsize 0
INFO - 00:16:25: EPOCH 4 - PROGRESS: at 32.44% examples, 121836 words/s, in_qsize 0, out_qsize 0
INFO - 00:16:26: EPOCH 4 - PROGRESS: at 47.15% examples, 118365 words/s, in_qsize 0, out_qsize 0
INFO - 00:16:27: EPOCH 4 - PROGRESS: at 62.67% examples, 116044 words/s, in_qsize 0, out_qsize 0
INFO - 00:16:28: EPOCH 4 - PROGRESS: at 79.48% examples, 115878 words/s, in_qsize 0, out_qsize 0
INFO - 00:16:29: EPOCH 4 - PROGRESS: at 95.64% examples, 115767 words/s, 

INFO - 00:17:19: worker thread finished; awaiting finish of 1 more threads
INFO - 00:17:19: EPOCH 5 - PROGRESS: at 100.00% examples, 121729 words/s, in_qsize 0, out_qsize 1
INFO - 00:17:19: worker thread finished; awaiting finish of 0 more threads
INFO - 00:17:19: EPOCH - 5 : training on 1099932 raw words (749375 effective words) took 6.2s, 121700 effective words/s
INFO - 00:17:19: training on a 5499660 raw words (3746064 effective words) took 32.1s, 116596 effective words/s
INFO - 00:17:19: precomputing L2-norms of word weight vectors
INFO - 00:17:19: collecting all words and their counts
INFO - 00:17:19: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO - 00:17:20: PROGRESS: at sentence #10000, processed 233965 words, keeping 30761 word types
INFO - 00:17:21: PROGRESS: at sentence #20000, processed 450032 words, keeping 39850 word types
INFO - 00:17:22: PROGRESS: at sentence #30000, processed 677772 words, keeping 48410 word types
INFO - 00:17:22: PROGRESS: at se

INFO - 00:18:08: estimated required memory for 26248 words and 300 dimensions: 76119200 bytes
INFO - 00:18:08: resetting layer weights
INFO - 00:18:15: training model with 3 workers on 26248 vocabulary and 300 features, using sg=0 hs=0 sample=6e-05 negative=20 window=6
INFO - 00:18:16: EPOCH 1 - PROGRESS: at 11.29% examples, 91755 words/s, in_qsize 0, out_qsize 0
INFO - 00:18:17: EPOCH 1 - PROGRESS: at 28.29% examples, 105464 words/s, in_qsize 1, out_qsize 0
INFO - 00:18:18: EPOCH 1 - PROGRESS: at 42.19% examples, 106024 words/s, in_qsize 0, out_qsize 0
INFO - 00:18:19: EPOCH 1 - PROGRESS: at 57.44% examples, 106489 words/s, in_qsize 0, out_qsize 0
INFO - 00:18:20: EPOCH 1 - PROGRESS: at 70.25% examples, 103471 words/s, in_qsize 0, out_qsize 0
INFO - 00:18:21: EPOCH 1 - PROGRESS: at 87.49% examples, 106164 words/s, in_qsize 0, out_qsize 0
INFO - 00:18:22: worker thread finished; awaiting finish of 2 more threads
INFO - 00:18:22: worker thread finished; awaiting finish of 1 more threads

INFO - 00:19:14: EPOCH 2 - PROGRESS: at 100.00% examples, 101542 words/s, in_qsize 0, out_qsize 1
INFO - 00:19:14: worker thread finished; awaiting finish of 0 more threads
INFO - 00:19:14: EPOCH - 2 : training on 1099932 raw words (748969 effective words) took 7.4s, 101523 effective words/s
INFO - 00:19:15: EPOCH 3 - PROGRESS: at 14.59% examples, 119437 words/s, in_qsize 0, out_qsize 0
INFO - 00:19:16: EPOCH 3 - PROGRESS: at 32.44% examples, 122039 words/s, in_qsize 0, out_qsize 0
INFO - 00:19:17: EPOCH 3 - PROGRESS: at 46.07% examples, 116082 words/s, in_qsize 0, out_qsize 0
INFO - 00:19:18: EPOCH 3 - PROGRESS: at 60.80% examples, 114078 words/s, in_qsize 0, out_qsize 0
INFO - 00:19:19: EPOCH 3 - PROGRESS: at 76.24% examples, 112640 words/s, in_qsize 1, out_qsize 0
INFO - 00:19:20: EPOCH 3 - PROGRESS: at 91.94% examples, 112384 words/s, in_qsize 0, out_qsize 0
INFO - 00:19:21: worker thread finished; awaiting finish of 2 more threads
INFO - 00:19:21: worker thread finished; awaiting 

INFO - 00:20:13: worker thread finished; awaiting finish of 0 more threads
INFO - 00:20:13: EPOCH - 4 : training on 1099932 raw words (749412 effective words) took 6.9s, 108704 effective words/s
INFO - 00:20:14: EPOCH 5 - PROGRESS: at 12.88% examples, 107218 words/s, in_qsize 0, out_qsize 0
INFO - 00:20:15: EPOCH 5 - PROGRESS: at 29.08% examples, 109962 words/s, in_qsize 0, out_qsize 0
INFO - 00:20:17: EPOCH 5 - PROGRESS: at 44.53% examples, 112383 words/s, in_qsize 0, out_qsize 0
INFO - 00:20:18: EPOCH 5 - PROGRESS: at 60.80% examples, 113896 words/s, in_qsize 0, out_qsize 0
INFO - 00:20:19: EPOCH 5 - PROGRESS: at 75.18% examples, 111247 words/s, in_qsize 0, out_qsize 0
INFO - 00:20:20: EPOCH 5 - PROGRESS: at 87.49% examples, 106795 words/s, in_qsize 0, out_qsize 0
INFO - 00:20:20: worker thread finished; awaiting finish of 2 more threads
INFO - 00:20:20: worker thread finished; awaiting finish of 1 more threads
INFO - 00:20:20: worker thread finished; awaiting finish of 0 more thread

In [72]:
tests_fenetre

[[('résolu', 'suivis', 'donne_avis'),
  ('vécu', 'multiplient', 'adopté'),
  ('assemblée_sénat', 'gratuitement', 'voter'),
  ('représentés', 'conflit_intérêts', 'amendements'),
  ('abouti', 'faire_croire', 'déposé'),
  ('voter_texte', 'allégements', 'émets_donc'),
  ('débouché', 'vingt_sept', 'sous_amendements'),
  ('fait_unanimité', 'donnent', 'voilà_pourquoi'),
  ('france_comores', 'instaurée', 'gouvernement'),
  ('accord_trouvé', 'réclament', 'adopter')],
 [('pouvions', 'saurait_être', 'conséquent'),
  ('dépassé', 'reconstruction', 'article'),
  ('gauche', 'multiplient', 'donne_avis'),
  ('avril', 'rendra', 'voter'),
  ('vécu', 'bonne_nouvelle', 'rédaction'),
  ('résolu', 'confort', 'adopté'),
  ('représentés', 'regroupement', 'donc_satisfait'),
  ('faisait', 'cherché', 'texte'),
  ('attend', 'ajoutent', 'déposé'),
  ('référendum', 'continueront', 'adopter')],
 [('gauche', 'plus_élevés', 'sous_amendements'),
  ('pouvions', 'rendra', 'donne_avis'),
  ('résolu', 'massivement', 'rédact

On choisit window = 4 qualitativement (on se restreint d'abord aux modèles qui associent en premier 'gauche' à 'droite', puis on sélectionne sur la pertinence des autres mots sélectionnés)

In [None]:
# Maintenant qu'on a la fenêtre, on fait le choix du nombre d'époques lors de l'apprentissage. 
# On reprend le même procédé pour tester la fiabilité

tests_epochs = []

liste_nb_epochs = [250]

for nb_epochs in liste_nb_epochs:
    
    model = Word2Vec(
                     window = 4,
                     size = 300,
                     sample = 6e-5, 
                     alpha = 0.03, 
                     min_alpha = 0.0007, 
                     negative = 20,
                     workers = cores - 1,
                     compute_loss = True)
    
    model.build_vocab(sentences_novice, progress_per = 10000)

    model.train(sentences_novice, total_examples = model.corpus_count, epochs = nb_epochs, report_delay = 1)

    tests_epochs.append([
            (model.wv.most_similar(positive=['droite'])[i][0],
             model.wv.most_similar(positive=['vitesse'])[i][0],
             model.wv.most_similar(positive=['donc'])[i][0],
            )
         for i in range(10)
         ])

INFO - 01:04:04: collecting all words and their counts
INFO - 01:04:04: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO - 01:04:05: PROGRESS: at sentence #10000, processed 233965 words, keeping 30761 word types
INFO - 01:04:06: PROGRESS: at sentence #20000, processed 450032 words, keeping 39850 word types
INFO - 01:04:07: PROGRESS: at sentence #30000, processed 677772 words, keeping 48410 word types
INFO - 01:04:08: PROGRESS: at sentence #40000, processed 880381 words, keeping 51772 word types
INFO - 01:04:09: PROGRESS: at sentence #50000, processed 1095690 words, keeping 55266 word types
INFO - 01:04:09: collected 55316 word types from a corpus of 1099932 raw words and 50191 sentences
INFO - 01:04:09: Loading a fresh vocabulary
INFO - 01:04:09: effective_min_count=5 retains 26248 unique words (47% of original 55316, drops 29068)
INFO - 01:04:09: effective_min_count=5 leaves 1041812 word corpus (94% of original 1099932, drops 58120)
INFO - 01:04:09: deleting the 

INFO - 01:05:11: EPOCH 7 - PROGRESS: at 57.44% examples, 105880 words/s, in_qsize 0, out_qsize 0
INFO - 01:05:12: EPOCH 7 - PROGRESS: at 71.59% examples, 104363 words/s, in_qsize 0, out_qsize 0
INFO - 01:05:13: EPOCH 7 - PROGRESS: at 84.93% examples, 101802 words/s, in_qsize 1, out_qsize 0
INFO - 01:05:14: EPOCH 7 - PROGRESS: at 97.79% examples, 100627 words/s, in_qsize 4, out_qsize 0
INFO - 01:05:14: worker thread finished; awaiting finish of 2 more threads
INFO - 01:05:14: worker thread finished; awaiting finish of 1 more threads
INFO - 01:05:14: worker thread finished; awaiting finish of 0 more threads
INFO - 01:05:14: EPOCH - 7 : training on 1099932 raw words (748935 effective words) took 7.4s, 101529 effective words/s
INFO - 01:05:15: EPOCH 8 - PROGRESS: at 9.69% examples, 80663 words/s, in_qsize 1, out_qsize 0
INFO - 01:05:16: EPOCH 8 - PROGRESS: at 26.72% examples, 100547 words/s, in_qsize 0, out_qsize 0
INFO - 01:05:17: EPOCH 8 - PROGRESS: at 38.94% examples, 97063 words/s, in_

INFO - 01:06:18: EPOCH 14 - PROGRESS: at 81.71% examples, 97302 words/s, in_qsize 0, out_qsize 0
INFO - 01:06:19: worker thread finished; awaiting finish of 2 more threads
INFO - 01:06:19: worker thread finished; awaiting finish of 1 more threads
INFO - 01:06:19: EPOCH 14 - PROGRESS: at 100.00% examples, 102236 words/s, in_qsize 0, out_qsize 1
INFO - 01:06:19: worker thread finished; awaiting finish of 0 more threads
INFO - 01:06:19: EPOCH - 14 : training on 1099932 raw words (749076 effective words) took 7.3s, 102217 effective words/s
INFO - 01:06:20: EPOCH 15 - PROGRESS: at 12.09% examples, 101778 words/s, in_qsize 0, out_qsize 0
INFO - 01:06:21: EPOCH 15 - PROGRESS: at 23.37% examples, 91117 words/s, in_qsize 0, out_qsize 0
INFO - 01:06:22: EPOCH 15 - PROGRESS: at 38.12% examples, 96759 words/s, in_qsize 0, out_qsize 0
INFO - 01:06:23: EPOCH 15 - PROGRESS: at 53.64% examples, 102741 words/s, in_qsize 0, out_qsize 0
INFO - 01:06:24: EPOCH 15 - PROGRESS: at 65.34% examples, 98374 word

INFO - 01:07:20: EPOCH 22 - PROGRESS: at 94.60% examples, 114803 words/s, in_qsize 0, out_qsize 0
INFO - 01:07:21: worker thread finished; awaiting finish of 2 more threads
INFO - 01:07:21: worker thread finished; awaiting finish of 1 more threads
INFO - 01:07:21: worker thread finished; awaiting finish of 0 more threads
INFO - 01:07:21: EPOCH - 22 : training on 1099932 raw words (749082 effective words) took 6.5s, 114410 effective words/s
INFO - 01:07:22: EPOCH 23 - PROGRESS: at 10.44% examples, 89331 words/s, in_qsize 0, out_qsize 0
INFO - 01:07:23: EPOCH 23 - PROGRESS: at 24.42% examples, 94685 words/s, in_qsize 0, out_qsize 0
INFO - 01:07:24: EPOCH 23 - PROGRESS: at 38.12% examples, 95364 words/s, in_qsize 0, out_qsize 0
INFO - 01:07:25: EPOCH 23 - PROGRESS: at 52.74% examples, 98725 words/s, in_qsize 1, out_qsize 0
INFO - 01:07:26: EPOCH 23 - PROGRESS: at 66.47% examples, 98291 words/s, in_qsize 0, out_qsize 0
INFO - 01:07:27: EPOCH 23 - PROGRESS: at 83.31% examples, 100459 words/

INFO - 01:08:19: EPOCH 31 - PROGRESS: at 96.69% examples, 117800 words/s, in_qsize 1, out_qsize 0
INFO - 01:08:20: worker thread finished; awaiting finish of 2 more threads
INFO - 01:08:20: worker thread finished; awaiting finish of 1 more threads
INFO - 01:08:20: worker thread finished; awaiting finish of 0 more threads
INFO - 01:08:20: EPOCH - 31 : training on 1099932 raw words (749424 effective words) took 6.3s, 118680 effective words/s
INFO - 01:08:21: EPOCH 32 - PROGRESS: at 14.59% examples, 116166 words/s, in_qsize 0, out_qsize 0
INFO - 01:08:22: EPOCH 32 - PROGRESS: at 29.08% examples, 108333 words/s, in_qsize 0, out_qsize 0
INFO - 01:08:23: EPOCH 32 - PROGRESS: at 42.19% examples, 104932 words/s, in_qsize 0, out_qsize 0
INFO - 01:08:24: EPOCH 32 - PROGRESS: at 58.21% examples, 107409 words/s, in_qsize 0, out_qsize 0
INFO - 01:08:25: EPOCH 32 - PROGRESS: at 75.18% examples, 110138 words/s, in_qsize 0, out_qsize 0
INFO - 01:08:26: EPOCH 32 - PROGRESS: at 93.76% examples, 113700 w

INFO - 01:09:17: EPOCH 40 - PROGRESS: at 49.26% examples, 120573 words/s, in_qsize 0, out_qsize 0
INFO - 01:09:18: EPOCH 40 - PROGRESS: at 62.67% examples, 114656 words/s, in_qsize 0, out_qsize 0
INFO - 01:09:19: EPOCH 40 - PROGRESS: at 81.71% examples, 117033 words/s, in_qsize 0, out_qsize 0
INFO - 01:09:20: EPOCH 40 - PROGRESS: at 97.79% examples, 117680 words/s, in_qsize 0, out_qsize 0
INFO - 01:09:20: worker thread finished; awaiting finish of 2 more threads
INFO - 01:09:20: worker thread finished; awaiting finish of 1 more threads
INFO - 01:09:20: worker thread finished; awaiting finish of 0 more threads
INFO - 01:09:20: EPOCH - 40 : training on 1099932 raw words (749190 effective words) took 6.4s, 117733 effective words/s
INFO - 01:09:21: EPOCH 41 - PROGRESS: at 12.09% examples, 100105 words/s, in_qsize 1, out_qsize 0
INFO - 01:09:22: EPOCH 41 - PROGRESS: at 28.29% examples, 106847 words/s, in_qsize 0, out_qsize 0
INFO - 01:09:23: EPOCH 41 - PROGRESS: at 44.53% examples, 111286 w

INFO - 01:10:16: EPOCH 49 - PROGRESS: at 12.88% examples, 107534 words/s, in_qsize 0, out_qsize 0
INFO - 01:10:17: EPOCH 49 - PROGRESS: at 30.72% examples, 114279 words/s, in_qsize 1, out_qsize 0
INFO - 01:10:18: EPOCH 49 - PROGRESS: at 45.26% examples, 112189 words/s, in_qsize 0, out_qsize 0
INFO - 01:10:19: EPOCH 49 - PROGRESS: at 60.80% examples, 112946 words/s, in_qsize 0, out_qsize 0
INFO - 01:10:20: EPOCH 49 - PROGRESS: at 80.86% examples, 116545 words/s, in_qsize 0, out_qsize 0
INFO - 01:10:21: EPOCH 49 - PROGRESS: at 96.69% examples, 116274 words/s, in_qsize 0, out_qsize 0
INFO - 01:10:21: worker thread finished; awaiting finish of 2 more threads
INFO - 01:10:21: worker thread finished; awaiting finish of 1 more threads
INFO - 01:10:21: worker thread finished; awaiting finish of 0 more threads
INFO - 01:10:21: EPOCH - 49 : training on 1099932 raw words (749497 effective words) took 6.4s, 117426 effective words/s
INFO - 01:10:22: EPOCH 50 - PROGRESS: at 12.09% examples, 102096 w

INFO - 01:11:20: EPOCH 57 - PROGRESS: at 76.24% examples, 109910 words/s, in_qsize 0, out_qsize 0
INFO - 01:11:22: EPOCH 57 - PROGRESS: at 90.20% examples, 107784 words/s, in_qsize 0, out_qsize 0
INFO - 01:11:22: worker thread finished; awaiting finish of 2 more threads
INFO - 01:11:22: worker thread finished; awaiting finish of 1 more threads
INFO - 01:11:22: worker thread finished; awaiting finish of 0 more threads
INFO - 01:11:22: EPOCH - 57 : training on 1099932 raw words (749402 effective words) took 6.8s, 109808 effective words/s
INFO - 01:11:23: EPOCH 58 - PROGRESS: at 12.88% examples, 108059 words/s, in_qsize 1, out_qsize 0
INFO - 01:11:24: EPOCH 58 - PROGRESS: at 29.88% examples, 114619 words/s, in_qsize 1, out_qsize 0
INFO - 01:11:25: EPOCH 58 - PROGRESS: at 47.15% examples, 118427 words/s, in_qsize 0, out_qsize 0
INFO - 01:11:26: EPOCH 58 - PROGRESS: at 62.67% examples, 117352 words/s, in_qsize 0, out_qsize 0
INFO - 01:11:27: EPOCH 58 - PROGRESS: at 81.71% examples, 119141 w

INFO - 01:12:18: EPOCH 66 - PROGRESS: at 57.44% examples, 103305 words/s, in_qsize 0, out_qsize 0
INFO - 01:12:19: EPOCH 66 - PROGRESS: at 70.25% examples, 100710 words/s, in_qsize 0, out_qsize 0
INFO - 01:12:20: EPOCH 66 - PROGRESS: at 84.14% examples, 99806 words/s, in_qsize 0, out_qsize 0
INFO - 01:12:21: EPOCH 66 - PROGRESS: at 92.76% examples, 93068 words/s, in_qsize 0, out_qsize 0
INFO - 01:12:21: worker thread finished; awaiting finish of 2 more threads
INFO - 01:12:21: worker thread finished; awaiting finish of 1 more threads
INFO - 01:12:21: worker thread finished; awaiting finish of 0 more threads
INFO - 01:12:21: EPOCH - 66 : training on 1099932 raw words (749106 effective words) took 8.0s, 94200 effective words/s
INFO - 01:12:23: EPOCH 67 - PROGRESS: at 4.82% examples, 31097 words/s, in_qsize 0, out_qsize 0
INFO - 01:12:24: EPOCH 67 - PROGRESS: at 15.51% examples, 55016 words/s, in_qsize 0, out_qsize 0
INFO - 01:12:25: EPOCH 67 - PROGRESS: at 29.88% examples, 68087 words/s,

INFO - 01:13:19: worker thread finished; awaiting finish of 1 more threads
INFO - 01:13:19: worker thread finished; awaiting finish of 0 more threads
INFO - 01:13:19: EPOCH - 74 : training on 1099932 raw words (748933 effective words) took 8.0s, 93213 effective words/s
INFO - 01:13:20: EPOCH 75 - PROGRESS: at 15.51% examples, 126255 words/s, in_qsize 1, out_qsize 0
INFO - 01:13:21: EPOCH 75 - PROGRESS: at 30.72% examples, 117076 words/s, in_qsize 0, out_qsize 0
INFO - 01:13:22: EPOCH 75 - PROGRESS: at 47.15% examples, 119825 words/s, in_qsize 0, out_qsize 0
INFO - 01:13:23: EPOCH 75 - PROGRESS: at 65.34% examples, 121738 words/s, in_qsize 0, out_qsize 0
INFO - 01:13:24: EPOCH 75 - PROGRESS: at 84.14% examples, 122400 words/s, in_qsize 0, out_qsize 0
INFO - 01:13:25: worker thread finished; awaiting finish of 2 more threads
INFO - 01:13:25: worker thread finished; awaiting finish of 1 more threads
INFO - 01:13:25: worker thread finished; awaiting finish of 0 more threads
INFO - 01:13:25

INFO - 01:14:20: worker thread finished; awaiting finish of 2 more threads
INFO - 01:14:20: worker thread finished; awaiting finish of 1 more threads
INFO - 01:14:20: worker thread finished; awaiting finish of 0 more threads
INFO - 01:14:20: EPOCH - 83 : training on 1099932 raw words (749616 effective words) took 6.6s, 112758 effective words/s
INFO - 01:14:21: EPOCH 84 - PROGRESS: at 15.51% examples, 129262 words/s, in_qsize 0, out_qsize 0
INFO - 01:14:22: EPOCH 84 - PROGRESS: at 30.72% examples, 116933 words/s, in_qsize 0, out_qsize 0
INFO - 01:14:23: EPOCH 84 - PROGRESS: at 43.78% examples, 110615 words/s, in_qsize 0, out_qsize 0
INFO - 01:14:24: EPOCH 84 - PROGRESS: at 59.03% examples, 110790 words/s, in_qsize 0, out_qsize 0
INFO - 01:14:25: EPOCH 84 - PROGRESS: at 79.48% examples, 115587 words/s, in_qsize 1, out_qsize 0
INFO - 01:14:26: EPOCH 84 - PROGRESS: at 93.76% examples, 112731 words/s, in_qsize 0, out_qsize 0
INFO - 01:14:26: worker thread finished; awaiting finish of 2 more

INFO - 01:15:19: EPOCH 92 - PROGRESS: at 96.69% examples, 117885 words/s, in_qsize 0, out_qsize 0
INFO - 01:15:19: worker thread finished; awaiting finish of 2 more threads
INFO - 01:15:19: worker thread finished; awaiting finish of 1 more threads
INFO - 01:15:19: worker thread finished; awaiting finish of 0 more threads
INFO - 01:15:19: EPOCH - 92 : training on 1099932 raw words (748727 effective words) took 6.3s, 119152 effective words/s
INFO - 01:15:20: EPOCH 93 - PROGRESS: at 14.59% examples, 121838 words/s, in_qsize 0, out_qsize 0
INFO - 01:15:21: EPOCH 93 - PROGRESS: at 29.08% examples, 110880 words/s, in_qsize 0, out_qsize 0
INFO - 01:15:22: EPOCH 93 - PROGRESS: at 44.53% examples, 113252 words/s, in_qsize 0, out_qsize 0
INFO - 01:15:23: EPOCH 93 - PROGRESS: at 59.89% examples, 113356 words/s, in_qsize 0, out_qsize 0
INFO - 01:15:24: EPOCH 93 - PROGRESS: at 73.50% examples, 109372 words/s, in_qsize 0, out_qsize 0
INFO - 01:15:25: EPOCH 93 - PROGRESS: at 90.98% examples, 112322 w

INFO - 01:16:17: EPOCH 101 - PROGRESS: at 48.16% examples, 118677 words/s, in_qsize 0, out_qsize 0
INFO - 01:16:18: EPOCH 101 - PROGRESS: at 67.56% examples, 123972 words/s, in_qsize 1, out_qsize 0
INFO - 01:16:19: EPOCH 101 - PROGRESS: at 87.49% examples, 126745 words/s, in_qsize 1, out_qsize 0
INFO - 01:16:19: worker thread finished; awaiting finish of 2 more threads
INFO - 01:16:19: worker thread finished; awaiting finish of 1 more threads
INFO - 01:16:19: worker thread finished; awaiting finish of 0 more threads
INFO - 01:16:19: EPOCH - 101 : training on 1099932 raw words (748942 effective words) took 5.9s, 127600 effective words/s
INFO - 01:16:20: EPOCH 102 - PROGRESS: at 15.51% examples, 130293 words/s, in_qsize 0, out_qsize 0
INFO - 01:16:21: EPOCH 102 - PROGRESS: at 34.14% examples, 131743 words/s, in_qsize 0, out_qsize 0
INFO - 01:16:22: EPOCH 102 - PROGRESS: at 51.93% examples, 130808 words/s, in_qsize 0, out_qsize 0
INFO - 01:16:23: EPOCH 102 - PROGRESS: at 68.31% examples, 

INFO - 01:17:13: EPOCH - 110 : training on 1099932 raw words (749428 effective words) took 5.7s, 132175 effective words/s
INFO - 01:17:14: EPOCH 111 - PROGRESS: at 16.52% examples, 129742 words/s, in_qsize 0, out_qsize 0
INFO - 01:17:15: EPOCH 111 - PROGRESS: at 36.24% examples, 132792 words/s, in_qsize 1, out_qsize 0
INFO - 01:17:16: EPOCH 111 - PROGRESS: at 54.41% examples, 134312 words/s, in_qsize 1, out_qsize 0
INFO - 01:17:17: EPOCH 111 - PROGRESS: at 74.28% examples, 134012 words/s, in_qsize 0, out_qsize 0
INFO - 01:17:18: EPOCH 111 - PROGRESS: at 93.76% examples, 134987 words/s, in_qsize 0, out_qsize 0
INFO - 01:17:18: worker thread finished; awaiting finish of 2 more threads
INFO - 01:17:18: worker thread finished; awaiting finish of 1 more threads
INFO - 01:17:18: worker thread finished; awaiting finish of 0 more threads
INFO - 01:17:18: EPOCH - 111 : training on 1099932 raw words (749100 effective words) took 5.5s, 136174 effective words/s
INFO - 01:17:19: EPOCH 112 - PROGRES

INFO - 01:18:09: EPOCH 120 - PROGRESS: at 63.54% examples, 118776 words/s, in_qsize 0, out_qsize 0
INFO - 01:18:10: EPOCH 120 - PROGRESS: at 83.31% examples, 120442 words/s, in_qsize 0, out_qsize 0
INFO - 01:18:11: worker thread finished; awaiting finish of 2 more threads
INFO - 01:18:11: worker thread finished; awaiting finish of 1 more threads
INFO - 01:18:11: worker thread finished; awaiting finish of 0 more threads
INFO - 01:18:11: EPOCH - 120 : training on 1099932 raw words (749120 effective words) took 6.1s, 123492 effective words/s
INFO - 01:18:12: EPOCH 121 - PROGRESS: at 16.52% examples, 127197 words/s, in_qsize 0, out_qsize 0
INFO - 01:18:13: EPOCH 121 - PROGRESS: at 35.26% examples, 128956 words/s, in_qsize 0, out_qsize 0
INFO - 01:18:14: EPOCH 121 - PROGRESS: at 53.64% examples, 131259 words/s, in_qsize 0, out_qsize 0
INFO - 01:18:15: EPOCH 121 - PROGRESS: at 72.66% examples, 131181 words/s, in_qsize 0, out_qsize 0
INFO - 01:18:16: EPOCH 121 - PROGRESS: at 90.20% examples, 

INFO - 01:19:07: worker thread finished; awaiting finish of 2 more threads
INFO - 01:19:07: worker thread finished; awaiting finish of 1 more threads
INFO - 01:19:07: worker thread finished; awaiting finish of 0 more threads
INFO - 01:19:07: EPOCH - 129 : training on 1099932 raw words (748862 effective words) took 6.5s, 114357 effective words/s
INFO - 01:19:08: EPOCH 130 - PROGRESS: at 12.88% examples, 108204 words/s, in_qsize 0, out_qsize 0
INFO - 01:19:09: EPOCH 130 - PROGRESS: at 31.64% examples, 114464 words/s, in_qsize 0, out_qsize 0
INFO - 01:19:10: EPOCH 130 - PROGRESS: at 40.63% examples, 96651 words/s, in_qsize 0, out_qsize 0
INFO - 01:19:11: EPOCH 130 - PROGRESS: at 48.16% examples, 86226 words/s, in_qsize 0, out_qsize 0
INFO - 01:19:12: EPOCH 130 - PROGRESS: at 62.67% examples, 90067 words/s, in_qsize 0, out_qsize 0
INFO - 01:19:13: EPOCH 130 - PROGRESS: at 76.24% examples, 90922 words/s, in_qsize 0, out_qsize 0
INFO - 01:19:15: EPOCH 130 - PROGRESS: at 90.98% examples, 9326

INFO - 01:20:05: worker thread finished; awaiting finish of 2 more threads
INFO - 01:20:05: worker thread finished; awaiting finish of 1 more threads
INFO - 01:20:05: worker thread finished; awaiting finish of 0 more threads
INFO - 01:20:05: EPOCH - 138 : training on 1099932 raw words (748774 effective words) took 5.5s, 135466 effective words/s
INFO - 01:20:07: EPOCH 139 - PROGRESS: at 16.52% examples, 130405 words/s, in_qsize 1, out_qsize 0
INFO - 01:20:08: EPOCH 139 - PROGRESS: at 36.24% examples, 132854 words/s, in_qsize 1, out_qsize 0
INFO - 01:20:09: EPOCH 139 - PROGRESS: at 54.41% examples, 134114 words/s, in_qsize 0, out_qsize 0
INFO - 01:20:10: EPOCH 139 - PROGRESS: at 74.28% examples, 134097 words/s, in_qsize 0, out_qsize 0
INFO - 01:20:11: EPOCH 139 - PROGRESS: at 93.76% examples, 134881 words/s, in_qsize 0, out_qsize 0
INFO - 01:20:11: worker thread finished; awaiting finish of 2 more threads
INFO - 01:20:11: worker thread finished; awaiting finish of 1 more threads
INFO - 0

INFO - 01:21:01: EPOCH 148 - PROGRESS: at 15.51% examples, 125532 words/s, in_qsize 0, out_qsize 0
INFO - 01:21:03: EPOCH 148 - PROGRESS: at 33.28% examples, 124292 words/s, in_qsize 0, out_qsize 0
INFO - 01:21:04: EPOCH 148 - PROGRESS: at 51.09% examples, 126906 words/s, in_qsize 0, out_qsize 0
INFO - 01:21:05: EPOCH 148 - PROGRESS: at 69.26% examples, 128233 words/s, in_qsize 0, out_qsize 0
INFO - 01:21:06: EPOCH 148 - PROGRESS: at 88.45% examples, 127928 words/s, in_qsize 0, out_qsize 0
INFO - 01:21:06: worker thread finished; awaiting finish of 2 more threads
INFO - 01:21:06: worker thread finished; awaiting finish of 1 more threads
INFO - 01:21:06: worker thread finished; awaiting finish of 0 more threads
INFO - 01:21:06: EPOCH - 148 : training on 1099932 raw words (749398 effective words) took 5.8s, 129340 effective words/s
INFO - 01:21:07: EPOCH 149 - PROGRESS: at 15.51% examples, 129542 words/s, in_qsize 0, out_qsize 0
INFO - 01:21:08: EPOCH 149 - PROGRESS: at 32.44% examples, 

INFO - 01:22:06: EPOCH 156 - PROGRESS: at 96.69% examples, 118202 words/s, in_qsize 0, out_qsize 0
INFO - 01:22:06: worker thread finished; awaiting finish of 2 more threads
INFO - 01:22:06: worker thread finished; awaiting finish of 1 more threads
INFO - 01:22:06: worker thread finished; awaiting finish of 0 more threads
INFO - 01:22:06: EPOCH - 156 : training on 1099932 raw words (749051 effective words) took 6.3s, 119238 effective words/s
INFO - 01:22:07: EPOCH 157 - PROGRESS: at 12.88% examples, 110026 words/s, in_qsize 0, out_qsize 0
INFO - 01:22:08: EPOCH 157 - PROGRESS: at 31.64% examples, 120558 words/s, in_qsize 0, out_qsize 0
INFO - 01:22:09: EPOCH 157 - PROGRESS: at 47.15% examples, 120928 words/s, in_qsize 0, out_qsize 0
INFO - 01:22:10: EPOCH 157 - PROGRESS: at 62.67% examples, 117897 words/s, in_qsize 0, out_qsize 0
INFO - 01:22:11: EPOCH 157 - PROGRESS: at 80.86% examples, 117721 words/s, in_qsize 1, out_qsize 0
INFO - 01:22:12: EPOCH 157 - PROGRESS: at 96.69% examples, 

INFO - 01:23:03: EPOCH 165 - PROGRESS: at 67.56% examples, 125983 words/s, in_qsize 0, out_qsize 0
INFO - 01:23:04: EPOCH 165 - PROGRESS: at 86.59% examples, 127067 words/s, in_qsize 0, out_qsize 0
INFO - 01:23:05: worker thread finished; awaiting finish of 2 more threads
INFO - 01:23:05: worker thread finished; awaiting finish of 1 more threads
INFO - 01:23:05: worker thread finished; awaiting finish of 0 more threads
INFO - 01:23:05: EPOCH - 165 : training on 1099932 raw words (748838 effective words) took 5.8s, 128577 effective words/s
INFO - 01:23:06: EPOCH 166 - PROGRESS: at 14.59% examples, 122079 words/s, in_qsize 0, out_qsize 0
INFO - 01:23:07: EPOCH 166 - PROGRESS: at 31.64% examples, 120751 words/s, in_qsize 0, out_qsize 0
INFO - 01:23:08: EPOCH 166 - PROGRESS: at 48.16% examples, 120668 words/s, in_qsize 0, out_qsize 0
INFO - 01:23:09: EPOCH 166 - PROGRESS: at 63.54% examples, 117969 words/s, in_qsize 0, out_qsize 0
INFO - 01:23:10: EPOCH 166 - PROGRESS: at 81.71% examples, 

INFO - 01:24:02: worker thread finished; awaiting finish of 2 more threads
INFO - 01:24:02: worker thread finished; awaiting finish of 1 more threads
INFO - 01:24:02: worker thread finished; awaiting finish of 0 more threads
INFO - 01:24:02: EPOCH - 174 : training on 1099932 raw words (749241 effective words) took 6.9s, 108654 effective words/s
INFO - 01:24:03: EPOCH 175 - PROGRESS: at 12.88% examples, 109862 words/s, in_qsize 0, out_qsize 0
INFO - 01:24:04: EPOCH 175 - PROGRESS: at 27.51% examples, 103473 words/s, in_qsize 0, out_qsize 0
INFO - 01:24:05: EPOCH 175 - PROGRESS: at 39.79% examples, 99313 words/s, in_qsize 0, out_qsize 0
INFO - 01:24:06: EPOCH 175 - PROGRESS: at 51.09% examples, 95624 words/s, in_qsize 0, out_qsize 0
INFO - 01:24:08: EPOCH 175 - PROGRESS: at 64.46% examples, 95614 words/s, in_qsize 0, out_qsize 0
INFO - 01:24:09: EPOCH 175 - PROGRESS: at 84.14% examples, 101845 words/s, in_qsize 1, out_qsize 0
INFO - 01:24:09: worker thread finished; awaiting finish of 2 

INFO - 01:24:59: worker thread finished; awaiting finish of 0 more threads
INFO - 01:24:59: EPOCH - 183 : training on 1099932 raw words (749391 effective words) took 6.1s, 122401 effective words/s
INFO - 01:25:00: EPOCH 184 - PROGRESS: at 13.68% examples, 115902 words/s, in_qsize 0, out_qsize 1
INFO - 01:25:01: EPOCH 184 - PROGRESS: at 33.28% examples, 124399 words/s, in_qsize 0, out_qsize 0
INFO - 01:25:02: EPOCH 184 - PROGRESS: at 51.01% examples, 123791 words/s, in_qsize 0, out_qsize 1
INFO - 01:25:03: EPOCH 184 - PROGRESS: at 69.26% examples, 125756 words/s, in_qsize 0, out_qsize 0
INFO - 01:25:04: EPOCH 184 - PROGRESS: at 87.49% examples, 126267 words/s, in_qsize 0, out_qsize 0
INFO - 01:25:05: worker thread finished; awaiting finish of 2 more threads
INFO - 01:25:05: worker thread finished; awaiting finish of 1 more threads
INFO - 01:25:05: worker thread finished; awaiting finish of 0 more threads
INFO - 01:25:05: EPOCH - 184 : training on 1099932 raw words (749102 effective word

INFO - 01:25:56: worker thread finished; awaiting finish of 1 more threads
INFO - 01:25:57: worker thread finished; awaiting finish of 0 more threads
INFO - 01:25:57: EPOCH - 192 : training on 1099932 raw words (749423 effective words) took 6.6s, 114055 effective words/s
INFO - 01:25:58: EPOCH 193 - PROGRESS: at 13.68% examples, 115410 words/s, in_qsize 0, out_qsize 0
INFO - 01:25:59: EPOCH 193 - PROGRESS: at 32.44% examples, 123697 words/s, in_qsize 1, out_qsize 0
INFO - 01:26:00: EPOCH 193 - PROGRESS: at 50.17% examples, 126672 words/s, in_qsize 1, out_qsize 0
INFO - 01:26:01: EPOCH 193 - PROGRESS: at 67.56% examples, 126005 words/s, in_qsize 1, out_qsize 0
INFO - 01:26:02: EPOCH 193 - PROGRESS: at 85.67% examples, 125623 words/s, in_qsize 0, out_qsize 0
INFO - 01:26:02: worker thread finished; awaiting finish of 2 more threads
INFO - 01:26:02: worker thread finished; awaiting finish of 1 more threads
INFO - 01:26:02: worker thread finished; awaiting finish of 0 more threads
INFO - 0

INFO - 01:26:58: EPOCH - 200 : training on 1099932 raw words (749259 effective words) took 6.1s, 123087 effective words/s
INFO - 01:26:59: EPOCH 201 - PROGRESS: at 12.09% examples, 99443 words/s, in_qsize 0, out_qsize 0
INFO - 01:27:00: EPOCH 201 - PROGRESS: at 28.29% examples, 106873 words/s, in_qsize 0, out_qsize 0
INFO - 01:27:01: EPOCH 201 - PROGRESS: at 42.96% examples, 109404 words/s, in_qsize 0, out_qsize 0
INFO - 01:27:02: EPOCH 201 - PROGRESS: at 59.03% examples, 111656 words/s, in_qsize 0, out_qsize 0
INFO - 01:27:03: EPOCH 201 - PROGRESS: at 75.18% examples, 111576 words/s, in_qsize 0, out_qsize 0
INFO - 01:27:04: EPOCH 201 - PROGRESS: at 89.34% examples, 108297 words/s, in_qsize 0, out_qsize 0
INFO - 01:27:04: worker thread finished; awaiting finish of 2 more threads
INFO - 01:27:04: worker thread finished; awaiting finish of 1 more threads
INFO - 01:27:04: worker thread finished; awaiting finish of 0 more threads
INFO - 01:27:04: EPOCH - 201 : training on 1099932 raw words

INFO - 01:27:58: EPOCH 209 - PROGRESS: at 85.67% examples, 102649 words/s, in_qsize 0, out_qsize 0
INFO - 01:28:00: EPOCH 209 - PROGRESS: at 97.79% examples, 99912 words/s, in_qsize 0, out_qsize 0
INFO - 01:28:00: worker thread finished; awaiting finish of 2 more threads
INFO - 01:28:00: worker thread finished; awaiting finish of 1 more threads
INFO - 01:28:00: worker thread finished; awaiting finish of 0 more threads
INFO - 01:28:00: EPOCH - 209 : training on 1099932 raw words (749465 effective words) took 7.6s, 98698 effective words/s
INFO - 01:28:01: EPOCH 210 - PROGRESS: at 8.94% examples, 72520 words/s, in_qsize 0, out_qsize 0
INFO - 01:28:02: EPOCH 210 - PROGRESS: at 24.42% examples, 92961 words/s, in_qsize 0, out_qsize 0
INFO - 01:28:03: EPOCH 210 - PROGRESS: at 41.41% examples, 104782 words/s, in_qsize 1, out_qsize 0
INFO - 01:28:04: EPOCH 210 - PROGRESS: at 59.03% examples, 111402 words/s, in_qsize 0, out_qsize 0
INFO - 01:28:05: EPOCH 210 - PROGRESS: at 78.07% examples, 11479

INFO - 01:28:56: worker thread finished; awaiting finish of 2 more threads
INFO - 01:28:56: worker thread finished; awaiting finish of 1 more threads
INFO - 01:28:56: worker thread finished; awaiting finish of 0 more threads
INFO - 01:28:56: EPOCH - 218 : training on 1099932 raw words (749305 effective words) took 5.7s, 132241 effective words/s
INFO - 01:28:57: EPOCH 219 - PROGRESS: at 15.51% examples, 125246 words/s, in_qsize 0, out_qsize 0
INFO - 01:28:58: EPOCH 219 - PROGRESS: at 34.14% examples, 129303 words/s, in_qsize 0, out_qsize 0
INFO - 01:28:59: EPOCH 219 - PROGRESS: at 51.93% examples, 129204 words/s, in_qsize 0, out_qsize 0
INFO - 01:29:00: EPOCH 219 - PROGRESS: at 69.26% examples, 128691 words/s, in_qsize 0, out_qsize 0
INFO - 01:29:01: EPOCH 219 - PROGRESS: at 89.32% examples, 129289 words/s, in_qsize 0, out_qsize 0
INFO - 01:29:01: worker thread finished; awaiting finish of 2 more threads
INFO - 01:29:01: worker thread finished; awaiting finish of 1 more threads
INFO - 0

INFO - 01:29:50: EPOCH 228 - PROGRESS: at 30.72% examples, 116913 words/s, in_qsize 1, out_qsize 0
INFO - 01:29:51: EPOCH 228 - PROGRESS: at 46.07% examples, 117093 words/s, in_qsize 0, out_qsize 0
INFO - 01:29:52: EPOCH 228 - PROGRESS: at 64.46% examples, 121228 words/s, in_qsize 1, out_qsize 0
INFO - 01:29:53: EPOCH 228 - PROGRESS: at 84.14% examples, 122783 words/s, in_qsize 0, out_qsize 0
INFO - 01:29:54: worker thread finished; awaiting finish of 2 more threads
INFO - 01:29:54: worker thread finished; awaiting finish of 1 more threads
INFO - 01:29:54: worker thread finished; awaiting finish of 0 more threads
INFO - 01:29:54: EPOCH - 228 : training on 1099932 raw words (748984 effective words) took 6.1s, 122935 effective words/s
INFO - 01:29:55: EPOCH 229 - PROGRESS: at 15.51% examples, 124992 words/s, in_qsize 0, out_qsize 0
INFO - 01:29:56: EPOCH 229 - PROGRESS: at 33.28% examples, 126840 words/s, in_qsize 0, out_qsize 0
INFO - 01:29:57: EPOCH 229 - PROGRESS: at 50.17% examples, 

INFO - 01:30:49: EPOCH 237 - PROGRESS: at 28.29% examples, 107760 words/s, in_qsize 0, out_qsize 0
INFO - 01:30:50: EPOCH 237 - PROGRESS: at 39.79% examples, 100247 words/s, in_qsize 0, out_qsize 0
INFO - 01:30:51: EPOCH 237 - PROGRESS: at 51.93% examples, 97293 words/s, in_qsize 0, out_qsize 0
INFO - 01:30:53: EPOCH 237 - PROGRESS: at 65.34% examples, 96603 words/s, in_qsize 0, out_qsize 0
INFO - 01:30:54: EPOCH 237 - PROGRESS: at 79.48% examples, 95385 words/s, in_qsize 0, out_qsize 0
INFO - 01:30:55: EPOCH 237 - PROGRESS: at 96.69% examples, 99421 words/s, in_qsize 0, out_qsize 0
INFO - 01:30:55: worker thread finished; awaiting finish of 2 more threads
INFO - 01:30:55: worker thread finished; awaiting finish of 1 more threads
INFO - 01:30:55: worker thread finished; awaiting finish of 0 more threads
INFO - 01:30:55: EPOCH - 237 : training on 1099932 raw words (749251 effective words) took 7.4s, 100699 effective words/s
INFO - 01:30:56: EPOCH 238 - PROGRESS: at 13.68% examples, 1145

In [None]:
tests_epochs

In [57]:
w2v_model_novice = Word2Vec(
                     window = 1,
                     size = 300,
                     sample = 6e-5, 
                     alpha = 0.03, 
                     min_alpha = 0.0007, 
                     negative = 20,
                     workers = cores - 1,
                     compute_loss = True)

w2v_model_exp = Word2Vec(
                     window = 4,
                     size = 300,
                     sample = 6e-5, 
                     alpha = 0.03, 
                     min_alpha = 0.0007, 
                     negative = 20,
                     workers = cores - 1,
                     compute_loss = True)

In [58]:
t = time()

w2v_model_novice.build_vocab(sentences_novice, progress_per = 10000)

print('Time to build vocab: {} mins'.format(round((time() - t) / 60, 2)))

t = time()

w2v_model_novice.train(sentences_novice, total_examples = w2v_model_novice.corpus_count, epochs = 5, report_delay = 1)

print('Time to train the model: {} mins'.format(round((time() - t) / 60, 2)))

INFO - 21:28:19: collecting all words and their counts
INFO - 21:28:19: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO - 21:28:19: PROGRESS: at sentence #10000, processed 233965 words, keeping 30761 word types
INFO - 21:28:20: PROGRESS: at sentence #20000, processed 450032 words, keeping 39850 word types
INFO - 21:28:21: PROGRESS: at sentence #30000, processed 677772 words, keeping 48410 word types
INFO - 21:28:22: PROGRESS: at sentence #40000, processed 880381 words, keeping 51772 word types
INFO - 21:28:22: PROGRESS: at sentence #50000, processed 1095690 words, keeping 55266 word types
INFO - 21:28:22: collected 55316 word types from a corpus of 1099932 raw words and 50191 sentences
INFO - 21:28:22: Loading a fresh vocabulary
INFO - 21:28:22: effective_min_count=5 retains 26248 unique words (47% of original 55316, drops 29068)
INFO - 21:28:22: effective_min_count=5 leaves 1041812 word corpus (94% of original 1099932, drops 58120)
INFO - 21:28:23: deleting the 

Time to build vocab: 0.18 mins


INFO - 21:28:30: EPOCH 1 - PROGRESS: at 14.59% examples, 119085 words/s, in_qsize 0, out_qsize 1
INFO - 21:28:31: EPOCH 1 - PROGRESS: at 29.88% examples, 113220 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:32: EPOCH 1 - PROGRESS: at 41.41% examples, 104014 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:33: EPOCH 1 - PROGRESS: at 55.41% examples, 104272 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:34: EPOCH 1 - PROGRESS: at 70.25% examples, 104299 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:35: EPOCH 1 - PROGRESS: at 85.67% examples, 104728 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:36: worker thread finished; awaiting finish of 2 more threads
INFO - 21:28:36: worker thread finished; awaiting finish of 1 more threads
INFO - 21:28:36: worker thread finished; awaiting finish of 0 more threads
INFO - 21:28:36: EPOCH - 1 : training on 1099932 raw words (749232 effective words) took 6.9s, 108082 effective words/s


Loss after epoch 0: 0.0


INFO - 21:28:37: EPOCH 2 - PROGRESS: at 14.59% examples, 123502 words/s, in_qsize 1, out_qsize 0
INFO - 21:28:38: EPOCH 2 - PROGRESS: at 32.44% examples, 125605 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:39: EPOCH 2 - PROGRESS: at 50.17% examples, 128152 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:40: EPOCH 2 - PROGRESS: at 66.47% examples, 126086 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:41: EPOCH 2 - PROGRESS: at 82.48% examples, 121550 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:42: EPOCH 2 - PROGRESS: at 97.94% examples, 120822 words/s, in_qsize 2, out_qsize 1
INFO - 21:28:42: worker thread finished; awaiting finish of 2 more threads
INFO - 21:28:42: worker thread finished; awaiting finish of 1 more threads
INFO - 21:28:42: worker thread finished; awaiting finish of 0 more threads
INFO - 21:28:42: EPOCH - 2 : training on 1099932 raw words (749796 effective words) took 6.2s, 121716 effective words/s


Loss after epoch 1: 0.0


INFO - 21:28:43: EPOCH 3 - PROGRESS: at 13.68% examples, 114145 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:44: EPOCH 3 - PROGRESS: at 31.64% examples, 120600 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:45: EPOCH 3 - PROGRESS: at 45.26% examples, 115764 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:46: EPOCH 3 - PROGRESS: at 60.80% examples, 114749 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:47: EPOCH 3 - PROGRESS: at 78.07% examples, 116350 words/s, in_qsize 1, out_qsize 0
INFO - 21:28:48: EPOCH 3 - PROGRESS: at 92.94% examples, 114964 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:49: worker thread finished; awaiting finish of 2 more threads
INFO - 21:28:49: worker thread finished; awaiting finish of 1 more threads
INFO - 21:28:49: worker thread finished; awaiting finish of 0 more threads
INFO - 21:28:49: EPOCH - 3 : training on 1099932 raw words (749338 effective words) took 6.6s, 113274 effective words/s


Loss after epoch 2: 0.0


INFO - 21:28:50: EPOCH 4 - PROGRESS: at 11.29% examples, 88611 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:51: EPOCH 4 - PROGRESS: at 26.72% examples, 98172 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:52: EPOCH 4 - PROGRESS: at 42.19% examples, 105034 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:53: EPOCH 4 - PROGRESS: at 59.89% examples, 110851 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:54: EPOCH 4 - PROGRESS: at 76.24% examples, 111668 words/s, in_qsize 1, out_qsize 0
INFO - 21:28:55: EPOCH 4 - PROGRESS: at 92.94% examples, 112536 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:56: worker thread finished; awaiting finish of 2 more threads
INFO - 21:28:56: worker thread finished; awaiting finish of 1 more threads
INFO - 21:28:56: worker thread finished; awaiting finish of 0 more threads
INFO - 21:28:56: EPOCH - 4 : training on 1099932 raw words (748841 effective words) took 6.6s, 113427 effective words/s


Loss after epoch 3: 0.0


INFO - 21:28:57: EPOCH 5 - PROGRESS: at 13.68% examples, 112279 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:58: EPOCH 5 - PROGRESS: at 31.64% examples, 117498 words/s, in_qsize 0, out_qsize 0
INFO - 21:28:59: EPOCH 5 - PROGRESS: at 48.16% examples, 118506 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:00: EPOCH 5 - PROGRESS: at 64.46% examples, 118211 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:01: EPOCH 5 - PROGRESS: at 80.86% examples, 116209 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:02: EPOCH 5 - PROGRESS: at 97.79% examples, 117621 words/s, in_qsize 3, out_qsize 0
INFO - 21:29:02: worker thread finished; awaiting finish of 2 more threads
INFO - 21:29:02: worker thread finished; awaiting finish of 1 more threads
INFO - 21:29:02: worker thread finished; awaiting finish of 0 more threads
INFO - 21:29:02: EPOCH - 5 : training on 1099932 raw words (749292 effective words) took 6.3s, 118681 effective words/s
INFO - 21:29:02: training on a 5499660 raw words (3746499 effective words

Loss after epoch 4: 0.0
Time to train the model: 0.54 mins


In [59]:
t = time()

w2v_model_exp.build_vocab(sentences_novice, progress_per = 10000)

print('Time to build vocab: {} mins'.format(round((time() - t) / 60, 2)))

t = time()

w2v_model_exp.train(sentences_novice, total_examples = w2v_model_exp.corpus_count, epochs = 10, report_delay = 1)

print('Time to train the model: {} mins'.format(round((time() - t) / 60, 2)))

INFO - 21:29:02: collecting all words and their counts
INFO - 21:29:02: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO - 21:29:03: PROGRESS: at sentence #10000, processed 233965 words, keeping 30761 word types
INFO - 21:29:04: PROGRESS: at sentence #20000, processed 450032 words, keeping 39850 word types
INFO - 21:29:05: PROGRESS: at sentence #30000, processed 677772 words, keeping 48410 word types
INFO - 21:29:05: PROGRESS: at sentence #40000, processed 880381 words, keeping 51772 word types
INFO - 21:29:06: PROGRESS: at sentence #50000, processed 1095690 words, keeping 55266 word types
INFO - 21:29:06: collected 55316 word types from a corpus of 1099932 raw words and 50191 sentences
INFO - 21:29:06: Loading a fresh vocabulary
INFO - 21:29:06: effective_min_count=5 retains 26248 unique words (47% of original 55316, drops 29068)
INFO - 21:29:06: effective_min_count=5 leaves 1041812 word corpus (94% of original 1099932, drops 58120)
INFO - 21:29:06: deleting the 

Time to build vocab: 0.19 mins


INFO - 21:29:14: EPOCH 1 - PROGRESS: at 8.94% examples, 75749 words/s, in_qsize 1, out_qsize 0
INFO - 21:29:15: EPOCH 1 - PROGRESS: at 18.67% examples, 74386 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:16: EPOCH 1 - PROGRESS: at 29.08% examples, 72742 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:17: EPOCH 1 - PROGRESS: at 42.19% examples, 79422 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:18: EPOCH 1 - PROGRESS: at 55.41% examples, 82884 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:19: EPOCH 1 - PROGRESS: at 69.26% examples, 85337 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:20: EPOCH 1 - PROGRESS: at 83.31% examples, 86118 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:21: EPOCH 1 - PROGRESS: at 96.69% examples, 87870 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:22: worker thread finished; awaiting finish of 2 more threads
INFO - 21:29:22: worker thread finished; awaiting finish of 1 more threads
INFO - 21:29:22: worker thread finished; awaiting finish of 0 more threads
INFO - 2

Loss after epoch 0: 0.0


INFO - 21:29:23: EPOCH 2 - PROGRESS: at 12.09% examples, 102900 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:24: EPOCH 2 - PROGRESS: at 28.29% examples, 108079 words/s, in_qsize 1, out_qsize 0
INFO - 21:29:25: EPOCH 2 - PROGRESS: at 42.96% examples, 108831 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:26: EPOCH 2 - PROGRESS: at 57.44% examples, 108245 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:27: EPOCH 2 - PROGRESS: at 73.50% examples, 109556 words/s, in_qsize 1, out_qsize 0
INFO - 21:29:28: EPOCH 2 - PROGRESS: at 89.34% examples, 110488 words/s, in_qsize 1, out_qsize 0
INFO - 21:29:28: worker thread finished; awaiting finish of 2 more threads
INFO - 21:29:28: worker thread finished; awaiting finish of 1 more threads
INFO - 21:29:28: worker thread finished; awaiting finish of 0 more threads
INFO - 21:29:28: EPOCH - 2 : training on 1099932 raw words (749026 effective words) took 6.7s, 111941 effective words/s


Loss after epoch 1: 0.0


INFO - 21:29:29: EPOCH 3 - PROGRESS: at 13.68% examples, 116468 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:30: EPOCH 3 - PROGRESS: at 30.72% examples, 118834 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:31: EPOCH 3 - PROGRESS: at 45.26% examples, 116017 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:32: EPOCH 3 - PROGRESS: at 61.71% examples, 117252 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:33: EPOCH 3 - PROGRESS: at 76.24% examples, 112925 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:34: EPOCH 3 - PROGRESS: at 84.88% examples, 103613 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:35: EPOCH 3 - PROGRESS: at 97.79% examples, 102387 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:36: worker thread finished; awaiting finish of 2 more threads
INFO - 21:29:36: worker thread finished; awaiting finish of 1 more threads
INFO - 21:29:36: worker thread finished; awaiting finish of 0 more threads
INFO - 21:29:36: EPOCH - 3 : training on 1099932 raw words (749041 effective words) took 7.4s, 1

Loss after epoch 2: 0.0


INFO - 21:29:37: EPOCH 4 - PROGRESS: at 12.09% examples, 96841 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:38: EPOCH 4 - PROGRESS: at 28.29% examples, 104410 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:39: EPOCH 4 - PROGRESS: at 42.96% examples, 107451 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:40: EPOCH 4 - PROGRESS: at 58.21% examples, 109042 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:41: EPOCH 4 - PROGRESS: at 74.28% examples, 110096 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:42: EPOCH 4 - PROGRESS: at 89.34% examples, 109838 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:43: worker thread finished; awaiting finish of 2 more threads
INFO - 21:29:43: worker thread finished; awaiting finish of 1 more threads
INFO - 21:29:43: worker thread finished; awaiting finish of 0 more threads
INFO - 21:29:43: EPOCH - 4 : training on 1099932 raw words (749485 effective words) took 7.0s, 107413 effective words/s


Loss after epoch 3: 0.0


INFO - 21:29:44: EPOCH 5 - PROGRESS: at 5.60% examples, 45192 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:45: EPOCH 5 - PROGRESS: at 18.67% examples, 72414 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:46: EPOCH 5 - PROGRESS: at 32.44% examples, 80824 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:47: EPOCH 5 - PROGRESS: at 49.26% examples, 91395 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:48: EPOCH 5 - PROGRESS: at 66.47% examples, 97581 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:49: EPOCH 5 - PROGRESS: at 84.93% examples, 102339 words/s, in_qsize 1, out_qsize 0
INFO - 21:29:50: worker thread finished; awaiting finish of 2 more threads
INFO - 21:29:50: worker thread finished; awaiting finish of 1 more threads
INFO - 21:29:50: worker thread finished; awaiting finish of 0 more threads
INFO - 21:29:50: EPOCH - 5 : training on 1099932 raw words (749059 effective words) took 7.1s, 105619 effective words/s


Loss after epoch 4: 0.0


INFO - 21:29:51: EPOCH 6 - PROGRESS: at 13.68% examples, 107661 words/s, in_qsize 0, out_qsize 1
INFO - 21:29:52: EPOCH 6 - PROGRESS: at 28.29% examples, 104059 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:53: EPOCH 6 - PROGRESS: at 43.78% examples, 109781 words/s, in_qsize 1, out_qsize 0
INFO - 21:29:54: EPOCH 6 - PROGRESS: at 59.89% examples, 112487 words/s, in_qsize 1, out_qsize 0
INFO - 21:29:55: EPOCH 6 - PROGRESS: at 77.09% examples, 114525 words/s, in_qsize 1, out_qsize 0
INFO - 21:29:56: EPOCH 6 - PROGRESS: at 94.60% examples, 116083 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:56: worker thread finished; awaiting finish of 2 more threads
INFO - 21:29:56: worker thread finished; awaiting finish of 1 more threads
INFO - 21:29:56: worker thread finished; awaiting finish of 0 more threads
INFO - 21:29:56: EPOCH - 6 : training on 1099932 raw words (748910 effective words) took 6.4s, 117051 effective words/s


Loss after epoch 5: 0.0


INFO - 21:29:57: EPOCH 7 - PROGRESS: at 14.59% examples, 116907 words/s, in_qsize 1, out_qsize 0
INFO - 21:29:58: EPOCH 7 - PROGRESS: at 32.44% examples, 121593 words/s, in_qsize 0, out_qsize 0
INFO - 21:29:59: EPOCH 7 - PROGRESS: at 46.07% examples, 115467 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:00: EPOCH 7 - PROGRESS: at 61.71% examples, 115066 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:01: EPOCH 7 - PROGRESS: at 80.86% examples, 116855 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:02: EPOCH 7 - PROGRESS: at 97.79% examples, 118155 words/s, in_qsize 1, out_qsize 0
INFO - 21:30:02: worker thread finished; awaiting finish of 2 more threads
INFO - 21:30:02: worker thread finished; awaiting finish of 1 more threads
INFO - 21:30:03: worker thread finished; awaiting finish of 0 more threads
INFO - 21:30:03: EPOCH - 7 : training on 1099932 raw words (749052 effective words) took 6.3s, 118804 effective words/s


Loss after epoch 6: 0.0


INFO - 21:30:04: EPOCH 8 - PROGRESS: at 14.59% examples, 115425 words/s, in_qsize 0, out_qsize 1
INFO - 21:30:05: EPOCH 8 - PROGRESS: at 32.44% examples, 120776 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:06: EPOCH 8 - PROGRESS: at 47.15% examples, 117979 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:07: EPOCH 8 - PROGRESS: at 63.54% examples, 117762 words/s, in_qsize 1, out_qsize 0
INFO - 21:30:08: EPOCH 8 - PROGRESS: at 79.48% examples, 114819 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:09: EPOCH 8 - PROGRESS: at 95.64% examples, 115588 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:09: worker thread finished; awaiting finish of 2 more threads
INFO - 21:30:09: worker thread finished; awaiting finish of 1 more threads
INFO - 21:30:09: worker thread finished; awaiting finish of 0 more threads
INFO - 21:30:09: EPOCH - 8 : training on 1099932 raw words (749594 effective words) took 6.4s, 116618 effective words/s


Loss after epoch 7: 0.0


INFO - 21:30:10: EPOCH 9 - PROGRESS: at 14.59% examples, 117248 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:11: EPOCH 9 - PROGRESS: at 32.44% examples, 120240 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:12: EPOCH 9 - PROGRESS: at 49.26% examples, 122064 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:13: EPOCH 9 - PROGRESS: at 65.34% examples, 121538 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:14: EPOCH 9 - PROGRESS: at 83.31% examples, 121394 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:15: EPOCH 9 - PROGRESS: at 97.79% examples, 118012 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:15: worker thread finished; awaiting finish of 2 more threads
INFO - 21:30:15: worker thread finished; awaiting finish of 1 more threads
INFO - 21:30:15: worker thread finished; awaiting finish of 0 more threads
INFO - 21:30:15: EPOCH - 9 : training on 1099932 raw words (749475 effective words) took 6.4s, 117322 effective words/s


Loss after epoch 8: 0.0


INFO - 21:30:16: EPOCH 10 - PROGRESS: at 13.68% examples, 116357 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:17: EPOCH 10 - PROGRESS: at 30.72% examples, 119219 words/s, in_qsize 1, out_qsize 0
INFO - 21:30:18: EPOCH 10 - PROGRESS: at 47.15% examples, 121133 words/s, in_qsize 1, out_qsize 0
INFO - 21:30:19: EPOCH 10 - PROGRESS: at 64.46% examples, 121811 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:20: EPOCH 10 - PROGRESS: at 83.31% examples, 122310 words/s, in_qsize 0, out_qsize 0
INFO - 21:30:21: worker thread finished; awaiting finish of 2 more threads
INFO - 21:30:21: worker thread finished; awaiting finish of 1 more threads
INFO - 21:30:21: worker thread finished; awaiting finish of 0 more threads
INFO - 21:30:21: EPOCH - 10 : training on 1099932 raw words (749640 effective words) took 6.1s, 123889 effective words/s
INFO - 21:30:21: training on a 10999320 raw words (7492722 effective words) took 68.4s, 109599 effective words/s


Loss after epoch 9: 0.0
Time to train the model: 1.14 mins


In [108]:
w2v_model_novice.init_sims(replace = True)
w2v_model_exp.init_sims(replace = True)

w2v_model_novice.save("results/word2vec_novice_test.model")
w2v_model_exp.save("results/word2vec_exp_test.model")

INFO - 23:07:23: precomputing L2-norms of word weight vectors
INFO - 23:07:23: precomputing L2-norms of word weight vectors
INFO - 23:07:23: saving Word2Vec object under results/word2vec_novice.model, separately None
INFO - 23:07:23: not storing attribute vectors_norm
INFO - 23:07:23: not storing attribute cum_table
INFO - 23:07:23: saved results/word2vec_novice.model
INFO - 23:07:23: saving Word2Vec object under results/word2vec_exp.model, separately None
INFO - 23:07:23: not storing attribute vectors_norm
INFO - 23:07:23: not storing attribute cum_table
INFO - 23:07:23: saved results/word2vec_exp.model


In [109]:
#w2v_model.wv.most_similar(positive=["macron"])
#w2v_model.wv.most_similar(negative=["promesse"])
#w2v_model.wv.similarity("élection", 'présidentielle')
#w2v_model.wv.similarity("sport", 'études')
#print(w2v_model.wv.similarity("macron", 'droite'))
#print(w2v_model.wv.similarity("macron", 'gauche'))
#w2v_model.wv.doesnt_match(['gauche', 'président', 'droite'])
#w2v_model.wv.most_similar(positive=["père", "femme"], negative = ['homme'], topn=3)

In [127]:
w2v_model_novice.wv.most_similar(positive=["droite"])

[('gauche', 0.986516535282135),
 ('républicains', 0.9116498231887817),
 ('bancs', 0.9007534384727478),
 ('cet_hémicycle', 0.8813148736953735),
 ('hémicycle', 0.8689683675765991),
 ('france_insoumise', 0.8554179668426514),
 ('opposition', 0.8358474969863892),
 ('groupes', 0.802750825881958),
 ('voix', 0.800241231918335),
 ('groupe', 0.7991034984588623)]

In [128]:
w2v_model_exp.wv.most_similar(positive=["droite"])

[('droite_gauche', 0.6979137659072876),
 ('extrême_gauche', 0.6682906150817871),
 ('gauche', 0.6396193504333496),
 ('socialistes', 0.6157995462417603),
 ('majorité', 0.561996340751648),
 ('bancs', 0.5594807267189026),
 ('communistes', 0.5519882440567017),
 ('extrême_droite', 0.5514141321182251),
 ('rangs', 0.5431515574455261),
 ('oreille', 0.5347355008125305)]

In [129]:
df_export_novice = pd.DataFrame(w2v_model_novice.wv.vectors)

df_export_exp = pd.DataFrame(w2v_model_exp.wv.vectors)

In [130]:
words_novice = [w2v_model_novice.wv.most_similar(positive=[np.array(df_export_novice.iloc[i])])[0][0] for i in range(df_export_novice.shape[0])]

words_exp = [w2v_model_exp.wv.most_similar(positive=[np.array(df_export_exp.iloc[i])])[0][0] for i in range(df_export_exp.shape[0])]

In [131]:
df_export_novice['word'] = words_novice

df_export_exp['word'] = words_exp

In [132]:
df_export_novice.to_csv('results/embeddings_novice_test.csv')
df_export_exp.to_csv('results/embeddings_exp_test.csv')