In [18]:
import logging
import random

import numpy as np
import pandas as pd

from gensim.models import doc2vec
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

In [3]:
def read_dataset(path):
    dataset = pd.read_csv(path, header=0, delimiter="\t")
    x_train, x_test, y_train, y_test = train_test_split(dataset.review, dataset.sentiment, random_state=0, test_size=0.1)
    x_train = label_sentences(x_train, 'Train')
    x_test = label_sentences(x_test, 'Test')
    all_data = x_train + x_test
    return x_train, x_test, y_train, y_test, all_data

In [17]:
def read_dataset_df(df):
    dataset = df
    x_train, x_test, y_train, y_test = train_test_split(dataset.data, dataset.outcome, random_state=0, test_size=0.3)
    x_train = label_sentences(x_train, 'Train')
    x_test = label_sentences(x_test, 'Test')
    all_data = x_train + x_test
    return x_train, x_test, y_train, y_test, all_data

In [4]:
def label_sentences(corpus, label_type):
    """
    Gensim's Doc2Vec implementation requires each document/paragraph to have a label associated with it.
    We do this by using the LabeledSentence method. The format will be "TRAIN_i" or "TEST_i" where "i" is
    a dummy index of the review.
    """
    labeled = []
    for i, v in enumerate(corpus):
        label = label_type + '_' + str(i)
        labeled.append(doc2vec.LabeledSentence(v.split(), [label]))
    return labeled

In [5]:
def get_vectors(doc2vec_model, corpus_size, vectors_size, vectors_type):
    """
    Get vectors from trained doc2vec model
    :param doc2vec_model: Trained Doc2Vec model
    :param corpus_size: Size of the data
    :param vectors_size: Size of the embedding vectors
    :param vectors_type: Training or Testing vectors
    :return: list of vectors
    """
    vectors = np.zeros((corpus_size, vectors_size))
    for i in range(0, corpus_size):
        prefix = vectors_type + '_' + str(i)
        vectors[i] = doc2vec_model.docvecs[prefix]
    return vectors

In [25]:
def train_doc2vec(corpus):
    logging.info("Building Doc2Vec vocabulary")
    d2v = doc2vec.Doc2Vec(min_count=1,  # Ignores all words with total frequency lower than this
                          window=10,  # The maximum distance between the current and predicted word within a sentence
                          vector_size=300,  # Dimensionality of the generated feature vectors
                          workers=5,  # Number of worker threads to train the model
                          alpha=0.025,  # The initial learning rate
                          min_alpha=0.00025,  # Learning rate will linearly drop to min_alpha as training progresses
                          dm=0, # dm defines the training algorithm. If dm=1 means ‘distributed memory’ (PV-DM)
                                 # and dm =0 means ‘distributed bag of words’ (PV-DBOW)
                          dbow_words  = 1,
                          dm_mean = 1)  
    d2v.build_vocab(corpus)

    logging.info("Training Doc2Vec model")
    # 10 epochs take around 10 minutes on my machine (i7), if you have more time/computational power make it 20
    for epoch in range(10):
        logging.info('Training iteration #{0}'.format(epoch))
        d2v.train(corpus, total_examples=d2v.corpus_count, epochs=d2v.iter)
        # shuffle the corpus
        random.shuffle(corpus)
        # decrease the learning rate
        d2v.alpha -= 0.0002
        # fix the learning rate, no decay
        d2v.min_alpha = d2v.alpha

    logging.info("Saving trained Doc2Vec model")
    d2v.save("d2v.model")
    return d2v

In [20]:
def train_classifier(d2v, training_vectors, training_labels):
    logging.info("Classifier training")
    train_vectors = get_vectors(d2v, len(training_vectors), 300, 'Train')
    model = LinearSVC(penalty='l2')
    model.fit(train_vectors, np.array(training_labels))
    training_predictions = model.predict(train_vectors)
    logging.info('Training predicted classes: {}'.format(np.unique(training_predictions)))
    logging.info('Training accuracy: {}'.format(accuracy_score(training_labels, training_predictions)))
    logging.info('Training F1 score: {}'.format(f1_score(training_labels, training_predictions, average='weighted')))
    return model

In [8]:
def test_classifier(d2v, classifier, testing_vectors, testing_labels):
    logging.info("Classifier testing")
    test_vectors = get_vectors(d2v, len(testing_vectors), 300, 'Test')
    testing_predictions = classifier.predict(test_vectors)
    logging.info('Testing predicted classes: {}'.format(np.unique(testing_predictions)))
    logging.info('Testing accuracy: {}'.format(accuracy_score(testing_labels, testing_predictions)))
    logging.info('Testing F1 score: {}'.format(f1_score(testing_labels, testing_predictions, average='weighted')))

In [13]:
import os
import pandas as pd
from collections import Counter

PATH = '..\\Emotion Flow GitHub\\data_all\\'

data = []

for root, dirs, files in os.walk(PATH):
    for i,name in enumerate(files):
        if name.endswith((".txt")):
            dir_split = root.replace('.','')
            dir_split = dir_split.split('\\')
            category = dir_split[3]
            outcome = dir_split[4]
            file = open(root+'\\'+name, 'r', encoding='utf-8',errors='ignore')
            text_data = file.read()
            temp = []
            temp.append(category)
            temp.append(text_data)
            temp.append(outcome)
            data.append(temp)
            
            #print(category,outcome,name)
file.close()

In [14]:
df = pd.DataFrame(data)
df.columns = ['category','data','outcome']
print(len(df))
#df['value']=df.outcome.str[7:8]
df['outcome']=df.outcome.str[0:7]
print(df.head())

1003
            category                                               data  \
0  Detective_Mystery  \n\nE-text prepared by Juliet Sutherland, Mary...   
1  Detective_Mystery  PINES***\n\n\nE-text prepared by Juliet Suther...   
2  Detective_Mystery  \n\n\n\nProduced by Juliet Sutherland, Mary Me...   
3  Detective_Mystery  \n\nE-text prepared by Juliet Sutherland, Mary...   
4  Detective_Mystery  \n\n\n\nProduced by Steven desJardins and PG D...   

   outcome  
0  failure  
1  failure  
2  failure  
3  failure  
4  failure  


In [26]:
if __name__ == "__main__":
    x_train, x_test, y_train, y_test, all_data = read_dataset_df(df)
    d2v_model = train_doc2vec(all_data)
    classifier = train_classifier(d2v_model, x_train, y_train)
    test_classifier(d2v_model, classifier, x_test, y_test)

  # Remove the CWD from sys.path while we load stuff.
2018-11-20 18:02:40,376 : INFO : Building Doc2Vec vocabulary
2018-11-20 18:02:40,376 : INFO : collecting all words and their counts
2018-11-20 18:02:40,376 : INFO : PROGRESS: at example #0, processed 0 words (0/s), 0 word types, 0 tags
2018-11-20 18:02:42,922 : INFO : collected 511920 word types and 1003 unique tags from a corpus of 1003 examples and 13654253 words
2018-11-20 18:02:42,922 : INFO : Loading a fresh vocabulary
2018-11-20 18:02:44,075 : INFO : min_count=1 retains 511920 unique words (100% of original 511920, drops 0)
2018-11-20 18:02:44,076 : INFO : min_count=1 leaves 13654253 word corpus (100% of original 13654253, drops 0)
2018-11-20 18:02:45,205 : INFO : deleting the raw counts dictionary of 511920 items
2018-11-20 18:02:45,220 : INFO : sample=0.001 downsamples 39 most-common words
2018-11-20 18:02:45,220 : INFO : downsampling leaves estimated 10877305 word corpus (79.7% of prior 13654253)
2018-11-20 18:02:46,375 : I

2018-11-20 18:03:51,486 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-11-20 18:03:51,539 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-11-20 18:03:51,546 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-11-20 18:03:51,547 : INFO : EPOCH - 1 : training on 13654253 raw words (8238048 effective words) took 59.2s, 139124 effective words/s
2018-11-20 18:03:52,703 : INFO : EPOCH 2 - PROGRESS: at 1.60% examples, 103426 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:03:53,754 : INFO : EPOCH 2 - PROGRESS: at 3.19% examples, 116183 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:03:54,867 : INFO : EPOCH 2 - PROGRESS: at 4.89% examples, 122009 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:03:56,047 : INFO : EPOCH 2 - PROGRESS: at 6.88% examples, 124165 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:03:57,202 : INFO : EPOCH 2 - PROGRESS: at 8.67% examples, 126220 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:03:58,249 : 

2018-11-20 18:05:01,566 : INFO : EPOCH 3 - PROGRESS: at 9.67% examples, 125625 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:05:02,650 : INFO : EPOCH 3 - PROGRESS: at 10.97% examples, 122141 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:05:03,696 : INFO : EPOCH 3 - PROGRESS: at 12.16% examples, 120218 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:05:04,736 : INFO : EPOCH 3 - PROGRESS: at 13.56% examples, 119489 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:05:05,770 : INFO : EPOCH 3 - PROGRESS: at 15.15% examples, 120512 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:05:06,829 : INFO : EPOCH 3 - PROGRESS: at 16.65% examples, 120333 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:05:07,836 : INFO : EPOCH 3 - PROGRESS: at 18.25% examples, 122544 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:05:08,949 : INFO : EPOCH 3 - PROGRESS: at 19.84% examples, 121714 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:05:09,944 : INFO : EPOCH 3 - PROGRESS: at 21.73% examples, 123844 words/s, in_qsi

2018-11-20 18:06:13,353 : INFO : EPOCH 4 - PROGRESS: at 18.44% examples, 124185 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:06:14,376 : INFO : EPOCH 4 - PROGRESS: at 20.34% examples, 125835 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:06:15,466 : INFO : EPOCH 4 - PROGRESS: at 22.03% examples, 125962 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:06:16,502 : INFO : EPOCH 4 - PROGRESS: at 23.73% examples, 127286 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:06:17,564 : INFO : EPOCH 4 - PROGRESS: at 25.52% examples, 128091 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:06:18,567 : INFO : EPOCH 4 - PROGRESS: at 27.22% examples, 127777 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:06:19,592 : INFO : EPOCH 4 - PROGRESS: at 28.91% examples, 128609 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:06:20,688 : INFO : EPOCH 4 - PROGRESS: at 30.71% examples, 129180 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:06:21,708 : INFO : EPOCH 4 - PROGRESS: at 32.60% examples, 129471 words/s, in_qsi

2018-11-20 18:07:26,949 : INFO : EPOCH 5 - PROGRESS: at 35.69% examples, 125861 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:07:27,966 : INFO : EPOCH 5 - PROGRESS: at 37.59% examples, 126590 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:07:28,996 : INFO : EPOCH 5 - PROGRESS: at 38.98% examples, 126416 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:07:29,998 : INFO : EPOCH 5 - PROGRESS: at 40.68% examples, 126929 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:07:31,014 : INFO : EPOCH 5 - PROGRESS: at 42.57% examples, 127245 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:07:32,119 : INFO : EPOCH 5 - PROGRESS: at 44.67% examples, 127790 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:07:33,193 : INFO : EPOCH 5 - PROGRESS: at 46.56% examples, 128211 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:07:34,227 : INFO : EPOCH 5 - PROGRESS: at 48.06% examples, 128293 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:07:35,244 : INFO : EPOCH 5 - PROGRESS: at 49.45% examples, 128273 words/s, in_

2018-11-20 18:08:34,721 : INFO : EPOCH 1 - PROGRESS: at 41.67% examples, 123327 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:08:35,849 : INFO : EPOCH 1 - PROGRESS: at 43.47% examples, 123883 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:08:36,884 : INFO : EPOCH 1 - PROGRESS: at 45.26% examples, 124202 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:08:37,935 : INFO : EPOCH 1 - PROGRESS: at 46.76% examples, 124825 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:08:38,949 : INFO : EPOCH 1 - PROGRESS: at 48.45% examples, 125254 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:08:40,182 : INFO : EPOCH 1 - PROGRESS: at 49.95% examples, 124480 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:08:41,255 : INFO : EPOCH 1 - PROGRESS: at 51.55% examples, 124638 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:08:42,263 : INFO : EPOCH 1 - PROGRESS: at 53.24% examples, 125189 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:08:43,280 : INFO : EPOCH 1 - PROGRESS: at 54.94% examples, 125542 words/s, in_

2018-11-20 18:09:48,265 : INFO : EPOCH 2 - PROGRESS: at 58.03% examples, 130424 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:09:49,268 : INFO : EPOCH 2 - PROGRESS: at 59.32% examples, 130089 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:09:50,341 : INFO : EPOCH 2 - PROGRESS: at 61.02% examples, 130020 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:09:51,432 : INFO : EPOCH 2 - PROGRESS: at 62.91% examples, 130084 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:09:52,449 : INFO : EPOCH 2 - PROGRESS: at 64.21% examples, 129614 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:09:53,524 : INFO : EPOCH 2 - PROGRESS: at 65.70% examples, 129315 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:09:54,608 : INFO : EPOCH 2 - PROGRESS: at 67.30% examples, 129234 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:09:55,744 : INFO : EPOCH 2 - PROGRESS: at 68.99% examples, 129273 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:09:56,773 : INFO : EPOCH 2 - PROGRESS: at 70.99% examples, 129525 words/s, i

2018-11-20 18:11:01,170 : INFO : EPOCH 3 - PROGRESS: at 67.50% examples, 120716 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:11:02,177 : INFO : EPOCH 3 - PROGRESS: at 68.99% examples, 120766 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:11:03,217 : INFO : EPOCH 3 - PROGRESS: at 70.99% examples, 121374 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:11:04,308 : INFO : EPOCH 3 - PROGRESS: at 72.88% examples, 121346 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:11:05,337 : INFO : EPOCH 3 - PROGRESS: at 74.48% examples, 121492 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:11:06,430 : INFO : EPOCH 3 - PROGRESS: at 76.27% examples, 121682 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:11:07,520 : INFO : EPOCH 3 - PROGRESS: at 77.77% examples, 121599 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:11:08,630 : INFO : EPOCH 3 - PROGRESS: at 79.66% examples, 121792 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:11:09,715 : INFO : EPOCH 3 - PROGRESS: at 81.56% examples, 122263 words/s, in

2018-11-20 18:12:13,498 : INFO : EPOCH 4 - PROGRESS: at 87.64% examples, 135854 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:12:14,500 : INFO : EPOCH 4 - PROGRESS: at 89.23% examples, 136052 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:12:15,592 : INFO : EPOCH 4 - PROGRESS: at 90.83% examples, 135862 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:12:16,626 : INFO : EPOCH 4 - PROGRESS: at 92.22% examples, 135678 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:12:17,710 : INFO : EPOCH 4 - PROGRESS: at 94.02% examples, 135795 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:12:18,734 : INFO : EPOCH 4 - PROGRESS: at 95.61% examples, 135908 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:12:19,796 : INFO : EPOCH 4 - PROGRESS: at 97.21% examples, 135906 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:12:20,806 : INFO : EPOCH 4 - PROGRESS: at 98.90% examples, 135992 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:12:21,247 : INFO : worker thread finished; awaiting finish of 4 more threads
201

2018-11-20 18:13:24,395 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-11-20 18:13:24,401 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-11-20 18:13:24,406 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-11-20 18:13:24,493 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-11-20 18:13:24,494 : INFO : EPOCH - 5 : training on 13654253 raw words (8238495 effective words) took 63.1s, 130609 effective words/s
2018-11-20 18:13:24,494 : INFO : training on a 68271265 raw words (41189209 effective words) took 317.2s, 129843 effective words/s
2018-11-20 18:13:24,496 : INFO : Training iteration #2
2018-11-20 18:13:24,496 : INFO : training model with 5 workers on 511920 vocabulary and 300 features, using sg=1 hs=0 sample=0.001 negative=5 window=10
2018-11-20 18:13:25,500 : INFO : EPOCH 1 - PROGRESS: at 1.50% examples, 113018 words/s, in_qsize 8, out_qsize 0
2018-11-20 18:13:26,559 : INFO : EPOCH 1 - PROG

2018-11-20 18:14:29,678 : INFO : EPOCH 2 - PROGRESS: at 4.69% examples, 117399 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:14:30,666 : INFO : EPOCH 2 - PROGRESS: at 5.88% examples, 114781 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:14:31,847 : INFO : EPOCH 2 - PROGRESS: at 7.18% examples, 112533 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:14:32,862 : INFO : EPOCH 2 - PROGRESS: at 8.57% examples, 115001 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:14:33,963 : INFO : EPOCH 2 - PROGRESS: at 10.27% examples, 116903 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:14:34,991 : INFO : EPOCH 2 - PROGRESS: at 12.06% examples, 119525 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:14:36,010 : INFO : EPOCH 2 - PROGRESS: at 13.66% examples, 120481 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:14:37,012 : INFO : EPOCH 2 - PROGRESS: at 15.45% examples, 122369 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:14:38,112 : INFO : EPOCH 2 - PROGRESS: at 16.95% examples, 122340 words/s, in_qsize 

2018-11-20 18:15:42,306 : INFO : EPOCH 3 - PROGRESS: at 17.95% examples, 128228 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:15:43,399 : INFO : EPOCH 3 - PROGRESS: at 19.74% examples, 129010 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:15:44,419 : INFO : EPOCH 3 - PROGRESS: at 21.54% examples, 130407 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:15:45,494 : INFO : EPOCH 3 - PROGRESS: at 23.23% examples, 130411 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:15:46,549 : INFO : EPOCH 3 - PROGRESS: at 24.83% examples, 130340 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:15:47,568 : INFO : EPOCH 3 - PROGRESS: at 26.42% examples, 130332 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:15:48,576 : INFO : EPOCH 3 - PROGRESS: at 28.02% examples, 130150 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:15:49,628 : INFO : EPOCH 3 - PROGRESS: at 29.71% examples, 131333 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:15:50,640 : INFO : EPOCH 3 - PROGRESS: at 31.31% examples, 131177 words/s, in_qsi

2018-11-20 18:16:53,890 : INFO : EPOCH 4 - PROGRESS: at 32.60% examples, 135800 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:16:54,920 : INFO : EPOCH 4 - PROGRESS: at 34.40% examples, 135621 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:16:55,928 : INFO : EPOCH 4 - PROGRESS: at 35.99% examples, 135524 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:16:57,098 : INFO : EPOCH 4 - PROGRESS: at 37.79% examples, 135021 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:16:58,100 : INFO : EPOCH 4 - PROGRESS: at 39.48% examples, 135281 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:16:59,110 : INFO : EPOCH 4 - PROGRESS: at 40.98% examples, 135052 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:17:00,156 : INFO : EPOCH 4 - PROGRESS: at 42.67% examples, 134860 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:17:01,264 : INFO : EPOCH 4 - PROGRESS: at 44.27% examples, 134326 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:17:02,287 : INFO : EPOCH 4 - PROGRESS: at 46.16% examples, 134756 words/s, in_q

2018-11-20 18:18:06,002 : INFO : EPOCH 5 - PROGRESS: at 49.95% examples, 136334 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:18:07,091 : INFO : EPOCH 5 - PROGRESS: at 52.04% examples, 136468 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:18:08,097 : INFO : EPOCH 5 - PROGRESS: at 53.74% examples, 136844 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:18:09,184 : INFO : EPOCH 5 - PROGRESS: at 55.73% examples, 136848 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:18:10,283 : INFO : EPOCH 5 - PROGRESS: at 57.43% examples, 136853 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:18:11,304 : INFO : EPOCH 5 - PROGRESS: at 59.22% examples, 137138 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:18:12,325 : INFO : EPOCH 5 - PROGRESS: at 61.02% examples, 137300 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:18:13,370 : INFO : EPOCH 5 - PROGRESS: at 62.81% examples, 137063 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:18:14,390 : INFO : EPOCH 5 - PROGRESS: at 64.51% examples, 137172 words/s, in_q

2018-11-20 18:19:14,854 : INFO : EPOCH 1 - PROGRESS: at 60.12% examples, 126335 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:19:15,884 : INFO : EPOCH 1 - PROGRESS: at 62.11% examples, 126845 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:19:16,909 : INFO : EPOCH 1 - PROGRESS: at 63.51% examples, 126482 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:19:17,914 : INFO : EPOCH 1 - PROGRESS: at 65.20% examples, 126884 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:19:18,914 : INFO : EPOCH 1 - PROGRESS: at 66.60% examples, 126592 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:19:19,908 : INFO : EPOCH 1 - PROGRESS: at 67.90% examples, 126336 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:19:21,063 : INFO : EPOCH 1 - PROGRESS: at 69.49% examples, 126044 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:19:22,185 : INFO : EPOCH 1 - PROGRESS: at 71.19% examples, 126021 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:19:23,251 : INFO : EPOCH 1 - PROGRESS: at 72.48% examples, 125668 words/s, in_qs

2018-11-20 18:20:27,902 : INFO : EPOCH 2 - PROGRESS: at 72.18% examples, 126493 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:20:28,917 : INFO : EPOCH 2 - PROGRESS: at 73.18% examples, 125737 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:20:29,994 : INFO : EPOCH 2 - PROGRESS: at 74.68% examples, 125567 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:20:31,097 : INFO : EPOCH 2 - PROGRESS: at 76.27% examples, 125107 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:20:32,100 : INFO : EPOCH 2 - PROGRESS: at 77.97% examples, 125447 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:20:33,110 : INFO : EPOCH 2 - PROGRESS: at 79.56% examples, 125326 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:20:34,173 : INFO : EPOCH 2 - PROGRESS: at 81.26% examples, 125407 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:20:35,245 : INFO : EPOCH 2 - PROGRESS: at 83.15% examples, 125501 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:20:36,267 : INFO : EPOCH 2 - PROGRESS: at 84.65% examples, 125556 words/s, in_

2018-11-20 18:21:40,146 : INFO : EPOCH 3 - PROGRESS: at 86.04% examples, 131408 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:21:41,165 : INFO : EPOCH 3 - PROGRESS: at 87.94% examples, 131496 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:21:42,200 : INFO : EPOCH 3 - PROGRESS: at 89.63% examples, 131657 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:21:43,230 : INFO : EPOCH 3 - PROGRESS: at 91.33% examples, 131786 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:21:44,231 : INFO : EPOCH 3 - PROGRESS: at 92.92% examples, 131823 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:21:45,264 : INFO : EPOCH 3 - PROGRESS: at 94.72% examples, 132096 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:21:46,414 : INFO : EPOCH 3 - PROGRESS: at 96.51% examples, 132189 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:21:47,527 : INFO : EPOCH 3 - PROGRESS: at 98.31% examples, 132144 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:21:48,445 : INFO : worker thread finished; awaiting finish of 4 more threads
20

2018-11-20 18:22:49,049 : INFO : EPOCH 4 - PROGRESS: at 99.90% examples, 135961 words/s, in_qsize 1, out_qsize 1
2018-11-20 18:22:49,050 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-11-20 18:22:49,074 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-11-20 18:22:49,074 : INFO : EPOCH - 4 : training on 13654253 raw words (8237882 effective words) took 60.5s, 136070 effective words/s
2018-11-20 18:22:50,132 : INFO : EPOCH 5 - PROGRESS: at 1.69% examples, 116038 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:22:51,256 : INFO : EPOCH 5 - PROGRESS: at 3.39% examples, 124238 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:22:52,263 : INFO : EPOCH 5 - PROGRESS: at 5.08% examples, 130785 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:22:53,367 : INFO : EPOCH 5 - PROGRESS: at 6.78% examples, 133667 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:22:54,379 : INFO : EPOCH 5 - PROGRESS: at 8.47% examples, 136002 words/s, in_qsize 10, out_qsize 0
2018

2018-11-20 18:23:55,538 : INFO : EPOCH 1 - PROGRESS: at 10.17% examples, 136476 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:23:56,616 : INFO : EPOCH 1 - PROGRESS: at 11.86% examples, 135972 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:23:57,712 : INFO : EPOCH 1 - PROGRESS: at 13.76% examples, 137639 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:23:58,800 : INFO : EPOCH 1 - PROGRESS: at 15.45% examples, 136550 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:23:59,835 : INFO : EPOCH 1 - PROGRESS: at 17.15% examples, 137517 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:24:00,847 : INFO : EPOCH 1 - PROGRESS: at 18.84% examples, 136764 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:24:01,850 : INFO : EPOCH 1 - PROGRESS: at 20.74% examples, 137696 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:24:02,889 : INFO : EPOCH 1 - PROGRESS: at 22.33% examples, 136259 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:24:03,942 : INFO : EPOCH 1 - PROGRESS: at 23.93% examples, 136256 words/s, in_q

2018-11-20 18:25:07,812 : INFO : EPOCH 2 - PROGRESS: at 31.70% examples, 137314 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:25:08,904 : INFO : EPOCH 2 - PROGRESS: at 33.60% examples, 137791 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:25:09,923 : INFO : EPOCH 2 - PROGRESS: at 35.39% examples, 138028 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:25:10,928 : INFO : EPOCH 2 - PROGRESS: at 37.09% examples, 137891 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:25:12,003 : INFO : EPOCH 2 - PROGRESS: at 38.98% examples, 137857 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:25:13,040 : INFO : EPOCH 2 - PROGRESS: at 40.88% examples, 137911 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:25:14,122 : INFO : EPOCH 2 - PROGRESS: at 42.67% examples, 137844 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:25:15,177 : INFO : EPOCH 2 - PROGRESS: at 44.37% examples, 137985 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:25:16,190 : INFO : EPOCH 2 - PROGRESS: at 46.26% examples, 138429 words/s, in_q

2018-11-20 18:26:20,316 : INFO : EPOCH 3 - PROGRESS: at 53.54% examples, 136812 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:26:21,319 : INFO : EPOCH 3 - PROGRESS: at 55.43% examples, 137066 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:26:22,319 : INFO : EPOCH 3 - PROGRESS: at 57.13% examples, 137255 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:26:23,335 : INFO : EPOCH 3 - PROGRESS: at 58.72% examples, 136923 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:26:24,340 : INFO : EPOCH 3 - PROGRESS: at 60.02% examples, 136477 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:26:25,575 : INFO : EPOCH 3 - PROGRESS: at 61.71% examples, 135814 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:26:26,590 : INFO : EPOCH 3 - PROGRESS: at 63.41% examples, 135730 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:26:27,734 : INFO : EPOCH 3 - PROGRESS: at 65.00% examples, 135193 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:26:28,785 : INFO : EPOCH 3 - PROGRESS: at 66.80% examples, 135336 words/s, in

2018-11-20 18:27:32,474 : INFO : EPOCH 4 - PROGRESS: at 68.10% examples, 130879 words/s, in_qsize 10, out_qsize 1
2018-11-20 18:27:33,485 : INFO : EPOCH 4 - PROGRESS: at 69.99% examples, 131178 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:27:34,504 : INFO : EPOCH 4 - PROGRESS: at 71.49% examples, 131368 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:27:35,588 : INFO : EPOCH 4 - PROGRESS: at 73.18% examples, 131288 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:27:36,691 : INFO : EPOCH 4 - PROGRESS: at 74.68% examples, 130958 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:27:37,690 : INFO : EPOCH 4 - PROGRESS: at 76.37% examples, 131321 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:27:38,748 : INFO : EPOCH 4 - PROGRESS: at 77.97% examples, 131348 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:27:39,870 : INFO : EPOCH 4 - PROGRESS: at 79.76% examples, 131465 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:27:40,949 : INFO : EPOCH 4 - PROGRESS: at 81.46% examples, 131601 words/s, i

2018-11-20 18:28:45,390 : INFO : EPOCH 5 - PROGRESS: at 83.65% examples, 130161 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:28:46,435 : INFO : EPOCH 5 - PROGRESS: at 85.24% examples, 130077 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:28:47,512 : INFO : EPOCH 5 - PROGRESS: at 86.74% examples, 130046 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:28:48,560 : INFO : EPOCH 5 - PROGRESS: at 88.53% examples, 130219 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:28:49,651 : INFO : EPOCH 5 - PROGRESS: at 90.13% examples, 130277 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:28:50,691 : INFO : EPOCH 5 - PROGRESS: at 92.02% examples, 130558 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:28:51,740 : INFO : EPOCH 5 - PROGRESS: at 94.02% examples, 130741 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:28:52,818 : INFO : EPOCH 5 - PROGRESS: at 95.41% examples, 130593 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:28:53,836 : INFO : EPOCH 5 - PROGRESS: at 97.41% examples, 131049 words/s, in

2018-11-20 18:29:53,651 : INFO : EPOCH 1 - PROGRESS: at 97.21% examples, 137642 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:29:54,735 : INFO : EPOCH 1 - PROGRESS: at 98.50% examples, 137107 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:29:55,305 : INFO : worker thread finished; awaiting finish of 4 more threads
2018-11-20 18:29:55,390 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-11-20 18:29:55,395 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-11-20 18:29:55,505 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-11-20 18:29:55,559 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-11-20 18:29:55,560 : INFO : EPOCH - 1 : training on 13654253 raw words (8238463 effective words) took 60.1s, 137153 effective words/s
2018-11-20 18:29:56,681 : INFO : EPOCH 2 - PROGRESS: at 1.10% examples, 89474 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:29:57,690 : INFO : EPOCH 2 - PROGRESS: at 2.69% exampl

2018-11-20 18:31:01,091 : INFO : EPOCH 3 - PROGRESS: at 1.30% examples, 106287 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:31:02,142 : INFO : EPOCH 3 - PROGRESS: at 2.89% examples, 119092 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:31:03,223 : INFO : EPOCH 3 - PROGRESS: at 4.69% examples, 129499 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:31:04,235 : INFO : EPOCH 3 - PROGRESS: at 6.58% examples, 131489 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:31:05,430 : INFO : EPOCH 3 - PROGRESS: at 8.28% examples, 130307 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:31:06,468 : INFO : EPOCH 3 - PROGRESS: at 10.07% examples, 133520 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:31:07,484 : INFO : EPOCH 3 - PROGRESS: at 11.86% examples, 134404 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:31:08,552 : INFO : EPOCH 3 - PROGRESS: at 13.76% examples, 135280 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:31:09,602 : INFO : EPOCH 3 - PROGRESS: at 15.35% examples, 136473 words/s, in_qsize 

2018-11-20 18:32:13,089 : INFO : EPOCH 4 - PROGRESS: at 22.63% examples, 141283 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:32:14,102 : INFO : EPOCH 4 - PROGRESS: at 24.33% examples, 141522 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:32:15,141 : INFO : EPOCH 4 - PROGRESS: at 25.72% examples, 140933 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:32:16,161 : INFO : EPOCH 4 - PROGRESS: at 27.32% examples, 141053 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:32:17,212 : INFO : EPOCH 4 - PROGRESS: at 29.41% examples, 141469 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:32:18,235 : INFO : EPOCH 4 - PROGRESS: at 31.11% examples, 141740 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:32:19,279 : INFO : EPOCH 4 - PROGRESS: at 32.90% examples, 141520 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:32:20,338 : INFO : EPOCH 4 - PROGRESS: at 34.90% examples, 141598 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:32:21,469 : INFO : EPOCH 4 - PROGRESS: at 36.79% examples, 141493 words/s, in

2018-11-20 18:33:24,481 : INFO : EPOCH 5 - PROGRESS: at 43.77% examples, 140189 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:33:25,596 : INFO : EPOCH 5 - PROGRESS: at 45.66% examples, 140251 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:33:26,600 : INFO : EPOCH 5 - PROGRESS: at 47.46% examples, 140752 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:33:27,707 : INFO : EPOCH 5 - PROGRESS: at 49.55% examples, 140563 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:33:28,700 : INFO : EPOCH 5 - PROGRESS: at 51.35% examples, 140799 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:33:29,767 : INFO : EPOCH 5 - PROGRESS: at 53.14% examples, 140571 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:33:30,791 : INFO : EPOCH 5 - PROGRESS: at 55.13% examples, 140692 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:33:31,832 : INFO : EPOCH 5 - PROGRESS: at 56.73% examples, 140781 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:33:32,893 : INFO : EPOCH 5 - PROGRESS: at 58.62% examples, 140782 words/s, in_qs

2018-11-20 18:34:33,497 : INFO : EPOCH 1 - PROGRESS: at 61.62% examples, 139059 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:34:34,528 : INFO : EPOCH 1 - PROGRESS: at 63.31% examples, 138845 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:34:35,559 : INFO : EPOCH 1 - PROGRESS: at 65.40% examples, 138907 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:34:36,610 : INFO : EPOCH 1 - PROGRESS: at 67.10% examples, 138713 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:34:37,632 : INFO : EPOCH 1 - PROGRESS: at 69.09% examples, 138804 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:34:38,664 : INFO : EPOCH 1 - PROGRESS: at 70.89% examples, 138955 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:34:39,692 : INFO : EPOCH 1 - PROGRESS: at 72.78% examples, 139172 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:34:40,711 : INFO : EPOCH 1 - PROGRESS: at 74.38% examples, 139392 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:34:41,710 : INFO : EPOCH 1 - PROGRESS: at 75.97% examples, 139341 words/s, in_qsiz

2018-11-20 18:35:45,254 : INFO : EPOCH 2 - PROGRESS: at 84.45% examples, 140382 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:35:46,313 : INFO : EPOCH 2 - PROGRESS: at 86.34% examples, 140400 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:35:47,334 : INFO : EPOCH 2 - PROGRESS: at 88.04% examples, 140436 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:35:48,357 : INFO : EPOCH 2 - PROGRESS: at 89.73% examples, 140333 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:35:49,379 : INFO : EPOCH 2 - PROGRESS: at 91.63% examples, 140604 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:35:50,461 : INFO : EPOCH 2 - PROGRESS: at 93.42% examples, 140524 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:35:51,588 : INFO : EPOCH 2 - PROGRESS: at 95.21% examples, 140490 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:35:52,622 : INFO : EPOCH 2 - PROGRESS: at 97.41% examples, 140570 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:35:53,662 : INFO : EPOCH 2 - PROGRESS: at 99.20% examples, 140533 words/s, in_qs

2018-11-20 18:36:52,982 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-11-20 18:36:52,982 : INFO : EPOCH - 3 : training on 13654253 raw words (8238242 effective words) took 58.9s, 139819 effective words/s
2018-11-20 18:36:54,095 : INFO : EPOCH 4 - PROGRESS: at 1.60% examples, 111049 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:36:55,099 : INFO : EPOCH 4 - PROGRESS: at 3.29% examples, 129596 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:36:56,133 : INFO : EPOCH 4 - PROGRESS: at 4.89% examples, 131336 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:36:57,225 : INFO : EPOCH 4 - PROGRESS: at 6.88% examples, 135578 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:36:58,252 : INFO : EPOCH 4 - PROGRESS: at 8.67% examples, 138814 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:36:59,284 : INFO : EPOCH 4 - PROGRESS: at 10.67% examples, 140093 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:37:00,333 : INFO : EPOCH 4 - PROGRESS: at 12.26% examples, 139730 words/s, in_qsi

2018-11-20 18:38:03,670 : INFO : EPOCH 5 - PROGRESS: at 21.44% examples, 142255 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:38:04,688 : INFO : EPOCH 5 - PROGRESS: at 23.33% examples, 142997 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:38:05,683 : INFO : EPOCH 5 - PROGRESS: at 25.02% examples, 142156 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:38:06,723 : INFO : EPOCH 5 - PROGRESS: at 27.02% examples, 143091 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:38:07,830 : INFO : EPOCH 5 - PROGRESS: at 28.61% examples, 142021 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:38:08,899 : INFO : EPOCH 5 - PROGRESS: at 30.41% examples, 142209 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:38:09,904 : INFO : EPOCH 5 - PROGRESS: at 32.10% examples, 141926 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:38:10,961 : INFO : EPOCH 5 - PROGRESS: at 34.10% examples, 142021 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:38:11,952 : INFO : EPOCH 5 - PROGRESS: at 35.69% examples, 141608 words/s, in_

2018-11-20 18:39:12,587 : INFO : EPOCH 1 - PROGRESS: at 37.89% examples, 132971 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:39:13,678 : INFO : EPOCH 1 - PROGRESS: at 39.68% examples, 133488 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:39:14,717 : INFO : EPOCH 1 - PROGRESS: at 41.48% examples, 134083 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:39:15,819 : INFO : EPOCH 1 - PROGRESS: at 43.07% examples, 134038 words/s, in_qsize 8, out_qsize 1
2018-11-20 18:39:16,960 : INFO : EPOCH 1 - PROGRESS: at 45.26% examples, 134370 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:39:18,009 : INFO : EPOCH 1 - PROGRESS: at 47.26% examples, 134431 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:39:19,059 : INFO : EPOCH 1 - PROGRESS: at 48.75% examples, 134113 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:39:20,076 : INFO : EPOCH 1 - PROGRESS: at 50.25% examples, 134131 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:39:21,178 : INFO : EPOCH 1 - PROGRESS: at 51.74% examples, 133308 words/s, in_qs

2018-11-20 18:40:24,739 : INFO : EPOCH 2 - PROGRESS: at 50.45% examples, 129462 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:40:25,938 : INFO : EPOCH 2 - PROGRESS: at 51.74% examples, 128162 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:40:26,946 : INFO : EPOCH 2 - PROGRESS: at 53.34% examples, 128364 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:40:27,996 : INFO : EPOCH 2 - PROGRESS: at 55.03% examples, 128985 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:40:29,046 : INFO : EPOCH 2 - PROGRESS: at 56.83% examples, 129315 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:40:30,081 : INFO : EPOCH 2 - PROGRESS: at 58.62% examples, 129439 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:40:31,083 : INFO : EPOCH 2 - PROGRESS: at 59.82% examples, 128924 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:40:32,168 : INFO : EPOCH 2 - PROGRESS: at 61.52% examples, 128903 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:40:33,184 : INFO : EPOCH 2 - PROGRESS: at 63.31% examples, 129488 words/s, in_qsi

2018-11-20 18:41:36,149 : INFO : EPOCH 3 - PROGRESS: at 72.88% examples, 144219 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:41:37,270 : INFO : EPOCH 3 - PROGRESS: at 74.98% examples, 144437 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:41:38,301 : INFO : EPOCH 3 - PROGRESS: at 76.77% examples, 144448 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:41:39,334 : INFO : EPOCH 3 - PROGRESS: at 78.66% examples, 144491 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:41:40,340 : INFO : EPOCH 3 - PROGRESS: at 80.56% examples, 144489 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:41:41,361 : INFO : EPOCH 3 - PROGRESS: at 82.25% examples, 144289 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:41:42,401 : INFO : EPOCH 3 - PROGRESS: at 84.15% examples, 144323 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:41:43,478 : INFO : EPOCH 3 - PROGRESS: at 86.04% examples, 144401 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:41:44,523 : INFO : EPOCH 3 - PROGRESS: at 87.94% examples, 144371 words/s, in_qs

2018-11-20 18:42:48,187 : INFO : EPOCH 4 - PROGRESS: at 99.60% examples, 144444 words/s, in_qsize 4, out_qsize 1
2018-11-20 18:42:48,187 : INFO : worker thread finished; awaiting finish of 4 more threads
2018-11-20 18:42:48,203 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-11-20 18:42:48,238 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-11-20 18:42:48,301 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-11-20 18:42:48,336 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-11-20 18:42:48,336 : INFO : EPOCH - 4 : training on 13654253 raw words (8238353 effective words) took 56.9s, 144724 effective words/s
2018-11-20 18:42:49,349 : INFO : EPOCH 5 - PROGRESS: at 1.69% examples, 125756 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:42:50,447 : INFO : EPOCH 5 - PROGRESS: at 3.99% examples, 137054 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:42:51,495 : INFO : EPOCH 5 - PROGRESS: at 5.78% examp

2018-11-20 18:43:50,657 : INFO : EPOCH 1 - PROGRESS: at 9.07% examples, 139560 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:43:51,723 : INFO : EPOCH 1 - PROGRESS: at 10.87% examples, 140852 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:43:52,790 : INFO : EPOCH 1 - PROGRESS: at 12.86% examples, 142565 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:43:53,793 : INFO : EPOCH 1 - PROGRESS: at 14.56% examples, 142890 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:43:54,828 : INFO : EPOCH 1 - PROGRESS: at 16.35% examples, 143065 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:43:55,880 : INFO : EPOCH 1 - PROGRESS: at 18.34% examples, 143662 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:43:56,978 : INFO : EPOCH 1 - PROGRESS: at 20.44% examples, 143268 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:43:57,981 : INFO : EPOCH 1 - PROGRESS: at 22.33% examples, 143548 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:43:59,015 : INFO : EPOCH 1 - PROGRESS: at 24.33% examples, 143837 words/s, in_qsi

2018-11-20 18:45:02,304 : INFO : EPOCH 2 - PROGRESS: at 34.90% examples, 144043 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:45:03,317 : INFO : EPOCH 2 - PROGRESS: at 36.79% examples, 144004 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:45:04,400 : INFO : EPOCH 2 - PROGRESS: at 38.48% examples, 143695 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:45:05,451 : INFO : EPOCH 2 - PROGRESS: at 40.38% examples, 144042 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:45:06,564 : INFO : EPOCH 2 - PROGRESS: at 42.47% examples, 143883 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:45:07,598 : INFO : EPOCH 2 - PROGRESS: at 44.37% examples, 144380 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:45:08,776 : INFO : EPOCH 2 - PROGRESS: at 46.16% examples, 143913 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:45:09,824 : INFO : EPOCH 2 - PROGRESS: at 48.35% examples, 144120 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:45:10,847 : INFO : EPOCH 2 - PROGRESS: at 50.45% examples, 144579 words/s, in_

2018-11-20 18:46:13,684 : INFO : EPOCH 3 - PROGRESS: at 60.52% examples, 144562 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:46:14,740 : INFO : EPOCH 3 - PROGRESS: at 62.11% examples, 144516 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:46:15,746 : INFO : EPOCH 3 - PROGRESS: at 63.91% examples, 144588 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:46:16,827 : INFO : EPOCH 3 - PROGRESS: at 66.00% examples, 144677 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:46:17,861 : INFO : EPOCH 3 - PROGRESS: at 67.90% examples, 144734 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:46:18,990 : INFO : EPOCH 3 - PROGRESS: at 69.99% examples, 144593 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:46:20,009 : INFO : EPOCH 3 - PROGRESS: at 72.08% examples, 144961 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:46:21,013 : INFO : EPOCH 3 - PROGRESS: at 73.78% examples, 144700 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:46:22,031 : INFO : EPOCH 3 - PROGRESS: at 75.57% examples, 144740 words/s, in_qs

2018-11-20 18:47:25,392 : INFO : EPOCH 4 - PROGRESS: at 85.94% examples, 144075 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:47:26,446 : INFO : EPOCH 4 - PROGRESS: at 87.54% examples, 143885 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:47:27,522 : INFO : EPOCH 4 - PROGRESS: at 89.53% examples, 143816 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:47:28,597 : INFO : EPOCH 4 - PROGRESS: at 91.43% examples, 143782 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:47:29,628 : INFO : EPOCH 4 - PROGRESS: at 93.32% examples, 143831 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:47:30,724 : INFO : EPOCH 4 - PROGRESS: at 95.21% examples, 144000 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:47:31,806 : INFO : EPOCH 4 - PROGRESS: at 97.11% examples, 144102 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:47:32,967 : INFO : EPOCH 4 - PROGRESS: at 98.90% examples, 143878 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:47:33,318 : INFO : worker thread finished; awaiting finish of 4 more threads
201

2018-11-20 18:48:30,564 : INFO : training on a 68271265 raw words (41190181 effective words) took 285.2s, 144403 effective words/s
2018-11-20 18:48:30,564 : INFO : Training iteration #9
2018-11-20 18:48:30,564 : INFO : training model with 5 workers on 511920 vocabulary and 300 features, using sg=1 hs=0 sample=0.001 negative=5 window=10
2018-11-20 18:48:31,629 : INFO : EPOCH 1 - PROGRESS: at 1.50% examples, 125683 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:48:32,629 : INFO : EPOCH 1 - PROGRESS: at 3.29% examples, 139520 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:48:33,664 : INFO : EPOCH 1 - PROGRESS: at 5.28% examples, 139042 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:48:34,669 : INFO : EPOCH 1 - PROGRESS: at 6.88% examples, 141341 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:48:35,686 : INFO : EPOCH 1 - PROGRESS: at 8.57% examples, 141267 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:48:36,722 : INFO : EPOCH 1 - PROGRESS: at 10.37% examples, 142115 words/s, in_qsize 9

2018-11-20 18:49:40,005 : INFO : EPOCH 2 - PROGRESS: at 21.64% examples, 143619 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:49:41,024 : INFO : EPOCH 2 - PROGRESS: at 23.63% examples, 144047 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:49:42,135 : INFO : EPOCH 2 - PROGRESS: at 25.42% examples, 143902 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:49:43,221 : INFO : EPOCH 2 - PROGRESS: at 27.62% examples, 144440 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:49:44,269 : INFO : EPOCH 2 - PROGRESS: at 29.41% examples, 144335 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:49:45,293 : INFO : EPOCH 2 - PROGRESS: at 31.70% examples, 144746 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:49:46,318 : INFO : EPOCH 2 - PROGRESS: at 33.70% examples, 145093 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:49:47,324 : INFO : EPOCH 2 - PROGRESS: at 35.39% examples, 145048 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:49:48,451 : INFO : EPOCH 2 - PROGRESS: at 37.19% examples, 144886 words/s, in_qsiz

2018-11-20 18:50:52,089 : INFO : EPOCH 3 - PROGRESS: at 49.25% examples, 144037 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:50:53,111 : INFO : EPOCH 3 - PROGRESS: at 51.15% examples, 144283 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:50:54,137 : INFO : EPOCH 3 - PROGRESS: at 52.94% examples, 144169 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:50:55,181 : INFO : EPOCH 3 - PROGRESS: at 54.64% examples, 144262 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:50:56,203 : INFO : EPOCH 3 - PROGRESS: at 56.63% examples, 144199 words/s, in_qsize 10, out_qsize 1
2018-11-20 18:50:57,228 : INFO : EPOCH 3 - PROGRESS: at 58.52% examples, 144356 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:50:58,294 : INFO : EPOCH 3 - PROGRESS: at 60.22% examples, 144121 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:50:59,331 : INFO : EPOCH 3 - PROGRESS: at 62.11% examples, 144517 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:51:00,429 : INFO : EPOCH 3 - PROGRESS: at 63.91% examples, 144245 words/s, in_

2018-11-20 18:52:03,713 : INFO : EPOCH 4 - PROGRESS: at 76.37% examples, 146272 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:52:04,792 : INFO : EPOCH 4 - PROGRESS: at 78.07% examples, 145911 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:52:05,813 : INFO : EPOCH 4 - PROGRESS: at 79.86% examples, 146045 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:52:06,832 : INFO : EPOCH 4 - PROGRESS: at 81.56% examples, 145939 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:52:07,961 : INFO : EPOCH 4 - PROGRESS: at 83.65% examples, 146084 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:52:08,965 : INFO : EPOCH 4 - PROGRESS: at 85.64% examples, 146096 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:52:09,970 : INFO : EPOCH 4 - PROGRESS: at 87.34% examples, 146136 words/s, in_qsize 10, out_qsize 0
2018-11-20 18:52:10,970 : INFO : EPOCH 4 - PROGRESS: at 89.13% examples, 146220 words/s, in_qsize 9, out_qsize 0
2018-11-20 18:52:12,038 : INFO : EPOCH 4 - PROGRESS: at 90.73% examples, 145967 words/s, in_

2018-11-20 18:53:13,178 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-11-20 18:53:13,203 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-11-20 18:53:13,237 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-11-20 18:53:13,237 : INFO : EPOCH - 5 : training on 13654253 raw words (8237712 effective words) took 56.3s, 146276 effective words/s
2018-11-20 18:53:13,237 : INFO : training on a 68271265 raw words (41190077 effective words) took 282.7s, 145718 effective words/s
2018-11-20 18:53:13,237 : INFO : Saving trained Doc2Vec model
2018-11-20 18:53:13,237 : INFO : saving Doc2Vec object under d2v.model, separately None
2018-11-20 18:53:13,237 : INFO : storing np array 'syn1neg' to d2v.model.trainables.syn1neg.npy
2018-11-20 18:53:13,751 : INFO : storing np array 'vectors' to d2v.model.wv.vectors.npy
2018-11-20 18:53:15,511 : INFO : saved d2v.model
2018-11-20 18:53:15,965 : INFO : Classifier training
2018-11-20 18:53:16,1