## Creates the doc2vec vector embeddings for a specific configuration

In [1]:
import json
import nltk
from nltk.tokenize import RegexpTokenizer
import string
import math
import os
import io
import time
from collections import namedtuple
import cPickle as pickle
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import random
import gzip

from multiprocessing import Process, Queue

from multiprocessing.dummy import Pool as ThreadPool
import itertools

from sklearn.metrics import coverage_error
import sklearn.metrics
from sklearn.multiclass import OneVsRestClassifier
from sklearn import linear_model
from sklearn.preprocessing import MultiLabelBinarizer

from gensim.models.doc2vec import Doc2Vec, LabeledSentence

import logging
from logging import info
from functools import partial

from thesis.utils.metrics import *
from thesis.utils.file import *

## Global variables used throughout the script

In [2]:
# Detach every handler currently attached to the root logger; logging.basicConfig
# does nothing when the root logger already has handlers.
root = logging.getLogger()
for handler in list(root.handlers):
    root.removeHandler(handler)
# basicConfig installs a default StreamHandler on the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s : %(levelname)s : %(message)s')
#root.addHandler(logging.StreamHandler())

In [3]:
# Fixed random seeds.
DOC2VEC_SEED = 1234  # passed as `seed` to the Doc2Vec constructor
SVM_SEED = 1234  # not referenced in the visible cells of this notebook

In [4]:
NUM_CORES = 16  # doc2vec `workers` and size of the inference thread pool
MIN_WORD_COUNT = 100  # passed as `min_count` to the Doc2Vec constructor

In [5]:
# Shared run state. In the visible cells the namedtuple class is not instantiated:
# attributes are assigned on the class itself (e.g. GLOBAL_VARS.MODEL_NAME = ...)
# and read back, so it acts as a simple global namespace.
GLOBAL_VARS = namedtuple(
    'GLOBAL_VARS',
    'MODEL_NAME DOC2VEC_MODEL_NAME DOC2VEC_MODEL SVM_MODEL_NAME NN_MODEL_NAME'
)

In [6]:
# Names used below the doc2vec model save location.
MODEL_PREFIX = "model"  # file name under which a Doc2Vec model is saved
VOCAB_MODEL = "vocab_model"  # sub-directory holding the vocabulary-only model

# Pickle file names. VALIDATION_DICT is the cache file for the inferred validation
# vectors; the remaining names are not referenced in the visible cells.
VALIDATION_DICT = "validation_dict.pkl"
TEST_DICT = "test_dict.pkl"
TEST_MATRIX = "test_matrix.pkl"
METRICS = "metrics.pkl"
CLASSIFIER = "classifier.pkl"

In [7]:
# Base data directory; directory paths keep their trailing separator.
root_location = "/mnt/virtual-machines/data/"
exports_location = os.path.join(root_location, "exported_data", "")

# Pickles loaded in the "general data" cell below.
doc_classifications_map_file = os.path.join(exports_location, "doc_classification_map.pkl")
training_docs_list_file = os.path.join(exports_location, "training_docs_list.pkl")
validation_docs_list_file = os.path.join(exports_location, "validation_docs_list.pkl")
test_docs_list_file = os.path.join(exports_location, "test_docs_list.pkl")

preprocessed_location = os.path.join(root_location, "preprocessed_data",
                                     "extended_pv_abs_desc_claims_full_chunks", "")

# File-name prefixes; the batch generator appends "<index>.gz" to them.
training_preprocessed_files_prefix = os.path.join(preprocessed_location,
                                                  "extended_pv_training_docs_data_preprocessed-")
validation_preprocessed_files_prefix = os.path.join(preprocessed_location,
                                                    "extended_pv_validation_docs_data_preprocessed-")
test_preprocessed_files_prefix = os.path.join(preprocessed_location,
                                              "extended_pv_test_docs_data_preprocessed-")

## Load general data required for classification

In [8]:
%%time
# Load the classification map and the training/validation/test document lists.
# NOTE(review): pickle.load can execute arbitrary code from the file; these files
# are assumed to be trusted, locally produced exports -- confirm.
def _load_pickle(path):
    """
    Unpickle and return the object stored at `path`; the file is closed afterwards.
    """
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file)


doc_classification_map = _load_pickle(doc_classifications_map_file)
training_docs_list = _load_pickle(training_docs_list_file)
validation_docs_list = _load_pickle(validation_docs_list_file)
test_docs_list = _load_pickle(test_docs_list_file)

CPU times: user 18.4 s, sys: 1.24 s, total: 19.6 s
Wall time: 19.6 s


In [9]:
len(training_docs_list)

1286325

In [10]:
len(validation_docs_list)

321473

In [11]:
len(test_docs_list)

401877

# Utility functions for data loading

In [12]:
VALIDATION_MINI_BATCH_SIZE = 10000


def get_extended_docs_with_inference_data_only(doc2vec_model, file_to_write, preprocessed_files_prefix, level, model_name):
    """
    Use the trained doc2vec model to get the paragraph vector representations of the validation or test documents.

    The result is cached on disk: when `file_to_write` already exists under
    doc2vec_model_save_location/GLOBAL_VARS.MODEL_NAME it is unpickled and returned,
    otherwise the vectors are inferred and pickled to that path.

    :param doc2vec_model: model whose infer_vector(tokens) is called for every document
    :param file_to_write: file name of the cache pickle
    :param preprocessed_files_prefix: file prefix handed to BatchWrapper
    :param level: handed to BatchWrapper as `level`
    :param model_name: handed to BatchWrapper as `level_type`
    :return: dict mapping doc_id to its inferred vector
    """
    reps_file = os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, file_to_write)

    def infer_one_doc(doc_tuple):
        # NOTE(review): the model's random state is not re-seeded per document, so the
        # inferred vectors are presumably not reproducible across runs -- confirm.
        doc_id, doc_tokens = doc_tuple
        return (doc_id, doc2vec_model.infer_vector(doc_tokens))

    if os.path.exists(reps_file):
        info("===== Loading inference vectors")
        with open(reps_file, 'rb') as reps_in:
            inference_documents_reps = pickle.load(reps_in)
        info("Loaded inference vectors matrix")
        return inference_documents_reps

    inference_documents_reps = {}
    info("===== Getting vectors with inference")

    # Multi-threaded inference
    inference_docs_iterator = BatchWrapper(preprocessed_files_prefix, batch_size=None, level=level, level_type=model_name)
    generator_func = iter(inference_docs_iterator)
    # map consumes the whole iterator on the spot, so we have to use itertools.islice to fake mini-batching
    mini_batch_size = VALIDATION_MINI_BATCH_SIZE
    docs_done = 0
    pool = ThreadPool(NUM_CORES)
    try:
        while True:
            threaded_reps_partial = pool.map(infer_one_doc, itertools.islice(generator_func, mini_batch_size))
            if not threaded_reps_partial:
                # an empty batch means the document iterator is exhausted
                break
            inference_documents_reps.update(threaded_reps_partial)
            # report the number of documents actually processed, not batches * batch size
            docs_done += len(threaded_reps_partial)
            info("Finished: {} tags".format(docs_done))
    finally:
        # release the worker threads even when inference fails part-way
        pool.close()
        pool.terminate()

    with open(reps_file, 'wb') as reps_out:
        pickle.dump(inference_documents_reps, reps_out)

    return inference_documents_reps

In [13]:
class ExtendedPVDocumentBatchGenerator(Process):
    """
    Producer process that streams the lines of a sequence of gzip files into a queue.

    Files are named `<filename_prefix><index>.gz`; the index starts at `start_file` and
    grows by `offset` after each file. When a file cannot be opened or read (IOError),
    the value False is put on the queue as an end-of-data marker and the process stops.
    """
    def __init__(self, filename_prefix, queue, batch_size=10000, start_file=0, offset=10000):
        """
        :param filename_prefix: path prefix of the gzip files
        :param queue: object with a put() method that receives the lines
        :param batch_size: accepted for interface compatibility; not used by this class
        :param start_file: index of the first file to load
        :param offset: index step between consecutive files
        """
        super(ExtendedPVDocumentBatchGenerator, self).__init__()
        self.queue = queue
        self.offset = offset
        self.filename_prefix = filename_prefix
        # index of the most recently opened file; starts one step before start_file
        self.files_loaded = start_file - offset

    def run(self):
        """
        Put every line of every file on the queue, followed by False once a file is missing.
        """
        try:
            while True:
                next_index = self.files_loaded + self.offset
                info("Loading new file for index: {}".format(str(next_index)))
                cur_file = io.BufferedReader(gzip.open(self.filename_prefix + str(next_index) + '.gz'))
                # only advance the index once the file was opened successfully
                self.files_loaded = next_index
                try:
                    for line in cur_file:
                        self.queue.put(line)
                finally:
                    # close the handle even when reading fails mid-file
                    cur_file.close()
        except IOError:
            # a missing (or unreadable) file marks the end of the data
            self.queue.put(False, block=True, timeout=None)
            info("All files are loaded - last file: {}".format(str(self.files_loaded + self.offset)))
            return


class BatchWrapper(object):
    """
    Iterable over the documents produced by an ExtendedPVDocumentBatchGenerator process.

    Construction starts the producer process. Each queued line is expected to look like
    "<doc_id> <token> <token> ..."; only documents whose id matches `level` and
    `level_type` are yielded (see is_correct_type).
    """
    def __init__(self, training_preprocessed_files_prefix, buffer_size=10000, batch_size=10000, level=1, level_type=None):
        """
        :param training_preprocessed_files_prefix: file prefix handed to the producer process
        :param buffer_size: maximum number of lines buffered in the queue
        :param batch_size: maximum words per LabeledSentence (at most 10000), or None to
                           yield one (doc_id, words) tuple per document
        :param level: number of "_"-separated parts a doc_id must have
        :param level_type: only its first character is kept and compared against the first
                           character of the doc_id's second part; None accepts any
        """
        # test for None first: comparing None with an int raises TypeError on Python 3
        assert batch_size is None or batch_size <= 10000
        self.level = level
        self.level_type = level_type[0] if level_type is not None else None
        self.batch_size = batch_size
        self.q = Queue(maxsize=buffer_size)
        self.p = ExtendedPVDocumentBatchGenerator(training_preprocessed_files_prefix, queue=self.q,
                                                  batch_size=batch_size, start_file=0, offset=10000)
        self.p.start()
        self.cur_data = []

    def is_correct_type(self, doc_id):
        """
        Return True when `doc_id` has exactly `self.level` "_"-separated parts and, for ids
        with more than one part, the second part starts with `self.level_type` (if set).
        """
        parts = doc_id.split("_")
        if len(parts) != self.level:
            return False
        if len(parts) == 1:
            return True
        # slicing instead of indexing keeps an empty second part from raising IndexError
        return self.level_type is None or parts[1][:1] == self.level_type

    def return_sentences(self, line):
        """
        Turn one line into a tuple of sentences, or return False when the document is filtered out.

        With batch_size None the tuple holds a single (doc_id, words) pair; otherwise it holds
        LabeledSentence objects of at most batch_size words each, all tagged with doc_id.
        """
        # NOTE(review): the line is split on single spaces only, so a trailing newline in
        # `line` stays attached to the last token -- confirm the preprocessed files allow for this.
        tokens = tuple(line.split(" "))
        doc_id = tokens[0]
        if not self.is_correct_type(doc_id):
            return False
        words = tokens[1:]

        if self.batch_size is None:
            # dont use LabeledSentence for validation iterator
            return ((doc_id, words),)

        # divide the document to batches according to the batch size
        return tuple(
            LabeledSentence(words=words[start:start + self.batch_size], tags=[doc_id])
            for start in range(0, len(words), self.batch_size)
        )

    def __iter__(self):
        """
        Yield sentences until the producer signals the end of the data with False.
        """
        while True:
            item = self.q.get(block=True)
            if item is False:
                self.p.terminate()
                # a plain return ends the generator; raising StopIteration inside a
                # generator is turned into RuntimeError by PEP 479
                return
            sentences = self.return_sentences(item)
            if not sentences:
                # document filtered out (or it has no words)
                continue
            for sentence in sentences:
                yield sentence


# Doc2vec and SVM Parameters

In [14]:
# --- values handed to the Doc2Vec constructor ---
DOC2VEC_SIZE = 200  # `size`
DOC2VEC_WINDOW = 2  # `window`
DOC2VEC_MAX_VOCAB_SIZE = None  # `max_vocab_size`
DOC2VEC_SAMPLE = 1e-3  # `sample`
DOC2VEC_TYPE = 1  # `dm`; the value 1 is labelled 'dm' in the model name
DOC2VEC_HIERARCHICAL_SAMPLE = 0  # `hs`
DOC2VEC_NEGATIVE_SAMPLE_SIZE = 10  # `negative`
DOC2VEC_CONCAT = 0  # `dm_concat`
DOC2VEC_MEAN = 1  # used in the model directory name
DOC2VEC_TRAIN_WORDS = 0  # `dbow_words`
DOC2VEC_EPOCHS = 1 # we do our training manually one epoch at a time

# --- training loop and progress reporting ---
DOC2VEC_MAX_EPOCHS = 8  # last epoch number of the manual training loop
REPORT_DELAY = 20 # report the progress every x seconds
REPORT_VOCAB_PROGRESS = 100000 # report vocab progress every x documents

## Create the Doc2vec model and create/load the vocab

In [15]:
# (level, model_name) configurations; only the first entry is unpacked here.
models = [(1, 'document')]
level, model_name = models[0]

In [17]:
info("creating/loading vocabulary for " + str(level) + ' ' + model_name + ' in ')
doc2vec_model_save_location = os.path.join(root_location,
                                           "parameter_search_doc2vec_models_recalc_" + str(level) + '_' + model_name,
                                           "full")
# os.makedirs creates missing parents too, so this also creates doc2vec_model_save_location
vocab_model_location = os.path.join(doc2vec_model_save_location, VOCAB_MODEL)
if not os.path.exists(vocab_model_location):
    os.makedirs(vocab_model_location)

# The model name encodes the hyper-parameters; it becomes the per-configuration directory name.
placeholder_model_name = 'doc2vec_size_{}_w_{}_type_{}_concat_{}_mean_{}_trainwords_{}_hs_{}_neg_{}_vocabsize_{}_model_{}'.format(DOC2VEC_SIZE,
                                                                DOC2VEC_WINDOW,
                                                                'dm' if DOC2VEC_TYPE == 1 else 'pv-dbow',
                                                                DOC2VEC_CONCAT, DOC2VEC_MEAN,
                                                                DOC2VEC_TRAIN_WORDS,
                                                                DOC2VEC_HIERARCHICAL_SAMPLE,DOC2VEC_NEGATIVE_SAMPLE_SIZE,
                                                                str(DOC2VEC_MAX_VOCAB_SIZE),
                                                                str(level) + '_' + model_name
                                                                )
GLOBAL_VARS.DOC2VEC_MODEL_NAME = placeholder_model_name
# "epoch_{}" is filled in with the epoch number by the training loop
placeholder_model_name = os.path.join(placeholder_model_name, "epoch_{}")
info("FILE " + os.path.join(vocab_model_location, MODEL_PREFIX))
doc2vec_model = Doc2Vec(size=DOC2VEC_SIZE, window=DOC2VEC_WINDOW, min_count=MIN_WORD_COUNT,
                max_vocab_size= DOC2VEC_MAX_VOCAB_SIZE,
                sample=DOC2VEC_SAMPLE, seed=DOC2VEC_SEED, workers=NUM_CORES,
                # doc2vec algorithm dm=1 => PV-DM, otherwise PV-DBOW, PV-DM dictates CBOW for words
                dm=DOC2VEC_TYPE,
                # hs=0 => negative sampling, hs=1 => hierarchical softmax
                hs=DOC2VEC_HIERARCHICAL_SAMPLE, negative=DOC2VEC_NEGATIVE_SAMPLE_SIZE,
                dm_concat=DOC2VEC_CONCAT,
                # pass the mean setting explicitly so the model matches the "mean_{}" part of its name
                dm_mean=DOC2VEC_MEAN,
                # would train words with skip-gram on top of cbow, we don't need that for now
                dbow_words=DOC2VEC_TRAIN_WORDS,
                iter=DOC2VEC_EPOCHS)

GLOBAL_VARS.DOC2VEC_MODEL = doc2vec_model


2017-04-19 01:57:41,817 : INFO : creating/loading vocabulary for 1 document in 
2017-04-19 01:57:41,818 : INFO : FILE /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/vocab_model/model


In [18]:
# Build the vocabulary once and save it; later runs reuse the saved vocabulary model.
vocab_model_path = os.path.join(doc2vec_model_save_location, VOCAB_MODEL, MODEL_PREFIX)
if os.path.exists(vocab_model_path):
    info("Loading vocab model")
    doc2vec_model_vocab_model = Doc2Vec.load(vocab_model_path)
    # copy the saved vocabulary state into the freshly configured model
    doc2vec_model.reset_from(doc2vec_model_vocab_model)
else:
    info("Creating vocab model")
    training_docs_iterator = BatchWrapper(training_preprocessed_files_prefix, batch_size=10000,
                                          level=level, level_type=model_name)
    doc2vec_model.build_vocab(sentences=training_docs_iterator, progress_per=REPORT_VOCAB_PROGRESS)
    doc2vec_model.save(vocab_model_path)

2017-04-19 01:57:44,566 : INFO : Creating vocab model
2017-04-19 01:57:44,621 : INFO : collecting all words and their counts
2017-04-19 01:57:44,623 : INFO : Loading new file for index: 0
2017-04-19 01:57:44,643 : INFO : PROGRESS: at example #0, processed 0 words (0/s), 0 word types, 0 tags
2017-04-19 01:58:14,800 : INFO : Loading new file for index: 10000
2017-04-19 01:58:42,546 : INFO : Loading new file for index: 20000
2017-04-19 01:59:11,676 : INFO : Loading new file for index: 30000
2017-04-19 01:59:41,056 : INFO : Loading new file for index: 40000
2017-04-19 02:00:10,160 : INFO : Loading new file for index: 50000
2017-04-19 02:00:39,829 : INFO : Loading new file for index: 60000
2017-04-19 02:01:04,537 : INFO : PROGRESS: at example #100000, processed 671106633 words (3357329/s), 3023524 word types, 68086 tags
2017-04-19 02:01:09,727 : INFO : Loading new file for index: 70000
2017-04-19 02:01:40,091 : INFO : Loading new file for index: 80000
2017-04-19 02:02:09,754 : INFO : Loadin

2017-04-19 02:43:34,365 : INFO : Loading new file for index: 920000
2017-04-19 02:44:03,928 : INFO : Loading new file for index: 930000
2017-04-19 02:44:33,161 : INFO : Loading new file for index: 940000
2017-04-19 02:45:02,647 : INFO : Loading new file for index: 950000
2017-04-19 02:45:25,575 : INFO : PROGRESS: at example #1400000, processed 9398449921 words (3300525/s), 20601656 word types, 957326 tags
2017-04-19 02:45:32,341 : INFO : Loading new file for index: 960000
2017-04-19 02:46:02,183 : INFO : Loading new file for index: 970000
2017-04-19 02:46:31,791 : INFO : Loading new file for index: 980000
2017-04-19 02:47:01,510 : INFO : Loading new file for index: 990000
2017-04-19 02:47:31,974 : INFO : Loading new file for index: 1000000
2017-04-19 02:48:02,375 : INFO : Loading new file for index: 1010000
2017-04-19 02:48:33,494 : INFO : Loading new file for index: 1020000
2017-04-19 02:48:51,764 : INFO : PROGRESS: at example #1500000, processed 10068733754 words (3250848/s), 2164610

## Actual Training, Validation and Metrics Loop

In [19]:
# Amount subtracted from the learning rate (alpha) after every training epoch.
DOC2VEC_ALPHA_DECREASE = 0.001
# Initial min_alpha; presumably equal to gensim's default starting alpha so that the
# rate stays constant during the first epoch -- TODO confirm.
doc2vec_model.min_alpha = 0.025

In [20]:
# Set the worker count on the model again; it is also passed to the constructor and
# re-applied whenever a saved model is loaded in the training loop.
doc2vec_model.workers = NUM_CORES

In [21]:
%%time
# when resuming, resume from an epoch with a previously created doc2vec model to get the learning rate right
start_from = 1
for epoch in range(start_from, DOC2VEC_MAX_EPOCHS+1):
    GLOBAL_VARS.MODEL_NAME = placeholder_model_name.format(epoch)
    info("****************** Epoch {} --- Working on {} *******************".format(epoch, GLOBAL_VARS.MODEL_NAME))
    
    # if we have the model, just load it, otherwise train the previous model
    if os.path.exists(os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, MODEL_PREFIX)):
        doc2vec_model = Doc2Vec.load(os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, MODEL_PREFIX))
        doc2vec_model.workers = NUM_CORES
        GLOBAL_VARS.DOC2VEC_MODEL = doc2vec_model
    else:
        # train the doc2vec model
        training_docs_iterator = BatchWrapper(training_preprocessed_files_prefix, batch_size=10000, level=level,
                                          level_type=model_name)
        %time doc2vec_model.train(sentences=training_docs_iterator, report_delay=REPORT_DELAY)
        doc2vec_model.alpha -= DOC2VEC_ALPHA_DECREASE  # decrease the learning rate
        doc2vec_model.min_alpha = doc2vec_model.alpha  # fix the learning rate, no decay
        ensure_disk_location_exists(os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME))
        doc2vec_model.save(os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, MODEL_PREFIX))
        GLOBAL_VARS.DOC2VEC_MODEL = doc2vec_model
        
    # only do the inference for higher epochs, as inference usually takes as much time as the actual training
    if epoch == 3 or epoch == 5:
        # Validation Embeddings
        info('Getting Validation Embeddings')
        Xv = get_extended_docs_with_inference_data_only(doc2vec_model, VALIDATION_DICT, 
                                         validation_preprocessed_files_prefix, level, model_name)

2017-04-19 03:02:52,121 : INFO : ****************** Epoch 1 --- Working on doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_1 *******************
2017-04-19 03:02:52,223 : INFO : training model with 16 workers on 446814 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=10 window=2
2017-04-19 03:02:52,224 : INFO : expecting 1879865 sentences, matching count from corpus used for vocabulary survey
2017-04-19 03:02:52,223 : INFO : Loading new file for index: 0
2017-04-19 03:02:53,231 : INFO : PROGRESS: at 0.01% examples, 485323 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:03:13,251 : INFO : PROGRESS: at 0.17% examples, 691574 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:03:33,266 : INFO : PROGRESS: at 0.34% examples, 713148 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:03:53,266 : INFO : PROGRESS: at 0.51% examples, 721711 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:04:13,271 : INFO : PROGRESS: at 0.69% exa

2017-04-19 03:24:42,790 : INFO : Loading new file for index: 150000
2017-04-19 03:24:53,720 : INFO : PROGRESS: at 11.79% examples, 767853 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:25:13,727 : INFO : PROGRESS: at 11.97% examples, 768005 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:25:33,731 : INFO : PROGRESS: at 12.14% examples, 767928 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:25:53,732 : INFO : PROGRESS: at 12.33% examples, 768033 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:26:10,060 : INFO : Loading new file for index: 160000
2017-04-19 03:26:13,735 : INFO : PROGRESS: at 12.51% examples, 768161 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:26:33,743 : INFO : PROGRESS: at 12.69% examples, 768325 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:26:53,755 : INFO : PROGRESS: at 12.87% examples, 768501 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:27:13,760 : INFO : PROGRESS: at 13.05% examples, 768666 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:27:33,763 : INFO : PROGRES

2017-04-19 03:47:54,183 : INFO : PROGRESS: at 24.34% examples, 775612 words/s, in_qsize 0, out_qsize 4
2017-04-19 03:48:14,196 : INFO : PROGRESS: at 24.52% examples, 775721 words/s, in_qsize 1, out_qsize 1
2017-04-19 03:48:34,195 : INFO : PROGRESS: at 24.71% examples, 775892 words/s, in_qsize 0, out_qsize 3
2017-04-19 03:48:54,197 : INFO : PROGRESS: at 24.89% examples, 775968 words/s, in_qsize 0, out_qsize 1
2017-04-19 03:48:58,884 : INFO : Loading new file for index: 320000
2017-04-19 03:49:14,217 : INFO : PROGRESS: at 25.08% examples, 776113 words/s, in_qsize 0, out_qsize 1
2017-04-19 03:49:34,221 : INFO : PROGRESS: at 25.26% examples, 776215 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:49:54,225 : INFO : PROGRESS: at 25.44% examples, 776322 words/s, in_qsize 0, out_qsize 2
2017-04-19 03:50:14,232 : INFO : PROGRESS: at 25.63% examples, 776425 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:50:22,088 : INFO : Loading new file for index: 330000
2017-04-19 03:50:34,238 : INFO : PROGRES

2017-04-19 04:10:54,590 : INFO : PROGRESS: at 37.21% examples, 784448 words/s, in_qsize 0, out_qsize 2
2017-04-19 04:11:09,159 : INFO : Loading new file for index: 480000
2017-04-19 04:11:14,601 : INFO : PROGRESS: at 37.40% examples, 784550 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:11:34,611 : INFO : PROGRESS: at 37.59% examples, 784680 words/s, in_qsize 0, out_qsize 1
2017-04-19 04:11:54,618 : INFO : PROGRESS: at 37.78% examples, 784817 words/s, in_qsize 1, out_qsize 0
2017-04-19 04:12:14,620 : INFO : PROGRESS: at 37.97% examples, 784928 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:12:30,558 : INFO : Loading new file for index: 490000
2017-04-19 04:12:34,641 : INFO : PROGRESS: at 38.16% examples, 785055 words/s, in_qsize 0, out_qsize 1
2017-04-19 04:12:54,642 : INFO : PROGRESS: at 38.35% examples, 785197 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:13:14,652 : INFO : PROGRESS: at 38.54% examples, 785360 words/s, in_qsize 0, out_qsize 1
2017-04-19 04:13:34,654 : INFO : PROGRES

2017-04-19 04:33:55,152 : INFO : PROGRESS: at 50.28% examples, 791433 words/s, in_qsize 0, out_qsize 1
2017-04-19 04:34:15,171 : INFO : PROGRESS: at 50.47% examples, 791522 words/s, in_qsize 0, out_qsize 3
2017-04-19 04:34:18,656 : INFO : Loading new file for index: 650000
2017-04-19 04:34:35,161 : INFO : PROGRESS: at 50.66% examples, 791595 words/s, in_qsize 0, out_qsize 3
2017-04-19 04:34:55,177 : INFO : PROGRESS: at 50.84% examples, 791662 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:35:15,177 : INFO : PROGRESS: at 51.03% examples, 791713 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:35:35,192 : INFO : PROGRESS: at 51.22% examples, 791783 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:35:42,522 : INFO : Loading new file for index: 660000
2017-04-19 04:35:55,194 : INFO : PROGRESS: at 51.41% examples, 791864 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:36:15,201 : INFO : PROGRESS: at 51.60% examples, 791950 words/s, in_qsize 0, out_qsize 2
2017-04-19 04:36:35,230 : INFO : PROGRES

2017-04-19 04:56:55,601 : INFO : PROGRESS: at 63.25% examples, 795230 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:57:15,608 : INFO : PROGRESS: at 63.44% examples, 795285 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:57:35,612 : INFO : PROGRESS: at 63.63% examples, 795339 words/s, in_qsize 2, out_qsize 0
2017-04-19 04:57:53,080 : INFO : Loading new file for index: 820000
2017-04-19 04:57:55,612 : INFO : PROGRESS: at 63.82% examples, 795367 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:58:15,612 : INFO : PROGRESS: at 64.01% examples, 795419 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:58:35,613 : INFO : PROGRESS: at 64.20% examples, 795466 words/s, in_qsize 0, out_qsize 1
2017-04-19 04:58:55,616 : INFO : PROGRESS: at 64.39% examples, 795522 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:59:12,688 : INFO : Loading new file for index: 830000
2017-04-19 04:59:15,618 : INFO : PROGRESS: at 64.58% examples, 795595 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:59:35,628 : INFO : PROGRES

2017-04-19 05:19:56,008 : INFO : PROGRESS: at 76.53% examples, 800332 words/s, in_qsize 0, out_qsize 1
2017-04-19 05:20:16,004 : INFO : PROGRESS: at 76.72% examples, 800393 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:20:36,030 : INFO : PROGRESS: at 76.92% examples, 800491 words/s, in_qsize 0, out_qsize 3
2017-04-19 05:20:43,264 : INFO : Loading new file for index: 990000
2017-04-19 05:20:56,023 : INFO : PROGRESS: at 77.11% examples, 800579 words/s, in_qsize 0, out_qsize 1
2017-04-19 05:21:16,031 : INFO : PROGRESS: at 77.31% examples, 800668 words/s, in_qsize 0, out_qsize 1
2017-04-19 05:21:36,040 : INFO : PROGRESS: at 77.50% examples, 800732 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:21:56,044 : INFO : PROGRESS: at 77.69% examples, 800799 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:22:03,412 : INFO : Loading new file for index: 1000000
2017-04-19 05:22:16,056 : INFO : PROGRESS: at 77.89% examples, 800868 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:22:36,063 : INFO : PROGRE

2017-04-19 05:42:56,515 : INFO : PROGRESS: at 89.93% examples, 805286 words/s, in_qsize 0, out_qsize 1
2017-04-19 05:43:16,540 : INFO : PROGRESS: at 90.12% examples, 805317 words/s, in_qsize 0, out_qsize 1
2017-04-19 05:43:26,125 : INFO : Loading new file for index: 1160000
2017-04-19 05:43:36,557 : INFO : PROGRESS: at 90.31% examples, 805359 words/s, in_qsize 0, out_qsize 3
2017-04-19 05:43:56,563 : INFO : PROGRESS: at 90.51% examples, 805400 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:44:16,569 : INFO : PROGRESS: at 90.70% examples, 805440 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:44:36,569 : INFO : PROGRESS: at 90.89% examples, 805468 words/s, in_qsize 0, out_qsize 1
2017-04-19 05:44:47,384 : INFO : Loading new file for index: 1170000
2017-04-19 05:44:56,572 : INFO : PROGRESS: at 91.08% examples, 805507 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:45:16,579 : INFO : PROGRESS: at 91.28% examples, 805541 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:45:36,593 : INFO : PROGR

CPU times: user 18h 23min 1s, sys: 10min 34s, total: 18h 33min 36s
Wall time: 2h 56min 56s


2017-04-19 05:59:49,652 : INFO : not storing attribute syn0norm
2017-04-19 05:59:49,653 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_1/model.wv.syn0.npy
2017-04-19 05:59:49,886 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_1/model.syn1neg.npy
2017-04-19 05:59:50,079 : INFO : not storing attribute cum_table
2017-04-19 06:00:04,228 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_1/model
2017-04-19 06:00:04,229 : INFO : ****************** Epoch 2 --- Working on doc2vec_size_200

2017-04-19 06:19:05,720 : INFO : PROGRESS: at 11.11% examples, 837759 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:19:25,749 : INFO : PROGRESS: at 11.30% examples, 837838 words/s, in_qsize 0, out_qsize 3
2017-04-19 06:19:45,736 : INFO : PROGRESS: at 11.50% examples, 838047 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:20:04,815 : INFO : Loading new file for index: 150000
2017-04-19 06:20:05,748 : INFO : PROGRESS: at 11.70% examples, 838192 words/s, in_qsize 0, out_qsize 2
2017-04-19 06:20:25,751 : INFO : PROGRESS: at 11.89% examples, 838372 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:20:45,752 : INFO : PROGRESS: at 12.09% examples, 838399 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:21:05,753 : INFO : PROGRESS: at 12.29% examples, 838461 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:21:24,726 : INFO : Loading new file for index: 160000
2017-04-19 06:21:25,756 : INFO : PROGRESS: at 12.48% examples, 838628 words/s, in_qsize 0, out_qsize 3
2017-04-19 06:21:45,762 : INFO : PROGRES

2017-04-19 06:42:06,151 : INFO : PROGRESS: at 24.65% examples, 841724 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:42:26,166 : INFO : PROGRESS: at 24.84% examples, 841780 words/s, in_qsize 0, out_qsize 2
2017-04-19 06:42:34,630 : INFO : Loading new file for index: 320000
2017-04-19 06:42:46,159 : INFO : PROGRESS: at 25.04% examples, 841805 words/s, in_qsize 0, out_qsize 1
2017-04-19 06:43:06,163 : INFO : PROGRESS: at 25.24% examples, 841819 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:43:26,166 : INFO : PROGRESS: at 25.44% examples, 841870 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:43:46,178 : INFO : PROGRESS: at 25.63% examples, 841817 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:43:53,021 : INFO : Loading new file for index: 330000
2017-04-19 06:44:06,183 : INFO : PROGRESS: at 25.83% examples, 841788 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:44:26,183 : INFO : PROGRESS: at 26.03% examples, 841848 words/s, in_qsize 0, out_qsize 2
2017-04-19 06:44:46,188 : INFO : PROGRES

2017-04-19 07:05:06,642 : INFO : PROGRESS: at 38.09% examples, 839847 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:05:09,820 : INFO : Loading new file for index: 490000
2017-04-19 07:05:26,641 : INFO : PROGRESS: at 38.28% examples, 839805 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:05:46,644 : INFO : PROGRESS: at 38.48% examples, 839740 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:06:06,672 : INFO : PROGRESS: at 38.67% examples, 839655 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:06:26,681 : INFO : PROGRESS: at 38.86% examples, 839581 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:06:29,739 : INFO : Loading new file for index: 500000
2017-04-19 07:06:46,694 : INFO : PROGRESS: at 39.05% examples, 839498 words/s, in_qsize 0, out_qsize 3
2017-04-19 07:07:06,700 : INFO : PROGRESS: at 39.25% examples, 839430 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:07:26,701 : INFO : PROGRESS: at 39.44% examples, 839340 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:07:46,714 : INFO : PROGRES

2017-04-19 07:27:50,906 : INFO : Loading new file for index: 660000
2017-04-19 07:28:07,067 : INFO : PROGRESS: at 51.44% examples, 837427 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:28:27,092 : INFO : PROGRESS: at 51.63% examples, 837385 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:28:47,096 : INFO : PROGRESS: at 51.83% examples, 837356 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:29:07,103 : INFO : PROGRESS: at 52.02% examples, 837340 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:29:11,520 : INFO : Loading new file for index: 670000
2017-04-19 07:29:27,104 : INFO : PROGRESS: at 52.22% examples, 837339 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:29:47,108 : INFO : PROGRESS: at 52.41% examples, 837339 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:30:07,120 : INFO : PROGRESS: at 52.60% examples, 837315 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:30:27,121 : INFO : PROGRESS: at 52.79% examples, 837286 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:30:32,180 : INFO : Loading

2017-04-19 07:51:07,509 : INFO : PROGRESS: at 64.06% examples, 827136 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:51:27,519 : INFO : PROGRESS: at 64.23% examples, 826895 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:51:47,523 : INFO : PROGRESS: at 64.41% examples, 826697 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:52:02,104 : INFO : Loading new file for index: 830000
2017-04-19 07:52:07,535 : INFO : PROGRESS: at 64.61% examples, 826698 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:52:27,539 : INFO : PROGRESS: at 64.78% examples, 826445 words/s, in_qsize 0, out_qsize 3
2017-04-19 07:52:47,546 : INFO : PROGRESS: at 64.95% examples, 826239 words/s, in_qsize 1, out_qsize 3
2017-04-19 07:53:07,555 : INFO : PROGRESS: at 65.15% examples, 826218 words/s, in_qsize 0, out_qsize 1
2017-04-19 07:53:27,558 : INFO : PROGRESS: at 65.32% examples, 826014 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:53:30,260 : INFO : Loading new file for index: 840000
2017-04-19 07:53:47,562 : INFO : PROGRES

2017-04-19 08:14:07,958 : INFO : PROGRESS: at 76.77% examples, 820897 words/s, in_qsize 0, out_qsize 1
2017-04-19 08:14:27,961 : INFO : PROGRESS: at 76.96% examples, 820863 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:14:30,562 : INFO : Loading new file for index: 990000
2017-04-19 08:14:47,975 : INFO : PROGRESS: at 77.15% examples, 820819 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:15:07,996 : INFO : PROGRESS: at 77.34% examples, 820804 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:15:27,993 : INFO : PROGRESS: at 77.53% examples, 820797 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:15:48,006 : INFO : PROGRESS: at 77.72% examples, 820794 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:15:52,310 : INFO : Loading new file for index: 1000000
2017-04-19 08:16:08,008 : INFO : PROGRESS: at 77.91% examples, 820789 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:16:28,017 : INFO : PROGRESS: at 78.10% examples, 820797 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:16:48,024 : INFO : PROGRE

2017-04-19 08:37:08,453 : INFO : PROGRESS: at 90.04% examples, 821706 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:37:26,233 : INFO : Loading new file for index: 1160000
2017-04-19 08:37:28,460 : INFO : PROGRESS: at 90.23% examples, 821709 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:37:48,462 : INFO : PROGRESS: at 90.43% examples, 821715 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:38:08,475 : INFO : PROGRESS: at 90.62% examples, 821729 words/s, in_qsize 0, out_qsize 4
2017-04-19 08:38:28,481 : INFO : PROGRESS: at 90.81% examples, 821748 words/s, in_qsize 0, out_qsize 1
2017-04-19 08:38:46,974 : INFO : Loading new file for index: 1170000
2017-04-19 08:38:48,496 : INFO : PROGRESS: at 91.01% examples, 821771 words/s, in_qsize 0, out_qsize 2
2017-04-19 08:39:08,488 : INFO : PROGRESS: at 91.20% examples, 821786 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:39:28,498 : INFO : PROGRESS: at 91.40% examples, 821803 words/s, in_qsize 0, out_qsize 2
2017-04-19 08:39:48,508 : INFO : PROGR

2017-04-19 08:54:13,015 : INFO : storing np array 'doctag_syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_2/model.docvecs.doctag_syn0.npy


CPU times: user 18h 9min 34s, sys: 12min 24s, total: 18h 21min 58s
Wall time: 2h 54min 8s


2017-04-19 08:54:13,863 : INFO : not storing attribute syn0norm
2017-04-19 08:54:13,864 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_2/model.wv.syn0.npy
2017-04-19 08:54:14,115 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_2/model.syn1neg.npy
2017-04-19 08:54:14,327 : INFO : not storing attribute cum_table
2017-04-19 08:54:29,558 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_2/model
2017-04-19 08:54:29,559 : INFO : ****************** Epoch 3 --- Working on doc2vec_size_200

2017-04-19 09:13:31,089 : INFO : PROGRESS: at 11.06% examples, 834049 words/s, in_qsize 0, out_qsize 2
2017-04-19 09:13:51,095 : INFO : PROGRESS: at 11.25% examples, 834075 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:14:11,097 : INFO : PROGRESS: at 11.45% examples, 834309 words/s, in_qsize 0, out_qsize 1
2017-04-19 09:14:31,101 : INFO : PROGRESS: at 11.64% examples, 834351 words/s, in_qsize 0, out_qsize 2
2017-04-19 09:14:35,581 : INFO : Loading new file for index: 150000
2017-04-19 09:14:51,135 : INFO : PROGRESS: at 11.84% examples, 834404 words/s, in_qsize 0, out_qsize 3
2017-04-19 09:15:11,122 : INFO : PROGRESS: at 12.03% examples, 834426 words/s, in_qsize 0, out_qsize 2
2017-04-19 09:15:31,122 : INFO : PROGRESS: at 12.23% examples, 834571 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:15:51,128 : INFO : PROGRESS: at 12.42% examples, 834685 words/s, in_qsize 0, out_qsize 1
2017-04-19 09:15:56,039 : INFO : Loading new file for index: 160000
2017-04-19 09:16:11,150 : INFO : PROGRES

2017-04-19 09:36:31,518 : INFO : PROGRESS: at 24.51% examples, 836941 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:36:51,526 : INFO : PROGRESS: at 24.70% examples, 836907 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:37:11,530 : INFO : PROGRESS: at 24.90% examples, 836937 words/s, in_qsize 1, out_qsize 1
2017-04-19 09:37:14,823 : INFO : Loading new file for index: 320000
2017-04-19 09:37:31,532 : INFO : PROGRESS: at 25.10% examples, 837075 words/s, in_qsize 0, out_qsize 1
2017-04-19 09:37:51,535 : INFO : PROGRESS: at 25.30% examples, 837229 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:38:11,540 : INFO : PROGRESS: at 25.49% examples, 837237 words/s, in_qsize 0, out_qsize 1
2017-04-19 09:38:31,548 : INFO : PROGRESS: at 25.69% examples, 837254 words/s, in_qsize 1, out_qsize 0
2017-04-19 09:38:32,654 : INFO : Loading new file for index: 330000
2017-04-19 09:38:51,549 : INFO : PROGRESS: at 25.89% examples, 837349 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:39:11,552 : INFO : PROGRES

2017-04-19 09:59:31,890 : INFO : PROGRESS: at 37.99% examples, 837597 words/s, in_qsize 0, out_qsize 2
2017-04-19 09:59:45,563 : INFO : Loading new file for index: 490000
2017-04-19 09:59:51,899 : INFO : PROGRESS: at 38.18% examples, 837636 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:00:11,897 : INFO : PROGRESS: at 38.38% examples, 837674 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:00:31,907 : INFO : PROGRESS: at 38.58% examples, 837694 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:00:51,916 : INFO : PROGRESS: at 38.77% examples, 837723 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:01:03,664 : INFO : Loading new file for index: 500000
2017-04-19 10:01:11,933 : INFO : PROGRESS: at 38.97% examples, 837787 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:01:31,939 : INFO : PROGRESS: at 39.17% examples, 837818 words/s, in_qsize 0, out_qsize 1
2017-04-19 10:01:51,940 : INFO : PROGRESS: at 39.37% examples, 837853 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:02:11,952 : INFO : PROGRES

2017-04-19 10:22:12,350 : INFO : PROGRESS: at 51.31% examples, 838414 words/s, in_qsize 0, out_qsize 2
2017-04-19 10:22:32,347 : INFO : PROGRESS: at 51.50% examples, 838434 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:22:52,350 : INFO : PROGRESS: at 51.69% examples, 838403 words/s, in_qsize 0, out_qsize 2
2017-04-19 10:23:12,355 : INFO : PROGRESS: at 51.89% examples, 838411 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:23:30,121 : INFO : Loading new file for index: 670000
2017-04-19 10:23:32,353 : INFO : PROGRESS: at 52.09% examples, 838400 words/s, in_qsize 0, out_qsize 1
2017-04-19 10:23:52,375 : INFO : PROGRESS: at 52.28% examples, 838380 words/s, in_qsize 0, out_qsize 2
2017-04-19 10:24:12,369 : INFO : PROGRESS: at 52.47% examples, 838381 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:24:32,378 : INFO : PROGRESS: at 52.67% examples, 838349 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:24:50,782 : INFO : Loading new file for index: 680000
2017-04-19 10:24:52,387 : INFO : PROGRES

2017-04-19 10:44:52,877 : INFO : Loading new file for index: 830000
2017-04-19 10:45:12,800 : INFO : PROGRESS: at 64.75% examples, 838505 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:45:32,816 : INFO : PROGRESS: at 64.94% examples, 838499 words/s, in_qsize 0, out_qsize 1
2017-04-19 10:45:52,816 : INFO : PROGRESS: at 65.14% examples, 838503 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:46:12,822 : INFO : PROGRESS: at 65.33% examples, 838459 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:46:14,489 : INFO : Loading new file for index: 840000
2017-04-19 10:46:32,827 : INFO : PROGRESS: at 65.53% examples, 838451 words/s, in_qsize 0, out_qsize 2
2017-04-19 10:46:52,833 : INFO : PROGRESS: at 65.71% examples, 838314 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:47:12,834 : INFO : PROGRESS: at 65.89% examples, 838085 words/s, in_qsize 0, out_qsize 3
2017-04-19 10:47:32,847 : INFO : PROGRESS: at 66.08% examples, 838053 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:47:37,533 : INFO : Loading

2017-04-19 11:08:13,238 : INFO : PROGRESS: at 77.44% examples, 830097 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:08:33,260 : INFO : PROGRESS: at 77.62% examples, 829921 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:08:47,982 : INFO : Loading new file for index: 1000000
2017-04-19 11:08:53,262 : INFO : PROGRESS: at 77.82% examples, 829946 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:09:13,269 : INFO : PROGRESS: at 78.00% examples, 829798 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:09:33,278 : INFO : PROGRESS: at 78.17% examples, 829623 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:09:53,282 : INFO : PROGRESS: at 78.36% examples, 829608 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:10:12,436 : INFO : Loading new file for index: 1010000
2017-04-19 11:10:13,287 : INFO : PROGRESS: at 78.55% examples, 829474 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:10:33,302 : INFO : PROGRESS: at 78.72% examples, 829302 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:10:53,313 : INFO : PROGR

2017-04-19 11:31:13,757 : INFO : PROGRESS: at 90.57% examples, 828289 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:31:33,784 : INFO : PROGRESS: at 90.77% examples, 828328 words/s, in_qsize 0, out_qsize 2
2017-04-19 11:31:53,764 : INFO : PROGRESS: at 90.96% examples, 828335 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:31:56,743 : INFO : Loading new file for index: 1170000
2017-04-19 11:32:13,767 : INFO : PROGRESS: at 91.16% examples, 828340 words/s, in_qsize 0, out_qsize 1
2017-04-19 11:32:33,770 : INFO : PROGRESS: at 91.35% examples, 828346 words/s, in_qsize 0, out_qsize 1
2017-04-19 11:32:53,789 : INFO : PROGRESS: at 91.55% examples, 828366 words/s, in_qsize 0, out_qsize 4
2017-04-19 11:33:13,793 : INFO : PROGRESS: at 91.75% examples, 828387 words/s, in_qsize 0, out_qsize 1
2017-04-19 11:33:14,936 : INFO : Loading new file for index: 1180000
2017-04-19 11:33:33,794 : INFO : PROGRESS: at 91.94% examples, 828408 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:33:53,797 : INFO : PROGR

CPU times: user 17h 44min 37s, sys: 12min 15s, total: 17h 56min 53s
Wall time: 2h 52min 36s


2017-04-19 11:47:07,380 : INFO : not storing attribute syn0norm
2017-04-19 11:47:07,381 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_3/model.wv.syn0.npy
2017-04-19 11:47:07,608 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_3/model.syn1neg.npy
2017-04-19 11:47:07,800 : INFO : not storing attribute cum_table
2017-04-19 11:47:23,363 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_3/model
2017-04-19 11:47:23,364 : INFO : Getting Validation Embeddings
2017-04-19 11:47:23,365 : IN

2017-04-19 13:59:26,604 : INFO : PROGRESS: at 4.37% examples, 893596 words/s, in_qsize 0, out_qsize 2
2017-04-19 13:59:46,606 : INFO : PROGRESS: at 4.58% examples, 894061 words/s, in_qsize 0, out_qsize 0
2017-04-19 13:59:54,807 : INFO : Loading new file for index: 60000
2017-04-19 14:00:06,607 : INFO : PROGRESS: at 4.79% examples, 894146 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:00:26,621 : INFO : PROGRESS: at 5.00% examples, 893751 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:00:46,621 : INFO : PROGRESS: at 5.21% examples, 894188 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:01:06,626 : INFO : PROGRESS: at 5.42% examples, 894642 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:01:08,736 : INFO : Loading new file for index: 70000
2017-04-19 14:01:26,629 : INFO : PROGRESS: at 5.63% examples, 894528 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:01:46,632 : INFO : PROGRESS: at 5.83% examples, 894600 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:02:06,639 : INFO : PROGRESS: at 6.04

2017-04-19 14:22:06,977 : INFO : PROGRESS: at 18.56% examples, 896405 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:22:19,575 : INFO : Loading new file for index: 240000
2017-04-19 14:22:26,993 : INFO : PROGRESS: at 18.76% examples, 896282 words/s, in_qsize 0, out_qsize 2
2017-04-19 14:22:47,002 : INFO : PROGRESS: at 18.97% examples, 896184 words/s, in_qsize 2, out_qsize 0
2017-04-19 14:23:07,007 : INFO : PROGRESS: at 19.18% examples, 896179 words/s, in_qsize 0, out_qsize 2
2017-04-19 14:23:27,007 : INFO : PROGRESS: at 19.39% examples, 896266 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:23:34,748 : INFO : Loading new file for index: 250000
2017-04-19 14:23:47,007 : INFO : PROGRESS: at 19.60% examples, 896254 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:24:07,010 : INFO : PROGRESS: at 19.80% examples, 896265 words/s, in_qsize 0, out_qsize 1
2017-04-19 14:24:27,022 : INFO : PROGRESS: at 20.01% examples, 896227 words/s, in_qsize 0, out_qsize 1
2017-04-19 14:24:47,012 : INFO : PROGRES

2017-04-19 14:44:47,367 : INFO : PROGRESS: at 32.68% examples, 895335 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:44:49,666 : INFO : Loading new file for index: 420000
2017-04-19 14:45:07,375 : INFO : PROGRESS: at 32.88% examples, 895300 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:45:27,383 : INFO : PROGRESS: at 33.09% examples, 895272 words/s, in_qsize 0, out_qsize 2
2017-04-19 14:45:47,389 : INFO : PROGRESS: at 33.30% examples, 895231 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:46:05,591 : INFO : Loading new file for index: 430000
2017-04-19 14:46:07,399 : INFO : PROGRESS: at 33.50% examples, 895212 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:46:27,403 : INFO : PROGRESS: at 33.71% examples, 895156 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:46:47,419 : INFO : PROGRESS: at 33.92% examples, 895142 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:47:07,433 : INFO : PROGRESS: at 34.13% examples, 895129 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:47:20,664 : INFO : Loading

2017-04-19 15:07:08,056 : INFO : Loading new file for index: 600000
2017-04-19 15:07:27,792 : INFO : PROGRESS: at 46.83% examples, 894445 words/s, in_qsize 1, out_qsize 1
2017-04-19 15:07:47,796 : INFO : PROGRESS: at 47.04% examples, 894424 words/s, in_qsize 0, out_qsize 1
2017-04-19 15:08:07,789 : INFO : PROGRESS: at 47.24% examples, 894419 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:08:22,309 : INFO : Loading new file for index: 610000
2017-04-19 15:08:27,793 : INFO : PROGRESS: at 47.45% examples, 894413 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:08:47,798 : INFO : PROGRESS: at 47.66% examples, 894412 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:09:07,799 : INFO : PROGRESS: at 47.87% examples, 894430 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:09:27,803 : INFO : PROGRESS: at 48.08% examples, 894405 words/s, in_qsize 0, out_qsize 2
2017-04-19 15:09:35,846 : INFO : Loading new file for index: 620000
2017-04-19 15:09:47,813 : INFO : PROGRESS: at 48.29% examples, 894433 words

2017-04-19 15:29:48,195 : INFO : PROGRESS: at 60.92% examples, 896939 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:30:08,201 : INFO : PROGRESS: at 61.13% examples, 896974 words/s, in_qsize 0, out_qsize 5
2017-04-19 15:30:28,204 : INFO : PROGRESS: at 61.34% examples, 897036 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:30:38,788 : INFO : Loading new file for index: 790000
2017-04-19 15:30:48,208 : INFO : PROGRESS: at 61.55% examples, 897087 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:31:08,208 : INFO : PROGRESS: at 61.76% examples, 897133 words/s, in_qsize 0, out_qsize 1
2017-04-19 15:31:28,211 : INFO : PROGRESS: at 61.97% examples, 897213 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:31:48,219 : INFO : PROGRESS: at 62.19% examples, 897272 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:31:51,455 : INFO : Loading new file for index: 800000
2017-04-19 15:32:08,221 : INFO : PROGRESS: at 62.40% examples, 897342 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:32:28,229 : INFO : PROGRES

2017-04-19 15:52:28,580 : INFO : PROGRESS: at 75.41% examples, 900424 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:52:30,883 : INFO : Loading new file for index: 970000
2017-04-19 15:52:48,585 : INFO : PROGRESS: at 75.62% examples, 900456 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:53:08,587 : INFO : PROGRESS: at 75.83% examples, 900500 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:53:28,593 : INFO : PROGRESS: at 76.04% examples, 900530 words/s, in_qsize 0, out_qsize 1
2017-04-19 15:53:44,105 : INFO : Loading new file for index: 980000
2017-04-19 15:53:48,591 : INFO : PROGRESS: at 76.26% examples, 900585 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:54:08,606 : INFO : PROGRESS: at 76.47% examples, 900606 words/s, in_qsize 0, out_qsize 4
2017-04-19 15:54:28,609 : INFO : PROGRESS: at 76.69% examples, 900635 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:54:48,622 : INFO : PROGRESS: at 76.90% examples, 900687 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:54:56,659 : INFO : Loading

2017-04-19 16:14:49,007 : INFO : PROGRESS: at 89.61% examples, 902054 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:15:09,015 : INFO : PROGRESS: at 89.82% examples, 902077 words/s, in_qsize 0, out_qsize 1
2017-04-19 16:15:29,020 : INFO : PROGRESS: at 90.04% examples, 902104 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:15:45,638 : INFO : Loading new file for index: 1160000
2017-04-19 16:15:49,022 : INFO : PROGRESS: at 90.25% examples, 902142 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:16:09,024 : INFO : PROGRESS: at 90.46% examples, 902164 words/s, in_qsize 0, out_qsize 1
2017-04-19 16:16:29,025 : INFO : PROGRESS: at 90.68% examples, 902202 words/s, in_qsize 0, out_qsize 1
2017-04-19 16:16:49,031 : INFO : PROGRESS: at 90.89% examples, 902229 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:16:58,754 : INFO : Loading new file for index: 1170000
2017-04-19 16:17:09,041 : INFO : PROGRESS: at 91.10% examples, 902265 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:17:29,045 : INFO : PROGR

CPU times: user 18h 30min 18s, sys: 11min 3s, total: 18h 41min 21s
Wall time: 2h 38min 36s


2017-04-19 16:31:03,077 : INFO : not storing attribute syn0norm
2017-04-19 16:31:03,078 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_4/model.wv.syn0.npy
2017-04-19 16:31:03,313 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_4/model.syn1neg.npy
2017-04-19 16:31:03,503 : INFO : not storing attribute cum_table
2017-04-19 16:31:17,590 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_4/model
2017-04-19 16:31:17,591 : INFO : ****************** Epoch 5 --- Working on doc2vec_size_200

2017-04-19 16:50:19,162 : INFO : PROGRESS: at 11.91% examples, 898220 words/s, in_qsize 0, out_qsize 2
2017-04-19 16:50:39,168 : INFO : PROGRESS: at 12.12% examples, 898356 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:50:59,167 : INFO : PROGRESS: at 12.33% examples, 898353 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:51:13,096 : INFO : Loading new file for index: 160000
2017-04-19 16:51:19,176 : INFO : PROGRESS: at 12.54% examples, 898322 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:51:39,174 : INFO : PROGRESS: at 12.75% examples, 898309 words/s, in_qsize 0, out_qsize 1
2017-04-19 16:51:59,182 : INFO : PROGRESS: at 12.95% examples, 898270 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:52:19,180 : INFO : PROGRESS: at 13.16% examples, 898267 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:52:27,125 : INFO : Loading new file for index: 170000
2017-04-19 16:52:39,194 : INFO : PROGRESS: at 13.37% examples, 898260 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:52:59,231 : INFO : PROGRES

2017-04-19 17:12:59,554 : INFO : PROGRESS: at 26.29% examples, 904583 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:13:16,926 : INFO : Loading new file for index: 340000
2017-04-19 17:13:19,552 : INFO : PROGRESS: at 26.50% examples, 904667 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:13:39,562 : INFO : PROGRESS: at 26.71% examples, 904729 words/s, in_qsize 0, out_qsize 5
2017-04-19 17:13:59,567 : INFO : PROGRESS: at 26.93% examples, 904767 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:14:19,568 : INFO : PROGRESS: at 27.14% examples, 904833 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:14:29,351 : INFO : Loading new file for index: 350000
2017-04-19 17:14:39,570 : INFO : PROGRESS: at 27.35% examples, 904836 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:14:59,570 : INFO : PROGRESS: at 27.56% examples, 904856 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:15:19,575 : INFO : PROGRESS: at 27.77% examples, 904865 words/s, in_qsize 0, out_qsize 1
2017-04-19 17:15:39,582 : INFO : PROGRES

2017-04-19 17:35:19,926 : INFO : PROGRESS: at 40.53% examples, 907582 words/s, in_qsize 0, out_qsize 2
2017-04-19 17:35:39,918 : INFO : PROGRESS: at 40.75% examples, 907589 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:35:59,925 : INFO : PROGRESS: at 40.96% examples, 907630 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:36:19,931 : INFO : PROGRESS: at 41.17% examples, 907667 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:36:23,380 : INFO : Loading new file for index: 530000
2017-04-19 17:36:39,962 : INFO : PROGRESS: at 41.39% examples, 907739 words/s, in_qsize 3, out_qsize 1
2017-04-19 17:36:59,963 : INFO : PROGRESS: at 41.60% examples, 907797 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:37:19,964 : INFO : PROGRESS: at 41.82% examples, 907847 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:37:35,431 : INFO : Loading new file for index: 540000
2017-04-19 17:37:39,980 : INFO : PROGRESS: at 42.03% examples, 907911 words/s, in_qsize 0, out_qsize 3
2017-04-19 17:37:59,978 : INFO : PROGRES

2017-04-19 17:58:00,346 : INFO : PROGRESS: at 55.07% examples, 910516 words/s, in_qsize 0, out_qsize 1
2017-04-19 17:58:11,629 : INFO : Loading new file for index: 710000
2017-04-19 17:58:20,348 : INFO : PROGRESS: at 55.28% examples, 910541 words/s, in_qsize 0, out_qsize 1
2017-04-19 17:58:40,354 : INFO : PROGRESS: at 55.50% examples, 910551 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:59:00,369 : INFO : PROGRESS: at 55.70% examples, 910544 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:59:20,372 : INFO : PROGRESS: at 55.92% examples, 910543 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:59:26,378 : INFO : Loading new file for index: 720000
2017-04-19 17:59:40,375 : INFO : PROGRESS: at 56.13% examples, 910552 words/s, in_qsize 0, out_qsize 3
2017-04-19 18:00:00,378 : INFO : PROGRESS: at 56.35% examples, 910629 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:00:20,385 : INFO : PROGRESS: at 56.56% examples, 910645 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:00:38,719 : INFO : Loading

2017-04-19 18:20:20,781 : INFO : PROGRESS: at 69.36% examples, 911907 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:20:40,796 : INFO : PROGRESS: at 69.57% examples, 911920 words/s, in_qsize 0, out_qsize 2
2017-04-19 18:21:00,795 : INFO : PROGRESS: at 69.78% examples, 911914 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:21:20,803 : INFO : PROGRESS: at 70.00% examples, 911938 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:21:23,099 : INFO : Loading new file for index: 900000
2017-04-19 18:21:40,808 : INFO : PROGRESS: at 70.21% examples, 911948 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:22:00,820 : INFO : PROGRESS: at 70.42% examples, 911956 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:22:20,825 : INFO : PROGRESS: at 70.64% examples, 911972 words/s, in_qsize 0, out_qsize 1
2017-04-19 18:22:35,232 : INFO : Loading new file for index: 910000
2017-04-19 18:22:40,834 : INFO : PROGRESS: at 70.85% examples, 911994 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:23:00,852 : INFO : PROGRES

2017-04-19 18:43:01,175 : INFO : PROGRESS: at 83.94% examples, 913412 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:43:07,343 : INFO : Loading new file for index: 1080000
2017-04-19 18:43:21,177 : INFO : PROGRESS: at 84.16% examples, 913493 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:43:41,182 : INFO : PROGRESS: at 84.38% examples, 913547 words/s, in_qsize 0, out_qsize 3
2017-04-19 18:44:01,195 : INFO : PROGRESS: at 84.59% examples, 913567 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:44:18,718 : INFO : Loading new file for index: 1090000
2017-04-19 18:44:21,196 : INFO : PROGRESS: at 84.81% examples, 913587 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:44:41,201 : INFO : PROGRESS: at 85.02% examples, 913592 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:45:01,204 : INFO : PROGRESS: at 85.23% examples, 913598 words/s, in_qsize 0, out_qsize 1
2017-04-19 18:45:21,210 : INFO : PROGRESS: at 85.44% examples, 913603 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:45:32,783 : INFO : Loadi

2017-04-19 19:05:21,625 : INFO : PROGRESS: at 98.20% examples, 913348 words/s, in_qsize 0, out_qsize 0
2017-04-19 19:05:41,629 : INFO : PROGRESS: at 98.40% examples, 913228 words/s, in_qsize 0, out_qsize 0
2017-04-19 19:06:01,643 : INFO : PROGRESS: at 98.60% examples, 913094 words/s, in_qsize 0, out_qsize 0
2017-04-19 19:06:13,529 : INFO : Loading new file for index: 1270000
2017-04-19 19:06:21,650 : INFO : PROGRESS: at 98.80% examples, 912963 words/s, in_qsize 0, out_qsize 0
2017-04-19 19:06:41,652 : INFO : PROGRESS: at 99.00% examples, 912816 words/s, in_qsize 0, out_qsize 0
2017-04-19 19:07:01,657 : INFO : PROGRESS: at 99.20% examples, 912685 words/s, in_qsize 0, out_qsize 1
2017-04-19 19:07:21,655 : INFO : PROGRESS: at 99.40% examples, 912530 words/s, in_qsize 1, out_qsize 0
2017-04-19 19:07:30,255 : INFO : Loading new file for index: 1280000
2017-04-19 19:07:41,660 : INFO : PROGRESS: at 99.59% examples, 912392 words/s, in_qsize 0, out_qsize 0
2017-04-19 19:08:01,663 : INFO : PROGR

CPU times: user 18h 46min 57s, sys: 12min 11s, total: 18h 59min 8s
Wall time: 2h 37min 2s


2017-04-19 19:08:21,458 : INFO : not storing attribute syn0norm
2017-04-19 19:08:21,459 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_5/model.wv.syn0.npy
2017-04-19 19:08:21,700 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_5/model.syn1neg.npy
2017-04-19 19:08:21,896 : INFO : not storing attribute cum_table
2017-04-19 19:08:35,430 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_5/model
2017-04-19 19:08:35,431 : INFO : Getting Validation Embeddings
2017-04-19 19:08:35,431 : IN

2017-04-19 21:15:42,980 : INFO : PROGRESS: at 4.31% examples, 881501 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:16:02,990 : INFO : PROGRESS: at 4.52% examples, 881804 words/s, in_qsize 0, out_qsize 2
2017-04-19 21:16:17,416 : INFO : Loading new file for index: 60000
2017-04-19 21:16:22,992 : INFO : PROGRESS: at 4.73% examples, 881924 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:16:42,993 : INFO : PROGRESS: at 4.93% examples, 881919 words/s, in_qsize 0, out_qsize 1
2017-04-19 21:17:02,996 : INFO : PROGRESS: at 5.14% examples, 881790 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:17:22,997 : INFO : PROGRESS: at 5.34% examples, 881721 words/s, in_qsize 0, out_qsize 1
2017-04-19 21:17:32,694 : INFO : Loading new file for index: 70000
2017-04-19 21:17:42,999 : INFO : PROGRESS: at 5.55% examples, 881819 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:18:03,001 : INFO : PROGRESS: at 5.75% examples, 881722 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:18:23,004 : INFO : PROGRESS: at 5.96

2017-04-19 21:38:23,355 : INFO : PROGRESS: at 18.24% examples, 880986 words/s, in_qsize 0, out_qsize 1
2017-04-19 21:38:43,357 : INFO : PROGRESS: at 18.45% examples, 881006 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:39:03,364 : INFO : PROGRESS: at 18.65% examples, 880929 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:39:07,330 : INFO : Loading new file for index: 240000
2017-04-19 21:39:23,384 : INFO : PROGRESS: at 18.85% examples, 880875 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:39:43,397 : INFO : PROGRESS: at 19.05% examples, 880869 words/s, in_qsize 0, out_qsize 1
2017-04-19 21:40:03,415 : INFO : PROGRESS: at 19.26% examples, 880834 words/s, in_qsize 0, out_qsize 1
2017-04-19 21:40:23,426 : INFO : PROGRESS: at 19.47% examples, 880820 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:40:23,871 : INFO : Loading new file for index: 250000
2017-04-19 21:40:43,433 : INFO : PROGRESS: at 19.67% examples, 880814 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:41:03,430 : INFO : PROGRES

2017-04-19 22:01:03,809 : INFO : PROGRESS: at 32.12% examples, 879981 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:01:23,821 : INFO : PROGRESS: at 32.33% examples, 879916 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:01:43,821 : INFO : PROGRESS: at 32.53% examples, 879887 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:02:01,397 : INFO : Loading new file for index: 420000
2017-04-19 22:02:03,821 : INFO : PROGRESS: at 32.73% examples, 879804 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:02:23,825 : INFO : PROGRESS: at 32.93% examples, 879771 words/s, in_qsize 0, out_qsize 1
2017-04-19 22:02:43,835 : INFO : PROGRESS: at 33.13% examples, 879724 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:03:03,846 : INFO : PROGRESS: at 33.33% examples, 879688 words/s, in_qsize 1, out_qsize 1
2017-04-19 22:03:18,870 : INFO : Loading new file for index: 430000
2017-04-19 22:03:23,846 : INFO : PROGRESS: at 33.54% examples, 879631 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:03:43,862 : INFO : PROGRES

2017-04-19 22:23:44,204 : INFO : PROGRESS: at 46.02% examples, 879021 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:24:04,214 : INFO : PROGRESS: at 46.23% examples, 879015 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:24:24,218 : INFO : PROGRESS: at 46.43% examples, 879005 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:24:43,330 : INFO : Loading new file for index: 600000
2017-04-19 22:24:44,219 : INFO : PROGRESS: at 46.63% examples, 878992 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:25:04,222 : INFO : PROGRESS: at 46.84% examples, 878953 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:25:24,227 : INFO : PROGRESS: at 47.04% examples, 878932 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:25:44,237 : INFO : PROGRESS: at 47.24% examples, 878912 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:25:59,035 : INFO : Loading new file for index: 610000
2017-04-19 22:26:04,238 : INFO : PROGRESS: at 47.45% examples, 878913 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:26:24,241 : INFO : PROGRES

2017-04-19 22:46:26,400 : INFO : Loading new file for index: 770000
2017-04-19 22:46:44,598 : INFO : PROGRESS: at 60.07% examples, 878452 words/s, in_qsize 0, out_qsize 1
2017-04-19 22:47:04,599 : INFO : PROGRESS: at 60.27% examples, 878442 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:47:24,607 : INFO : PROGRESS: at 60.48% examples, 878435 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:47:41,598 : INFO : Loading new file for index: 780000
2017-04-19 22:47:44,607 : INFO : PROGRESS: at 60.68% examples, 878440 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:48:04,611 : INFO : PROGRESS: at 60.88% examples, 878438 words/s, in_qsize 1, out_qsize 1
2017-04-19 22:48:24,612 : INFO : PROGRESS: at 61.09% examples, 878436 words/s, in_qsize 0, out_qsize 1
2017-04-19 22:48:44,614 : INFO : PROGRESS: at 61.29% examples, 878424 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:49:00,145 : INFO : Loading new file for index: 790000
2017-04-19 22:49:04,617 : INFO : PROGRESS: at 61.49% examples, 878427 words

2017-04-19 23:09:17,097 : INFO : Loading new file for index: 950000
2017-04-19 23:09:25,048 : INFO : PROGRESS: at 73.96% examples, 878193 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:09:45,067 : INFO : PROGRESS: at 74.16% examples, 878192 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:10:05,067 : INFO : PROGRESS: at 74.36% examples, 878191 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:10:25,078 : INFO : PROGRESS: at 74.57% examples, 878191 words/s, in_qsize 0, out_qsize 1
2017-04-19 23:10:32,992 : INFO : Loading new file for index: 960000
2017-04-19 23:10:45,074 : INFO : PROGRESS: at 74.78% examples, 878191 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:11:05,094 : INFO : PROGRESS: at 74.98% examples, 878190 words/s, in_qsize 2, out_qsize 0
2017-04-19 23:11:25,106 : INFO : PROGRESS: at 75.18% examples, 878186 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:11:45,119 : INFO : PROGRESS: at 75.39% examples, 878186 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:11:49,764 : INFO : Loading

2017-04-19 23:32:05,535 : INFO : PROGRESS: at 87.84% examples, 878048 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:32:10,262 : INFO : Loading new file for index: 1130000
2017-04-19 23:32:25,541 : INFO : PROGRESS: at 88.04% examples, 878061 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:32:45,545 : INFO : PROGRESS: at 88.25% examples, 878049 words/s, in_qsize 0, out_qsize 2
2017-04-19 23:33:05,549 : INFO : PROGRESS: at 88.45% examples, 878042 words/s, in_qsize 0, out_qsize 2
2017-04-19 23:33:25,470 : INFO : Loading new file for index: 1140000
2017-04-19 23:33:25,553 : INFO : PROGRESS: at 88.66% examples, 878031 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:33:45,560 : INFO : PROGRESS: at 88.86% examples, 878030 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:34:05,563 : INFO : PROGRESS: at 89.07% examples, 878030 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:34:25,566 : INFO : PROGRESS: at 89.27% examples, 878019 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:34:40,757 : INFO : Loadi

2017-04-19 23:51:58,331 : INFO : worker thread finished; awaiting finish of 6 more threads
2017-04-19 23:51:58,332 : INFO : worker thread finished; awaiting finish of 5 more threads
2017-04-19 23:51:58,339 : INFO : worker thread finished; awaiting finish of 4 more threads
2017-04-19 23:51:58,346 : INFO : worker thread finished; awaiting finish of 3 more threads
2017-04-19 23:51:58,351 : INFO : worker thread finished; awaiting finish of 2 more threads
2017-04-19 23:51:58,360 : INFO : worker thread finished; awaiting finish of 1 more threads
2017-04-19 23:51:58,363 : INFO : worker thread finished; awaiting finish of 0 more threads
2017-04-19 23:51:58,364 : INFO : training on 12614970497 raw words (8597110081 effective words) took 9796.5s, 877570 effective words/s
2017-04-19 23:51:58,365 : INFO : saving Doc2Vec object under /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_

CPU times: user 22h 5min 49s, sys: 11min 59s, total: 22h 17min 49s
Wall time: 2h 43min 16s


2017-04-19 23:51:59,066 : INFO : not storing attribute syn0norm
2017-04-19 23:51:59,067 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_6/model.wv.syn0.npy
2017-04-19 23:51:59,295 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_6/model.syn1neg.npy
2017-04-19 23:51:59,486 : INFO : not storing attribute cum_table
2017-04-19 23:52:14,013 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_6/model
2017-04-19 23:52:14,014 : INFO : ****************** Epoch 7 --- Working on doc2vec_size_200

2017-04-20 00:11:05,811 : INFO : Loading new file for index: 150000
2017-04-20 00:11:15,631 : INFO : PROGRESS: at 11.79% examples, 889158 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:11:35,634 : INFO : PROGRESS: at 11.99% examples, 889091 words/s, in_qsize 0, out_qsize 2
2017-04-20 00:11:55,635 : INFO : PROGRESS: at 12.20% examples, 889120 words/s, in_qsize 0, out_qsize 1
2017-04-20 00:12:15,650 : INFO : PROGRESS: at 12.41% examples, 889202 words/s, in_qsize 0, out_qsize 4
2017-04-20 00:12:21,753 : INFO : Loading new file for index: 160000
2017-04-20 00:12:35,642 : INFO : PROGRESS: at 12.62% examples, 889238 words/s, in_qsize 0, out_qsize 2
2017-04-20 00:12:55,644 : INFO : PROGRESS: at 12.82% examples, 889292 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:13:15,644 : INFO : PROGRESS: at 13.03% examples, 889295 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:13:35,644 : INFO : PROGRESS: at 13.24% examples, 889265 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:13:36,401 : INFO : Loading

2017-04-20 00:33:40,927 : INFO : Loading new file for index: 330000
2017-04-20 00:33:55,997 : INFO : PROGRESS: at 25.86% examples, 889883 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:34:15,999 : INFO : PROGRESS: at 26.07% examples, 889898 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:34:36,010 : INFO : PROGRESS: at 26.28% examples, 889944 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:34:54,947 : INFO : Loading new file for index: 340000
2017-04-20 00:34:56,006 : INFO : PROGRESS: at 26.49% examples, 889952 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:35:16,010 : INFO : PROGRESS: at 26.69% examples, 889919 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:35:36,015 : INFO : PROGRESS: at 26.90% examples, 889968 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:35:56,024 : INFO : PROGRESS: at 27.11% examples, 889982 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:36:08,999 : INFO : Loading new file for index: 350000
2017-04-20 00:36:16,033 : INFO : PROGRESS: at 27.32% examples, 890003 words

2017-04-20 00:56:16,365 : INFO : PROGRESS: at 39.76% examples, 890226 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:56:36,376 : INFO : PROGRESS: at 39.96% examples, 890252 words/s, in_qsize 1, out_qsize 2
2017-04-20 00:56:56,389 : INFO : PROGRESS: at 40.17% examples, 890273 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:57:16,400 : INFO : PROGRESS: at 40.38% examples, 890286 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:57:22,205 : INFO : Loading new file for index: 520000
2017-04-20 00:57:36,405 : INFO : PROGRESS: at 40.59% examples, 890295 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:57:56,407 : INFO : PROGRESS: at 40.80% examples, 890289 words/s, in_qsize 0, out_qsize 1
2017-04-20 00:58:16,419 : INFO : PROGRESS: at 41.01% examples, 890292 words/s, in_qsize 1, out_qsize 1
2017-04-20 00:58:36,139 : INFO : Loading new file for index: 530000
2017-04-20 00:58:36,425 : INFO : PROGRESS: at 41.21% examples, 890276 words/s, in_qsize 0, out_qsize 1
2017-04-20 00:58:56,430 : INFO : PROGRES

2017-04-20 01:18:56,769 : INFO : PROGRESS: at 53.87% examples, 890655 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:19:16,775 : INFO : PROGRESS: at 54.08% examples, 890638 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:19:36,779 : INFO : PROGRESS: at 54.29% examples, 890631 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:19:47,992 : INFO : Loading new file for index: 700000
2017-04-20 01:19:56,779 : INFO : PROGRESS: at 54.49% examples, 890647 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:20:16,794 : INFO : PROGRESS: at 54.70% examples, 890608 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:20:36,803 : INFO : PROGRESS: at 54.90% examples, 890600 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:20:56,813 : INFO : PROGRESS: at 55.11% examples, 890584 words/s, in_qsize 1, out_qsize 2
2017-04-20 01:21:04,924 : INFO : Loading new file for index: 710000
2017-04-20 01:21:16,821 : INFO : PROGRESS: at 55.31% examples, 890568 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:21:36,853 : INFO : PROGRES

2017-04-20 01:41:37,215 : INFO : PROGRESS: at 67.95% examples, 890647 words/s, in_qsize 0, out_qsize 1
2017-04-20 01:41:57,227 : INFO : PROGRESS: at 68.15% examples, 890656 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:42:17,226 : INFO : PROGRESS: at 68.36% examples, 890667 words/s, in_qsize 0, out_qsize 1
2017-04-20 01:42:27,345 : INFO : Loading new file for index: 880000
2017-04-20 01:42:37,246 : INFO : PROGRESS: at 68.57% examples, 890667 words/s, in_qsize 0, out_qsize 3
2017-04-20 01:42:57,239 : INFO : PROGRESS: at 68.77% examples, 890666 words/s, in_qsize 0, out_qsize 1
2017-04-20 01:43:17,240 : INFO : PROGRESS: at 68.99% examples, 890693 words/s, in_qsize 0, out_qsize 1
2017-04-20 01:43:37,246 : INFO : PROGRESS: at 69.19% examples, 890690 words/s, in_qsize 0, out_qsize 1
2017-04-20 01:43:40,809 : INFO : Loading new file for index: 890000
2017-04-20 01:43:57,247 : INFO : PROGRESS: at 69.40% examples, 890696 words/s, in_qsize 0, out_qsize 2
2017-04-20 01:44:17,248 : INFO : PROGRES

2017-04-20 02:04:17,684 : INFO : PROGRESS: at 82.05% examples, 890764 words/s, in_qsize 0, out_qsize 1
2017-04-20 02:04:37,687 : INFO : PROGRESS: at 82.26% examples, 890750 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:04:57,691 : INFO : PROGRESS: at 82.47% examples, 890739 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:04:58,441 : INFO : Loading new file for index: 1060000
2017-04-20 02:05:17,691 : INFO : PROGRESS: at 82.67% examples, 890750 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:05:37,695 : INFO : PROGRESS: at 82.88% examples, 890739 words/s, in_qsize 0, out_qsize 2
2017-04-20 02:05:57,698 : INFO : PROGRESS: at 83.09% examples, 890755 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:06:12,533 : INFO : Loading new file for index: 1070000
2017-04-20 02:06:17,708 : INFO : PROGRESS: at 83.30% examples, 890752 words/s, in_qsize 0, out_qsize 1
2017-04-20 02:06:37,709 : INFO : PROGRESS: at 83.51% examples, 890750 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:06:57,714 : INFO : PROGR

2017-04-20 02:26:58,143 : INFO : PROGRESS: at 96.15% examples, 890560 words/s, in_qsize 0, out_qsize 1
2017-04-20 02:27:18,147 : INFO : PROGRESS: at 96.36% examples, 890556 words/s, in_qsize 0, out_qsize 1
2017-04-20 02:27:22,544 : INFO : Loading new file for index: 1240000
2017-04-20 02:27:38,149 : INFO : PROGRESS: at 96.57% examples, 890548 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:27:58,155 : INFO : PROGRESS: at 96.78% examples, 890544 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:28:18,160 : INFO : PROGRESS: at 96.98% examples, 890553 words/s, in_qsize 0, out_qsize 1
2017-04-20 02:28:36,628 : INFO : Loading new file for index: 1250000
2017-04-20 02:28:38,161 : INFO : PROGRESS: at 97.19% examples, 890554 words/s, in_qsize 0, out_qsize 1
2017-04-20 02:28:58,181 : INFO : PROGRESS: at 97.40% examples, 890530 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:29:18,196 : INFO : PROGRESS: at 97.61% examples, 890528 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:29:38,196 : INFO : PROGR

CPU times: user 20h 30min 44s, sys: 11min 18s, total: 20h 42min 2s
Wall time: 2h 40min 54s


2017-04-20 02:33:09,180 : INFO : not storing attribute syn0norm
2017-04-20 02:33:09,181 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_7/model.wv.syn0.npy
2017-04-20 02:33:09,399 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_7/model.syn1neg.npy
2017-04-20 02:33:09,587 : INFO : not storing attribute cum_table
2017-04-20 02:33:23,987 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_7/model
2017-04-20 02:33:23,988 : INFO : ****************** Epoch 8 --- Working on doc2vec_size_200

2017-04-20 02:52:45,554 : INFO : PROGRESS: at 10.67% examples, 791195 words/s, in_qsize 1, out_qsize 0
2017-04-20 02:53:05,555 : INFO : PROGRESS: at 10.88% examples, 792642 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:53:09,148 : INFO : Loading new file for index: 140000
2017-04-20 02:53:25,561 : INFO : PROGRESS: at 11.08% examples, 794072 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:53:45,562 : INFO : PROGRESS: at 11.29% examples, 795431 words/s, in_qsize 0, out_qsize 2
2017-04-20 02:54:05,571 : INFO : PROGRESS: at 11.49% examples, 796851 words/s, in_qsize 0, out_qsize 4
2017-04-20 02:54:24,956 : INFO : Loading new file for index: 150000
2017-04-20 02:54:25,579 : INFO : PROGRESS: at 11.69% examples, 798153 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:54:45,585 : INFO : PROGRESS: at 11.90% examples, 799380 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:55:05,586 : INFO : PROGRESS: at 12.10% examples, 800606 words/s, in_qsize 0, out_qsize 1
2017-04-20 02:55:25,595 : INFO : PROGRES

2017-04-20 03:15:25,852 : INFO : PROGRESS: at 24.71% examples, 843957 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:15:45,861 : INFO : PROGRESS: at 24.92% examples, 844385 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:15:46,719 : INFO : Loading new file for index: 320000
2017-04-20 03:16:05,862 : INFO : PROGRESS: at 25.13% examples, 844793 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:16:25,868 : INFO : PROGRESS: at 25.34% examples, 845179 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:16:45,873 : INFO : PROGRESS: at 25.55% examples, 845609 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:17:00,173 : INFO : Loading new file for index: 330000
2017-04-20 03:17:05,892 : INFO : PROGRESS: at 25.76% examples, 846000 words/s, in_qsize 0, out_qsize 3
2017-04-20 03:17:25,880 : INFO : PROGRESS: at 25.97% examples, 846369 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:17:45,880 : INFO : PROGRESS: at 26.18% examples, 846764 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:18:05,887 : INFO : PROGRES

2017-04-20 03:38:03,914 : INFO : Loading new file for index: 500000
2017-04-20 03:38:06,224 : INFO : PROGRESS: at 38.92% examples, 862478 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:38:26,227 : INFO : PROGRESS: at 39.13% examples, 862665 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:38:46,227 : INFO : PROGRESS: at 39.34% examples, 862853 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:39:06,229 : INFO : PROGRESS: at 39.55% examples, 863045 words/s, in_qsize 0, out_qsize 2
2017-04-20 03:39:17,967 : INFO : Loading new file for index: 510000
2017-04-20 03:39:26,232 : INFO : PROGRESS: at 39.75% examples, 863224 words/s, in_qsize 0, out_qsize 2
2017-04-20 03:39:46,234 : INFO : PROGRESS: at 39.96% examples, 863423 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:40:06,237 : INFO : PROGRESS: at 40.17% examples, 863623 words/s, in_qsize 1, out_qsize 0
2017-04-20 03:40:26,244 : INFO : PROGRESS: at 40.38% examples, 863824 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:40:31,808 : INFO : Loading

2017-04-20 04:00:26,614 : INFO : PROGRESS: at 52.98% examples, 872521 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:00:46,628 : INFO : PROGRESS: at 53.19% examples, 872651 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:01:06,627 : INFO : PROGRESS: at 53.40% examples, 872811 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:01:26,627 : INFO : PROGRESS: at 53.61% examples, 872947 words/s, in_qsize 0, out_qsize 2
2017-04-20 04:01:27,891 : INFO : Loading new file for index: 690000
2017-04-20 04:01:46,642 : INFO : PROGRESS: at 53.82% examples, 873060 words/s, in_qsize 0, out_qsize 2
2017-04-20 04:02:06,636 : INFO : PROGRESS: at 54.03% examples, 873179 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:02:26,644 : INFO : PROGRESS: at 54.24% examples, 873298 words/s, in_qsize 0, out_qsize 2
2017-04-20 04:02:41,826 : INFO : Loading new file for index: 700000
2017-04-20 04:02:46,654 : INFO : PROGRESS: at 54.45% examples, 873404 words/s, in_qsize 0, out_qsize 2
2017-04-20 04:03:06,654 : INFO : PROGRES

2017-04-20 04:23:07,004 : INFO : PROGRESS: at 67.45% examples, 881541 words/s, in_qsize 0, out_qsize 1
2017-04-20 04:23:27,016 : INFO : PROGRESS: at 67.67% examples, 881773 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:23:28,067 : INFO : Loading new file for index: 870000
2017-04-20 04:23:47,019 : INFO : PROGRESS: at 67.90% examples, 881971 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:24:07,021 : INFO : PROGRESS: at 68.12% examples, 882233 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:24:27,023 : INFO : PROGRESS: at 68.34% examples, 882435 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:24:38,142 : INFO : Loading new file for index: 880000
2017-04-20 04:24:47,032 : INFO : PROGRESS: at 68.56% examples, 882631 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:25:07,040 : INFO : PROGRESS: at 68.79% examples, 882880 words/s, in_qsize 0, out_qsize 1
2017-04-20 04:25:27,049 : INFO : PROGRESS: at 69.02% examples, 883175 words/s, in_qsize 0, out_qsize 2
2017-04-20 04:25:46,556 : INFO : Loading

2017-04-20 04:45:27,466 : INFO : PROGRESS: at 82.26% examples, 893028 words/s, in_qsize 0, out_qsize 2
2017-04-20 04:45:47,343 : INFO : Loading new file for index: 1060000
2017-04-20 04:45:47,466 : INFO : PROGRESS: at 82.47% examples, 893102 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:46:07,474 : INFO : PROGRESS: at 82.69% examples, 893231 words/s, in_qsize 0, out_qsize 1
2017-04-20 04:46:27,477 : INFO : PROGRESS: at 82.92% examples, 893380 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:46:47,480 : INFO : PROGRESS: at 83.14% examples, 893491 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:46:57,589 : INFO : Loading new file for index: 1070000
2017-04-20 04:47:07,482 : INFO : PROGRESS: at 83.35% examples, 893575 words/s, in_qsize 1, out_qsize 0
2017-04-20 04:47:27,486 : INFO : PROGRESS: at 83.58% examples, 893722 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:47:47,493 : INFO : PROGRESS: at 83.81% examples, 893920 words/s, in_qsize 0, out_qsize 1
2017-04-20 04:48:05,120 : INFO : Loadi

2017-04-20 05:07:47,857 : INFO : PROGRESS: at 97.11% examples, 901343 words/s, in_qsize 0, out_qsize 0
2017-04-20 05:07:53,846 : INFO : Loading new file for index: 1250000
2017-04-20 05:08:07,865 : INFO : PROGRESS: at 97.33% examples, 901431 words/s, in_qsize 0, out_qsize 1
2017-04-20 05:08:27,870 : INFO : PROGRESS: at 97.55% examples, 901532 words/s, in_qsize 0, out_qsize 0
2017-04-20 05:08:47,861 : INFO : PROGRESS: at 97.77% examples, 901596 words/s, in_qsize 0, out_qsize 0
2017-04-20 05:09:04,222 : INFO : Loading new file for index: 1260000
2017-04-20 05:09:07,865 : INFO : PROGRESS: at 97.99% examples, 901661 words/s, in_qsize 0, out_qsize 0
2017-04-20 05:09:27,868 : INFO : PROGRESS: at 98.20% examples, 901720 words/s, in_qsize 0, out_qsize 0
2017-04-20 05:09:47,870 : INFO : PROGRESS: at 98.43% examples, 901866 words/s, in_qsize 0, out_qsize 0
2017-04-20 05:10:07,871 : INFO : PROGRESS: at 98.66% examples, 901963 words/s, in_qsize 0, out_qsize 0
2017-04-20 05:10:13,431 : INFO : Loadi

CPU times: user 20h 36min 59s, sys: 12min 45s, total: 20h 49min 45s
Wall time: 2h 38min 45s


2017-04-20 05:12:10,079 : INFO : not storing attribute syn0norm
2017-04-20 05:12:10,080 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_8/model.wv.syn0.npy
2017-04-20 05:12:10,346 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_8/model.syn1neg.npy
2017-04-20 05:12:10,559 : INFO : not storing attribute cum_table
2017-04-20 05:12:27,408 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_8/model


CPU times: user 7d 22h 58min 12s, sys: 1h 49min 34s, total: 8d 47min 46s
Wall time: 1d 2h 9min 35s


## Inference Only (if needed)

In [19]:
# Worker count for the inference-only run; this rebinds the NUM_CORES value
# assigned in the global-variables section near the top of the notebook.
NUM_CORES = 32

In [20]:
# Pick the saved epoch to load; the epoch number is formatted into the
# model-name template to select that epoch's saved model.
epoch = 1
GLOBAL_VARS.MODEL_NAME = placeholder_model_name.format(epoch)

# Build the checkpoint path once so the existence check and the load always
# refer to the same file.
saved_model_path = os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, MODEL_PREFIX)

if os.path.exists(saved_model_path):
    doc2vec_model = Doc2Vec.load(saved_model_path)
    # Use the worker count configured for this inference-only section.
    doc2vec_model.workers = NUM_CORES
    GLOBAL_VARS.DOC2VEC_MODEL = doc2vec_model
else:
    # A missing checkpoint used to be skipped silently, which left later cells
    # running against whatever `doc2vec_model` (if any) was already bound in
    # the kernel. Report it instead of crashing so the cell stays best-effort.
    logging.warning("No saved Doc2Vec model found at %s; doc2vec_model was not loaded",
                    saved_model_path)

2017-04-18 18:02:01,019 : INFO : loading Doc2Vec object from /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_1/model
2017-04-18 18:02:08,119 : INFO : loading docvecs recursively from /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_1/model.docvecs.* with mmap=None
2017-04-18 18:02:08,121 : INFO : loading doctag_syn0 from /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_1/model.docvecs.doctag_syn0.npy with mmap=None
2017-04-18 18:02:14,679 : INFO : loading wv recursively from /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean

In [22]:
# Display the model name currently stored on GLOBAL_VARS.
GLOBAL_VARS.MODEL_NAME

'doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_1'

In [21]:
# Display the repr of the `doc2vec_model` object currently bound in the kernel.
doc2vec_model

<gensim.models.doc2vec.Doc2Vec at 0x7f8aaf855090>

In [None]:
# Obtain vectors for the validation documents from the loaded model via the
# inference-only helper and keep the result in Xv.
Xv = get_extended_docs_with_inference_data_only(
    doc2vec_model,
    VALIDATION_DICT,
    validation_preprocessed_files_prefix,
    level,
    model_name,
)

2017-04-18 18:03:06,812 : INFO : ===== Getting vectors with inference
2017-04-18 18:03:07,082 : INFO : Loading new file for index: 0
2017-04-18 18:03:26,531 : INFO : Loading new file for index: 10000
2017-04-18 18:06:37,739 : INFO : Finished: 10000 tags
2017-04-18 18:06:57,927 : INFO : Loading new file for index: 20000
2017-04-18 18:10:58,441 : INFO : Finished: 20000 tags
2017-04-18 18:11:17,440 : INFO : Loading new file for index: 30000
2017-04-18 18:15:05,454 : INFO : Finished: 30000 tags
2017-04-18 18:15:26,058 : INFO : Loading new file for index: 40000
2017-04-18 18:19:11,750 : INFO : Finished: 40000 tags
2017-04-18 18:19:32,490 : INFO : Loading new file for index: 50000
2017-04-18 18:23:17,129 : INFO : Finished: 50000 tags
2017-04-18 18:23:36,633 : INFO : Loading new file for index: 60000
2017-04-18 18:27:11,904 : INFO : Finished: 60000 tags
2017-04-18 18:27:32,721 : INFO : Loading new file for index: 70000
2017-04-18 18:31:10,730 : INFO : Finished: 70000 tags
2017-04-18 18:31:30,

#### Testing inference

In [55]:
# Smoke-test inference: iterate over the validation documents and infer a
# vector for the first one only.
inference_docs_iterator = BatchWrapper(validation_preprocessed_files_prefix, batch_size=None,
                                       level=level, level_type=model_name)
# Loop over the iterator created above. The loop previously referenced the
# misspelled name `inference_doczs_iterator`, which is a NameError in a fresh
# kernel (or silently reuses a stale variable in a long-lived one).
for doc_id, doc_tokens in inference_docs_iterator:
    rep = doc2vec_model.infer_vector(doc_tokens)
    print (doc_id, rep)
    # One document is enough for this check.
    break

('08521002', array([  1.13558674e+00,  -2.01971769e-01,  -9.30447519e-01,
         9.55632687e-01,   5.11517346e-01,   4.34441900e+00,
        -3.77764761e-01,  -1.11617422e+00,  -2.15896085e-01,
         9.09354746e-01,   5.74674904e-01,  -2.07049704e+00,
        -7.20400810e-01,   4.94136661e-01,  -1.74060893e+00,
        -2.17272949e+00,  -4.39270258e-01,  -1.51936769e+00,
         5.65607429e-01,  -4.58835810e-01,  -1.69598356e-01,
         1.77733886e+00,   3.66123140e-01,   1.38953611e-01,
        -1.04259264e+00,   8.84979665e-01,  -8.56729895e-02,
        -6.04329109e-01,   4.42179322e-01,   1.08561194e+00,
        -2.49654725e-01,   3.02951038e-01,  -3.80307257e-01,
         1.32433748e+00,   7.18038738e-01,   7.99864233e-01,
        -3.60305488e-01,  -3.32749695e-01,   1.86409019e-02,
        -1.16298962e+00,  -2.36521304e-01,   8.52507114e-01,
        -4.25269688e-03,  -2.73190904e+00,  -1.39228487e+00,
         3.14658254e-01,   1.19927609e+00,  -8.86219382e-01,
        -2.

In [53]:
# Display the model's `wv.syn0` array (presumably the word-vector matrix;
# confirm against the gensim documentation for this version).
doc2vec_model.wv.syn0

array([[ 0.24962339,  0.03808838, -0.38492572, ...,  0.81019139,
        -0.0872335 ,  0.00503489],
       [-0.24393913, -0.9072656 , -0.08245134, ..., -0.12438237,
        -0.10501056,  0.07241193],
       [ 0.06769085, -0.22004843,  0.05649997, ...,  0.15331532,
        -0.87121236, -0.71148068],
       ..., 
       [ 0.02257917,  0.18380728, -0.19475998, ...,  0.72972393,
        -0.03356596, -0.29145467],
       [-0.20255305, -0.25994578,  0.31640032, ..., -0.02623975,
         0.41660461, -0.45980361],
       [ 1.09419656, -0.97489876, -0.3509953 , ...,  0.82430571,
         0.02756385,  0.7905944 ]], dtype=float32)