## Creates the doc2vec vector embeddings for a specific configuration

In [1]:
import json
import nltk
from nltk.tokenize import RegexpTokenizer
import string
import math
import os
import io
import time
from collections import namedtuple
import cPickle as pickle
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import random
import gzip

from multiprocessing import Process, Queue

from multiprocessing.dummy import Pool as ThreadPool
import itertools

from sklearn.metrics import coverage_error
import sklearn.metrics
from sklearn.multiclass import OneVsRestClassifier
from sklearn import linear_model
from sklearn.preprocessing import MultiLabelBinarizer

from gensim.models.doc2vec import Doc2Vec, LabeledSentence

import logging
from logging import info
from functools import partial

from thesis.utils.metrics import *
from thesis.utils.file import *

## Global variables used throughout the script

In [2]:
# Start from a clean root logger: logging.basicConfig() does nothing when the root
# logger already has handlers, so drop whatever is attached before configuring it.
root = logging.getLogger()
for handler in list(root.handlers):
    root.removeHandler(handler)
# basicConfig attaches a default StreamHandler with a timestamped format at INFO level
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s : %(levelname)s : %(message)s')
#root.addHandler(logging.StreamHandler())

In [3]:
# Random seeds
SVM_SEED = 1234  # not referenced in the visible part of this notebook
DOC2VEC_SEED = 1234  # passed as `seed` when the Doc2Vec model is constructed

In [4]:
MIN_WORD_COUNT = 100  # passed as `min_count` to the Doc2Vec constructor
NUM_CORES = 16  # used as Doc2Vec `workers` and as the size of the inference thread pool

In [5]:
# Holder for the names/objects of the model currently being worked on.
# NOTE: in the visible code this namedtuple class is never instantiated; values are assigned
# directly on the class (e.g. GLOBAL_VARS.MODEL_NAME = ...), so it acts as a mutable global namespace.
GLOBAL_VARS = namedtuple('GLOBAL_VARS', ['MODEL_NAME', 'DOC2VEC_MODEL_NAME', 'DOC2VEC_MODEL', 
                                         'SVM_MODEL_NAME', 'NN_MODEL_NAME'])

In [6]:
# File and directory names used below the doc2vec model save location
VOCAB_MODEL = "vocab_model"  # sub-directory that holds the vocabulary-only model
MODEL_PREFIX = "model"  # file name under which each Doc2Vec model is saved
VALIDATION_DICT = "validation_dict.pkl"  # cache file for the inferred validation vectors (doc_id -> vector)
# The remaining names are not referenced in the visible part of this notebook
TEST_MATRIX = "test_matrix.pkl"
TEST_DICT = "test_dict.pkl"
METRICS = "metrics.pkl"
CLASSIFIER = "classifier.pkl"

In [7]:
# Base data directory. The paths below are built by plain string concatenation,
# so every directory string has to keep its trailing "/".
root_location = "/mnt/virtual-machines/data/"
exports_location = root_location + "exported_data/"

# Pickled inputs loaded at the start of the notebook
doc_classifications_map_file = exports_location + "doc_classification_map.pkl"
training_docs_list_file = exports_location + "training_docs_list.pkl"
validation_docs_list_file = exports_location + "validation_docs_list.pkl"
test_docs_list_file = exports_location + "test_docs_list.pkl"

preprocessed_location = root_location + "preprocessed_data/extended_pv_abs_desc_claims_full_chunks/"

# File name prefixes of the preprocessed chunks; ExtendedPVDocumentBatchGenerator appends
# the numeric chunk index and ".gz" to a prefix to get the file to open.
training_preprocessed_files_prefix = preprocessed_location + "extended_pv_training_docs_data_preprocessed-"
validation_preprocessed_files_prefix = preprocessed_location + "extended_pv_validation_docs_data_preprocessed-"
test_preprocessed_files_prefix = preprocessed_location + "extended_pv_test_docs_data_preprocessed-"

## Load general data required for classification

In [8]:
%%time
# Load the exported classification map and the training/validation/test document lists.
# Each pickle is opened in binary mode inside a `with` block so the file handle is closed
# as soon as the object has been read.
# NOTE: unpickling can execute arbitrary code; only load exports from a trusted location.
with open(doc_classifications_map_file, 'rb') as pickled_file:
    doc_classification_map = pickle.load(pickled_file)
with open(training_docs_list_file, 'rb') as pickled_file:
    training_docs_list = pickle.load(pickled_file)
with open(validation_docs_list_file, 'rb') as pickled_file:
    validation_docs_list = pickle.load(pickled_file)
with open(test_docs_list_file, 'rb') as pickled_file:
    test_docs_list = pickle.load(pickled_file)

CPU times: user 17.7 s, sys: 1.08 s, total: 18.8 s
Wall time: 18.8 s


In [9]:
len(training_docs_list)

1286325

In [10]:
len(validation_docs_list)

321473

In [11]:
len(test_docs_list)

401877

# Utility functions for data loading

In [12]:
# Number of documents handed to the thread pool per pool.map() call during inference
VALIDATION_MINI_BATCH_SIZE = 10000


def get_extended_docs_with_inference_data_only(doc2vec_model, file_to_write, preprocessed_files_prefix, level, model_name):
    """
    Use the trained doc2vec model to get the paragraph vector representations of the validation or test documents.

    The vectors are cached on disk: if `file_to_write` already exists below
    `doc2vec_model_save_location`/`GLOBAL_VARS.MODEL_NAME`, it is unpickled and returned
    without running any inference. Otherwise every matching document is inferred and the
    result is pickled to that file.

    :param doc2vec_model: model whose `infer_vector` is called for every document
    :param file_to_write: file name (inside the current model directory) of the cache pickle
    :param preprocessed_files_prefix: prefix of the preprocessed chunk files, forwarded to BatchWrapper
    :param level: forwarded to BatchWrapper as `level`
    :param model_name: forwarded to BatchWrapper as `level_type`
    :return: dict mapping doc_id to its inferred vector
    """

    def infer_one_doc(doc_tuple):
        # (disabled) per-document reseeding:
        # doc2vec_model.random = np.random.RandomState(DOC2VEC_SEED)
        doc_id, doc_tokens = doc_tuple
        return (doc_id, doc2vec_model.infer_vector(doc_tokens))

    vectors_file = os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, file_to_write)

    if os.path.exists(vectors_file):
        info("===== Loading inference vectors")
        # NOTE: unpickling can execute arbitrary code; the cache file must be trusted
        with open(vectors_file, 'rb') as vectors_in:
            inference_documents_reps = pickle.load(vectors_in)
        info("Loaded inference vectors matrix")
        return inference_documents_reps

    info("===== Getting vectors with inference")
    inference_documents_reps = {}

    # batch_size=None makes BatchWrapper yield plain (doc_id, tokens) tuples
    inference_docs_iterator = BatchWrapper(preprocessed_files_prefix, batch_size=None, level=level, level_type=model_name)
    docs_stream = iter(inference_docs_iterator)

    # Multi-threaded inference. pool.map consumes its whole input on the spot, so the
    # stream is fed in slices of VALIDATION_MINI_BATCH_SIZE documents to fake mini-batching.
    pool = ThreadPool(NUM_CORES)
    try:
        while True:
            mini_batch_reps = pool.map(infer_one_doc, itertools.islice(docs_stream, VALIDATION_MINI_BATCH_SIZE))
            if not mini_batch_reps:
                # an empty slice means the document stream is exhausted
                break
            inference_documents_reps.update(mini_batch_reps)
            info("Finished: {} tags".format(len(inference_documents_reps)))
    finally:
        # release the worker threads even when inference fails half-way
        pool.close()
        pool.terminate()

    with open(vectors_file, 'wb') as vectors_out:
        pickle.dump(inference_documents_reps, vectors_out)

    return inference_documents_reps

In [13]:
class ExtendedPVDocumentBatchGenerator(Process):
    """
    Producer process that streams the lines of consecutive gzipped chunk files into a queue.

    Chunk files are named `<filename_prefix><index>.gz`, where the index starts at
    `start_file` and grows by `offset` per file. Every line of every file is put on the
    queue unchanged. The first IOError (typically: the next chunk file cannot be opened)
    ends the stream: `False` is put on the queue as an end marker and the process returns.
    """

    def __init__(self, filename_prefix, queue, batch_size=10000, start_file=0, offset=10000):
        """
        :param filename_prefix: path prefix of the chunk files
        :param queue: queue that receives the raw lines followed by the `False` end marker
        :param batch_size: accepted for interface compatibility; not used by this class
        :param start_file: index of the first chunk file
        :param offset: index distance between two consecutive chunk files
        """
        super(ExtendedPVDocumentBatchGenerator, self).__init__()
        self.queue = queue
        self.offset = offset
        self.filename_prefix = filename_prefix
        # index of the last opened file; starts one step before start_file
        self.files_loaded = start_file - offset

    def run(self):
        """Feed all lines of all chunk files into the queue, then the `False` end marker."""
        while True:
            next_index = self.files_loaded + self.offset
            cur_file = None
            try:
                info("Loading new file for index: {}".format(str(next_index)))
                cur_file = io.BufferedReader(gzip.open(self.filename_prefix + str(next_index) + '.gz'))
                self.files_loaded = next_index
                for line in cur_file:
                    self.queue.put(line)
            except IOError:
                # any IOError (missing next chunk or a read failure) terminates the stream
                self.queue.put(False, block=True, timeout=None)
                info("All files are loaded - last file: {}".format(str(self.files_loaded + self.offset)))
                return
            finally:
                # close the chunk on success and on a read error alike
                if cur_file is not None:
                    cur_file.close()


class BatchWrapper(object):
    """
    Single-pass iterable over the documents produced by an ExtendedPVDocumentBatchGenerator.

    A background generator process is started on construction and fills a bounded queue
    with raw lines. Iterating drains that queue, keeps only the documents whose id matches
    `level` / `level_type`, and yields either LabeledSentence chunks of at most
    `batch_size` words (training) or one `(doc_id, tokens)` tuple per document when
    `batch_size` is None (inference).
    """

    def __init__(self, training_preprocessed_files_prefix, buffer_size=10000, batch_size=10000, level=1, level_type=None):
        """
        :param training_preprocessed_files_prefix: prefix of the chunk files to stream
        :param buffer_size: maximum number of lines buffered in the queue
        :param batch_size: maximum words per LabeledSentence, or None for raw (doc_id, tokens) tuples
        :param level: number of "_"-separated parts a document id must have to be kept
        :param level_type: only its first element is kept; it is compared with the first
            character of the second id part. None disables that check.
        """
        # None has to be tested first: comparing None with an int raises TypeError on Python 3
        assert batch_size is None or batch_size <= 10000
        self.level = level
        self.level_type = level_type[0] if level_type is not None else None
        self.batch_size = batch_size
        self.q = Queue(maxsize=buffer_size)
        self.p = ExtendedPVDocumentBatchGenerator(training_preprocessed_files_prefix, queue=self.q,
                                                  batch_size=batch_size, start_file=0, offset=10000)
        self.p.start()
        self.cur_data = []

    def is_correct_type(self, doc_id):
        """Return True when `doc_id` has exactly `level` parts and the requested type."""
        parts = doc_id.split("_")
        if len(parts) != self.level:
            return False
        if len(parts) == 1 or self.level_type is None:
            return True
        # slice instead of index so that an empty second part is a mismatch, not an IndexError
        return parts[1][:1] == self.level_type

    def return_sentences(self, line):
        """
        Turn one raw line ("<doc_id> <token> <token> ...") into a tuple of sentences.

        :return: False when the document id does not match; otherwise a tuple holding
            either one (doc_id, tokens) pair (batch_size is None) or the LabeledSentence
            chunks of the document (possibly empty when the document has no tokens)
        """
        # NOTE(review): the line is split as-is, so a trailing newline stays attached to the
        # last token - confirm this matches how the corpus was preprocessed.
        line_array = tuple(line.split(" "))
        doc_id = line_array[0]
        if not self.is_correct_type(doc_id):
            return False
        line_array = line_array[1:]

        if self.batch_size is None:
            # dont use LabeledSentence for validation iterator
            return ((doc_id, line_array),)

        # divide the document to batches according to the batch size
        return tuple(LabeledSentence(words=line_array[start: start + self.batch_size], tags=[doc_id])
                     for start in range(0, len(line_array), self.batch_size))

    def __iter__(self):
        """Yield the sentences of all matching documents until the end marker arrives."""
        while True:
            item = self.q.get(block=True)
            if item is False:
                # end marker from the producer: stop it and finish the generator.
                # A plain return is used because raising StopIteration inside a
                # generator is turned into a RuntimeError by PEP 479.
                self.p.terminate()
                return
            # return_sentences gives False (skipped document) or a tuple of sentences
            for sentence in self.return_sentences(item) or ():
                yield sentence


# Doc2vec and SVM Parameters

In [14]:
# Values handed to the Doc2Vec constructor; most of them are also embedded in the model name
DOC2VEC_SIZE = 200  # `size`
DOC2VEC_WINDOW = 2  # `window`
DOC2VEC_MAX_VOCAB_SIZE = None  # `max_vocab_size`
DOC2VEC_SAMPLE = 1e-3  # `sample`
DOC2VEC_TYPE = 1  # `dm`; 1 is labelled 'dm' in the model name, anything else 'pv-dbow'
DOC2VEC_HIERARCHICAL_SAMPLE = 0  # `hs`
DOC2VEC_NEGATIVE_SAMPLE_SIZE = 10  # `negative`
DOC2VEC_CONCAT = 0  # `dm_concat`
DOC2VEC_MEAN = 1  # the `mean` component of the model name
DOC2VEC_TRAIN_WORDS = 0  # `dbow_words`
DOC2VEC_EPOCHS = 1 # `iter`; we do our training manually one epoch at a time
DOC2VEC_MAX_EPOCHS = 8  # last epoch number of the manual training loop
REPORT_DELAY = 20 # report the training progress every x seconds (`report_delay` of train)
REPORT_VOCAB_PROGRESS = 100000 # report vocab progress every x documents (`progress_per` of build_vocab)

## Create the Doc2vec model and create/load the vocab

In [15]:
# (level, model_name) configurations; only the first entry is used.
# Both values are handed to BatchWrapper: `level` is the number of "_"-separated parts a
# document id must have, and the first character of `model_name` must match the first
# character of the second id part.
models = [
    (2, 'description')
]
level, model_name = models[0]

In [18]:
# Directory that holds everything produced for this (level, model_name) configuration
doc2vec_model_save_location = os.path.join(root_location,
                                           "parameter_search_doc2vec_models_recalc_" + str(level) + '_' + model_name,
                                           "full")
# logged after the location is known so that the message actually names it
info("creating/loading vocabulary for " + str(level) + ' ' + model_name + ' in ' + doc2vec_model_save_location)
if not os.path.exists(doc2vec_model_save_location):
    os.makedirs(doc2vec_model_save_location)
if not os.path.exists(os.path.join(doc2vec_model_save_location, VOCAB_MODEL)):
    os.makedirs(os.path.join(doc2vec_model_save_location, VOCAB_MODEL))

# The model name encodes the full parameter configuration; it doubles as the directory name
placeholder_model_name = 'doc2vec_size_{}_w_{}_type_{}_concat_{}_mean_{}_trainwords_{}_hs_{}_neg_{}_vocabsize_{}_model_{}'.format(
    DOC2VEC_SIZE,
    DOC2VEC_WINDOW,
    'dm' if DOC2VEC_TYPE == 1 else 'pv-dbow',
    DOC2VEC_CONCAT, DOC2VEC_MEAN,
    DOC2VEC_TRAIN_WORDS,
    DOC2VEC_HIERARCHICAL_SAMPLE, DOC2VEC_NEGATIVE_SAMPLE_SIZE,
    str(DOC2VEC_MAX_VOCAB_SIZE),
    str(level) + '_' + model_name
)
GLOBAL_VARS.DOC2VEC_MODEL_NAME = placeholder_model_name
# from here on the placeholder is a template: .format(epoch) gives "<model name>/epoch_<n>"
placeholder_model_name = os.path.join(placeholder_model_name, "epoch_{}")
info("FILE " + os.path.join(doc2vec_model_save_location, VOCAB_MODEL, MODEL_PREFIX))
doc2vec_model = Doc2Vec(size=DOC2VEC_SIZE, window=DOC2VEC_WINDOW, min_count=MIN_WORD_COUNT,
                max_vocab_size=DOC2VEC_MAX_VOCAB_SIZE,
                sample=DOC2VEC_SAMPLE, seed=DOC2VEC_SEED, workers=NUM_CORES,
                # doc2vec algorithm: dm=1 => PV-DM, otherwise (conventionally dm=0) PV-DBOW;
                # PV-DM dictates CBOW for words
                dm=DOC2VEC_TYPE,
                # hs=0 => negative sampling, hs=1 => hierarchical softmax
                hs=DOC2VEC_HIERARCHICAL_SAMPLE, negative=DOC2VEC_NEGATIVE_SAMPLE_SIZE,
                dm_concat=DOC2VEC_CONCAT,
                # passed explicitly so the trained model matches the `mean_{}` part of its name
                dm_mean=DOC2VEC_MEAN,
                # would train words with skip-gram on top of cbow, we don't need that for now
                dbow_words=DOC2VEC_TRAIN_WORDS,
                iter=DOC2VEC_EPOCHS)

GLOBAL_VARS.DOC2VEC_MODEL = doc2vec_model


2017-04-19 02:01:54,634 : INFO : creating/loading vocabulary for 2 description in 
2017-04-19 02:01:54,636 : INFO : FILE /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/vocab_model/model


In [19]:
# The vocabulary scan is a full pass over the training data, so its result is saved once
# and reused by later runs.
vocab_model_file = os.path.join(doc2vec_model_save_location, VOCAB_MODEL, MODEL_PREFIX)
if os.path.exists(vocab_model_file):
    info("Loading vocab model")
    # take over the saved vocabulary into the freshly configured model
    doc2vec_model_vocab_model = Doc2Vec.load(vocab_model_file)
    doc2vec_model.reset_from(doc2vec_model_vocab_model)
else:
    info("Creating vocab model")
    training_docs_iterator = BatchWrapper(training_preprocessed_files_prefix, batch_size=10000, level=level,
                                          level_type=model_name)
    doc2vec_model.build_vocab(sentences=training_docs_iterator, progress_per=REPORT_VOCAB_PROGRESS)
    doc2vec_model.save(vocab_model_file)

2017-04-19 02:01:56,803 : INFO : Creating vocab model
2017-04-19 02:01:56,846 : INFO : collecting all words and their counts
2017-04-19 02:01:56,849 : INFO : Loading new file for index: 0
2017-04-19 02:01:56,856 : INFO : PROGRESS: at example #0, processed 0 words (0/s), 0 word types, 0 tags
2017-04-19 02:02:26,877 : INFO : Loading new file for index: 10000
2017-04-19 02:02:57,273 : INFO : Loading new file for index: 20000
2017-04-19 02:03:29,916 : INFO : Loading new file for index: 30000
2017-04-19 02:04:02,779 : INFO : Loading new file for index: 40000
2017-04-19 02:04:35,457 : INFO : Loading new file for index: 50000
2017-04-19 02:05:08,435 : INFO : Loading new file for index: 60000
2017-04-19 02:05:40,060 : INFO : Loading new file for index: 70000
2017-04-19 02:05:48,746 : INFO : PROGRESS: at example #100000, processed 626223826 words (2700540/s), 3125696 word types, 72315 tags
2017-04-19 02:06:13,408 : INFO : Loading new file for index: 80000
2017-04-19 02:06:46,458 : INFO : Loadin

2017-04-19 02:52:10,590 : INFO : Loading new file for index: 940000
2017-04-19 02:52:25,230 : INFO : PROGRESS: at example #1300000, processed 8134258763 words (2440725/s), 20168634 word types, 944012 tags
2017-04-19 02:52:44,923 : INFO : Loading new file for index: 950000
2017-04-19 02:53:18,893 : INFO : Loading new file for index: 960000
2017-04-19 02:53:53,487 : INFO : Loading new file for index: 970000
2017-04-19 02:54:27,993 : INFO : Loading new file for index: 980000
2017-04-19 02:55:02,293 : INFO : Loading new file for index: 990000
2017-04-19 02:55:36,997 : INFO : Loading new file for index: 1000000
2017-04-19 02:56:11,641 : INFO : Loading new file for index: 1010000
2017-04-19 02:56:35,940 : INFO : PROGRESS: at example #1400000, processed 8759559554 words (2494142/s), 21243867 word types, 1016608 tags
2017-04-19 02:56:46,434 : INFO : Loading new file for index: 1020000
2017-04-19 02:57:21,064 : INFO : Loading new file for index: 1030000
2017-04-19 02:57:57,479 : INFO : Loading 

## Actual training, validation and metrics loop

In [20]:
# NOTE(review): 0.025 is presumably the model's initial alpha, which would keep the learning
# rate constant within an epoch - confirm against the gensim default.
doc2vec_model.min_alpha = 0.025
# amount subtracted from the learning rate after every manually-run training epoch
DOC2VEC_ALPHA_DECREASE = 0.001

In [21]:
# (re)apply the configured number of workers to the model before training
doc2vec_model.workers = NUM_CORES

In [22]:
%%time
# when resuming, resume from an epoch with a previously created doc2vec model to get the learning rate right
start_from = 1
for epoch in range(start_from, DOC2VEC_MAX_EPOCHS+1):
    GLOBAL_VARS.MODEL_NAME = placeholder_model_name.format(epoch)
    info("****************** Epoch {} --- Working on {} *******************".format(epoch, GLOBAL_VARS.MODEL_NAME))
    
    # if we have the model, just load it, otherwise train the previous model
    if os.path.exists(os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, MODEL_PREFIX)):
        doc2vec_model = Doc2Vec.load(os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, MODEL_PREFIX))
        doc2vec_model.workers = NUM_CORES
        GLOBAL_VARS.DOC2VEC_MODEL = doc2vec_model
    else:
        # train the doc2vec model
        training_docs_iterator = BatchWrapper(training_preprocessed_files_prefix, batch_size=10000, level=level,
                                          level_type=model_name)
        %time doc2vec_model.train(sentences=training_docs_iterator, report_delay=REPORT_DELAY)
        doc2vec_model.alpha -= DOC2VEC_ALPHA_DECREASE  # decrease the learning rate
        doc2vec_model.min_alpha = doc2vec_model.alpha  # fix the learning rate, no decay
        ensure_disk_location_exists(os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME))
        doc2vec_model.save(os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, MODEL_PREFIX))
        GLOBAL_VARS.DOC2VEC_MODEL = doc2vec_model
        
    # only do the inference for higher epochs, as inference usually takes as much time as the actual training
    if epoch == 3 or epoch == 5:
        # Validation Embeddings
        info('Getting Validation Embeddings')
        Xv = get_extended_docs_with_inference_data_only(doc2vec_model, VALIDATION_DICT, 
                                         validation_preprocessed_files_prefix, level, model_name)

2017-04-19 03:13:21,923 : INFO : ****************** Epoch 1 --- Working on doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_1 *******************
2017-04-19 03:13:22,070 : INFO : training model with 16 workers on 424864 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=10 window=2
2017-04-19 03:13:22,072 : INFO : expecting 1770337 sentences, matching count from corpus used for vocabulary survey
2017-04-19 03:13:22,071 : INFO : Loading new file for index: 0
2017-04-19 03:13:23,097 : INFO : PROGRESS: at 0.01% examples, 495978 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:13:43,112 : INFO : PROGRESS: at 0.18% examples, 656700 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:14:03,131 : INFO : PROGRESS: at 0.37% examples, 672721 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:14:23,139 : INFO : PROGRESS: at 0.56% examples, 679811 words/s, in_qsize 0, out_qsize 3
2017-04-19 03:14:43,144 : INFO : PROGRESS: at 0.74% 

2017-04-19 03:35:23,713 : INFO : PROGRESS: at 12.46% examples, 710113 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:35:23,853 : INFO : Loading new file for index: 160000
2017-04-19 03:35:43,719 : INFO : PROGRESS: at 12.65% examples, 710368 words/s, in_qsize 0, out_qsize 2
2017-04-19 03:36:03,726 : INFO : PROGRESS: at 12.85% examples, 710538 words/s, in_qsize 0, out_qsize 2
2017-04-19 03:36:23,732 : INFO : PROGRESS: at 13.04% examples, 710651 words/s, in_qsize 0, out_qsize 2
2017-04-19 03:36:43,732 : INFO : PROGRESS: at 13.23% examples, 710752 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:36:44,286 : INFO : Loading new file for index: 170000
2017-04-19 03:37:03,742 : INFO : PROGRESS: at 13.42% examples, 710872 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:37:23,746 : INFO : PROGRESS: at 13.61% examples, 710969 words/s, in_qsize 1, out_qsize 0
2017-04-19 03:37:43,747 : INFO : PROGRESS: at 13.80% examples, 711074 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:38:03,748 : INFO : PROGRES

2017-04-19 03:58:22,028 : INFO : Loading new file for index: 330000
2017-04-19 03:58:24,179 : INFO : PROGRESS: at 25.71% examples, 716291 words/s, in_qsize 0, out_qsize 1
2017-04-19 03:58:44,187 : INFO : PROGRESS: at 25.91% examples, 716358 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:59:04,190 : INFO : PROGRESS: at 26.10% examples, 716414 words/s, in_qsize 0, out_qsize 1
2017-04-19 03:59:24,193 : INFO : PROGRESS: at 26.30% examples, 716514 words/s, in_qsize 0, out_qsize 0
2017-04-19 03:59:41,248 : INFO : Loading new file for index: 340000
2017-04-19 03:59:44,195 : INFO : PROGRESS: at 26.49% examples, 716605 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:00:04,196 : INFO : PROGRESS: at 26.69% examples, 716659 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:00:24,196 : INFO : PROGRESS: at 26.88% examples, 716714 words/s, in_qsize 0, out_qsize 1
2017-04-19 04:00:44,216 : INFO : PROGRESS: at 27.07% examples, 716755 words/s, in_qsize 0, out_qsize 2
2017-04-19 04:01:00,747 : INFO : Loading

2017-04-19 04:21:04,596 : INFO : PROGRESS: at 38.92% examples, 720212 words/s, in_qsize 0, out_qsize 2
2017-04-19 04:21:24,599 : INFO : PROGRESS: at 39.12% examples, 720238 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:21:44,599 : INFO : PROGRESS: at 39.31% examples, 720278 words/s, in_qsize 0, out_qsize 1
2017-04-19 04:22:04,603 : INFO : PROGRESS: at 39.51% examples, 720363 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:22:20,678 : INFO : Loading new file for index: 510000
2017-04-19 04:22:24,609 : INFO : PROGRESS: at 39.70% examples, 720425 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:22:44,619 : INFO : PROGRESS: at 39.90% examples, 720545 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:23:04,621 : INFO : PROGRESS: at 40.10% examples, 720644 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:23:24,623 : INFO : PROGRESS: at 40.30% examples, 720715 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:23:39,378 : INFO : Loading new file for index: 520000
2017-04-19 04:23:44,622 : INFO : PROGRES

2017-04-19 04:44:05,194 : INFO : PROGRESS: at 52.42% examples, 723489 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:44:25,198 : INFO : PROGRESS: at 52.61% examples, 723519 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:44:45,210 : INFO : PROGRESS: at 52.81% examples, 723562 words/s, in_qsize 0, out_qsize 1
2017-04-19 04:44:47,687 : INFO : Loading new file for index: 680000
2017-04-19 04:45:05,214 : INFO : PROGRESS: at 53.00% examples, 723593 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:45:25,226 : INFO : PROGRESS: at 53.20% examples, 723631 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:45:45,231 : INFO : PROGRESS: at 53.39% examples, 723704 words/s, in_qsize 0, out_qsize 0
2017-04-19 04:46:05,233 : INFO : PROGRESS: at 53.59% examples, 723731 words/s, in_qsize 0, out_qsize 1
2017-04-19 04:46:08,328 : INFO : Loading new file for index: 690000
2017-04-19 04:46:25,233 : INFO : PROGRESS: at 53.78% examples, 723768 words/s, in_qsize 0, out_qsize 2
2017-04-19 04:46:45,238 : INFO : PROGRES

2017-04-19 05:07:05,679 : INFO : PROGRESS: at 65.88% examples, 725592 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:07:25,681 : INFO : PROGRESS: at 66.07% examples, 725624 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:07:30,956 : INFO : Loading new file for index: 850000
2017-04-19 05:07:45,701 : INFO : PROGRESS: at 66.26% examples, 725652 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:08:05,704 : INFO : PROGRESS: at 66.46% examples, 725680 words/s, in_qsize 0, out_qsize 1
2017-04-19 05:08:25,712 : INFO : PROGRESS: at 66.65% examples, 725714 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:08:45,713 : INFO : PROGRESS: at 66.85% examples, 725757 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:08:50,829 : INFO : Loading new file for index: 860000
2017-04-19 05:09:05,723 : INFO : PROGRESS: at 67.04% examples, 725795 words/s, in_qsize 0, out_qsize 2
2017-04-19 05:09:25,726 : INFO : PROGRESS: at 67.24% examples, 725824 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:09:45,735 : INFO : PROGRES

2017-04-19 05:29:55,878 : INFO : Loading new file for index: 1020000
2017-04-19 05:30:06,149 : INFO : PROGRESS: at 79.42% examples, 727430 words/s, in_qsize 0, out_qsize 2
2017-04-19 05:30:26,168 : INFO : PROGRESS: at 79.62% examples, 727447 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:30:46,187 : INFO : PROGRESS: at 79.81% examples, 727443 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:31:06,196 : INFO : PROGRESS: at 80.00% examples, 727445 words/s, in_qsize 0, out_qsize 1
2017-04-19 05:31:16,481 : INFO : Loading new file for index: 1030000
2017-04-19 05:31:26,200 : INFO : PROGRESS: at 80.20% examples, 727461 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:31:46,220 : INFO : PROGRESS: at 80.39% examples, 727471 words/s, in_qsize 0, out_qsize 1
2017-04-19 05:32:06,218 : INFO : PROGRESS: at 80.58% examples, 727486 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:32:26,220 : INFO : PROGRESS: at 80.77% examples, 727498 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:32:38,104 : INFO : Loadi

2017-04-19 05:52:46,618 : INFO : PROGRESS: at 92.78% examples, 728775 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:53:06,626 : INFO : PROGRESS: at 92.98% examples, 728810 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:53:26,633 : INFO : PROGRESS: at 93.19% examples, 728880 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:53:39,266 : INFO : Loading new file for index: 1200000
2017-04-19 05:53:46,642 : INFO : PROGRESS: at 93.39% examples, 728900 words/s, in_qsize 0, out_qsize 1
2017-04-19 05:54:06,653 : INFO : PROGRESS: at 93.58% examples, 728910 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:54:26,653 : INFO : PROGRESS: at 93.78% examples, 728942 words/s, in_qsize 0, out_qsize 0
2017-04-19 05:54:46,659 : INFO : PROGRESS: at 93.98% examples, 728974 words/s, in_qsize 0, out_qsize 1
2017-04-19 05:54:57,713 : INFO : Loading new file for index: 1210000
2017-04-19 05:55:06,691 : INFO : PROGRESS: at 94.18% examples, 729014 words/s, in_qsize 0, out_qsize 2
2017-04-19 05:55:26,710 : INFO : PROGR

CPU times: user 17h 44min 27s, sys: 14min 6s, total: 17h 58min 33s
Wall time: 2h 51min 35s


2017-04-19 06:04:58,621 : INFO : not storing attribute syn0norm
2017-04-19 06:04:58,622 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_1/model.wv.syn0.npy
2017-04-19 06:04:58,874 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_1/model.syn1neg.npy
2017-04-19 06:04:59,082 : INFO : not storing attribute cum_table
2017-04-19 06:05:16,395 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_1/model
2017-04-19 06:05:16,397 : INFO : ****************** Epoch 2 --- Working o

2017-04-19 06:24:17,889 : INFO : PROGRESS: at 11.07% examples, 730638 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:24:37,897 : INFO : PROGRESS: at 11.27% examples, 730569 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:24:57,911 : INFO : PROGRESS: at 11.46% examples, 730584 words/s, in_qsize 0, out_qsize 3
2017-04-19 06:25:17,926 : INFO : PROGRESS: at 11.66% examples, 730582 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:25:20,544 : INFO : Loading new file for index: 150000
2017-04-19 06:25:37,924 : INFO : PROGRESS: at 11.85% examples, 730638 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:25:57,937 : INFO : PROGRESS: at 12.04% examples, 730680 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:26:17,939 : INFO : PROGRESS: at 12.24% examples, 730773 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:26:37,950 : INFO : PROGRESS: at 12.43% examples, 730832 words/s, in_qsize 0, out_qsize 3
2017-04-19 06:26:40,814 : INFO : Loading new file for index: 160000
2017-04-19 06:26:57,955 : INFO : PROGRES

2017-04-19 06:47:18,350 : INFO : PROGRESS: at 24.49% examples, 731045 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:47:38,360 : INFO : PROGRESS: at 24.68% examples, 731091 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:47:58,361 : INFO : PROGRESS: at 24.87% examples, 731094 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:48:03,000 : INFO : Loading new file for index: 320000
2017-04-19 06:48:18,360 : INFO : PROGRESS: at 25.07% examples, 731123 words/s, in_qsize 0, out_qsize 1
2017-04-19 06:48:38,360 : INFO : PROGRESS: at 25.27% examples, 731143 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:48:58,367 : INFO : PROGRESS: at 25.46% examples, 731182 words/s, in_qsize 1, out_qsize 0
2017-04-19 06:49:18,393 : INFO : PROGRESS: at 25.66% examples, 731164 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:49:21,425 : INFO : Loading new file for index: 330000
2017-04-19 06:49:38,404 : INFO : PROGRESS: at 25.86% examples, 731253 words/s, in_qsize 0, out_qsize 0
2017-04-19 06:49:58,419 : INFO : PROGRES

2017-04-19 07:10:18,794 : INFO : PROGRESS: at 37.97% examples, 731444 words/s, in_qsize 0, out_qsize 1
2017-04-19 07:10:34,332 : INFO : Loading new file for index: 490000
2017-04-19 07:10:38,796 : INFO : PROGRESS: at 38.16% examples, 731478 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:10:58,804 : INFO : PROGRESS: at 38.36% examples, 731501 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:11:18,814 : INFO : PROGRESS: at 38.55% examples, 731500 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:11:38,828 : INFO : PROGRESS: at 38.75% examples, 731516 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:11:52,708 : INFO : Loading new file for index: 500000
2017-04-19 07:11:58,831 : INFO : PROGRESS: at 38.95% examples, 731523 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:12:18,833 : INFO : PROGRESS: at 39.15% examples, 731517 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:12:38,839 : INFO : PROGRESS: at 39.34% examples, 731516 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:12:58,843 : INFO : PROGRES

2017-04-19 07:33:04,337 : INFO : Loading new file for index: 660000
2017-04-19 07:33:19,207 : INFO : PROGRESS: at 51.40% examples, 731052 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:33:39,213 : INFO : PROGRESS: at 51.60% examples, 731078 words/s, in_qsize 2, out_qsize 0
2017-04-19 07:33:59,214 : INFO : PROGRESS: at 51.79% examples, 730927 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:34:19,215 : INFO : PROGRESS: at 51.96% examples, 730615 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:34:29,282 : INFO : Loading new file for index: 670000
2017-04-19 07:34:39,223 : INFO : PROGRESS: at 52.15% examples, 730611 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:34:59,224 : INFO : PROGRESS: at 52.34% examples, 730499 words/s, in_qsize 0, out_qsize 1
2017-04-19 07:35:19,238 : INFO : PROGRESS: at 52.51% examples, 730198 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:35:39,247 : INFO : PROGRESS: at 52.70% examples, 730103 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:35:52,895 : INFO : Loading

2017-04-19 07:56:01,363 : INFO : Loading new file for index: 820000
2017-04-19 07:56:19,710 : INFO : PROGRESS: at 63.96% examples, 721578 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:56:39,743 : INFO : PROGRESS: at 64.14% examples, 721349 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:56:59,746 : INFO : PROGRESS: at 64.32% examples, 721306 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:57:19,762 : INFO : PROGRESS: at 64.51% examples, 721237 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:57:23,578 : INFO : Loading new file for index: 830000
2017-04-19 07:57:39,775 : INFO : PROGRESS: at 64.68% examples, 720982 words/s, in_qsize 0, out_qsize 1
2017-04-19 07:57:59,779 : INFO : PROGRESS: at 64.87% examples, 720894 words/s, in_qsize 0, out_qsize 1
2017-04-19 07:58:19,786 : INFO : PROGRESS: at 65.05% examples, 720831 words/s, in_qsize 0, out_qsize 0
2017-04-19 07:58:39,789 : INFO : PROGRESS: at 65.23% examples, 720601 words/s, in_qsize 9, out_qsize 0
2017-04-19 07:58:52,577 : INFO : Loading

2017-04-19 08:19:12,142 : INFO : Loading new file for index: 990000
2017-04-19 08:19:20,151 : INFO : PROGRESS: at 77.06% examples, 719811 words/s, in_qsize 0, out_qsize 1
2017-04-19 08:19:40,160 : INFO : PROGRESS: at 77.25% examples, 719817 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:20:00,162 : INFO : PROGRESS: at 77.44% examples, 719821 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:20:20,164 : INFO : PROGRESS: at 77.63% examples, 719820 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:20:32,894 : INFO : Loading new file for index: 1000000
2017-04-19 08:20:40,167 : INFO : PROGRESS: at 77.82% examples, 719827 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:21:00,169 : INFO : PROGRESS: at 78.02% examples, 719857 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:21:20,178 : INFO : PROGRESS: at 78.21% examples, 719868 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:21:40,179 : INFO : PROGRESS: at 78.41% examples, 719892 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:21:52,582 : INFO : Loadin

2017-04-19 08:42:01,645 : INFO : Loading new file for index: 1160000
2017-04-19 08:42:20,642 : INFO : PROGRESS: at 90.39% examples, 720698 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:42:40,646 : INFO : PROGRESS: at 90.59% examples, 720704 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:43:00,647 : INFO : PROGRESS: at 90.78% examples, 720716 words/s, in_qsize 0, out_qsize 3
2017-04-19 08:43:20,652 : INFO : PROGRESS: at 90.97% examples, 720722 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:43:21,996 : INFO : Loading new file for index: 1170000
2017-04-19 08:43:40,659 : INFO : PROGRESS: at 91.17% examples, 720746 words/s, in_qsize 0, out_qsize 1
2017-04-19 08:44:00,682 : INFO : PROGRESS: at 91.37% examples, 720777 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:44:20,687 : INFO : PROGRESS: at 91.57% examples, 720789 words/s, in_qsize 0, out_qsize 0
2017-04-19 08:44:39,936 : INFO : Loading new file for index: 1180000
2017-04-19 08:44:40,693 : INFO : PROGRESS: at 91.76% examples, 720800 wo

CPU times: user 18h 25min 41s, sys: 15min 13s, total: 18h 40min 55s
Wall time: 2h 53min 30s


2017-04-19 08:58:48,023 : INFO : not storing attribute syn0norm
2017-04-19 08:58:48,024 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_2/model.wv.syn0.npy
2017-04-19 08:58:48,275 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_2/model.syn1neg.npy
2017-04-19 08:58:48,482 : INFO : not storing attribute cum_table
2017-04-19 08:59:05,802 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_2/model
2017-04-19 08:59:05,804 : INFO : ****************** Epoch 3 --- Working o

2017-04-19 09:18:07,291 : INFO : PROGRESS: at 10.97% examples, 723592 words/s, in_qsize 0, out_qsize 1
2017-04-19 09:18:27,294 : INFO : PROGRESS: at 11.16% examples, 723526 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:18:47,299 : INFO : PROGRESS: at 11.35% examples, 723526 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:19:07,319 : INFO : PROGRESS: at 11.54% examples, 723454 words/s, in_qsize 0, out_qsize 1
2017-04-19 09:19:21,773 : INFO : Loading new file for index: 150000
2017-04-19 09:19:27,325 : INFO : PROGRESS: at 11.73% examples, 723424 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:19:47,330 : INFO : PROGRESS: at 11.93% examples, 723434 words/s, in_qsize 0, out_qsize 1
2017-04-19 09:20:07,348 : INFO : PROGRESS: at 12.11% examples, 723329 words/s, in_qsize 0, out_qsize 1
2017-04-19 09:20:27,350 : INFO : PROGRESS: at 12.31% examples, 723301 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:20:43,624 : INFO : Loading new file for index: 160000
2017-04-19 09:20:47,350 : INFO : PROGRES

2017-04-19 09:40:51,823 : INFO : Loading new file for index: 310000
2017-04-19 09:41:07,724 : INFO : PROGRESS: at 24.30% examples, 725297 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:41:27,730 : INFO : PROGRESS: at 24.49% examples, 725314 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:41:47,745 : INFO : PROGRESS: at 24.68% examples, 725335 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:42:07,759 : INFO : PROGRESS: at 24.87% examples, 725357 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:42:12,714 : INFO : Loading new file for index: 320000
2017-04-19 09:42:27,759 : INFO : PROGRESS: at 25.07% examples, 725392 words/s, in_qsize 0, out_qsize 1
2017-04-19 09:42:47,765 : INFO : PROGRESS: at 25.26% examples, 725425 words/s, in_qsize 0, out_qsize 1
2017-04-19 09:43:07,767 : INFO : PROGRESS: at 25.46% examples, 725459 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:43:27,784 : INFO : PROGRESS: at 25.65% examples, 725447 words/s, in_qsize 0, out_qsize 0
2017-04-19 09:43:31,699 : INFO : Loading

2017-04-19 10:03:48,255 : INFO : PROGRESS: at 37.45% examples, 725332 words/s, in_qsize 1, out_qsize 0
2017-04-19 10:04:08,258 : INFO : PROGRESS: at 37.65% examples, 725346 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:04:28,261 : INFO : PROGRESS: at 37.84% examples, 725346 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:04:48,263 : INFO : PROGRESS: at 38.04% examples, 725297 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:04:57,057 : INFO : Loading new file for index: 490000
2017-04-19 10:05:08,265 : INFO : PROGRESS: at 38.23% examples, 725304 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:05:28,272 : INFO : PROGRESS: at 38.42% examples, 725322 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:05:48,276 : INFO : PROGRESS: at 38.62% examples, 725309 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:06:08,277 : INFO : PROGRESS: at 38.81% examples, 725305 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:06:16,415 : INFO : Loading new file for index: 500000
2017-04-19 10:06:28,278 : INFO : PROGRES

2017-04-19 10:26:48,799 : INFO : PROGRESS: at 50.80% examples, 725091 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:27:08,802 : INFO : PROGRESS: at 50.99% examples, 725107 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:27:28,804 : INFO : PROGRESS: at 51.18% examples, 725104 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:27:38,457 : INFO : Loading new file for index: 660000
2017-04-19 10:27:48,814 : INFO : PROGRESS: at 51.38% examples, 725134 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:28:08,819 : INFO : PROGRESS: at 51.57% examples, 725131 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:28:28,829 : INFO : PROGRESS: at 51.76% examples, 725115 words/s, in_qsize 0, out_qsize 1
2017-04-19 10:28:48,839 : INFO : PROGRESS: at 51.95% examples, 725103 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:28:59,035 : INFO : Loading new file for index: 670000
2017-04-19 10:29:08,846 : INFO : PROGRESS: at 52.15% examples, 725104 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:29:28,857 : INFO : PROGRES

2017-04-19 10:49:49,239 : INFO : PROGRESS: at 64.05% examples, 724747 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:50:09,241 : INFO : PROGRESS: at 64.23% examples, 724519 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:50:29,251 : INFO : PROGRESS: at 64.41% examples, 724410 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:50:42,282 : INFO : Loading new file for index: 830000
2017-04-19 10:50:49,253 : INFO : PROGRESS: at 64.62% examples, 724489 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:51:09,255 : INFO : PROGRESS: at 64.79% examples, 724262 words/s, in_qsize 0, out_qsize 1
2017-04-19 10:51:29,273 : INFO : PROGRESS: at 64.97% examples, 724128 words/s, in_qsize 2, out_qsize 0
2017-04-19 10:51:49,285 : INFO : PROGRESS: at 65.17% examples, 724187 words/s, in_qsize 0, out_qsize 1
2017-04-19 10:52:09,296 : INFO : PROGRESS: at 65.33% examples, 723948 words/s, in_qsize 0, out_qsize 0
2017-04-19 10:52:09,783 : INFO : Loading new file for index: 840000
2017-04-19 10:52:29,315 : INFO : PROGRES

2017-04-19 11:12:49,721 : INFO : PROGRESS: at 76.69% examples, 718222 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:13:09,735 : INFO : PROGRESS: at 76.88% examples, 718130 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:13:21,572 : INFO : Loading new file for index: 990000
2017-04-19 11:13:29,779 : INFO : PROGRESS: at 77.05% examples, 717943 words/s, in_qsize 0, out_qsize 3
2017-04-19 11:13:49,776 : INFO : PROGRESS: at 77.24% examples, 717965 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:14:09,792 : INFO : PROGRESS: at 77.42% examples, 717861 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:14:29,800 : INFO : PROGRESS: at 77.60% examples, 717689 words/s, in_qsize 0, out_qsize 1
2017-04-19 11:14:46,363 : INFO : Loading new file for index: 1000000
2017-04-19 11:14:49,806 : INFO : PROGRESS: at 77.79% examples, 717714 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:15:09,806 : INFO : PROGRESS: at 77.97% examples, 717611 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:15:29,834 : INFO : PROGRE

2017-04-19 11:35:50,290 : INFO : PROGRESS: at 89.95% examples, 718619 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:36:10,297 : INFO : PROGRESS: at 90.14% examples, 718671 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:36:17,058 : INFO : Loading new file for index: 1160000
2017-04-19 11:36:30,313 : INFO : PROGRESS: at 90.34% examples, 718707 words/s, in_qsize 0, out_qsize 4
2017-04-19 11:36:50,321 : INFO : PROGRESS: at 90.53% examples, 718740 words/s, in_qsize 0, out_qsize 2
2017-04-19 11:37:10,324 : INFO : PROGRESS: at 90.73% examples, 718766 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:37:30,333 : INFO : PROGRESS: at 90.92% examples, 718791 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:37:36,772 : INFO : Loading new file for index: 1170000
2017-04-19 11:37:50,348 : INFO : PROGRESS: at 91.12% examples, 718824 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:38:10,351 : INFO : PROGRESS: at 91.32% examples, 718869 words/s, in_qsize 0, out_qsize 0
2017-04-19 11:38:30,362 : INFO : PROGR

2017-04-19 11:53:07,958 : INFO : storing np array 'doctag_syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_3/model.docvecs.doctag_syn0.npy


CPU times: user 18h 6min 32s, sys: 15min 36s, total: 18h 22min 9s
Wall time: 2h 54min 2s


2017-04-19 11:53:08,886 : INFO : not storing attribute syn0norm
2017-04-19 11:53:08,887 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_3/model.wv.syn0.npy
2017-04-19 11:53:09,142 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_3/model.syn1neg.npy
2017-04-19 11:53:09,348 : INFO : not storing attribute cum_table
2017-04-19 11:53:28,143 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_3/model
2017-04-19 11:53:28,145 : INFO : Getting Validation Embeddings
2017-04-19

2017-04-19 13:59:38,714 : INFO : PROGRESS: at 3.90% examples, 729888 words/s, in_qsize 0, out_qsize 0
2017-04-19 13:59:58,734 : INFO : PROGRESS: at 4.08% examples, 729648 words/s, in_qsize 0, out_qsize 6
2017-04-19 14:00:18,738 : INFO : PROGRESS: at 4.28% examples, 729957 words/s, in_qsize 0, out_qsize 3
2017-04-19 14:00:38,739 : INFO : PROGRESS: at 4.47% examples, 730101 words/s, in_qsize 1, out_qsize 0
2017-04-19 14:00:58,738 : INFO : PROGRESS: at 4.67% examples, 730015 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:00:58,751 : INFO : Loading new file for index: 60000
2017-04-19 14:01:18,749 : INFO : PROGRESS: at 4.86% examples, 729829 words/s, in_qsize 2, out_qsize 0
2017-04-19 14:01:38,749 : INFO : PROGRESS: at 5.06% examples, 729789 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:01:58,765 : INFO : PROGRESS: at 5.25% examples, 730002 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:02:18,163 : INFO : Loading new file for index: 70000
2017-04-19 14:02:18,770 : INFO : PROGRESS: at 5.45

2017-04-19 14:22:39,203 : INFO : PROGRESS: at 17.27% examples, 729215 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:22:59,221 : INFO : PROGRESS: at 17.46% examples, 729231 words/s, in_qsize 0, out_qsize 1
2017-04-19 14:23:19,223 : INFO : PROGRESS: at 17.66% examples, 729226 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:23:39,235 : INFO : PROGRESS: at 17.85% examples, 729268 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:23:43,386 : INFO : Loading new file for index: 230000
2017-04-19 14:23:59,250 : INFO : PROGRESS: at 18.05% examples, 729308 words/s, in_qsize 1, out_qsize 0
2017-04-19 14:24:19,278 : INFO : PROGRESS: at 18.24% examples, 729254 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:24:39,288 : INFO : PROGRESS: at 18.44% examples, 729378 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:24:59,289 : INFO : PROGRESS: at 18.63% examples, 729413 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:25:03,983 : INFO : Loading new file for index: 240000
2017-04-19 14:25:19,292 : INFO : PROGRES

2017-04-19 14:45:39,792 : INFO : PROGRESS: at 30.66% examples, 729411 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:45:59,802 : INFO : PROGRESS: at 30.86% examples, 729430 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:46:19,802 : INFO : PROGRESS: at 31.05% examples, 729484 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:46:28,316 : INFO : Loading new file for index: 400000
2017-04-19 14:46:39,831 : INFO : PROGRESS: at 31.25% examples, 729440 words/s, in_qsize 0, out_qsize 1
2017-04-19 14:46:59,840 : INFO : PROGRESS: at 31.44% examples, 729431 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:47:19,842 : INFO : PROGRESS: at 31.63% examples, 729415 words/s, in_qsize 0, out_qsize 1
2017-04-19 14:47:39,847 : INFO : PROGRESS: at 31.83% examples, 729409 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:47:49,667 : INFO : Loading new file for index: 410000
2017-04-19 14:47:59,854 : INFO : PROGRESS: at 32.02% examples, 729457 words/s, in_qsize 0, out_qsize 0
2017-04-19 14:48:19,866 : INFO : PROGRES

2017-04-19 15:08:40,356 : INFO : PROGRESS: at 44.10% examples, 729351 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:09:00,357 : INFO : PROGRESS: at 44.29% examples, 729327 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:09:00,603 : INFO : Loading new file for index: 570000
2017-04-19 15:09:20,368 : INFO : PROGRESS: at 44.49% examples, 729340 words/s, in_qsize 0, out_qsize 1
2017-04-19 15:09:40,372 : INFO : PROGRESS: at 44.68% examples, 729354 words/s, in_qsize 0, out_qsize 2
2017-04-19 15:10:00,377 : INFO : PROGRESS: at 44.88% examples, 729354 words/s, in_qsize 0, out_qsize 2
2017-04-19 15:10:19,248 : INFO : Loading new file for index: 580000
2017-04-19 15:10:20,379 : INFO : PROGRESS: at 45.08% examples, 729352 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:10:40,391 : INFO : PROGRESS: at 45.27% examples, 729345 words/s, in_qsize 0, out_qsize 1
2017-04-19 15:11:00,408 : INFO : PROGRESS: at 45.47% examples, 729376 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:11:20,438 : INFO : PROGRES

2017-04-19 15:31:00,896 : INFO : PROGRESS: at 58.52% examples, 747552 words/s, in_qsize 0, out_qsize 2
2017-04-19 15:31:20,910 : INFO : PROGRESS: at 58.75% examples, 747926 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:31:40,916 : INFO : PROGRESS: at 58.97% examples, 748289 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:31:51,630 : INFO : Loading new file for index: 760000
2017-04-19 15:32:00,920 : INFO : PROGRESS: at 59.20% examples, 748651 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:32:20,924 : INFO : PROGRESS: at 59.43% examples, 749014 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:32:40,933 : INFO : PROGRESS: at 59.65% examples, 749378 words/s, in_qsize 3, out_qsize 1
2017-04-19 15:33:00,152 : INFO : Loading new file for index: 770000
2017-04-19 15:33:00,942 : INFO : PROGRESS: at 59.88% examples, 749743 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:33:20,950 : INFO : PROGRESS: at 60.12% examples, 750105 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:33:40,962 : INFO : PROGRES

2017-04-19 15:53:21,261 : INFO : PROGRESS: at 73.82% examples, 767923 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:53:25,321 : INFO : Loading new file for index: 950000
2017-04-19 15:53:41,261 : INFO : PROGRESS: at 74.05% examples, 768163 words/s, in_qsize 0, out_qsize 1
2017-04-19 15:54:01,275 : INFO : PROGRESS: at 74.28% examples, 768412 words/s, in_qsize 0, out_qsize 1
2017-04-19 15:54:21,288 : INFO : PROGRESS: at 74.51% examples, 768644 words/s, in_qsize 0, out_qsize 1
2017-04-19 15:54:33,082 : INFO : Loading new file for index: 960000
2017-04-19 15:54:41,296 : INFO : PROGRESS: at 74.74% examples, 768892 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:55:01,309 : INFO : PROGRESS: at 74.96% examples, 769140 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:55:21,308 : INFO : PROGRESS: at 75.19% examples, 769391 words/s, in_qsize 0, out_qsize 2
2017-04-19 15:55:41,312 : INFO : PROGRESS: at 75.42% examples, 769654 words/s, in_qsize 0, out_qsize 0
2017-04-19 15:55:41,499 : INFO : Loading

2017-04-19 16:15:41,717 : INFO : PROGRESS: at 89.13% examples, 781912 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:16:01,720 : INFO : PROGRESS: at 89.36% examples, 782083 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:16:07,393 : INFO : Loading new file for index: 1150000
2017-04-19 16:16:21,726 : INFO : PROGRESS: at 89.58% examples, 782251 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:16:41,729 : INFO : PROGRESS: at 89.81% examples, 782410 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:17:01,730 : INFO : PROGRESS: at 90.03% examples, 782581 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:17:17,111 : INFO : Loading new file for index: 1160000
2017-04-19 16:17:21,742 : INFO : PROGRESS: at 90.26% examples, 782758 words/s, in_qsize 0, out_qsize 3
2017-04-19 16:17:41,739 : INFO : PROGRESS: at 90.49% examples, 782922 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:18:01,746 : INFO : PROGRESS: at 90.72% examples, 783106 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:18:21,746 : INFO : PROGR

CPU times: user 17h 19min 23s, sys: 12min 21s, total: 17h 31min 45s
Wall time: 2h 38min 32s


2017-04-19 16:31:31,032 : INFO : not storing attribute syn0norm
2017-04-19 16:31:31,033 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_4/model.wv.syn0.npy
2017-04-19 16:31:31,257 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_4/model.syn1neg.npy
2017-04-19 16:31:31,450 : INFO : not storing attribute cum_table
2017-04-19 16:31:44,232 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_4/model
2017-04-19 16:31:44,233 : INFO : ****************** Epoch 5 --- Working o

2017-04-19 16:50:25,233 : INFO : Loading new file for index: 160000
2017-04-19 16:50:25,807 : INFO : PROGRESS: at 12.47% examples, 837414 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:50:45,820 : INFO : PROGRESS: at 12.69% examples, 837413 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:51:05,827 : INFO : PROGRESS: at 12.91% examples, 837327 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:51:25,830 : INFO : PROGRESS: at 13.14% examples, 837245 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:51:34,790 : INFO : Loading new file for index: 170000
2017-04-19 16:51:45,826 : INFO : PROGRESS: at 13.36% examples, 837188 words/s, in_qsize 0, out_qsize 1
2017-04-19 16:52:05,832 : INFO : PROGRESS: at 13.58% examples, 837143 words/s, in_qsize 0, out_qsize 0
2017-04-19 16:52:25,842 : INFO : PROGRESS: at 13.80% examples, 837149 words/s, in_qsize 0, out_qsize 4
2017-04-19 16:52:45,839 : INFO : PROGRESS: at 14.02% examples, 837195 words/s, in_qsize 0, out_qsize 2
2017-04-19 16:52:46,041 : INFO : Loading

2017-04-19 17:12:29,247 : INFO : Loading new file for index: 350000
2017-04-19 17:12:46,213 : INFO : PROGRESS: at 27.42% examples, 838162 words/s, in_qsize 0, out_qsize 1
2017-04-19 17:13:06,215 : INFO : PROGRESS: at 27.64% examples, 838136 words/s, in_qsize 0, out_qsize 2
2017-04-19 17:13:26,217 : INFO : PROGRESS: at 27.86% examples, 838148 words/s, in_qsize 1, out_qsize 0
2017-04-19 17:13:41,142 : INFO : Loading new file for index: 360000
2017-04-19 17:13:46,220 : INFO : PROGRESS: at 28.08% examples, 838124 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:14:06,221 : INFO : PROGRESS: at 28.30% examples, 838087 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:14:26,226 : INFO : PROGRESS: at 28.53% examples, 838162 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:14:46,232 : INFO : PROGRESS: at 28.75% examples, 838176 words/s, in_qsize 0, out_qsize 2
2017-04-19 17:14:51,531 : INFO : Loading new file for index: 370000
2017-04-19 17:15:06,230 : INFO : PROGRESS: at 28.98% examples, 838208 words

2017-04-19 17:35:06,591 : INFO : PROGRESS: at 42.42% examples, 838448 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:35:26,596 : INFO : PROGRESS: at 42.65% examples, 838424 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:35:36,532 : INFO : Loading new file for index: 550000
2017-04-19 17:35:46,597 : INFO : PROGRESS: at 42.87% examples, 838393 words/s, in_qsize 0, out_qsize 1
2017-04-19 17:36:06,599 : INFO : PROGRESS: at 43.09% examples, 838393 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:36:26,605 : INFO : PROGRESS: at 43.32% examples, 838376 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:36:45,001 : INFO : Loading new file for index: 560000
2017-04-19 17:36:46,611 : INFO : PROGRESS: at 43.54% examples, 838359 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:37:06,620 : INFO : PROGRESS: at 43.77% examples, 838351 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:37:26,626 : INFO : PROGRESS: at 43.99% examples, 838376 words/s, in_qsize 0, out_qsize 2
2017-04-19 17:37:46,624 : INFO : PROGRES

2017-04-19 17:57:27,033 : INFO : PROGRESS: at 57.40% examples, 838844 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:57:39,092 : INFO : Loading new file for index: 740000
2017-04-19 17:57:47,045 : INFO : PROGRESS: at 57.62% examples, 838832 words/s, in_qsize 0, out_qsize 1
2017-04-19 17:58:07,047 : INFO : PROGRESS: at 57.84% examples, 838843 words/s, in_qsize 0, out_qsize 1
2017-04-19 17:58:27,056 : INFO : PROGRESS: at 58.06% examples, 838811 words/s, in_qsize 0, out_qsize 3
2017-04-19 17:58:47,058 : INFO : PROGRESS: at 58.29% examples, 838792 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:58:49,506 : INFO : Loading new file for index: 750000
2017-04-19 17:59:07,066 : INFO : PROGRESS: at 58.51% examples, 838785 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:59:27,070 : INFO : PROGRESS: at 58.73% examples, 838770 words/s, in_qsize 0, out_qsize 0
2017-04-19 17:59:47,076 : INFO : PROGRESS: at 58.95% examples, 838772 words/s, in_qsize 0, out_qsize 2
2017-04-19 17:59:59,731 : INFO : Loading

2017-04-19 18:19:45,126 : INFO : Loading new file for index: 930000
2017-04-19 18:19:47,401 : INFO : PROGRESS: at 72.36% examples, 838781 words/s, in_qsize 0, out_qsize 2
2017-04-19 18:20:07,427 : INFO : PROGRESS: at 72.58% examples, 838773 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:20:27,446 : INFO : PROGRESS: at 72.81% examples, 838767 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:20:47,453 : INFO : PROGRESS: at 73.03% examples, 838745 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:20:53,984 : INFO : Loading new file for index: 940000
2017-04-19 18:21:07,454 : INFO : PROGRESS: at 73.25% examples, 838738 words/s, in_qsize 1, out_qsize 0
2017-04-19 18:21:27,456 : INFO : PROGRESS: at 73.48% examples, 838749 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:21:47,464 : INFO : PROGRESS: at 73.70% examples, 838759 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:22:02,043 : INFO : Loading new file for index: 950000
2017-04-19 18:22:07,465 : INFO : PROGRESS: at 73.93% examples, 838775 words

2017-04-19 18:42:07,834 : INFO : PROGRESS: at 87.34% examples, 838846 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:42:27,843 : INFO : PROGRESS: at 87.56% examples, 838839 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:42:47,858 : INFO : PROGRESS: at 87.79% examples, 838844 words/s, in_qsize 0, out_qsize 2
2017-04-19 18:42:56,586 : INFO : Loading new file for index: 1130000
2017-04-19 18:43:07,860 : INFO : PROGRESS: at 88.01% examples, 838836 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:43:27,861 : INFO : PROGRESS: at 88.23% examples, 838844 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:43:47,867 : INFO : PROGRESS: at 88.46% examples, 838842 words/s, in_qsize 0, out_qsize 1
2017-04-19 18:44:05,137 : INFO : Loading new file for index: 1140000
2017-04-19 18:44:07,877 : INFO : PROGRESS: at 88.68% examples, 838845 words/s, in_qsize 0, out_qsize 0
2017-04-19 18:44:27,889 : INFO : PROGRESS: at 88.91% examples, 838833 words/s, in_qsize 0, out_qsize 1
2017-04-19 18:44:47,889 : INFO : PROGR

2017-04-19 19:00:55,751 : INFO : worker thread finished; awaiting finish of 3 more threads
2017-04-19 19:00:55,761 : INFO : worker thread finished; awaiting finish of 2 more threads
2017-04-19 19:00:55,762 : INFO : worker thread finished; awaiting finish of 1 more threads
2017-04-19 19:00:55,771 : INFO : worker thread finished; awaiting finish of 0 more threads
2017-04-19 19:00:55,772 : INFO : training on 11070832244 raw words (7509432885 effective words) took 8951.4s, 838912 effective words/s
2017-04-19 19:00:55,774 : INFO : saving Doc2Vec object under /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_5/model, separately None
2017-04-19 19:00:55,775 : INFO : storing np array 'doctag_syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_

CPU times: user 16h 46min 48s, sys: 10min 37s, total: 16h 57min 25s
Wall time: 2h 29min 11s


2017-04-19 19:00:56,507 : INFO : not storing attribute syn0norm
2017-04-19 19:00:56,508 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_5/model.wv.syn0.npy
2017-04-19 19:00:56,730 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_5/model.syn1neg.npy
2017-04-19 19:00:56,921 : INFO : not storing attribute cum_table
2017-04-19 19:01:11,573 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_5/model
2017-04-19 19:01:11,574 : INFO : Getting Validation Embeddings
2017-04-19

2017-04-19 20:55:33,929 : INFO : PROGRESS: at 4.21% examples, 790502 words/s, in_qsize 0, out_qsize 0
2017-04-19 20:55:53,943 : INFO : PROGRESS: at 4.42% examples, 790640 words/s, in_qsize 0, out_qsize 2
2017-04-19 20:56:13,944 : INFO : PROGRESS: at 4.63% examples, 789866 words/s, in_qsize 0, out_qsize 2
2017-04-19 20:56:17,602 : INFO : Loading new file for index: 60000
2017-04-19 20:56:33,961 : INFO : PROGRESS: at 4.84% examples, 789256 words/s, in_qsize 0, out_qsize 2
2017-04-19 20:56:53,956 : INFO : PROGRESS: at 5.06% examples, 790494 words/s, in_qsize 0, out_qsize 0
2017-04-19 20:57:13,960 : INFO : PROGRESS: at 5.28% examples, 792078 words/s, in_qsize 0, out_qsize 0
2017-04-19 20:57:29,601 : INFO : Loading new file for index: 70000
2017-04-19 20:57:33,964 : INFO : PROGRESS: at 5.49% examples, 791670 words/s, in_qsize 0, out_qsize 0
2017-04-19 20:57:53,968 : INFO : PROGRESS: at 5.69% examples, 791044 words/s, in_qsize 0, out_qsize 0
2017-04-19 20:58:13,983 : INFO : PROGRESS: at 5.89

2017-04-19 21:17:54,751 : INFO : Loading new file for index: 240000
2017-04-19 21:18:14,356 : INFO : PROGRESS: at 18.89% examples, 807020 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:18:34,358 : INFO : PROGRESS: at 19.12% examples, 807440 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:18:54,370 : INFO : PROGRESS: at 19.35% examples, 807821 words/s, in_qsize 0, out_qsize 1
2017-04-19 21:19:04,507 : INFO : Loading new file for index: 250000
2017-04-19 21:19:14,371 : INFO : PROGRESS: at 19.57% examples, 808246 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:19:34,378 : INFO : PROGRESS: at 19.79% examples, 808622 words/s, in_qsize 0, out_qsize 1
2017-04-19 21:19:54,378 : INFO : PROGRESS: at 20.01% examples, 808987 words/s, in_qsize 0, out_qsize 1
2017-04-19 21:20:14,379 : INFO : PROGRESS: at 20.24% examples, 809393 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:20:15,000 : INFO : Loading new file for index: 260000
2017-04-19 21:20:34,389 : INFO : PROGRESS: at 20.46% examples, 809704 words

2017-04-19 21:40:34,781 : INFO : PROGRESS: at 33.92% examples, 822502 words/s, in_qsize 0, out_qsize 2
2017-04-19 21:40:54,783 : INFO : PROGRESS: at 34.14% examples, 822611 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:41:05,788 : INFO : Loading new file for index: 440000
2017-04-19 21:41:14,789 : INFO : PROGRESS: at 34.37% examples, 822740 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:41:34,807 : INFO : PROGRESS: at 34.59% examples, 822847 words/s, in_qsize 0, out_qsize 0
2017-04-19 21:41:54,808 : INFO : PROGRESS: at 34.82% examples, 822941 words/s, in_qsize 0, out_qsize 1
2017-04-19 21:42:13,606 : INFO : Loading new file for index: 450000
2017-04-19 21:42:14,822 : INFO : PROGRESS: at 35.04% examples, 823081 words/s, in_qsize 0, out_qsize 2
2017-04-19 21:42:34,824 : INFO : PROGRESS: at 35.27% examples, 823182 words/s, in_qsize 0, out_qsize 1
2017-04-19 21:42:54,826 : INFO : PROGRESS: at 35.49% examples, 823317 words/s, in_qsize 0, out_qsize 1
2017-04-19 21:43:14,836 : INFO : PROGRES

2017-04-19 22:02:49,360 : INFO : Loading new file for index: 630000
2017-04-19 22:02:55,217 : INFO : PROGRESS: at 48.99% examples, 828188 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:03:15,218 : INFO : PROGRESS: at 49.21% examples, 828252 words/s, in_qsize 0, out_qsize 1
2017-04-19 22:03:35,226 : INFO : PROGRESS: at 49.44% examples, 828295 words/s, in_qsize 0, out_qsize 2
2017-04-19 22:03:55,228 : INFO : PROGRESS: at 49.66% examples, 828375 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:03:59,312 : INFO : Loading new file for index: 640000
2017-04-19 22:04:15,240 : INFO : PROGRESS: at 49.88% examples, 828423 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:04:35,242 : INFO : PROGRESS: at 50.11% examples, 828487 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:04:55,254 : INFO : PROGRESS: at 50.33% examples, 828530 words/s, in_qsize 0, out_qsize 1
2017-04-19 22:05:10,254 : INFO : Loading new file for index: 650000
2017-04-19 22:05:15,266 : INFO : PROGRESS: at 50.55% examples, 828593 words

2017-04-19 22:25:01,972 : INFO : Loading new file for index: 820000
2017-04-19 22:25:15,644 : INFO : PROGRESS: at 63.94% examples, 831169 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:25:35,646 : INFO : PROGRESS: at 64.17% examples, 831205 words/s, in_qsize 0, out_qsize 2
2017-04-19 22:25:55,653 : INFO : PROGRESS: at 64.40% examples, 831255 words/s, in_qsize 0, out_qsize 1
2017-04-19 22:26:08,912 : INFO : Loading new file for index: 830000
2017-04-19 22:26:15,679 : INFO : PROGRESS: at 64.62% examples, 831289 words/s, in_qsize 0, out_qsize 6
2017-04-19 22:26:35,691 : INFO : PROGRESS: at 64.84% examples, 831312 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:26:55,702 : INFO : PROGRESS: at 65.07% examples, 831366 words/s, in_qsize 0, out_qsize 1
2017-04-19 22:27:15,705 : INFO : PROGRESS: at 65.29% examples, 831419 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:27:19,723 : INFO : Loading new file for index: 840000
2017-04-19 22:27:35,711 : INFO : PROGRESS: at 65.52% examples, 831441 words

2017-04-19 22:47:36,071 : INFO : PROGRESS: at 78.97% examples, 833086 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:47:56,077 : INFO : PROGRESS: at 79.20% examples, 833132 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:48:06,834 : INFO : Loading new file for index: 1020000
2017-04-19 22:48:16,079 : INFO : PROGRESS: at 79.43% examples, 833161 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:48:36,087 : INFO : PROGRESS: at 79.65% examples, 833176 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:48:56,098 : INFO : PROGRESS: at 79.87% examples, 833197 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:49:16,111 : INFO : PROGRESS: at 80.09% examples, 833218 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:49:16,809 : INFO : Loading new file for index: 1030000
2017-04-19 22:49:36,117 : INFO : PROGRESS: at 80.32% examples, 833259 words/s, in_qsize 1, out_qsize 1
2017-04-19 22:49:56,128 : INFO : PROGRESS: at 80.54% examples, 833289 words/s, in_qsize 0, out_qsize 0
2017-04-19 22:50:16,128 : INFO : PROGR

2017-04-19 23:09:56,512 : INFO : PROGRESS: at 93.99% examples, 834232 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:10:05,295 : INFO : Loading new file for index: 1210000
2017-04-19 23:10:16,525 : INFO : PROGRESS: at 94.21% examples, 834248 words/s, in_qsize 0, out_qsize 1
2017-04-19 23:10:36,527 : INFO : PROGRESS: at 94.44% examples, 834264 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:10:56,541 : INFO : PROGRESS: at 94.66% examples, 834290 words/s, in_qsize 0, out_qsize 1
2017-04-19 23:11:15,459 : INFO : Loading new file for index: 1220000
2017-04-19 23:11:16,558 : INFO : PROGRESS: at 94.89% examples, 834303 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:11:36,560 : INFO : PROGRESS: at 95.11% examples, 834326 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:11:56,561 : INFO : PROGRESS: at 95.33% examples, 834332 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:12:16,569 : INFO : PROGRESS: at 95.56% examples, 834371 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:12:23,970 : INFO : Loadi

CPU times: user 18h 3min 5s, sys: 9min 33s, total: 18h 12min 39s
Wall time: 2h 29min 57s


2017-04-19 23:18:50,625 : INFO : not storing attribute syn0norm
2017-04-19 23:18:50,626 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_6/model.wv.syn0.npy
2017-04-19 23:18:50,859 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_6/model.syn1neg.npy
2017-04-19 23:18:51,050 : INFO : not storing attribute cum_table
2017-04-19 23:19:05,755 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_6/model
2017-04-19 23:19:05,756 : INFO : ****************** Epoch 7 --- Working o

2017-04-19 23:37:44,352 : INFO : Loading new file for index: 160000
2017-04-19 23:37:47,247 : INFO : PROGRESS: at 12.49% examples, 839221 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:38:07,252 : INFO : PROGRESS: at 12.72% examples, 839240 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:38:27,254 : INFO : PROGRESS: at 12.94% examples, 839257 words/s, in_qsize 0, out_qsize 1
2017-04-19 23:38:47,261 : INFO : PROGRESS: at 13.17% examples, 839305 words/s, in_qsize 0, out_qsize 0
2017-04-19 23:38:53,300 : INFO : Loading new file for index: 170000
2017-04-19 23:39:07,271 : INFO : PROGRESS: at 13.39% examples, 839281 words/s, in_qsize 0, out_qsize 1
2017-04-19 23:39:27,269 : INFO : PROGRESS: at 13.62% examples, 839337 words/s, in_qsize 0, out_qsize 1
2017-04-19 23:39:47,272 : INFO : PROGRESS: at 13.84% examples, 839413 words/s, in_qsize 0, out_qsize 4
2017-04-19 23:40:04,237 : INFO : Loading new file for index: 180000
2017-04-19 23:40:07,281 : INFO : PROGRESS: at 14.06% examples, 839415 words

2017-04-20 00:00:07,729 : INFO : PROGRESS: at 27.49% examples, 840379 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:00:27,730 : INFO : PROGRESS: at 27.72% examples, 840401 words/s, in_qsize 0, out_qsize 1
2017-04-20 00:00:47,734 : INFO : PROGRESS: at 27.94% examples, 840494 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:00:55,614 : INFO : Loading new file for index: 360000
2017-04-20 00:01:07,737 : INFO : PROGRESS: at 28.17% examples, 840502 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:01:27,744 : INFO : PROGRESS: at 28.39% examples, 840551 words/s, in_qsize 0, out_qsize 1
2017-04-20 00:01:47,748 : INFO : PROGRESS: at 28.61% examples, 840600 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:02:05,583 : INFO : Loading new file for index: 370000
2017-04-20 00:02:07,756 : INFO : PROGRESS: at 28.83% examples, 840636 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:02:27,757 : INFO : PROGRESS: at 29.06% examples, 840612 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:02:47,762 : INFO : PROGRES

2017-04-20 00:22:28,133 : INFO : PROGRESS: at 42.57% examples, 841382 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:22:44,684 : INFO : Loading new file for index: 550000
2017-04-20 00:22:48,143 : INFO : PROGRESS: at 42.80% examples, 841352 words/s, in_qsize 0, out_qsize 1
2017-04-20 00:23:08,148 : INFO : PROGRESS: at 43.02% examples, 841332 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:23:28,154 : INFO : PROGRESS: at 43.25% examples, 841362 words/s, in_qsize 1, out_qsize 0
2017-04-20 00:23:48,165 : INFO : PROGRESS: at 43.48% examples, 841364 words/s, in_qsize 0, out_qsize 1
2017-04-20 00:23:52,538 : INFO : Loading new file for index: 560000
2017-04-20 00:24:08,176 : INFO : PROGRESS: at 43.70% examples, 841386 words/s, in_qsize 0, out_qsize 2
2017-04-20 00:24:28,185 : INFO : PROGRESS: at 43.93% examples, 841398 words/s, in_qsize 0, out_qsize 1
2017-04-20 00:24:48,193 : INFO : PROGRESS: at 44.15% examples, 841413 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:25:01,082 : INFO : Loading

2017-04-20 00:44:40,001 : INFO : Loading new file for index: 740000
2017-04-20 00:44:48,589 : INFO : PROGRESS: at 57.63% examples, 842215 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:45:08,602 : INFO : PROGRESS: at 57.85% examples, 842236 words/s, in_qsize 0, out_qsize 2
2017-04-20 00:45:28,606 : INFO : PROGRESS: at 58.08% examples, 842206 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:45:48,620 : INFO : PROGRESS: at 58.30% examples, 842218 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:45:49,754 : INFO : Loading new file for index: 750000
2017-04-20 00:46:08,625 : INFO : PROGRESS: at 58.53% examples, 842220 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:46:28,650 : INFO : PROGRESS: at 58.75% examples, 842196 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:46:48,652 : INFO : PROGRESS: at 58.97% examples, 842206 words/s, in_qsize 0, out_qsize 0
2017-04-20 00:46:59,702 : INFO : Loading new file for index: 760000
2017-04-20 00:47:08,658 : INFO : PROGRESS: at 59.20% examples, 842208 words

2017-04-20 01:07:09,021 : INFO : PROGRESS: at 72.66% examples, 842278 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:07:29,026 : INFO : PROGRESS: at 72.89% examples, 842270 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:07:48,089 : INFO : Loading new file for index: 940000
2017-04-20 01:07:49,035 : INFO : PROGRESS: at 73.11% examples, 842272 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:08:09,036 : INFO : PROGRESS: at 73.34% examples, 842260 words/s, in_qsize 0, out_qsize 2
2017-04-20 01:08:29,033 : INFO : PROGRESS: at 73.56% examples, 842259 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:08:49,036 : INFO : PROGRESS: at 73.79% examples, 842286 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:08:55,988 : INFO : Loading new file for index: 950000
2017-04-20 01:09:09,041 : INFO : PROGRESS: at 74.02% examples, 842284 words/s, in_qsize 0, out_qsize 1
2017-04-20 01:09:29,053 : INFO : PROGRESS: at 74.24% examples, 842283 words/s, in_qsize 0, out_qsize 2
2017-04-20 01:09:49,054 : INFO : PROGRES

2017-04-20 01:29:29,575 : INFO : PROGRESS: at 87.71% examples, 842380 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:29:45,101 : INFO : Loading new file for index: 1130000
2017-04-20 01:29:49,604 : INFO : PROGRESS: at 87.93% examples, 842373 words/s, in_qsize 2, out_qsize 0
2017-04-20 01:30:09,605 : INFO : PROGRESS: at 88.16% examples, 842353 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:30:29,609 : INFO : PROGRESS: at 88.38% examples, 842352 words/s, in_qsize 0, out_qsize 1
2017-04-20 01:30:49,622 : INFO : PROGRESS: at 88.61% examples, 842348 words/s, in_qsize 0, out_qsize 1
2017-04-20 01:30:53,630 : INFO : Loading new file for index: 1140000
2017-04-20 01:31:09,643 : INFO : PROGRESS: at 88.83% examples, 842347 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:31:29,645 : INFO : PROGRESS: at 89.06% examples, 842355 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:31:49,650 : INFO : PROGRESS: at 89.29% examples, 842333 words/s, in_qsize 0, out_qsize 0
2017-04-20 01:32:01,863 : INFO : Loadi

2017-04-20 01:47:40,521 : INFO : worker thread finished; awaiting finish of 1 more threads
2017-04-20 01:47:40,528 : INFO : worker thread finished; awaiting finish of 0 more threads
2017-04-20 01:47:40,529 : INFO : training on 11070832244 raw words (7509404123 effective words) took 8914.6s, 842369 effective words/s
2017-04-20 01:47:40,531 : INFO : saving Doc2Vec object under /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_7/model, separately None
2017-04-20 01:47:40,532 : INFO : storing np array 'doctag_syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_7/model.docvecs.doctag_syn0.npy


CPU times: user 17h 42min 53s, sys: 10min 23s, total: 17h 53min 17s
Wall time: 2h 28min 34s


2017-04-20 01:47:41,240 : INFO : not storing attribute syn0norm
2017-04-20 01:47:41,241 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_7/model.wv.syn0.npy
2017-04-20 01:47:41,464 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_7/model.syn1neg.npy
2017-04-20 01:47:41,645 : INFO : not storing attribute cum_table
2017-04-20 01:47:56,848 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_7/model
2017-04-20 01:47:56,849 : INFO : ****************** Epoch 8 --- Working o

2017-04-20 02:06:38,293 : INFO : PROGRESS: at 12.69% examples, 852344 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:06:58,298 : INFO : PROGRESS: at 12.92% examples, 852366 words/s, in_qsize 0, out_qsize 1
2017-04-20 02:07:18,308 : INFO : PROGRESS: at 13.15% examples, 852352 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:07:26,263 : INFO : Loading new file for index: 170000
2017-04-20 02:07:38,323 : INFO : PROGRESS: at 13.37% examples, 852339 words/s, in_qsize 0, out_qsize 1
2017-04-20 02:07:58,334 : INFO : PROGRESS: at 13.60% examples, 852358 words/s, in_qsize 0, out_qsize 1
2017-04-20 02:08:18,336 : INFO : PROGRESS: at 13.82% examples, 852339 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:08:36,265 : INFO : Loading new file for index: 180000
2017-04-20 02:08:38,342 : INFO : PROGRESS: at 14.05% examples, 852367 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:08:58,357 : INFO : PROGRESS: at 14.28% examples, 852371 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:09:18,358 : INFO : PROGRES

2017-04-20 02:28:58,627 : INFO : PROGRESS: at 27.90% examples, 853042 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:29:09,767 : INFO : Loading new file for index: 360000
2017-04-20 02:29:18,628 : INFO : PROGRESS: at 28.13% examples, 853041 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:29:38,630 : INFO : PROGRESS: at 28.35% examples, 853045 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:29:58,635 : INFO : PROGRESS: at 28.58% examples, 853063 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:30:18,637 : INFO : PROGRESS: at 28.81% examples, 853081 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:30:19,013 : INFO : Loading new file for index: 370000
2017-04-20 02:30:38,638 : INFO : PROGRESS: at 29.04% examples, 853091 words/s, in_qsize 0, out_qsize 1
2017-04-20 02:30:58,662 : INFO : PROGRESS: at 29.26% examples, 853077 words/s, in_qsize 0, out_qsize 2
2017-04-20 02:31:18,669 : INFO : PROGRESS: at 29.50% examples, 853072 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:31:25,511 : INFO : Loading

2017-04-20 02:51:39,186 : INFO : PROGRESS: at 41.23% examples, 810703 words/s, in_qsize 0, out_qsize 1
2017-04-20 02:51:59,190 : INFO : PROGRESS: at 41.46% examples, 810857 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:52:19,196 : INFO : PROGRESS: at 41.68% examples, 811033 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:52:39,201 : INFO : PROGRESS: at 41.91% examples, 811224 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:52:45,719 : INFO : Loading new file for index: 540000
2017-04-20 02:52:59,207 : INFO : PROGRESS: at 42.14% examples, 811443 words/s, in_qsize 0, out_qsize 1
2017-04-20 02:53:19,206 : INFO : PROGRESS: at 42.36% examples, 811635 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:53:39,211 : INFO : PROGRESS: at 42.59% examples, 811835 words/s, in_qsize 0, out_qsize 1
2017-04-20 02:53:54,039 : INFO : Loading new file for index: 550000
2017-04-20 02:53:59,228 : INFO : PROGRESS: at 42.82% examples, 812004 words/s, in_qsize 0, out_qsize 0
2017-04-20 02:54:19,234 : INFO : PROGRES

2017-04-20 03:13:59,633 : INFO : PROGRESS: at 56.42% examples, 821208 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:14:19,652 : INFO : PROGRESS: at 56.65% examples, 821311 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:14:29,037 : INFO : Loading new file for index: 730000
2017-04-20 03:14:39,654 : INFO : PROGRESS: at 56.87% examples, 821435 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:14:59,660 : INFO : PROGRESS: at 57.10% examples, 821557 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:15:19,662 : INFO : PROGRESS: at 57.32% examples, 821737 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:15:38,417 : INFO : Loading new file for index: 740000
2017-04-20 03:15:39,667 : INFO : PROGRESS: at 57.55% examples, 821838 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:15:59,669 : INFO : PROGRESS: at 57.77% examples, 821968 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:16:19,685 : INFO : PROGRESS: at 58.00% examples, 822072 words/s, in_qsize 0, out_qsize 1
2017-04-20 03:16:39,698 : INFO : PROGRES

2017-04-20 03:36:14,689 : INFO : Loading new file for index: 920000
2017-04-20 03:36:20,007 : INFO : PROGRESS: at 71.62% examples, 827649 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:36:40,023 : INFO : PROGRESS: at 71.84% examples, 827713 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:37:00,024 : INFO : PROGRESS: at 72.07% examples, 827782 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:37:20,026 : INFO : PROGRESS: at 72.29% examples, 827868 words/s, in_qsize 0, out_qsize 2
2017-04-20 03:37:23,095 : INFO : Loading new file for index: 930000
2017-04-20 03:37:40,040 : INFO : PROGRESS: at 72.52% examples, 827953 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:38:00,053 : INFO : PROGRESS: at 72.76% examples, 828030 words/s, in_qsize 0, out_qsize 2
2017-04-20 03:38:20,040 : INFO : PROGRESS: at 72.98% examples, 828078 words/s, in_qsize 0, out_qsize 1
2017-04-20 03:38:30,693 : INFO : Loading new file for index: 940000
2017-04-20 03:38:40,059 : INFO : PROGRESS: at 73.21% examples, 828157 words

2017-04-20 03:58:40,443 : INFO : PROGRESS: at 86.83% examples, 831791 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:59:00,444 : INFO : PROGRESS: at 87.05% examples, 831843 words/s, in_qsize 0, out_qsize 0
2017-04-20 03:59:04,028 : INFO : Loading new file for index: 1120000
2017-04-20 03:59:20,453 : INFO : PROGRESS: at 87.28% examples, 831900 words/s, in_qsize 2, out_qsize 0
2017-04-20 03:59:40,453 : INFO : PROGRESS: at 87.51% examples, 831933 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:00:00,483 : INFO : PROGRESS: at 87.73% examples, 831988 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:00:13,689 : INFO : Loading new file for index: 1130000
2017-04-20 04:00:20,484 : INFO : PROGRESS: at 87.96% examples, 832041 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:00:40,486 : INFO : PROGRESS: at 88.18% examples, 832077 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:01:00,490 : INFO : PROGRESS: at 88.41% examples, 832123 words/s, in_qsize 0, out_qsize 0
2017-04-20 04:01:20,501 : INFO : PROGR

2017-04-20 04:17:57,202 : INFO : worker thread finished; awaiting finish of 5 more threads
2017-04-20 04:17:57,203 : INFO : worker thread finished; awaiting finish of 4 more threads
2017-04-20 04:17:57,211 : INFO : worker thread finished; awaiting finish of 3 more threads
2017-04-20 04:17:57,219 : INFO : worker thread finished; awaiting finish of 2 more threads
2017-04-20 04:17:57,223 : INFO : worker thread finished; awaiting finish of 1 more threads
2017-04-20 04:17:57,224 : INFO : worker thread finished; awaiting finish of 0 more threads
2017-04-20 04:17:57,225 : INFO : training on 11070832244 raw words (7509452256 effective words) took 9000.2s, 834362 effective words/s
2017-04-20 04:17:57,226 : INFO : saving Doc2Vec object under /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_8/model, separately None
2017-04-20 04:17:57,227 : INFO : stor

CPU times: user 17h 32min 25s, sys: 10min 12s, total: 17h 42min 38s
Wall time: 2h 30min


2017-04-20 04:17:57,952 : INFO : not storing attribute syn0norm
2017-04-20 04:17:57,953 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_8/model.wv.syn0.npy
2017-04-20 04:17:58,165 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_8/model.syn1neg.npy
2017-04-20 04:17:58,349 : INFO : not storing attribute cum_table
2017-04-20 04:18:11,474 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_recalc_2_description/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_description/epoch_8/model


CPU times: user 7d 6h 40min 58s, sys: 1h 53min 55s, total: 7d 8h 34min 54s
Wall time: 1d 1h 4min 49s


## Inference Only (if needed)

In [42]:
# Overrides the NUM_CORES = 16 set in the global-variables cell; the value is
# assigned to doc2vec_model.workers when a saved model is loaded for inference.
NUM_CORES = 32

In [41]:
# Pick which epoch's saved Doc2Vec model to load for an inference-only run.
epoch = 1
GLOBAL_VARS.MODEL_NAME = placeholder_model_name.format(epoch)

# Build the model path once so the existence check and the load always refer
# to the same file.
doc2vec_model_path = os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, MODEL_PREFIX)

if os.path.exists(doc2vec_model_path):
    doc2vec_model = Doc2Vec.load(doc2vec_model_path)
    # Use the currently configured number of worker threads rather than the
    # value that was pickled with the model.
    doc2vec_model.workers = NUM_CORES
    GLOBAL_VARS.DOC2VEC_MODEL = doc2vec_model
else:
    # Do not fail, but make the miss visible: otherwise the cells below would
    # silently keep using whatever doc2vec_model was loaded previously (or
    # fail later with a NameError that hides the real cause).
    logging.warning("Doc2Vec model not found at %s; doc2vec_model was not (re)loaded",
                    doc2vec_model_path)

2017-04-18 18:58:38,660 : INFO : loading Doc2Vec object from /mnt/virtual-machines/data/parameter_search_doc2vec_models_2_claims/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_claims/epoch_1/model
2017-04-18 18:58:45,944 : INFO : loading docvecs recursively from /mnt/virtual-machines/data/parameter_search_doc2vec_models_2_claims/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_claims/epoch_1/model.docvecs.* with mmap=None
2017-04-18 18:58:45,945 : INFO : loading doctag_syn0 from /mnt/virtual-machines/data/parameter_search_doc2vec_models_2_claims/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_claims/epoch_1/model.docvecs.doctag_syn0.npy with mmap=None
2017-04-18 18:58:51,024 : INFO : loading wv recursively from /mnt/virtual-machines/data/parameter_search_doc2vec_models_2_claims/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_

In [43]:
# Display the model name (configuration string + epoch) currently selected.
GLOBAL_VARS.MODEL_NAME

'doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_2_claims/epoch_1'

In [None]:
# Run the inference-only helper over the validation documents with the loaded
# model. NOTE(review): the helper is defined elsewhere in this notebook;
# presumably Xv holds the inferred document vectors and VALIDATION_DICT names
# the pickle the results are cached under -- confirm against its definition.
Xv = get_extended_docs_with_inference_data_only(doc2vec_model, VALIDATION_DICT, 
                                         validation_preprocessed_files_prefix, level, model_name)

2017-04-18 18:59:26,842 : INFO : ===== Getting vectors with inference
2017-04-18 18:59:27,021 : INFO : Loading new file for index: 0
2017-04-18 18:59:55,443 : INFO : Loading new file for index: 10000
2017-04-18 19:00:44,049 : INFO : Finished: 10000 tags
2017-04-18 19:01:10,981 : INFO : Loading new file for index: 20000
2017-04-18 19:01:52,567 : INFO : Finished: 20000 tags
2017-04-18 19:02:18,405 : INFO : Loading new file for index: 30000
2017-04-18 19:02:58,946 : INFO : Finished: 30000 tags
2017-04-18 19:03:25,179 : INFO : Loading new file for index: 40000
2017-04-18 19:04:12,858 : INFO : Finished: 40000 tags
2017-04-18 19:04:39,225 : INFO : Loading new file for index: 50000
2017-04-18 19:05:23,518 : INFO : Finished: 50000 tags
2017-04-18 19:05:48,791 : INFO : Loading new file for index: 60000
2017-04-18 19:06:35,336 : INFO : Finished: 60000 tags
2017-04-18 19:07:00,384 : INFO : Loading new file for index: 70000
2017-04-18 19:07:50,475 : INFO : Finished: 70000 tags
2017-04-18 19:08:15,

#### Testing inference

In [55]:
# Smoke test: infer a vector for the first validation document only and print
# it, to check that the loaded model produces sensible output.
inference_docs_iterator = BatchWrapper(validation_preprocessed_files_prefix, batch_size=None, level=level, level_type=model_name)
# Iterate the iterator created on the line above (the original loop referenced
# a misspelled name, which fails or reuses a stale object from an earlier run).
for doc_id, doc_tokens in inference_docs_iterator:
    rep = doc2vec_model.infer_vector(doc_tokens)
    print (doc_id, rep)
    # Only the first document is needed for this check.
    break

('08521002', array([  1.13558674e+00,  -2.01971769e-01,  -9.30447519e-01,
         9.55632687e-01,   5.11517346e-01,   4.34441900e+00,
        -3.77764761e-01,  -1.11617422e+00,  -2.15896085e-01,
         9.09354746e-01,   5.74674904e-01,  -2.07049704e+00,
        -7.20400810e-01,   4.94136661e-01,  -1.74060893e+00,
        -2.17272949e+00,  -4.39270258e-01,  -1.51936769e+00,
         5.65607429e-01,  -4.58835810e-01,  -1.69598356e-01,
         1.77733886e+00,   3.66123140e-01,   1.38953611e-01,
        -1.04259264e+00,   8.84979665e-01,  -8.56729895e-02,
        -6.04329109e-01,   4.42179322e-01,   1.08561194e+00,
        -2.49654725e-01,   3.02951038e-01,  -3.80307257e-01,
         1.32433748e+00,   7.18038738e-01,   7.99864233e-01,
        -3.60305488e-01,  -3.32749695e-01,   1.86409019e-02,
        -1.16298962e+00,  -2.36521304e-01,   8.52507114e-01,
        -4.25269688e-03,  -2.73190904e+00,  -1.39228487e+00,
         3.14658254e-01,   1.19927609e+00,  -8.86219382e-01,
        -2.

In [53]:
# Display the loaded model's 'syn0' array (stored under model.wv by gensim)
# as a quick sanity check that the weights were loaded.
doc2vec_model.wv.syn0

array([[ 0.24962339,  0.03808838, -0.38492572, ...,  0.81019139,
        -0.0872335 ,  0.00503489],
       [-0.24393913, -0.9072656 , -0.08245134, ..., -0.12438237,
        -0.10501056,  0.07241193],
       [ 0.06769085, -0.22004843,  0.05649997, ...,  0.15331532,
        -0.87121236, -0.71148068],
       ..., 
       [ 0.02257917,  0.18380728, -0.19475998, ...,  0.72972393,
        -0.03356596, -0.29145467],
       [-0.20255305, -0.25994578,  0.31640032, ..., -0.02623975,
         0.41660461, -0.45980361],
       [ 1.09419656, -0.97489876, -0.3509953 , ...,  0.82430571,
         0.02756385,  0.7905944 ]], dtype=float32)