## Creates the doc2vec vector embeddings for a specific configuration

In [1]:
import json
import nltk
from nltk.tokenize import RegexpTokenizer
import string
import math
import os
import time
from collections import namedtuple
import cPickle as pickle
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import random
import gzip

from multiprocessing import Process, Queue

from multiprocessing.dummy import Pool as ThreadPool
import itertools

from sklearn.metrics import coverage_error
import sklearn.metrics
from sklearn.multiclass import OneVsRestClassifier
from sklearn import linear_model
from sklearn.preprocessing import MultiLabelBinarizer

from gensim.models.doc2vec import Doc2Vec, LabeledSentence

import logging
from logging import info
from functools import partial

from thesis.utils.metrics import *
from thesis.utils.file import *

## Global variables used throughout the script

In [2]:
# Start from a clean root logger: drop every handler that is already attached
# (e.g. from an earlier run of this cell) before configuring logging again.
root = logging.getLogger()
for stale_handler in list(root.handlers):
    root.removeHandler(stale_handler)
# basicConfig attaches a default StreamHandler using the format below
logging.basicConfig(level=logging.INFO, format='%(asctime)s : %(levelname)s : %(message)s')

In [3]:
# Fixed random seeds (presumably for reproducible runs).
# SVM_SEED is not referenced in the visible part of this notebook.
SVM_SEED = 1234
# Passed as ``seed`` when the Doc2Vec model is constructed.
DOC2VEC_SEED = 1234

In [4]:
# Passed to Doc2Vec as ``min_count``.
MIN_WORD_COUNT = 100
# Used both as the Doc2Vec ``workers`` value and as the size of the thread pool
# that runs vector inference.
NUM_CORES = 16

In [5]:
# Shared run state. The namedtuple class itself is never instantiated in the
# visible code: values are assigned directly as class attributes
# (e.g. ``GLOBAL_VARS.MODEL_NAME = ...``), so it acts as a plain mutable namespace.
GLOBAL_VARS = namedtuple('GLOBAL_VARS', ['MODEL_NAME', 'DOC2VEC_MODEL_NAME', 'DOC2VEC_MODEL', 
                                         'SVM_MODEL_NAME', 'NN_MODEL_NAME'])

In [6]:
# Names of the artefacts written below the doc2vec model save location.
# Sub-directory that holds the vocabulary-only model.
VOCAB_MODEL = "vocab_model"
# File name under which every Doc2Vec model (vocab model and per-epoch models) is saved.
MODEL_PREFIX = "model"
# Cache file for the inferred validation vectors.
VALIDATION_DICT = "validation_dict.pkl"
# The remaining names are not referenced in the visible part of this notebook.
TEST_MATRIX = "test_matrix.pkl"
TEST_DICT = "test_dict.pkl"
METRICS = "metrics.pkl"
CLASSIFIER = "classifier.pkl"

In [7]:
# Everything is read from and written below a single data root. Directory
# values keep their trailing slash so they can still be extended by plain
# string concatenation elsewhere.
root_location = "/mnt/virtual-machines/data/"
exports_location = os.path.join(root_location, "exported_data/")

# Pickled exports that are loaded in the next cell
doc_classifications_map_file = os.path.join(exports_location, "doc_classification_map.pkl")
training_docs_list_file = os.path.join(exports_location, "training_docs_list.pkl")
validation_docs_list_file = os.path.join(exports_location, "validation_docs_list.pkl")
test_docs_list_file = os.path.join(exports_location, "test_docs_list.pkl")

preprocessed_location = os.path.join(root_location,
                                     "preprocessed_data/extended_pv_abs_desc_claims_full_chunks/")

# File-name prefixes of the preprocessed chunks; the chunk reader appends
# "<index>.gz" to a prefix to obtain the file to open
training_preprocessed_files_prefix = os.path.join(preprocessed_location,
                                                  "extended_pv_training_docs_data_preprocessed-")
validation_preprocessed_files_prefix = os.path.join(preprocessed_location,
                                                    "extended_pv_validation_docs_data_preprocessed-")
test_preprocessed_files_prefix = os.path.join(preprocessed_location,
                                              "extended_pv_test_docs_data_preprocessed-")

## Load general data required for classification

In [8]:
%%time
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in binary mode and closed as soon as loading finishes,
    instead of leaving an open handle behind for the garbage collector.
    """
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file)


# Load the classification map and the three document-id lists exported earlier
doc_classification_map = _load_pickle(doc_classifications_map_file)
training_docs_list = _load_pickle(training_docs_list_file)
validation_docs_list = _load_pickle(validation_docs_list_file)
test_docs_list = _load_pickle(test_docs_list_file)

CPU times: user 21.7 s, sys: 1.47 s, total: 23.2 s
Wall time: 23.2 s


In [9]:
len(training_docs_list)

1286325

In [10]:
len(validation_docs_list)

321473

In [11]:
len(test_docs_list)

401877

# Utility functions for data loading

In [12]:
VALIDATION_MINI_BATCH_SIZE = 10000
def get_extended_docs_with_inference_data_only(doc2vec_model, file_to_write, preprocessed_files_prefix, level, model_name):
    """
    Use the trained doc2vec model to get the paragraph vector representations of the validation or test documents

    The result is cached: if ``file_to_write`` already exists below
    ``doc2vec_model_save_location/GLOBAL_VARS.MODEL_NAME`` it is unpickled and
    returned without running any inference. Otherwise the vectors are inferred
    and pickled to that path before being returned.

    :param doc2vec_model: model whose ``infer_vector`` is called for each document
    :param file_to_write: file name of the pickle cache inside the model directory
    :param preprocessed_files_prefix: prefix of the preprocessed chunk files to read
    :param level: forwarded to ``BatchWrapper`` as ``level``
    :param model_name: forwarded to ``BatchWrapper`` as ``level_type``
    :return: dict mapping document id to its inferred vector
    """

    def infer_one_doc(doc_tuple):
        # NOTE(review): the model's random state is not reset per document, so
        # inferred vectors are presumably not reproducible across runs - confirm.
        doc_id, doc_tokens = doc_tuple
        return (doc_id, doc2vec_model.infer_vector(doc_tokens))

    cache_path = os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, file_to_write)

    if os.path.exists(cache_path):
        info("===== Loading inference vectors")
        with open(cache_path, 'rb') as cache_file:
            inference_documents_reps = pickle.load(cache_file)
        info("Loaded inference vectors matrix")
        return inference_documents_reps

    info("===== Getting vectors with inference")
    inference_documents_reps = {}

    # batch_size=None makes the wrapper yield plain (doc_id, tokens) tuples
    inference_docs_iterator = BatchWrapper(preprocessed_files_prefix, batch_size=None, level=level, level_type=model_name)
    docs = iter(inference_docs_iterator)
    docs_done = 0

    # Multi-threaded inference. map consumes the whole iterable it is given, so
    # itertools.islice hands it one mini-batch at a time.
    pool = ThreadPool(NUM_CORES)
    try:
        while True:
            batch_reps = pool.map(infer_one_doc, itertools.islice(docs, VALIDATION_MINI_BATCH_SIZE))
            if not batch_reps:
                # an empty mini-batch means the document stream is exhausted
                break
            inference_documents_reps.update(batch_reps)
            docs_done += len(batch_reps)
            info("Finished: {} tags".format(docs_done))
    finally:
        # release the worker threads even when inference fails half-way
        pool.close()
        pool.terminate()

    with open(cache_path, 'wb') as cache_file:
        pickle.dump(inference_documents_reps, cache_file)

    return inference_documents_reps

In [13]:
class ExtendedPVDocumentBatchGenerator(Process):
    """Producer process that streams the lines of gzipped chunk files into a queue.

    Files are named ``<filename_prefix><index>.gz`` where ``index`` starts at
    ``start_file`` and grows by ``offset`` per file. Every line of every file is
    put on ``queue`` in order. The first ``IOError`` (normally: the next file
    does not exist) ends the stream, which is signalled by putting ``False``.
    """

    def __init__(self, filename_prefix, queue, batch_size=10000, start_file=0, offset=10000):
        """
        :param filename_prefix: path prefix shared by all chunk files
        :param queue: queue that receives the raw lines and the final ``False``
        :param batch_size: accepted for interface compatibility; not used here
        :param start_file: index of the first chunk file
        :param offset: index distance between two consecutive chunk files
        """
        super(ExtendedPVDocumentBatchGenerator, self).__init__()
        self.queue = queue
        self.offset = offset
        self.filename_prefix = filename_prefix
        # run() adds ``offset`` before opening a file, so start one step back
        self.files_loaded = start_file - offset

    def run(self):
        """Feed all lines of all chunk files to the queue, then the ``False`` sentinel."""
        while True:
            next_index = self.files_loaded + self.offset
            info("Loading new file for index: {}".format(str(next_index)))
            cur_file = None
            try:
                cur_file = gzip.open(self.filename_prefix + str(next_index) + '.gz')
                # only count the file once it could actually be opened
                self.files_loaded = next_index
                for line in cur_file:
                    self.queue.put(line)
            except IOError:
                # Tell the consumer that nothing more will arrive.
                self.queue.put(False, block=True, timeout=None)
                info("All files are loaded - last file: {}".format(str(self.files_loaded + self.offset)))
                return
            finally:
                # close the handle on every path, including a failed read
                if cur_file is not None:
                    cur_file.close()


class BatchWrapper(object):
    """Iterable over the documents produced by an ``ExtendedPVDocumentBatchGenerator``.

    Creating an instance starts the producer process, which fills an internal
    queue with raw lines. Iterating turns each line into sentences and yields
    them, skipping documents whose id does not match ``level``/``level_type``.

    The end-of-stream sentinel is consumed from the queue during iteration and
    the producer sends it only once, so an instance supports a single pass;
    create a new instance for every pass over the data.
    """

    def __init__(self, training_preprocessed_files_prefix, buffer_size=10000, batch_size=10000, level=1, level_type=None):
        """
        :param training_preprocessed_files_prefix: prefix of the chunk files to read
        :param buffer_size: maximum number of lines buffered in the queue
        :param batch_size: maximum number of words per sentence (at most 10000),
            or None to yield one ``(doc_id, words)`` tuple per document
        :param level: number of "_"-separated parts a document id must have
        :param level_type: name whose first character must match the first
            character of the id's second part; None accepts every type
        """
        # None has to be tested first: comparing None with an int raises
        # TypeError on Python 3
        assert batch_size is None or batch_size <= 10000
        self.level = level
        self.level_type = level_type[0] if level_type is not None else None
        self.batch_size = batch_size
        self.q = Queue(maxsize=buffer_size)
        self.p = ExtendedPVDocumentBatchGenerator(training_preprocessed_files_prefix, queue=self.q,
                                                  batch_size=batch_size, start_file=0, offset=10000)
        self.p.start()
        self.cur_data = []

    def is_correct_type(self, doc_id):
        """Return True if ``doc_id`` belongs to the configured level and type."""
        parts = doc_id.split("_")
        if len(parts) != self.level:
            return False
        if len(parts) == 1:
            # ids without "_" carry no type information
            return True
        # slicing instead of indexing keeps an empty second part from raising
        return self.level_type is None or parts[1][:1] == self.level_type

    def return_sentences(self, line):
        """Convert one raw line (document id followed by space-separated words) to sentences.

        :return: False if the document is filtered out; otherwise a tuple holding
            either one ``(doc_id, words)`` pair (``batch_size`` is None) or
            ``LabeledSentence`` objects of at most ``batch_size`` words each
        """
        # NOTE(review): the line is split without being stripped, so a trailing
        # newline delivered by the producer stays attached to the last token -
        # confirm this is intended before changing it (it affects the vocabulary).
        line_array = tuple(line.split(" "))
        doc_id = line_array[0]
        if not self.is_correct_type(doc_id):
            return False
        words = line_array[1:]

        if self.batch_size is None:
            # dont use LabeledSentence for validation iterator
            return ((doc_id, words),)

        # divide the document to batches according to the batch size
        return tuple(
            LabeledSentence(words=words[start: start + self.batch_size], tags=[doc_id])
            for start in range(0, len(words), self.batch_size)
        )

    def __iter__(self):
        """Yield sentences until the producer's ``False`` sentinel is received."""
        while True:
            item = self.q.get(block=True)
            if item is False:
                # plain return ends the generator; raising StopIteration inside
                # a generator is turned into RuntimeError by PEP 479
                return
            sentences = self.return_sentences(item)
            if sentences:
                for sentence in sentences:
                    yield sentence


# Doc2vec and SVM Parameters

In [15]:
# Hyper-parameters of the Doc2Vec model; each value is also encoded in the
# model's directory name unless noted otherwise.
DOC2VEC_SIZE = 200  # passed as ``size``
DOC2VEC_WINDOW = 2  # passed as ``window``
DOC2VEC_MAX_VOCAB_SIZE = None  # passed as ``max_vocab_size``
DOC2VEC_SAMPLE = 1e-3  # passed as ``sample``; not part of the model name
DOC2VEC_TYPE = 1  # passed as ``dm``; named 'dm' when 1, 'pv-dbow' otherwise
DOC2VEC_HIERARCHICAL_SAMPLE = 0  # passed as ``hs``
DOC2VEC_NEGATIVE_SAMPLE_SIZE = 10  # passed as ``negative``
DOC2VEC_CONCAT = 0  # passed as ``dm_concat``
DOC2VEC_MEAN = 1  # recorded in the model name as ``mean_{}``
DOC2VEC_TRAIN_WORDS = 0  # passed as ``dbow_words``
DOC2VEC_EPOCHS = 1 # we do our training manually one epoch at a time
DOC2VEC_MAX_EPOCHS = 8  # last epoch number of the manual training loop
REPORT_DELAY = 20 # report the progress every x seconds
REPORT_VOCAB_PROGRESS = 100000 # report vocab progress every x documents

## Create the Doc2vec model and create/load the vocab

In [16]:
# (level, model_name) configurations; only the first entry is used.
# ``level`` is the number of "_"-separated parts a document id must have and the
# first letter of ``model_name`` has to match the first letter of the id's
# second part (see BatchWrapper.is_correct_type).
models = [(3, 'claims')]
level, model_name = models[0]

In [24]:
# Resolve the save location first so that the log line can actually name it
doc2vec_model_save_location = os.path.join(root_location,
                                           "parameter_search_doc2vec_models_" + str(level) + '_' + model_name,
                                           "full")
info("creating/loading vocabulary for " + str(level) + ' ' + model_name + ' in ' + doc2vec_model_save_location)
if not os.path.exists(doc2vec_model_save_location):
    os.makedirs(doc2vec_model_save_location)
if not os.path.exists(os.path.join(doc2vec_model_save_location, VOCAB_MODEL)):
    os.makedirs(os.path.join(doc2vec_model_save_location, VOCAB_MODEL))

# The model name encodes the hyper-parameters, so every configuration gets its
# own directory below the save location
placeholder_model_name = 'doc2vec_size_{}_w_{}_type_{}_concat_{}_mean_{}_trainwords_{}_hs_{}_neg_{}_vocabsize_{}_model_{}'.format(
    DOC2VEC_SIZE,
    DOC2VEC_WINDOW,
    'dm' if DOC2VEC_TYPE == 1 else 'pv-dbow',
    DOC2VEC_CONCAT,
    DOC2VEC_MEAN,
    DOC2VEC_TRAIN_WORDS,
    DOC2VEC_HIERARCHICAL_SAMPLE,
    DOC2VEC_NEGATIVE_SAMPLE_SIZE,
    str(DOC2VEC_MAX_VOCAB_SIZE),
    str(level) + '_' + model_name
)
GLOBAL_VARS.DOC2VEC_MODEL_NAME = placeholder_model_name
# "{}" is filled with the epoch number by the training loop
placeholder_model_name = os.path.join(placeholder_model_name, "epoch_{}")
info("FILE " + os.path.join(doc2vec_model_save_location, VOCAB_MODEL, MODEL_PREFIX))
doc2vec_model = Doc2Vec(size=DOC2VEC_SIZE, window=DOC2VEC_WINDOW, min_count=MIN_WORD_COUNT,
                max_vocab_size=DOC2VEC_MAX_VOCAB_SIZE,
                sample=DOC2VEC_SAMPLE, seed=DOC2VEC_SEED, workers=NUM_CORES,
                # doc2vec algorithm dm=1 => PV-DM, dm=0 => PV-DBOW, PV-DM dictates CBOW for words
                dm=DOC2VEC_TYPE,
                # hs=0 => negative sampling, hs=1 => hierarchical softmax
                hs=DOC2VEC_HIERARCHICAL_SAMPLE, negative=DOC2VEC_NEGATIVE_SAMPLE_SIZE,
                dm_concat=DOC2VEC_CONCAT,
                # DOC2VEC_MEAN is part of the model name, so it has to reach the
                # model as well; it used to be dropped, leaving gensim's default
                # in effect whatever the name claimed
                dm_mean=DOC2VEC_MEAN,
                # would train words with skip-gram on top of cbow, we don't need that for now
                dbow_words=DOC2VEC_TRAIN_WORDS,
                iter=DOC2VEC_EPOCHS)

GLOBAL_VARS.DOC2VEC_MODEL = doc2vec_model


2017-04-10 16:19:23,129 : INFO : creating/loading vocabulary for 1 document in 
2017-04-10 16:19:23,130 : INFO : FILE /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/vocab_model/model


In [None]:
# The vocabulary pass is done once and stored as its own model; later runs
# reuse the stored model instead of scanning the training data again.
vocab_model_path = os.path.join(doc2vec_model_save_location, VOCAB_MODEL, MODEL_PREFIX)
if os.path.exists(vocab_model_path):
    info("Loading vocab model")
    doc2vec_model_vocab_model = Doc2Vec.load(vocab_model_path)
    # initialise the freshly configured model from the stored vocab model
    doc2vec_model.reset_from(doc2vec_model_vocab_model)
else:
    info("Creating vocab model")
    training_docs_iterator = BatchWrapper(training_preprocessed_files_prefix, level_type=model_name,
                                          level=level, batch_size=10000)
    doc2vec_model.build_vocab(sentences=training_docs_iterator, progress_per=REPORT_VOCAB_PROGRESS)
    doc2vec_model.save(vocab_model_path)

## Actual Training, validation and Metrics Loop

In [25]:
# The training loop subtracts DOC2VEC_ALPHA_DECREASE from alpha after every
# epoch and then sets min_alpha to the new alpha.
# NOTE(review): 0.025 is presumably also the model's starting alpha, which would
# keep the learning rate constant within the first epoch - confirm.
doc2vec_model.min_alpha = 0.025
DOC2VEC_ALPHA_DECREASE = 0.001

In [26]:
# Same value the model was constructed with; models loaded from disk inside the
# training loop get it assigned again there.
doc2vec_model.workers = NUM_CORES

In [None]:
%%time
# when resuming, resume from an epoch with a previously created doc2vec model to get the learning rate right
start_from = 1
for epoch in range(start_from, DOC2VEC_MAX_EPOCHS+1):
    GLOBAL_VARS.MODEL_NAME = placeholder_model_name.format(epoch)
    info("****************** Epoch {} --- Working on {} *******************".format(epoch, GLOBAL_VARS.MODEL_NAME))
    
    # if we have the model, just load it, otherwise train the previous model
    if os.path.exists(os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, MODEL_PREFIX)):
        doc2vec_model = Doc2Vec.load(os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, MODEL_PREFIX))
        doc2vec_model.workers = NUM_CORES
        GLOBAL_VARS.DOC2VEC_MODEL = doc2vec_model
    else:
        # train the doc2vec model
        training_docs_iterator = BatchWrapper(training_preprocessed_files_prefix, batch_size=10000, level=level,
                                          level_type=model_name)
        %time doc2vec_model.train(sentences=training_docs_iterator, report_delay=REPORT_DELAY)
        doc2vec_model.alpha -= DOC2VEC_ALPHA_DECREASE  # decrease the learning rate
        doc2vec_model.min_alpha = doc2vec_model.alpha  # fix the learning rate, no decay
        ensure_disk_location_exists(os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME))
        doc2vec_model.save(os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, MODEL_PREFIX))
        GLOBAL_VARS.DOC2VEC_MODEL = doc2vec_model
        
    # only do the inference for higher epochs, as inference usually takes as much time as the actual training
    if epoch > 7:
        # Validation Embeddings
        info('Getting Validation Embeddings')
        Xv = get_extended_docs_with_inference_data_only(doc2vec_model, VALIDATION_DICT, 
                                         validation_preprocessed_files_prefix, level, model_name)

2017-04-09 16:56:40,606 : INFO : ****************** Epoch 1 --- Working on doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_1 *******************
2017-04-09 16:56:40,735 : INFO : training model with 16 workers on 446814 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=10 window=2
2017-04-09 16:56:40,735 : INFO : Loading new file for index: 0
2017-04-09 16:56:40,736 : INFO : expecting 1879865 sentences, matching count from corpus used for vocabulary survey
2017-04-09 16:56:41,782 : INFO : PROGRESS: at 0.01% examples, 610582 words/s, in_qsize 0, out_qsize 2
2017-04-09 16:57:01,783 : INFO : PROGRESS: at 0.20% examples, 837420 words/s, in_qsize 0, out_qsize 2
2017-04-09 16:57:21,803 : INFO : PROGRESS: at 0.41% examples, 854427 words/s, in_qsize 0, out_qsize 2
2017-04-09 16:57:41,806 : INFO : PROGRESS: at 0.61% examples, 854564 words/s, in_qsize 0, out_qsize 0
2017-04-09 16:57:55,704 : INFO : Loading new file for i

2017-04-09 17:18:22,190 : INFO : PROGRESS: at 12.73% examples, 842020 words/s, in_qsize 0, out_qsize 0
2017-04-09 17:18:42,196 : INFO : PROGRESS: at 12.92% examples, 841905 words/s, in_qsize 0, out_qsize 2
2017-04-09 17:19:02,200 : INFO : PROGRESS: at 13.12% examples, 841955 words/s, in_qsize 0, out_qsize 0
2017-04-09 17:19:15,216 : INFO : Loading new file for index: 170000
2017-04-09 17:19:22,211 : INFO : PROGRESS: at 13.32% examples, 841940 words/s, in_qsize 0, out_qsize 0
2017-04-09 17:19:42,221 : INFO : PROGRESS: at 13.51% examples, 841777 words/s, in_qsize 0, out_qsize 2
2017-04-09 17:20:02,225 : INFO : PROGRESS: at 13.71% examples, 841798 words/s, in_qsize 0, out_qsize 0
2017-04-09 17:20:22,230 : INFO : PROGRESS: at 13.90% examples, 841741 words/s, in_qsize 0, out_qsize 0
2017-04-09 17:20:36,235 : INFO : Loading new file for index: 180000
2017-04-09 17:20:42,233 : INFO : PROGRESS: at 14.09% examples, 841749 words/s, in_qsize 0, out_qsize 0
2017-04-09 17:21:02,234 : INFO : PROGRES

2017-04-09 17:41:22,701 : INFO : PROGRESS: at 26.34% examples, 845249 words/s, in_qsize 0, out_qsize 0
2017-04-09 17:41:36,462 : INFO : Loading new file for index: 340000
2017-04-09 17:41:42,705 : INFO : PROGRESS: at 26.54% examples, 845396 words/s, in_qsize 0, out_qsize 0
2017-04-09 17:42:02,710 : INFO : PROGRESS: at 26.73% examples, 845352 words/s, in_qsize 0, out_qsize 0
2017-04-09 17:42:22,711 : INFO : PROGRESS: at 26.93% examples, 845511 words/s, in_qsize 0, out_qsize 0
2017-04-09 17:42:42,717 : INFO : PROGRESS: at 27.13% examples, 845558 words/s, in_qsize 0, out_qsize 1
2017-04-09 17:42:53,797 : INFO : Loading new file for index: 350000
2017-04-09 17:43:02,725 : INFO : PROGRESS: at 27.33% examples, 845620 words/s, in_qsize 0, out_qsize 0
2017-04-09 17:43:22,732 : INFO : PROGRESS: at 27.53% examples, 845672 words/s, in_qsize 0, out_qsize 0
2017-04-09 17:43:42,737 : INFO : PROGRESS: at 27.72% examples, 845657 words/s, in_qsize 0, out_qsize 1
2017-04-09 17:44:02,743 : INFO : PROGRES

2017-04-09 18:04:03,170 : INFO : PROGRESS: at 39.85% examples, 848209 words/s, in_qsize 0, out_qsize 0
2017-04-09 18:04:23,171 : INFO : PROGRESS: at 40.05% examples, 848206 words/s, in_qsize 0, out_qsize 0
2017-04-09 18:04:43,171 : INFO : PROGRESS: at 40.25% examples, 848192 words/s, in_qsize 0, out_qsize 0
2017-04-09 18:05:02,526 : INFO : Loading new file for index: 520000
2017-04-09 18:05:03,185 : INFO : PROGRESS: at 40.45% examples, 848229 words/s, in_qsize 0, out_qsize 1
2017-04-09 18:05:23,191 : INFO : PROGRESS: at 40.65% examples, 848261 words/s, in_qsize 0, out_qsize 1
2017-04-09 18:05:43,192 : INFO : PROGRESS: at 40.83% examples, 848040 words/s, in_qsize 0, out_qsize 3
2017-04-09 18:06:03,196 : INFO : PROGRESS: at 41.04% examples, 848079 words/s, in_qsize 0, out_qsize 1
2017-04-09 18:06:20,747 : INFO : Loading new file for index: 530000
2017-04-09 18:06:23,204 : INFO : PROGRESS: at 41.24% examples, 848103 words/s, in_qsize 0, out_qsize 2
2017-04-09 18:06:43,207 : INFO : PROGRES

2017-04-09 18:27:03,636 : INFO : PROGRESS: at 53.55% examples, 849300 words/s, in_qsize 0, out_qsize 0
2017-04-09 18:27:11,610 : INFO : Loading new file for index: 690000
2017-04-09 18:27:23,636 : INFO : PROGRESS: at 53.75% examples, 849329 words/s, in_qsize 0, out_qsize 0
2017-04-09 18:27:43,645 : INFO : PROGRESS: at 53.94% examples, 849320 words/s, in_qsize 0, out_qsize 0
2017-04-09 18:28:03,646 : INFO : PROGRESS: at 54.14% examples, 849336 words/s, in_qsize 0, out_qsize 0
2017-04-09 18:28:23,648 : INFO : PROGRESS: at 54.34% examples, 849356 words/s, in_qsize 0, out_qsize 0
2017-04-09 18:28:29,907 : INFO : Loading new file for index: 700000
2017-04-09 18:28:43,657 : INFO : PROGRESS: at 54.54% examples, 849370 words/s, in_qsize 0, out_qsize 1
2017-04-09 18:29:03,664 : INFO : PROGRESS: at 54.74% examples, 849397 words/s, in_qsize 0, out_qsize 0
2017-04-09 18:29:23,676 : INFO : PROGRESS: at 54.93% examples, 849340 words/s, in_qsize 0, out_qsize 0
2017-04-09 18:29:43,681 : INFO : PROGRES

2017-04-09 18:49:44,140 : INFO : PROGRESS: at 67.03% examples, 850068 words/s, in_qsize 0, out_qsize 0
2017-04-09 18:50:04,152 : INFO : PROGRESS: at 67.22% examples, 850061 words/s, in_qsize 0, out_qsize 4
2017-04-09 18:50:24,158 : INFO : PROGRESS: at 67.44% examples, 850256 words/s, in_qsize 0, out_qsize 0
2017-04-09 18:50:44,181 : INFO : PROGRESS: at 67.64% examples, 850307 words/s, in_qsize 1, out_qsize 0
2017-04-09 18:50:49,244 : INFO : Loading new file for index: 870000
2017-04-09 18:51:04,194 : INFO : PROGRESS: at 67.83% examples, 850175 words/s, in_qsize 0, out_qsize 1
2017-04-09 18:51:24,211 : INFO : PROGRESS: at 68.01% examples, 850011 words/s, in_qsize 0, out_qsize 1
2017-04-09 18:51:44,214 : INFO : PROGRESS: at 68.20% examples, 849883 words/s, in_qsize 0, out_qsize 0
2017-04-09 18:52:04,217 : INFO : PROGRESS: at 68.39% examples, 849826 words/s, in_qsize 0, out_qsize 0
2017-04-09 18:52:12,001 : INFO : Loading new file for index: 880000
2017-04-09 18:52:24,232 : INFO : PROGRES

2017-04-09 19:12:24,623 : INFO : PROGRESS: at 81.22% examples, 857773 words/s, in_qsize 0, out_qsize 0
2017-04-09 19:12:44,784 : INFO : PROGRESS: at 81.42% examples, 857736 words/s, in_qsize 0, out_qsize 0
2017-04-09 19:13:04,784 : INFO : PROGRESS: at 81.62% examples, 857895 words/s, in_qsize 0, out_qsize 1
2017-04-09 19:13:11,464 : INFO : Loading new file for index: 1050000
2017-04-09 19:13:24,778 : INFO : PROGRESS: at 81.83% examples, 857979 words/s, in_qsize 0, out_qsize 0
2017-04-09 19:13:44,792 : INFO : PROGRESS: at 82.04% examples, 858056 words/s, in_qsize 0, out_qsize 0
2017-04-09 19:14:04,791 : INFO : PROGRESS: at 82.25% examples, 858109 words/s, in_qsize 0, out_qsize 0
2017-04-09 19:14:24,799 : INFO : PROGRESS: at 82.45% examples, 858188 words/s, in_qsize 0, out_qsize 1
2017-04-09 19:14:26,935 : INFO : Loading new file for index: 1060000
2017-04-09 19:14:44,806 : INFO : PROGRESS: at 82.66% examples, 858285 words/s, in_qsize 0, out_qsize 0
2017-04-09 19:15:04,813 : INFO : PROGR

2017-04-09 19:35:25,123 : INFO : PROGRESS: at 94.72% examples, 855172 words/s, in_qsize 0, out_qsize 1
2017-04-09 19:35:41,127 : INFO : Loading new file for index: 1220000
2017-04-09 19:35:45,140 : INFO : PROGRESS: at 94.91% examples, 855138 words/s, in_qsize 0, out_qsize 0
2017-04-09 19:36:05,142 : INFO : PROGRESS: at 95.11% examples, 855088 words/s, in_qsize 0, out_qsize 1
2017-04-09 19:36:25,146 : INFO : PROGRESS: at 95.30% examples, 855051 words/s, in_qsize 0, out_qsize 0
2017-04-09 19:36:45,162 : INFO : PROGRESS: at 95.50% examples, 855002 words/s, in_qsize 0, out_qsize 1
2017-04-09 19:37:00,570 : INFO : Loading new file for index: 1230000
2017-04-09 19:37:05,185 : INFO : PROGRESS: at 95.69% examples, 854962 words/s, in_qsize 0, out_qsize 2
2017-04-09 19:37:25,181 : INFO : PROGRESS: at 95.89% examples, 854924 words/s, in_qsize 0, out_qsize 1
2017-04-09 19:37:45,185 : INFO : PROGRESS: at 96.08% examples, 854870 words/s, in_qsize 0, out_qsize 0
2017-04-09 19:38:05,211 : INFO : PROGR

CPU times: user 20h 39min 41s, sys: 15min 30s, total: 20h 55min 12s
Wall time: 2h 47min 44s


2017-04-09 19:44:26,609 : INFO : not storing attribute syn0norm
2017-04-09 19:44:26,610 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_1/model.wv.syn0.npy
2017-04-09 19:44:26,870 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_1/model.syn1neg.npy
2017-04-09 19:44:27,092 : INFO : not storing attribute cum_table
2017-04-09 19:44:40,711 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_1/model
2017-04-09 19:44:40,712 : INFO : ****************** Epoch 2 --- Working on doc2vec_size_200_w_2_type_dm_concat_0

2017-04-09 20:04:02,247 : INFO : PROGRESS: at 11.46% examples, 849865 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:04:22,247 : INFO : PROGRESS: at 11.66% examples, 849854 words/s, in_qsize 0, out_qsize 1
2017-04-09 20:04:24,886 : INFO : Loading new file for index: 150000
2017-04-09 20:04:42,255 : INFO : PROGRESS: at 11.86% examples, 849795 words/s, in_qsize 0, out_qsize 3
2017-04-09 20:05:02,251 : INFO : PROGRESS: at 12.06% examples, 849830 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:05:22,257 : INFO : PROGRESS: at 12.26% examples, 849871 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:05:42,278 : INFO : PROGRESS: at 12.45% examples, 849793 words/s, in_qsize 0, out_qsize 1
2017-04-09 20:05:44,500 : INFO : Loading new file for index: 160000
2017-04-09 20:06:02,283 : INFO : PROGRESS: at 12.65% examples, 849762 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:06:22,296 : INFO : PROGRESS: at 12.85% examples, 849787 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:06:42,299 : INFO : PROGRES

2017-04-09 20:26:43,022 : INFO : Loading new file for index: 320000
2017-04-09 20:27:02,781 : INFO : PROGRESS: at 25.13% examples, 851296 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:27:22,789 : INFO : PROGRESS: at 25.33% examples, 851339 words/s, in_qsize 0, out_qsize 1
2017-04-09 20:27:42,787 : INFO : PROGRESS: at 25.53% examples, 851389 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:28:00,005 : INFO : Loading new file for index: 330000
2017-04-09 20:28:02,793 : INFO : PROGRESS: at 25.73% examples, 851391 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:28:22,796 : INFO : PROGRESS: at 25.93% examples, 851433 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:28:42,819 : INFO : PROGRESS: at 26.13% examples, 851458 words/s, in_qsize 0, out_qsize 2
2017-04-09 20:29:02,832 : INFO : PROGRESS: at 26.33% examples, 851506 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:29:17,318 : INFO : Loading new file for index: 340000
2017-04-09 20:29:22,838 : INFO : PROGRESS: at 26.53% examples, 851473 words

2017-04-09 20:49:43,180 : INFO : PROGRESS: at 38.65% examples, 852107 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:50:03,190 : INFO : PROGRESS: at 38.84% examples, 852095 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:50:07,658 : INFO : Loading new file for index: 500000
2017-04-09 20:50:23,193 : INFO : PROGRESS: at 39.05% examples, 852143 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:50:43,205 : INFO : PROGRESS: at 39.25% examples, 852152 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:51:03,209 : INFO : PROGRESS: at 39.44% examples, 852122 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:51:23,215 : INFO : PROGRESS: at 39.64% examples, 852074 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:51:26,006 : INFO : Loading new file for index: 510000
2017-04-09 20:51:43,226 : INFO : PROGRESS: at 39.83% examples, 852047 words/s, in_qsize 0, out_qsize 1
2017-04-09 20:52:03,223 : INFO : PROGRESS: at 40.03% examples, 852090 words/s, in_qsize 0, out_qsize 0
2017-04-09 20:52:23,230 : INFO : PROGRES

2017-04-09 21:12:23,524 : INFO : PROGRESS: at 52.17% examples, 852459 words/s, in_qsize 0, out_qsize 0
2017-04-09 21:12:43,527 : INFO : PROGRESS: at 52.36% examples, 852452 words/s, in_qsize 0, out_qsize 2
2017-04-09 21:13:03,539 : INFO : PROGRESS: at 52.56% examples, 852464 words/s, in_qsize 1, out_qsize 1
2017-04-09 21:13:23,546 : INFO : PROGRESS: at 52.76% examples, 852426 words/s, in_qsize 0, out_qsize 2
2017-04-09 21:13:32,266 : INFO : Loading new file for index: 680000
2017-04-09 21:13:43,558 : INFO : PROGRESS: at 52.96% examples, 852433 words/s, in_qsize 0, out_qsize 1
2017-04-09 21:14:03,558 : INFO : PROGRESS: at 53.16% examples, 852470 words/s, in_qsize 0, out_qsize 0
2017-04-09 21:14:23,561 : INFO : PROGRESS: at 53.35% examples, 852515 words/s, in_qsize 0, out_qsize 4
2017-04-09 21:14:43,584 : INFO : PROGRESS: at 53.55% examples, 852506 words/s, in_qsize 0, out_qsize 0
2017-04-09 21:14:51,229 : INFO : Loading new file for index: 690000
2017-04-09 21:15:03,592 : INFO : PROGRES

2017-04-09 21:45:04,130 : INFO : PROGRESS: at 71.61% examples, 852761 words/s, in_qsize 0, out_qsize 0
2017-04-09 21:45:24,135 : INFO : PROGRESS: at 71.81% examples, 852745 words/s, in_qsize 0, out_qsize 0
2017-04-09 21:45:44,148 : INFO : PROGRESS: at 72.01% examples, 852744 words/s, in_qsize 0, out_qsize 3
2017-04-09 21:46:04,147 : INFO : PROGRESS: at 72.21% examples, 852752 words/s, in_qsize 0, out_qsize 0
2017-04-09 21:46:17,641 : INFO : Loading new file for index: 930000
2017-04-09 21:46:24,160 : INFO : PROGRESS: at 72.41% examples, 852749 words/s, in_qsize 0, out_qsize 0
2017-04-09 21:46:44,163 : INFO : PROGRESS: at 72.60% examples, 852757 words/s, in_qsize 0, out_qsize 0
2017-04-09 21:47:04,167 : INFO : PROGRESS: at 72.81% examples, 852755 words/s, in_qsize 0, out_qsize 0
2017-04-09 21:47:24,172 : INFO : PROGRESS: at 73.00% examples, 852749 words/s, in_qsize 0, out_qsize 0
2017-04-09 21:47:34,790 : INFO : Loading new file for index: 940000
2017-04-09 21:47:44,177 : INFO : PROGRES

2017-04-09 22:08:04,608 : INFO : PROGRESS: at 85.34% examples, 853061 words/s, in_qsize 0, out_qsize 1
2017-04-09 22:08:24,609 : INFO : PROGRESS: at 85.53% examples, 853063 words/s, in_qsize 0, out_qsize 1
2017-04-09 22:08:27,709 : INFO : Loading new file for index: 1100000
2017-04-09 22:08:44,619 : INFO : PROGRESS: at 85.74% examples, 853070 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:09:04,621 : INFO : PROGRESS: at 85.94% examples, 853079 words/s, in_qsize 0, out_qsize 1
2017-04-09 22:09:24,623 : INFO : PROGRESS: at 86.14% examples, 853085 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:09:44,327 : INFO : Loading new file for index: 1110000
2017-04-09 22:09:44,633 : INFO : PROGRESS: at 86.34% examples, 853113 words/s, in_qsize 0, out_qsize 2
2017-04-09 22:10:04,633 : INFO : PROGRESS: at 86.53% examples, 853110 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:10:24,646 : INFO : PROGRESS: at 86.73% examples, 853117 words/s, in_qsize 0, out_qsize 1
2017-04-09 22:10:44,647 : INFO : PROGR

2017-04-09 22:30:44,994 : INFO : PROGRESS: at 98.93% examples, 853594 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:31:04,995 : INFO : PROGRESS: at 99.13% examples, 853583 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:31:25,003 : INFO : PROGRESS: at 99.34% examples, 853589 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:31:39,720 : INFO : Loading new file for index: 1280000
2017-04-09 22:31:45,008 : INFO : PROGRESS: at 99.53% examples, 853595 words/s, in_qsize 1, out_qsize 1
2017-04-09 22:32:05,012 : INFO : PROGRESS: at 99.73% examples, 853578 words/s, in_qsize 0, out_qsize 4
2017-04-09 22:32:25,033 : INFO : PROGRESS: at 99.92% examples, 853589 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:32:30,164 : INFO : Loading new file for index: 1290000
2017-04-09 22:32:30,172 : INFO : All files are loaded - last file: 1290000
2017-04-09 22:32:32,490 : INFO : worker thread finished; awaiting finish of 15 more threads
2017-04-09 22:32:32,494 : INFO : worker thread finished; awaiting finish of 1

CPU times: user 19h 6min 51s, sys: 16min 11s, total: 19h 23min 2s
Wall time: 2h 47min 51s


2017-04-09 22:32:33,340 : INFO : not storing attribute syn0norm
2017-04-09 22:32:33,341 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_2/model.wv.syn0.npy
2017-04-09 22:32:33,574 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_2/model.syn1neg.npy
2017-04-09 22:32:33,775 : INFO : not storing attribute cum_table
2017-04-09 22:32:48,509 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_2/model
2017-04-09 22:32:48,510 : INFO : ****************** Epoch 3 --- Working on doc2vec_size_200_w_2_type_dm_concat_0

2017-04-09 22:52:10,155 : INFO : PROGRESS: at 10.52% examples, 780032 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:52:30,157 : INFO : PROGRESS: at 10.71% examples, 780054 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:52:50,168 : INFO : PROGRESS: at 10.89% examples, 780088 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:52:53,109 : INFO : Loading new file for index: 140000
2017-04-09 22:53:10,168 : INFO : PROGRESS: at 11.07% examples, 780050 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:53:30,177 : INFO : PROGRESS: at 11.25% examples, 780041 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:53:50,183 : INFO : PROGRESS: at 11.43% examples, 780014 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:54:10,190 : INFO : PROGRESS: at 11.61% examples, 780159 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:54:18,453 : INFO : Loading new file for index: 150000
2017-04-09 22:54:30,191 : INFO : PROGRESS: at 11.79% examples, 780059 words/s, in_qsize 0, out_qsize 0
2017-04-09 22:54:50,192 : INFO : PROGRES

2017-04-09 23:15:10,613 : INFO : PROGRESS: at 23.04% examples, 780430 words/s, in_qsize 0, out_qsize 0
2017-04-09 23:15:30,619 : INFO : PROGRESS: at 23.22% examples, 780465 words/s, in_qsize 0, out_qsize 0
2017-04-09 23:15:46,537 : INFO : Loading new file for index: 300000
2017-04-09 23:15:50,629 : INFO : PROGRESS: at 23.40% examples, 780529 words/s, in_qsize 0, out_qsize 1
2017-04-09 23:16:10,620 : INFO : PROGRESS: at 23.58% examples, 780470 words/s, in_qsize 0, out_qsize 1
2017-04-09 23:16:30,627 : INFO : PROGRESS: at 23.76% examples, 780495 words/s, in_qsize 0, out_qsize 0
2017-04-09 23:16:50,636 : INFO : PROGRESS: at 23.95% examples, 780496 words/s, in_qsize 0, out_qsize 0
2017-04-09 23:17:10,650 : INFO : PROGRESS: at 24.13% examples, 780570 words/s, in_qsize 1, out_qsize 0
2017-04-09 23:17:12,812 : INFO : Loading new file for index: 310000
2017-04-09 23:17:30,651 : INFO : PROGRESS: at 24.31% examples, 780619 words/s, in_qsize 1, out_qsize 1
2017-04-09 23:17:50,652 : INFO : PROGRES

2017-04-09 23:37:53,810 : INFO : Loading new file for index: 460000
2017-04-09 23:38:11,108 : INFO : PROGRESS: at 35.99% examples, 789762 words/s, in_qsize 0, out_qsize 0
2017-04-09 23:38:31,110 : INFO : PROGRESS: at 36.20% examples, 790379 words/s, in_qsize 0, out_qsize 0
2017-04-09 23:38:51,110 : INFO : PROGRESS: at 36.42% examples, 790959 words/s, in_qsize 0, out_qsize 0
2017-04-09 23:39:05,770 : INFO : Loading new file for index: 470000
2017-04-09 23:39:11,126 : INFO : PROGRESS: at 36.63% examples, 791508 words/s, in_qsize 0, out_qsize 0
2017-04-09 23:39:31,132 : INFO : PROGRESS: at 36.83% examples, 791859 words/s, in_qsize 0, out_qsize 0
2017-04-09 23:39:51,139 : INFO : PROGRESS: at 37.03% examples, 792206 words/s, in_qsize 0, out_qsize 0
2017-04-09 23:40:11,143 : INFO : PROGRESS: at 37.23% examples, 792585 words/s, in_qsize 0, out_qsize 0
2017-04-09 23:40:22,764 : INFO : Loading new file for index: 480000
2017-04-09 23:40:31,150 : INFO : PROGRESS: at 37.43% examples, 792967 words

2017-04-10 00:00:49,424 : INFO : Loading new file for index: 640000
2017-04-10 00:00:51,539 : INFO : PROGRESS: at 49.74% examples, 809471 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:01:11,544 : INFO : PROGRESS: at 49.94% examples, 809695 words/s, in_qsize 0, out_qsize 2
2017-04-10 00:01:31,545 : INFO : PROGRESS: at 50.14% examples, 809907 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:01:51,559 : INFO : PROGRESS: at 50.34% examples, 810083 words/s, in_qsize 0, out_qsize 1
2017-04-10 00:02:08,539 : INFO : Loading new file for index: 650000
2017-04-10 00:02:11,559 : INFO : PROGRESS: at 50.53% examples, 810249 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:02:31,560 : INFO : PROGRESS: at 50.73% examples, 810425 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:02:51,574 : INFO : PROGRESS: at 50.93% examples, 810609 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:03:11,578 : INFO : PROGRESS: at 51.13% examples, 810828 words/s, in_qsize 0, out_qsize 1
2017-04-10 00:03:27,268 : INFO : Loading

2017-04-10 00:23:31,989 : INFO : PROGRESS: at 63.34% examples, 820270 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:23:51,989 : INFO : PROGRESS: at 63.54% examples, 820399 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:24:11,995 : INFO : PROGRESS: at 63.74% examples, 820520 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:24:17,554 : INFO : Loading new file for index: 820000
2017-04-10 00:24:32,007 : INFO : PROGRESS: at 63.94% examples, 820635 words/s, in_qsize 0, out_qsize 6
2017-04-10 00:24:52,013 : INFO : PROGRESS: at 64.14% examples, 820755 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:25:12,018 : INFO : PROGRESS: at 64.34% examples, 820882 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:25:32,021 : INFO : PROGRESS: at 64.54% examples, 820994 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:25:33,016 : INFO : Loading new file for index: 830000
2017-04-10 00:25:52,023 : INFO : PROGRESS: at 64.74% examples, 821115 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:26:12,035 : INFO : PROGRES

2017-04-10 00:46:12,445 : INFO : PROGRESS: at 77.01% examples, 827452 words/s, in_qsize 0, out_qsize 1
2017-04-10 00:46:32,452 : INFO : PROGRESS: at 77.21% examples, 827530 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:46:52,486 : INFO : PROGRESS: at 77.41% examples, 827602 words/s, in_qsize 0, out_qsize 2
2017-04-10 00:47:12,494 : INFO : PROGRESS: at 77.61% examples, 827682 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:47:28,233 : INFO : Loading new file for index: 1000000
2017-04-10 00:47:32,499 : INFO : PROGRESS: at 77.81% examples, 827768 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:47:52,509 : INFO : PROGRESS: at 78.01% examples, 827855 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:48:12,521 : INFO : PROGRESS: at 78.21% examples, 827937 words/s, in_qsize 0, out_qsize 1
2017-04-10 00:48:32,524 : INFO : PROGRESS: at 78.41% examples, 828029 words/s, in_qsize 0, out_qsize 0
2017-04-10 00:48:45,151 : INFO : Loading new file for index: 1010000
2017-04-10 00:48:52,527 : INFO : PROGR

2017-04-10 01:09:12,857 : INFO : PROGRESS: at 90.87% examples, 832733 words/s, in_qsize 0, out_qsize 1
2017-04-10 01:09:25,446 : INFO : Loading new file for index: 1170000
2017-04-10 01:09:32,855 : INFO : PROGRESS: at 91.07% examples, 832802 words/s, in_qsize 0, out_qsize 0
2017-04-10 01:09:52,857 : INFO : PROGRESS: at 91.27% examples, 832879 words/s, in_qsize 0, out_qsize 0
2017-04-10 01:10:12,864 : INFO : PROGRESS: at 91.47% examples, 832933 words/s, in_qsize 0, out_qsize 1
2017-04-10 01:10:32,863 : INFO : PROGRESS: at 91.68% examples, 833005 words/s, in_qsize 0, out_qsize 0
2017-04-10 01:10:40,807 : INFO : Loading new file for index: 1180000
2017-04-10 01:10:52,866 : INFO : PROGRESS: at 91.88% examples, 833085 words/s, in_qsize 0, out_qsize 0
2017-04-10 01:11:12,869 : INFO : PROGRESS: at 92.08% examples, 833143 words/s, in_qsize 1, out_qsize 0
2017-04-10 01:11:32,868 : INFO : PROGRESS: at 92.28% examples, 833220 words/s, in_qsize 0, out_qsize 0
2017-04-10 01:11:52,876 : INFO : PROGR

CPU times: user 20h 8min 13s, sys: 16min 31s, total: 20h 24min 44s
Wall time: 2h 51min 26s


2017-04-10 01:24:16,334 : INFO : not storing attribute syn0norm
2017-04-10 01:24:16,335 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_3/model.wv.syn0.npy
2017-04-10 01:24:16,577 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_3/model.syn1neg.npy
2017-04-10 01:24:16,789 : INFO : not storing attribute cum_table
2017-04-10 01:24:30,690 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_3/model
2017-04-10 01:24:30,691 : INFO : ****************** Epoch 4 --- Working on doc2vec_size_200_w_2_type_dm_concat_0

2017-04-10 01:43:52,283 : INFO : PROGRESS: at 11.68% examples, 865688 words/s, in_qsize 0, out_qsize 0
2017-04-10 01:43:53,190 : INFO : Loading new file for index: 150000
2017-04-10 01:44:12,288 : INFO : PROGRESS: at 11.88% examples, 865644 words/s, in_qsize 0, out_qsize 1
2017-04-10 01:44:32,290 : INFO : PROGRESS: at 12.08% examples, 865815 words/s, in_qsize 0, out_qsize 0
2017-04-10 01:44:52,292 : INFO : PROGRESS: at 12.29% examples, 865895 words/s, in_qsize 0, out_qsize 1
2017-04-10 01:45:10,830 : INFO : Loading new file for index: 160000
2017-04-10 01:45:12,292 : INFO : PROGRESS: at 12.49% examples, 865982 words/s, in_qsize 0, out_qsize 1
2017-04-10 01:45:32,310 : INFO : PROGRESS: at 12.69% examples, 866047 words/s, in_qsize 0, out_qsize 2
2017-04-10 01:45:52,310 : INFO : PROGRESS: at 12.89% examples, 865986 words/s, in_qsize 0, out_qsize 0
2017-04-10 01:46:12,317 : INFO : PROGRESS: at 13.09% examples, 865831 words/s, in_qsize 0, out_qsize 0
2017-04-10 01:46:27,567 : INFO : Loading

2017-04-10 02:06:32,709 : INFO : PROGRESS: at 25.38% examples, 866417 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:06:52,710 : INFO : PROGRESS: at 25.58% examples, 866399 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:07:04,937 : INFO : Loading new file for index: 330000
2017-04-10 02:07:12,718 : INFO : PROGRESS: at 25.78% examples, 866392 words/s, in_qsize 0, out_qsize 1
2017-04-10 02:07:32,712 : INFO : PROGRESS: at 25.99% examples, 866510 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:07:52,720 : INFO : PROGRESS: at 26.19% examples, 866521 words/s, in_qsize 1, out_qsize 0
2017-04-10 02:08:12,723 : INFO : PROGRESS: at 26.39% examples, 866522 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:08:20,816 : INFO : Loading new file for index: 340000
2017-04-10 02:08:32,721 : INFO : PROGRESS: at 26.60% examples, 866509 words/s, in_qsize 0, out_qsize 2
2017-04-10 02:08:52,736 : INFO : PROGRESS: at 26.80% examples, 866486 words/s, in_qsize 0, out_qsize 1
2017-04-10 02:09:12,739 : INFO : PROGRES

2017-04-10 02:29:13,045 : INFO : PROGRESS: at 39.12% examples, 866932 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:29:33,047 : INFO : PROGRESS: at 39.32% examples, 866927 words/s, in_qsize 0, out_qsize 1
2017-04-10 02:29:53,054 : INFO : PROGRESS: at 39.53% examples, 866938 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:30:07,446 : INFO : Loading new file for index: 510000
2017-04-10 02:30:13,062 : INFO : PROGRESS: at 39.72% examples, 866922 words/s, in_qsize 0, out_qsize 1
2017-04-10 02:30:33,075 : INFO : PROGRESS: at 39.92% examples, 866924 words/s, in_qsize 0, out_qsize 2
2017-04-10 02:30:53,071 : INFO : PROGRESS: at 40.12% examples, 866917 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:31:13,071 : INFO : PROGRESS: at 40.33% examples, 866904 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:31:24,406 : INFO : Loading new file for index: 520000
2017-04-10 02:31:33,074 : INFO : PROGRESS: at 40.53% examples, 866846 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:31:53,088 : INFO : PROGRES

2017-04-10 02:51:58,695 : INFO : Loading new file for index: 680000
2017-04-10 02:52:13,421 : INFO : PROGRESS: at 52.99% examples, 866000 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:52:33,433 : INFO : PROGRESS: at 53.20% examples, 866023 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:52:53,454 : INFO : PROGRESS: at 53.39% examples, 866056 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:53:13,463 : INFO : PROGRESS: at 53.59% examples, 866066 words/s, in_qsize 0, out_qsize 1
2017-04-10 02:53:16,495 : INFO : Loading new file for index: 690000
2017-04-10 02:53:33,458 : INFO : PROGRESS: at 53.80% examples, 866061 words/s, in_qsize 0, out_qsize 1
2017-04-10 02:53:53,468 : INFO : PROGRESS: at 54.00% examples, 866064 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:54:13,470 : INFO : PROGRESS: at 54.20% examples, 866059 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:54:33,478 : INFO : PROGRESS: at 54.40% examples, 866083 words/s, in_qsize 0, out_qsize 0
2017-04-10 02:54:33,620 : INFO : Loading

2017-04-10 03:14:53,907 : INFO : PROGRESS: at 66.67% examples, 866037 words/s, in_qsize 0, out_qsize 5
2017-04-10 03:15:13,909 : INFO : PROGRESS: at 66.87% examples, 866018 words/s, in_qsize 0, out_qsize 0
2017-04-10 03:15:17,320 : INFO : Loading new file for index: 860000
2017-04-10 03:15:33,916 : INFO : PROGRESS: at 67.07% examples, 866043 words/s, in_qsize 0, out_qsize 2
2017-04-10 03:15:53,919 : INFO : PROGRESS: at 67.28% examples, 866057 words/s, in_qsize 0, out_qsize 0
2017-04-10 03:16:13,920 : INFO : PROGRESS: at 67.48% examples, 866050 words/s, in_qsize 0, out_qsize 0
2017-04-10 03:16:33,922 : INFO : PROGRESS: at 67.68% examples, 866052 words/s, in_qsize 0, out_qsize 0
2017-04-10 03:16:34,767 : INFO : Loading new file for index: 870000
2017-04-10 03:16:53,922 : INFO : PROGRESS: at 67.89% examples, 866078 words/s, in_qsize 0, out_qsize 0
2017-04-10 03:17:13,942 : INFO : PROGRESS: at 68.09% examples, 866097 words/s, in_qsize 0, out_qsize 3
2017-04-10 03:17:33,952 : INFO : PROGRES

2017-04-10 03:37:34,308 : INFO : PROGRESS: at 80.13% examples, 863271 words/s, in_qsize 0, out_qsize 0
2017-04-10 03:37:54,312 : INFO : PROGRESS: at 80.33% examples, 863198 words/s, in_qsize 0, out_qsize 2
2017-04-10 03:38:14,317 : INFO : PROGRESS: at 80.52% examples, 863109 words/s, in_qsize 0, out_qsize 0
2017-04-10 03:38:34,321 : INFO : PROGRESS: at 80.71% examples, 863088 words/s, in_qsize 0, out_qsize 0
2017-04-10 03:38:53,492 : INFO : Loading new file for index: 1040000
2017-04-10 03:38:54,336 : INFO : PROGRESS: at 80.91% examples, 863063 words/s, in_qsize 0, out_qsize 1
2017-04-10 03:39:14,339 : INFO : PROGRESS: at 81.11% examples, 863043 words/s, in_qsize 0, out_qsize 1
2017-04-10 03:39:34,343 : INFO : PROGRESS: at 81.32% examples, 863060 words/s, in_qsize 0, out_qsize 1
2017-04-10 03:39:54,347 : INFO : PROGRESS: at 81.50% examples, 862959 words/s, in_qsize 0, out_qsize 0
2017-04-10 03:40:13,940 : INFO : Loading new file for index: 1050000
2017-04-10 03:40:14,358 : INFO : PROGR

2017-04-10 04:00:34,780 : INFO : PROGRESS: at 93.55% examples, 859122 words/s, in_qsize 0, out_qsize 0
2017-04-10 04:00:54,782 : INFO : PROGRESS: at 93.74% examples, 859022 words/s, in_qsize 0, out_qsize 0
2017-04-10 04:01:14,783 : INFO : PROGRESS: at 93.94% examples, 858978 words/s, in_qsize 0, out_qsize 2
2017-04-10 04:01:30,146 : INFO : Loading new file for index: 1210000
2017-04-10 04:01:34,787 : INFO : PROGRESS: at 94.13% examples, 858945 words/s, in_qsize 0, out_qsize 2
2017-04-10 04:01:54,796 : INFO : PROGRESS: at 94.33% examples, 858886 words/s, in_qsize 0, out_qsize 2
2017-04-10 04:02:14,803 : INFO : PROGRESS: at 94.52% examples, 858846 words/s, in_qsize 0, out_qsize 1
2017-04-10 04:02:34,826 : INFO : PROGRESS: at 94.72% examples, 858798 words/s, in_qsize 0, out_qsize 1
2017-04-10 04:02:51,130 : INFO : Loading new file for index: 1220000
2017-04-10 04:02:54,826 : INFO : PROGRESS: at 94.91% examples, 858745 words/s, in_qsize 0, out_qsize 0
2017-04-10 04:03:14,839 : INFO : PROGR

CPU times: user 19h 5min 48s, sys: 15min 14s, total: 19h 21min 2s
Wall time: 2h 47min 6s


2017-04-10 04:11:38,487 : INFO : not storing attribute syn0norm
2017-04-10 04:11:38,488 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_4/model.wv.syn0.npy
2017-04-10 04:11:38,789 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_4/model.syn1neg.npy
2017-04-10 04:11:39,034 : INFO : not storing attribute cum_table
2017-04-10 04:11:54,155 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_4/model
2017-04-10 04:11:54,157 : INFO : ****************** Epoch 5 --- Working on doc2vec_size_200_w_2_type_dm_concat_0

2017-04-10 04:31:15,647 : INFO : PROGRESS: at 11.50% examples, 852552 words/s, in_qsize 0, out_qsize 1
2017-04-10 04:31:34,149 : INFO : Loading new file for index: 150000
2017-04-10 04:31:35,648 : INFO : PROGRESS: at 11.70% examples, 852871 words/s, in_qsize 0, out_qsize 0
2017-04-10 04:31:55,654 : INFO : PROGRESS: at 11.91% examples, 853130 words/s, in_qsize 0, out_qsize 0
2017-04-10 04:32:15,660 : INFO : PROGRESS: at 12.11% examples, 853396 words/s, in_qsize 0, out_qsize 0
2017-04-10 04:32:35,660 : INFO : PROGRESS: at 12.31% examples, 853686 words/s, in_qsize 0, out_qsize 0
2017-04-10 04:32:51,841 : INFO : Loading new file for index: 160000
2017-04-10 04:32:55,671 : INFO : PROGRESS: at 12.51% examples, 853920 words/s, in_qsize 0, out_qsize 0
2017-04-10 04:33:15,684 : INFO : PROGRESS: at 12.71% examples, 854056 words/s, in_qsize 0, out_qsize 1
2017-04-10 04:33:35,676 : INFO : PROGRESS: at 12.91% examples, 854209 words/s, in_qsize 0, out_qsize 0
2017-04-10 04:33:55,684 : INFO : PROGRES

2017-04-10 04:53:56,093 : INFO : PROGRESS: at 25.24% examples, 861632 words/s, in_qsize 0, out_qsize 1
2017-04-10 04:54:16,094 : INFO : PROGRESS: at 25.44% examples, 861732 words/s, in_qsize 0, out_qsize 1
2017-04-10 04:54:36,093 : INFO : PROGRESS: at 25.64% examples, 861781 words/s, in_qsize 0, out_qsize 0
2017-04-10 04:54:41,974 : INFO : Loading new file for index: 330000
2017-04-10 04:54:56,105 : INFO : PROGRESS: at 25.84% examples, 861897 words/s, in_qsize 0, out_qsize 4
2017-04-10 04:55:16,115 : INFO : PROGRESS: at 26.05% examples, 861996 words/s, in_qsize 0, out_qsize 0
2017-04-10 04:55:36,133 : INFO : PROGRESS: at 26.26% examples, 862064 words/s, in_qsize 1, out_qsize 3
2017-04-10 04:55:56,144 : INFO : PROGRESS: at 26.46% examples, 862126 words/s, in_qsize 0, out_qsize 3
2017-04-10 04:55:57,630 : INFO : Loading new file for index: 340000
2017-04-10 04:56:16,150 : INFO : PROGRESS: at 26.66% examples, 862199 words/s, in_qsize 0, out_qsize 1
2017-04-10 04:56:36,161 : INFO : PROGRES

2017-04-10 05:16:36,643 : INFO : PROGRESS: at 39.03% examples, 864830 words/s, in_qsize 0, out_qsize 0
2017-04-10 05:16:56,654 : INFO : PROGRESS: at 39.23% examples, 864852 words/s, in_qsize 0, out_qsize 4
2017-04-10 05:17:16,662 : INFO : PROGRESS: at 39.43% examples, 864861 words/s, in_qsize 0, out_qsize 0
2017-04-10 05:17:36,662 : INFO : PROGRESS: at 39.63% examples, 864834 words/s, in_qsize 0, out_qsize 0
2017-04-10 05:17:40,399 : INFO : Loading new file for index: 510000
2017-04-10 05:17:56,673 : INFO : PROGRESS: at 39.83% examples, 864847 words/s, in_qsize 0, out_qsize 1
2017-04-10 05:18:16,685 : INFO : PROGRESS: at 40.03% examples, 864878 words/s, in_qsize 0, out_qsize 1
2017-04-10 05:18:36,693 : INFO : PROGRESS: at 40.23% examples, 864893 words/s, in_qsize 0, out_qsize 0
2017-04-10 05:18:56,695 : INFO : PROGRESS: at 40.44% examples, 864930 words/s, in_qsize 1, out_qsize 1
2017-04-10 05:18:56,937 : INFO : Loading new file for index: 520000
2017-04-10 05:19:16,701 : INFO : PROGRES

2017-04-10 05:39:23,932 : INFO : Loading new file for index: 680000
2017-04-10 05:39:37,138 : INFO : PROGRESS: at 52.98% examples, 865705 words/s, in_qsize 0, out_qsize 2
2017-04-10 05:39:57,149 : INFO : PROGRESS: at 53.18% examples, 865708 words/s, in_qsize 0, out_qsize 0
2017-04-10 05:40:17,144 : INFO : PROGRESS: at 53.38% examples, 865764 words/s, in_qsize 0, out_qsize 0
2017-04-10 05:40:37,154 : INFO : PROGRESS: at 53.58% examples, 865747 words/s, in_qsize 0, out_qsize 3
2017-04-10 05:40:41,923 : INFO : Loading new file for index: 690000
2017-04-10 05:40:57,163 : INFO : PROGRESS: at 53.78% examples, 865776 words/s, in_qsize 0, out_qsize 0
2017-04-10 05:41:17,171 : INFO : PROGRESS: at 53.98% examples, 865763 words/s, in_qsize 0, out_qsize 2
2017-04-10 05:41:37,170 : INFO : PROGRESS: at 54.18% examples, 865758 words/s, in_qsize 0, out_qsize 1
2017-04-10 05:41:57,173 : INFO : PROGRESS: at 54.38% examples, 865773 words/s, in_qsize 0, out_qsize 0
2017-04-10 05:41:58,947 : INFO : Loading

2017-04-10 06:02:17,550 : INFO : PROGRESS: at 66.68% examples, 866184 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:02:37,550 : INFO : PROGRESS: at 66.87% examples, 865951 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:02:41,252 : INFO : Loading new file for index: 860000
2017-04-10 06:02:57,554 : INFO : PROGRESS: at 67.07% examples, 865962 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:03:17,556 : INFO : PROGRESS: at 67.27% examples, 865932 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:03:37,561 : INFO : PROGRESS: at 67.46% examples, 865826 words/s, in_qsize 0, out_qsize 1
2017-04-10 06:03:57,562 : INFO : PROGRESS: at 67.65% examples, 865616 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:04:01,835 : INFO : Loading new file for index: 870000
2017-04-10 06:04:17,568 : INFO : PROGRESS: at 67.83% examples, 865407 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:04:37,580 : INFO : PROGRESS: at 68.02% examples, 865233 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:04:57,588 : INFO : PROGRES

2017-04-10 06:25:19,106 : INFO : PROGRESS: at 79.82% examples, 857539 words/s, in_qsize 0, out_qsize 1
2017-04-10 06:25:39,110 : INFO : PROGRESS: at 80.01% examples, 857478 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:25:50,044 : INFO : Loading new file for index: 1030000
2017-04-10 06:25:59,113 : INFO : PROGRESS: at 80.20% examples, 857432 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:26:19,114 : INFO : PROGRESS: at 80.40% examples, 857385 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:26:39,116 : INFO : PROGRESS: at 80.59% examples, 857345 words/s, in_qsize 2, out_qsize 0
2017-04-10 06:26:59,122 : INFO : PROGRESS: at 80.78% examples, 857278 words/s, in_qsize 0, out_qsize 1
2017-04-10 06:27:11,765 : INFO : Loading new file for index: 1040000
2017-04-10 06:27:19,136 : INFO : PROGRESS: at 80.98% examples, 857226 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:27:39,148 : INFO : PROGRESS: at 81.18% examples, 857198 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:27:59,168 : INFO : PROGR

2017-04-10 06:48:19,575 : INFO : PROGRESS: at 93.31% examples, 854968 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:48:19,581 : INFO : Loading new file for index: 1200000
2017-04-10 06:48:39,579 : INFO : PROGRESS: at 93.51% examples, 854951 words/s, in_qsize 0, out_qsize 2
2017-04-10 06:48:59,587 : INFO : PROGRESS: at 93.71% examples, 854929 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:49:19,586 : INFO : PROGRESS: at 93.90% examples, 854922 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:49:38,126 : INFO : Loading new file for index: 1210000
2017-04-10 06:49:39,598 : INFO : PROGRESS: at 94.10% examples, 854914 words/s, in_qsize 0, out_qsize 1
2017-04-10 06:49:59,601 : INFO : PROGRESS: at 94.30% examples, 854912 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:50:19,696 : INFO : PROGRESS: at 94.50% examples, 854864 words/s, in_qsize 0, out_qsize 0
2017-04-10 06:50:39,703 : INFO : PROGRESS: at 94.70% examples, 854879 words/s, in_qsize 0, out_qsize 3
2017-04-10 06:50:57,404 : INFO : Loadi

CPU times: user 19h 12min 23s, sys: 13min 12s, total: 19h 25min 35s
Wall time: 2h 47min 25s


2017-04-10 06:59:20,531 : INFO : not storing attribute syn0norm
2017-04-10 06:59:20,532 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_5/model.wv.syn0.npy
2017-04-10 06:59:20,772 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_5/model.syn1neg.npy
2017-04-10 06:59:20,989 : INFO : not storing attribute cum_table
2017-04-10 06:59:34,822 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_5/model
2017-04-10 06:59:34,823 : INFO : ****************** Epoch 6 --- Working on doc2vec_size_200_w_2_type_dm_concat_0

2017-04-10 07:18:50,823 : INFO : Loading new file for index: 150000
2017-04-10 07:18:56,314 : INFO : PROGRESS: at 11.74% examples, 870499 words/s, in_qsize 0, out_qsize 2
2017-04-10 07:19:16,318 : INFO : PROGRESS: at 11.95% examples, 870437 words/s, in_qsize 0, out_qsize 0
2017-04-10 07:19:36,322 : INFO : PROGRESS: at 12.14% examples, 870333 words/s, in_qsize 0, out_qsize 0
2017-04-10 07:19:56,326 : INFO : PROGRESS: at 12.35% examples, 870355 words/s, in_qsize 0, out_qsize 0
2017-04-10 07:20:08,783 : INFO : Loading new file for index: 160000
2017-04-10 07:20:16,329 : INFO : PROGRESS: at 12.55% examples, 870278 words/s, in_qsize 0, out_qsize 1
2017-04-10 07:20:36,343 : INFO : PROGRESS: at 12.75% examples, 870194 words/s, in_qsize 0, out_qsize 0
2017-04-10 07:20:56,355 : INFO : PROGRESS: at 12.95% examples, 870076 words/s, in_qsize 0, out_qsize 4
2017-04-10 07:21:16,356 : INFO : PROGRESS: at 13.15% examples, 870032 words/s, in_qsize 0, out_qsize 0
2017-04-10 07:21:25,457 : INFO : Loading

2017-04-10 07:41:36,756 : INFO : PROGRESS: at 25.53% examples, 871615 words/s, in_qsize 0, out_qsize 0
2017-04-10 07:41:53,841 : INFO : Loading new file for index: 330000
2017-04-10 07:41:56,758 : INFO : PROGRESS: at 25.73% examples, 871613 words/s, in_qsize 0, out_qsize 5
2017-04-10 07:42:16,753 : INFO : PROGRESS: at 25.94% examples, 871647 words/s, in_qsize 0, out_qsize 0
2017-04-10 07:42:36,762 : INFO : PROGRESS: at 26.14% examples, 871675 words/s, in_qsize 0, out_qsize 0
2017-04-10 07:42:56,765 : INFO : PROGRESS: at 26.35% examples, 871618 words/s, in_qsize 0, out_qsize 0
2017-04-10 07:43:09,759 : INFO : Loading new file for index: 340000
2017-04-10 07:43:16,772 : INFO : PROGRESS: at 26.55% examples, 871571 words/s, in_qsize 0, out_qsize 0
2017-04-10 07:43:36,777 : INFO : PROGRESS: at 26.75% examples, 871750 words/s, in_qsize 0, out_qsize 1
2017-04-10 07:43:56,781 : INFO : PROGRESS: at 26.97% examples, 872093 words/s, in_qsize 0, out_qsize 0
2017-04-10 07:44:16,785 : INFO : PROGRES

2017-04-10 08:03:57,087 : INFO : PROGRESS: at 39.74% examples, 885201 words/s, in_qsize 2, out_qsize 0
2017-04-10 08:04:17,085 : INFO : PROGRESS: at 39.94% examples, 885129 words/s, in_qsize 0, out_qsize 1
2017-04-10 08:04:37,087 : INFO : PROGRESS: at 40.14% examples, 885043 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:04:57,091 : INFO : PROGRESS: at 40.35% examples, 885020 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:05:06,458 : INFO : Loading new file for index: 520000
2017-04-10 08:05:17,096 : INFO : PROGRESS: at 40.55% examples, 884935 words/s, in_qsize 0, out_qsize 1
2017-04-10 08:05:37,107 : INFO : PROGRESS: at 40.75% examples, 884819 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:05:57,116 : INFO : PROGRESS: at 40.96% examples, 884762 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:06:17,117 : INFO : PROGRESS: at 41.16% examples, 884688 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:06:22,087 : INFO : Loading new file for index: 530000
2017-04-10 08:06:37,122 : INFO : PROGRES

2017-04-10 08:26:47,206 : INFO : Loading new file for index: 690000
2017-04-10 08:26:57,470 : INFO : PROGRESS: at 53.73% examples, 881503 words/s, in_qsize 0, out_qsize 1
2017-04-10 08:27:17,479 : INFO : PROGRESS: at 53.93% examples, 881478 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:27:37,490 : INFO : PROGRESS: at 54.13% examples, 881451 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:27:57,509 : INFO : PROGRESS: at 54.34% examples, 881405 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:28:03,817 : INFO : Loading new file for index: 700000
2017-04-10 08:28:17,510 : INFO : PROGRESS: at 54.54% examples, 881393 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:28:37,516 : INFO : PROGRESS: at 54.74% examples, 881354 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:28:57,520 : INFO : PROGRESS: at 54.94% examples, 881290 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:29:17,527 : INFO : PROGRESS: at 55.15% examples, 881247 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:29:22,075 : INFO : Loading

2017-04-10 08:49:37,840 : INFO : PROGRESS: at 67.46% examples, 878997 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:49:57,846 : INFO : PROGRESS: at 67.66% examples, 878951 words/s, in_qsize 0, out_qsize 1
2017-04-10 08:50:00,291 : INFO : Loading new file for index: 870000
2017-04-10 08:50:17,854 : INFO : PROGRESS: at 67.87% examples, 878881 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:50:37,863 : INFO : PROGRESS: at 68.07% examples, 878853 words/s, in_qsize 0, out_qsize 2
2017-04-10 08:50:57,868 : INFO : PROGRESS: at 68.27% examples, 878819 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:51:17,537 : INFO : Loading new file for index: 880000
2017-04-10 08:51:17,868 : INFO : PROGRESS: at 68.47% examples, 878792 words/s, in_qsize 0, out_qsize 1
2017-04-10 08:51:37,878 : INFO : PROGRESS: at 68.67% examples, 878759 words/s, in_qsize 0, out_qsize 0
2017-04-10 08:51:57,880 : INFO : PROGRESS: at 68.87% examples, 878723 words/s, in_qsize 0, out_qsize 1
2017-04-10 08:52:17,877 : INFO : PROGRES

2017-04-10 09:12:18,343 : INFO : PROGRESS: at 81.24% examples, 877397 words/s, in_qsize 0, out_qsize 0
2017-04-10 09:12:38,349 : INFO : PROGRESS: at 81.43% examples, 877311 words/s, in_qsize 31, out_qsize 1
2017-04-10 09:12:58,355 : INFO : PROGRESS: at 81.63% examples, 877301 words/s, in_qsize 0, out_qsize 0
2017-04-10 09:13:04,854 : INFO : Loading new file for index: 1050000
2017-04-10 09:13:18,362 : INFO : PROGRESS: at 81.83% examples, 877296 words/s, in_qsize 0, out_qsize 0
2017-04-10 09:13:38,363 : INFO : PROGRESS: at 82.04% examples, 877287 words/s, in_qsize 0, out_qsize 0
2017-04-10 09:13:58,364 : INFO : PROGRESS: at 82.24% examples, 877262 words/s, in_qsize 0, out_qsize 0
2017-04-10 09:14:18,368 : INFO : PROGRESS: at 82.44% examples, 877246 words/s, in_qsize 0, out_qsize 0
2017-04-10 09:14:21,662 : INFO : Loading new file for index: 1060000
2017-04-10 09:14:38,401 : INFO : PROGRESS: at 82.64% examples, 877244 words/s, in_qsize 13, out_qsize 0
2017-04-10 09:14:58,399 : INFO : PRO

2017-04-10 09:34:58,717 : INFO : PROGRESS: at 95.03% examples, 876402 words/s, in_qsize 0, out_qsize 0
2017-04-10 09:35:18,714 : INFO : PROGRESS: at 95.23% examples, 876394 words/s, in_qsize 0, out_qsize 0
2017-04-10 09:35:38,715 : INFO : PROGRESS: at 95.43% examples, 876389 words/s, in_qsize 0, out_qsize 0
2017-04-10 09:35:58,736 : INFO : PROGRESS: at 95.64% examples, 876376 words/s, in_qsize 0, out_qsize 0
2017-04-10 09:35:59,842 : INFO : Loading new file for index: 1230000
2017-04-10 09:36:18,737 : INFO : PROGRESS: at 95.84% examples, 876386 words/s, in_qsize 0, out_qsize 0
2017-04-10 09:36:38,741 : INFO : PROGRESS: at 96.04% examples, 876361 words/s, in_qsize 0, out_qsize 0
2017-04-10 09:36:58,744 : INFO : PROGRESS: at 96.25% examples, 876352 words/s, in_qsize 0, out_qsize 2
2017-04-10 09:37:14,120 : INFO : Loading new file for index: 1240000
2017-04-10 09:37:18,745 : INFO : PROGRESS: at 96.46% examples, 876361 words/s, in_qsize 0, out_qsize 0
2017-04-10 09:37:38,757 : INFO : PROGR

CPU times: user 18h 51min 28s, sys: 12min 55s, total: 19h 4min 24s
Wall time: 2h 43min 31s


2017-04-10 09:43:07,071 : INFO : not storing attribute syn0norm
2017-04-10 09:43:07,072 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_6/model.wv.syn0.npy
2017-04-10 09:43:07,300 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_6/model.syn1neg.npy
2017-04-10 09:43:07,520 : INFO : not storing attribute cum_table
2017-04-10 09:43:19,773 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_6/model
2017-04-10 09:43:19,774 : INFO : ****************** Epoch 7 --- Working on doc2vec_size_200_w_2_type_dm_concat_0

2017-04-10 10:02:37,995 : INFO : Loading new file for index: 150000
2017-04-10 10:02:41,172 : INFO : PROGRESS: at 11.72% examples, 868899 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:03:01,172 : INFO : PROGRESS: at 11.92% examples, 868828 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:03:21,196 : INFO : PROGRESS: at 12.12% examples, 868751 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:03:41,205 : INFO : PROGRESS: at 12.33% examples, 868848 words/s, in_qsize 0, out_qsize 2
2017-04-10 10:03:55,894 : INFO : Loading new file for index: 160000
2017-04-10 10:04:01,217 : INFO : PROGRESS: at 12.53% examples, 868733 words/s, in_qsize 0, out_qsize 3
2017-04-10 10:04:21,206 : INFO : PROGRESS: at 12.73% examples, 868718 words/s, in_qsize 0, out_qsize 2
2017-04-10 10:04:41,207 : INFO : PROGRESS: at 12.93% examples, 868789 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:05:01,214 : INFO : PROGRESS: at 13.13% examples, 868813 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:05:12,214 : INFO : Loading

2017-04-10 10:25:21,565 : INFO : PROGRESS: at 25.46% examples, 869550 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:25:41,578 : INFO : PROGRESS: at 25.67% examples, 869570 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:25:44,624 : INFO : Loading new file for index: 330000
2017-04-10 10:26:01,582 : INFO : PROGRESS: at 25.87% examples, 869568 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:26:21,599 : INFO : PROGRESS: at 26.08% examples, 869544 words/s, in_qsize 0, out_qsize 2
2017-04-10 10:26:41,608 : INFO : PROGRESS: at 26.28% examples, 869528 words/s, in_qsize 0, out_qsize 1
2017-04-10 10:27:00,636 : INFO : Loading new file for index: 340000
2017-04-10 10:27:01,613 : INFO : PROGRESS: at 26.48% examples, 869575 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:27:21,616 : INFO : PROGRESS: at 26.69% examples, 869619 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:27:41,620 : INFO : PROGRESS: at 26.89% examples, 869643 words/s, in_qsize 1, out_qsize 0
2017-04-10 10:28:01,638 : INFO : PROGRES

2017-04-10 10:48:02,047 : INFO : PROGRESS: at 39.24% examples, 869604 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:48:22,053 : INFO : PROGRESS: at 39.44% examples, 869621 words/s, in_qsize 0, out_qsize 3
2017-04-10 10:48:42,049 : INFO : PROGRESS: at 39.64% examples, 869612 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:48:44,222 : INFO : Loading new file for index: 510000
2017-04-10 10:49:02,053 : INFO : PROGRESS: at 39.84% examples, 869612 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:49:22,068 : INFO : PROGRESS: at 40.05% examples, 869630 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:49:42,067 : INFO : PROGRESS: at 40.25% examples, 869626 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:50:00,814 : INFO : Loading new file for index: 520000
2017-04-10 10:50:02,091 : INFO : PROGRESS: at 40.45% examples, 869627 words/s, in_qsize 0, out_qsize 3
2017-04-10 10:50:22,091 : INFO : PROGRESS: at 40.66% examples, 869597 words/s, in_qsize 0, out_qsize 0
2017-04-10 10:50:42,097 : INFO : PROGRES

2017-04-10 11:10:42,513 : INFO : PROGRESS: at 53.02% examples, 869713 words/s, in_qsize 0, out_qsize 1
2017-04-10 11:11:02,529 : INFO : PROGRESS: at 53.22% examples, 869726 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:11:22,529 : INFO : PROGRESS: at 53.42% examples, 869805 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:11:42,532 : INFO : PROGRESS: at 53.62% examples, 869804 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:11:42,757 : INFO : Loading new file for index: 690000
2017-04-10 11:12:02,546 : INFO : PROGRESS: at 53.83% examples, 869781 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:12:22,545 : INFO : PROGRESS: at 54.03% examples, 869821 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:12:42,550 : INFO : PROGRESS: at 54.23% examples, 869828 words/s, in_qsize 0, out_qsize 1
2017-04-10 11:12:59,411 : INFO : Loading new file for index: 700000
2017-04-10 11:13:02,550 : INFO : PROGRESS: at 54.44% examples, 869832 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:13:22,557 : INFO : PROGRES

2017-04-10 11:33:38,868 : INFO : Loading new file for index: 860000
2017-04-10 11:33:43,019 : INFO : PROGRESS: at 66.95% examples, 869625 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:34:03,025 : INFO : PROGRESS: at 67.15% examples, 869609 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:34:23,040 : INFO : PROGRESS: at 67.35% examples, 869614 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:34:43,064 : INFO : PROGRESS: at 67.55% examples, 869560 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:34:57,767 : INFO : Loading new file for index: 870000
2017-04-10 11:35:03,073 : INFO : PROGRESS: at 67.74% examples, 869369 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:35:23,091 : INFO : PROGRESS: at 67.93% examples, 869167 words/s, in_qsize 0, out_qsize 1
2017-04-10 11:35:43,083 : INFO : PROGRESS: at 68.11% examples, 868932 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:36:03,097 : INFO : PROGRESS: at 68.30% examples, 868717 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:36:21,620 : INFO : Loading

2017-04-10 11:56:32,278 : INFO : Loading new file for index: 1030000
2017-04-10 11:56:44,145 : INFO : PROGRESS: at 80.23% examples, 862132 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:57:04,151 : INFO : PROGRESS: at 80.44% examples, 862153 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:57:24,153 : INFO : PROGRESS: at 80.63% examples, 862101 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:57:44,164 : INFO : PROGRESS: at 80.83% examples, 862166 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:57:50,854 : INFO : Loading new file for index: 1040000
2017-04-10 11:58:04,164 : INFO : PROGRESS: at 81.04% examples, 862167 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:58:24,165 : INFO : PROGRESS: at 81.24% examples, 862154 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:58:44,168 : INFO : PROGRESS: at 81.43% examples, 862088 words/s, in_qsize 31, out_qsize 0
2017-04-10 11:59:04,170 : INFO : PROGRESS: at 81.62% examples, 862052 words/s, in_qsize 0, out_qsize 0
2017-04-10 11:59:11,545 : INFO : Load

2017-04-10 12:19:24,577 : INFO : PROGRESS: at 93.54% examples, 858974 words/s, in_qsize 0, out_qsize 0
2017-04-10 12:19:44,584 : INFO : PROGRESS: at 93.74% examples, 858964 words/s, in_qsize 0, out_qsize 0
2017-04-10 12:20:04,601 : INFO : PROGRESS: at 93.94% examples, 858946 words/s, in_qsize 0, out_qsize 0
2017-04-10 12:20:19,569 : INFO : Loading new file for index: 1210000
2017-04-10 12:20:24,605 : INFO : PROGRESS: at 94.14% examples, 858921 words/s, in_qsize 0, out_qsize 3
2017-04-10 12:20:44,608 : INFO : PROGRESS: at 94.34% examples, 858927 words/s, in_qsize 0, out_qsize 0
2017-04-10 12:21:04,612 : INFO : PROGRESS: at 94.54% examples, 858932 words/s, in_qsize 0, out_qsize 0
2017-04-10 12:21:24,617 : INFO : PROGRESS: at 94.74% examples, 858940 words/s, in_qsize 0, out_qsize 0
2017-04-10 12:21:38,241 : INFO : Loading new file for index: 1220000
2017-04-10 12:21:44,624 : INFO : PROGRESS: at 94.94% examples, 858935 words/s, in_qsize 0, out_qsize 0
2017-04-10 12:22:04,628 : INFO : PROGR

CPU times: user 19h 13min 14s, sys: 13min 17s, total: 19h 26min 31s
Wall time: 2h 46min 54s


2017-04-10 12:30:15,623 : INFO : not storing attribute syn0norm
2017-04-10 12:30:15,624 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_7/model.wv.syn0.npy
2017-04-10 12:30:15,870 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_7/model.syn1neg.npy
2017-04-10 12:30:16,084 : INFO : not storing attribute cum_table
2017-04-10 12:30:34,585 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_7/model
2017-04-10 12:30:34,588 : INFO : ****************** Epoch 8 --- Working on doc2vec_size_200_w_2_type_dm_concat_0

2017-04-10 12:49:56,071 : INFO : PROGRESS: at 11.35% examples, 841053 words/s, in_qsize 0, out_qsize 0
2017-04-10 12:50:16,075 : INFO : PROGRESS: at 11.53% examples, 840782 words/s, in_qsize 0, out_qsize 0
2017-04-10 12:50:31,799 : INFO : Loading new file for index: 150000
2017-04-10 12:50:36,077 : INFO : PROGRESS: at 11.73% examples, 840446 words/s, in_qsize 0, out_qsize 0
2017-04-10 12:50:56,077 : INFO : PROGRESS: at 11.92% examples, 840214 words/s, in_qsize 0, out_qsize 0
2017-04-10 12:51:16,093 : INFO : PROGRESS: at 12.11% examples, 839796 words/s, in_qsize 0, out_qsize 1
2017-04-10 12:51:36,112 : INFO : PROGRESS: at 12.30% examples, 839406 words/s, in_qsize 1, out_qsize 0
2017-04-10 12:51:53,949 : INFO : Loading new file for index: 160000
2017-04-10 12:51:56,120 : INFO : PROGRESS: at 12.49% examples, 839375 words/s, in_qsize 0, out_qsize 3
2017-04-10 12:52:16,134 : INFO : PROGRESS: at 12.68% examples, 838907 words/s, in_qsize 0, out_qsize 0
2017-04-10 12:52:36,136 : INFO : PROGRES

2017-04-10 13:12:56,565 : INFO : PROGRESS: at 24.63% examples, 834510 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:13:16,572 : INFO : PROGRESS: at 24.83% examples, 834668 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:13:26,744 : INFO : Loading new file for index: 320000
2017-04-10 13:13:36,589 : INFO : PROGRESS: at 25.03% examples, 834814 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:13:56,593 : INFO : PROGRESS: at 25.23% examples, 835009 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:14:16,602 : INFO : PROGRESS: at 25.43% examples, 835228 words/s, in_qsize 0, out_qsize 1
2017-04-10 13:14:36,611 : INFO : PROGRESS: at 25.63% examples, 835414 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:14:43,275 : INFO : Loading new file for index: 330000
2017-04-10 13:14:56,619 : INFO : PROGRESS: at 25.83% examples, 835562 words/s, in_qsize 0, out_qsize 1
2017-04-10 13:15:16,624 : INFO : PROGRESS: at 26.04% examples, 835795 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:15:36,631 : INFO : PROGRES

2017-04-10 13:35:37,559 : INFO : Loading new file for index: 490000
2017-04-10 13:35:57,013 : INFO : PROGRESS: at 38.31% examples, 840532 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:36:17,013 : INFO : PROGRESS: at 38.51% examples, 840563 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:36:37,018 : INFO : PROGRESS: at 38.71% examples, 840547 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:36:55,380 : INFO : Loading new file for index: 500000
2017-04-10 13:36:57,036 : INFO : PROGRESS: at 38.91% examples, 840572 words/s, in_qsize 0, out_qsize 3
2017-04-10 13:37:17,027 : INFO : PROGRESS: at 39.10% examples, 840563 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:37:37,029 : INFO : PROGRESS: at 39.30% examples, 840553 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:37:57,034 : INFO : PROGRESS: at 39.50% examples, 840567 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:38:14,804 : INFO : Loading new file for index: 510000
2017-04-10 13:38:17,047 : INFO : PROGRESS: at 39.69% examples, 840549 words

2017-04-10 13:58:37,417 : INFO : PROGRESS: at 51.42% examples, 837113 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:58:57,418 : INFO : PROGRESS: at 51.62% examples, 837094 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:59:17,423 : INFO : PROGRESS: at 51.81% examples, 837059 words/s, in_qsize 0, out_qsize 1
2017-04-10 13:59:37,436 : INFO : PROGRESS: at 52.00% examples, 837056 words/s, in_qsize 0, out_qsize 0
2017-04-10 13:59:43,780 : INFO : Loading new file for index: 670000
2017-04-10 13:59:57,443 : INFO : PROGRESS: at 52.20% examples, 837063 words/s, in_qsize 0, out_qsize 0
2017-04-10 14:00:17,443 : INFO : PROGRESS: at 52.39% examples, 837070 words/s, in_qsize 0, out_qsize 0
2017-04-10 14:00:37,448 : INFO : PROGRESS: at 52.59% examples, 837067 words/s, in_qsize 0, out_qsize 0
2017-04-10 14:00:57,458 : INFO : PROGRESS: at 52.78% examples, 837060 words/s, in_qsize 0, out_qsize 0
2017-04-10 14:01:04,032 : INFO : Loading new file for index: 680000
2017-04-10 14:01:17,460 : INFO : PROGRES

2017-04-10 14:21:37,886 : INFO : PROGRESS: at 64.89% examples, 837798 words/s, in_qsize 1, out_qsize 1
2017-04-10 14:21:57,890 : INFO : PROGRESS: at 65.09% examples, 837806 words/s, in_qsize 0, out_qsize 4
2017-04-10 14:22:17,897 : INFO : PROGRESS: at 65.28% examples, 837837 words/s, in_qsize 0, out_qsize 0
2017-04-10 14:22:24,287 : INFO : Loading new file for index: 840000
2017-04-10 14:22:37,905 : INFO : PROGRESS: at 65.48% examples, 837890 words/s, in_qsize 0, out_qsize 1
2017-04-10 14:22:57,910 : INFO : PROGRESS: at 65.68% examples, 837979 words/s, in_qsize 0, out_qsize 0
2017-04-10 14:23:17,919 : INFO : PROGRESS: at 65.89% examples, 838062 words/s, in_qsize 0, out_qsize 1
2017-04-10 14:23:37,925 : INFO : PROGRESS: at 66.08% examples, 838133 words/s, in_qsize 0, out_qsize 0
2017-04-10 14:23:41,730 : INFO : Loading new file for index: 850000
2017-04-10 14:23:57,928 : INFO : PROGRESS: at 66.28% examples, 838206 words/s, in_qsize 0, out_qsize 1
2017-04-10 14:24:17,944 : INFO : PROGRES

2017-04-10 14:44:18,308 : INFO : PROGRESS: at 78.56% examples, 842015 words/s, in_qsize 0, out_qsize 2
2017-04-10 14:44:38,312 : INFO : PROGRESS: at 78.76% examples, 842078 words/s, in_qsize 0, out_qsize 1
2017-04-10 14:44:58,323 : INFO : PROGRESS: at 78.95% examples, 842114 words/s, in_qsize 0, out_qsize 6
2017-04-10 14:45:18,325 : INFO : PROGRESS: at 79.15% examples, 842127 words/s, in_qsize 0, out_qsize 2
2017-04-10 14:45:36,702 : INFO : Loading new file for index: 1020000
2017-04-10 14:45:38,338 : INFO : PROGRESS: at 79.35% examples, 842116 words/s, in_qsize 0, out_qsize 1
2017-04-10 14:45:58,352 : INFO : PROGRESS: at 79.54% examples, 842095 words/s, in_qsize 0, out_qsize 0
2017-04-10 14:46:18,358 : INFO : PROGRESS: at 79.74% examples, 842080 words/s, in_qsize 0, out_qsize 0
2017-04-10 14:46:38,363 : INFO : PROGRESS: at 79.93% examples, 842079 words/s, in_qsize 1, out_qsize 1
2017-04-10 14:46:57,276 : INFO : Loading new file for index: 1030000
2017-04-10 14:46:58,370 : INFO : PROGR

2017-04-10 15:07:18,740 : INFO : PROGRESS: at 92.33% examples, 844335 words/s, in_qsize 0, out_qsize 0
2017-04-10 15:07:38,739 : INFO : PROGRESS: at 92.54% examples, 844391 words/s, in_qsize 0, out_qsize 2
2017-04-10 15:07:39,693 : INFO : Loading new file for index: 1190000
2017-04-10 15:07:58,739 : INFO : PROGRESS: at 92.74% examples, 844432 words/s, in_qsize 0, out_qsize 1
2017-04-10 15:08:18,746 : INFO : PROGRESS: at 92.94% examples, 844488 words/s, in_qsize 1, out_qsize 0
2017-04-10 15:08:38,753 : INFO : PROGRESS: at 93.14% examples, 844527 words/s, in_qsize 0, out_qsize 0
2017-04-10 15:08:55,650 : INFO : Loading new file for index: 1200000
2017-04-10 15:08:58,757 : INFO : PROGRESS: at 93.34% examples, 844565 words/s, in_qsize 0, out_qsize 0
2017-04-10 15:09:18,768 : INFO : PROGRESS: at 93.54% examples, 844602 words/s, in_qsize 0, out_qsize 0
2017-04-10 15:09:38,776 : INFO : PROGRESS: at 93.75% examples, 844629 words/s, in_qsize 0, out_qsize 0
2017-04-10 15:09:58,775 : INFO : PROGR

CPU times: user 20h 2min 4s, sys: 13min 12s, total: 20h 15min 17s
Wall time: 2h 49min 21s


2017-04-10 15:19:56,693 : INFO : not storing attribute syn0norm
2017-04-10 15:19:56,694 : INFO : storing np array 'syn0' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_8/model.wv.syn0.npy
2017-04-10 15:19:56,930 : INFO : storing np array 'syn1neg' to /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_8/model.syn1neg.npy
2017-04-10 15:19:57,131 : INFO : not storing attribute cum_table
2017-04-10 15:20:12,114 : INFO : saved /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_8/model
2017-04-10 15:20:12,115 : INFO : Getting Validation Embeddings
2017-04-10 15:20:12,115 : INFO : ===== Getting ve

NameError: global name 'BatchClass' is not defined

## Inference Only (if needed)

In [50]:
# Override the worker count set near the top of the notebook (16) for the
# inference-only section; it is assigned to the reloaded model's `workers` below.
NUM_CORES = 32

In [51]:
# Reload the Doc2Vec checkpoint saved for the chosen epoch so the inference
# cells below can run without retraining.
epoch = 8
GLOBAL_VARS.MODEL_NAME = placeholder_model_name.format(epoch)

# Build the checkpoint path once; it is used for the existence check, the load
# and the warning message.
saved_model_path = os.path.join(doc2vec_model_save_location, GLOBAL_VARS.MODEL_NAME, MODEL_PREFIX)
if os.path.exists(saved_model_path):
    doc2vec_model = Doc2Vec.load(saved_model_path)
    # Use the (possibly overridden) core count for subsequent inference work.
    doc2vec_model.workers = NUM_CORES
    GLOBAL_VARS.DOC2VEC_MODEL = doc2vec_model
else:
    # A missing checkpoint used to be skipped silently, leaving `doc2vec_model`
    # undefined or pointing at a previously loaded model. Surface it instead.
    logging.warning("Doc2Vec model not found at %s; doc2vec_model was not (re)loaded", saved_model_path)

2017-04-10 17:03:27,806 : INFO : loading Doc2Vec object from /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_8/model
2017-04-10 17:03:36,871 : INFO : loading docvecs recursively from /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_8/model.docvecs.* with mmap=None
2017-04-10 17:03:36,872 : INFO : loading doctag_syn0 from /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None_model_1_document/epoch_8/model.docvecs.doctag_syn0.npy with mmap=None
2017-04-10 17:03:37,677 : INFO : loading wv recursively from /mnt/virtual-machines/data/parameter_search_doc2vec_models_1_document/full/doc2vec_size_200_w_2_type_dm_concat_0_mean

In [52]:
# Echo the loaded model object so the notebook displays its repr.
doc2vec_model

<gensim.models.doc2vec.Doc2Vec at 0x7fcd4b45d290>

In [None]:
# Compute validation-set vectors from the reloaded model via the
# inference-only helper (the helper is defined elsewhere in the notebook).
Xv = get_extended_docs_with_inference_data_only(
    doc2vec_model,
    VALIDATION_DICT,
    validation_preprocessed_files_prefix,
    level,
    model_name,
)

2017-04-10 17:12:48,776 : INFO : ===== Getting vectors with inference
2017-04-10 17:12:49,061 : INFO : Loading new file for index: 0
2017-04-10 17:13:11,811 : INFO : Finished: 1000 tags
2017-04-10 17:13:33,430 : INFO : Finished: 2000 tags
2017-04-10 17:13:56,085 : INFO : Finished: 3000 tags
2017-04-10 17:14:17,651 : INFO : Finished: 4000 tags
2017-04-10 17:14:39,593 : INFO : Finished: 5000 tags
2017-04-10 17:15:02,056 : INFO : Finished: 6000 tags
2017-04-10 17:15:24,133 : INFO : Finished: 7000 tags
2017-04-10 17:15:45,809 : INFO : Finished: 8000 tags
2017-04-10 17:16:07,193 : INFO : Finished: 9000 tags
2017-04-10 17:16:08,702 : INFO : Loading new file for index: 10000
2017-04-10 17:16:29,323 : INFO : Finished: 10000 tags
2017-04-10 17:16:51,594 : INFO : Finished: 11000 tags
2017-04-10 17:17:13,626 : INFO : Finished: 12000 tags
2017-04-10 17:17:36,071 : INFO : Finished: 13000 tags
2017-04-10 17:17:58,521 : INFO : Finished: 14000 tags
2017-04-10 17:18:20,964 : INFO : Finished: 15000 tags

2017-04-10 18:02:02,467 : INFO : Finished: 134000 tags
2017-04-10 18:02:23,534 : INFO : Finished: 135000 tags
2017-04-10 18:02:45,161 : INFO : Finished: 136000 tags
2017-04-10 18:03:06,566 : INFO : Finished: 137000 tags
2017-04-10 18:03:28,294 : INFO : Finished: 138000 tags
2017-04-10 18:03:49,788 : INFO : Finished: 139000 tags
2017-04-10 18:03:51,478 : INFO : Loading new file for index: 140000
2017-04-10 18:04:11,539 : INFO : Finished: 140000 tags
2017-04-10 18:04:32,958 : INFO : Finished: 141000 tags
2017-04-10 18:04:54,908 : INFO : Finished: 142000 tags
2017-04-10 18:05:16,756 : INFO : Finished: 143000 tags
2017-04-10 18:05:39,417 : INFO : Finished: 144000 tags
2017-04-10 18:06:01,588 : INFO : Finished: 145000 tags
2017-04-10 18:06:23,596 : INFO : Finished: 146000 tags
2017-04-10 18:06:45,153 : INFO : Finished: 147000 tags
2017-04-10 18:07:07,241 : INFO : Finished: 148000 tags
2017-04-10 18:07:29,414 : INFO : Finished: 149000 tags
2017-04-10 18:07:30,810 : INFO : Loading new file fo

2017-04-10 18:56:07,656 : INFO : Finished: 282000 tags
2017-04-10 18:56:29,354 : INFO : Finished: 283000 tags
2017-04-10 18:56:50,790 : INFO : Finished: 284000 tags
2017-04-10 18:57:13,031 : INFO : Finished: 285000 tags
2017-04-10 18:57:35,284 : INFO : Finished: 286000 tags
2017-04-10 18:57:57,129 : INFO : Finished: 287000 tags
2017-04-10 18:58:19,175 : INFO : Finished: 288000 tags
2017-04-10 18:58:41,498 : INFO : Finished: 289000 tags
2017-04-10 18:58:43,084 : INFO : Loading new file for index: 290000
2017-04-10 18:59:03,646 : INFO : Finished: 290000 tags
2017-04-10 18:59:25,522 : INFO : Finished: 291000 tags
2017-04-10 18:59:47,465 : INFO : Finished: 292000 tags
2017-04-10 19:00:08,736 : INFO : Finished: 293000 tags
2017-04-10 19:00:30,792 : INFO : Finished: 294000 tags
2017-04-10 19:00:52,469 : INFO : Finished: 295000 tags
2017-04-10 19:01:15,068 : INFO : Finished: 296000 tags
2017-04-10 19:01:36,886 : INFO : Finished: 297000 tags
2017-04-10 19:01:58,471 : INFO : Finished: 298000 ta

#### Testing inference

In [55]:
# Smoke test: infer a vector for the first validation document only and print it.
inference_docs_iterator = BatchWrapper(validation_preprocessed_files_prefix, batch_size=None, level=level, level_type=model_name)
# Fixed: the loop previously iterated over the misspelled name
# `inference_doczs_iterator`, which raises NameError in a fresh kernel.
for doc_id, doc_tokens in inference_docs_iterator:
    rep = doc2vec_model.infer_vector(doc_tokens)
    print (doc_id, rep)
    # One document is enough to confirm that inference works.
    break

('08521002', array([  1.13558674e+00,  -2.01971769e-01,  -9.30447519e-01,
         9.55632687e-01,   5.11517346e-01,   4.34441900e+00,
        -3.77764761e-01,  -1.11617422e+00,  -2.15896085e-01,
         9.09354746e-01,   5.74674904e-01,  -2.07049704e+00,
        -7.20400810e-01,   4.94136661e-01,  -1.74060893e+00,
        -2.17272949e+00,  -4.39270258e-01,  -1.51936769e+00,
         5.65607429e-01,  -4.58835810e-01,  -1.69598356e-01,
         1.77733886e+00,   3.66123140e-01,   1.38953611e-01,
        -1.04259264e+00,   8.84979665e-01,  -8.56729895e-02,
        -6.04329109e-01,   4.42179322e-01,   1.08561194e+00,
        -2.49654725e-01,   3.02951038e-01,  -3.80307257e-01,
         1.32433748e+00,   7.18038738e-01,   7.99864233e-01,
        -3.60305488e-01,  -3.32749695e-01,   1.86409019e-02,
        -1.16298962e+00,  -2.36521304e-01,   8.52507114e-01,
        -4.25269688e-03,  -2.73190904e+00,  -1.39228487e+00,
         3.14658254e-01,   1.19927609e+00,  -8.86219382e-01,
        -2.

In [53]:
# Display the model's `wv.syn0` array (presumably gensim's word-vector weights — confirm against the gensim version in use).
doc2vec_model.wv.syn0

array([[ 0.24962339,  0.03808838, -0.38492572, ...,  0.81019139,
        -0.0872335 ,  0.00503489],
       [-0.24393913, -0.9072656 , -0.08245134, ..., -0.12438237,
        -0.10501056,  0.07241193],
       [ 0.06769085, -0.22004843,  0.05649997, ...,  0.15331532,
        -0.87121236, -0.71148068],
       ..., 
       [ 0.02257917,  0.18380728, -0.19475998, ...,  0.72972393,
        -0.03356596, -0.29145467],
       [-0.20255305, -0.25994578,  0.31640032, ..., -0.02623975,
         0.41660461, -0.45980361],
       [ 1.09419656, -0.97489876, -0.3509953 , ...,  0.82430571,
         0.02756385,  0.7905944 ]], dtype=float32)