# Fix Coverage Errors by Switching from y_binary to y_score

In [15]:
import json
import nltk
from nltk.tokenize import RegexpTokenizer
import string
import math
import os
import time
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from collections import namedtuple
import random
import cPickle as pickle

from sklearn.metrics import coverage_error
import sklearn.metrics
from sklearn.multiclass import OneVsRestClassifier
from sklearn import linear_model

from gensim.models.doc2vec import Doc2Vec, LabeledSentence

import logging
from logging import info

from thesis.utils.metrics import *

In [2]:
# basicConfig() does nothing when the root logger already has handlers,
# so detach whatever is attached before configuring.
root = logging.getLogger()
for handler in list(root.handlers):
    root.removeHandler(handler)
# adds a default StreamHandler
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s : %(levelname)s : %(message)s',
)
#root.addHandler(logging.StreamHandler())

In [8]:
# File-name templates; the '{}' placeholder is filled with the classification
# type through str.format().
TRAINING_METRICS_FILENAME = '{}_training_metrics.pkl'
VALIDATION_METRICS_FILENAME = '{}_validation_metrics.pkl'
CLASSIFIER_FILE = '{}_classifier.pkl'

In [9]:
# Base directories (each one keeps its trailing slash).
root_location = "/big/s/shalaby/"
exports_location = os.path.join(root_location, "exported_data/")
svm_location = os.path.join(root_location, "benchmarking_svm/")

training_file = os.path.join(root_location, "docs_output.json")

# Pickled artefacts stored under exports_location.
doc_classifications_map_file = os.path.join(exports_location, "doc_classification_map.pkl")
sections_file = os.path.join(exports_location, "sections.pkl")
classes_file = os.path.join(exports_location, "classes.pkl")
subclasses_file = os.path.join(exports_location, "subclasses.pkl")
classifications_output = os.path.join(exports_location, "classifications.pkl")
training_docs_list_file = os.path.join(exports_location, "training_docs_list.pkl")
validation_docs_list_file = os.path.join(exports_location, "validation_docs_list.pkl")
test_docs_list_file = os.path.join(exports_location, "test_docs_list.pkl")

#### Load Classification Objects

In [5]:
%%time
doc_classification_map = pickle.load(open(doc_classifications_map_file))
sections = pickle.load(open(sections_file))
classes = pickle.load(open(classes_file))
subclasses = pickle.load(open(subclasses_file))
training_docs_list = pickle.load(open(training_docs_list_file))
validation_docs_list = pickle.load(open(validation_docs_list_file))

CPU times: user 29.6 s, sys: 1.5 s, total: 31.1 s
Wall time: 31.4 s


In [6]:
import re
def get_subdirectories(d):
    """Return the names (not the full paths) of the directories directly inside `d`."""
    subdirectory_names = []
    for entry in os.listdir(d):
        # os.listdir yields bare names, so rebuild the path before testing it
        if os.path.isdir(os.path.join(d, entry)):
            subdirectory_names.append(entry)
    return subdirectory_names
def natural_sort(l):
    """
    Return a new list with the strings of `l` in natural order: runs of ASCII
    digits compare as numbers, everything else compares case-insensitively
    (so 'epoch_2' comes before 'epoch_10').
    """
    def natural_key(name):
        # the capturing group makes re.split keep the digit runs as their own chunks
        chunks = re.split('([0-9]+)', name)
        return [int(chunk) if chunk.isdigit() else chunk.lower() for chunk in chunks]

    return sorted(l, key=natural_key)

## Fix Coverage Error for Benchmarking

In [None]:
# Evaluate at the "sections" level: the tag used in file names and the label set.
classifications_type = "sections"
classifications = sections

In [None]:
%%time
# For every sparse feature representation in data_types, load the validation
# data once, then re-score each saved classifier found under svm_location and
# write a fresh validation-metrics pickle next to it.
# get_label_data and get_metrics are not defined in this notebook; presumably
# they come from the star-import of thesis.utils.metrics - confirm.
# data_types = ["bm25", "tf_idf", "sublinear_tf_idf", "sublinear_tf", "tf"]
data_types = ["tf_idf", "sublinear_tf_idf", "sublinear_tf", "tf"]
# data_types = ["tf"]
for data_type in data_types:
    # the feature type doubles as the name of the per-classifier sub-directory
    preprocessor = data_type
    info("=============== {} Being Evaluated ================".format(data_type))
    
    data_training_location = exports_location + "{}_training_sparse_data.pkl".format(data_type)
    data_training_docids_location = exports_location + "{}_training_sparse_docids.pkl".format(data_type)
    data_validation_location = exports_location + "{}_validation_sparse_data.pkl".format(data_type)
    data_validation_docids_location = exports_location + "{}_validation_sparse_docids.pkl".format(data_type)
    
#     # Get the training data
#     info('Getting Training Data')
#     %time X = pickle.load(open(data_training_location, "r"))
#     training_data_docids = pickle.load(open(data_training_docids_location, "r"))
#     %time y = get_label_data(classifications, training_data_docids, doc_classification_map)

#     print y
#     print y.shape
    
    # Get the validation data
    info('Getting Valdiation Data')
    %time Xv = pickle.load(open(data_validation_location,'r'))
    validation_data_docids = pickle.load(open(data_validation_docids_location, "r"))
    %time yv = get_label_data(classifications, validation_data_docids, doc_classification_map)
    
    print yv
    print yv.shape
        
    # every sub-directory of svm_location is treated as one classifier configuration
    for classifier in natural_sort(get_subdirectories(svm_location)):
        print classifier
        classifier_path = os.path.join(svm_location, classifier)
        preprocessor_path = os.path.join(classifier_path, preprocessor)

        # configurations without a directory for this feature type are skipped
        if not os.path.exists(preprocessor_path):
            info("{} doesn't exist, skipping".format(preprocessor_path)) 
            continue
            
        info('Loading Classifier')
        clf = pickle.load(open(os.path.join(preprocessor_path, CLASSIFIER_FILE.format(classifications_type)), 'r'))
        
#         # Training Metrics
#         info('Evaluating on Training Data')
#         %time yp = clf.predict(X)
#         %time yp_score = clf.decision_function(X)
#         print yp
#         info('Calculating training metrics')
#         training_metrics = get_metrics(y, yp_score, yp)
#         print "** Training Metrics: Cov Err: {:.3f}, Avg Labels: {:.3f}, \n\t\t Top 1: {:.3f}, Top 3: {:.3f}, Top 5: {:.3f}, \n\t\t F1 Micro: {:.3f}, F1 Macro: {:.3f}, Total Pos: {:,d}".format(
#             training_metrics['coverage_error'], training_metrics['average_num_of_labels'], 
#             training_metrics['top_1'], training_metrics['top_3'], training_metrics['top_5'], 
#             training_metrics['f1_micro'], training_metrics['f1_macro'], training_metrics['total_positive'])

        # Validation Metrics
        info('Evaluating on Validation Data')
        # get_metrics is handed both the decision_function scores and the binary
        # predictions (presumably the scores drive the ranking metrics - confirm)
        %time yvp = clf.predict(Xv)
        %time yvp_score = clf.decision_function(Xv)
        print yvp
        info('Calculating validation metrics')
        validation_metrics = get_metrics(yv, yvp_score, yvp)
        print "** Validation Metrics: Cov Err: {:.3f}, Avg Labels: {:.3f}, \n\t\t Top 1: {:.3f}, Top 3: {:.3f}, Top 5: {:.3f}, \n\t\t F1 Micro: {:.3f}, F1 Macro: {:.3f}, Total Pos: {:,d}".format(
            validation_metrics['coverage_error'], validation_metrics['average_num_of_labels'], 
            validation_metrics['top_1'], validation_metrics['top_3'], validation_metrics['top_5'], 
            validation_metrics['f1_micro'], validation_metrics['f1_macro'], validation_metrics['total_positive'])

        # opening with "w" replaces any metrics file already stored for this classifier
        info('Dumping the Metrics')
#         pickle.dump(training_metrics, open(os.path.join(preprocessor_path, TRAINING_METRICS_FILENAME.format(classifications_type)), "w"))
        pickle.dump(validation_metrics, open(os.path.join(preprocessor_path, VALIDATION_METRICS_FILENAME.format(classifications_type)), "w"))
        

## Fix Coverage Error for Doc2vec

In [10]:
from gensim.models.doc2vec import Doc2Vec

2017-01-17 00:49:47,865 : INFO : Pattern library is not installed, lemmatization won't be available.
2017-01-17 00:49:47,903 : INFO : Could not import Theano, will use standard float for default ShardedCorpus dtype.
2017-01-17 00:49:48,126 : INFO : 'pattern' package not found; tag filters are not available for English


In [11]:
# Root of the doc2vec parameter-search results: <model name>/<epoch>/<classifier>/
ROOT_RESULTS_LOCATION = '/big/s/shalaby/parameter_search_doc2vec_models_new/full/'

# Names of the artefacts looked up inside the epoch and classifier directories.
CLASSIFIER = "classifier.pkl"
VALIDATION_MATRIX = "validation_matrix.pkl"
MODEL_PREFIX = "model"

# Prefixes of the preprocessed validation documents and of their doc ids.
preprocessed_location = root_location + "preprocessed_data/"
validation_preprocessed_docids_files_prefix = preprocessed_location + "validation_docs_merged_docids_preprocessed-"
validation_preprocessed_files_prefix = preprocessed_location + "validation_docs_merged_data_preprocessed-"

In [29]:
# Passed as random_state to SGDClassifier when a classifier has to be trained.
SVM_SEED = 1234
# Size of the thread pool used for doc2vec inference on the validation documents.
NUM_CORES = 22

In [18]:
# Used as a settings holder: the cells shown here assign attributes on the class
# itself (e.g. GLOBAL_VARS.MODEL_NAME = ...) rather than creating instances.
GLOBAL_VARS = namedtuple('GLOBAL_VARS', 'MODEL_NAME DOC2VEC_MODEL')

In [12]:
classifications_type = "sections"
classifications = sections
# From this cell on the constant holds the final file name, not a '{}' template.
VALIDATION_METRICS_FILENAME = '{}_validation_metrics.pkl'.format(classifications_type)

In [25]:
class OneHotEncoder(object):
    """
    Turns sets of classification labels into fixed-length 0/1 label vectors.

    The column of a label is its position in the `classifications` sequence
    given to the constructor.
    """

    def __init__(self, classifications):
        """
        classifications: sequence of hashable labels; its order fixes the column order
        """
        self.classifications = classifications
        # Only the column index of each label is needed here; the vectors
        # themselves are built on demand in get_label_vector.
        self.one_hot_indices = {clssf: i for i, clssf in enumerate(classifications)}

    def get_label_vector(self, labels):
        """
        labels: iterable with the labels assigned to the instance

        Returns a list of ints, as long as `classifications`, holding 1 in the
        column of every given label and 0 elsewhere.
        Raises KeyError for a label that was not passed to the constructor.
        """
        output_vector = [0] * len(self.classifications)
        for label in labels:
            output_vector[self.one_hot_indices[label]] = 1

        return output_vector

    
def get_training_data(doc2vec_model, classifications):
    """
    Collect the document vectors and label vectors of the training documents.

    Walks the notebook-level `training_docs_list`, reads each vector from
    `doc2vec_model.docvecs` and each document's labels from the notebook-level
    `doc_classification_map`; labels that are not in `classifications` are dropped.

    Returns (training_data, training_labels): a list holding one plain list of
    vector components per document, and a numpy array of the label vectors.
    """
    encoder = OneHotEncoder(classifications)
    training_data = []
    label_rows = []
    for doc_id in training_docs_list:
        # copy into a plain list (the stored vector may be a memmap slice)
        training_data.append(list(doc2vec_model.docvecs[doc_id][:]))
        doc_labels = [label for label in doc_classification_map[doc_id] if label in classifications]
        label_rows.append(encoder.get_label_vector(doc_labels))
    return training_data, np.array(label_rows)

def get_validation_docs_with_inference_new(doc2vec_model, doc_classification_map, classifications, 
                                           val_docs_list, val_preprocessed_files_prefix, 
                                           val_preprocessed_docids_files_prefix):
    """
    Use the trained doc2vec model to get the paragraph vector representations of the validation documents

    The matrix of vectors is cached as VALIDATION_MATRIX inside the directory
    named by GLOBAL_VARS.MODEL_NAME: when that file exists it is loaded,
    otherwise the vectors are inferred with a thread pool and then saved there.

    doc2vec_model: model whose infer_vector() is applied to each document's tokens
    doc_classification_map: mapping of doc id -> labels of that document
    classifications: the labels to keep; also fixes the label column order
    val_docs_list: doc ids, in the row order of the returned arrays
    val_preprocessed_files_prefix, val_preprocessed_docids_files_prefix:
        passed on to DocumentBatchGenerator when inference is needed

    Returns (validation_vectors_matrix, validation_labels).
    Raises KeyError if inference produced no vector for an id in val_docs_list.
    """
    # Imported here because the notebook's import cell brings in neither name.
    import itertools
    from multiprocessing.pool import ThreadPool

    def infer_one_doc(doc_tuple):
        #doc2vec_model.random = np.random.RandomState(DOC2VEC_SEED)
        doc_id, doc_tokens = doc_tuple
        rep = doc2vec_model.infer_vector(doc_tokens)
        return (doc_id, rep)

    # A single path for both the cache lookup and the cache write, so a matrix
    # saved by one call is found by the next.
    validation_matrix_path = os.path.join(GLOBAL_VARS.MODEL_NAME, VALIDATION_MATRIX)

    if os.path.exists(validation_matrix_path):
        info("===== Loading validation vectors")
        with open(validation_matrix_path, 'rb') as matrix_file:
            validation_vectors_matrix = pickle.load(matrix_file)
    else:
        info("===== Getting validation vectors with inference")

        # Multi-threaded inference
        # NOTE(review): DocumentBatchGenerator is not defined in the visible cells;
        # presumably it is provided by the thesis package - confirm.
        validation_docs_iterator = DocumentBatchGenerator(val_preprocessed_files_prefix,
                                                          val_preprocessed_docids_files_prefix, batch_size=None)
        generator_func = iter(validation_docs_iterator)
        validation_documents_reps = {}
        mini_batch_size = 1000
        pool = ThreadPool(NUM_CORES)
        try:
            # map consumes the whole iterator on the spot, so we have to use itertools.islice to fake mini-batching
            while True:
                threaded_reps_partial = pool.map(infer_one_doc, itertools.islice(generator_func, mini_batch_size))
                info("Finished: {}".format(str(validation_docs_iterator.curr_index)))
                if not threaded_reps_partial:
                    break
                validation_documents_reps.update(threaded_reps_partial)
        finally:
            # release the worker threads even when inference fails part-way
            pool.close()
            pool.join()

        # create matrix for the validation vectors
        validation_vectors_matrix = np.array([validation_documents_reps[validation_doc_id]
                                              for validation_doc_id in val_docs_list])
        with open(validation_matrix_path, 'wb') as matrix_file:
            pickle.dump(validation_vectors_matrix, matrix_file)

    # The labels are never cached; rebuild them in val_docs_list order.
    one_hot_encoder = OneHotEncoder(classifications)
    validation_labels = []
    for validation_doc_id in val_docs_list:
        val_labels = [classf for classf in doc_classification_map[validation_doc_id] if classf in classifications]
        validation_labels.append(one_hot_encoder.get_label_vector(val_labels))
    validation_labels = np.array(validation_labels)

    return validation_vectors_matrix, validation_labels

In [32]:
# Worked example of how a classifier directory name encodes its SVM settings:
# the values sit at positions 2, 4 and 6 of the '_'-separated name and are
# still strings after the split.
ee = 'svm_iter_100_reg_0.001_classweights_balanced'.split('_')
SVM_ITER, SVM_REG, SVM_CLASSWEIGHTS = ee[2:7:2]

In [33]:
print SVM_ITER
print SVM_REG
print SVM_CLASSWEIGHTS

100
0.001
balanced


In [39]:
# Model directory names that the evaluation loop skips (presumably because an
# earlier run already processed them - confirm).
finished_doc2vec_methods = [
    'doc2vec_size_100_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None',
    'doc2vec_size_200_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None',
    'doc2vec_size_200_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None',
    'doc2vec_size_500_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None',
    'doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None',
]

In [37]:
for doc2vec_method in natural_sort(get_subdirectories(ROOT_RESULTS_LOCATION)):
    if doc2vec_method in finished_doc2vec_methods: 
        info('skipping {}'.format(doc2vec_method))
        continue
        
    print '********************* {}'.format(doc2vec_method)
    epochs_path = os.path.join(ROOT_RESULTS_LOCATION, doc2vec_method)
    # this will have the structure dict of classifiers -> dict of metrics -> list of values throughout the epoch
    for epoch in natural_sort(get_subdirectories(epochs_path)):
        print epoch
        epoch_path =  os.path.join(epochs_path, epoch)
        
        if os.path.exists(os.path.join(epoch_path, MODEL_PREFIX)):
            doc2vec_model = Doc2Vec.load(os.path.join(epoch_path, MODEL_PREFIX))
        else:
            info('No Doc2vec model found for {}. Exiting..'.format(epoch))
            break
            
        GLOBAL_VARS.MODEL_NAME = epoch_path

        # Validation Metrics
        info('Getting Validation Embeddings')
        try:
            Xv, yv = get_validation_docs_with_inference_new(doc2vec_model, doc_classification_map, classifications, 
                                                            validation_docs_list, validation_preprocessed_files_prefix,
                                                            validation_preprocessed_docids_files_prefix)
        except:
            info('No More Validation Embeddings to load, exiting.....')
            
        for classifier in get_subdirectories(epoch_path):
            print classifier
            
            classifier_path = os.path.join(epoch_path, classifier)
            if(os.path.exists(os.path.join(classifier_path, CLASSIFIER))):
                info('Loading Classifier')
                clf = pickle.load(open(os.path.join(classifier_path, CLASSIFIER), 'r'))
            else:
                
                info('No Classifier found for {} in {}'.format(classifier, epoch))
                info('Getting training Data')
                X, y = get_training_data(doc2vec_model, classifications)
                info('Training Classifier')
                classifier_parts = classifier.split('_')
                SVM_ITERATIONS = classifier[2]
                SVM_REG = classifier[4]
                SVM_CLASS_WEIGHTS = classifier[6] if classifier[6] != 'None' else None
                clf = OneVsRestClassifier(linear_model.SGDClassifier(loss='hinge', penalty='l2', 
                                                                     #alpha is the 1/C parameter
                                                                     alpha=SVM_REG, fit_intercept=True, n_iter=SVM_ITERATIONS,
                                                                     #n_jobs=-1 means use all cpus
                                                                     shuffle=True, verbose=0, n_jobs=1,
                                                                     #eta0 is the learning rate when we use constant configuration
                                                                     random_state=SVM_SEED, learning_rate='optimal', eta0=0.0, 
                                                                     class_weight=SVM_CLASS_WEIGHTS, warm_start=False), n_jobs=1)

                # Training of a classifier
                clf.fit(X,y)
                pickle.dump(clf, open(os.path.join(classifier_path, CLASSIFIER), 'w'))

            info('Evaluating on Validation Data')
            yvp = clf.predict(Xv)
            yvp_score = clf.decision_function(Xv)
            print yvp
            info('Getting Validation Metrics')
            validation_metrics = get_metrics(yv, yvp_score, yvp)
            print "** Validation Metrics: Cov Err: {:.3f}, Avg Labels: {:.3f}, \n\t\t Top 1: {:.3f}, Top 3: {:.3f}, Top 5: {:.3f}, \n\t\t F1 Micro: {:.3f}, F1 Macro: {:.3f}, Total Pos: {:,d}".format(
                validation_metrics['coverage_error'], validation_metrics['average_num_of_labels'], 
                validation_metrics['top_1'], validation_metrics['top_3'], validation_metrics['top_5'], 
                validation_metrics['f1_micro'], validation_metrics['f1_macro'], validation_metrics['total_positive'])
            
            pickle.dump(validation_metrics, open(os.path.join(classifier_path, VALIDATION_METRICS_FILENAME), 'w'))


2017-01-17 20:33:55,231 : INFO : skipping doc2vec_size_100_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None
2017-01-17 20:33:55,233 : INFO : skipping doc2vec_size_200_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None
2017-01-17 20:33:55,234 : INFO : skipping doc2vec_size_200_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None
2017-01-17 20:33:55,234 : INFO : skipping doc2vec_size_500_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None
2017-01-17 20:33:55,236 : INFO : loading Doc2Vec object from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_1/model


********************* doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None
epoch_1


2017-01-17 20:34:24,725 : INFO : loading docvecs recursively from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_1/model.docvecs.* with mmap=None
2017-01-17 20:34:24,726 : INFO : loading doctag_syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_1/model.docvecs.doctag_syn0.npy with mmap=None
2017-01-17 20:35:03,827 : INFO : loading syn1neg from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_1/model.syn1neg.npy with mmap=None
2017-01-17 20:35:19,049 : INFO : loading syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_1/model.syn0.npy with mmap=None
2017-01-17 20:35

svm_iter_100_reg_0.01_classweights_balanced


2017-01-17 20:39:39,699 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [1 0 0 ..., 1 1 0]
 [0 0 0 ..., 0 1 0]
 ..., 
 [1 1 0 ..., 0 0 0]
 [1 0 0 ..., 0 1 0]
 [1 1 0 ..., 0 0 0]]


2017-01-17 20:40:09,343 : INFO : loading Doc2Vec object from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_2/model


** Validation Metrics: Cov Err: 2.073, Avg Labels: 1.150, 
		 Top 1: 0.625, Top 3: 0.867, Top 5: 0.949, 
		 F1 Micro: 0.554, F1 Macro: 0.459, Total Pos: 707,179
epoch_2


2017-01-17 20:40:16,731 : INFO : loading docvecs recursively from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_2/model.docvecs.* with mmap=None
2017-01-17 20:40:16,732 : INFO : loading doctag_syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_2/model.docvecs.doctag_syn0.npy with mmap=None
2017-01-17 20:40:45,519 : INFO : loading syn1neg from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_2/model.syn1neg.npy with mmap=None
2017-01-17 20:40:55,220 : INFO : loading syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_2/model.syn0.npy with mmap=None
2017-01-17 20:41

svm_iter_100_reg_0.01_classweights_balanced


2017-01-17 20:44:39,707 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [1 0 0 ..., 1 1 0]
 [0 0 0 ..., 0 1 1]
 ..., 
 [1 1 0 ..., 0 0 0]
 [1 0 0 ..., 0 1 0]
 [0 1 0 ..., 0 0 0]]


2017-01-17 20:45:09,326 : INFO : loading Doc2Vec object from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_3/model


** Validation Metrics: Cov Err: 1.935, Avg Labels: 1.150, 
		 Top 1: 0.653, Top 3: 0.890, Top 5: 0.963, 
		 F1 Micro: 0.579, F1 Macro: 0.481, Total Pos: 700,606
epoch_3


2017-01-17 20:45:38,010 : INFO : loading docvecs recursively from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_3/model.docvecs.* with mmap=None
2017-01-17 20:45:38,011 : INFO : loading doctag_syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_3/model.docvecs.doctag_syn0.npy with mmap=None
2017-01-17 20:46:08,076 : INFO : loading syn1neg from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_3/model.syn1neg.npy with mmap=None
2017-01-17 20:46:19,251 : INFO : loading syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_3/model.syn0.npy with mmap=None
2017-01-17 20:46

svm_iter_100_reg_0.01_classweights_balanced


2017-01-17 20:48:49,005 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [1 1 0 ..., 1 1 0]
 [0 0 0 ..., 0 1 0]
 ..., 
 [1 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 1 0]
 [0 1 0 ..., 0 0 0]]
** Validation Metrics: Cov Err: 1.916, Avg Labels: 1.150, 
		 Top 1: 0.662, Top 3: 0.892, Top 5: 0.964, 
		 F1 Micro: 0.580, F1 Macro: 0.485, Total Pos: 706,814


2017-01-17 20:49:17,758 : INFO : loading Doc2Vec object from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_4/model


epoch_4


2017-01-17 20:49:44,393 : INFO : loading docvecs recursively from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_4/model.docvecs.* with mmap=None
2017-01-17 20:49:44,394 : INFO : loading doctag_syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_4/model.docvecs.doctag_syn0.npy with mmap=None
2017-01-17 20:50:26,363 : INFO : loading syn1neg from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_4/model.syn1neg.npy with mmap=None
2017-01-17 20:50:41,321 : INFO : loading syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_4/model.syn0.npy with mmap=None
2017-01-17 20:50

svm_iter_100_reg_0.01_classweights_balanced


2017-01-17 20:53:30,040 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [1 1 0 ..., 1 0 0]
 [0 0 0 ..., 0 1 1]
 ..., 
 [1 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 1 0]
 [0 1 0 ..., 0 0 0]]
** Validation Metrics: Cov Err: 1.876, Avg Labels: 1.150, 
		 Top 1: 0.667, Top 3: 0.901, Top 5: 0.969, 
		 F1 Micro: 0.591, F1 Macro: 0.493, Total Pos: 693,444


2017-01-17 20:53:59,165 : INFO : loading Doc2Vec object from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_5/model


epoch_5


2017-01-17 20:54:30,680 : INFO : loading docvecs recursively from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_5/model.docvecs.* with mmap=None
2017-01-17 20:54:30,682 : INFO : loading doctag_syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_5/model.docvecs.doctag_syn0.npy with mmap=None
2017-01-17 20:55:21,013 : INFO : loading syn1neg from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_5/model.syn1neg.npy with mmap=None
2017-01-17 20:55:38,123 : INFO : loading syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_5/model.syn0.npy with mmap=None
2017-01-17 20:55

svm_iter_100_reg_0.01_classweights_balanced


2017-01-17 20:58:18,586 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [1 1 0 ..., 0 0 0]
 [0 0 0 ..., 0 1 0]
 ..., 
 [1 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 1 0]
 [1 1 0 ..., 0 0 0]]
** Validation Metrics: Cov Err: 1.897, Avg Labels: 1.150, 
		 Top 1: 0.665, Top 3: 0.895, Top 5: 0.966, 
		 F1 Micro: 0.581, F1 Macro: 0.488, Total Pos: 712,044


2017-01-17 20:58:47,649 : INFO : loading Doc2Vec object from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_6/model


epoch_6


2017-01-17 20:58:57,476 : INFO : loading docvecs recursively from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_6/model.docvecs.* with mmap=None
2017-01-17 20:58:57,477 : INFO : loading doctag_syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_6/model.docvecs.doctag_syn0.npy with mmap=None
2017-01-17 21:00:06,539 : INFO : loading syn1neg from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_6/model.syn1neg.npy with mmap=None
2017-01-17 21:00:24,373 : INFO : loading syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_6/model.syn0.npy with mmap=None
2017-01-17 21:00

svm_iter_100_reg_0.01_classweights_balanced


2017-01-17 21:03:43,962 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [1 1 0 ..., 1 0 0]
 [0 0 0 ..., 0 1 1]
 ..., 
 [1 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 1 0]
 [0 1 0 ..., 1 0 0]]
** Validation Metrics: Cov Err: 1.891, Avg Labels: 1.150, 
		 Top 1: 0.668, Top 3: 0.897, Top 5: 0.966, 
		 F1 Micro: 0.583, F1 Macro: 0.491, Total Pos: 708,270


2017-01-17 21:04:12,708 : INFO : loading Doc2Vec object from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_7/model


epoch_7


2017-01-17 21:04:42,919 : INFO : loading docvecs recursively from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_7/model.docvecs.* with mmap=None
2017-01-17 21:04:42,920 : INFO : loading doctag_syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_7/model.docvecs.doctag_syn0.npy with mmap=None
2017-01-17 21:05:11,527 : INFO : loading syn1neg from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_7/model.syn1neg.npy with mmap=None
2017-01-17 21:05:23,410 : INFO : loading syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_7/model.syn0.npy with mmap=None
2017-01-17 21:05

svm_iter_100_reg_0.01_classweights_balanced


2017-01-17 21:07:58,487 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [1 1 0 ..., 1 1 0]
 [0 0 0 ..., 0 1 0]
 ..., 
 [1 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 1 0]
 [0 1 0 ..., 0 0 0]]


2017-01-17 21:08:26,086 : INFO : loading Doc2Vec object from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_8/model


** Validation Metrics: Cov Err: 1.859, Avg Labels: 1.150, 
		 Top 1: 0.675, Top 3: 0.903, Top 5: 0.970, 
		 F1 Micro: 0.591, F1 Macro: 0.497, Total Pos: 701,909
epoch_8


2017-01-17 21:08:59,852 : INFO : loading docvecs recursively from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_8/model.docvecs.* with mmap=None
2017-01-17 21:08:59,853 : INFO : loading doctag_syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_8/model.docvecs.doctag_syn0.npy with mmap=None
2017-01-17 21:09:29,563 : INFO : loading syn1neg from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_8/model.syn1neg.npy with mmap=None
2017-01-17 21:09:38,792 : INFO : loading syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_500_w_8_type_pv-dbow_concat_1_mean_0_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_8/model.syn0.npy with mmap=None
2017-01-17 21:09

svm_iter_100_reg_0.01_classweights_balanced


2017-01-17 21:10:31,735 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [1 1 0 ..., 1 1 0]
 [0 0 0 ..., 0 1 0]
 ..., 
 [1 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 1 0]
 [0 1 0 ..., 0 0 0]]


2017-01-17 21:10:59,519 : INFO : loading Doc2Vec object from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_1/model


** Validation Metrics: Cov Err: 1.842, Avg Labels: 1.150, 
		 Top 1: 0.677, Top 3: 0.906, Top 5: 0.971, 
		 F1 Micro: 0.595, F1 Macro: 0.501, Total Pos: 695,284
********************* doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None
epoch_1


2017-01-17 21:11:07,675 : INFO : loading docvecs recursively from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_1/model.docvecs.* with mmap=None
2017-01-17 21:11:07,676 : INFO : loading doctag_syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_1/model.docvecs.doctag_syn0.npy with mmap=None
2017-01-17 21:12:15,977 : INFO : loading syn1neg from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_1/model.syn1neg.npy with mmap=None
2017-01-17 21:12:33,797 : INFO : loading syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_1/model.syn0.npy with mmap=None
2017-01-17 21:12:49,662 : INFO :

svm_iter_100_reg_0.01_classweights_balanced


2017-01-17 21:17:32,685 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [0 1 0 ..., 1 1 1]
 [0 0 0 ..., 0 1 0]
 ..., 
 [1 1 0 ..., 1 1 0]
 [1 0 0 ..., 0 1 0]
 [1 1 0 ..., 1 0 0]]


2017-01-17 21:18:03,454 : INFO : Loading Classifier
2017-01-17 21:18:03,514 : INFO : Evaluating on Validation Data


** Validation Metrics: Cov Err: 2.118, Avg Labels: 1.150, 
		 Top 1: 0.596, Top 3: 0.853, Top 5: 0.953, 
		 F1 Micro: 0.516, F1 Macro: 0.448, Total Pos: 825,727
svm_iter_10_reg_0.001_classweights_balanced


2017-01-17 21:18:53,209 : INFO : Getting Validation Metrics


[[1 0 0 ..., 0 0 1]
 [0 1 0 ..., 1 1 1]
 [0 0 0 ..., 0 1 0]
 ..., 
 [1 1 0 ..., 1 1 0]
 [1 0 0 ..., 0 1 0]
 [0 1 0 ..., 1 0 0]]


2017-01-17 21:19:27,206 : INFO : Loading Classifier


** Validation Metrics: Cov Err: 2.103, Avg Labels: 1.150, 
		 Top 1: 0.598, Top 3: 0.855, Top 5: 0.955, 
		 F1 Micro: 0.508, F1 Macro: 0.444, Total Pos: 867,436
svm_iter_10_reg_0.1_classweights_balanced


2017-01-17 21:19:27,295 : INFO : Evaluating on Validation Data
2017-01-17 21:20:16,955 : INFO : Getting Validation Metrics


[[0 0 0 ..., 1 0 0]
 [0 0 0 ..., 1 0 0]
 [0 0 0 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 1 0 0]
 [0 0 0 ..., 1 0 0]
 [0 1 0 ..., 1 0 0]]


2017-01-17 21:20:48,839 : INFO : loading Doc2Vec object from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_2/model


** Validation Metrics: Cov Err: 3.595, Avg Labels: 1.150, 
		 Top 1: 0.136, Top 3: 0.576, Top 5: 0.869, 
		 F1 Micro: 0.169, F1 Macro: 0.201, Total Pos: 549,226
epoch_2


2017-01-17 21:20:58,740 : INFO : loading docvecs recursively from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_2/model.docvecs.* with mmap=None
2017-01-17 21:20:58,743 : INFO : loading doctag_syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_2/model.docvecs.doctag_syn0.npy with mmap=None
2017-01-17 21:21:59,118 : INFO : loading syn1neg from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_2/model.syn1neg.npy with mmap=None
2017-01-17 21:22:15,454 : INFO : loading syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_2/model.syn0.npy with mmap=None
2017-01-17 21:22:30,715 : INFO :

svm_iter_100_reg_0.01_classweights_balanced


2017-01-17 21:27:45,477 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [1 1 0 ..., 1 0 0]
 [0 0 0 ..., 0 1 0]
 ..., 
 [1 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 1 0]
 [1 1 0 ..., 1 0 0]]


2017-01-17 21:28:15,703 : INFO : Loading Classifier


** Validation Metrics: Cov Err: 1.923, Avg Labels: 1.150, 
		 Top 1: 0.660, Top 3: 0.890, Top 5: 0.965, 
		 F1 Micro: 0.560, F1 Macro: 0.482, Total Pos: 778,323
svm_iter_10_reg_0.001_classweights_balanced


2017-01-17 21:28:15,789 : INFO : Evaluating on Validation Data
2017-01-17 21:31:29,190 : INFO : Getting Validation Metrics


[[0 0 0 ..., 1 1 1]
 [1 0 0 ..., 1 1 0]
 [0 0 0 ..., 0 1 1]
 ..., 
 [1 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 1 0]
 [0 1 0 ..., 1 0 0]]


2017-01-17 21:31:57,915 : INFO : loading Doc2Vec object from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_3/model


** Validation Metrics: Cov Err: 1.877, Avg Labels: 1.150, 
		 Top 1: 0.668, Top 3: 0.899, Top 5: 0.970, 
		 F1 Micro: 0.565, F1 Macro: 0.484, Total Pos: 780,478
epoch_3


2017-01-17 21:32:31,208 : INFO : loading docvecs recursively from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_3/model.docvecs.* with mmap=None
2017-01-17 21:32:31,209 : INFO : loading doctag_syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_3/model.docvecs.doctag_syn0.npy with mmap=None
2017-01-17 21:33:36,063 : INFO : loading syn1neg from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_3/model.syn1neg.npy with mmap=None
2017-01-17 21:33:52,257 : INFO : loading syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_3/model.syn0.npy with mmap=None
2017-01-17 21:34:08,597 : INFO :

svm_iter_100_reg_0.01_classweights_balanced


2017-01-17 21:38:48,054 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [0 1 0 ..., 1 0 0]
 [0 0 0 ..., 0 1 0]
 ..., 
 [0 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 1 0]
 [0 1 0 ..., 1 0 0]]


2017-01-17 21:39:16,886 : INFO : Loading Classifier


** Validation Metrics: Cov Err: 1.841, Avg Labels: 1.150, 
		 Top 1: 0.678, Top 3: 0.905, Top 5: 0.972, 
		 F1 Micro: 0.582, F1 Macro: 0.498, Total Pos: 746,318
svm_iter_10_reg_0.001_classweights_balanced


2017-01-17 21:39:16,976 : INFO : Evaluating on Validation Data
2017-01-17 21:40:05,932 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [0 0 0 ..., 1 0 0]
 [0 0 0 ..., 0 1 0]
 ..., 
 [0 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 1 0]
 [0 1 0 ..., 1 0 0]]


2017-01-17 21:40:35,069 : INFO : loading Doc2Vec object from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_4/model


** Validation Metrics: Cov Err: 1.804, Avg Labels: 1.150, 
		 Top 1: 0.685, Top 3: 0.913, Top 5: 0.976, 
		 F1 Micro: 0.588, F1 Macro: 0.502, Total Pos: 745,179
epoch_4


2017-01-17 21:41:03,935 : INFO : loading docvecs recursively from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_4/model.docvecs.* with mmap=None
2017-01-17 21:41:03,936 : INFO : loading doctag_syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_4/model.docvecs.doctag_syn0.npy with mmap=None
2017-01-17 21:41:59,781 : INFO : loading syn1neg from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_4/model.syn1neg.npy with mmap=None
2017-01-17 21:42:22,688 : INFO : loading syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_4/model.syn0.npy with mmap=None
2017-01-17 21:42:41,189 : INFO :

svm_iter_100_reg_0.01_classweights_balanced


2017-01-17 21:50:03,946 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [1 0 0 ..., 1 0 0]
 [0 0 0 ..., 0 1 0]
 ..., 
 [1 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 1 0]
 [0 1 0 ..., 1 0 0]]
** Validation Metrics: Cov Err: 1.824, Avg Labels: 1.150, 
		 Top 1: 0.682, Top 3: 0.908, Top 5: 0.973, 
		 F1 Micro: 0.589, F1 Macro: 0.502, Total Pos: 731,972


2017-01-17 21:50:33,685 : INFO : Loading Classifier
2017-01-17 21:50:33,746 : INFO : Evaluating on Validation Data


svm_iter_10_reg_0.001_classweights_balanced


2017-01-17 21:53:55,931 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [1 0 0 ..., 1 1 0]
 [0 0 0 ..., 0 1 0]
 ..., 
 [0 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 1 0]
 [0 1 0 ..., 1 0 0]]


2017-01-17 21:54:25,794 : INFO : loading Doc2Vec object from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_5/model


** Validation Metrics: Cov Err: 1.787, Avg Labels: 1.150, 
		 Top 1: 0.690, Top 3: 0.915, Top 5: 0.977, 
		 F1 Micro: 0.597, F1 Macro: 0.507, Total Pos: 726,296
epoch_5


2017-01-17 21:54:59,289 : INFO : loading docvecs recursively from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_5/model.docvecs.* with mmap=None
2017-01-17 21:54:59,291 : INFO : loading doctag_syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_5/model.docvecs.doctag_syn0.npy with mmap=None
2017-01-17 21:57:15,675 : INFO : loading syn1neg from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_5/model.syn1neg.npy with mmap=None
2017-01-17 21:57:59,000 : INFO : loading syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_5/model.syn0.npy with mmap=None
2017-01-17 21:58:31,390 : INFO :

svm_iter_100_reg_0.01_classweights_balanced


2017-01-17 22:04:41,581 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [0 1 0 ..., 1 0 0]
 [0 0 0 ..., 0 1 0]
 ..., 
 [1 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 1 0]
 [0 1 0 ..., 1 0 0]]
** Validation Metrics: Cov Err: 1.789, Avg Labels: 1.150, 
		 Top 1: 0.692, Top 3: 0.915, Top 5: 0.976, 
		 F1 Micro: 0.603, F1 Macro: 0.514, Total Pos: 706,023


2017-01-17 22:05:10,829 : INFO : Loading Classifier
2017-01-17 22:05:10,886 : INFO : Evaluating on Validation Data


svm_iter_10_reg_0.001_classweights_balanced


2017-01-17 22:07:19,614 : INFO : Getting Validation Metrics


[[0 0 0 ..., 0 1 1]
 [0 1 0 ..., 1 0 0]
 [0 0 0 ..., 0 1 1]
 ..., 
 [1 1 0 ..., 1 0 0]
 [1 0 0 ..., 0 1 0]
 [0 1 0 ..., 1 0 0]]


2017-01-17 22:07:49,943 : INFO : loading Doc2Vec object from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_6/model


** Validation Metrics: Cov Err: 1.747, Avg Labels: 1.150, 
		 Top 1: 0.701, Top 3: 0.924, Top 5: 0.980, 
		 F1 Micro: 0.616, F1 Macro: 0.523, Total Pos: 689,910
epoch_6


2017-01-17 22:08:57,923 : INFO : loading docvecs recursively from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_6/model.docvecs.* with mmap=None
2017-01-17 22:08:57,924 : INFO : loading doctag_syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_6/model.docvecs.doctag_syn0.npy with mmap=None
2017-01-17 22:10:03,747 : INFO : loading syn1neg from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_6/model.syn1neg.npy with mmap=None
2017-01-17 22:10:18,610 : INFO : loading syn0 from /big/s/shalaby/parameter_search_doc2vec_models_new/full/doc2vec_size_1000_w_8_type_dm_concat_0_mean_1_trainwords_0_hs_0_neg_10_vocabsize_None/epoch_6/model.syn0.npy with mmap=None
2017-01-17 22:10:34,224 : INFO :

svm_iter_100_reg_0.01_classweights_balanced


2017-01-17 22:20:24,187 : INFO : Training Classifier


ValueError: class_weight must be dict, 'auto', or None, got: 'e'