In [1]:
import json
import nltk
from nltk.tokenize import RegexpTokenizer
import string
import math
import os
import time
from collections import namedtuple
import cPickle as pickle
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import random
import re

from multiprocessing.dummy import Pool as ThreadPool
import itertools
from collections import defaultdict

from sklearn.metrics import coverage_error
import sklearn.metrics
from sklearn.multiclass import OneVsRestClassifier
from sklearn import linear_model
from sklearn.preprocessing import MultiLabelBinarizer

from gensim.models.doc2vec import Doc2Vec, LabeledSentence

import logging
from logging import info
from functools import partial

import keras

from thesis.utils.metrics import *

Using Theano backend.
Using gpu device 0: TITAN X (Pascal) (CNMeM is disabled, cuDNN 5105)


In [2]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [3]:
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 200)

In [4]:
root = logging.getLogger()
for handler in root.handlers[:]:
    root.removeHandler(handler)
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) # adds a default StreamHanlder
#root.addHandler(logging.StreamHandler())

In [5]:
# NN_PARAMETER_SEARCH_PREFIX = "lstm_{}_batch_{}_nn_parameter_searches.pkl"
# NN_PARAMETER_SEARCH_PREFIX = "standard_nn_{}_batch_{}_nn_parameter_searches.pkl"
# NN_PARAMETER_SEARCH_PREFIX = "nn_bow_{}_batch_{}_nn_parameter_searches.pkl"
NN_PARAMETER_SEARCH_PREFIX = "nn_lda_{}_batch_{}_nn_parameter_searches.pkl"

In [6]:
VOCAB_MODEL = "vocab_model"
MODEL_PREFIX = "model"
VALIDATION_MATRIX = "validation_matrix.pkl"
METRICS = "metrics.pkl"
CLASSIFIER = "classifier.pkl"

In [16]:
#training_file = "/home/local/shalaby/docs_output_sample_100.json"

root_location = "/mnt/data2/shalaby/"
exports_location = root_location + "exported_data/"

doc2vec_model_save_location = os.path.join(root_location, "parameter_search_doc2vec_models_new", "full")
# nn_parameter_search_location = os.path.join(root_location, "nn_parameter_search_extended")
# nn_parameter_search_location = os.path.join(root_location, "nn_bow_parameter_search")
nn_parameter_search_location = os.path.join(root_location, "nn_lda_parameter_search")

if not os.path.exists(doc2vec_model_save_location):
    os.makedirs(doc2vec_model_save_location)
if not os.path.exists(os.path.join(doc2vec_model_save_location, VOCAB_MODEL)):
    os.makedirs(os.path.join(doc2vec_model_save_location, VOCAB_MODEL))

training_file = root_location + "docs_output.json"

doc_classifications_map_file = exports_location + "doc_classification_map.pkl"
classification_index_file = exports_location + "classification_index.pkl"
sections_file = exports_location + "sections.pkl"
classes_file = exports_location + "classes.pkl"
subclasses_file = exports_location + "subclasses.pkl"
valid_classes_file = exports_location + "valid_classes.pkl"
valid_subclasses_file = exports_location + "valid_subclasses.pkl"
classifications_output = exports_location + "classifications.pkl"
training_docs_list_file = exports_location + "training_docs_list.pkl"
validation_docs_list_file = exports_location + "validation_docs_list.pkl"
test_docs_list_file = exports_location + "test_docs_list.pkl"

preprocessed_location = root_location + "preprocessed_data/"

training_preprocessed_files_prefix = preprocessed_location + "training_docs_merged_data_preprocessed-"
training_preprocessed_docids_files_prefix = preprocessed_location + "training_docs_merged_docids_preprocessed-"
validation_preprocessed_files_prefix = preprocessed_location + "validation_docs_merged_data_preprocessed-"
validation_preprocessed_docids_files_prefix = preprocessed_location + "validation_docs_merged_docids_preprocessed-"

word2vec_questions_file = result = root_location + 'tensorflow/word2vec/questions-words.txt'

In [17]:
def get_subdirectories(d):
    #return filter(os.path.isdir, [f for f in os.listdir(d)])
    return [f for f in os.listdir(d) if os.path.isdir(os.path.join(d,f))]
def natural_sort(l):
    convert = lambda text: int(text) if text.isdigit() else text.lower() 
    alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] 
    return sorted(l, key = alphanum_key)

In [18]:
class MetricsCallback(keras.callbacks.Callback):
    
    EPOCHS_BEFORE_VALIDATION = 10
    
    def on_train_begin(self, logs={}):
        self.epoch_index = 0
        self.val_loss_reductions = 0
        self.metrics_dict = {}
        self.best_val_loss = np.iinfo(np.int32).max
        self.best_weights = None
        self.best_validation_metrics = None
    def on_epoch_end(self, epoch, logs={}):
        self.epoch_index += 1
        if logs['val_loss'] < self.best_val_loss:
            self.val_loss_reductions += 1
            self.best_val_loss = logs['val_loss']
            self.best_weights = self.model.get_weights()
            print '\r    \r' # to remove the previous line of verbose output of model fit
            time.sleep(0.2)
            info('Found lower val loss for epoch {} => {}'.format(self.epoch_index, round(logs['val_loss'], 5)))
            if self.val_loss_reductions % MetricsCallback.EPOCHS_BEFORE_VALIDATION == 0:
                
                info('Validation Loss Reduced {} times'.format(self.val_loss_reductions))
                info('Evaluating on Validation Data')
                yvp = self.model.predict(Xv)
                yvp_binary = get_binary_0_5(yvp)
                info('Generating Validation Metrics')
                validation_metrics = get_metrics(yv, yvp, yvp_binary)
                print "****** Validation Metrics: Cov Err: {:.3f} | Top 3: {:.3f} | Top 5: {:.3f} | F1 Micro: {:.3f} | F1 Macro: {:.3f}".format(
                    validation_metrics['coverage_error'], validation_metrics['top_3'], validation_metrics['top_5'], 
                    validation_metrics['f1_micro'], validation_metrics['f1_macro'])
                self.metrics_dict[self.epoch_index] = validation_metrics
#                 self.best_validation_metrics = validation_metrics


In [19]:
NN_BATCH_SIZE = 2048

In [28]:
classifications_type = 'subclasses'

In [29]:
natural_sort(get_subdirectories(nn_parameter_search_location))

['bm25']

In [30]:
table_data = defaultdict(dict)
for doc2vec_method in natural_sort(get_subdirectories(nn_parameter_search_location)):
    for epoch in natural_sort(get_subdirectories(os.path.join(nn_parameter_search_location,doc2vec_method))):
        print '*********** {} -> {}'.format(doc2vec_method, epoch)
        if not os.path.exists(os.path.join(nn_parameter_search_location, doc2vec_method, epoch, NN_PARAMETER_SEARCH_PREFIX.format(classifications_type, NN_BATCH_SIZE))):
            print 'Not found'
            continue
        try:
            %time param_results_dict = pickle.load(open(os.path.join(os.path.join(nn_parameter_search_location, doc2vec_method, epoch, NN_PARAMETER_SEARCH_PREFIX.format(classifications_type, NN_BATCH_SIZE))), 'r'))
        except:
            continue
            
        for key in param_results_dict.keys():
            print('========== NN: {}'.format(key))
            val = param_results_dict[key]
            if val.get('best_validation_metrics'):
#                 val_metrics = val['last_validation_metrics']
                val_metrics2 =  val['best_validation_metrics']
            else:
                val_metrics = val['validation_metrics']
                val_metrics2 =  val['metrics_callback'].metrics_dict[sorted(val['metrics_callback'].metrics_dict.keys())[-1]]
    
#             print('Epochs => {}'.format(len(val['history'].history['val_loss'])))
#             print('Best Val Loss => {}'.format(val["metrics_callback"].best_val_loss))
#             print('Last Val: Coverage Error => {:.4f} | F1 Micro => {:.4f} | F1 Macro => {:.4f} | Top 3 => {:.4f}'.format(val_metrics['coverage_error'], 
#                                                                                                 val_metrics['f1_micro'], val_metrics['f1_macro'],
#                                                                                                 val_metrics['top_3']))
#             print('Best Val: Coverage Error => {:.4f} | F1 Micro => {:.4f} | F1 Macro => {:.4f} | Top 3 => {:.4f}'.format(val_metrics2['coverage_error'], 
#                                                                                                 val_metrics2['f1_micro'], val_metrics2['f1_macro'],
#                                                                                                 val_metrics2['top_3']))
            table_data[epoch + ' / ' + key]['{} {} {} -- {}'.format(doc2vec_method[13:22], doc2vec_method[22:29], doc2vec_method[29:37], 'Epochs')] = val['epochs']
            table_data[epoch + ' / ' + key]['{} {} {} -- {}'.format(doc2vec_method[13:22], doc2vec_method[22:29], doc2vec_method[29:37], 'duration')] = int(val['duration'])
            table_data[epoch + ' / ' + key]['{} {} {} -- {}'.format(doc2vec_method[13:22], doc2vec_method[22:29], doc2vec_method[29:37], 'Coverage Error')] = round(val_metrics2['coverage_error'], 3)
            table_data[epoch + ' / ' + key]['{} {} {} -- {}'.format(doc2vec_method[13:22], doc2vec_method[22:29], doc2vec_method[29:37], 'F1 Micro')] = round(val_metrics2['f1_micro'], 3)
            table_data[epoch + ' / ' + key]['{} {} {} -- {}'.format(doc2vec_method[13:22], doc2vec_method[22:29], doc2vec_method[29:37], 'F1 Macro')] = round(val_metrics2['f1_macro'], 3)
            # table_data[epoch + ' / ' + key]['{} {} {} -- {}'.format(doc2vec_method[13:22], doc2vec_method[22:29], doc2vec_method[29:37], 'Precision Micro')] = round(val_metrics2['precision_micro'], 3)
            table_data[epoch + ' / ' + key]['{} {} {} -- {}'.format(doc2vec_method[13:22], doc2vec_method[22:29], doc2vec_method[29:37], 'Top 3')] = round(val_metrics2['top_3'], 3)
            table_data[epoch + ' / ' + key]['{} {} {} -- {}'.format(doc2vec_method[13:22], doc2vec_method[22:29], doc2vec_method[29:37], 'Top 5')] = round(val_metrics2['top_5'], 3)


*********** bm25 -> lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000
CPU times: user 17.3 s, sys: 216 ms, total: 17.6 s
Wall time: 17.6 s
*********** bm25 -> size_10000
CPU times: user 4.91 s, sys: 44 ms, total: 4.95 s
Wall time: 4.95 s


In [31]:
table_rows = natural_sort(table_data.keys())
table_values = []
for table_row in table_rows:
    #print table_data[table_row]
    table_values.append(table_data[table_row])
df = pd.DataFrame(data=table_values, index=table_rows)
df

Unnamed: 0,-- Coverage Error,-- Epochs,-- F1 Macro,-- F1 Micro,-- Top 3,-- Top 5,-- duration
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_100_1st-act_relu_2nd-size_1000_2nd-act_sigmoid_in-drop_False_hid-drop_True,11.549,25,0.04,0.405,0.652,0.736,587
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_100_1st-act_sigmoid_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,11.919,20,0.048,0.435,0.648,0.733,573
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_100_1st-act_tanh_2nd-size_500_2nd-act_sigmoid_in-drop_False_hid-drop_True,11.338,24,0.062,0.468,0.656,0.74,548
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_100_1st-act_tanh_2nd-size_1000_2nd-act_relu_in-drop_False_hid-drop_True,11.377,57,0.06,0.465,0.661,0.744,1465
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_200_1st-act_sigmoid_2nd-size_1000_2nd-act_sigmoid_in-drop_False_hid-drop_True,10.176,26,0.061,0.472,0.676,0.76,668
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_200_1st-act_sigmoid_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,10.328,24,0.069,0.468,0.677,0.761,672
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_200_1st-act_tanh_2nd-size_500_2nd-act_tanh_in-drop_False_hid-drop_True,9.954,82,0.088,0.487,0.681,0.765,2015
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_200_1st-act_tanh_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,10.822,59,0.071,0.478,0.673,0.755,1479
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_500_1st-act_relu_2nd-size_500_2nd-act_relu_in-drop_False_hid-drop_True,10.583,23,0.078,0.482,0.68,0.762,685
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_500_1st-act_relu_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,10.47,22,0.085,0.488,0.679,0.763,678


In [323]:
df.rank(axis=0, ascending=True)

Unnamed: 0,-- Coverage Error,-- Epochs,-- F1 Macro,-- F1 Micro,-- Top 3,-- Top 5,-- duration
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_100_1st-act_relu_2nd-size_1000_2nd-act_sigmoid_in-drop_False_hid-drop_True,16.0,11.5,1.0,1.0,3.0,2.0,6.0
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_100_1st-act_sigmoid_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,17.0,5.0,2.0,5.0,1.0,1.0,5.0
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_100_1st-act_tanh_2nd-size_500_2nd-act_sigmoid_in-drop_False_hid-drop_True,14.0,9.5,7.5,9.5,4.0,3.0,4.0
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_100_1st-act_tanh_2nd-size_1000_2nd-act_relu_in-drop_False_hid-drop_True,15.0,17.0,5.0,7.0,5.0,5.0,17.0
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_200_1st-act_sigmoid_2nd-size_1000_2nd-act_sigmoid_in-drop_False_hid-drop_True,8.0,13.5,6.0,12.0,10.0,10.0,8.0
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_200_1st-act_sigmoid_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,9.0,9.5,9.5,9.5,11.0,11.0,9.5
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_200_1st-act_tanh_2nd-size_500_2nd-act_tanh_in-drop_False_hid-drop_True,7.0,19.0,16.0,16.0,14.0,14.0,19.0
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_200_1st-act_tanh_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,12.0,18.0,11.0,13.0,8.0,7.0,18.0
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_500_1st-act_relu_2nd-size_500_2nd-act_relu_in-drop_False_hid-drop_True,11.0,8.0,12.0,14.0,13.0,12.0,13.0
lda_online_topics_1000_iter_50_batch_4096_decay_0.5_evaluate-every_1000 / nn_1st-size_500_1st-act_relu_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,10.0,6.5,15.0,17.0,12.0,13.0,12.0


In [45]:
df.filter(regex='.*F1 Micro.*')

Unnamed: 0,1000_w_ 8_type_d m_concat_ -- F1 Micro,1000_w_ 8_type_p v-dbow_co -- F1 Micro,100_w_2 _type_dm _concat_1 -- F1 Micro,100_w_5 _type_dm _concat_1 -- F1 Micro,100_w_8 _type_dm _concat_0 -- F1 Micro,100_w_8 _type_dm _concat_1 -- F1 Micro,100_w_8 _type_pv -dbow_con -- F1 Micro,200_w_2 _type_dm _concat_0 -- F1 Micro,200_w_2 _type_pv -dbow_con -- F1 Micro,200_w_4 _type_dm _concat_0 -- F1 Micro,200_w_4 _type_pv -dbow_con -- F1 Micro,200_w_8 _type_dm _concat_0 -- F1 Micro,200_w_8 _type_dm _concat_1 -- F1 Micro,200_w_8 _type_pv -dbow_con -- F1 Micro,500_w_8 _type_dm _concat_0 -- F1 Micro,500_w_8 _type_pv -dbow_con -- F1 Micro,50_w_8_ type_dm_ concat_0_ -- F1 Micro,50_w_8_ type_dm_ concat_1_ -- F1 Micro,50_w_8_ type_pv- dbow_conc -- F1 Micro
nn_1st-size_50_1st-act_tanh_2nd-size_50_2nd-act_tanh_in-drop_False_hid-drop_True,,,,0.376,,,,,,,,,,,,,,,
nn_1st-size_50_1st-act_tanh_2nd-size_None_2nd-act_sigmoid_in-drop_False_hid-drop_True,,,,0.289,,,,,,,,,,,,,,,
nn_1st-size_100_1st-act_tanh_2nd-size_200_2nd-act_tanh_in-drop_False_hid-drop_True,,,,0.461,,,,,,,,,,,,,,,
nn_1st-size_100_1st-act_tanh_2nd-size_500_2nd-act_tanh_in-drop_False_hid-drop_True,,,,0.482,,,,,,,,,,,,,,,
nn_1st-size_100_1st-act_tanh_2nd-size_None_2nd-act_tanh_in-drop_False_hid-drop_True,,,,0.374,,,,,,,,,,,,,,,
nn_1st-size_200_1st-act_relu_2nd-size_500_2nd-act_softmax_in-drop_False_hid-drop_True,,,,,,,,,,,,0.0,,,,,,,
nn_1st-size_200_1st-act_relu_2nd-size_1000_2nd-act_relu_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,,0.389,,,
nn_1st-size_200_1st-act_relu_2nd-size_1000_2nd-act_sigmoid_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,,0.424,,,
nn_1st-size_200_1st-act_relu_2nd-size_1000_2nd-act_tanh_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,,0.458,,,
nn_1st-size_200_1st-act_relu_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,,0.431,,,


In [35]:
df.filter(regex='.*F1 Micro.*').rank(axis=0, ascending=False)

Unnamed: 0,1000_w_ 8_type_d m_concat_ -- F1 Micro,1000_w_ 8_type_p v-dbow_co -- F1 Micro,100_w_2 _type_dm _concat_1 -- F1 Micro,100_w_5 _type_dm _concat_1 -- F1 Micro,100_w_8 _type_dm _concat_0 -- F1 Micro,100_w_8 _type_dm _concat_1 -- F1 Micro,100_w_8 _type_pv -dbow_con -- F1 Micro,200_w_2 _type_dm _concat_0 -- F1 Micro,200_w_2 _type_pv -dbow_con -- F1 Micro,200_w_4 _type_dm _concat_0 -- F1 Micro,200_w_4 _type_pv -dbow_con -- F1 Micro,200_w_8 _type_dm _concat_0 -- F1 Micro,200_w_8 _type_dm _concat_1 -- F1 Micro,200_w_8 _type_pv -dbow_con -- F1 Micro,500_w_8 _type_dm _concat_0 -- F1 Micro,500_w_8 _type_pv -dbow_con -- F1 Micro,50_w_8_ type_dm_ concat_0_ -- F1 Micro,50_w_8_ type_dm_ concat_1_ -- F1 Micro,50_w_8_ type_pv- dbow_conc -- F1 Micro
nn_1st-size_200_1st-act_tanh_2nd-size_1000_2nd-act_relu_in-drop_False_hid-drop_True,2.0,1.0,3.0,3.0,2.0,2.5,3.0,2.0,3.0,2.5,3.0,2.0,3.0,3.0,3.0,1.5,3.0,2.0,3.0
nn_1st-size_200_1st-act_tanh_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,1.0,2.0,1.0,1.5,1.0,1.0,1.0,1.0,2.0,2.5,2.0,3.0,1.0,2.0,2.0,1.5,2.0,1.0,2.0
nn_1st-size_500_1st-act_tanh_2nd-size_2000_2nd-act_sigmoid_in-drop_False_hid-drop_True,3.0,3.0,2.0,1.5,3.0,2.5,2.0,3.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,3.0,1.0,3.0,1.0


In [66]:
df.filter(regex='nn_1st-size_500_1st-act_tanh_2nd-size_2000_2nd-act_sigmoid_in-drop_False_hid-drop_True|nn_1st-size_200_1st-act_tanh_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True', axis=0)

Unnamed: 0,1000_w_ 8_type_d m_concat_ -- Coverage Error,1000_w_ 8_type_d m_concat_ -- F1 Macro,1000_w_ 8_type_d m_concat_ -- F1 Micro,1000_w_ 8_type_d m_concat_ -- Top 3,1000_w_ 8_type_d m_concat_ -- Top 5,1000_w_ 8_type_p v-dbow_co -- Coverage Error,1000_w_ 8_type_p v-dbow_co -- F1 Macro,1000_w_ 8_type_p v-dbow_co -- F1 Micro,1000_w_ 8_type_p v-dbow_co -- Top 3,1000_w_ 8_type_p v-dbow_co -- Top 5,100_w_2 _type_dm _concat_1 -- Coverage Error,100_w_2 _type_dm _concat_1 -- F1 Macro,100_w_2 _type_dm _concat_1 -- F1 Micro,100_w_2 _type_dm _concat_1 -- Top 3,100_w_2 _type_dm _concat_1 -- Top 5,100_w_5 _type_dm _concat_1 -- Coverage Error,100_w_5 _type_dm _concat_1 -- F1 Macro,100_w_5 _type_dm _concat_1 -- F1 Micro,100_w_5 _type_dm _concat_1 -- Top 3,100_w_5 _type_dm _concat_1 -- Top 5,100_w_8 _type_dm _concat_0 -- Coverage Error,100_w_8 _type_dm _concat_0 -- F1 Macro,100_w_8 _type_dm _concat_0 -- F1 Micro,100_w_8 _type_dm _concat_0 -- Top 3,100_w_8 _type_dm _concat_0 -- Top 5,100_w_8 _type_dm _concat_1 -- Coverage Error,100_w_8 _type_dm _concat_1 -- F1 Macro,100_w_8 _type_dm _concat_1 -- F1 Micro,100_w_8 _type_dm _concat_1 -- Top 3,100_w_8 _type_dm _concat_1 -- Top 5,100_w_8 _type_pv -dbow_con -- Coverage Error,100_w_8 _type_pv -dbow_con -- F1 Macro,100_w_8 _type_pv -dbow_con -- F1 Micro,100_w_8 _type_pv -dbow_con -- Top 3,100_w_8 _type_pv -dbow_con -- Top 5,200_w_2 _type_dm _concat_0 -- Coverage Error,200_w_2 _type_dm _concat_0 -- F1 Macro,200_w_2 _type_dm _concat_0 -- F1 Micro,200_w_2 _type_dm _concat_0 -- Top 3,200_w_2 _type_dm _concat_0 -- Top 5,200_w_2 _type_pv -dbow_con -- Coverage Error,200_w_2 _type_pv -dbow_con -- F1 Macro,200_w_2 _type_pv -dbow_con -- F1 Micro,200_w_2 _type_pv -dbow_con -- Top 3,200_w_2 _type_pv -dbow_con -- Top 5,200_w_4 _type_dm _concat_0 -- Coverage Error,200_w_4 _type_dm _concat_0 -- F1 Macro,200_w_4 _type_dm _concat_0 -- F1 Micro,200_w_4 _type_dm _concat_0 -- Top 3,200_w_4 _type_dm _concat_0 -- Top 5,200_w_4 _type_pv -dbow_con -- Coverage Error,200_w_4 _type_pv -dbow_con -- F1 Macro,200_w_4 _type_pv -dbow_con -- F1 Micro,200_w_4 _type_pv -dbow_con -- Top 3,200_w_4 _type_pv -dbow_con -- Top 5,200_w_8 _type_dm _concat_0 -- Coverage Error,200_w_8 _type_dm _concat_0 -- F1 Macro,200_w_8 _type_dm _concat_0 -- F1 Micro,200_w_8 _type_dm _concat_0 -- Top 3,200_w_8 _type_dm _concat_0 -- Top 5,200_w_8 _type_dm _concat_1 -- Coverage Error,200_w_8 _type_dm _concat_1 -- F1 Macro,200_w_8 _type_dm _concat_1 -- F1 Micro,200_w_8 _type_dm _concat_1 -- Top 3,200_w_8 _type_dm _concat_1 -- Top 5,200_w_8 _type_pv -dbow_con -- Coverage Error,200_w_8 _type_pv -dbow_con -- F1 Macro,200_w_8 _type_pv -dbow_con -- F1 Micro,200_w_8 _type_pv -dbow_con -- Top 3,200_w_8 _type_pv -dbow_con -- Top 5,500_w_8 _type_dm _concat_0 -- Coverage Error,500_w_8 _type_dm _concat_0 -- F1 Macro,500_w_8 _type_dm _concat_0 -- F1 Micro,500_w_8 _type_dm _concat_0 -- Top 3,500_w_8 _type_dm _concat_0 -- Top 5,500_w_8 _type_pv -dbow_con -- Coverage Error,500_w_8 _type_pv -dbow_con -- F1 Macro,500_w_8 _type_pv -dbow_con -- F1 Micro,500_w_8 _type_pv -dbow_con -- Top 3,500_w_8 _type_pv -dbow_con -- Top 5,50_w_8_ type_dm_ concat_0_ -- Coverage Error,50_w_8_ type_dm_ concat_0_ -- F1 Macro,50_w_8_ type_dm_ concat_0_ -- F1 Micro,50_w_8_ type_dm_ concat_0_ -- Top 3,50_w_8_ type_dm_ concat_0_ -- Top 5,50_w_8_ type_dm_ concat_1_ -- Coverage Error,50_w_8_ type_dm_ concat_1_ -- F1 Macro,50_w_8_ type_dm_ concat_1_ -- F1 Micro,50_w_8_ type_dm_ concat_1_ -- Top 3,50_w_8_ type_dm_ concat_1_ -- Top 5,50_w_8_ type_pv- dbow_conc -- Coverage Error,50_w_8_ type_pv- dbow_conc -- F1 Macro,50_w_8_ type_pv- dbow_conc -- F1 Micro,50_w_8_ type_pv- dbow_conc -- Top 3,50_w_8_ type_pv- dbow_conc -- Top 5
nn_1st-size_200_1st-act_tanh_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,6.37,0.19,0.559,0.757,0.836,7.667,0.148,0.529,0.725,0.809,6.699,0.149,0.52,0.74,0.823,6.346,0.164,0.532,0.748,0.831,7.21,0.15,0.52,0.729,0.813,6.889,0.153,0.528,0.736,0.82,7.083,0.111,0.486,0.726,0.811,6.154,0.179,0.551,0.756,0.837,6.961,0.14,0.509,0.734,0.816,6.381,0.188,0.555,0.752,0.833,6.898,0.138,0.515,0.735,0.818,6.28,0.161,0.53,0.752,0.834,69.804,0.003,0.058,0.23,0.285,6.86,0.143,0.519,0.735,0.819,6.184,0.177,0.549,0.757,0.837,7.133,0.154,0.534,0.732,0.815,8.3,0.09,0.467,0.695,0.783,7.187,0.129,0.504,0.728,0.812,7.388,0.087,0.469,0.718,0.803
nn_1st-size_500_1st-act_tanh_2nd-size_2000_2nd-act_sigmoid_in-drop_False_hid-drop_True,6.822,0.172,0.538,0.743,0.824,8.622,0.121,0.487,0.698,0.784,6.949,0.162,0.526,0.736,0.819,6.533,0.182,0.548,0.746,0.828,7.535,0.139,0.513,0.718,0.803,7.144,0.164,0.528,0.732,0.815,6.976,0.145,0.524,0.73,0.814,6.212,0.188,0.551,0.754,0.835,6.951,0.161,0.53,0.735,0.818,6.526,0.177,0.54,0.746,0.828,6.862,0.158,0.522,0.736,0.819,6.466,0.172,0.543,0.75,0.831,70.578,0.002,0.047,0.232,0.288,6.859,0.16,0.524,0.738,0.821,6.451,0.193,0.556,0.755,0.835,7.594,0.152,0.52,0.726,0.811,8.571,0.096,0.47,0.684,0.774,7.564,0.133,0.493,0.714,0.8,7.514,0.113,0.486,0.713,0.8


In [115]:
df.filter(regex="nn_1st-size_500_1st-act_tanh_2nd-size_2000_2nd-act_sigmoid_in-drop_False_hid-drop_True|" +
"nn_1st-size_200_1st-act_tanh_2nd-size_1000_2nd-act_relu_in-drop_False_hid-drop_True|" +
"nn_1st-size_200_1st-act_tanh_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True|" +
"nn_1st-size_500_1st-act_tanh_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True", axis=0)\
.filter(regex='ype_dm')\
# .rank(axis=1, ascending=True)\
# .filter(regex='F1 Micro')\
# .rank(axis=0, ascending=False)\
# .sum(axis=1)
#.filter(regex='Top 3')\

Unnamed: 0,1000_w_8_ type_dm _concat_ -- Coverage Error,1000_w_8_ type_dm _concat_ -- F1 Macro,1000_w_8_ type_dm _concat_ -- F1 Micro,1000_w_8_ type_dm _concat_ -- Precision Micro,1000_w_8_ type_dm _concat_ -- Top 3,1000_w_8_ type_dm _concat_ -- Top 5,100_w_2_t ype_dm_ concat_1 -- Coverage Error,100_w_2_t ype_dm_ concat_1 -- F1 Macro,100_w_2_t ype_dm_ concat_1 -- F1 Micro,100_w_2_t ype_dm_ concat_1 -- Precision Micro,100_w_2_t ype_dm_ concat_1 -- Top 3,100_w_2_t ype_dm_ concat_1 -- Top 5,100_w_5_t ype_dm_ concat_1 -- Coverage Error,100_w_5_t ype_dm_ concat_1 -- F1 Macro,100_w_5_t ype_dm_ concat_1 -- F1 Micro,100_w_5_t ype_dm_ concat_1 -- Precision Micro,100_w_5_t ype_dm_ concat_1 -- Top 3,100_w_5_t ype_dm_ concat_1 -- Top 5,100_w_8_t ype_dm_ concat_0 -- Coverage Error,100_w_8_t ype_dm_ concat_0 -- F1 Macro,100_w_8_t ype_dm_ concat_0 -- F1 Micro,100_w_8_t ype_dm_ concat_0 -- Precision Micro,100_w_8_t ype_dm_ concat_0 -- Top 3,100_w_8_t ype_dm_ concat_0 -- Top 5,100_w_8_t ype_dm_ concat_1 -- Coverage Error,100_w_8_t ype_dm_ concat_1 -- F1 Macro,100_w_8_t ype_dm_ concat_1 -- F1 Micro,100_w_8_t ype_dm_ concat_1 -- Precision Micro,100_w_8_t ype_dm_ concat_1 -- Top 3,100_w_8_t ype_dm_ concat_1 -- Top 5,200_w_2_t ype_dm_ concat_0 -- Coverage Error,200_w_2_t ype_dm_ concat_0 -- F1 Macro,200_w_2_t ype_dm_ concat_0 -- F1 Micro,200_w_2_t ype_dm_ concat_0 -- Precision Micro,200_w_2_t ype_dm_ concat_0 -- Top 3,200_w_2_t ype_dm_ concat_0 -- Top 5,200_w_4_t ype_dm_ concat_0 -- Coverage Error,200_w_4_t ype_dm_ concat_0 -- F1 Macro,200_w_4_t ype_dm_ concat_0 -- F1 Micro,200_w_4_t ype_dm_ concat_0 -- Precision Micro,200_w_4_t ype_dm_ concat_0 -- Top 3,200_w_4_t ype_dm_ concat_0 -- Top 5,200_w_8_t ype_dm_ concat_0 -- Coverage Error,200_w_8_t ype_dm_ concat_0 -- F1 Macro,200_w_8_t ype_dm_ concat_0 -- F1 Micro,200_w_8_t ype_dm_ concat_0 -- Precision Micro,200_w_8_t ype_dm_ concat_0 -- Top 3,200_w_8_t ype_dm_ concat_0 -- Top 5,200_w_8_t ype_dm_ concat_1 -- Coverage Error,200_w_8_t ype_dm_ concat_1 -- F1 Macro,200_w_8_t ype_dm_ concat_1 -- F1 Micro,200_w_8_t ype_dm_ concat_1 -- Precision Micro,200_w_8_t ype_dm_ concat_1 -- Top 3,200_w_8_t ype_dm_ concat_1 -- Top 5,500_w_8_t ype_dm_ concat_0 -- Coverage Error,500_w_8_t ype_dm_ concat_0 -- F1 Macro,500_w_8_t ype_dm_ concat_0 -- F1 Micro,500_w_8_t ype_dm_ concat_0 -- Precision Micro,500_w_8_t ype_dm_ concat_0 -- Top 3,500_w_8_t ype_dm_ concat_0 -- Top 5
nn_1st-size_200_1st-act_tanh_2nd-size_1000_2nd-act_relu_in-drop_False_hid-drop_True,6.52,0.177,0.552,0.707,0.751,0.831,6.898,0.137,0.513,0.731,0.734,0.818,6.519,0.15,0.529,0.729,0.743,0.826,7.36,0.141,0.521,0.7,0.724,0.809,7.047,0.139,0.515,0.725,0.732,0.815,6.314,0.164,0.539,0.726,0.75,0.832,6.501,0.174,0.541,0.714,0.747,0.829,6.472,0.151,0.525,0.741,0.747,0.828,68.467,0.003,0.059,0.337,0.233,0.289,6.422,0.168,0.546,0.706,0.749,0.83
nn_1st-size_200_1st-act_tanh_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,6.37,0.19,0.559,0.704,0.757,0.836,6.699,0.149,0.52,0.729,0.74,0.823,6.346,0.164,0.532,0.731,0.748,0.831,7.21,0.15,0.52,0.714,0.729,0.813,6.889,0.153,0.528,0.714,0.736,0.82,6.154,0.179,0.551,0.718,0.756,0.837,6.381,0.188,0.555,0.704,0.752,0.833,6.28,0.161,0.53,0.744,0.752,0.834,69.804,0.003,0.058,0.315,0.23,0.285,6.184,0.177,0.549,0.722,0.757,0.837
nn_1st-size_500_1st-act_tanh_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6.207,0.187,0.554,0.73,0.76,0.84,6.474,0.174,0.541,0.724,0.751,0.832,6.353,0.164,0.53,0.75,0.754,0.835,,,,,,,,,,,,
nn_1st-size_500_1st-act_tanh_2nd-size_2000_2nd-act_sigmoid_in-drop_False_hid-drop_True,6.822,0.172,0.538,0.717,0.743,0.824,6.949,0.162,0.526,0.722,0.736,0.819,6.533,0.182,0.548,0.707,0.746,0.828,7.535,0.139,0.513,0.702,0.718,0.803,7.144,0.164,0.528,0.707,0.732,0.815,6.212,0.188,0.551,0.723,0.754,0.835,6.526,0.177,0.54,0.724,0.746,0.828,6.466,0.172,0.543,0.724,0.75,0.831,70.578,0.002,0.047,0.414,0.232,0.288,6.451,0.193,0.556,0.71,0.755,0.835


In [114]:
df.filter(regex="nn_1st-size_500_1st-act_tanh_2nd-size_2000_2nd-act_sigmoid_in-drop_False_hid-drop_True|" +
"nn_1st-size_200_1st-act_tanh_2nd-size_1000_2nd-act_relu_in-drop_False_hid-drop_True|" +
"nn_1st-size_200_1st-act_tanh_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True|" +
"nn_1st-size_500_1st-act_tanh_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True", axis=0)\
.filter(regex='ype_dm')\
.filter(regex='Top 3')\
# .filter(regex='F1 Micro')\
# .rank(axis=1, ascending=True)\
# .rank(axis=0, ascending=False)\
# .sum(axis=1)
#.filter(regex='Top 3')\

Unnamed: 0,1000_w_8_ type_dm _concat_ -- Top 3,100_w_2_t ype_dm_ concat_1 -- Top 3,100_w_5_t ype_dm_ concat_1 -- Top 3,100_w_8_t ype_dm_ concat_0 -- Top 3,100_w_8_t ype_dm_ concat_1 -- Top 3,200_w_2_t ype_dm_ concat_0 -- Top 3,200_w_4_t ype_dm_ concat_0 -- Top 3,200_w_8_t ype_dm_ concat_0 -- Top 3,200_w_8_t ype_dm_ concat_1 -- Top 3,500_w_8_t ype_dm_ concat_0 -- Top 3
nn_1st-size_200_1st-act_tanh_2nd-size_1000_2nd-act_relu_in-drop_False_hid-drop_True,0.751,0.734,0.743,0.724,0.732,0.75,0.747,0.747,0.233,0.749
nn_1st-size_200_1st-act_tanh_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,0.757,0.74,0.748,0.729,0.736,0.756,0.752,0.752,0.23,0.757
nn_1st-size_500_1st-act_tanh_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,,,,,,0.76,0.751,0.754,,
nn_1st-size_500_1st-act_tanh_2nd-size_2000_2nd-act_sigmoid_in-drop_False_hid-drop_True,0.743,0.736,0.746,0.718,0.732,0.754,0.746,0.75,0.232,0.755


In [16]:
df.filter(regex=('200_w_8.*'))

Unnamed: 0,200_w_8 _type_dm _concat_0 -- Coverage Error,200_w_8 _type_dm _concat_0 -- F1 Macro,200_w_8 _type_dm _concat_0 -- F1 Micro,200_w_8 _type_dm _concat_0 -- Top 3,200_w_8 _type_dm _concat_0 -- Top 5,200_w_8 _type_dm _concat_1 -- Coverage Error,200_w_8 _type_dm _concat_1 -- F1 Macro,200_w_8 _type_dm _concat_1 -- F1 Micro,200_w_8 _type_dm _concat_1 -- Top 3,200_w_8 _type_dm _concat_1 -- Top 5,200_w_8 _type_pv -dbow_con -- Coverage Error,200_w_8 _type_pv -dbow_con -- F1 Macro,200_w_8 _type_pv -dbow_con -- F1 Micro,200_w_8 _type_pv -dbow_con -- Top 3,200_w_8 _type_pv -dbow_con -- Top 5
nn_1st-size_50_1st-act_tanh_2nd-size_50_2nd-act_tanh_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,
nn_1st-size_50_1st-act_tanh_2nd-size_None_2nd-act_sigmoid_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,
nn_1st-size_100_1st-act_tanh_2nd-size_200_2nd-act_tanh_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,
nn_1st-size_100_1st-act_tanh_2nd-size_500_2nd-act_tanh_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,
nn_1st-size_100_1st-act_tanh_2nd-size_None_2nd-act_tanh_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,
nn_1st-size_200_1st-act_relu_2nd-size_500_2nd-act_softmax_in-drop_False_hid-drop_True,75.112,0.0,0.0,0.186,0.238,,,,,,,,,,
nn_1st-size_200_1st-act_relu_2nd-size_1000_2nd-act_relu_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,
nn_1st-size_200_1st-act_relu_2nd-size_1000_2nd-act_sigmoid_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,
nn_1st-size_200_1st-act_relu_2nd-size_1000_2nd-act_tanh_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,
nn_1st-size_200_1st-act_relu_2nd-size_2000_2nd-act_relu_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,


In [38]:
# df.filter(regex=('200_w.*')).rank(axis=0, ascending=True)
df.rank(axis=0, ascending=True)

Unnamed: 0,1000_w_ 8_type_d m_concat_ -- Coverage Error,1000_w_ 8_type_d m_concat_ -- F1 Macro,1000_w_ 8_type_d m_concat_ -- F1 Micro,1000_w_ 8_type_d m_concat_ -- Top 3,1000_w_ 8_type_d m_concat_ -- Top 5,1000_w_ 8_type_p v-dbow_co -- Coverage Error,1000_w_ 8_type_p v-dbow_co -- F1 Macro,1000_w_ 8_type_p v-dbow_co -- F1 Micro,1000_w_ 8_type_p v-dbow_co -- Top 3,1000_w_ 8_type_p v-dbow_co -- Top 5,100_w_2 _type_dm _concat_1 -- Coverage Error,100_w_2 _type_dm _concat_1 -- F1 Macro,100_w_2 _type_dm _concat_1 -- F1 Micro,100_w_2 _type_dm _concat_1 -- Top 3,100_w_2 _type_dm _concat_1 -- Top 5,100_w_5 _type_dm _concat_1 -- Coverage Error,100_w_5 _type_dm _concat_1 -- F1 Macro,100_w_5 _type_dm _concat_1 -- F1 Micro,100_w_5 _type_dm _concat_1 -- Top 3,100_w_5 _type_dm _concat_1 -- Top 5,100_w_8 _type_dm _concat_0 -- Coverage Error,100_w_8 _type_dm _concat_0 -- F1 Macro,100_w_8 _type_dm _concat_0 -- F1 Micro,100_w_8 _type_dm _concat_0 -- Top 3,100_w_8 _type_dm _concat_0 -- Top 5,100_w_8 _type_dm _concat_1 -- Coverage Error,100_w_8 _type_dm _concat_1 -- F1 Macro,100_w_8 _type_dm _concat_1 -- F1 Micro,100_w_8 _type_dm _concat_1 -- Top 3,100_w_8 _type_dm _concat_1 -- Top 5,100_w_8 _type_pv -dbow_con -- Coverage Error,100_w_8 _type_pv -dbow_con -- F1 Macro,100_w_8 _type_pv -dbow_con -- F1 Micro,100_w_8 _type_pv -dbow_con -- Top 3,100_w_8 _type_pv -dbow_con -- Top 5,200_w_2 _type_dm _concat_0 -- Coverage Error,200_w_2 _type_dm _concat_0 -- F1 Macro,200_w_2 _type_dm _concat_0 -- F1 Micro,200_w_2 _type_dm _concat_0 -- Top 3,200_w_2 _type_dm _concat_0 -- Top 5,200_w_2 _type_pv -dbow_con -- Coverage Error,200_w_2 _type_pv -dbow_con -- F1 Macro,200_w_2 _type_pv -dbow_con -- F1 Micro,200_w_2 _type_pv -dbow_con -- Top 3,200_w_2 _type_pv -dbow_con -- Top 5,200_w_4 _type_dm _concat_0 -- Coverage Error,200_w_4 _type_dm _concat_0 -- F1 Macro,200_w_4 _type_dm _concat_0 -- F1 Micro,200_w_4 _type_dm _concat_0 -- Top 3,200_w_4 _type_dm _concat_0 -- Top 5,200_w_4 _type_pv -dbow_con -- Coverage Error,200_w_4 _type_pv -dbow_con -- F1 Macro,200_w_4 _type_pv -dbow_con -- F1 Micro,200_w_4 _type_pv -dbow_con -- Top 3,200_w_4 _type_pv -dbow_con -- Top 5,200_w_8 _type_dm _concat_0 -- Coverage Error,200_w_8 _type_dm _concat_0 -- F1 Macro,200_w_8 _type_dm _concat_0 -- F1 Micro,200_w_8 _type_dm _concat_0 -- Top 3,200_w_8 _type_dm _concat_0 -- Top 5,200_w_8 _type_dm _concat_1 -- Coverage Error,200_w_8 _type_dm _concat_1 -- F1 Macro,200_w_8 _type_dm _concat_1 -- F1 Micro,200_w_8 _type_dm _concat_1 -- Top 3,200_w_8 _type_dm _concat_1 -- Top 5,200_w_8 _type_pv -dbow_con -- Coverage Error,200_w_8 _type_pv -dbow_con -- F1 Macro,200_w_8 _type_pv -dbow_con -- F1 Micro,200_w_8 _type_pv -dbow_con -- Top 3,200_w_8 _type_pv -dbow_con -- Top 5,500_w_8 _type_dm _concat_0 -- Coverage Error,500_w_8 _type_dm _concat_0 -- F1 Macro,500_w_8 _type_dm _concat_0 -- F1 Micro,500_w_8 _type_dm _concat_0 -- Top 3,500_w_8 _type_dm _concat_0 -- Top 5,500_w_8 _type_pv -dbow_con -- Coverage Error,500_w_8 _type_pv -dbow_con -- F1 Macro,500_w_8 _type_pv -dbow_con -- F1 Micro,500_w_8 _type_pv -dbow_con -- Top 3,500_w_8 _type_pv -dbow_con -- Top 5,50_w_8_ type_dm_ concat_0_ -- Coverage Error,50_w_8_ type_dm_ concat_0_ -- F1 Macro,50_w_8_ type_dm_ concat_0_ -- F1 Micro,50_w_8_ type_dm_ concat_0_ -- Top 3,50_w_8_ type_dm_ concat_0_ -- Top 5,50_w_8_ type_dm_ concat_1_ -- Coverage Error,50_w_8_ type_dm_ concat_1_ -- F1 Macro,50_w_8_ type_dm_ concat_1_ -- F1 Micro,50_w_8_ type_dm_ concat_1_ -- Top 3,50_w_8_ type_dm_ concat_1_ -- Top 5,50_w_8_ type_pv- dbow_conc -- Coverage Error,50_w_8_ type_pv- dbow_conc -- F1 Macro,50_w_8_ type_pv- dbow_conc -- F1 Micro,50_w_8_ type_pv- dbow_conc -- Top 3,50_w_8_ type_pv- dbow_conc -- Top 5
nn_1st-size_200_1st-act_relu_2nd-size_500_2nd-act_softmax_in-drop_False_hid-drop_True,,,,,,19.0,10.0,10.0,5.0,4.5,,,,,,10.5,14.0,14.5,12.5,11.0,,,,,,9.5,15.0,15.0,14.0,14.0,8.0,15.0,16.0,15.0,13.5,,,,,,,,,,,,,,,,,,,,,14.0,19.0,22.0,19.0,20.0,22.0,1.0,1.0,1.0,2.0,,,,,,,,,,,,,,,,11.0,14.0,15.5,12.0,13.5,,,,,,,,,,
nn_1st-size_200_1st-act_relu_2nd-size_1000_2nd-act_sigmoid_in-drop_True_hid-drop_True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,23.0,8.0,9.0,8.5,8.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
nn_1st-size_200_1st-act_relu_2nd-size_1000_2nd-act_tanh_in-drop_True_hid-drop_False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,19.0,16.0,13.0,17.0,16.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,18.0,16.0,13.0,17.0,16.5
nn_1st-size_200_1st-act_relu_2nd-size_None_2nd-act_tanh_in-drop_False_hid-drop_True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11.0,22.0,22.0,23.0,22.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.0,22.0,22.0,23.0,22.5
nn_1st-size_200_1st-act_sigmoid_2nd-size_50_2nd-act_relu_in-drop_True_hid-drop_True,,,,,,13.5,7.0,12.0,10.5,6.5,,,,,,19.0,3.0,3.0,4.0,3.5,,,,,,19.0,4.0,4.0,4.0,4.0,19.0,4.0,4.0,4.0,4.5,,,,,,,,,,,,,,,,,,,,,26.0,7.0,7.0,6.5,5.0,12.0,4.0,4.0,9.0,7.5,,,,,,,,,,,,,,,,19.0,3.5,5.0,4.5,2.5,,,,,,,,,,
nn_1st-size_200_1st-act_sigmoid_2nd-size_50_2nd-act_sigmoid_in-drop_False_hid-drop_True,,,,,,11.0,16.5,13.0,13.0,13.5,,,,,,10.5,12.0,13.0,12.5,14.5,,,,,,12.0,13.0,13.0,14.0,14.0,9.0,15.0,14.0,15.0,16.5,,,,,,,,,,,,,,,,,,,,,11.0,22.0,21.0,21.5,25.5,10.0,10.0,9.0,11.0,9.0,,,,,,,,,,,,,,,,8.0,14.0,14.0,14.0,13.5,,,,,,,,,,
nn_1st-size_200_1st-act_sigmoid_2nd-size_50_2nd-act_softmax_in-drop_False_hid-drop_False,,,,,,22.0,2.0,2.0,2.0,2.0,,,,,,20.0,5.0,8.0,2.0,1.0,,,,,,22.0,1.0,3.0,1.0,1.0,12.0,10.0,11.0,9.0,7.0,,,,,,,,,,,,,,,,,,,,,27.0,9.0,14.0,1.0,1.0,21.0,6.0,6.0,5.0,3.5,,,,,,,,,,,,,,,,13.0,10.0,10.0,4.5,4.5,,,,,,,,,,
nn_1st-size_200_1st-act_sigmoid_2nd-size_200_2nd-act_sigmoid_in-drop_False_hid-drop_False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9.0,26.0,26.0,26.0,25.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.0,26.0,26.0,26.0,25.0
nn_1st-size_200_1st-act_sigmoid_2nd-size_200_2nd-act_tanh_in-drop_True_hid-drop_False,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,22.0,10.0,10.0,10.0,11.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,21.0,10.0,10.0,10.0,11.0
nn_1st-size_200_1st-act_sigmoid_2nd-size_500_2nd-act_relu_in-drop_True_hid-drop_True,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,32.0,2.0,2.0,2.0,3.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,31.0,2.0,2.0,2.0,3.5


In [None]:
param_results_dict = pickle.load(open(os.path.join(os.path.join(nn_parameter_search_location, GLOBAL_VARS.MODEL_NAME, 
                                           NN_PARAMETER_SEARCH_PREFIX.format(classifications_type, 1024))), 'r'))

In [None]:
for key in param_results_dict.keys():
    print('========== NN: {}'.format(key))
    val = param_results_dict[key]
    val_metrics = val['last_validation_metrics']
    val_metrics2 =  val['best_validation_metrics']
    
    print('Epochs => {}'.format(len(val['history'].history['val_loss'])))
    print('Best Val Loss => {}'.format(val["metrics_callback"].best_val_loss))
    print('Last Val: Coverage Error => {:.4f} | F1 Micro => {:.4f} | F1 Macro => {:.4f} | Top 3 => {:.4f}'.format(val_metrics['coverage_error'], 
                                                                                        val_metrics['f1_micro'], val_metrics['f1_macro'],
                                                                                        val_metrics['top_3']))
    print('Best Val: Coverage Error => {:.4f} | F1 Micro => {:.4f} | F1 Macro => {:.4f} | Top 3 => {:.4f}'.format(val_metrics2['coverage_error'], 
                                                                                        val_metrics2['f1_micro'], val_metrics2['f1_macro'],
                                                                                        val_metrics2['top_3']))

## Test Metrics

In [328]:
classifications_type = 'sections'

In [329]:
test_metrics_file = os.path.join(nn_parameter_search_location, 'bm25/size_10000', '{}_batch_{}_test_metrics.pkl'.format(classifications_type, NN_BATCH_SIZE))
test_metrics_file

'/mnt/data2/shalaby/nn_bow_parameter_search/bm25/size_10000/sections_batch_2048_test_metrics.pkl'

In [330]:
test_metrics = pickle.load(open(test_metrics_file))
print "** Test Metrics: Cov Err: {:.3f}, Avg Labels: {:.3f}, \n\t\t Top 1: {:.3f}, Top 3: {:.3f}, Top 5: {:.3f}, \n\t\t F1 Micro: {:.3f}, F1 Macro: {:.3f}, Total Pos: {:,d}".format(
    test_metrics['coverage_error'], test_metrics['average_num_of_labels'], 
    test_metrics['top_1'], test_metrics['top_3'], test_metrics['top_5'], 
    test_metrics['f1_micro'], test_metrics['f1_macro'], test_metrics['total_positive'])

** Test Metrics: Cov Err: 1.425, Avg Labels: 1.150, 
		 Top 1: 0.832, Top 3: 0.972, Top 5: 0.995, 
		 F1 Micro: 0.791, F1 Macro: 0.704, Total Pos: 411,053
