In [11]:
import inspect
import logging
import os

import pandas as pd

from csrank.util import setup_logger
from experiments.util import lp_metric_dict
import numpy as np
from experiments.dbconnection import DBConnector

In [21]:
# What to report on: the dataset, its learning problem, and the database
# objects (schema / results table) derived from them.
DATASET = "synthetic_dc"
learning_problem = "discrete_choice"
schema = 'masterthesis'
results_table = 'results.{}'.format(learning_problem)

# Directory of the file backing the current frame; the config and log paths
# are built relative to it.
# NOTE(review): inside a notebook kernel the frame's file name is synthetic, so
# this presumably resolves to the working directory -- confirm.
current_file = inspect.getfile(inspect.currentframe())
DIR_PATH = os.path.dirname(os.path.abspath(current_file))
config_file_path = os.path.join(DIR_PATH, 'config', 'clusterdb.json')
log_path = os.path.join(DIR_PATH, 'logs', 'results.log')

# Configure logging before requesting the named logger used by this notebook.
setup_logger(log_path=log_path)
logger = logging.getLogger('Result Parsing')

In [22]:
# Database connector built from the cluster config file for the selected
# schema, with is_gpu=False.
# NOTE(review): `self` is an unusual name for a module-level variable, but the
# query cells further down reference it under this name.
self = DBConnector(config_file_path=config_file_path, is_gpu=False, schema=schema)

In [23]:
# Metric names registered for the selected learning problem.
keys = [*lp_metric_dict[learning_problem].keys()]
# The last metric name is a format template; fill its placeholder with 6.
keys[-1] = keys[-1].format(6)
# Comma-separated column list, ready to be dropped into a SELECT clause.
metrics = ', '.join(keys)
metrics

'CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6'

In [24]:
# NOTE(review): init_connection() presumably (re)creates the connection and
# `cursor_db` used below -- confirm against DBConnector.
self.init_connection()
avail_jobs = "{}.avail_jobs".format(self.schema)
# Join the results table with the job table on job_id and keep only the jobs
# that were run on DATASET. The values interpolated here are notebook
# constants, not user input.
query_args = (results_table, avail_jobs, DATASET, metrics)
select_st = "SELECT dataset_params, learner_params, learner, {3} from {0} INNER JOIN {1} ON {0}.job_id = {1}.job_id where {1}.dataset=\'{2}\'".format(
    *query_args)
print(select_st)
self.cursor_db.execute(select_st)

SELECT dataset_params, learner_params, learner, CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6 from results.discrete_choice INNER JOIN masterthesis.avail_jobs ON results.discrete_choice.job_id = masterthesis.avail_jobs.job_id where masterthesis.avail_jobs.dataset='synthetic_dc'


In [25]:
# Flatten every fetched job into a row: upper-cased dataset type, learner name,
# then the metric values.
data = []
for job in self.cursor_db.fetchall():
    keys = list(job.keys())
    values = list(job.values())
    # Positions 0 and 1 (dataset_params, learner_params) are replaced by a
    # single 'dataset' column; everything from 'learner' onwards is kept.
    columns = ['dataset'] + keys[2:]
    # Learners trained with the zeroth-order model get their own label so they
    # end up in a separate group later.
    if job['learner_params'].get("add_zeroth_order_model", False):
        values[2] += '_zero'
    dataset_type = job['dataset_params']['dataset_type'].upper()
    vals = [dataset_type] + values[2:]
    data.append(vals)

In [26]:
# Same report for the jobs registered under the "pymc3" schema. This query has
# no learner_params column, so the row layout is shifted by one.
# NOTE(review): init_connection() presumably (re)creates the connection and
# `cursor_db` used below -- confirm against DBConnector.
self.init_connection()
avail_jobs = "{}.avail_jobs".format("pymc3")
query_args = (results_table, avail_jobs, DATASET, metrics)
select_st = "SELECT dataset_params, learner, {3} from {0} INNER JOIN {1} ON {0}.job_id = {1}.job_id where {1}.dataset=\'{2}\'".format(
    *query_args)
print(select_st)
self.cursor_db.execute(select_st)
# Append these rows to the `data` list that already exists in the session.
for job in self.cursor_db.fetchall():
    keys = list(job.keys())
    values = list(job.values())
    # Only position 0 (dataset_params) is replaced by the 'dataset' column.
    columns = ['dataset'] + keys[1:]
    dataset_type = job['dataset_params']['dataset_type'].upper()
    vals = [dataset_type] + values[1:]
    data.append(vals)

SELECT dataset_params, learner, CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6 from results.discrete_choice INNER JOIN pymc3.avail_jobs ON results.discrete_choice.job_id = pymc3.avail_jobs.job_id where pymc3.avail_jobs.dataset='synthetic_dc'


In [27]:
# One row per finished job: dataset type, learner label and its metric values.
df_full = pd.DataFrame(data=data, columns=columns)
df_full.head()

Unnamed: 0,dataset,learner,categoricalaccuracy,categoricaltopk2,categoricaltopk3,categoricaltopk4,categoricaltopk5,categoricaltopk6
0,HYPERVOLUME,fate_dc,0.461,0.5846,0.7086,0.7891,0.8526,0.8987
1,MEDOID,fate_dc,0.8748,0.9776,0.9952,0.999,0.9998,1.0
2,HYPERVOLUME,ranknet_dc,0.199,0.2693,0.3622,0.4575,0.5605,0.669
3,MEDOID,ranknet_dc,0.5348,0.7609,0.8758,0.9383,0.971,0.988
4,HYPERVOLUME,ranknet_dc,0.2079,0.2783,0.3694,0.4644,0.5657,0.6695


In [28]:
# Collapse the per-job rows into one formatted row per (dataset, learner) pair.
grouped = df_full.groupby(['dataset', 'learner'])
data = []
for name, group in grouped:
    # name is the (dataset, learner) key of this group.
    one_row = [name[0], str(name[1]).upper()]
    # Each group frame still carries the string-valued 'dataset' and 'learner'
    # columns. Restrict the reductions to numeric columns explicitly: recent
    # pandas versions no longer drop non-numeric columns silently and raise a
    # TypeError instead.
    std = group.std(axis=0, numeric_only=True).values
    mean = group.mean(axis=0, numeric_only=True).values
    if np.all(np.isnan(std)):
        # No spread available (std is NaN everywhere, e.g. a single run):
        # report the mean only.
        one_row.extend(["{:.4f}".format(m) for m in mean])
    else:
        one_row.extend(["{:.3f}+-{:.3f}".format(m, s) for m, s in zip(mean, std)])
    data.append(one_row)

In [29]:
# Summary table: one row per (dataset, learner) with formatted mean+-std cells.
df = pd.DataFrame(data, columns=columns)
# sort_values returns a new frame rather than sorting in place, so the result
# has to be kept. A stable sort preserves the learner order within a dataset.
df = df.sort_values(by='dataset', kind='mergesort')
# Make sure the output directory exists before writing into it.
results_dir = os.path.join(DIR_PATH, 'results')
os.makedirs(results_dir, exist_ok=True)
df_path = os.path.join(results_dir, DATASET+'.csv')
df.to_csv(df_path)
df

Unnamed: 0,dataset,learner,categoricalaccuracy,categoricaltopk2,categoricaltopk3,categoricaltopk4,categoricaltopk5,categoricaltopk6
0,HYPERVOLUME,FATE_DC,0.681+-0.124,0.806+-0.124,0.881+-0.097,0.920+-0.073,0.947+-0.053,0.965+-0.037
1,HYPERVOLUME,FETA_DC,0.769+-0.022,0.875+-0.020,0.933+-0.007,0.961+-0.004,0.979+-0.001,0.991+-0.001
2,HYPERVOLUME,FETA_DC_ZERO,0.601+-0.298,0.711+-0.296,0.787+-0.275,0.836+-0.239,0.877+-0.199,0.910+-0.159
3,HYPERVOLUME,GENERALIZED_EXTREME_VALUE,0.366+-0.245,0.460+-0.287,0.552+-0.287,0.630+-0.256,0.708+-0.211,0.785+-0.156
4,HYPERVOLUME,MULTINOMIAL_LOGIT_MODEL,0.201+-0.008,0.267+-0.010,0.360+-0.010,0.456+-0.008,0.559+-0.004,0.664+-0.004
5,HYPERVOLUME,NESTED_LOGIT_MODEL,0.291+-0.003,0.416+-0.005,0.511+-0.007,0.582+-0.006,0.651+-0.006,0.722+-0.004
6,HYPERVOLUME,PAIRED_COMBINATORIAL_LOGIT,0.185+-0.001,0.248+-0.001,0.340+-0.002,0.440+-0.002,0.550+-0.002,0.668+-0.002
7,HYPERVOLUME,RANKNET_DC,0.203+-0.004,0.276+-0.006,0.369+-0.006,0.462+-0.005,0.562+-0.004,0.665+-0.007
8,HYPERVOLUME,RANKSVM_DC,0.186+-0.001,0.248+-0.001,0.340+-0.002,0.439+-0.002,0.550+-0.002,0.667+-0.002
9,MEDOID,FATE_DC,0.881+-0.007,0.980+-0.003,0.996+-0.001,0.999+-0.000,1.000+-0.000,1.000+-0.000


In [30]:
# Write one CSV per dataset type, named after the lower-cased dataset label.
# Group by the scalar key (not a one-element list) so that `name` is the plain
# dataset string; with a list key, recent pandas yields 1-tuples and
# `name.lower()` fails.
grouped = df.groupby('dataset')
results_dir = os.path.join(DIR_PATH, 'results')
# Make sure the output directory exists before writing into it.
os.makedirs(results_dir, exist_ok=True)
for name, group in grouped:
    df_path = os.path.join(results_dir, name.lower()+'.csv')
    group.to_csv(df_path)

In [31]:
import numpy as np
# Scratch check: read learner parameters off the shape of a training array.
np.arange(48, 87)  # value is discarded; a mid-cell expression is not displayed

# 40 consecutive integers laid out as a (4, 5, 2) array.
X_train = np.arange(40).reshape((4, 5, 2))

# The two trailing axes give the object count and the per-object feature count.
n_objects, n_object_features = X_train.shape[1:]
learner_params = {'n_objects': n_objects, 'n_object_features': n_object_features}