In [15]:
import inspect
import logging
import os

import pandas as pd

from csrank.util import setup_logger
from experiments.util import lp_metric_dict
import numpy as np
from experiments.dbconnection import DBConnector

In [25]:
# Notebook-wide configuration: file locations, logging and the SQL template
# used by the result-loading cells below.
DIR_PATH = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe()))
)

# Logging goes to <DIR_PATH>/logs/results.log.
log_path = os.path.join(DIR_PATH, 'logs', 'results.log')
setup_logger(log_path=log_path)
logger = logging.getLogger('Result Parsing')

# Database configuration file and the experiment being analysed.
config_file_path = os.path.join(DIR_PATH, 'config', 'clusterdb.json')
schema = 'masterthesis'
datasets = ['synthetic_dc', 'mnist_dc', 'tag_genome_dc']
DATASET = "synthetic_dc"
learning_problem = "discrete_choice"
results_table = 'results.' + learning_problem

# Placeholders: {0} results table, {1} avail_jobs table, {2} dataset name,
# {3} comma-separated metric columns.
select_jobs = (
    "SELECT learner_params, dataset_params, {0}.job_id, dataset, learner, {3} "
    "from {0} INNER JOIN {1} ON {0}.job_id = {1}.job_id "
    "where {1}.dataset='{2}'"
)

In [26]:
# Database connector used by every query cell below. It is bound to the
# module-level name `self` because the later cells refer to it by that name.
self = DBConnector(
    config_file_path=config_file_path,
    is_gpu=False,
    schema=schema,
)

In [27]:
# List the fold-0 'feta_dc' jobs whose learner_params flag
# add_zeroth_order_model equals 'TRUE'.
self.init_connection()
sel = "SELECT job_id FROM {} WHERE learner='feta_dc' and learner_params ->> 'add_zeroth_order_model' = 'TRUE' and fold_id=0"
self.cursor_db.execute(sel.format("masterthesis.avail_jobs"))
# Take the first column row by row. Unlike np.array(rows).T[0], this yields an
# empty array instead of raising IndexError when the query matches nothing.
jobs = np.array([row[0] for row in self.cursor_db.fetchall()])
jobs

array([118, 122, 126, 130, 138, 114, 103, 105, 104, 106, 110, 102, 134])

In [28]:
# Build the comma-separated metric column list for the SELECT statement.
keys = list(lp_metric_dict[learning_problem])
# The last metric name is a template; fill in 6 as its parameter.
keys[-1] = keys[-1].format(6)
metrics = ', '.join(keys)
metrics

'CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6'

In [29]:
# Load every result row for DATASET from the connector's own schema and
# flatten each one into [job_id, dataset, learner, <metrics...>].
self.init_connection()
avail_jobs = "{}.avail_jobs".format(self.schema)
select_st = select_jobs.format(results_table, avail_jobs, DATASET, metrics)
print(select_st)
self.cursor_db.execute(select_st)
data = []
for record in self.cursor_db.fetchall():
    row = dict(record)
    learner_name = row['learner']
    # Runs with the zeroth-order model enabled are reported as a separate learner.
    if row['learner_params'].get("add_zeroth_order_model", False):
        learner_name = learner_name + '_zero'
    # Report the dataset variant (dataset_params['dataset_type']) rather than
    # the dataset name itself.
    row['dataset'] = row['dataset_params']['dataset_type'].upper()
    row['learner'] = learner_name.upper()
    # The first two selected columns (learner_params, dataset_params) are only
    # needed for the relabelling above and are dropped from the output.
    columns = list(row.keys())[2:]
    data.append(list(row.values())[2:])

SELECT learner_params, dataset_params, results.discrete_choice.job_id, dataset, learner, CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6 from results.discrete_choice INNER JOIN masterthesis.avail_jobs ON results.discrete_choice.job_id = masterthesis.avail_jobs.job_id where masterthesis.avail_jobs.dataset='synthetic_dc'


In [30]:
# Load the result rows for DATASET whose jobs are registered in the 'pymc3'
# schema and combine them with the rows collected in `data` by the previous cell.
self.init_connection()
avail_jobs = "{}.avail_jobs".format("pymc3")
select_st = select_jobs.format(results_table, avail_jobs, DATASET, metrics)
print(select_st)
self.cursor_db.execute(select_st)
# Collect into a separate list instead of appending to `data` in place, so that
# re-running this cell does not duplicate the pymc3 rows in df_full.
pymc3_rows = []
for job in self.cursor_db.fetchall():
    job = dict(job)
    job['dataset'] = job['dataset_params']['dataset_type'].upper()
    job['learner'] = job['learner'].upper()
    # Drop the first two selected columns (learner_params, dataset_params).
    columns = list(job.keys())[2:]
    pymc3_rows.append(list(job.values())[2:])
df_full = pd.DataFrame(data + pymc3_rows, columns=columns)

SELECT learner_params, dataset_params, results.discrete_choice.job_id, dataset, learner, CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6 from results.discrete_choice INNER JOIN pymc3.avail_jobs ON results.discrete_choice.job_id = pymc3.avail_jobs.job_id where pymc3.avail_jobs.dataset='synthetic_dc'


In [31]:
# Order the rows by dataset variant, then show only the FATE_DC runs.
df_full = df_full.sort_values(by='dataset')
df_full[df_full['learner'].eq('FATE_DC')]

Unnamed: 0,job_id,dataset,learner,categoricalaccuracy,categoricaltopk2,categoricaltopk3,categoricaltopk4,categoricaltopk5,categoricaltopk6
26,16,HYPERVOLUME,FATE_DC,0.7212,0.8437,0.9143,0.9454,0.9658,0.979
33,13,HYPERVOLUME,FATE_DC,0.7363,0.8625,0.924,0.9509,0.9684,0.9802
5,14,HYPERVOLUME,FATE_DC,0.738,0.8623,0.9248,0.9535,0.9721,0.9828
69,2,HYPERVOLUME,FATE_DC,0.7036,0.8281,0.9005,0.9362,0.9592,0.9752
17,15,HYPERVOLUME,FATE_DC,0.7499,0.8775,0.9352,0.9589,0.9744,0.9838
0,9,MEDOID,FATE_DC,0.8748,0.9776,0.9952,0.999,0.9998,1.0
29,1,MEDOID,FATE_DC,0.8728,0.9764,0.9948,0.9989,0.9999,0.9999
34,11,MEDOID,FATE_DC,0.8843,0.9814,0.9964,0.9993,0.9999,0.9999
36,12,MEDOID,FATE_DC,0.8852,0.9816,0.9967,0.9993,0.9999,1.0
35,10,MEDOID,FATE_DC,0.8897,0.9829,0.9968,0.9994,0.9999,1.0


In [32]:
# Collapse the individual runs into one row per (dataset, learner), formatting
# every metric as "mean+-std".
# drop(..., errors="ignore") instead of `del` keeps the cell re-runnable: a
# second execution no longer raises KeyError for the already-removed column.
df_full = df_full.drop(columns="job_id", errors="ignore")
group_keys = ['dataset', 'learner']
metric_columns = [col for col in df_full.columns if col not in group_keys]
grouped = df_full.groupby(group_keys)
data = []
for name, group in grouped:
    one_row = [name[0], str(name[1]).upper()]
    # Reduce the metric columns only. Each group still carries the string
    # columns 'dataset' and 'learner', and mean()/std() over them raises
    # TypeError on pandas >= 2.0 (older versions silently dropped them).
    group_metrics = group[metric_columns]
    std = group_metrics.std(axis=0).values
    mean = group_metrics.mean(axis=0).values
    if np.all(np.isnan(std)):
        # No standard deviation available for any metric: report the mean alone.
        one_row.extend(["{:.4f}".format(m) for m in mean])
    else:
        one_row.extend(["{:.3f}+-{:.3f}".format(m, s) for m, s in zip(mean, std)])
    data.append(one_row)

In [33]:
# Build the summary table (columns[0] is job_id, which was aggregated away) and
# store it as results/<DATASET>.csv.
# sort_values returns a new frame, so the result must be assigned; the original
# call discarded it. A stable sort keeps the learner order within each dataset.
df = pd.DataFrame(data, columns=columns[1:]).sort_values(by='dataset', kind='mergesort')
results_dir = os.path.join(DIR_PATH, 'results')
# to_csv does not create missing directories.
os.makedirs(results_dir, exist_ok=True)
df_path = os.path.join(results_dir, DATASET + '.csv')
df.to_csv(df_path)
df

Unnamed: 0,dataset,learner,categoricalaccuracy,categoricaltopk2,categoricaltopk3,categoricaltopk4,categoricaltopk5,categoricaltopk6
0,HYPERVOLUME,FATE_DC,0.730+-0.018,0.855+-0.019,0.920+-0.013,0.949+-0.009,0.968+-0.006,0.980+-0.003
1,HYPERVOLUME,FETA_DC,0.769+-0.022,0.875+-0.020,0.933+-0.007,0.961+-0.004,0.979+-0.001,0.991+-0.001
2,HYPERVOLUME,FETA_DC_ZERO,0.761+-0.031,0.869+-0.024,0.929+-0.010,0.958+-0.005,0.977+-0.002,0.990+-0.001
3,HYPERVOLUME,GENERALIZED_EXTREME_VALUE,0.366+-0.245,0.460+-0.287,0.552+-0.287,0.630+-0.256,0.708+-0.211,0.785+-0.156
4,HYPERVOLUME,MULTINOMIAL_LOGIT_MODEL,0.201+-0.008,0.267+-0.010,0.360+-0.010,0.456+-0.008,0.559+-0.004,0.664+-0.004
5,HYPERVOLUME,NESTED_LOGIT_MODEL,0.291+-0.003,0.416+-0.005,0.511+-0.007,0.582+-0.006,0.651+-0.006,0.722+-0.004
6,HYPERVOLUME,PAIRED_COMBINATORIAL_LOGIT,0.185+-0.001,0.248+-0.001,0.340+-0.002,0.440+-0.002,0.550+-0.002,0.668+-0.002
7,HYPERVOLUME,RANKNET_DC,0.203+-0.004,0.276+-0.006,0.369+-0.006,0.462+-0.005,0.562+-0.004,0.665+-0.007
8,HYPERVOLUME,RANKSVM_DC,0.186+-0.001,0.248+-0.001,0.340+-0.002,0.439+-0.002,0.550+-0.002,0.667+-0.002
9,MEDOID,FATE_DC,0.881+-0.007,0.980+-0.003,0.996+-0.001,0.999+-0.000,1.000+-0.000,1.000+-0.000


In [39]:
# Merge the current summary into the cumulative results/discrete_choice.csv.
df_path = os.path.join(DIR_PATH, 'results', "discrete_choice.csv")
# to_csv does not create missing directories.
os.makedirs(os.path.dirname(df_path), exist_ok=True)

if os.path.isfile(df_path):
    # dtype=str keeps formatted cells (e.g. "0.7300") exactly as written, so
    # they compare equal to the string cells of `df` when de-duplicating.
    previous = pd.read_csv(df_path, index_col=0, dtype=str)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    dataFrame = pd.concat([previous, df], ignore_index=True)
    # Re-running the notebook for the same dataset must not pile up identical rows.
    dataFrame = dataFrame.drop_duplicates().reset_index(drop=True)
else:
    dataFrame = df
dataFrame.to_csv(df_path)
# Last expression of the cell, so the merged table is actually displayed.
dataFrame

In [30]:
# Write one CSV per dataset variant: results/<variant in lower case>.csv.
# Group by the scalar key 'dataset', not the list ['dataset']: with a
# one-element list pandas >= 2.0 yields 1-tuples as group names, and
# name.lower() would then raise AttributeError.
grouped = df.groupby('dataset')
for name, group in grouped:
    df_path = os.path.join(DIR_PATH, 'results', name.lower() + '.csv')
    group.to_csv(df_path)

In [31]:
# Scratch check: derive n_objects / n_object_features from the trailing two
# dimensions of a dummy (4, 5, 2) training array.
import numpy as np
np.arange(48, 87)  # result is discarded; kept from the original cell

X_train = np.arange(40).reshape(4, 5, 2)

learner_params = dict(zip(('n_objects', 'n_object_features'), X_train.shape[1:]))