In [1]:
import inspect
import logging
import os

import pandas as pd

from csrank.util import setup_logger
from experiments.util import lp_metric_dict
import numpy as np
from experiments.dbconnection import DBConnector

Using TensorFlow backend.


In [2]:
# Paths and query configuration shared by all cells below.
#
# The project directory is taken from the kernel's working directory.
# inspect.getfile(inspect.currentframe()) is not reliable inside a notebook:
# the frame's "file" is a synthetic per-cell name, and on recent ipykernel
# versions that name lives in a temporary directory, so the config and log
# paths would no longer point at the project.
DIR_PATH = os.getcwd()
log_path = os.path.join(DIR_PATH, 'logs', 'results.log')
setup_logger(log_path=log_path)
logger = logging.getLogger('Result Parsing')
config_file_path = os.path.join(DIR_PATH, 'config', 'clusterdb.json')

# Dataset / learning problem whose results are collected.
DATASET = "tag_genome_dc"
learning_problem = "discrete_choice"
results_table = 'results.{}'.format(learning_problem)
schema = 'masterthesis'

# Query template. Placeholders: {0} results table, {1} avail_jobs table,
# {2} dataset name, {3} comma-separated metric columns.
# All values are notebook constants, not user input.
select_jobs = (
    "SELECT learner_params, dataset_params, {0}.job_id, dataset, learner, {3} "
    "from {0} INNER JOIN {1} ON {0}.job_id = {1}.job_id "
    "where {1}.dataset='{2}'"
)

In [7]:
# Create the database connector from the cluster configuration file. The
# name `self` is kept because the following cells refer to the connector by it.
self = DBConnector(
    config_file_path=config_file_path,
    is_gpu=False,
    schema=schema,
)

{'connect_params': {'dbname': 'clusterdb',
  'host': 'csr-clusterdb.cs.upb.de',
  'password': '<redacted>',
  'port': 5432,
  'user': 'cluster'},
 'connection': None,
 'cursor_db': None,
 'is_gpu': False,
 'job_description': None,
 'logger': <Logger DBConnector (DEBUG)>,
 'schema': 'masterthesis'}

In [8]:
# Collect the ids of the fold-0 'feta_dc' jobs whose learner parameters have
# add_zeroth_order_model set to 'TRUE'.
self.init_connection()
sel = "SELECT job_id FROM {} WHERE learner='feta_dc' and learner_params ->> 'add_zeroth_order_model' = 'TRUE' and fold_id=0"
self.cursor_db.execute(sel.format("masterthesis.avail_jobs"))
# Take the first (only) column row by row. Unlike np.array(rows).T[0], this
# yields an empty array instead of raising IndexError when no job matches.
jobs = np.array([row[0] for row in self.cursor_db.fetchall()])
jobs

array([118, 122, 126, 130, 138, 114, 103, 104, 105, 106, 110, 102, 134])

In [9]:
# Build the comma-separated list of metric columns for the SELECT statement.
# The last metric name contains a format placeholder, filled with 6 here.
keys = list(lp_metric_dict[learning_problem])
keys[-1] = keys[-1].format(6)
metrics = ', '.join(keys)
metrics

'CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6'

In [10]:
# Load the per-job metrics for DATASET from the connector's own schema and
# turn every result row into a flat list: job_id, dataset, learner, metrics...
self.init_connection()
avail_jobs = "{}.avail_jobs".format(self.schema)
select_st = select_jobs.format(results_table, avail_jobs, DATASET, metrics)
print(select_st)
self.cursor_db.execute(select_st)
data = []
for job in self.cursor_db.fetchall():
    job = dict(job)
    # Remove the two JSON parameter columns by name rather than by position,
    # so the remaining columns do not depend on the column order of the query.
    # A NULL learner_params is treated as "no parameters".
    params = job.pop('learner_params') or {}
    dataset_params = job.pop('dataset_params')
    # Runs with the zeroth-order model are reported as a separate learner.
    if params.get("add_zeroth_order_model", False):
        job['learner'] = job['learner'] + '_zero'
    # Report the dataset variant (dataset_type) instead of the dataset name.
    job['dataset'] = dataset_params['dataset_type'].upper()
    job['learner'] = job['learner'].upper()
    # Column names of the flattened rows; reused by the following cells.
    columns = list(job.keys())
    data.append(list(job.values()))

SELECT learner_params, dataset_params, results.discrete_choice.job_id, dataset, learner, CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6 from results.discrete_choice INNER JOIN masterthesis.avail_jobs ON results.discrete_choice.job_id = masterthesis.avail_jobs.job_id where masterthesis.avail_jobs.dataset='tag_genome_dc'


In [11]:
# Load the results of the jobs registered in the 'pymc3' schema and combine
# them with the rows already collected in `data` into one DataFrame.
self.init_connection()
avail_jobs = "{}.avail_jobs".format("pymc3")
select_st = select_jobs.format(results_table, avail_jobs, DATASET, metrics)
print(select_st)
self.cursor_db.execute(select_st)
# Collect into a separate list instead of appending to `data`: re-running
# this cell then no longer duplicates the pymc3 rows in df_full.
pymc3_rows = []
for job in self.cursor_db.fetchall():
    job = dict(job)
    # Remove the two JSON parameter columns by name rather than by position.
    job.pop('learner_params')
    dataset_params = job.pop('dataset_params')
    # Report the dataset variant (dataset_type) instead of the dataset name.
    job['dataset'] = dataset_params['dataset_type'].upper()
    job['learner'] = job['learner'].upper()
    # Column names of the flattened rows: job_id, dataset, learner, metrics...
    columns = list(job.keys())
    pymc3_rows.append(list(job.values()))
df_full = pd.DataFrame(data + pymc3_rows, columns=columns)

SELECT learner_params, dataset_params, results.discrete_choice.job_id, dataset, learner, CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6 from results.discrete_choice INNER JOIN pymc3.avail_jobs ON results.discrete_choice.job_id = pymc3.avail_jobs.job_id where pymc3.avail_jobs.dataset='tag_genome_dc'


In [12]:
# Order the rows by dataset variant, then display only the FATE_DC runs as a
# quick check of the collected results.
df_full = df_full.sort_values(by='dataset')
df_full[df_full['learner'].eq('FATE_DC')]

Unnamed: 0,job_id,dataset,learner,categoricalaccuracy,categoricaltopk2,categoricaltopk3,categoricaltopk4,categoricaltopk5,categoricaltopk6
37,144,CRITIQUE_FIT_LESS,FATE_DC,0.2255,0.3521,0.4603,0.5583,0.6504,0.7333
49,152,CRITIQUE_FIT_LESS,FATE_DC,0.2246,0.3517,0.4586,0.5564,0.648,0.7332
51,190,CRITIQUE_FIT_LESS,FATE_DC,0.2276,0.3563,0.4675,0.5662,0.6567,0.74
54,192,CRITIQUE_FIT_LESS,FATE_DC,0.2229,0.3488,0.4589,0.5575,0.6488,0.7305
16,121,CRITIQUE_FIT_LESS,FATE_DC,0.2285,0.3547,0.4659,0.5641,0.6542,0.7345
65,201,CRITIQUE_FIT_MORE,FATE_DC,0.3509,0.5075,0.6153,0.7023,0.7742,0.8364
66,151,CRITIQUE_FIT_MORE,FATE_DC,0.3555,0.5105,0.6205,0.7058,0.7778,0.8395
12,125,CRITIQUE_FIT_MORE,FATE_DC,0.3608,0.5206,0.6275,0.7122,0.7834,0.8446
13,113,DISSIMILAR_CRITIQUE_LESS,FATE_DC,0.5136,0.6619,0.7442,0.8036,0.8466,0.8832
57,195,DISSIMILAR_CRITIQUE_LESS,FATE_DC,0.5079,0.6614,0.745,0.8033,0.8497,0.8867


In [13]:
# Aggregate the metrics over all runs of each (dataset, learner) pair into
# one formatted row: "mean+-std", or just the mean when std is undefined.
group_keys = ['dataset', 'learner']
# Work on a copy without the job ids instead of deleting the column from
# df_full in place, so this cell can be re-run without a KeyError.
scores_by_job = df_full.drop(columns=['job_id'], errors='ignore')
grouped = scores_by_job.groupby(group_keys)
data = []
for name, group in grouped:
    one_row = [name[0], str(name[1]).upper()]
    # Aggregate the metric columns only. Each group still carries the string
    # key columns, and recent pandas versions raise on them instead of
    # silently skipping them.
    scores = group.drop(columns=group_keys).astype(float)
    std = scores.std(axis=0).values
    mean = scores.mean(axis=0).values
    if np.all(np.isnan(std)):
        # No spread available (e.g. a single run in the group): mean only.
        one_row.extend(["{:.4f}".format(m) for m in mean])
    else:
        one_row.extend(["{:.3f}+-{:.3f}".format(m, s) for m, s in zip(mean, std)])
    data.append(one_row)

In [14]:
# Build the summary table and store it as <DATASET>.csv in the results folder.
# columns[1:] skips the first collected column name (job_id), which the
# aggregated rows no longer contain.
df = pd.DataFrame(data, columns=columns[1:])
# sort_values returns a new frame, so the result has to be assigned to take
# effect; a stable sort keeps the learner order within each dataset.
df = df.sort_values(by='dataset', kind='mergesort')
results_dir = os.path.join(DIR_PATH, 'results')
# Create the output folder on first use instead of failing in to_csv.
os.makedirs(results_dir, exist_ok=True)
df_path = os.path.join(results_dir, DATASET+'.csv')
df.to_csv(df_path)
df

Unnamed: 0,dataset,learner,categoricalaccuracy,categoricaltopk2,categoricaltopk3,categoricaltopk4,categoricaltopk5,categoricaltopk6
0,CRITIQUE_FIT_LESS,FATE_DC,0.226+-0.002,0.353+-0.003,0.462+-0.004,0.560+-0.004,0.652+-0.004,0.734+-0.004
1,CRITIQUE_FIT_LESS,FETA_DC_ZERO,0.173+-0.041,0.295+-0.039,0.404+-0.033,0.507+-0.028,0.604+-0.023,0.695+-0.018
2,CRITIQUE_FIT_LESS,GENERALIZED_EXTREME_VALUE,0.159+-0.006,0.271+-0.007,0.375+-0.009,0.476+-0.011,0.572+-0.010,0.667+-0.009
3,CRITIQUE_FIT_LESS,MULTINOMIAL_LOGIT_MODEL,0.161+-0.001,0.272+-0.002,0.377+-0.002,0.478+-0.002,0.576+-0.002,0.671+-0.001
4,CRITIQUE_FIT_LESS,NESTED_LOGIT_MODEL,0.158+-0.003,0.271+-0.007,0.377+-0.006,0.480+-0.007,0.578+-0.007,0.674+-0.006
5,CRITIQUE_FIT_LESS,PAIRED_COMBINATORIAL_LOGIT,0.147+-0.004,0.257+-0.003,0.362+-0.002,0.464+-0.002,0.565+-0.001,0.664+-0.003
6,CRITIQUE_FIT_LESS,RANKNET_DC,0.173+-0.002,0.306+-0.002,0.420+-0.002,0.522+-0.002,0.616+-0.003,0.704+-0.002
7,CRITIQUE_FIT_LESS,RANKSVM_DC,0.120+-0.018,0.237+-0.020,0.350+-0.021,0.458+-0.020,0.561+-0.018,0.659+-0.015
8,CRITIQUE_FIT_MORE,FATE_DC,0.356+-0.005,0.513+-0.007,0.621+-0.006,0.707+-0.005,0.778+-0.005,0.840+-0.004
9,CRITIQUE_FIT_MORE,FETA_DC,0.251+-0.023,0.384+-0.027,0.495+-0.026,0.592+-0.022,0.680+-0.018,0.760+-0.015


In [30]:
# Write one CSV per dataset variant, named after the lower-cased variant.
results_dir = os.path.join(DIR_PATH, 'results')
# Create the output folder on first use instead of failing in to_csv.
os.makedirs(results_dir, exist_ok=True)
# Group by the plain column name: grouping by a one-element list makes recent
# pandas versions yield 1-tuples as keys, on which name.lower() fails.
grouped = df.groupby('dataset')
for name, group in grouped:
    df_path = os.path.join(results_dir, name.lower()+'.csv')
    group.to_csv(df_path)

In [31]:
import numpy as np

# Scratch expression; its value is discarded because it is not the last
# statement of the cell.
np.arange(48, 87)

# Toy input with shape (4, 5, 2).
X_train = np.arange(40).reshape((4, 5, 2))

# Derive the object count and feature count from the trailing two axes.
learner_params = dict(zip(('n_objects', 'n_object_features'), X_train.shape[1:]))