In [1]:
import inspect
import logging
import os

import pandas as pd

from csrank.util import setup_logger
from experiments.util import lp_metric_dict
import numpy as np
from experiments.dbconnection import DBConnector

Using TensorFlow backend.


Current learners 
 
feta_ranker => <class 'csrank.objectranking.feta_object_ranker.FETAObjectRanker'>
ranknet => <class 'csrank.objectranking.rank_net.RankNet'>
cmpnet => <class 'csrank.objectranking.cmp_net.CmpNet'>
err => <class 'csrank.objectranking.expected_rank_regression.ExpectedRankRegression'>
ranksvm => <class 'csrank.objectranking.rank_svm.RankSVM'>
fate_ranker => <class 'csrank.objectranking.fate_object_ranker.FATEObjectRanker'>
listnet => <class 'csrank.objectranking.list_net.ListNet'>
feta_choice => <class 'csrank.choicefunctions.feta_choice.FETAChoiceFunction'>
fate_choice => <class 'csrank.choicefunctions.fate_choice.FATEChoiceFunction'>
feta_dc => <class 'csrank.discretechoice.feta_discrete_choice.FETADiscreteChoiceFunction'>
fate_dc => <class 'csrank.discretechoice.fate_discrete_choice.FATEDiscreteChoiceFunction'>
ranknet_dc => <class 'csrank.discretechoice.ranknet_discrete_choice.RankNetDiscreteChoiceFunction'>
cmpnet_dc => <class 'csrank.discretechoice.cmpnet_discret

In [2]:
# --- Experiment selection -------------------------------------------------------
DATASET = "tag_genome_dc"
learning_problem = "discrete_choice"
schema = 'masterthesis'
# Results live in one table per learning problem.
results_table = 'results.{}'.format(learning_problem)

# --- Paths ----------------------------------------------------------------------
# NOTE(review): inside a notebook the current frame's "file" is presumably not a
# real path on disk, so DIR_PATH likely ends up relative to the working
# directory -- confirm before relying on it.
_frame_file = inspect.getfile(inspect.currentframe())
DIR_PATH = os.path.dirname(os.path.abspath(_frame_file))
log_path = os.path.join(DIR_PATH, 'logs', 'results.log')
config_file_path = os.path.join(DIR_PATH, 'config', 'clusterdb.json')

# --- Logging --------------------------------------------------------------------
setup_logger(log_path=log_path)
logger = logging.getLogger('Result Parsing')

In [3]:
# Database handle used by the query cells below. It is bound to the name `self`
# at notebook level, and those cells access `self.schema` and `self.cursor_db`.
self = DBConnector(
    config_file_path=config_file_path,
    is_gpu=False,
    schema=schema,
)

In [4]:
# Metric names registered for the selected learning problem, in dictionary order.
keys = list(lp_metric_dict[learning_problem])
# The last name carries a format placeholder that is filled with 6 here
# (presumably the k of a top-k metric -- confirm against lp_metric_dict).
keys[-1] = keys[-1].format(6)
# Comma-separated list, spliced into the SELECT statements as the column list.
metrics = ', '.join(keys)
metrics

'CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6'

In [5]:
# Query the parameters, learner name and metric columns of every result row whose
# job (joined on job_id) belongs to DATASET in the connector's own schema.
self.init_connection()
avail_jobs = "{}.avail_jobs".format(self.schema)
# Placeholders: {0} results table, {1} jobs table, {2} dataset name, {3} metric columns.
query_template = "SELECT dataset_params, learner_params, learner, {3} from {0} INNER JOIN {1} ON {0}.job_id = {1}.job_id where {1}.dataset=\'{2}\'"
select_st = query_template.format(results_table, avail_jobs, DATASET, metrics)
# Echo the final statement so it can be checked or re-run by hand.
print(select_st)
self.cursor_db.execute(select_st)

SELECT dataset_params, learner_params, learner, CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6 from results.discrete_choice INNER JOIN masterthesis.avail_jobs ON results.discrete_choice.job_id = masterthesis.avail_jobs.job_id where masterthesis.avail_jobs.dataset='tag_genome_dc'


In [6]:
# Build one output row per fetched job: [DATASET_TYPE, learner, metric values...].
# Each fetched row is used like a mapping (keys()/values()/[] access); the SELECT
# lists dataset_params and learner_params first, so both are skipped via [2:].
data = []
for job in self.cursor_db.fetchall():
    names = list(job.keys())
    row = list(job.values())[2:]
    # Header for the result frame; recomputed on every row, the last one wins.
    columns = ['dataset'] + names[2:]
    # Tag learners that were trained with the additional zeroth-order model so
    # they form their own group later on.
    if job['learner_params'].get("add_zeroth_order_model", False):
        row[0] = row[0] + '_zero'
    data.append([job['dataset_params']['dataset_type'].upper()] + row)

In [7]:
# Same query as before, but joined against the job table of the "pymc3" schema and
# without learner_params. The rows are appended to the `data` list built in the
# previous cell, so re-running only this cell appends the same rows again.
self.init_connection()
avail_jobs = "{}.avail_jobs".format("pymc3")
# Placeholders: {0} results table, {1} jobs table, {2} dataset name, {3} metric columns.
query_template = "SELECT dataset_params, learner, {3} from {0} INNER JOIN {1} ON {0}.job_id = {1}.job_id where {1}.dataset=\'{2}\'"
select_st = query_template.format(results_table, avail_jobs, DATASET, metrics)
print(select_st)
self.cursor_db.execute(select_st)
for job in self.cursor_db.fetchall():
    names = list(job.keys())
    # Only dataset_params precedes the learner column here, hence the [1:] slices.
    columns = ['dataset'] + names[1:]
    dataset_type = job['dataset_params']['dataset_type'].upper()
    data.append([dataset_type] + list(job.values())[1:])

SELECT dataset_params, learner, CategoricalAccuracy, CategoricalTopK2, CategoricalTopK3, CategoricalTopK4, CategoricalTopK5, CategoricalTopK6 from results.discrete_choice INNER JOIN pymc3.avail_jobs ON results.discrete_choice.job_id = pymc3.avail_jobs.job_id where pymc3.avail_jobs.dataset='tag_genome_dc'


In [8]:
# One row per job: dataset type, learner name and the selected metric columns.
df_full = pd.DataFrame(data=data, columns=columns)
# Disabled derived column, kept for reference; the column it reads is not among the
# metrics selected above.
#df_full['zeroonerankaccuracy'] = 1 - df_full['zeroonerankloss']
df_full.head(5)

Unnamed: 0,dataset,learner,categoricalaccuracy,categoricaltopk2,categoricaltopk3,categoricaltopk4,categoricaltopk5,categoricaltopk6
0,CRITIQUE_FIT_MORE,ranksvm_dc,0.1372,0.2651,0.3885,0.5002,0.603,0.7011
1,DISSIMILAR_CRITIQUE_LESS,ranksvm_dc,0.3841,0.5575,0.6679,0.7494,0.8125,0.8648
2,DISSIMILAR_CRITIQUE_MORE,ranksvm_dc,0.4861,0.6672,0.7619,0.8227,0.8675,0.9034
3,CRITIQUE_FIT_LESS,ranksvm_dc,0.1274,0.2477,0.3601,0.4692,0.5719,0.6675
4,CRITIQUE_FIT_MORE,fate_dc,0.3598,0.5167,0.6251,0.709,0.7795,0.84


In [9]:
# Collapse the per-job rows into one row per (dataset, learner) pair.
group_keys = ['dataset', 'learner']
# Compute the statistics on the metric columns only. Each group frame still
# contains the string columns 'dataset' and 'learner'; reducing over them relied on
# pandas silently dropping non-numeric columns, which newer pandas versions no
# longer do (they raise a TypeError instead).
metric_columns = [col for col in df_full.columns if col not in group_keys]
grouped = df_full.groupby(group_keys)
# NOTE: `data` is rebound here from the raw job rows to the aggregated rows; the
# next cell builds its frame from this list.
data = []
for name, group in grouped:
    # name is the (dataset, learner) key of the group; learner names are upper-cased.
    one_row = [name[0], str(name[1]).upper()]
    metric_values = group[metric_columns]
    std = metric_values.std(axis=0).values
    mean = metric_values.mean(axis=0).values
    if np.all(np.isnan(std)):
        # A single run gives no defined sample standard deviation: report the mean only.
        one_row.extend(["{:.4f}".format(m) for m in mean])
    else:
        one_row.extend(["{:.3f}+-{:.3f}".format(m, s) for m, s in zip(mean, std)])
    data.append(one_row)

In [10]:
# Summary table: one row per (dataset, learner) with the formatted metric strings.
df = pd.DataFrame(data, columns=columns)
# sort_values returns a new frame, so the result has to be assigned back for the
# sort to take effect. A stable sort keeps the existing learner order within each
# dataset.
df = df.sort_values(by='dataset', kind='mergesort')
# Make sure the output directory exists; to_csv does not create it.
results_dir = os.path.join(DIR_PATH, 'results')
os.makedirs(results_dir, exist_ok=True)
df_path = os.path.join(results_dir, DATASET+'.csv')
df.to_csv(df_path)
df

Unnamed: 0,dataset,learner,categoricalaccuracy,categoricaltopk2,categoricaltopk3,categoricaltopk4,categoricaltopk5,categoricaltopk6
0,CRITIQUE_FIT_LESS,FATE_DC,0.2257,0.3521,0.4611,0.5599,0.6506,0.7337
1,CRITIQUE_FIT_LESS,RANKSVM_DC,0.1274,0.2477,0.3601,0.4692,0.5719,0.6675
2,CRITIQUE_FIT_MORE,FATE_DC,0.3598,0.5167,0.6251,0.709,0.7795,0.84
3,CRITIQUE_FIT_MORE,RANKNET_DC,0.1998,0.3509,0.4776,0.5855,0.6822,0.769
4,CRITIQUE_FIT_MORE,RANKSVM_DC,0.1372,0.2651,0.3885,0.5002,0.603,0.7011
5,DISSIMILAR_CRITIQUE_LESS,FATE_DC,0.5096,0.6599,0.742,0.8012,0.8479,0.8845
6,DISSIMILAR_CRITIQUE_LESS,FETA_DC_ZERO,0.4181,0.5774,0.6736,0.7431,0.8003,0.8475
7,DISSIMILAR_CRITIQUE_LESS,RANKSVM_DC,0.3841,0.5575,0.6679,0.7494,0.8125,0.8648
8,DISSIMILAR_CRITIQUE_MORE,FATE_DC,0.8968,0.9589,0.9766,0.9853,0.9902,0.9936
9,DISSIMILAR_CRITIQUE_MORE,RANKSVM_DC,0.4861,0.6672,0.7619,0.8227,0.8675,0.9034


In [11]:
# Write one CSV per dataset type, named after the lower-cased dataset type.
results_dir = os.path.join(DIR_PATH, 'results')
# Make sure the output directory exists; to_csv does not create it.
os.makedirs(results_dir, exist_ok=True)
# Group by the scalar key, not a one-element list: with a list key newer pandas
# versions yield 1-tuples as group names, and name.lower() would then fail.
grouped = df.groupby('dataset')
for name, group in grouped:
    df_path = os.path.join(results_dir, name.lower()+'.csv')
    group.to_csv(df_path)

In [12]:
import numpy as np

# Scratch expression; its value is neither stored nor displayed.
np.arange(48, 87)

# Dummy input with shape (4, 5, 2).
X_train = np.arange(40).reshape((4, 5, 2))

# Derive the learner parameters from the two trailing dimensions of the input.
learner_params = dict(zip(('n_objects', 'n_object_features'), X_train.shape[1:]))