In [1]:
import warnings
warnings.filterwarnings('ignore')
import inspect
import logging
import os
import pandas as pd
from csrank.util import setup_logging
from experiments.util import lp_metric_dict
import numpy as np
from experiments.dbconnection import DBConnector
from experiments.constants import CHOICE_FUNCTIONS

Using TensorFlow backend.


In [2]:
# Root directory for the logs, config and result files used throughout the notebook.
# NOTE(review): it is derived from the file name reported for the current frame; inside a
# notebook cell that name is synthetic, so this presumably resolves to the current
# working directory — confirm before relying on it.
DIR_PATH = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
log_path = os.path.join(DIR_PATH, 'logs', 'results.log')
setup_logging(log_path=log_path)
logger = logging.getLogger('ResultParsing')
# Learning problem whose results are parsed; it selects the metric set and results table.
learning_problem = "choice_function"
schema = "choice_functions"
# Dataset identifiers that the later cells iterate over.
datasets = ['synthetic_choice', 'mnist_choice']
# Metric names registered for this learning problem. The last name is passed through
# str.format(6) (presumably it carries a placeholder — TODO confirm).
keys = list(lp_metric_dict[learning_problem].keys())
keys[-1] = keys[-1].format(6)
# Comma-separated, lower-cased metric names; spliced into the SELECT column list.
metrics = ', '.join([x.lower() for x in keys])
# Display names, paired positionally with the upper-cased entries of CHOICE_FUNCTIONS.
# NOTE(review): the pairing is only correct if CHOICE_FUNCTIONS iterates in this same
# order. Despite its name, Dlower holds UPPER-cased identifiers.
models = ['FETA-Net', 'FATE-Net', 'RankNet-Choice', 'PairwiseSVM', 'GeneralizedLinearModel', "RandomGuessing"]
Dlower = [d.upper() for d in CHOICE_FUNCTIONS]
models_dict = dict(zip(Dlower, models))

In [3]:
def get_results_for_dataset(DATASET, del_jid = True, dataset_type=None):
    """Load the per-job evaluation results of one dataset from the results database.

    Every row returned by the query is turned into one record: the learner name gets a
    ``_shallow`` / ``_zero`` suffix depending on the job's hyper-parameter ranges and
    learner parameters, the dataset label is replaced by a more specific one, and both
    are upper-cased. The two loss columns are converted to accuracies (``1 - loss``).

    Parameters
    ----------
    DATASET : str
        Value matched against the ``dataset`` column of the ``avail_jobs`` table.
    del_jid : bool
        If True (default) the ``job_id`` column is removed from the result.
    dataset_type :
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    (pandas.DataFrame, list)
        The result frame sorted by ``dataset`` and the list of its column names. If the
        query returns no rows, the frame is empty but still has the expected columns.
    """
    config_file_path = os.path.join(DIR_PATH, 'config', 'clusterdb.json')
    results_table = 'results.{}'.format(learning_problem)
    db_schema = 'choice_functions'
    avail_jobs = "{}.avail_jobs".format(db_schema)
    # Columns fetched only to derive the learner/dataset labels; they are not reported.
    param_columns = ('learner_params', 'dataset_params', 'hp_ranges')
    # NOTE(review): DATASET is interpolated into the SQL text; only pass trusted values.
    select_jobs = "SELECT learner_params, dataset_params, hp_ranges, {0}.job_id, dataset, learner, {3} from {0} INNER JOIN {1} ON {0}.job_id = {1}.job_id where {1}.dataset=\'{2}\'"
    select_st = select_jobs.format(results_table, avail_jobs, DATASET, metrics)

    # NOTE(review): the connection opened here is never closed; DBConnector's teardown
    # API is not visible in this file, so no cleanup call is added — confirm and add one.
    connector = DBConnector(config_file_path=config_file_path, is_gpu=False, schema=db_schema)
    connector.init_connection()
    connector.cursor_db.execute(select_st)

    # Column layout implied by the SELECT; used as-is when the query returns no rows, so
    # that an empty result yields an empty frame rather than an unbound `columns`.
    columns = ['job_id', 'dataset', 'learner'] + [m.strip() for m in metrics.split(',')]
    data = []
    for job in connector.cursor_db.fetchall():
        job = dict(job)
        learner = job['learner']
        if learner in job['hp_ranges']:
            ranges = job['hp_ranges'][learner]
            n_hidden = ranges.get("n_hidden", [])
            # These specific search ranges identify the "shallow" variant of a learner.
            if ranges.get("n_hidden_set_layers", None) == [1, 8] or n_hidden in ([1, 4], [1, 5]):
                learner = learner + '_shallow'
        if job['learner_params'].get("add_zeroth_order_model", False):
            learner = learner + '_zero'

        dataset = job['dataset']
        if "letor" in dataset:
            # NOTE(review): get_letor_string is not defined in the visible part of this
            # file; it must be provided elsewhere for letor datasets to work.
            dataset = get_letor_string(job['dataset_params'])
        elif "sushi" not in dataset:
            dataset = job['dataset_params']['dataset_type']

        job['learner'] = learner.upper()
        job['dataset'] = dataset.upper()
        # Drop the parameter columns by name instead of relying on their position.
        record = {key: value for key, value in job.items() if key not in param_columns}
        columns = list(record.keys())
        data.append(list(record.values()))

    if not data:
        logger.warning("No results found for dataset %s", DATASET)

    df_full = pd.DataFrame(data, columns=columns)
    df_full = df_full.sort_values('dataset')
    if del_jid:
        del df_full['job_id']
    # Report accuracies instead of losses so that larger is better for every metric.
    df_full['subset01loss'] = 1 - df_full['subset01loss']
    df_full['hammingloss'] = 1 - df_full['hammingloss']
    df_full = df_full.rename(columns={'subset01loss': 'subset01accuracy', 'hammingloss': 'hammingaccuracy'})
    columns = list(df_full.columns)
    return df_full, columns
# Load the raw per-job results of the first configured dataset and preview the first rows.
df, cols = get_results_for_dataset(datasets[0])
df.head()

Unnamed: 0,dataset,learner,f1score,precision,recall,subset01accuracy,hammingaccuracy,informedness,aucscore,averageprecisionscore
0,PARETO,FETA_CHOICE_ZERO,0.9537,0.9379,0.9843,0.7127,0.988,0.9736,0.9994,0.9965
27,PARETO,RANKNET_CHOICE,0.6147,0.6422,0.7513,0.068,0.8844,0.664,0.9737,0.8993
26,PARETO,RANKNET_CHOICE,0.6156,0.5987,0.8153,0.0539,0.8664,0.6962,0.9739,0.9
25,PARETO,FATE_CHOICE,0.925,0.9438,0.9241,0.5603,0.9788,0.9135,0.9972,0.9884
24,PARETO,FETA_CHOICE_ZERO,0.9448,0.9385,0.9699,0.6848,0.9861,0.9597,0.9992,0.9958


In [4]:
def create_combined_dfs(DATASET, latex_row=False):
    """Aggregate the per-job results of one dataset into one formatted row per model.

    Results are grouped by dataset label and learner. For every group the mean and
    standard deviation of each metric are computed (rounded to 3 decimals) and rendered
    as text. Learners whose name contains ``FETA``, ``FATE`` or ``RANKNET`` may occur in
    several variants; for each of these families only the variant with the highest
    ``mean - std`` of the first metric column is kept. All other learners are kept as-is.

    Parameters
    ----------
    DATASET : str
        Dataset identifier forwarded to ``get_results_for_dataset``.
    latex_row : bool
        If True cells are rendered as ``mean(std*1000)``, e.g. ``0.942(8)``; otherwise as
        ``mean±std``. Groups with a single run (std is NaN) are rendered as the mean only.

    Returns
    -------
    pandas.DataFrame
        One row per dataset label and model, with title-cased column names
        (``learner`` becomes ``ChoiceModel``), ordered by ``Dataset``.
    """
    df_full, columns = get_results_for_dataset(DATASET)
    # Aggregate the metric columns only; the string label columns cannot be averaged.
    metric_columns = [c for c in columns if c not in ('dataset', 'learner')]
    # (substring identifying the family, key of its display name in models_dict)
    families = (("FETA", "FETA_CHOICE"), ("FATE", "FATE_CHOICE"), ("RANKNET", "RANKNET_CHOICE"))

    data = []
    # Group by a scalar key so the group name is a plain string on every pandas version.
    for dataset, dgroup in df_full.groupby('dataset'):
        best_score = {tag: -100 for tag, _ in families}
        best_row = {}
        for learner, group in dgroup.groupby('learner'):
            one_row = [dataset.lower().title(), learner]
            std = np.around(group[metric_columns].std(axis=0).values, 3)
            mean = np.around(group[metric_columns].mean(axis=0).values, 3)
            if np.all(np.isnan(std)):
                one_row.extend(["{:.4f}".format(m) for m in mean])
            elif latex_row:
                one_row.extend(["{:.3f}({:.0f})".format(m, s*1e3) for m, s in zip(mean, std)])
            else:
                one_row.extend(["{:.3f}±{:.3f}".format(m, s) for m, s in zip(mean, std)])

            family = next((f for f in families if f[0] in str(learner)), None)
            if family is None:
                one_row[1] = models_dict[one_row[1]]
                data.append(one_row)
                continue

            tag, model_key = family
            # A single run has a NaN std; treat it as zero spread so that the variant is
            # still eligible instead of being dropped by a NaN comparison.
            score = mean[0] - np.nan_to_num(std[0])
            # Each family is compared against its own running best.
            if best_score[tag] < score:
                best_score[tag] = score
                one_row[1] = models_dict[model_key]
                best_row[tag] = one_row
        # Selected family rows follow the other learners, in FETA, FATE, RANKNET order.
        data.extend(best_row[tag] for tag, _ in families if tag in best_row)

    # Build the header in a new list so the list returned by the loader is not mutated.
    titled_columns = ["ChoiceModel" if c.title() == 'Learner' else c.title() for c in columns]
    df = pd.DataFrame(data, columns=titled_columns)
    # sort_values returns a new frame; a stable sort keeps the row order within a dataset.
    return df.sort_values(by='Dataset', kind='mergesort').reset_index(drop=True)

In [5]:
import copy
# Write one summary CSV per dataset plus a combined CSV covering all datasets.
results_dir = os.path.join(DIR_PATH, 'detailedresults')
os.makedirs(results_dir, exist_ok=True)  # make sure the target directory exists
combined = os.path.join(results_dir, "ChoiceFunctions.csv")
summary_frames = []
for dataset in datasets:
    df = create_combined_dfs(dataset)
    df_path = os.path.join(results_dir, dataset.split('_choice')[0].title()+'Choice.csv')
    df.to_csv(df_path, index=False, encoding='utf-8')
    summary_frames.append(df)
# Concatenate once at the end; DataFrame.append is no longer available in recent pandas.
dataFrame = pd.concat(summary_frames, ignore_index=True)
dataFrame.to_csv(combined)
dataFrame

Unnamed: 0,Dataset,ChoiceModel,F1Score,Precision,Recall,Subset01Accuracy,Hammingaccuracy,Informedness,Aucscore,Averageprecisionscore
0,Pareto,GeneralizedLinearModel,0.565±0.041,0.579±0.045,0.721±0.049,0.038±0.012,0.859±0.018,0.609±0.057,0.935±0.038,0.834±0.055
1,Pareto,RandomGuessing,0.232±0.000,0.133±0.000,1.000±0.000,0.000±0.000,0.133±0.000,0.000±0.000,0.500±0.000,0.133±0.000
2,Pareto,PairwiseSVM,0.588±0.001,0.596±0.012,0.756±0.015,0.044±0.003,0.866±0.005,0.646±0.007,0.956±0.000,0.865±0.000
3,Pareto,FETA-Net,0.942±0.008,0.938±0.007,0.967±0.013,0.680±0.028,0.985±0.002,0.956±0.012,0.999±0.000,0.996±0.000
4,Pareto,FATE-Net,0.913±0.009,0.919±0.015,0.926±0.005,0.506±0.037,0.975±0.003,0.911±0.006,0.996±0.001,0.984±0.003
5,Pareto,RankNet-Choice,0.612±0.007,0.624±0.026,0.772±0.029,0.060±0.010,0.877±0.011,0.672±0.014,0.971±0.006,0.891±0.019
6,Mode,GeneralizedLinearModel,0.597±0.000,0.442±0.000,0.999±0.001,0.003±0.000,0.443±0.000,0.000±0.000,0.497±0.004,0.561±0.002
7,Mode,RandomGuessing,0.597±0.000,0.442±0.000,1.000±0.000,0.003±0.000,0.442±0.000,0.000±0.000,0.500±0.000,0.442±0.000
8,Mode,PairwiseSVM,0.597±0.000,0.442±0.000,0.999±0.002,0.003±0.000,0.443±0.000,0.000±0.000,0.509±0.006,0.569±0.004
9,Mode,FETA-Net,0.809±0.005,0.742±0.003,0.962±0.009,0.311±0.032,0.809±0.004,0.695±0.009,0.981±0.006,0.980±0.006


In [6]:
import string
def get_val(val):
    """Parse a formatted metric cell into ``[mean, mean - spread * 1e-3]``.

    The first number found in ``val`` is the mean. The second number, if present, is
    read as a spread expressed in thousandths, as in the ``mean(std*1000)`` cell format
    (``"0.942(8)"``). Cells holding only a mean (``"0.9537"``) are treated as having zero
    spread. Any text around the numbers, such as a ``bfseries`` prefix, is ignored.

    Parameters
    ----------
    val : str
        Formatted cell text.

    Returns
    -------
    list of float
        ``[mean, mean - spread * 1e-3]``.

    Raises
    ------
    ValueError
        If ``val`` contains no number at all.
    """
    vals = [float(x) for x in re.findall(r"[-+]?\d*\.\d+|\d+", val)]
    if not vals:
        raise ValueError("No numeric value found in {!r}".format(val))
    mean = vals[0]
    spread = vals[1] if len(vals) > 1 else 0.0
    return [mean, mean - spread*1e-3]
def mark_best(df):
    """Prefix the best cell(s) of every metric column with ``bfseries ``.

    Every column except the first is treated as a metric column of formatted strings.
    The mean of each cell is extracted with ``get_val``, and all cells whose mean equals
    the column maximum (ties included) get the prefix.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first column is the model name and whose remaining columns hold
        formatted metric strings. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for convenient reassignment by the caller.
    """
    if len(df) == 0:
        # Nothing to compare; taking the maximum of an empty column would fail.
        return df
    for col in list(df.columns)[1:]:
        cells = list(df[col])
        means = np.array([get_val(cell)[0] for cell in cells])
        best = means.max()
        # Assigned positionally, so the frame's index does not need to be 0..n-1.
        df[col] = ["bfseries {}".format(cell) if mean == best else cell
                   for cell, mean in zip(cells, means)]
    return df

In [7]:
import re
def create_latex(df):
    """Print one LaTeX table per dataset found in ``df``.

    For every dataset the rows are ordered as in ``models``, the best cell of each
    column is marked via ``mark_best``, model names are replaced by short macro names
    (printed as ``\\glm``, ``\\fetanet``, ...), three metric columns are dropped, and the
    ``to_latex`` output is post-processed into a compact table body.

    Parameters
    ----------
    df : pandas.DataFrame
        Summary frame with the columns ``Dataset`` and ``ChoiceModel`` plus the
        title-cased metric columns. It is not modified.

    Returns
    -------
    None
        The dataset name and the LaTeX code are printed.
    """
    # Position of every model in `models` defines the row order of the table.
    model_rank = {model: rank for rank, model in enumerate(models)}
    macro_names = {
        'GeneralizedLinearModel': 'glm',
        'FATE-Net': 'fatenet',
        'FETA-Net': 'fetanet',
        'RankNet-Choice': 'ranknet',
        'PairwiseSVM': 'pairwisesvm',
        'RandomGuessing': 'random',
    }
    # Group by a scalar key so `name` is a plain string on every pandas version.
    for name, group in df.groupby('Dataset'):
        # Work on an explicit copy; edits to a groupby slice are not reliable.
        group = group.copy()
        group['rank'] = group['ChoiceModel'].map(model_rank)
        group = group.sort_values(by='rank').drop(columns=['Dataset', 'rank'])
        group = mark_best(group)
        # Assign the result instead of a chained in-place replace on the column.
        group['ChoiceModel'] = group['ChoiceModel'].replace(macro_names)
        group = group.rename(columns={'F1Score': '$F_1$-measure', 'Subset01Accuracy': 'Subset $0/1$ Accuracy', 'Aucscore':'Auc-Score'})
        group = group.drop(columns=['Hammingaccuracy', 'Informedness', 'Averageprecisionscore'])
        print("name {}".format(name))
        latex_code = str(group.to_latex(index = False))
        # Collapse the column padding.
        # NOTE(review): this strips every space, including those inside column headers
        # such as 'Subset $0/1$ Accuracy'.
        latex_code = latex_code.replace(' ',"")
        latex_code = latex_code.replace('&'," & ")
        # Turn model names into LaTeX macros; unique() avoids prefixing a name twice.
        for learner in group['ChoiceModel'].unique():
            latex_code = latex_code.replace(learner, "\\{}".format(learner))
        latex_code = latex_code.replace("bfseries", "\\{} ".format("bfseries"))
        # Undo the escaping applied by to_latex to the math-mode column headers.
        latex_code = latex_code.replace("\\$", "$")
        latex_code = latex_code.replace("\\_", "_")
        print(latex_code)
# Print the LaTeX tables for every configured dataset, using the compact
# mean(std*1000) cell format.
for dataset in datasets:
    df = create_combined_dfs(dataset, latex_row=True)
    # sort_values returns a new frame, so the result has to be kept to take effect.
    df = df.sort_values(by='Dataset')
    create_latex(df)

name Pareto
\begin{tabular}{llllll}
\toprule
ChoiceModel & $F_1$-measure & Precision & Recall & Subset$0/1$Accuracy & Auc-Score\\
\midrule
\fetanet & \bfseries 0.942(8) & \bfseries 0.938(7) & 0.967(13) & \bfseries 0.680(28) & \bfseries 0.999(0)\\
\fatenet & 0.913(9) & 0.919(15) & 0.926(5) & 0.506(37) & 0.996(1)\\
\ranknet & 0.612(7) & 0.624(26) & 0.772(29) & 0.060(10) & 0.971(6)\\
\pairwisesvm & 0.588(1) & 0.596(12) & 0.756(15) & 0.044(3) & 0.956(0)\\
\glm & 0.565(41) & 0.579(45) & 0.721(49) & 0.038(12) & 0.935(38)\\
\random & 0.232(0) & 0.133(0) & \bfseries 1.000(0) & 0.000(0) & 0.500(0)\\
\bottomrule
\end{tabular}

name Mode
\begin{tabular}{llllll}
\toprule
ChoiceModel & $F_1$-measure & Precision & Recall & Subset$0/1$Accuracy & Auc-Score\\
\midrule
\fetanet & 0.809(5) & 0.742(3) & 0.962(9) & 0.311(32) & 0.981(6)\\
\fatenet & \bfseries 0.976(1) & \bfseries 0.980(2) & 0.979(4) & \bfseries 0.883(10) & \bfseries 0.992(1)\\
\ranknet & 0.597(0) & 0.442(0) & \bfseries 1.000(0) & 0.003(0) &

In [None]:
# Dump the raw per-job results into one CSV per dataset label.
# The results are fetched explicitly: when the notebook is run top to bottom, `df` holds
# the formatted summary table at this point, which has no 'dataset' column.
raw_results_dir = os.path.join(DIR_PATH, 'results')
os.makedirs(raw_results_dir, exist_ok=True)  # make sure the target directory exists
for dataset in datasets:
    raw_results, raw_columns = get_results_for_dataset(dataset)
    # Scalar group key, so `name` is a plain string on every pandas version.
    grouped = raw_results.groupby('dataset')
    for name, group in grouped:
        df_path = os.path.join(raw_results_dir, name.lower()+'.csv')
        group.to_csv(df_path)

In [None]:
import numpy as np
# Scratch expression: the range is built but not stored anywhere.
np.arange(48, 87)

# Toy input: the integers 0..39 arranged as a (4, 5, 2) array.
X_train = np.arange(40).reshape((4, 5, 2))

# The two trailing dimensions of X_train give the object count and the feature count.
learner_params = dict(zip(('n_objects', 'n_object_features'), X_train.shape[1:]))

In [None]:
# Scratch check: shows the lower-cased form of this identifier string.
"UNIQUE_MAX_OCCURRING".lower()