In [1]:
import warnings
warnings.filterwarnings('ignore')
import inspect
import logging
import os
import pandas as pd
from csrank.util import setup_logging
from experiments.util import lp_metric_dict
import numpy as np
from experiments.dbconnection import DBConnector
from experiments.constants import CHOICE_FUNCTIONS

Using TensorFlow backend.


In [2]:
DIR_PATH = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
log_path = os.path.join(DIR_PATH, 'logs', 'results.log')
setup_logging(log_path=log_path)
logger = logging.getLogger('ResultParsing')
learning_problem = "choice_function"
schema = "choice_functions"
datasets = ['synthetic_choice', 'mnist_choice']
keys = list(lp_metric_dict[learning_problem].keys())
keys[-1] = keys[-1].format(6)
metrics = ', '.join([x.lower() for x in keys])
models = ['FETA-Net', 'FATE-Net', 'RankNet-Choice', 'PairwiseSVM', 'GeneralizedLinearModel', "RandomGuessing"]
Dlower = [d.upper() for d in CHOICE_FUNCTIONS]
models_dict = dict(zip(Dlower, models))

In [3]:
def get_results_for_dataset(DATASET, del_jid = True):
    config_file_path = os.path.join(DIR_PATH, 'config', 'clusterdb.json')
    results_table = 'results.{}'.format(learning_problem)
    schema = 'choice_functions'
    start = 3
    select_jobs = "SELECT learner_params, dataset_params, hp_ranges, {0}.job_id, dataset, learner, {3} from {0} INNER JOIN {1} ON {0}.job_id = {1}.job_id where {1}.dataset=\'{2}\'"
    self = DBConnector(config_file_path=config_file_path, is_gpu=False, schema=schema)
    self.init_connection()
    avail_jobs = "{}.avail_jobs".format(schema)
    select_st = select_jobs.format(results_table, avail_jobs, DATASET, metrics)
    #print(select_st)
    self.cursor_db.execute(select_st)
    data = []
    for job in self.cursor_db.fetchall():
        job = dict(job)
        if job['learner'] in job['hp_ranges'].keys():
            n_hidden = job['hp_ranges'][job['learner']].get("n_hidden", [])
            if job['hp_ranges'][job['learner']].get("n_hidden_set_layers", None)==[1,8]:
                job['learner'] = job['learner']+'_shallow'
            elif n_hidden==[1,4] or n_hidden==[1,5]:
                job['learner'] = job['learner']+'_shallow'

        if job['learner_params'].get("add_zeroth_order_model", False):
            job['learner'] = job['learner']+'_zero'
        if "letor" in job['dataset']:
            job['dataset'] = get_letor_string(job['dataset_params'])
        elif "sushi" in job['dataset']:
            job['dataset'] =  job['dataset']
        else:
            job['dataset'] = job['dataset_params']['dataset_type']
        job['learner'] = job['learner'].upper()
        job['dataset'] = job['dataset'].upper()
        values = list(job.values())
        keys = list(job.keys())
        columns = keys[start:]
        vals = values[start:]
        
        data.append(vals)
    df_full = pd.DataFrame(data, columns=columns)
    df_full = df_full.sort_values('dataset')
    if del_jid:
        del df_full['job_id']
    columns = list(df_full.columns)
    return df_full, columns
df, cols = get_results_for_dataset(datasets[0])
df.head()

Unnamed: 0,dataset,learner,f1score,precision,recall,subset01loss,hammingloss,informedness,aucscore,averageprecisionscore
0,PARETO,FATE_CHOICE,0.925,0.9438,0.9241,0.4397,0.0212,0.9135,0.9972,0.9884
21,PARETO,RANDOM_CHOICE,0.2315,0.1332,1.0,1.0,0.8668,0.0,0.5,0.1332
20,PARETO,GLM_CHOICE,0.5876,0.606,0.7402,0.9544,0.1302,0.6377,0.956,0.8649
19,PARETO,RANDOM_CHOICE,0.2318,0.1334,1.0,1.0,0.8666,0.0,0.5,0.1334
18,PARETO,GLM_CHOICE,0.4924,0.5052,0.6432,0.983,0.1696,0.5095,0.8673,0.7367


In [4]:
def create_combined_dfs(DATASET, latex_row=False):
    df_full, columns = get_results_for_dataset(DATASET)
    data = []
    dataf = []
    for dataset, dgroup in df_full.groupby(['dataset']):
        max_feta = -100
        max_fate = -100
        max_ranknet = -100
        feta_r = []
        fate_r = []
        ranknet_r = []
        for learner, group in dgroup.groupby(['learner']):
            one_row = [dataset.lower().title(), learner]
            std = np.around(group.std(axis=0).values,3)
            mean = np.around(group.mean(axis=0).values,3)
            if np.all(np.isnan(std)):
                one_row.extend(["{:.4f}".format(m) for m in mean])
                #latex_row.extend(["${:.3f}$".format(m) for m in mean]) 
            else:
                std_err = [s for s in std]
                #std_err = [s/np.sqrt(len(group)) for s in std]
                #one_row.extend([m for m in mean])
                #one_row.extend([se for se in std_err])
                #one_row.extend(mean)
                if latex_row:
                    one_row.extend(["{:.3f}({:.0f})".format(m, s*1e3) for m, s in zip(mean, std)])
                else:
                    one_row.extend(["{:.3f}±{:.3f}".format(m, s) for m, s in zip(mean, std)])
            if "FETA" in str(learner):
                if max_feta < mean[0] - std[0]:
                    max_feta = mean[0] - std[0]
                    feta_r = one_row
                    feta_r[1] = models_dict["FETA_CHOICE"]
            elif "FATE" in str(learner):
                if max_feta < mean[0] - std[0]:
                    max_fate = mean[0] - std[0]
                    fate_r = one_row
                    fate_r[1] = models_dict["FATE_CHOICE"]
            elif "RANKNET" in str(learner):
                if max_ranknet < mean[0] - std[0]:
                    max_ranknet = mean[0] - std[0]
                    ranknet_r = one_row
                    ranknet_r[1] = models_dict["RANKNET_CHOICE"]
            else:
                one_row[1] = models_dict[one_row[1]]
                data.append(one_row)
        if len(feta_r)!=0:
            data.append(feta_r)
        if len(fate_r)!=0:
            data.append(fate_r)
        if len(ranknet_r)!=0:
            data.append(ranknet_r)
    for i in range(len(columns)):
        columns[i] = columns[i].title()
        if columns[i] == 'Learner':
            columns[i] = "ChoiceModel"
    df = pd.DataFrame(data, columns=columns)
    df.sort_values(by='Dataset')
    return df

In [5]:
for dataset in datasets:
    df = create_combined_dfs(dataset)
    df_path = os.path.join(DIR_PATH, 'detailedresults' , dataset.split('_choice')[0].title()+'.csv')
    df.to_csv(df_path, index=False, encoding='utf-8')
df

Unnamed: 0,Dataset,ChoiceModel,F1Score,Precision,Recall,Subset01Loss,Hammingloss,Informedness,Aucscore,Averageprecisionscore
0,Mode,GeneralizedLinearModel,0.597±0.000,0.442±0.000,0.999±0.001,0.997±0.000,0.557±0.000,0.000±0.000,0.497±0.004,0.561±0.002
1,Mode,RandomGuessing,0.597±0.000,0.442±0.000,1.000±0.000,0.997±0.000,0.558±0.000,0.000±0.000,0.500±0.000,0.442±0.000
2,Mode,PairwiseSVM,0.597±0.000,0.442±0.000,0.999±0.002,0.997±0.000,0.557±0.000,0.000±0.000,0.509±0.006,0.569±0.004
3,Mode,FATE-Net,0.976±0.000,0.980±0.004,0.979±0.003,0.118±0.003,0.022±0.000,0.960±0.001,0.991±0.001,0.990±0.002
4,Unique,GeneralizedLinearModel,0.562±0.000,0.405±0.000,1.000±0.000,1.000±0.000,0.595±0.000,0.000±0.000,0.500±0.001,0.532±0.001
5,Unique,RandomGuessing,0.562±0.000,0.405±0.000,1.000±0.000,1.000±0.000,0.595±0.000,0.000±0.000,0.500±0.000,0.405±0.000
6,Unique,PairwiseSVM,0.562±0.000,0.405±0.000,1.000±0.000,1.000±0.000,0.595±0.000,0.000±0.000,0.500±0.000,0.532±0.000
7,Unique,FETA-Net,0.562±0.000,0.405±0.000,1.000±0.000,1.000±0.000,0.595±0.000,0.000±0.000,0.500±0.001,0.532±0.000
8,Unique,FATE-Net,0.140±0.281,0.101±0.202,0.250±0.500,1.000±0.000,0.452±0.095,0.000±0.000,0.500±0.000,0.405±0.000


In [6]:
DATASET = datasets[1]
df = create_combined_dfs(DATASET, latex_row=True)
df.sort_values(by='Dataset')
df

Unnamed: 0,Dataset,ChoiceModel,F1Score,Precision,Recall,Subset01Loss,Hammingloss,Informedness,Aucscore,Averageprecisionscore
0,Mode,GeneralizedLinearModel,0.597(0),0.442(0),0.999(1),0.997(0),0.557(0),0.000(0),0.497(4),0.561(2)
1,Mode,RandomGuessing,0.597(0),0.442(0),1.000(0),0.997(0),0.558(0),0.000(0),0.500(0),0.442(0)
2,Mode,PairwiseSVM,0.597(0),0.442(0),0.999(2),0.997(0),0.557(0),0.000(0),0.509(6),0.569(4)
3,Mode,FATE-Net,0.976(0),0.980(4),0.979(3),0.118(3),0.022(0),0.960(1),0.991(1),0.990(2)
4,Unique,GeneralizedLinearModel,0.562(0),0.405(0),1.000(0),1.000(0),0.595(0),0.000(0),0.500(1),0.532(1)
5,Unique,RandomGuessing,0.562(0),0.405(0),1.000(0),1.000(0),0.595(0),0.000(0),0.500(0),0.405(0)
6,Unique,PairwiseSVM,0.562(0),0.405(0),1.000(0),1.000(0),0.595(0),0.000(0),0.500(0),0.532(0)
7,Unique,FETA-Net,0.562(0),0.405(0),1.000(0),1.000(0),0.595(0),0.000(0),0.500(1),0.532(0)
8,Unique,FATE-Net,0.140(281),0.101(202),0.250(500),1.000(0),0.452(95),0.000(0),0.500(0),0.405(0)


In [7]:
import re
def remove_ranker(sub_df):
    remove_ranker = None
    if len(sub_df)==2:
        sub_df = sub_df[:,1:3]
        val1 = [float(x) for x in re.findall(r"[-+]?\d*\.\d+|\d+", sub_df[0][1])]
        val2 = [float(x) for x in re.findall(r"[-+]?\d*\.\d+|\d+", sub_df[1][1])]
        val1 = val1[0] if len(val1)==1 else val1[0] - val1[1]*1e-3
        val2 = val2[0] if len(val2)==1 else val2[0] - val2[1]*1e-3
        if val1 < val2 :
            remove_ranker = sub_df[0][0]
        else:
            remove_ranker = sub_df[1][0]
    return remove_ranker

In [8]:
def get_val(val):
    vals =  [float(x) for x in re.findall(r"[-+]?\d*\.\d+|\d+", val)]
    return [vals[0], vals[0] - vals[1]*1e-3]
def mark_best(df):
    for col in list(df.columns)[1:]:
        values_str = df[['ChoiceModel',col]].as_matrix()
        values = np.array([get_val(val[1])for val in values_str])
        maxi = np.where(values[:,0] == values[:,0][np.argmax(values[:,0])])[0]
        for ind in maxi:
            values_str[ind] = [values_str[ind][0], "bfseries {}".format(values_str[ind][1])]
        df['ChoiceModel'] = values_str[:,0]
        df[col] = values_str[:,1]
    return df

In [11]:
#from tabulate import tabulate
import string
grouped = df.groupby(['Dataset'])
for name, group in grouped:
    custom_dict = dict()
    for i, m in enumerate(models):
        custom_dict[m] = i
    group['rank'] = group['ChoiceModel'].map(custom_dict)
    group.sort_values(by='rank', inplace=True)
    del group["Dataset"]
    del group['rank']
    group = mark_best(group)
    group['ChoiceModel'].replace(to_replace=['GeneralizedLinearModel'], value='glm',inplace=True)
    group['ChoiceModel'].replace(to_replace=['FATE-Net'], value='fate',inplace=True)
    group['ChoiceModel'].replace(to_replace=['FETA-Net'], value='feta',inplace=True)
    group['ChoiceModel'].replace(to_replace=['RankNet-Choice'], value='ranknet',inplace=True)
    group['ChoiceModel'].replace(to_replace=['PairwiseSVM'], value='pairwisesvm',inplace=True)
    group['ChoiceModel'].replace(to_replace=['RandomGuessing'], value='random',inplace=True)
    print("name {}".format(name))
    latex_code = group.to_latex(index = False)
    latex_code = latex_code.replace(' ',"")
    latex_code = latex_code.replace('&'," & ")
    latex_code = str(latex_code)
    for learner in group['ChoiceModel']:
        latex_code = latex_code.replace(learner, "\\{}".format(learner))
    latex_code = latex_code.replace("bfseries", "\\{} ".format("bfseries"))
    #latex_code = latex_code.replace("0.", ".")

    print(latex_code)
#df.T.to_latex()

name Mode
\begin{tabular}{lllllllll}
\toprule
ChoiceModel & F1Score & Precision & Recall & Subset01Loss & Hammingloss & Informedness & Aucscore & Averageprecisionscore\\
\midrule
\fate & \bfseries 0.976(0) & \bfseries 0.980(4) & 0.979(3) & 0.118(3) & 0.022(0) & \bfseries 0.960(1) & \bfseries 0.991(1) & \bfseries 0.990(2)\\
\pairwisesvm & 0.597(0) & 0.442(0) & 0.999(2) & \bfseries 0.997(0) & 0.557(0) & 0.000(0) & 0.509(6) & 0.569(4)\\
\glm & 0.597(0) & 0.442(0) & 0.999(1) & \bfseries 0.997(0) & 0.557(0) & 0.000(0) & 0.497(4) & 0.561(2)\\
\random & 0.597(0) & 0.442(0) & \bfseries 1.000(0) & \bfseries 0.997(0) & \bfseries 0.558(0) & 0.000(0) & 0.500(0) & 0.442(0)\\
\bottomrule
\end{tabular}

name Unique
\begin{tabular}{lllllllll}
\toprule
ChoiceModel & F1Score & Precision & Recall & Subset01Loss & Hammingloss & Informedness & Aucscore & Averageprecisionscore\\
\midrule
\feta & \bfseries 0.562(0) & \bfseries 0.405(0) & \bfseries 1.000(0) & \bfseries 1.000(0) & \bfseries 0.595(0) & \bfserie

In [None]:
df_path = os.path.join(DIR_PATH, 'results' , "discrete_choice.csv")

if not os.path.isfile(df_path):
    dataFrame = df
else:
    dataFrame = pd.read_csv(df_path, index_col=0)
    dataFrame = dataFrame.append(df, ignore_index=True)
dataFrame
dataFrame.to_csv(df_path)

In [None]:
grouped = df.groupby(['dataset'])
for name, group in grouped:
    df_path = os.path.join(DIR_PATH, 'results' , name.lower()+'.csv')
    group.to_csv(df_path)

In [None]:
import numpy as np
np.arange(48,87)

X_train = np.arange(40).reshape(4,5,2)

learner_params = {}
learner_params['n_objects'], learner_params['n_object_features'] = X_train.shape[1:]

In [None]:
"UNIQUE_MAX_OCCURRING".lower()