In [None]:
import pandas as pd
import numpy as np
import os
import pickle
# Allow up to 500 rows to be shown when a DataFrame is displayed.
pd.set_option("display.max_rows", 500)
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
def get_run_gauc_penalizer(cross_validators, run_number):
    """Summarise one run as the averaged global AUC of every penalizer.

    Parameters
    ----------
    cross_validators : mapping
        Penalizer -> cross-validator object exposing a ``global_auc`` mapping
        (presumably one entry per fold -- confirm against the code that
        produced the pickles).
    run_number : hashable
        Used as the name of the returned Series.

    Returns
    -------
    pd.Series
        Mean of ``global_auc.values()`` for each penalizer, indexed by the
        penalizers in ascending order and named ``run_number``.
    """
    ordered_penalizers = sorted(cross_validators.keys())
    mean_scores = [
        np.mean(list(cross_validators[key].global_auc.values()))
        for key in ordered_penalizers
    ]
    return pd.Series(mean_scores, index=ordered_penalizers, name=run_number)

def get_run_FP_FN_df(cross_validators, j_events=2, n_splits=5, n_true_covariates=5):
    """Count selected (non-zero) coefficients per penalizer for every risk.

    For each risk ``1..j_events`` and each penalizer, the coefficient vectors
    of the ``n_splits`` fold models are averaged and rounded to 4 decimals.
    A coefficient counts as selected when its rounded mean is non-zero.
    Selections among the first ``n_true_covariates`` coefficient rows are
    reported as true positives, selections among the remaining rows as false
    positives.

    Parameters
    ----------
    cross_validators : mapping
        Penalizer -> cross-validator object. For every fold and risk,
        ``models[i_fold].beta_models[risk].params_`` is read and must be a
        pandas object indexed by covariate. Rows of the result follow the
        iteration order of this mapping.
    j_events : int, default 2
        Number of risks; risks are numbered from 1.
    n_splits : int, default 5
        Number of cross-validation folds stored in ``models``.
    n_true_covariates : int, default 5
        Number of leading coefficient rows treated as true effects.
        Assumes the covariates with a real effect come first in ``params_``
        -- confirm against the simulation setup.

    Returns
    -------
    pd.DataFrame
        One row per penalizer; two-level columns made of the LaTeX beta label
        of the risk and 'True Positives' / 'False Positives'. Empty when
        ``j_events`` is smaller than 1.

    Raises
    ------
    ValueError
        If ``cross_validators`` is empty while ``j_events`` is at least 1.
    """
    if j_events >= 1 and not cross_validators:
        raise ValueError("cross_validators is empty: there are no penalizers to summarise")

    labels = []
    risk_frames = []
    for risk in range(1, j_events + 1):
        # One fold-averaged coefficient vector per penalizer.
        mean_coefs = []
        for penalizer, cross_validator in cross_validators.items():
            fold_coefs = [
                cross_validator.models[i_fold].beta_models[risk].params_
                for i_fold in range(n_splits)
            ]
            mean_coef = pd.concat(fold_coefs, axis=1).mean(axis=1)
            mean_coef.name = penalizer
            mean_coefs.append(mean_coef)

        # Rows: covariates, columns: penalizers. Rounding first treats
        # numerically negligible coefficients as exactly zero.
        is_selected = pd.concat(mean_coefs, axis=1).round(4).abs() > 0

        true_positives = is_selected.iloc[:n_true_covariates].sum()
        true_positives.name = 'True Positives'
        false_positives = is_selected.iloc[n_true_covariates:].sum()
        false_positives.name = 'False Positives'

        positives_df = pd.concat([true_positives, false_positives], axis=1)
        positives_df.index.name = r'Log ($\eta$)'

        labels.append(fr'$\beta_{risk}$')
        risk_frames.append(positives_df)

    if not risk_frames:
        return pd.DataFrame()
    # A single concat; `keys` adds the risk label as the outer column level.
    return pd.concat(risk_frames, keys=labels, axis=1)

In [None]:
# Directory that is scanned for the pickled cross-validator files.
# NOTE(review): hard-coded absolute path -- adjust when running in a different environment.
OUTPUT_DIR = '/app/output'

In [None]:
# Names of the FP/FN cross-validator dumps found in OUTPUT_DIR, in
# lexicographic file-name order.
files_list = sorted(
    file_name
    for file_name in os.listdir(OUTPUT_DIR)
    if 'FP-FN_cross_validators_' in file_name
)
files_list

In [None]:
# Load every run's pickled cross-validators and collect, per run, the FP/FN
# count table and the mean global AUC of each penalizer.
run_numbers = []
run_frames = []
run_gaucs = []

for file in files_list:
    # NOTE: pickle.load can execute arbitrary code -- only read files from a
    # trusted OUTPUT_DIR.
    with open(os.path.join(OUTPUT_DIR, file), 'rb') as f:
        cross_validators = pickle.load(f)

    # The run number is the text between the last '_' and the first '.' after it.
    run_number = int(file.split('_')[-1].split('.')[0])
    run_numbers.append(run_number)
    run_frames.append(get_run_FP_FN_df(cross_validators))
    run_gaucs.append(get_run_gauc_penalizer(cross_validators, run_number))

# Concatenate once instead of growing the frames inside the loop. `keys` adds
# the run number as the outermost column level. The empty fallbacks keep the
# cell runnable when no matching file was found.
all_runs = pd.concat(run_frames, axis=1, keys=run_numbers) if run_frames else pd.DataFrame()
gauc_df = pd.concat(run_gaucs, axis=1) if run_gaucs else pd.DataFrame()

gauc_df

In [None]:
# other_gauc_df = pd.read_csv(os.path.join(OUTPUT_DIR, 'gauc_7XX_runs.csv'), index_col=0)
# other_all_runs = pd.read_csv(os.path.join(OUTPUT_DIR, 'all_runs_7XX_runs.csv'), index_col=0, header=[0,1,2])

In [None]:
# gauc_df = pd.concat([gauc_df, other_gauc_df], axis=1)
# all_runs = pd.concat([all_runs, other_all_runs], axis=1)


In [None]:
# gauc_df = pd.read_csv(os.path.join(OUTPUT_DIR, 'gauc_all_runs.csv'), index_col=0)
# all_runs = pd.read_csv(os.path.join(OUTPUT_DIR, 'all_runs.csv'), index_col=0, header=[0,1,2])

In [None]:
# For each run (column), the penalizer (row label) with the highest mean
# global AUC; then the average of those picks across runs.
best_penalizer_per_run = gauc_df.idxmax(axis=0)
best_penalizer_per_run.mean()

In [None]:
# Spread, across runs, of the penalizer that attains the highest mean global
# AUC in each run (column).
best_penalizer_per_run = gauc_df.idxmax(axis=0)
best_penalizer_per_run.std()

In [None]:
# Aggregate the FP/FN counts over runs (outermost column level) for every
# (risk label, count type) pair and report them as "mean (std)" strings.
# DataFrame.mean/std(level=...) was removed in pandas 2.0, so the transposed
# frame is grouped by the two inner column levels instead. sort=False keeps
# the column order of all_runs rather than sorting the labels.
grouped_runs = all_runs.T.groupby(level=[1, 2], sort=False)
mean_df = grouped_runs.mean().T
std_df = grouped_runs.std().T

# Build the string table in one step rather than writing strings cell by cell
# into a float frame.
final_df = pd.DataFrame(
    [
        [
            f"{mean_value:.2f} ({std_value:.2f})"
            for mean_value, std_value in zip(mean_row, std_row)
        ]
        for mean_row, std_row in zip(mean_df.to_numpy(), std_df.to_numpy())
    ],
    index=mean_df.index,
    columns=mean_df.columns,
)

final_df

In [None]:
# escape=False leaves LaTeX markup in the table text unescaped.
latex_table = final_df.to_latex(escape=False)
print(latex_table)