In [1]:
import os
import sys
import pandas as pd
sys.path.append('/path/to/repo/performance_evaluation/development_code/')
import modeval

This notebook assumes that a simple 5-fold cross-validation (5CV) grid search was performed (e.g. as specified in https://app.box.com/file/565474267762). <br>

In [None]:
# Requires pandas < 1.0 (any version below 1 is enough; this notebook was run with v0.25)

In [2]:
# Display the installed pandas version so it can be checked against the version requirement noted above
pd.__version__

'0.25.3'

In [None]:
# Specify sparsechem's folders containing the hyperparameter (HP) grid search results.
# result_dir is passed to modeval.perf_from_json(..., aggregate=True) further down.
result_dir = 'results/'
# model_dir is passed to the aggregate=False perf_from_json call and to the deprecated perf_from_conf call.
model_dir = 'models/'

### SELECT BEST FROM JSON FILES

#### Select best following official definition 
The official definition for Y1 is on pages 1-2 of the option doc: "[SharingAggregateFigures](https://app.box.com/file/493035529676)"

In [None]:
#1/ LOAD RESULTS .JSON FILES
# perf_from_json is reached through the modeval module: the notebook only runs `import modeval`,
# so the call has to be module-qualified (the bare name raises NameError on a fresh kernel).
grid_search_results = modeval.perf_from_json(model_dir,            # This could be either the models dir or a .json file
                                             tasks_for_eval=None,  # to mask out some tasks, specify an np.array of tasks to consider only
                                             aggregate=False,      # do not use the aggregate results of sparsechem
                                             evaluation_set='va',  # using "va" results (specify "te" if test results)
                                             model_name='Y',       # model name will be added in the dataframe
                                             n_cv=5,               # will load only files relative to folds 0,1,2...,n_cv-1
                                             filename_mask=None,   # allows ignoring some files present in the models/ dir
                                             drop_na_col=True,     # drops columns full of nan in resulting data frame
                                             verbose=True)         # default value


In [None]:
#2/ FIND THE BEST HYPERPARAMETERS
# This step will:
#  - aggregate_overall:
#       > quorum_filter: keep only the tasks meeting the quorum (5 actives, 5 inactives in each fold)
#       > aggregate the remaining tasks with mean and standard deviation
#  - report the top HPs/score per score type
#
# Arguments passed below:
#   grid_search_results : results data from the previous step (assumed to contain > 1 combination of HPs)
#   min_samples         : minimum number of actives / inactives in each of the n_cv folds for a task to be considered
#   n_cv                : number of folds to look for
#   perf_metrics        : allows selection of score types (the list given here is the default value)
#   verbose             : True is the default value
modeval.find_best_hyperparam(
    grid_search_results,
    min_samples=5,
    n_cv=5,
    perf_metrics=['roc_auc_score', 'auc_pr', 'avg_prec_score', 'max_f1_score', 'kappa'],
    verbose=True,
)




#### Select best using sparsechem aggregate results

In [None]:
# Use the following if the performance reports are in *.json files:
# collect sparsechem's aggregate performance metrics (aggregate=True) from result_dir
conf_df = modeval.perf_from_json(result_dir, aggregate=True)

In [None]:
# Melt the aggregate results, keeping the six *_agg metrics listed below.
# NOTE(review): presumably yields one row per score type, since the next cell
# filters best_hp on a 'score_type' column; confirm in modeval.melt_perf.
conf_dfm = modeval.melt_perf(
    conf_df,
    perf_metrics=[
        'roc_auc_score_agg',
        'auc_pr_agg',
        'avg_prec_score_agg',
        'max_f1_score_agg',
        'kappa_agg',
        'logloss_agg',
    ],
)

# Find out the best hyperparameters from the melted results
best_hp = modeval.best_hyperparam(conf_dfm)

In [None]:
# Show the best hyperparameters for one score type:
# keep only the rows of best_hp whose 'score_type' equals wanted_score
wanted_score='auc_pr_agg'
best_hp.loc[best_hp['score_type']==wanted_score]

### [DEPRECATED] SELECT BEST FROM CONF.NPY FILES (OLD SPARSECHEM VERSION)

In [3]:
# Use the following if the performance reports are in *.conf.npy files:
# collect performance metrics from the conf files in model_dir (aggregate form)
conf_df = modeval.perf_from_conf(model_dir, aggregate=True)

In [3]:
# melt: conf files only contain auc_pr and auc_roc (no kappa, f1, ...),
# so only the two metric columns below are requested
conf_dfm = modeval.melt_perf(conf_df, perf_metrics=['auc_pr_va_mean', 'auc_va_mean']) 

# Find out the best hyperparameters from the melted results
best_hp = modeval.best_hyperparam(conf_dfm)

In [None]:
# Show the best hyperparameters for one score type:
# keep only the rows of best_hp whose 'score_type' equals wanted_score
wanted_score='auc_pr_va_mean'
best_hp.loc[best_hp['score_type']==wanted_score]