In [None]:
from visualization import plot_gridmetrics, plot_roc, plot_macro_roc, plot_auprc, plot_beta
import pandas as pd
import os

## Visualization module
Visualization of grid-search training/validation F2 scores and AUPRC curves on test sets.

### GPC

### OPCRIT 90

In [None]:
# Root of the OPCRIT 90 results; it is expected to hold one sub-folder per
# feature set: clinical/, genetic/ and all/.
# NOTE(review): left empty here, so the derived paths below evaluate to
# '/clinical', '/genetic' and '/all' -- presumably a placeholder to fill in.
out_folder = ""

clin_files, gen_files, all_files = (
    f'{out_folder}/{subset}' for subset in ('clinical', 'genetic', 'all')
)

### Logistic regression
Parameters:

```
"LR": {
      "penalty": ["l2"],
      "C": [0.001, 0.01, 0.1, 1, 10, 100],
      "solver": ["saga"],
      "max_iter": [10000]
      }
```

In [None]:
# Load the logistic-regression grid-search score table of each feature set
# (comma-separated text, first column used as the index), in the order
# clinical, genetic, all.
cl_lr, gen_lr, all_lr = (
    pd.read_csv(
        os.path.join(folder, 'gridsearch_LRscores.txt'),
        sep=',',
        index_col=0,
    )
    for folder in (clin_files, gen_files, all_files)
)

In [None]:
# F2 grid-search curves for the three OPCRIT 90 feature sets. Every call uses
# the same arguments except the score table and the target PDF.
# NOTE(review): `setoff` / `horizontalalignment` presumably position a label
# inside the figure and `file_name` is presumably the path the figure is
# saved to -- confirm against visualization.plot_gridmetrics.

# Clinical features
plot_gridmetrics(
    cl_lr,
    scorers=["F2"], color_list=["black"], train_score=True,
    figsize=(6, 6), ylab="F2", ylim_v=(0.0, 1.0),
    yticks_v=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    setoff=0.05, horizontalalignment="right",
    file_name=f"{clin_files}/gs_opcrit90_clinical.pdf",
)

# Clinical + genetic features
plot_gridmetrics(
    all_lr,
    scorers=["F2"], color_list=["black"], train_score=True,
    figsize=(6, 6), ylab="F2", ylim_v=(0.0, 1.0),
    yticks_v=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    setoff=0.05, horizontalalignment="right",
    file_name=f"{all_files}/gs_opcrit90_all.pdf",
)

# Genetic features
plot_gridmetrics(
    gen_lr,
    scorers=["F2"], color_list=["black"], train_score=True,
    figsize=(6, 6), ylab="F2", ylim_v=(0.0, 1.0),
    yticks_v=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    setoff=0.05, horizontalalignment="right",
    file_name=f"{gen_files}/gs_opcrit90_genetic.pdf",
)

In [None]:
# OPCRIT.90: AUPRC comparison built from the results under `out_folder`.
# NOTE(review): `save` is presumably the name of the PDF written by
# plot_auprc -- confirm in the visualization module.
plot_auprc(
    str(out_folder),
    metric='f2',
    save='auprc_comparison.pdf',
)

### Biome

Random oversampling within grid-search cross-validation.

### Aggressive

In [None]:
# Root of the results for this outcome; it is expected to hold one sub-folder
# per feature set: clinical/, genetic/ and all/.
# NOTE(review): left empty here, so the derived paths below evaluate to
# '/clinical', '/genetic' and '/all' -- presumably a placeholder to fill in.
out_folder = ""

clin_files, gen_files, all_files = (
    f'{out_folder}/{subset}' for subset in ('clinical', 'genetic', 'all')
)

### Logistic regression
Parameters:

```
"LR": {
      "penalty": ["l1", "l2"],
      "C": [0.001, 0.01, 0.1, 1.0, 10.0, 100.0],
      "solver": ["saga"],
      "max_iter": [10000]
    }
```

In [None]:
# Load the logistic-regression grid-search score table of each feature set
# (comma-separated text, first column used as the index), in the order
# clinical, genetic, all.
cl_lr, gen_lr, all_lr = (
    pd.read_csv(
        os.path.join(folder, 'gridsearch_LRscores.txt'),
        sep=',',
        index_col=0,
    )
    for folder in (clin_files, gen_files, all_files)
)

In [None]:
# F2 grid-search curves for the three feature sets of this outcome. The calls
# differ in the score table, the target PDF and the `setoff` /
# `horizontalalignment` values.
# NOTE(review): `setoff` / `horizontalalignment` presumably position a label
# inside the figure and `file_name` is presumably the path the figure is
# saved to -- confirm against visualization.plot_gridmetrics.

# Clinical features
plot_gridmetrics(
    cl_lr,
    scorers=["F2"], color_list=["black"], train_score=True,
    figsize=(6, 6), ylab="F2", ylim_v=(0.0, 1.0),
    yticks_v=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    setoff=0.19, horizontalalignment="left",
    file_name=f"{clin_files}/gs_agressive_clinical.pdf",
)

# Clinical + genetic features (no horizontalalignment passed: the function's
# own default applies)
plot_gridmetrics(
    all_lr,
    scorers=["F2"], color_list=["black"], train_score=True,
    figsize=(6, 6), ylab="F2", ylim_v=(0.0, 1.0),
    yticks_v=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    setoff=0.18,
    file_name=f"{all_files}/gs_agressive_all.pdf",
)

# Genetic features (note the negative setoff)
plot_gridmetrics(
    gen_lr,
    scorers=["F2"], color_list=["black"], train_score=True,
    figsize=(6, 6), ylab="F2", ylim_v=(0.0, 1.0),
    yticks_v=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    setoff=-0.15, horizontalalignment="right",
    file_name=f"{gen_files}/gs_agressive_genetic.pdf",
)

In [None]:
# AUPRC comparison built from the results under `out_folder`.
# NOTE(review): `save` is presumably the name of the PDF written by
# plot_auprc -- confirm in the visualization module.
plot_auprc(
    str(out_folder),
    metric='f2',
    save='auprc_comparison.pdf',
)

### Psych admit

In [None]:
# Root of the psych-admit results; it is expected to hold one sub-folder per
# feature set: clinical/, genetic/ and all/.
out_folder = "../out/out_biome_psych_admit"

clin_files, gen_files, all_files = (
    f'{out_folder}/{subset}' for subset in ('clinical', 'genetic', 'all')
)

### Logistic regression
Parameters:

```
"LR": {
      "penalty": ["l1", "l2"],
      "C": [0.001, 0.01, 0.1, 1.0, 10.0, 100.0],
      "solver": ["saga"],
      "max_iter": [10000]
    }
```

In [None]:
# Load the logistic-regression grid-search score table of each feature set
# (comma-separated text, first column used as the index), in the order
# clinical, genetic, all.
cl_lr, gen_lr, all_lr = (
    pd.read_csv(
        os.path.join(folder, 'gridsearch_LRscores.txt'),
        sep=',',
        index_col=0,
    )
    for folder in (clin_files, gen_files, all_files)
)

In [None]:
# F2 grid-search curves for the three psych-admit feature sets. The calls
# differ in the score table, the target PDF and the `setoff` /
# `horizontalalignment` values.
# NOTE(review): `setoff` / `horizontalalignment` presumably position a label
# inside the figure and `file_name` is presumably the path the figure is
# saved to -- confirm against visualization.plot_gridmetrics.

# Clinical features
plot_gridmetrics(
    cl_lr,
    scorers=["F2"], color_list=["black"], train_score=True,
    figsize=(6, 6), ylab="F2", ylim_v=(0.0, 1.0),
    yticks_v=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    setoff=0.11, horizontalalignment="left",
    file_name=f"{clin_files}/gs_psych_admit_clinical.pdf",
)

# Clinical + genetic features (no horizontalalignment passed: the function's
# own default applies)
plot_gridmetrics(
    all_lr,
    scorers=["F2"], color_list=["black"], train_score=True,
    figsize=(6, 6), ylab="F2", ylim_v=(0.0, 1.0),
    yticks_v=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    setoff=0.1,
    file_name=f"{all_files}/gs_psych_admit_all.pdf",
)

# Genetic features
plot_gridmetrics(
    gen_lr,
    scorers=["F2"], color_list=["black"], train_score=True,
    figsize=(6, 6), ylab="F2", ylim_v=(0.0, 1.0),
    yticks_v=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    setoff=0.085, horizontalalignment="right",
    file_name=f"{gen_files}/gs_psych_admit_genetic.pdf",
)

In [None]:
# AUPRC comparison built from the psych-admit results under `out_folder`.
# NOTE(review): `save` is presumably the name of the PDF written by
# plot_auprc -- confirm in the visualization module.
plot_auprc(
    str(out_folder),
    metric='f2',
    save='auprc_comparison.pdf',
)