In [1]:
from scipy.sparse import csr_matrix, load_npz, save_npz, coo_matrix
import numpy as np 
import pandas as pd 
from sklearn.metrics import roc_auc_score,average_precision_score
import matplotlib.pyplot as plt 
from scipy.stats import entropy
from tqdm import tqdm 
import os 
import json 
import types 

In [2]:
# Threshold tags iterated over throughout the notebook. Each tag is inserted
# twice into file names (once as ppv, once as npv), i.e. only the diagonal
# ppv == npv is evaluated.
pvs = ['0', '0_2', '0_4', '0_5', '0_6', '0_7', '0_8', '0_9', '0_95', '0_99']

In [4]:
# directory that holds the label matrix (cls_T10_y.npz), the fold vector and
# the per-threshold confidence_selection matrices loaded further down
datapath = './files/baseline_plus_aux_data/pseudolabels_plus_baseline/matrices/cls/'

In [5]:
# labels - filtering on the test fold already here in order to drop data
fold_te = 0
path = datapath+'cls_T11_fold_vector.npy'
folds = np.load(path)

path = datapath+'cls_T10_y.npz'
y = load_npz(path)

# positions in the fold vector that belong to the test fold
fte_indx = np.flatnonzero(folds == fold_te)

# convert to COO once and reuse its three parallel arrays (data / row / col)
y_coo = coo_matrix(y)
y_df = pd.DataFrame({
    'label': y_coo.data,
    'row': y_coo.row,
    'col': y_coo.col,
}).query('row in @fte_indx')

In [6]:
# (rows, columns) of the full label matrix, i.e. before the test-fold filter
y.shape

(422185, 2963)

In [8]:
# Keep only the two columns used below: melloddy_col_idx (joined against the
# prediction columns) and cont_classification_task_id (joined against the
# columns of the label matrix).
path = './cp/summary_eps_0.05_mgd_cp.csv'
df_cp = pd.read_csv(path).loc[:, ['melloddy_col_idx', 'cont_classification_task_id']]

In [9]:
# restrict the test-fold labels to the task ids listed in the cp summary
l = list(df_cp['cont_classification_task_id'])
y_pseudolabels_df = y_df[y_df['col'].isin(l)]

### 1. for the models with auxiliary data (only diagonal for now) 

In [None]:
# matching pseudolabels with predictions

In [10]:
# For every threshold tag: load the predictions of the matching
# baseline_plus_aux model, translate prediction columns into label-matrix task
# ids through df_cp, and join the predictions with the test-fold labels.
preds_ppvnpv = {}
preds_labels_ppvnpv_df = {}
for pv in tqdm(pvs) : 
    path = '../predictions/preds/pred_inferencemodel_step3_fold0_baseline_plus_aux_ppv{}_npv{}_-class.npy'.format(pv,pv)
    # NOTE: allow_pickle=True unpickles arbitrary objects - only load trusted files
    preds_ppvnpv[pv] = np.load(path, allow_pickle=True).item()

    # convert to COO once per threshold instead of once per frame column
    preds_coo = coo_matrix(preds_ppvnpv[pv])
    preds_ppvnpv_df = pd.DataFrame({
        'pred': preds_coo.data,
        'row': preds_coo.row,
        'col': preds_coo.col,
    })

    # attach cont_classification_task_id to every prediction column that has
    # an entry in the cp summary (inner join drops all other columns)
    preds_ppvnpv_df = pd.merge(
        preds_ppvnpv_df
        ,df_cp
        ,how='inner'
        ,right_on='melloddy_col_idx'
        ,left_on='col'
    )

    # merging with the fold 0 predictions will reduce the pseudolabel data to fold 0
    preds_labels_ppvnpv_df[pv] = pd.merge(
        preds_ppvnpv_df
        ,y_pseudolabels_df
            # col field from the baseline_plus_aux_data y matrix 
        ,right_on=['row','col'] 
            # cont_classification_task_id from cp/summary_eps_0.05_mgd_cp file : ccti for the base+aux dataset in step 3 
        ,left_on=['row','cont_classification_task_id'] 
        ,how='inner'
    )
    

  0%|          | 0/10 [00:00<?, ?it/s]


FileNotFoundError: [Errno 2] No such file or directory: '../predictions/preds/pred_inferencemodel_step3_fold0_baseline_plus_aux_ppv0_npv0_-class.npy'

In [None]:
    # NOTE(review): this cell looks like a leftover loop body - it is indented
    # without an enclosing statement, relies on a `pv` left over from a previous
    # loop, and reads preds_labels_baseline_df, which is only built in section 2.
    # The two *_tmp frames are not used anywhere in the visible notebook.
    # TODO confirm whether the cell can be removed.
    f = 'cls_T10_y_ppv{}_npv{}.npz'.format(pv,pv)
    t10 = load_npz(os.path.join(datapath,f))
    s = set(t10.nonzero()[1])
    preds_labels_ppvnpv_df_tmp = preds_labels_ppvnpv_df[pv].query('cont_classification_task_id in @s')
    preds_labels_baseline_df_tmp = preds_labels_baseline_df.query('cont_classification_task_id in @s')

In [None]:
# Per-threshold, per-task metrics of the baseline_plus_aux models on the
# pseudolabel tasks. The four dicts hold parallel lists (same task order).
rocs = {}
avgprs = {}
sizes = {}
fracs = {}

for pv in tqdm(pvs) : 
    rocs[pv] = []
    avgprs[pv] = []
    sizes[pv] = []
    fracs[pv] = [] 
    
    # limit ourselves to the cols that are actually populated in the training
    f = './confidence_selection/cls_T10_y_ppv{}_npv{}.npz'.format(pv,pv)
    t10 = load_npz(os.path.join(datapath,f))
    s = np.unique(t10.nonzero()[1])
    cols_to_consider = preds_labels_ppvnpv_df[pv].query('cont_classification_task_id in @s')['cont_classification_task_id'].drop_duplicates()
    
    for col in cols_to_consider:
        arr = preds_labels_ppvnpv_df[pv].query('cont_classification_task_id == @col')
        try:
            # compute every metric first so that a failure in any of them
            # cannot leave the four result lists with different lengths
            roc = roc_auc_score(arr['label'],arr['pred'])
            avgpr = average_precision_score(arr['label'],arr['pred'])
            # share of records labelled -1 within this task
            frac = arr['label'].value_counts().loc[-1]/arr.shape[0]
        except Exception as err:
            # best effort: report the task and skip it, but do not swallow
            # KeyboardInterrupt / SystemExit the way a bare except would
            print(col, err)
            continue

        rocs[pv].append(roc)
        avgprs[pv].append(avgpr)
        sizes[pv].append(len(arr))
        fracs[pv].append(frac)
            

### 2. for the models without auxiliary data (baseline) 

In [None]:
# Same join as for the aux models, applied to the baseline predictions.
path = '../predictions/preds/pred_inferencemodel_step3_fold0_baseline-class.npy'
# NOTE: allow_pickle=True unpickles arbitrary objects - only load trusted files
preds_baseline = np.load(path, allow_pickle=True).item()

# convert to COO once and reuse its three parallel arrays
preds_baseline_coo = coo_matrix(preds_baseline)
preds_baseline_df = pd.DataFrame({
    'pred': preds_baseline_coo.data,
    'row': preds_baseline_coo.row,
    'col': preds_baseline_coo.col,
})

# attach cont_classification_task_id to the prediction columns listed in df_cp
preds_baseline_df = pd.merge(
    preds_baseline_df
    ,df_cp
    ,how='inner'
    ,right_on='melloddy_col_idx'
    ,left_on='col'
)

# pair every prediction with the test-fold label of the same row and task
preds_labels_baseline_df = pd.merge(
    preds_baseline_df
    ,y_pseudolabels_df
    ,right_on=['row','col']
    ,left_on=['row','cont_classification_task_id']
    ,how='inner'
)

In [None]:
# head-to-head comparison of both models will require equal amount of datapoints
# (spot check: only the '0_9' threshold is compared against the baseline here)
assert preds_labels_ppvnpv_df['0_9'].shape == preds_labels_baseline_df.shape

In [None]:
# number of baseline predictions per task (value_counts sorts by count, descending)
cnts = preds_baseline_df['cont_classification_task_id'].value_counts()
# spot check on two fixed positions (the largest and the third-largest count):
# values should match for the baseline - dense predictions with no confidence selection
assert cnts.iloc[0] == cnts.iloc[2]

In [None]:
# number of test-fold labels per task (value_counts sorts by count, descending)
cnts = y_pseudolabels_df['col'].value_counts()
# spot check on two fixed positions (the largest and the third-largest count):
# values will only match in rare cases (confidence row selection for every task)
# NOTE(review): this can fail on valid data whenever the two counts happen to tie
assert cnts.iloc[0] != cnts.iloc[2]

In [None]:
# Per-threshold, per-task metrics of the baseline model, evaluated on the same
# task selection as the aux models. The four dicts hold parallel lists.
rocs_baseline = {}
avgprs_baseline = {}
sizes_baseline = {}
fracs_baseline = {}


for pv in tqdm(pvs) : 

    rocs_baseline[pv] = []
    avgprs_baseline[pv] = []
    sizes_baseline[pv] = []
    fracs_baseline[pv] = []
    
    # limit ourselves to the cols that are actually populated in the training
    f = './confidence_selection/cls_T10_y_ppv{}_npv{}.npz'.format(pv,pv)
    t10 = load_npz(os.path.join(datapath,f))
    s = np.unique(t10.nonzero()[1])
    # the task list is taken from the aux frame of this threshold so that both
    # models are scored on the same tasks in the same order
    cols_to_consider = preds_labels_ppvnpv_df[pv].query('cont_classification_task_id in @s')['cont_classification_task_id'].drop_duplicates()
    
    for col in cols_to_consider:
        arr = preds_labels_baseline_df.query('cont_classification_task_id == @col')
        try:
            # compute every metric first so that a failure in any of them
            # cannot leave the four result lists with different lengths
            roc = roc_auc_score(arr['label'],arr['pred'])
            avgpr = average_precision_score(arr['label'],arr['pred'])
            # share of records labelled -1 within this task
            frac = arr['label'].value_counts().loc[-1]/arr.shape[0]
        except Exception as err:
            # best effort: report the task and skip it, but do not swallow
            # KeyboardInterrupt / SystemExit the way a bare except would
            print(col, err)
            continue

        rocs_baseline[pv].append(roc)
        avgprs_baseline[pv].append(avgpr)
        sizes_baseline[pv].append(len(arr))
        fracs_baseline[pv].append(frac)
            

In [None]:
# sanity check: the baseline is scored on a threshold-dependent task selection,
# so its median ROC-AUC for the first and the last threshold is expected to differ
assert np.median(rocs_baseline['0'])!= np.median(rocs_baseline['0_99'])

In [None]:
# Box plot of the per-task ROC-AUC difference (aux model minus baseline) for
# every threshold, plus an all-zero reference box for the baseline itself.
diffs = [np.array(rocs[pv_tag]) - np.array(rocs_baseline[pv_tag]) for pv_tag in pvs]
diffs.append(np.array(rocs_baseline['0']) - np.array(rocs_baseline['0']))

# One label per box. These must come from the threshold tags: `l` holds the
# task ids of the cp summary at this point, so its length does not match the
# number of ticks.
box_labels = [pv_tag.replace('_', '.') for pv_tag in pvs] + ['baseline']

_ = plt.boxplot(diffs)
plt.grid(ls='--')
_ = plt.xticks(
    range(1,len(diffs)+1)
    ,box_labels
    ,rotation=90
)
plt.ylabel('Delta cross-AUCROC \n(on set of tasks with \nan image-based counterpart)')
plt.xlabel('NPV and PPV thresholds (diagonal)')
# zero line across all boxes
_ = plt.plot([0.5,len(pvs)+1+0.5],[0.,0.], linestyle='--', color='blue')
_ = plt.ylim([-.25,.25])
plt.savefig(
    './figures/delta_cross_AUC_diff.svg'
    ,bbox_inches='tight'
    ,pad_inches=0
           )

#### Primary activity data

In [None]:
import sys
sys.path.append('/projects/home/wheyndri/git/performance_evaluation/development/')
import modeval

In [None]:

def load_results(filename, two_heads=False):
    """Loads conf and results from a file.

    Args:
        filename    name of the json/npy file
        two_heads   set up class_output_size if missing

    Returns:
        For a ``.npy`` file, the object stored in it. For a json file, the
        decoded dict in which ``conf`` is a ``types.SimpleNamespace`` and the
        embedded tables under ``results``, ``results_agg``, ``training`` and
        ``validation`` are parsed into pandas DataFrames / Series.
    """
    from io import StringIO

    def _parse(payload, **kwargs):
        # Passing a literal JSON string to pd.read_json is deprecated since
        # pandas 2.1; wrapping it in a buffer works on old and new versions.
        if isinstance(payload, str):
            payload = StringIO(payload)
        return pd.read_json(payload, **kwargs)

    if filename.endswith(".npy"):
        # NOTE: allow_pickle=True unpickles arbitrary objects - trusted files only
        return np.load(filename, allow_pickle=True).item()

    with open(filename, "r") as f:
        data = json.load(f)

    conf = data["conf"]
    conf.setdefault("model_type", None)
    if two_heads and ("class_output_size" not in conf):
        # single-output configs: treat the whole output as the classification head
        conf["class_output_size"] = conf["output_size"]
        conf["regr_output_size"]  = 0
    data["conf"] = types.SimpleNamespace(**conf)

    if "results" in data:
        for key in data["results"]:
            data["results"][key] = _parse(data["results"][key])

    if "results_agg" in data:
        for key in data["results_agg"]:
            data["results_agg"][key] = _parse(data["results_agg"][key], typ="series")

    for key in ["training", "validation"]:
        if key not in data:
            continue
        section = data[key]
        # tables that are absent from the file are skipped instead of raising
        for dfkey in ["classification", "regression"]:
            if dfkey in section:
                section[dfkey] = _parse(section[dfkey])
        for skey in ["classification_agg", "regression_agg"]:
            if skey in section:
                section[skey] = _parse(section[skey], typ="series")

    return data



In [None]:
# result file of the baseline model, parsed by load_results
path = '../modelling/baseline_plus_aux/models/sc_baseline_2010_h4000_ldo0.8_wd1e-06_lr0.001_lrsteps10_ep20_fva0_fte-1.json'
prim_perf_baseline = load_results(path)

In [None]:
# result file of the baseline_plus_aux model for the ppv0_6 / npv0_6 threshold
path = '../modelling/baseline_plus_aux/models/sc_baseline_plus_aux_ppv0_6_npv0_6_2010_h4000_ldo0.8_wd1e-06_lr0.001_lrsteps10_ep20_fva0_fte-1.json'
prim_perf_0_6 = load_results(path)

In [None]:
# preview only - avoid rendering the whole joined frame
preds_labels_baseline_df.head()

In [None]:
# Task selection for the single threshold inspected in the scatter plot below.
pv = '0_6'
f = './confidence_selection/cls_T10_y_ppv{}_npv{}.npz'.format(pv,pv)
t10 = load_npz(os.path.join(datapath,f))
# label-matrix columns that contain at least one stored non-zero entry
s = np.unique(t10.nonzero()[1])
task_ids = preds_labels_ppvnpv_df[pv]['cont_classification_task_id']
cols_to_consider = task_ids[task_ids.isin(s)].drop_duplicates()
# prediction-column ids (melloddy_col_idx) of the selected tasks
main_tasks = df_cp.loc[df_cp['cont_classification_task_id'].isin(cols_to_consider), 'melloddy_col_idx']

In [None]:
# Map the validation roc_auc_score of every main task onto the 'inferno' colormap.
# NOTE(review): in the visible notebook `prim_perf` is only assigned inside the
# per-threshold loop further down, so this cell raises NameError on a fresh
# kernel; after that loop it holds the results of the last threshold.
# TODO confirm which model's performance is meant to drive the colours.
name = 'inferno'
cmap = plt.get_cmap(name)
perfcolors = cmap(prim_perf['validation']['classification'].loc[main_tasks].sort_index()['roc_auc_score'])

In [None]:
# Scatter of the change in primary (validation) ROC-AUC against the change in
# cross ROC-AUC for the threshold selected in `pv`.
# classification tables of both models, restricted to the main tasks and
# ordered by index; looked up once and reused below
baseline_cls = prim_perf_baseline['validation']['classification'].loc[main_tasks].sort_index()
aux_cls = prim_perf_0_6['validation']['classification'].loc[main_tasks].sort_index()

# NOTE(review): x is ordered by the sorted table index, while rocs[pv] /
# rocs_baseline[pv] follow the order in which tasks were scored (and omit
# tasks whose metric failed) - confirm both axes refer to the same tasks
# in the same order.
plt.scatter(
    aux_cls['roc_auc_score'] - baseline_cls['roc_auc_score']
    ,np.array(rocs[pv])-np.array(rocs_baseline[pv])
    ,alpha=.3
    ,c=perfcolors
    ,s=np.log10(baseline_cls['num_pos'])*3
)
# dashed grid; the linestyle has to be passed by keyword, the first positional
# argument of plt.grid is the on/off flag
plt.grid(ls='--')
plt.xlabel('delta primary AUC ROC')
plt.ylabel('delta cross-AUC ROC')
_ = plt.title('NPV/PPV threshold {}'.format(pv))

plt.savefig(
    './figures/delta_cross_AUC_vs_primary.svg'
    ,bbox_inches='tight'
    ,pad_inches=0
           )


In [None]:
# Stand-alone colorbar: render a dummy 2x2 image spanning the min/max
# roc_auc_score of the main tasks so the bar shows the value range.
# NOTE(review): in the visible notebook `prim_perf` is only assigned inside the
# per-threshold loop further down, so this cell raises NameError on a fresh
# kernel - TODO confirm which model's performance is intended.
mn = prim_perf['validation']['classification'].loc[main_tasks].sort_index()['roc_auc_score'].min()
mx = prim_perf['validation']['classification'].loc[main_tasks].sort_index()['roc_auc_score'].max()
im = plt.imshow([[mn,mn],[mx,mx]],cmap)
plt.colorbar(im)

In [None]:
# Median gain over the baseline per threshold: `primarys` from the validation
# ROC-AUC stored in the model result files, `secondarys` from the cross
# ROC-AUC lists computed above.
primarys = []
secondarys = []
for pv in tqdm(pvs) : 

    path = '../modelling/baseline_plus_aux/models/sc_baseline_plus_aux_ppv{}_npv{}_2010_h4000_ldo0.8_wd1e-06_lr0.001_lrsteps10_ep20_fva0_fte-1.json'.format(pv,pv)
    prim_perf = load_results(path)

    # same task selection as used when the cross ROC-AUC lists were built
    f = './confidence_selection/cls_T10_y_ppv{}_npv{}.npz'.format(pv,pv)
    t10 = load_npz(os.path.join(datapath,f))
    s = np.unique(t10.nonzero()[1])
    task_ids = preds_labels_ppvnpv_df[pv]['cont_classification_task_id']
    cols_to_consider = task_ids[task_ids.isin(s)].drop_duplicates()
    main_tasks = df_cp.loc[df_cp['cont_classification_task_id'].isin(cols_to_consider), 'melloddy_col_idx']

    # cross (pseudolabel) ROC-AUC: plain median of the per-task differences
    cross_delta = np.array(rocs[pv]) - np.array(rocs_baseline[pv])
    secondary = np.median(cross_delta)
    secondarys.append(secondary)

    # primary ROC-AUC: NaN-aware median of the per-task differences
    aux_auc = prim_perf['validation']['classification'].loc[main_tasks].sort_index()['roc_auc_score']
    baseline_auc = prim_perf_baseline['validation']['classification'].loc[main_tasks].sort_index()['roc_auc_score']
    primary = np.nanmedian(aux_auc - baseline_auc)
    primarys.append(primary)
    

In [None]:
# Median primary vs. secondary (cross) ROC-AUC gain per threshold.
# One tick label per threshold. These must come from the threshold tags: `l`
# holds the task ids of the cp summary at this point, not threshold labels.
tick_labels = [pv_tag.replace('_', '.') for pv_tag in pvs]

_ = plt.plot(primarys)
_ = plt.plot(secondarys)
_ = plt.xticks(
    range(len(primarys))
    ,tick_labels
    ,rotation=90
)
_ = plt.legend([
    'primary'
    ,'secondary'
    
])
plt.savefig(
    './figures/primary_vs_secondary.svg'
    ,bbox_inches='tight'
    ,pad_inches=0
           )