In [1]:
from relapse_prediction import constants

In [2]:
from sklearn.metrics import auc
from tqdm.auto import tqdm
import pandas as pd
import pickle

# 1. Cercare features : 

In [3]:
df_cercare_auc = pd.DataFrame(columns=["patient", "label", "imaging", "feature", "AUC"])

for patient in tqdm(constants.list_patients):
    for label in ["L3R", "L3R_5x5x5", "L3R - (L1 + L3)", "L3R - (L1 + L3)_5x5x5"]:
        for imaging in constants.L_CERCARE_MAPS:
            for feature in [None, "mean_5x5x5"]:
                
                
                feature_col = f"{imaging}_{feature}" if feature is not None else imaging
                with open(constants.dir_thresholds / patient / label / f"{feature_col}_quantized.pickle", "rb") as f:
                    dict_thresholds = pickle.load(f)
                
                fpr, tpr = dict_thresholds["fpr"], dict_thresholds["tpr"]
                auc_value = auc(fpr, tpr)
                
                df_cercare_auc.loc[len(df_cercare_auc)] = [patient, label, imaging, feature, auc_value]

df_cercare_auc.to_csv("cercare_auc_values_per_patient.csv", index=False)

  0%|          | 0/103 [00:00<?, ?it/s]

In [5]:
df_cercare_auc["feature"].fillna("", inplace=True)
df = df_cercare_auc.groupby(["label", "imaging", "feature"])["AUC"].mean().reset_index()

In [14]:
df.loc[df['imaging'] == 'rLeakage']

Unnamed: 0,label,imaging,feature,AUC
12,L3R,rLeakage,,0.550002
13,L3R,rLeakage,mean_5x5x5,0.560691
26,L3R - (L1 + L3),rLeakage,,0.53139
27,L3R - (L1 + L3),rLeakage,mean_5x5x5,0.542321
40,L3R - (L1 + L3)_5x5x5,rLeakage,,0.504674
41,L3R - (L1 + L3)_5x5x5,rLeakage,mean_5x5x5,0.51497
54,L3R_5x5x5,rLeakage,,0.54569
55,L3R_5x5x5,rLeakage,mean_5x5x5,0.555032


# 2. MRI features

In [15]:
df_mri_auc = pd.DataFrame(columns=["patient", "label", "imaging", "feature", "norm", "AUC"])

for patient in tqdm(constants.list_patients):
    for label in ["L3R", "L3R_5x5x5", "L3R - (L1 + L3)", "L3R - (L1 + L3)_5x5x5"]:
        for imaging in constants.L_IRM_MAPS:
            for feature in [None, "mean_5x5x5"]:
                for norm in ["z_score", "min_max", "max"]:
                
                    feature_col = f"{imaging}_{feature}" if feature is not None else imaging
                    with open(constants.dir_thresholds / patient / label / f"{feature_col}_{norm}_normalized.pickle", "rb") as f:
                        dict_thresholds = pickle.load(f)
                    
                    fpr, tpr = dict_thresholds["fpr"], dict_thresholds["tpr"]
                    auc_value = auc(fpr, tpr)
                    
                    df_mri_auc.loc[len(df_mri_auc)] = [patient, label, imaging, feature, norm, auc_value]

df_mri_auc.to_csv("mri_auc_values_per_patient.csv", index=False)

  0%|          | 0/103 [00:00<?, ?it/s]

In [16]:
df_mri_auc["feature"].fillna("", inplace=True)
df = df_mri_auc.groupby(["label", "imaging", "feature", "norm"])["AUC"].mean().reset_index()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_mri_auc["feature"].fillna("", inplace=True)


In [26]:
df.loc[(df['imaging'] == 'FLAIR') & (df['norm'] == 'min_max')]

Unnamed: 0,label,imaging,feature,norm,AUC
1,L3R,FLAIR,,min_max,0.659678
4,L3R,FLAIR,mean_5x5x5,min_max,0.705842
19,L3R - (L1 + L3),FLAIR,,min_max,0.638843
22,L3R - (L1 + L3),FLAIR,mean_5x5x5,min_max,0.681963
37,L3R - (L1 + L3)_5x5x5,FLAIR,,min_max,0.634283
40,L3R - (L1 + L3)_5x5x5,FLAIR,mean_5x5x5,min_max,0.674923
55,L3R_5x5x5,FLAIR,,min_max,0.656677
58,L3R_5x5x5,FLAIR,mean_5x5x5,min_max,0.702527
