In [1]:
from relapse_prediction import constants

In [2]:
from sklearn.metrics import auc
from tqdm.auto import tqdm
import pandas as pd
import pickle
import numpy as np

# 1. Cercare features : 

In [3]:
df_cercare_auc = pd.DataFrame(columns=["patient", "label", "imaging", "feature", "AUC", "cutoff", "recall", "specificity"])

for patient in tqdm(constants.list_patients):
    for label in ["L1", "L2", "L3", "L4", "L5",
                  'L3R', "L3 + L3R", "L3R - (L1 + L3)",
                    "SumPreRT + L3R", "L2 + L3R - (L1 + L3)"]:
        
        
        for imaging in ['CTH', 'OEF', 'rCBV']:
            for feature in [None, "mean_5x5x5"]:
                
                feature_col = f"{imaging}_{feature}" if feature is not None else imaging                

                with open(constants.dir_thresholds / "all_voxels"  / patient / label / f"{feature_col}_quantized.pickle", "rb") as f:
                    dict_thresholds = pickle.load(f)
                
                fpr, tpr, thresholds = dict_thresholds["fpr"], dict_thresholds["tpr"], dict_thresholds["thresholds"]
                auc_value = auc(fpr, tpr)
                
                 # Compute cutoff, recall, and specificity
                optimal_idx = np.argmax(tpr - fpr)
                cutoff = thresholds[optimal_idx]
                recall = tpr[optimal_idx]
                specificity = 1 - fpr[optimal_idx]
                
                df_cercare_auc.loc[len(df_cercare_auc)] = [patient, label, imaging, feature, auc_value, cutoff, recall, specificity]

df_cercare_auc.to_excel("cercare_auc_values_per_patient.xlsx", index=False)


  0%|          | 0/184 [00:00<?, ?it/s]

# 2. MRI features

In [4]:
df_mri_auc = pd.DataFrame(columns=["patient", "label", "imaging", "feature", "norm", "AUC", "cutoff", "recall", "specificity"])

for patient in tqdm(constants.list_patients):
    for label in ["L1", "L2", "L3", "L4", "L5",
                  'L3R', "L3 + L3R", "L3R - (L1 + L3)",
                    "SumPreRT + L3R", "L2 + L3R - (L1 + L3)"]:
        
        for imaging in constants.L_IRM_MAPS:
            for feature in [None, "mean_5x5x5"]:
                for norm in ["z_score"]:
                
                    feature_col = f"{imaging}_{feature}" if feature is not None else imaging
                    
                    with open(constants.dir_thresholds / "all_voxels" / patient / label / f"{feature_col}_{norm}_normalized.pickle", "rb") as f:
                        dict_thresholds = pickle.load(f)
                    
                    fpr, tpr, thresholds = dict_thresholds["fpr"], dict_thresholds["tpr"], dict_thresholds["thresholds"]
                    auc_value = auc(fpr, tpr)
                    
                    # Compute cutoff, recall, and specificity
                    optimal_idx = np.argmax(tpr - fpr)
                    cutoff = thresholds[optimal_idx]
                    recall = tpr[optimal_idx]
                    specificity = 1 - fpr[optimal_idx]
                    
                    df_mri_auc.loc[len(df_mri_auc)] = [patient, label, imaging, feature, norm, auc_value, cutoff, recall, specificity]

df_mri_auc.to_excel("mri_auc_values_per_patient.xlsx", index=False)

  0%|          | 0/184 [00:00<?, ?it/s]

In [16]:
df_mri_auc["feature"].fillna("", inplace=True)
df = df_mri_auc.groupby(["label", "imaging", "feature", "norm"])["AUC"].mean().reset_index()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_mri_auc["feature"].fillna("", inplace=True)


In [26]:
df.loc[(df['imaging'] == 'FLAIR') & (df['norm'] == 'min_max')]

Unnamed: 0,label,imaging,feature,norm,AUC
1,L3R,FLAIR,,min_max,0.659678
4,L3R,FLAIR,mean_5x5x5,min_max,0.705842
19,L3R - (L1 + L3),FLAIR,,min_max,0.638843
22,L3R - (L1 + L3),FLAIR,mean_5x5x5,min_max,0.681963
37,L3R - (L1 + L3)_5x5x5,FLAIR,,min_max,0.634283
40,L3R - (L1 + L3)_5x5x5,FLAIR,mean_5x5x5,min_max,0.674923
55,L3R_5x5x5,FLAIR,,min_max,0.656677
58,L3R_5x5x5,FLAIR,mean_5x5x5,min_max,0.702527


In [12]:
dict_naming = {"all" : "on_all_patients",
               "Class" : "per_class",
               "surgery_type" : "surgery_type"}

for type in ["all", "Class", "surgery_type"]:
    
    dir_data = constants.dir_total_cutoffs / "all_voxels" / type
    
    path_cercare = list(dir_data.glob("Cercare_results_*.csv"))[0]
    path_mri = list(dir_data.glob("IRM_results_*.csv"))[0]
    
    df_cercare_auc = pd.read_csv(path_cercare)
    df_mri_auc = pd.read_csv(path_mri)
    
    df_type = pd.concat([df_cercare_auc, df_mri_auc], ignore_index=True)
    
    df_type.to_excel(dir_data.parent / f"Cutoff_values_computed_{dict_naming[type]}.xlsx", index=False)

In [8]:
dir_data

PosixPath('/media/maichi/T7/Relapse prediction/results/total_cutoffs/all_voxels/surgery_type')