In [13]:
from relapse_prediction import constants, utils
from relapse_prediction import features

from tqdm.auto import tqdm
from scipy import stats
import pandas as pd
import pickle


# First, let's correct the IRM features (z-score normalization):

In [33]:
def correct_irm_features(patient, imaging):
    """Z-score the intensity columns of one features file and overwrite it.

    The file ``{patient}_{imaging}_features.parquet`` inside
    ``constants.dir_features / patient`` is loaded, the raw ``imaging`` column
    and its five neighbourhood-mean columns are each replaced by
    ``scipy.stats.zscore`` of themselves (default arguments), and the frame is
    written back to the same path.

    Parameters
    ----------
    patient
        Patient identifier; used as sub-directory name and file-name prefix.
    imaging
        Imaging map name; used in the file name and as the name of the raw
        intensity column.
    """
    patient_dir = constants.dir_features / patient
    features_file = patient_dir / f"{patient}_{imaging}_features.parquet"

    features = pd.read_parquet(features_file, engine="pyarrow")

    # Columns to standardize: the raw map plus its local-mean variants.
    columns_to_scale = (
        imaging,
        "mean_7x7",
        "mean_5x5",
        "mean_3x3",
        "mean_3x3x3",
        "mean_5x5x5",
    )
    for column in columns_to_scale:
        features[column] = stats.zscore(features[column])

    # Overwrites the input file in place.
    features.to_parquet(features_file, engine="pyarrow")


In [34]:
# Run the IRM feature correction for every (patient, imaging map) pair.
# NOTE: correct_irm_features writes its result back to the same parquet file it read.
for patient in tqdm(constants.list_patients):
    for imaging in constants.L_IRM_MAPS:
        correct_irm_features(patient, imaging)
        

  0%|          | 0/105 [00:00<?, ?it/s]

## Intensity binning

In [None]:
def get_list_thresholds(imaging, feature, label):
    """Load the pickled thresholds for one (label, imaging, feature) combination.

    The object is read from
    ``constants.dir_features / "list_thresholds" /
    f"{label}_{imaging}_{feature}_thresholds.pickle"``.

    Parameters
    ----------
    imaging
        Imaging map name, interpolated into the file name.
    feature
        Feature name, interpolated into the file name.
    label
        Label name, interpolated into the file name.

    Returns
    -------
    object
        The unpickled content of the file (presumably a list of thresholds --
        confirm against the code that writes these files).

    Raises
    ------
    FileNotFoundError
        If no thresholds file exists for this combination.
    """
    path_list_thresholds = constants.dir_features / "list_thresholds" / f"{label}_{imaging}_{feature}_thresholds.pickle"

    # NOTE: pickle.load can execute arbitrary code; only read files produced by this project.
    with open(path_list_thresholds, "rb") as f:
        list_thresholds = pickle.load(f)

    # The original body loaded the object but never handed it back to the caller.
    return list_thresholds
    

# Second, let's correct the label columns:

In [46]:
def correct_labels(patient):
    """Binarize the 5x5x5 mean label columns of one patient and drop their ``mean_`` prefix.

    Reads ``constants.dir_labels / f"{patient}_labels.parquet"``, thresholds
    each listed column at 0.5 (values < 0.5 become 0, values >= 0.5 become 1;
    NaN satisfies neither comparison and is left unchanged), renames the
    columns without the leading ``mean_`` and writes the frame back to the
    same file.

    Re-running is safe: a column that is absent under its original name but
    present under its corrected name is treated as already corrected and
    skipped, and the file is only rewritten when at least one column changed.

    Parameters
    ----------
    patient
        Patient identifier, interpolated into the labels file name.

    Raises
    ------
    KeyError
        If a listed column is present under neither its original nor its
        corrected name.
    """
    path_labels = constants.dir_labels / f"{patient}_labels.parquet"

    df_labels = pd.read_parquet(path_labels, engine="pyarrow")

    prefix = "mean_"
    list_cols = ['mean_L3R_5x5x5', 'mean_L3R - (L1 + L3)_5x5x5', 'mean_(L1 + L3)_5x5x5', 'mean_L2_5x5x5',
                 'mean_L3_5x5x5', 'mean_L4_5x5x5', 'mean_L5_5x5x5', 'mean_(L4 + L5)_5x5x5']

    dict_cols = {}
    for col in list_cols:
        # Slice the prefix off explicitly: str.lstrip("mean_") removes any leading run
        # of the characters m/e/a/n/_ and would mangle a name such as "mean_name_x".
        new_col = col[len(prefix):]

        # Already corrected by a previous run of this function.
        if col not in df_labels.columns and new_col in df_labels.columns:
            continue

        df_labels.loc[df_labels[col] < 0.5, col] = 0
        df_labels.loc[df_labels[col] >= 0.5, col] = 1
        dict_cols[col] = new_col

    if not dict_cols:
        # Nothing left to correct; leave the file untouched.
        return

    df_labels = df_labels.rename(columns=dict_cols)

    df_labels.to_parquet(path_labels, engine="pyarrow")
    

In [47]:
# Apply the label correction to every patient.
# NOTE: correct_labels modifies each patient's labels parquet file in place.
for patient in tqdm(constants.list_patients):
    correct_labels(patient)
    

  0%|          | 0/105 [00:00<?, ?it/s]