### Using the predictions of the best TCGA model at the TCGA-optimal threshold,
### move the attention-map-related pngs into the following subfolders:
1. TP
2. TN
3. FN
4. FP

### Then summarize the [slide id, patch name, subfolder] triples in a list

In [1]:
import os
import pandas as pd
from shutil import copy, move
from glob import glob

In [2]:
# Evaluation run whose outputs are reorganised by the cells below.
root = "/media/visiopharm5/WDGold/deeplearning/MIL/CLAM/eval_results"
target = "EVAL_mondor_hcc_tumor_139_Interferon_Gamma_Biology_cv_highvsrest_00X_CLAM_50_s1_cv"

# Fold to process; set to -1 to iterate over every fold.
fold = 3
# Factor that appears in the "attention_maps_<fold>_<downscale>" folder name.
downscale = 8

# Full path of the evaluation run directory.
path = os.path.join(root, target)

In [92]:
# List the folds to process.
if fold == -1:
    # Every fold: collect the ids of the "fold_<id>_optimal_tcga.csv" files
    # located directly in `path`. Only the top level is inspected because the
    # prediction files are later read straight from `path`, and no loop
    # variable named `root` is used so the configured `root` is not clobbered.
    # The ids are de-duplicated and sorted (shorter ids first, so "2" comes
    # before "10") to make the processing order deterministic.
    folds = sorted(
        {
            name.split("_")[1]
            for name in os.listdir(path)
            if name.startswith("fold_") and name.endswith("_optimal_tcga.csv")
        },
        key=lambda fold_id: (len(fold_id), fold_id),
    )
else:
    # A single, explicitly requested fold.
    folds = [fold]

In [93]:
for fold in folds:
    # Load the per-slide predictions obtained with the optimal TCGA threshold.
    file = os.path.join(path, "fold_" + str(fold) + "_optimal_tcga.csv")
    print(file)

    df = pd.read_csv(file)
    print(df.shape)
    display(df.head(5))

    # Folders holding this fold's attention maps and representative patches.
    attention_dir = os.path.join(
        path, "attention_maps_" + str(fold) + "_" + str(downscale)
    )
    patches_dir = os.path.join(path, "repres_patches_" + str(fold))

    # One sub-folder per outcome in each of the two folders.
    # exist_ok=True lets the cell be re-run without failing on existing folders.
    for parent_dir in (attention_dir, patches_dir):
        for outcome in ("tp", "fp", "tn", "fn"):
            os.makedirs(os.path.join(parent_dir, outcome), exist_ok=True)

    # Rows of the patch summary: [slide, patch file name, outcome].
    summary_rows = []

    # Classify every prediction from the columns "Y" (position 1) and
    # "true_prediction" (position 7); position 0 is the slide id.
    for i in range(df.shape[0]):
        slide = df.iloc[i, 0]
        label = df.iloc[i, 1]
        correct = df.iloc[i, 7]

        if label == 1.0 and correct:
            outcome = "tp"
        elif label == 0.0 and correct:
            outcome = "tn"
        elif label == 0.0 and not correct:
            # Negative slide predicted as positive: a false positive.
            outcome = "fp"
        else:
            # Remaining case, a positive slide predicted as negative:
            # a false negative.
            outcome = "fn"

        # Attention maps
        for src in glob(os.path.join(attention_dir, slide + "_*")):
            move(src, os.path.join(attention_dir, outcome))
        # Representative patches
        for src in glob(os.path.join(patches_dir, slide + "_*")):
            move(src, os.path.join(patches_dir, outcome))
        # Record the patches that now sit in the outcome sub-folder.
        for moved in sorted(glob(os.path.join(patches_dir, outcome, slide + "_*"))):
            summary_rows.append([slide, os.path.basename(moved), outcome])

    # Build the patch summary in one go instead of appending row by row.
    df_res = pd.DataFrame(summary_rows, columns=["slide", "patch", "class"])

    # Structure the patch summary df:
    # sort to make sure tp goes first and patches of the same slide are together
    df_res.sort_values(by=["class", "patch"], ascending=False, inplace=True)

    # Make sure the intra-slide order is from high attention score to low:
    # 0 to 7, -8 to -1. After the descending sort the first 8 rows of each
    # group of 16 are in reverse order, so they are flipped back.
    # NOTE(review): assumes every slide contributed exactly 16 patches - confirm.
    order = list(range(len(df_res)))
    for start in range(0, len(order) // 16 * 16, 16):
        order[start:start + 8] = order[start:start + 8][::-1]
    df_res_ = df_res.iloc[order].reset_index(drop=True)

    print(df_res_.shape)
    display(df_res_.head(5))
    df_res_.to_csv(os.path.join(path, "repres_patches_" + str(fold), "summary.csv"))
    

/media/visiopharm5/WDGold/deeplearning/MIL/CLAM/eval_results/EVAL_mondor_hcc_tumor_139_Interferon_Gamma_Biology_cv_highvsrest_00X_CLAM_50_s1_cv/fold_3_optimal_tcga.csv
(139, 8)


Unnamed: 0,slide_id,Y,Y_hat,p_0,p_1,case_id,consistent_prediction,true_prediction
0,HMNT0499_bis - 2017-06-05 11.22.25,1.0,1.0,0.299018,0.700982,HMNT0499,,True
1,HMNT0500 - 2017-06-06 20.39.35,1.0,0.0,0.809231,0.190769,HMNT0500,,False
2,HMNT0343_bis - 2017-06-06 12.56.13,1.0,1.0,0.429594,0.570406,HMNT0343,,True
3,HMNT0156_bis - 2017-06-05 03.33.14,1.0,0.0,0.817555,0.182445,HMNT0156,,False
4,HMNT0998_bis - 2017-06-30 07.23.23,1.0,1.0,0.36295,0.63705,HMNT0998,,True


Unnamed: 0,slide,patch,class
127,HMNT2387_P761471-A03-HES,HMNT2387_P761471-A03-HES_7_0.9972688255950058_...,tp
126,HMNT2387_P761471-A03-HES,HMNT2387_P761471-A03-HES_6_0.9976589933671479_...,tp
125,HMNT2387_P761471-A03-HES,HMNT2387_P761471-A03-HES_5_0.9980491611392899_...,tp
124,HMNT2387_P761471-A03-HES,HMNT2387_P761471-A03-HES_4_0.998439328911432_[...,tp
123,HMNT2387_P761471-A03-HES,HMNT2387_P761471-A03-HES_3_0.9988294966835739_...,tp


(2224, 3)


Unnamed: 0,slide,patch,class
0,HMNT2387_P761471-A03-HES,"HMNT2387_P761471-A03-HES_0_1.0_[30720,27392].tif",tp
1,HMNT2387_P761471-A03-HES,HMNT2387_P761471-A03-HES_1_0.9996098322278579_...,tp
2,HMNT2387_P761471-A03-HES,HMNT2387_P761471-A03-HES_2_0.999219664455716_[...,tp
3,HMNT2387_P761471-A03-HES,HMNT2387_P761471-A03-HES_3_0.9988294966835739_...,tp
4,HMNT2387_P761471-A03-HES,HMNT2387_P761471-A03-HES_4_0.998439328911432_[...,tp
