## Adjust threshold for better sensitivity and specificity
1. Test thresholds of 0.1, 0.2, 0.3, ..., 0.9 on the predicted probability of the positive class (`p_1`).
2. Sensitivity and specificity are read from `sklearn.metrics.classification_report`; documentation: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html

Note that in binary classification, recall of the positive class is also known as “sensitivity”; recall of the negative class is “specificity”.

In [15]:
import os
import numpy as np
import pandas as pd
from sklearn.metrics import roc_curve, roc_auc_score, classification_report

In [51]:
# Location of the evaluation results, the experiment folder to inspect,
# and the cross-validation fold whose predictions are tuned below.
root = "/media/visiopharm5/WDGold/deeplearning/MIL/CLAM/eval_results"
target = "EVAL_mondor_hcc_tumor_139_T-cell_Exhaustion_cv_highvsrest_00X_CLAM_50_s1_cv"
fold = 5

# `path` is the experiment folder; `file` is the CSV of the chosen fold inside it.
path = os.path.join(root, target)
file = os.path.join(path, f"fold_{fold}.csv")
print(file)

/media/visiopharm5/WDGold/deeplearning/MIL/CLAM/eval_results/EVAL_mondor_hcc_tumor_139_T-cell_Exhaustion_cv_highvsrest_00X_CLAM_50_s1_cv/fold_5.csv


In [52]:
# Read the selected fold's per-slide predictions, then show the table size
# and its first five rows as a quick sanity check.
df = pd.read_csv(file)
print(df.shape)
display(df.head())

(139, 5)


Unnamed: 0,slide_id,Y,Y_hat,p_0,p_1
0,HMNT0343_bis - 2017-06-06 12.56.13,1.0,1.0,0.452502,0.547498
1,HMNT0582_bis - 2017-07-16 22.01.02,1.0,0.0,0.85036,0.14964
2,HMNT0499_bis - 2017-06-05 11.22.25,1.0,0.0,0.578438,0.421562
3,HMNT0156_bis - 2017-06-05 03.33.14,1.0,0.0,0.942139,0.057861
4,HMNT0998_bis - 2017-06-30 07.23.23,1.0,1.0,0.396821,0.603179


In [53]:
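# fpr, tpr, thresholds = roc_curve(df.Y, df.p_1)
# print("sensitivity, specificity:")
# display(list(zip(tpr, 1 - fpr)))  # specificity is 1 - FPR (1 - TPR would be the false-negative rate)
# print("threshold:")
# display(thresholds)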

In [54]:
# Candidate decision thresholds 0.1, 0.2, ..., 0.9.
# They are built from integers so that every value is the exact one-decimal float.
# np.arange(0.1, 1, 0.1) accumulates rounding error (0.30000000000000004,
# 0.7000000000000001), which would leak into the `p_1 >= cutoff` comparison and
# into the index of the CSV saved from these cutoffs.
cutoffs = np.arange(1, 10) / 10
print(cutoffs)
# NOTE(review): the values below look like cutoffs noted for other signatures — confirm before relying on them.
# Ribas_10G_Interferon_Gamma 0.05839334
# Inflammatory 0.065155, 0.115860

[0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9]


In [55]:
def annotate_predictions(df, cutoff):
    """Re-threshold the slide predictions at ``cutoff`` and add per-case columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-slide predictions with columns ``slide_id``, ``Y``, ``Y_hat`` and ``p_1``.
    cutoff : float
        Slides with ``p_1 >= cutoff`` get ``Y_hat = 1.0`` and slides with
        ``p_1 < cutoff`` get ``Y_hat = 0.0``.

    Returns
    -------
    pandas.DataFrame
        A deep copy of ``df`` (the input is not modified) with ``Y_hat``
        recomputed and three extra columns appended in this order:

        * ``case_id`` - the first 8 characters of ``slide_id``;
        * ``consistent_prediction`` - ``None`` when the case has a single slide,
          otherwise ``True``/``False`` for whether all slides of that case
          share the same ``Y_hat``;
        * ``true_prediction`` - whether ``Y_hat`` equals ``Y``.
    """
    out = df.copy(deep=True)
    out.loc[out['p_1'] >= cutoff, 'Y_hat'] = 1.0
    out.loc[out['p_1'] < cutoff, 'Y_hat'] = 0.0

    # Columns are addressed by name rather than by position, so an extra or
    # reordered column in the CSV cannot silently shift what is being compared.
    out['case_id'] = out['slide_id'].str[:8]

    # One grouped pass instead of filtering the whole frame once per row.
    slides_in_case = out['case_id'].map(out['case_id'].value_counts())
    labels_in_case = out.groupby('case_id')['Y_hat'].transform('nunique')
    out['consistent_prediction'] = (labels_in_case == 1).astype(object)
    # Consistency is undefined for a case represented by a single slide.
    out.loc[slides_in_case == 1, 'consistent_prediction'] = None

    out['true_prediction'] = out['Y'] == out['Y_hat']
    return out


# Sensitivity / specificity collected per cutoff, in the order of `cutoffs`.
sens = []
spes = []

for cutoff in cutoffs:
    df_new = annotate_predictions(df, float(cutoff))
    display(df_new.head(5))

    print(classification_report(df_new['Y'], df_new['Y_hat'], digits=4))

    # Build the dict form once and read both recalls from it. `digits` is
    # omitted because it is ignored when output_dict=True (values are not rounded).
    # The keys are the string forms of the float labels 0.0 / 1.0.
    report = classification_report(df_new['Y'], df_new['Y_hat'], output_dict=True)
    sens.append(report['1.0']['recall'])  # recall of the positive class
    spes.append(report['0.0']['recall'])  # recall of the negative class

Unnamed: 0,slide_id,Y,Y_hat,p_0,p_1,case_id,consistent_prediction,true_prediction
0,HMNT0343_bis - 2017-06-06 12.56.13,1.0,1.0,0.452502,0.547498,HMNT0343,,True
1,HMNT0582_bis - 2017-07-16 22.01.02,1.0,1.0,0.85036,0.14964,HMNT0582,,True
2,HMNT0499_bis - 2017-06-05 11.22.25,1.0,1.0,0.578438,0.421562,HMNT0499,,True
3,HMNT0156_bis - 2017-06-05 03.33.14,1.0,0.0,0.942139,0.057861,HMNT0156,,False
4,HMNT0998_bis - 2017-06-30 07.23.23,1.0,1.0,0.396821,0.603179,HMNT0998,,True


              precision    recall  f1-score   support

         0.0     0.9552    0.5766    0.7191       111
         1.0     0.3472    0.8929    0.5000        28

    accuracy                         0.6403       139
   macro avg     0.6512    0.7347    0.6096       139
weighted avg     0.8327    0.6403    0.6750       139



Unnamed: 0,slide_id,Y,Y_hat,p_0,p_1,case_id,consistent_prediction,true_prediction
0,HMNT0343_bis - 2017-06-06 12.56.13,1.0,1.0,0.452502,0.547498,HMNT0343,,True
1,HMNT0582_bis - 2017-07-16 22.01.02,1.0,0.0,0.85036,0.14964,HMNT0582,,False
2,HMNT0499_bis - 2017-06-05 11.22.25,1.0,1.0,0.578438,0.421562,HMNT0499,,True
3,HMNT0156_bis - 2017-06-05 03.33.14,1.0,0.0,0.942139,0.057861,HMNT0156,,False
4,HMNT0998_bis - 2017-06-30 07.23.23,1.0,1.0,0.396821,0.603179,HMNT0998,,True


              precision    recall  f1-score   support

         0.0     0.9192    0.8198    0.8667       111
         1.0     0.5000    0.7143    0.5882        28

    accuracy                         0.7986       139
   macro avg     0.7096    0.7671    0.7275       139
weighted avg     0.8348    0.7986    0.8106       139



Unnamed: 0,slide_id,Y,Y_hat,p_0,p_1,case_id,consistent_prediction,true_prediction
0,HMNT0343_bis - 2017-06-06 12.56.13,1.0,1.0,0.452502,0.547498,HMNT0343,,True
1,HMNT0582_bis - 2017-07-16 22.01.02,1.0,0.0,0.85036,0.14964,HMNT0582,,False
2,HMNT0499_bis - 2017-06-05 11.22.25,1.0,1.0,0.578438,0.421562,HMNT0499,,True
3,HMNT0156_bis - 2017-06-05 03.33.14,1.0,0.0,0.942139,0.057861,HMNT0156,,False
4,HMNT0998_bis - 2017-06-30 07.23.23,1.0,1.0,0.396821,0.603179,HMNT0998,,True


              precision    recall  f1-score   support

         0.0     0.8899    0.8739    0.8818       111
         1.0     0.5333    0.5714    0.5517        28

    accuracy                         0.8129       139
   macro avg     0.7116    0.7227    0.7168       139
weighted avg     0.8181    0.8129    0.8153       139



Unnamed: 0,slide_id,Y,Y_hat,p_0,p_1,case_id,consistent_prediction,true_prediction
0,HMNT0343_bis - 2017-06-06 12.56.13,1.0,1.0,0.452502,0.547498,HMNT0343,,True
1,HMNT0582_bis - 2017-07-16 22.01.02,1.0,0.0,0.85036,0.14964,HMNT0582,,False
2,HMNT0499_bis - 2017-06-05 11.22.25,1.0,1.0,0.578438,0.421562,HMNT0499,,True
3,HMNT0156_bis - 2017-06-05 03.33.14,1.0,0.0,0.942139,0.057861,HMNT0156,,False
4,HMNT0998_bis - 2017-06-30 07.23.23,1.0,1.0,0.396821,0.603179,HMNT0998,,True


              precision    recall  f1-score   support

         0.0     0.8814    0.9369    0.9083       111
         1.0     0.6667    0.5000    0.5714        28

    accuracy                         0.8489       139
   macro avg     0.7740    0.7185    0.7399       139
weighted avg     0.8381    0.8489    0.8404       139



Unnamed: 0,slide_id,Y,Y_hat,p_0,p_1,case_id,consistent_prediction,true_prediction
0,HMNT0343_bis - 2017-06-06 12.56.13,1.0,1.0,0.452502,0.547498,HMNT0343,,True
1,HMNT0582_bis - 2017-07-16 22.01.02,1.0,0.0,0.85036,0.14964,HMNT0582,,False
2,HMNT0499_bis - 2017-06-05 11.22.25,1.0,0.0,0.578438,0.421562,HMNT0499,,False
3,HMNT0156_bis - 2017-06-05 03.33.14,1.0,0.0,0.942139,0.057861,HMNT0156,,False
4,HMNT0998_bis - 2017-06-30 07.23.23,1.0,1.0,0.396821,0.603179,HMNT0998,,True


              precision    recall  f1-score   support

         0.0     0.8504    0.9730    0.9076       111
         1.0     0.7500    0.3214    0.4500        28

    accuracy                         0.8417       139
   macro avg     0.8002    0.6472    0.6788       139
weighted avg     0.8302    0.8417    0.8154       139



Unnamed: 0,slide_id,Y,Y_hat,p_0,p_1,case_id,consistent_prediction,true_prediction
0,HMNT0343_bis - 2017-06-06 12.56.13,1.0,0.0,0.452502,0.547498,HMNT0343,,False
1,HMNT0582_bis - 2017-07-16 22.01.02,1.0,0.0,0.85036,0.14964,HMNT0582,,False
2,HMNT0499_bis - 2017-06-05 11.22.25,1.0,0.0,0.578438,0.421562,HMNT0499,,False
3,HMNT0156_bis - 2017-06-05 03.33.14,1.0,0.0,0.942139,0.057861,HMNT0156,,False
4,HMNT0998_bis - 2017-06-30 07.23.23,1.0,1.0,0.396821,0.603179,HMNT0998,,True


              precision    recall  f1-score   support

         0.0     0.8182    0.9730    0.8889       111
         1.0     0.5714    0.1429    0.2286        28

    accuracy                         0.8058       139
   macro avg     0.6948    0.5579    0.5587       139
weighted avg     0.7685    0.8058    0.7559       139



Unnamed: 0,slide_id,Y,Y_hat,p_0,p_1,case_id,consistent_prediction,true_prediction
0,HMNT0343_bis - 2017-06-06 12.56.13,1.0,0.0,0.452502,0.547498,HMNT0343,,False
1,HMNT0582_bis - 2017-07-16 22.01.02,1.0,0.0,0.85036,0.14964,HMNT0582,,False
2,HMNT0499_bis - 2017-06-05 11.22.25,1.0,0.0,0.578438,0.421562,HMNT0499,,False
3,HMNT0156_bis - 2017-06-05 03.33.14,1.0,0.0,0.942139,0.057861,HMNT0156,,False
4,HMNT0998_bis - 2017-06-30 07.23.23,1.0,0.0,0.396821,0.603179,HMNT0998,,False


              precision    recall  f1-score   support

         0.0     0.8000    0.9730    0.8780       111
         1.0     0.2500    0.0357    0.0625        28

    accuracy                         0.7842       139
   macro avg     0.5250    0.5043    0.4703       139
weighted avg     0.6892    0.7842    0.7138       139



Unnamed: 0,slide_id,Y,Y_hat,p_0,p_1,case_id,consistent_prediction,true_prediction
0,HMNT0343_bis - 2017-06-06 12.56.13,1.0,0.0,0.452502,0.547498,HMNT0343,,False
1,HMNT0582_bis - 2017-07-16 22.01.02,1.0,0.0,0.85036,0.14964,HMNT0582,,False
2,HMNT0499_bis - 2017-06-05 11.22.25,1.0,0.0,0.578438,0.421562,HMNT0499,,False
3,HMNT0156_bis - 2017-06-05 03.33.14,1.0,0.0,0.942139,0.057861,HMNT0156,,False
4,HMNT0998_bis - 2017-06-30 07.23.23,1.0,0.0,0.396821,0.603179,HMNT0998,,False


              precision    recall  f1-score   support

         0.0     0.7986    1.0000    0.8880       111
         1.0     0.0000    0.0000    0.0000        28

    accuracy                         0.7986       139
   macro avg     0.3993    0.5000    0.4440       139
weighted avg     0.6377    0.7986    0.7091       139



Unnamed: 0,slide_id,Y,Y_hat,p_0,p_1,case_id,consistent_prediction,true_prediction
0,HMNT0343_bis - 2017-06-06 12.56.13,1.0,0.0,0.452502,0.547498,HMNT0343,,False
1,HMNT0582_bis - 2017-07-16 22.01.02,1.0,0.0,0.85036,0.14964,HMNT0582,,False
2,HMNT0499_bis - 2017-06-05 11.22.25,1.0,0.0,0.578438,0.421562,HMNT0499,,False
3,HMNT0156_bis - 2017-06-05 03.33.14,1.0,0.0,0.942139,0.057861,HMNT0156,,False
4,HMNT0998_bis - 2017-06-30 07.23.23,1.0,0.0,0.396821,0.603179,HMNT0998,,False


              precision    recall  f1-score   support

         0.0     0.7986    1.0000    0.8880       111
         1.0     0.0000    0.0000    0.0000        28

    accuracy                         0.7986       139
   macro avg     0.3993    0.5000    0.4440       139
weighted avg     0.6377    0.7986    0.7091       139



In [56]:
# One row per cutoff with the sensitivity and specificity collected above;
# the table is displayed and then written into the experiment folder.
df_eval = pd.DataFrame({"Sensitivity": sens, "Specificity": spes}, index=cutoffs)
display(df_eval)

out_csv = os.path.join(path, f"threshold_tuning_fold_{fold}.csv")
df_eval.to_csv(out_csv)

Unnamed: 0,Sensitivity,Specificity
0.1,0.892857,0.576577
0.2,0.714286,0.81982
0.3,0.571429,0.873874
0.4,0.5,0.936937
0.5,0.321429,0.972973
0.6,0.142857,0.972973
0.7,0.035714,0.972973
0.8,0.0,1.0
0.9,0.0,1.0
