# 전체 classification 결과를 환자 단위, 파트 별 단위로 묶어서 결과 만드는 코드

In [2]:
import os
import numpy as np
import pandas

## ENV SETTING

In [11]:
# Experiment identifiers: names of the two label schemes and the learning-rate
# tag used as a directory name for the prediction results.
label_type_3cls, label_type_5cls = '3classes', '5classes'
learning_rate = '5e-5'

# Number of cross-validation folds iterated over by the evaluation cells.
num_fold = 5

# Ground-truth side: dataset root and the directory of 3-class label files.
true_dataset_root = 'E:/Thesis_research/Database/Medical/Dental_directory_dataset'
true_lbl_dir = os.path.join(true_dataset_root, 'ClassificationClass', label_type_3cls)

# Prediction side: raw prediction root for this learning rate, with one
# experiment sub-directory per label scheme.
pred_root = f'E:/Thesis_research/results_materials/Dental/raw_prediction_results/{learning_rate}'
exp_dir_3cls, exp_dir_5cls = (
    os.path.join(pred_root, label_type)
    for label_type in (label_type_3cls, label_type_5cls)
)

## PREDICTION SETTING AND VOTING

* 각 네트워크 별로 4개의 part에 대한 prediction 중 unique 병록번호에 해당하는 prediction들을 모아서 voting해서 true와 비교!

In [9]:
from collections import Counter
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Tooth-part identifiers; each patient is expected to have one prediction file
# per part.
part_list = [16, 26, 36, 46]

# Class indices shared by the 3-class labels and the collapsed 5-class
# predictions. Passing them to confusion_matrix keeps every fold's matrix the
# same shape even when a class is absent from a fold, so the per-fold matrices
# can be stacked and averaged afterwards.
class_label_list = [0, 1, 2]

# Per-fold results collected for the summary cells.
patient_wise_overall_acc_lst = []

confusion_matrix_metric_tot_lst = []


def _read_int_label(label_path):
    """Return the integer class index stored in the text file at label_path."""
    with open(label_path, 'r') as f:
        return int(f.read())


def _collapse_5cls_label(lbl_5cls):
    """Map a 5-class index onto the 3-class scheme: 1-3 -> 1, 4 -> 2, anything else unchanged."""
    if lbl_5cls in [1, 2, 3]:
        return 1
    if lbl_5cls == 4:
        return 2
    return lbl_5cls


for i_fold_iter in range(num_fold):
    print()
    print(f'Current fold: {i_fold_iter +1 }')

    # ## TRUE LABEL SETTING
    # NOTE(review): the eval list is 1-based (eval{fold+1}.txt) while the
    # prediction directories below are 0-based (fold{fold}) - confirm that both
    # refer to the same split.
    true_imageset_path = os.path.join(true_dataset_root,'ImageSets','Classification','eval' + str(i_fold_iter+1) + '.txt')

    with open(true_imageset_path, 'r') as f:
        eval_img_list = f.read().split('\n')

    # Patient key is the first two '_'-separated tokens, e.g. '20_2392392'.
    # Both tokens are used because a few image file names are mislabeled.
    person_num_list = []
    for i_eval_img in eval_img_list:
        eval_img_info = i_eval_img.split('_')
        # Skip blank lines (and any line without an underscore) BEFORE indexing
        # the second token; indexing first would raise IndexError.
        if len(eval_img_info) < 2:
            continue
        person_num_list.append(eval_img_info[0] + '_' + eval_img_info[1])
    person_num_unique_list = np.unique(np.array(person_num_list))

    person_num_perdiction_all_list = []
    true_lbl_unique = []

    pred_dir_3cls = os.path.join(pred_root,label_type_3cls, f'resnet152-TL_aug-{label_type_3cls}-fold{i_fold_iter}','eval_result_resnet152_cls_best_model', 'prediction_class')
    pred_result_list_3cls = sorted(os.listdir(pred_dir_3cls))
    pred_dir_5cls = os.path.join(pred_root,label_type_5cls, f'resnet152-TL_aug-{label_type_5cls}-fold{i_fold_iter}','eval_result_resnet152_cls_best_model', 'prediction_class')
    for i_person_num_unique in person_num_unique_list:

        # NOTE(review): substring match - a patient key that is contained in
        # another patient's file name would also match; the part-count check
        # below is the only safeguard against that.
        pred_result_person_num = [s for s in pred_result_list_3cls if i_person_num_unique in s]

        # Every patient must have a prediction file for each tooth part.
        if len(pred_result_person_num) != len(part_list):
            raise AssertionError(
                f'Each person must have four teeth parts: {i_person_num_unique} '
                f'has {len(pred_result_person_num)} prediction file(s)'
            )

        # true label setting: all parts of one patient must carry the same label
        part_true_lbl_list = [
            _read_int_label(os.path.join(true_lbl_dir, i_pred))
            for i_pred in pred_result_person_num
        ]
        true_lbl = part_true_lbl_list[0]
        if any(lbl != true_lbl for lbl in part_true_lbl_list):
            raise AssertionError(
                f'Inconsistent true labels {part_true_lbl_list} for patient {i_person_num_unique}'
            )
        true_lbl_unique.append(true_lbl)

        # Collect one vote per part from each network: the 3-class prediction
        # as-is, followed by the 5-class prediction collapsed to 3 classes.
        person_num_prediction = []
        for i_pred in pred_result_person_num:
            pred_lbl_3cls = _read_int_label(os.path.join(pred_dir_3cls, i_pred))
            person_num_prediction.append(pred_lbl_3cls)

            pred_lbl_5cls = _read_int_label(os.path.join(pred_dir_5cls, i_pred))
            person_num_prediction.append(_collapse_5cls_label(pred_lbl_5cls))

        person_num_perdiction_all_list.append(person_num_prediction)

    # Majority vote over each patient's votes. When several labels share the
    # top count, Counter.most_common returns the one encountered first.
    network_final_pred_list = [
        Counter(i_person_num_pred).most_common(1)[0][0]
        for i_person_num_pred in person_num_perdiction_all_list
    ]

    confusion_matrix_metric = confusion_matrix(true_lbl_unique, network_final_pred_list, labels=class_label_list)
    print('Confusion matrix: ')
    print(confusion_matrix_metric)
    confusion_matrix_metric_tot_lst.append(confusion_matrix_metric)

    overall_acc_metric = accuracy_score(true_lbl_unique, network_final_pred_list)
    print('Overall accuracy = ', overall_acc_metric)

    patient_wise_overall_acc_lst.append(overall_acc_metric)
        


Current fold: 1
Confusion matrix: 
[[ 72   7   0]
 [  3 193   5]
 [  0  12  35]]
Overall accuracy =  0.9174311926605505

Current fold: 2
Confusion matrix: 
[[ 75   3   0]
 [  3 182   9]
 [  0   6  26]]
Overall accuracy =  0.930921052631579

Current fold: 3
Confusion matrix: 
[[ 71   9   0]
 [  4 195   2]
 [  0   8  26]]
Overall accuracy =  0.926984126984127

Current fold: 4
Confusion matrix: 
[[ 69  10   0]
 [  0 186  14]
 [  0   7  26]]
Overall accuracy =  0.9006410256410257

Current fold: 5
Confusion matrix: 
[[ 66  14   0]
 [  1 187   8]
 [  0  11  39]]
Overall accuracy =  0.8957055214723927


# Patient wise cv 결과 정리

In [10]:
# Summarise the cross-validation run: fold-averaged confusion matrix, overall
# accuracy, and per-class accuracy, each reported as mean and standard error
# over folds.
# NOTE(review): np.std is called with its default ddof (0, population std);
# the standard error of the mean is conventionally computed with ddof=1 -
# confirm which is intended before reporting these numbers.
print('Confusion matrix: ')
confusion_matrix_metric_tot = np.array(confusion_matrix_metric_tot_lst)
confusion_matrix_metric_avg = confusion_matrix_metric_tot.mean(axis=0)
print(confusion_matrix_metric_avg)
print()

print('Overall Accuracy: ')
num_acc_samples = len(patient_wise_overall_acc_lst)
patient_wise_avg_acc = np.mean(patient_wise_overall_acc_lst)
patient_wise_std_error = np.std(patient_wise_overall_acc_lst) / np.sqrt(num_acc_samples)
print('acc: ',patient_wise_avg_acc)
print('std_error: ', patient_wise_std_error)
print()

print('Group-wise accuracy: ')
group_wise_acc_dict = {}
for i_group in range(confusion_matrix_metric_tot.shape[1]):
    # Per-fold accuracy of this class: diagonal entry divided by the sum of
    # its confusion-matrix row.
    per_fold_acc = [
        fold_cm[i_group, i_group] / np.sum(fold_cm[i_group, :])
        for fold_cm in confusion_matrix_metric_tot
    ]
    group_wise_acc_dict[i_group] = per_fold_acc

    group_wise_acc_mean = np.mean(per_fold_acc)
    group_wise_acc_std_error = np.std(per_fold_acc) / np.sqrt(len(per_fold_acc))
    print('Age group ' + str(i_group+1))
    print('acc: ',group_wise_acc_mean)
    print('std_error: ',group_wise_acc_std_error)
    print()


Confusion matrix: 
[[ 70.6   8.6   0. ]
 [  2.2 188.6   7.6]
 [  0.    8.8  30.4]]

Overall Accuracy: 
acc:  0.9143365838779349
std_error:  0.006258646357674501

Group-wise accuracy: 
Age group 1
acc:  0.891769717624148
std_error:  0.02006584055648567

Age group 2
acc:  0.9505148442512873
std_error:  0.0065296353469551345

Age group 3
acc:  0.7779531042591119
std_error:  0.010156236908906872



# 3cls patient-wise 및 part-wise 결과와 비교 (p-value)

In [12]:
from scipy.stats import ttest_ind
# Test the combined (3cls + 5cls) voting accuracies against the 3-class-only
# accuracies loaded from .npy files under exp_dir_3cls (presumably per-fold
# accuracy lists saved by another notebook - verify).
# NOTE(review): ttest_ind runs with its default equal_var here, whereas the
# 5-class comparison cell passes equal_var=False - confirm which is intended.
print('====== patient-wise =====')
print('(3cls + 5cls) voting vs 3cls patient-wise acc')
patient_wise_acc_path_3cls = os.path.join(exp_dir_3cls, '3cls_patient_wise_acc_lst.npy')
patient_wise_acc_lst_3cls = np.load(patient_wise_acc_path_3cls)
t_stat, pval = ttest_ind(patient_wise_overall_acc_lst, patient_wise_acc_lst_3cls)
print("p-value",pval)
print()

# Same test against each tooth part's 3-class accuracies.
print('===== part-wise ======')
for i_part in part_list:
    print(f'(3cls + 5cls) voting vs 3cls part {i_part} acc')
    part_wise_acc_lst = np.load(os.path.join(exp_dir_3cls, f'3cls_part{i_part}_acc_lst.npy'))
    t_stat, pval = ttest_ind(patient_wise_overall_acc_lst, part_wise_acc_lst)
    print("p-value",pval)
    print()

(3cls + 5cls) voting vs 3cls patient-wise acc
p-value 0.49414063152958787

(3cls + 5cls) voting vs 3cls part 16 acc
p-value 0.008440845954472493

(3cls + 5cls) voting vs 3cls part 26 acc
p-value 0.05656757821722401

(3cls + 5cls) voting vs 3cls part 36 acc
p-value 0.013973450006104386

(3cls + 5cls) voting vs 3cls part 46 acc
p-value 0.0047389854203696465



# 5cls part-wise와 비교 (p-value)

In [13]:
# Test the combined (3cls + 5cls) voting accuracies against the 5-class-only
# accuracies loaded from .npy files under exp_dir_5cls (presumably per-fold
# accuracy lists saved by another notebook - verify). equal_var=False selects
# Welch's t-test, which does not assume equal population variances.
print('====== patient-wise =====')
print('(3cls + 5cls) voting vs 5cls patient-wise acc')
patient_wise_acc_path_5cls = os.path.join(exp_dir_5cls, '5cls_patient_wise_acc_lst.npy')
patient_wise_acc_lst_5cls = np.load(patient_wise_acc_path_5cls)
t_stat, pval = ttest_ind(
    patient_wise_overall_acc_lst,
    patient_wise_acc_lst_5cls,
    equal_var=False,
)
print("p-value",pval)
print()

# Same test against each tooth part's 5-class accuracies.
print('===== part-wise ======')
for i_part in part_list:
    print(f'(3cls + 5cls) voting vs 5cls part {i_part} acc')
    part_wise_acc_lst = np.load(os.path.join(exp_dir_5cls, f'5cls_part{i_part}_acc_lst.npy'))
    t_stat, pval = ttest_ind(
        patient_wise_overall_acc_lst,
        part_wise_acc_lst,
        equal_var=False,
    )
    print("p-value",pval)
    print()

(3cls + 5cls) voting vs 5cls patient-wise acc
p-value 0.15717687963179283

(3cls + 5cls) voting vs 5cls part 16 acc
p-value 0.010623953765516905

(3cls + 5cls) voting vs 5cls part 26 acc
p-value 0.002901277540254535

(3cls + 5cls) voting vs 5cls part 36 acc
p-value 0.022206565962268235

(3cls + 5cls) voting vs 5cls part 46 acc
p-value 0.0060579472586501735



## Compare 3 cls and 5 cls

In [14]:
# Test the 5-class-only patient-wise accuracies against the 3-class-only ones,
# both loaded from .npy files in their experiment directories. ttest_ind is
# called with its default equal_var.
print('====== patient-wise =====')
print('3cls voting vs 5cls voting acc')
patient_wise_acc_path_3cls = os.path.join(exp_dir_3cls, '3cls_patient_wise_acc_lst.npy')
patient_wise_acc_path_5cls = os.path.join(exp_dir_5cls, '5cls_patient_wise_acc_lst.npy')
patient_wise_acc_lst_3cls = np.load(patient_wise_acc_path_3cls)
patient_wise_acc_lst_5cls = np.load(patient_wise_acc_path_5cls)
t_stat, pval = ttest_ind(patient_wise_acc_lst_5cls, patient_wise_acc_lst_3cls)
print("p-value",pval)
print()


3cls voting vs 5cls voting acc
p-value 0.5178819509214782

