In [105]:
import os
import pandas as pd
import numpy as np
import traceback
import copy
import pickle

#Statistics
from scipy.special import ndtri
from statsmodels.stats.contingency_tables import mcnemar
from sklearn.metrics import cohen_kappa_score
import scipy.stats as stats #For Mann-Whitney U test

import warnings #To stop pandas warnings 
warnings.simplefilter(action='ignore', category=Warning)

In [106]:
#Root folders of the radiologists' review, located in the current working directory.
#Each root folder holds one subfolder per participant id; every subfolder stores the txt files
#(with the review information) and the images of the discrepancies reviewed by radiologists.
path_noemph=f"{os.getcwd()}/no_emphysema_reviewed"
path_emph=f"{os.getcwd()}/emphysema_reviewed"

In [107]:
#Read excel files with data
#Paths are built with os.path.join so that this cell runs on Windows, Linux and macOS alike
#(a hard-coded '\\' is a path separator only on Windows).
exp_files_dir=os.path.join(os.getcwd(),'emphysema_exp_files')

mod=pd.read_excel(os.path.join(exp_files_dir,'moderate_manual.xlsx'))
#NOTE(review): 'conf' loads exactly the same workbook as 'mod' - this looks like a copy-paste slip.
#Presumably a different file was intended here; confirm the correct filename before relying on 'conf'.
conf=pd.read_excel(os.path.join(exp_files_dir,'moderate_manual.xlsx'))
adv=pd.read_excel(os.path.join(exp_files_dir,'advanced_manual.xlsx'))
noemph=pd.read_excel(os.path.join(exp_files_dir,'noemphysema_manual.xlsx'))

#We also need a folder named 'emph_csv' with REDCap exports, one for each degree of emphysema - see below

Function to check results of radiologists' review

In [108]:
def show_information_of_review(path):
    
    'Gets the path of a folder with subfolders containing images and txt files of results of nodule review.'
    'These results should be of the following format: "nodule"/"no nodule" and then description and a confidence score'
    'It prints the participant_id, the txt file (with the slice number and if it is a FP or FN), the confidence score,'
    'and a description of the finding given by the radiologists.'
    'Returns dictionaries with participant id and nodule ids of findings belonging to each of the nodule/non-nodule'
    'categories (two dictionaries for each category, one with FPs and one with FNs). Moreover, it returns 4 dictionaries,'
    '2 containing the participant with the correct nodule ids for each of the FPs and FNs and 2 with the wrong ones.'
    'Moreover, we get 4 more dictionaries, 2 containing only lymph nodes and 2 containing only nodule ids (FP and FN again).'
    'At last, we get 4 dictionaries with non-nodule categories, 2 with FPs and 2 with FNs. Each of them has lung and non-lung findings.'
    

    uncertain=0 #Unsure of what the finding is
    nodule_all=0 #Count all nodules, FPs, and FNs
    total_files=0 #Total files
    excluded=[] #Files not taken into account
    tp_mistakes=0 #For TP accidentaly considered as discrepancies during review - happened probably only once
    
    
    #All possible non-nodule categories - based on new definition   
    fibr_scar_pleural=0 
    other=0
    
    #FPs and FNs for non-nodule categories
    fibr_FP=0
    fibr_FN=0
    other_FP=0
    other_FN=0
    
    #Possible TPs (errors) for non-nodule categories
    fibr_TP=0
    other_TP=0
    
    
    #Nodule categories
    cal_nod=0
    pleu_nod=0
    other_nod=0
    subgrou_nod=0
    canc_nod=0
    
    atypical_triang=0 #This and the next are typically benign so less important if AI would miss them
    peri_fissur=0
    bronchperi=0 
    
    #TP (errors) for nodules
    other_nod_TP=0
    cal_TP=0
    pleu_TP=0
    subgrou_TP=0
    canc_TP=0
    
    atypical_TP=0
    peri_TP=0
    bronchperi_TP=0
    
    #FPs and FNs for nodule categories
    other_nod_FP=0
    cal_FP=0
    pleu_FP=0
    subgrou_FP=0
    canc_FP=0
    other_nod_FN=0
    cal_FN=0
    pleu_FN=0
    subgrou_FN=0
    canc_FN=0
    
    atypical_FP=0
    atypical_FN=0
    peri_FP=0
    peri_FN=0
    bronchperi_FP=0
    bronchperi_FN=0
    
    #Dictionaries to be filled participant_ids and nodule_ids that belong to a given category
    atyp_FN={}
    per_FN={}
    bronchioperi_FN={}
    pleural_FN={}
    calcif_FN={}
    sub_ground_FN={}
    cancer_FN={}
    other_nodules_FN={}
    
    other_nonodules_FN={}
    fibrosis_FN={}
    other_nonodules_FN_lung={}
    other_nonodules_FN_nolung={}
    
    atyp_FP={}
    per_FP={}
    bronchioperi_FP={}
    pleural_FP={}
    calcif_FP={}
    sub_ground_FP={}
    cancer_FP={}
    other_nodules_FP={}
    
    #Non-nodule categories
    other_nonodules_FP={}
    fibrosis_FP={}
    other_nonodules_FP_lung={}
    other_nonodules_FP_nolung={}

    peri=0
    
    #Initialize empty dictionaries to keep track FP and FN slices
    
    #These are for both nodules (+lymph nodes) and non-nodules
    dict_FP_correct={}
    dict_FN_correct={}
    dict_FP_wrong={}
    dict_FN_wrong={}

    #Only for lymph nodes
    lymph_FN_correct={}
    lymph_FP_wrong={}

    #Only for nodules
    nod_FN_correct={}
    nod_FP_wrong={}
    
    
    for dirpath, dirnames, filenames in os.walk(path): #Loop over folders and subfolders
        for folder in dirnames: #For each folder (has participant name) in the above directory
            for file in os.listdir(dirpath+'/'+folder): #For each file in the above folder

                if file.endswith('.txt'): #If it's a txt print it (contains the review) along with the folder name (ID)
                    print(dirpath,':',folder,':',file)

                    with open(dirpath+'/'+folder+'/'+file) as f: #Read txt file
                        lines = f.readlines()

                    folder_pat=folder[:6] #keep only first 6 letters that correspond to participant id
                        
                    #Get confidence score - the only number in the text
                    confidence=[num for line in lines for num in line if num.isdigit()] 
            
                    if len(confidence)==1: #If there are more numbers it should be checked for errors
                        print('Confidence is',int(confidence[0]))
                    else:
                        print("ERROR in confidence level of file",file)

                        
                    no_nodules=[line for line in lines if 'no ' in line.lower()] #if this string in txt then no nodule

                    if len(no_nodules)!=0: #Confirm that above non-empty list
                        
                        total_files=total_files+1 #Increase total number of files taken into account
                        print('Finding is NOT a nodule (or it is a lymph node)')
                        
                        information=[info.split('nodule',1) for info in no_nodules][0] #split only on first occurence 
                        details=[elem for elem in information if len(elem)>5] #Since we may also have an element with 'no'
                        
                        if len(details)>0: #If we have a description of finding

                            #Perform some replacements to delete '\n','-', empty spaces and confidence score
                            detailed_info=details[0].replace('-', '').replace('\n','').replace(confidence[0],'').strip()

                            print(detailed_info.replace(':',''), 'was written in the txt file')
                            
#                             if int(confidence[0])>=4: #Only take into account confident predictions
                                
                            #Below categories for non-nodules

                            #For atypical and perifissural we noted them as non-nodules while actually want to be detected
                            #We will consider them as nodules - that's why we changed to 'fn_correct', 'fp_wrong' for them
                            if ('atypical' in detailed_info.lower() or 'triangular' in detailed_info.lower()): 

                                print('atypical/triangular lymph node')
                                atypical_triang=atypical_triang+1 #Count them

                                if 'tp' in file.lower():
                                    print('This will not be considered')
                                    atypical_TP=atypical_TP+1
                                    tp_mistakes=tp_mistakes+1
                                    nodule_all=nodule_all+1

                                elif 'fp' in file.lower() or 'ai' in file.lower() and 'fn' not in file.lower():
                                    atypical_FP=atypical_FP+1 #Count them
                                    nodule_all=nodule_all+1
                                    
                                    if folder_pat not in dict_FP_wrong: #Add participant id to dictionary - list only with slice numbers
                                        dict_FP_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))] 
                                    else:
                                        dict_FP_wrong[folder_pat]=dict_FP_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    
                                    #Atypical lymph nodes added to nodule group
                                    # if folder_pat not in lymph_FP_wrong: #Add it to dictionary with lymph nodes
                                    #     lymph_FP_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    # else:
                                    #     lymph_FP_wrong[folder_pat]=lymph_FP_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    if folder_pat not in nod_FP_wrong:
                                        nod_FP_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        nod_FP_wrong[folder_pat]=nod_FP_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
        

                                    if 'ai' in file.lower() and 'fp' not in file.lower(): #AI manually extracted image names have different naming conventions
                                        dict_FP_wrong[folder_pat]=dict_FP_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                        # lymph_FP_wrong[folder_pat]=lymph_FP_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                        nod_FP_wrong[folder_pat]=nod_FP_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]

    
                                    if int(folder_pat) in atyp_FP: #Add it to corresponding category dictionary
                                        atyp_FP[int(folder_pat)].append(file.lower().split('fp')[0])
                                    else:
                                        atyp_FP[int(folder_pat)]=[file.lower().split('fp')[0]]

                                elif 'fn' in file.lower(): #Similarly as above for FNs
                                    atypical_FN=atypical_FN+1
                                    nodule_all=nodule_all+1
                                    
                                    if folder_pat not in dict_FN_correct:
                                        dict_FN_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        dict_FN_correct[folder_pat]=dict_FN_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    
                                    # if folder_pat not in lymph_FN_correct:
                                    #     lymph_FN_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    # else:
                                    #     lymph_FN_correct[folder_pat]=lymph_FN_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]    
                                    if folder_pat not in nod_FN_correct:
                                        nod_FN_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        nod_FN_correct[folder_pat]=nod_FN_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]    
                                       
                                        
                                    if '_fn' in file.lower(): #To address some issues with manually created images
                                        dict_FN_correct[folder_pat]=dict_FN_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                        # lymph_FN_correct[folder_pat]=lymph_FN_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                        nod_FN_correct[folder_pat]=nod_FN_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]

            
                                    if int(folder_pat) in atyp_FN:
                                        atyp_FN[int(folder_pat)].append(file.lower().split('fn')[0])
                                    else:
                                        atyp_FN[int(folder_pat)]=[file.lower().split('fn')[0]]

                                else:
                                    print('ERROR in atypical')


                            elif ('periphysural' in detailed_info.lower() or 'fissural' in detailed_info.lower() 
                                  or 'fiscu' in detailed_info.lower() or 'pfn' in detailed_info.lower()):

                                print('periphysural/fissural/PFN')
                                peri_fissur=peri_fissur+1  

                                if 'tp' in file.lower():
                                    print('This will not be considered')
                                    peri_TP=peri_TP+1
                                    tp_mistakes=tp_mistakes+1
                                    nodule_all=nodule_all+1

                                elif 'fp' in file.lower() or 'ai' in file.lower() and 'fn' not in file.lower():
                                    peri_FP=peri_FP+1
                                    nodule_all=nodule_all+1
                                    
                                    if folder_pat not in dict_FP_wrong:
                                        dict_FP_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        dict_FP_wrong[folder_pat]=dict_FP_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                                                                                 
                                    if folder_pat not in lymph_FP_wrong:
                                        lymph_FP_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        lymph_FP_wrong[folder_pat]=lymph_FP_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                        
                        
                                    if 'ai' in file.lower() and 'fp' not in file.lower():
                                        dict_FP_wrong[folder_pat]=dict_FP_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                        lymph_FP_wrong[folder_pat]=lymph_FP_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                    
                                    if int(folder_pat) in per_FP:
                                        per_FP[int(folder_pat)].append(file.lower().split('fp')[0])
                                    else:
                                        per_FP[int(folder_pat)]=[file.lower().split('fp')[0]]

                                elif 'fn' in file.lower():
                                    peri_FN=peri_FN+1
                                    nodule_all=nodule_all+1
                                    
                                    if folder_pat not in dict_FN_correct:
                                        dict_FN_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        dict_FN_correct[folder_pat]=dict_FN_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    
                                    if folder_pat not in lymph_FN_correct:
                                        lymph_FN_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        lymph_FN_correct[folder_pat]=lymph_FN_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                        
                                    if '_fn' in file.lower():
                                        dict_FN_correct[folder_pat]=dict_FN_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                        lymph_FN_correct[folder_pat]=lymph_FN_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]

                                    if int(folder_pat) in per_FN:
                                        per_FN[int(folder_pat)].append(file.lower().split('fn')[0])
                                    else:
                                        per_FN[int(folder_pat)]=[file.lower().split('fn')[0]]

                                else:
                                    print('ERROR in periphysural')


                            elif ('fibrosis' in detailed_info.lower() or 'scar' in detailed_info.lower() 
                                  or 'thick' in detailed_info.lower()):

                                print('fibrosis/scar/pleural thickening')
                                fibr_scar_pleural=fibr_scar_pleural+1 

                                if 'tp' in file.lower():
                                    print('This will not be considered')
                                    fibr_TP=fibr_TP+1
                                    tp_mistakes=tp_mistakes+1
                                    nodule_all=nodule_all+1

                                elif 'fp' in file.lower() or 'ai' in file.lower() and 'fn' not in file.lower():
                                    fibr_FP=fibr_FP+1
                                    
                                    if folder_pat not in dict_FP_correct:
                                        dict_FP_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        dict_FP_correct[folder_pat]=dict_FP_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                        
                                    if 'ai' in file.lower() and 'fp' not in file.lower():
                                        dict_FP_correct[folder_pat]=dict_FP_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                    
                                    if int(folder_pat) in fibrosis_FP:
                                        fibrosis_FP[int(folder_pat)].append(file.lower().split('fp')[0])
                                    else:
                                        fibrosis_FP[int(folder_pat)]=[file.lower().split('fp')[0]]

                                elif 'fn' in file.lower():
                                    fibr_FN=fibr_FN+1
                                    
                                    if folder_pat not in dict_FN_wrong:
                                        dict_FN_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        dict_FN_wrong[folder_pat]=dict_FN_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    if '_fn' in file.lower():
                                        dict_FN_wrong[folder_pat]=dict_FN_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                    
                                    if int(folder_pat) in fibrosis_FN:
                                        fibrosis_FN[int(folder_pat)].append(file.lower().split('fn')[0])
                                    else:
                                        fibrosis_FN[int(folder_pat)]=[file.lower().split('fn')[0]]

                                else:
                                    print('ERROR IN fibrosis')


                            elif ('bronch' in detailed_info.lower() or 'peribronchial' in detailed_info.lower() or 'pbv' in detailed_info.lower()):

                                print('peribronchial/bronchiovascular')
                                bronchperi=bronchperi+1

                                if 'tp' in file.lower():
                                    print('This will not be considered')
                                    bronchperi_TP=bronchperi_TP+1
                                    tp_mistakes=tp_mistakes+1
                                    nodule_all=nodule_all+1

                                #In case peribronchial lymph nodes are excluded from analysis, activate below
                                # elif 'peribronchial' in detailed_info.lower() or 'pbv' in detailed_info.lower():
                                #     print('peribronchial that will not be considered - Need to confirm manually that vol<100mm3.')
                                #     print(folder,file)
                                #     nodule_all=nodule_all+1
                                #     peri=peri+1

                                elif 'fp' in file.lower() or 'ai' in file.lower() and 'fn' not in file.lower():
                                    bronchperi_FP=bronchperi_FP+1
                                    nodule_all=nodule_all+1
                                    
                                    if folder_pat not in dict_FP_wrong:
                                        dict_FP_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        dict_FP_wrong[folder_pat]=dict_FP_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]

                                    if folder_pat not in lymph_FP_wrong:
                                        lymph_FP_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        lymph_FP_wrong[folder_pat]=lymph_FP_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                        
                        
                                    if 'ai' in file.lower() and 'fp' not in file.lower():
                                        dict_FP_wrong[folder_pat]=dict_FP_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                        lymph_FP_wrong[folder_pat]=lymph_FP_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                    
                                    if int(folder_pat) in bronchioperi_FP:
                                        bronchioperi_FP[int(folder_pat)].append(file.lower().split('fp')[0])
                                    else:
                                        bronchioperi_FP[int(folder_pat)]=[file.lower().split('fp')[0]]

                                elif 'fn' in file.lower():
                                    bronchperi_FN=bronchperi_FN+1
                                    nodule_all=nodule_all+1
                                    
                                    if folder_pat not in dict_FN_correct:
                                        dict_FN_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        dict_FN_correct[folder_pat]=dict_FN_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                        
                                    if folder_pat not in lymph_FN_correct:
                                        lymph_FN_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        lymph_FN_correct[folder_pat]=lymph_FN_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                        
                                    if '_fn' in file.lower():
                                        dict_FN_correct[folder_pat]=dict_FN_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                        lymph_FN_correct[folder_pat]=lymph_FN_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                        
                                    if int(folder_pat) in bronchioperi_FN:
                                        bronchioperi_FN[int(folder_pat)].append(file.lower().split('fn')[0])
                                    else:
                                        bronchioperi_FN[int(folder_pat)]=[file.lower().split('fn')[0]]
                                else:
                                    print('ERROR IN peribronchial')


                            elif 'lymph' in detailed_info.lower(): 
                                #Seperate from above since sometimes it may start with 'fissural lymph node'
                                #and therefore being a different category - this checked first in the 'if' above

                                print('atypical/triangular lymph node')
                                atypical_triang=atypical_triang+1

                                if 'tp' in file.lower():
                                    print('This will not be considered')
                                    atypical_TP=atypical_TP+1
                                    tp_mistakes=tp_mistakes+1
                                    nodule_all=nodule_all+1

                                elif 'fp' in file.lower() or 'ai' in file.lower() and 'fn' not in file.lower():
                                    atypical_FP=atypical_FP+1
                                    nodule_all=nodule_all+1
                                    
                                    if folder_pat not in dict_FP_wrong:
                                        dict_FP_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        dict_FP_wrong[folder_pat]=dict_FP_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]

                                    # if folder_pat not in lymph_FP_wrong:
                                    #     lymph_FP_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    # else:
                                    #     lymph_FP_wrong[folder_pat]=lymph_FP_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    if folder_pat not in nod_FP_wrong:
                                        nod_FP_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        nod_FP_wrong[folder_pat]=nod_FP_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                        

                                    if 'ai' in file.lower() and 'fp' not in file.lower():
                                        dict_FP_wrong[folder_pat]=dict_FP_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                        # lymph_FP_wrong[folder_pat]=lymph_FP_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                        nod_FP_wrong[folder_pat]=nod_FP_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]

                                    
                                    if int(folder_pat) in atyp_FP:
                                        atyp_FP[int(folder_pat)].append(file.lower().split('fp')[0])
                                    else:
                                        atyp_FP[int(folder_pat)]=[file.lower().split('fp')[0]]

                                elif 'fn' in file.lower():
                                    atypical_FN=atypical_FN+1
                                    nodule_all=nodule_all+1
                                    
                                    if folder_pat not in dict_FN_correct:
                                        dict_FN_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        dict_FN_correct[folder_pat]=dict_FN_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                   
                                    # if folder_pat not in lymph_FN_correct:
                                    #     lymph_FN_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    # else:
                                    #     lymph_FN_correct[folder_pat]=lymph_FN_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    if folder_pat not in nod_FN_correct:
                                        nod_FN_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        nod_FN_correct[folder_pat]=nod_FN_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]    
                                 


                                    if '_fn' in file.lower():
                                        dict_FN_correct[folder_pat]=dict_FN_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                        # lymph_FN_correct[folder_pat]=lymph_FN_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                        nod_FN_correct[folder_pat]=nod_FN_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]

                                    if int(folder_pat) in atyp_FN:
                                        atyp_FN[int(folder_pat)].append(file.lower().split('fn')[0])
                                    else:
                                        atyp_FN[int(folder_pat)]=[file.lower().split('fn')[0]]

                                else:
                                    print('ERROR IN atypical')


                            else: #Here when we have description but it's other non-nods (eg. atelectasis)
                                print('other')
                                other=other+1

                                if 'tp' in file.lower():
                                    print('This will not be considered')
                                    other_TP=other_TP+1
                                    tp_mistakes=tp_mistakes+1
                                    nodule_all=nodule_all+1

                                elif 'fp' in file.lower() or 'ai' in file.lower() and 'fn' not in file.lower():
                                    other_FP=other_FP+1
                                    
                                    if folder_pat not in dict_FP_correct:
                                        dict_FP_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        dict_FP_correct[folder_pat]=dict_FP_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                        
                                    if 'ai' in file.lower() and 'fp' not in file.lower():
                                        dict_FP_correct[folder_pat]=dict_FP_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                    
                                    if int(folder_pat) in other_nonodules_FP:
                                        other_nonodules_FP[int(folder_pat)].append(file.lower().split('fp')[0])

                                        res=detailed_info.lower() #Get information about type of non-nodule

                                        if 'atele' in res or 'infe' in res or 'conso' in res or 'mucu' in res or 'vess' in res: #Without parenthesis always get in!
                                            try: #Lung findings
                                                other_nonodules_FP_lung[int(folder_pat)].append(file.lower().split('fp')[0])
                                            except:
                                                other_nonodules_FP_lung[int(folder_pat)]=[file.lower().split('fp')[0]]

                                        elif 'bone' in res or 'osis' in res or 'fat' in res or 'tiss' in res: #Non-lung
                                            try:
                                                other_nonodules_FP_nolung[int(folder_pat)].append(file.lower().split('fp')[0])
                                            except:
                                                other_nonodules_FP_nolung[int(folder_pat)]=[file.lower().split('fp')[0]]
                                        else:
                                            print("Cannot classify it as lung/non-lung based on description")

                                    else:
                                        other_nonodules_FP[int(folder_pat)]=[file.lower().split('fp')[0]]

                                        res=detailed_info.lower()

                                        if 'atele' in res or 'infe' in res or 'conso' in res or 'mucu' in res or 'vess' in res:
                                            try:
                                                other_nonodules_FP_lung[int(folder_pat)].append(file.lower().split('fp')[0])
                                            except:
                                                other_nonodules_FP_lung[int(folder_pat)]=[file.lower().split('fp')[0]]

                                        elif 'bone' in res or 'osis' in res or 'fat' in res or 'tiss' in res:
                                            try:
                                                other_nonodules_FP_nolung[int(folder_pat)].append(file.lower().split('fp')[0])
                                            except:
                                                other_nonodules_FP_nolung[int(folder_pat)]=[file.lower().split('fp')[0]]
                                        else:
                                            print("Cannot classify it as lung/non-lung based on description")
                                        
                                elif 'fn' in file.lower():
                                    other_FN=other_FN+1
                                    
                                    if folder_pat not in dict_FN_wrong:
                                        dict_FN_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:  
                                        dict_FN_wrong[folder_pat]=dict_FN_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                        
                                    if '_fn' in file.lower():
                                        dict_FN_wrong[folder_pat]=dict_FN_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                    
                                    if int(folder_pat) in other_nonodules_FN:
                                        other_nonodules_FN[int(folder_pat)].append(file.lower().split('fn')[0])

                                        res=detailed_info.lower()

                                        if 'atele' in res or 'infe' in res or 'conso' in res or 'mucu' in res or 'vess' in res:
                                            try:
                                                other_nonodules_FN_lung[int(folder_pat)].append(file.lower().split('fn')[0])
                                            except:
                                                other_nonodules_FN_lung[int(folder_pat)]=[file.lower().split('fn')[0]]

                                        elif 'bone' in res or 'osis' in res or 'fat' in res or 'tiss' in res:

                                            try:
                                                other_nonodules_FN_nolung[int(folder_pat)].append(file.lower().split('fn')[0])
                                            except:
                                                other_nonodules_FN_nolung[int(folder_pat)]=[file.lower().split('fn')[0]]
                                        else:
                                            print("Cannot classify it as lung/non-lung based on description")

                                    else:
                                        other_nonodules_FN[int(folder_pat)]=[file.lower().split('fn')[0]]

                                        res=detailed_info.lower()

                                        if 'atele' in res or 'infe' in res or 'conso' in res or 'mucu' in res or 'vess' in res:
                                            try:
                                                other_nonodules_FN_lung[int(folder_pat)].append(file.lower().split('fn')[0])
                                            except:
                                                other_nonodules_FN_lung[int(folder_pat)]=[file.lower().split('fn')[0]]

                                        elif 'bone' in res or 'osis' in res or 'fat' in res or 'tiss' in res:
                                            try:
                                                other_nonodules_FN_nolung[int(folder_pat)].append(file.lower().split('fn')[0])
                                            except:
                                                other_nonodules_FN_nolung[int(folder_pat)]=[file.lower().split('fn')[0]]
                                        else:
                                            print("Cannot classify it as lung/non-lung based on description")

                                else:
                                    print('ERROR IN other')
                                       

                                        
#                             else:
#                                 print('Low confidence <=3 - excluded from analysis')
#                                 excluded.append(folder+':'+file)
                                
                                
                        else: #If we don't have a description of the finding - we add those 'non-nodule' in 'other'
                            
#                             if int(confidence[0])>=4:

                                print('No information for non-nodule file',dirpath,':',folder,':',file)
                                other=other+1

                                if 'tp' in file.lower():
                                    print('This will not be considered')                    
                                    other_TP=other_TP+1
                                    tp_mistakes=tp_mistakes+1

                                elif 'fp' in file.lower() or 'ai' in file.lower():
                                    other_FP=other_FP+1
                                    
                                    if folder_pat not in dict_FP_correct:
                                        dict_FP_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        dict_FP_correct[folder_pat]=dict_FP_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                        
                                    if 'ai' in file.lower() and 'fp' not in file.lower():
                                        dict_FP_correct[folder_pat]=dict_FP_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                    
                                    if int(folder_pat) in other_nonodules_FP:
                                        other_nonodules_FP[int(folder_pat)].append(file.lower().split('fp')[0])
                                    else:
                                        other_nonodules_FP[int(folder_pat)]=[file.lower().split('fp')[0]]

                                        
                                elif 'fn' in file.lower():
                                    other_FN=other_FN+1
                                    
                                    if folder_pat not in dict_FN_wrong:
                                        dict_FN_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    else:
                                        dict_FN_wrong[folder_pat]=dict_FN_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                        
                                    if '_fn' in file.lower():
                                        dict_FN_wrong[folder_pat]=dict_FN_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                    
                                    if int(folder_pat) in other_nonodules_FN:
                                        other_nonodules_FN[int(folder_pat)].append(file.lower().split('fn')[0])
                                    else:
                                        other_nonodules_FN[int(folder_pat)]=[file.lower().split('fn')[0]]
                                        
                                else:
                                    print('ERROR IN other')


#                             else:
#                                 print('Low confidence <=3 - excluded from analysis')
#                                 excluded.append(folder+':'+file)
                                
                                
                        print('\n')

                                

                    else: #If it's not a non-nodule, it will be either 'unsure' or 'nodule'
                        
                        total_files=total_files+1 #Increase total number of files taken into account
                        unsure=[line for line in lines if 'unsure' in line.lower()] #If line contains 'unsure'
                        
                        if len(unsure)!=0: #If it's not empty, then unsure about finding
                            print('Unsure about what this finding is')
                            uncertain=uncertain+1
#                             print('Low confidence <=3 - excluded from analysis')
#                             excluded.append(folder+':'+file)
                            
                        else: #Otherwise it's a nodule
                            print('Finding is a nodule')
                            
                            nodules=[line for line in lines if 'nodule' in line.lower()] #Confirm 'nodule' in line
                            information=[info.split('nodule',1) for info in nodules][0] #split only on first occurence
                            details=[elem for elem in information if len(elem)>5] #similar as above
                            
                            if len(details)>0: #If we have a description of finding 
                                    
                                    #Clean as above plus ':'
                                    nod_desc=details[0].lower().replace('nodule','').replace('-', '').replace('\n','').replace(confidence[0],'').replace(':','').strip()
                                    print(nod_desc)
            
#                                     if int(confidence[0])>=4:
                    
                                    #Below categories for nodules

                                    if 'calc' in nod_desc:
                                        cal_nod=cal_nod+1
                                        print('Calcified nodule added')

                                        if 'tp' in file.lower():
                                            print('This will not be considered')
                                            cal_TP=cal_TP+1
                                            tp_mistakes=tp_mistakes+1

                                        elif 'fp' in file.lower() or 'ai' in file.lower():
                                            cal_FP=cal_FP+1
                                            
                                            if int(folder_pat) in calcif_FP:
                                                calcif_FP[int(folder_pat)].append(file.lower().split('fp')[0])
                                            else:
                                                calcif_FP[int(folder_pat)]=[file.lower().split('fp')[0]]

                                        elif 'fn' in file.lower():
                                            cal_FN=cal_FN+1
                                            
                                            if int(folder_pat) in calcif_FN:
                                                calcif_FN[int(folder_pat)].append(file.lower().split('fn')[0])
                                            else:
                                                calcif_FN[int(folder_pat)]=[file.lower().split('fn')[0]]

                                        else:
                                            print('ERROR IN calcified')


                                    elif 'pleu' in nod_desc:
                                        pleu_nod=pleu_nod+1
                                        print('pleural nodule added')

                                        if 'tp' in file.lower():
                                            print('This will not be considered')
                                            pleu_TP=pleu_TP+1
                                            tp_mistakes=tp_mistakes+1

                                        elif 'fp' in file.lower() or 'ai' in file.lower():
                                            pleu_FP=pleu_FP+1
                                            
                                            if int(folder_pat) in pleural_FP:
                                                pleural_FP[int(folder_pat)].append(file.lower().split('fp')[0])
                                            else:
                                                pleural_FP[int(folder_pat)]=[file.lower().split('fp')[0]]

                                        elif 'fn' in file.lower():
                                            pleu_FN=pleu_FN+1
                                            
                                            if int(folder_pat) in pleural_FN: 
                                                pleural_FN[int(folder_pat)].append(file.lower().split('fn')[0])
                                            else:
                                                pleural_FN[int(folder_pat)]=[file.lower().split('fn')[0]]

                                        else:
                                            print('ERROR IN pleural nodules')


                                    elif 'sub' in nod_desc or 'grou' in nod_desc:
                                        subgrou_nod=subgrou_nod+1
                                        print('subsolid/ground glass nodule added')

                                        if 'tp' in file.lower():
                                            print('This will not be considered')
                                            subgrou_TP=subgrou_TP+1
                                            tp_mistakes=tp_mistakes+1

                                        elif 'fp' in file.lower() or 'ai' in file.lower():
                                            subgrou_FP=subgrou_FP+1
                                            
                                            if int(folder_pat) in sub_ground_FP:
                                                sub_ground_FP[int(folder_pat)].append(file.lower().split('fp')[0])
                                            else:
                                                sub_ground_FP[int(folder_pat)]=[file.lower().split('fp')[0]]

                                        elif 'fn' in file.lower():
                                            subgrou_FN=subgrou_FN+1
                                            
                                            if int(folder_pat) in sub_ground_FN:
                                                sub_ground_FN[int(folder_pat)].append(file.lower().split('fn')[0])
                                            else:
                                                sub_ground_FN[int(folder_pat)]=[file.lower().split('fn')[0]]

                                        else:
                                            print('ERROR IN subsolid/ground class nodules')


                                    elif 'canc' in nod_desc:
                                        canc_nod=canc_nod+1
                                        print('cancer added')

                                        if 'tp' in file.lower():
                                            print('This will not be considered')
                                            canc_TP=canc_TP+1
                                            tp_mistakes=tp_mistakes+1

                                        elif 'fp' in file.lower() or 'ai' in file.lower():
                                            canc_FP=canc_FP+1
                                            
                                            if int(folder_pat) in cancer_FP:
                                                cancer_FP[int(folder_pat)].append(file.lower().split('fp')[0])
                                            else:
                                                cancer_FP[int(folder_pat)]=[file.lower().split('fp')[0]]                                           

                                        elif 'fn' in file.lower():
                                            canc_FN=canc_FN+1
                                            
                                            if int(folder_pat) in cancer_FN:
                                                cancer_FN[int(folder_pat)].append(file.lower().split('fn')[0])
                                            else:
                                                cancer_FN[int(folder_pat)]=[file.lower().split('fn')[0]]

                                        else:
                                            print('ERROR IN cancer')

                                                
#                                     else:
#                                         print('Low confidence <=3 - excluded from analysis')
#                                         excluded.append(folder+':'+file)
                        
                        
                            else:
                                
#                                 if int(confidence[0])>=4:
                                    
                                    other_nod=other_nod+1
                                    print('No information for file with nodule:',dirpath,':',folder,':',file)


                                    if 'tp' in file.lower():
                                        print('This will not be considered')
                                        other_nod_TP=other_nod_TP+1
                                        tp_mistakes=tp_mistakes+1

                                    elif 'fp' in file.lower() or 'ai' in file.lower():
                                        other_nod_FP=other_nod_FP+1
                                        
                                        if int(folder_pat) in other_nodules_FP:
                                            other_nodules_FP[int(folder_pat)].append(file.lower().split('fp')[0])
                                        else:     
                                            other_nodules_FP[int(folder_pat)]=[file.lower().split('fp')[0]]

                                    elif 'fn' in file.lower():
                                        other_nod_FN=other_nod_FN+1
                                        
                                        if int(folder_pat) in other_nodules_FN:
                                            other_nodules_FN[int(folder_pat)].append(file.lower().split('fn')[0])
                                        else:
                                            other_nodules_FN[int(folder_pat)]=[file.lower().split('fn')[0]]

                                    else:
                                        print('ERROR IN other')
                                        
                                        
#                                 else:
#                                     print('Low confidence <=3 - excluded from analysis')
#                                     excluded.append(folder+':'+file)
                                    
                                    
                            
#                             if int(confidence[0])>=4:
                            
                            nodule_all=nodule_all+1                                      

                            if 'fn' in file.lower() and 'fp' not in file.lower(): 
                            #Ensure that it was FN - second condition to confirm it
                        
                                if folder_pat not in dict_FN_correct:
                                    dict_FN_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                else:
                                    #First letters pick up the slice number
                                    dict_FN_correct[folder_pat]=dict_FN_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                    
                                    
                                if folder_pat not in nod_FN_correct:
                                    nod_FN_correct[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                else:
                                    nod_FN_correct[folder_pat]=nod_FN_correct[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]    
                                    
                                    
                                if '_fn' in file.lower():
                                    dict_FN_correct[folder_pat]=dict_FN_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                    nod_FN_correct[folder_pat]=nod_FN_correct[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]

                            elif ('fp' in file.lower() or 'ai' in file.lower()) and 'fn' not in file.lower():
                               
                                if folder_pat not in dict_FP_wrong:
                                    dict_FP_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                else:
                                    dict_FP_wrong[folder_pat]=dict_FP_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                    
                                if folder_pat not in nod_FP_wrong:
                                    nod_FP_wrong[folder_pat]=[int(''.join([x for x in file[:5] if x.isdigit()]))]
                                else:
                                    nod_FP_wrong[folder_pat]=nod_FP_wrong[folder_pat]+[int(''.join([x for x in file[:5] if x.isdigit()]))]
                        

                                if 'ai' in file.lower() and 'fp' not in file.lower():
                                    dict_FP_wrong[folder_pat]=dict_FP_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]
                                    nod_FP_wrong[folder_pat]=nod_FP_wrong[folder_pat][:-1]+[int(''.join([x for x in file[-7:-4] if x.isdigit()]))]

                        print('\n')

                                                                                                 
    print('Num of uncertainties',uncertain)
    print('\n')
        
    print("From nodules, there were {} FP, {} FN, and {} TP calcified nodules".format(cal_FP,cal_FN,cal_TP))    
    print("From nodules, there were {} FP, {} FN, and {} TP pleural nodules".format(pleu_FP,pleu_FN,pleu_TP))
    print("From nodules, there were {} FP, {} FN, and {} TP 'other' nodules".format(other_nod_FP,other_nod_FN,other_nod_TP))
    print("From nodules, there were {} FP, {} FN, and {} TP subsolid/ground class nodules".format(subgrou_FP,subgrou_FN,subgrou_TP))
    print("From nodules, there were {} FP, {} FN, and {} TP cancer cases".format(canc_FP,canc_FN,canc_TP))
    print("There are {} FP, {} FN, and {} TP atypical PFN and/or triangular lymph nodes".format(atypical_FP,atypical_FN,atypical_TP))
    print("There are {} FP, {} FN, and {} TP periphysural/fissural/PFN".format(peri_FP,peri_FN,peri_TP))
    print("There are {} FP, {} FN, and {} TP bronchiovascular lymph nodes".format(bronchperi_FP,bronchperi_FN,bronchperi_TP))
    print("There are {} peribronchial (excluded) lymph nodes - both FP and FN".format(peri))
    print('\n')

    print("There are {} FP, {} FN, and {} TP fibrosis/scar/pleural thickening".format(fibr_FP,fibr_FN,fibr_TP))
    print("There are {} FP, {} FN, and {} TP other findings (bone, tissue, mucus, arthosis, vessel, consolidation, infection, fat, atelectasis, etc. )".format(other_FP,other_FN,other_TP))
    print('\n')

    print("Total number of files is ",total_files)
    print("From those, there were {} files excluded due to low confidence <=3 and their names are: {}".format(len(excluded),excluded))

    #Confirm that non-nodules found properly
    assert fibr_scar_pleural==fibr_FP+fibr_FN+fibr_TP

    #Confirm that nodules found properly
    assert cal_nod==cal_FP+cal_FN+cal_TP
    assert pleu_nod==pleu_FP+pleu_FN+pleu_TP
    assert subgrou_nod==subgrou_FP+subgrou_FN+subgrou_TP
    assert canc_nod==canc_FP+canc_FN+canc_TP
    
    assert atypical_triang==atypical_FP+atypical_FN+atypical_TP
    assert peri_fissur==peri_FP+peri_FN+peri_TP
    # assert bronchperi==bronchperi_FP+bronchperi_FN+bronchperi_TP
    
    #Lists with nodule findings moved from non-nodule categories initially 
    assert nodule_all==cal_nod+pleu_nod+other_nod+subgrou_nod+canc_nod+atypical_triang+peri_fissur+bronchperi
    
    return (atyp_FN,per_FN,pleural_FN,calcif_FN,sub_ground_FN,cancer_FN,other_nodules_FN,other_nonodules_FN,fibrosis_FN,
            bronchioperi_FN,atyp_FP,per_FP,pleural_FP,calcif_FP,sub_ground_FP,cancer_FP,other_nodules_FP,
            other_nonodules_FP,fibrosis_FP,bronchioperi_FP,dict_FP_correct,dict_FP_wrong,dict_FN_correct,dict_FN_wrong,
            lymph_FP_wrong,lymph_FN_correct, nod_FP_wrong, nod_FN_correct,
            other_nonodules_FN_lung,other_nonodules_FN_nolung, other_nonodules_FP_lung,other_nonodules_FP_nolung)

In [109]:
### All confidence levels were included for now - in paper just mentioned those with <=3
#unsure for 1 finding - nothing reported - included after reviewed again and category reported

# Total files excluded in noemphysema due to low confidence <=3 are 13 (14 with last review) and their names are:
#['199391:199391_AI1_272.txt', '225858:368fpAIlastfinal_gray0.txt', '369762:228fnlastfinal_gray0.txt', 
#'429703:132fnlastfinal_gray0.txt','435703:129fpAIlastfinal_gray0.txt', '591162:123fnlastfinal_gray0.txt', 
#'673634:095fnlastfinal_gray0.txt','673634:096fnlastfinal_gray0.txt', '673634:138fnlastfinal_gray0.txt',
#'673634:173fnlastfinal_gray0.txt', '673634:287fpAIlastfinal_gray0.txt', '845334:845334_ai6_265.txt', 
#'951248:284fpAIlastfinal_gray00.txt']
#from those there are 6 nodules, 5 lymph nodes and 1 non-nodule (1 case unsure)

# Total files excluded in emphysema due to low confidence <=3 are 3 and their names are: 
#['282528:330fpAIlastfinal_gray0.txt', '609065:609065_112fp.txt', '991277:251fpAIlastfinal_gray0.txt']
#from those there are 1 nodule and 2 non-nodules

In [2]:
# %%capture cap --no-stderr

#Run the review summary on the no-emphysema folder (path_noemph) and keep every returned value under a
#'_noemph' suffix. The 32 names below are listed in the same order as the return tuple of the function.
#The function prints its report while it runs and finishes with assert checks on its counters,
#so this call raises an AssertionError if those counters do not add up.
(atyp_FN_noemph,per_FN_noemph,pleural_FN_noemph,calcif_FN_noemph,sub_ground_FN_noemph,cancer_FN_noemph,
 other_nodules_FN_noemph,other_nonodules_FN_noemph,fibrosis_FN_noemph,bronchioperi_FN_noemph,atyp_FP_noemph,per_FP_noemph,
 pleural_FP_noemph,calcif_FP_noemph,sub_ground_FP_noemph,cancer_FP_noemph,other_nodules_FP_noemph,other_nonodules_FP_noemph,
 fibrosis_FP_noemph,bronchioperi_FP_noemph,dict_FP_correct_noemph,dict_FP_wrong_noemph,dict_FN_correct_noemph,
 dict_FN_wrong_noemph,lymph_FP_wrong_noemph,lymph_FN_correct_noemph, nod_FP_wrong_noemph, nod_FN_correct_noemph,
 other_nonodules_FN_lung_noemph,other_nonodules_FN_nolung_noemph,
 other_nonodules_FP_lung_noemph,other_nonodules_FP_nolung_noemph)=show_information_of_review(path_noemph)

#If we want to save the cell output to txt use below and activate above
# with open('no_emph_review.txt', 'w') as f:
#     f.write(cap.stdout)

In [3]:
# %%capture cap --no-stderr

#Run the review summary on the emphysema folder (path_emph) and keep every returned value under an
#'_emph' suffix. The 32 names below are listed in the same order as the return tuple of the function.
#The function prints its report while it runs and finishes with assert checks on its counters,
#so this call raises an AssertionError if those counters do not add up.
(atyp_FN_emph,per_FN_emph,pleural_FN_emph,calcif_FN_emph,sub_ground_FN_emph,cancer_FN_emph,other_nodules_FN_emph,
 other_nonodules_FN_emph,fibrosis_FN_emph,bronchioperi_FN_emph,atyp_FP_emph,per_FP_emph,pleural_FP_emph,calcif_FP_emph,
 sub_ground_FP_emph,cancer_FP_emph,other_nodules_FP_emph,other_nonodules_FP_emph,fibrosis_FP_emph,bronchioperi_FP_emph,
 dict_FP_correct_emph,dict_FP_wrong_emph,dict_FN_correct_emph,dict_FN_wrong_emph,
 lymph_FP_wrong_emph,lymph_FN_correct_emph, nod_FP_wrong_emph, nod_FN_correct_emph,
 other_nonodules_FN_lung_emph,other_nonodules_FN_nolung_emph,
 other_nonodules_FP_lung_emph,other_nonodules_FP_nolung_emph)=show_information_of_review(path_emph)

# with open('emph_review.txt', 'w') as f:
#     f.write(cap.stdout) 

### Convert slices to ids - Use manually checked annotations

In [112]:
# #Manual corrections in dictionaries to be used

# #Non-emphysema
# print(dict_FP_correct_noemph)
# print(dict_FP_wrong_noemph)
# print(dict_FN_correct_noemph)
# print(dict_FN_wrong_noemph)

# print("\n")

# #Emphysema
# print(dict_FP_correct_emph)
# print(dict_FP_wrong_emph)
# print(dict_FN_correct_emph)
# print(dict_FN_wrong_emph)

In [113]:
#Keep only the rows that belong to each group and give every dataframe a fresh 0..n-1 index.
#NOTE: the dataframes are overwritten here, so running this cell a second time slices the already reduced data.

#Moderate emphysema rows (if only nodules [:13])
mod=mod[:28].reset_index(drop=True)

#Confluent emphysema rows - the df contains both mod+conf patients (if only nodules [33:])
conf_rows=conf[30:38]
keep_participant=conf_rows['participant_id']!=592863 #Participant 592863 is left out
conf=conf_rows[keep_participant].reset_index(drop=True)

#Advanced emphysema rows
adv=adv[:5].reset_index(drop=True)

#Participants with nodules and without emphysema (if only nodules [:67])
noemph=noemph[:88].reset_index(drop=True)

In [114]:
#The '300+' count columns are not used in the analysis - fill them with nan in every dataframe
for _frame in (mod,adv,conf,noemph):
    for _col_300 in ('300+ tp','300+ fp','300+ fn'):
        _frame[_col_300]=np.nan

In [115]:
vol_cols=[col for col in noemph.columns if 'V' in col] #Get name of columns containing volumes of AI nodules

emph_deg=['noemph_fp','adv_fp','mod_fp','conf_fp']

#Dataframes addressed by the name left after removing the '_fp' suffix from the entries of emph_deg.
#A plain lookup replaces the previous eval/exec on strings assembled at runtime.
frames_by_degree={'noemph':noemph,'adv':adv,'mod':mod,'conf':conf}

for deg in emph_deg: #Loop over emphysema degrees
    frame=frames_by_degree[deg[:-3]]

    for col in vol_cols: #Loop over columns with volumes
        #Volumes of at most 30mm3 or above 300mm3 are ignored - blank them along with the corresponding AI nod
        #This can be done since we get TP and FN from other file - This only considers FPs
        out_of_range=(frame[col]<=30) | (frame[col]>300) #nan volumes compare False and stay untouched
        if not out_of_range.any():
            continue

        ai_col='AI_nod'+str(col[1:])
        if ai_col not in frame.columns:
            raise KeyError(ai_col) #Fail as before instead of letting .loc create a new, empty column

        #One .loc call per column writes into the dataframe itself. The earlier chained form
        #frame[col].iloc[ind]=... can assign to a temporary object and leave the dataframe unchanged.
        frame.loc[out_of_range,col]=np.nan #was '-' instead of nan
        frame.loc[out_of_range,ai_col]=np.nan

In [116]:
#Keep the rows that have a participant id and at least one FP in the 0-100 or the 100-300 volume subgroup
mod_fp=mod.loc[mod['participant_id'].notna() & (mod['0-100fp'].notna() | mod['100-300fp'].notna())]
conf_fp=conf.loc[conf['participant_id'].notna() & (conf['0-100fp'].notna() | conf['100-300fp'].notna())]
adv_fp=adv.loc[adv['participant_id'].notna() & (adv['0-100fp'].notna() | adv['100-300fp'].notna())]
noemph_fp=noemph.loc[noemph['participant_id'].notna() & (noemph['0-100fp'].notna() | noemph['100-300fp'].notna())]

In [117]:
#One dictionary per group of the form {'pat_id1':[],'pat_id2':[],...}.
#Keys are the first 6 characters of the participant id written as text; every key gets its own empty list.
#The participant_id column of the FP frame is then replaced by those keys.
noemph_dict={str(pat_id)[:6]:[] for pat_id in noemph_fp['participant_id'].values}
noemph_fp['participant_id']=list(noemph_dict)

mod_dict={str(pat_id)[:6]:[] for pat_id in mod_fp['participant_id'].values}
mod_fp['participant_id']=list(mod_dict)

conf_dict={str(pat_id)[:6]:[] for pat_id in conf_fp['participant_id'].values}
conf_fp['participant_id']=list(conf_dict)

adv_dict={str(pat_id)[:6]:[] for pat_id in adv_fp['participant_id'].values}
adv_fp['participant_id']=list(adv_dict)

In [118]:
#Volume dictionaries with the same keys as the id dictionaries.
#deepcopy is required: a plain copy would share the inner lists with the id dictionaries.
noemph_dict_vol,mod_dict_vol,conf_dict_vol,adv_dict_vol=(
    copy.deepcopy(source_dict) for source_dict in (noemph_dict,mod_dict,conf_dict,adv_dict)
)

In [119]:
#Names of the columns that hold AI nodules (every column of noemph_fp whose name contains 'AI_nod')
AI_cols=[column_name for column_name in noemph_fp.columns if 'AI_nod' in column_name]

In [120]:
emph_deg=['noemph_fp','adv_fp','mod_fp','conf_fp'] #list with strings of dfs to loop

#Look the objects up by name instead of assembling source code for exec()/eval().
#Besides being hard to read, the exec() version broke whenever a cell text contained a quote or a backslash.
fp_frames={'noemph_fp':noemph_fp,'adv_fp':adv_fp,'mod_fp':mod_fp,'conf_fp':conf_fp}
fp_id_dicts={'noemph_fp':noemph_dict,'adv_fp':adv_dict,'mod_fp':mod_dict,'conf_fp':conf_dict}
fp_vol_dicts={'noemph_fp':noemph_dict_vol,'adv_fp':adv_dict_vol,'mod_fp':mod_dict_vol,'conf_fp':conf_dict_vol}

for deg in emph_deg: #Loop over emphysema degrees
    print(deg)

    frame=fp_frames[deg]
    id_dict=fp_id_dicts[deg] #{participant: [one entry per AI nodule column]}
    vol_dict=fp_vol_dicts[deg] #same layout, holding the volumes as text

    for ind_col,col in enumerate(AI_cols): #Loop over AI nodule columns

        #nan becomes '-' and everything becomes text, otherwise we cannot check for 'L' below
        frame[col]=frame[col].fillna('-').astype(str)

        #Split the rows on whether this AI_nod column contains 'L' (denotes a TP) or not
        is_tp=frame[col].str.contains('L')
        temp=frame[~is_tp] #FPs (also rows where the column was empty)
        temp_tp=frame[is_tp] #TPs

        for ind,pat in enumerate(temp['participant_id']): #Loop over all participants with FP in a specific AI col

            try: #Report the problem and go on with the next participant
                #Positional access: assumes the AI nodule columns start at position 1 and the
                #volume columns at position 11 of the frame - TODO confirm against the Excel layout
                finding=temp.iloc[ind,ind_col+1]
                nod_id=finding[finding.find('L')+1:].split(' ')[0] #Text up to the first space is the actual id
                vol=temp.iloc[ind,ind_col+11] #Volume of that finding

                id_dict[str(pat)].append(nod_id) #Add that to the dictionary
                #When there is no volume (nan) '-' is stored instead
                vol_dict[str(pat)].append('-' if pd.isnull(vol) else str(vol))
            except Exception:
                traceback.print_exc() #print_exc() prints by itself; wrapping it in print() only added a stray 'None'

        for ind,pat in enumerate(temp_tp['participant_id']): #Loop over all participants with TP in a specific AI col

            try: #TPs only get a placeholder so that list position i always belongs to AI column i
                id_dict[str(pat)].append('-')
                vol_dict[str(pat)].append('-')
            except Exception:
                traceback.print_exc()

#Only the last expression of a cell is rendered by default, i.e. conf_fp
noemph_fp
adv_fp
mod_fp
conf_fp


In [4]:
# noemph_dict

#### Check below IDs again - They have nodules not reviewed by radiologists

In [5]:
#Confirm all participants' nodules counted - These are nodules not reviewed - Should be checked again

print("All participants below have nodules not reviewed yet. We should not have any participants printed below at the end. \n\
For now we do, since we manually deleted those in which AI detected a finding >30mm3 but manually measured <30mm3.")
print("\n")

#Explicit lookup tables instead of eval() on assembled variable names
ai_fp_dicts={'noemph_fp':noemph_dict,'adv_fp':adv_dict,'mod_fp':mod_dict,'conf_fp':conf_dict}
reviewed_fp_dicts={'noemph':(dict_FP_wrong_noemph,dict_FP_correct_noemph),
                   'emph':(dict_FP_wrong_emph,dict_FP_correct_emph)}

for deg in emph_deg: #Loop over emphysema degrees

    #The review dictionaries only distinguish emphysema from no emphysema
    review_group='noemph' if deg=='noemph_fp' else 'emph'

    for pat,findings in ai_fp_dicts[deg].items(): #For each participant in a given degree

        temp=[x for x in findings if x!='-'] #FPs found by AI - '-' values are placeholders and ignored

        #Number of reviewed FP slices: a participant may be in either, both or none of the two dictionaries
        counted=sum(len(reviewed[pat]) for reviewed in reviewed_fp_dicts[review_group] if pat in reviewed)

        #If the counts do not match then we missed some slices.
        #A plain comparison is used: an assert would be skipped when Python runs with -O.
        if len(temp)!=counted:
            print('Missing FP slice(s) for participant',pat,'with',review_group+'ysema')

All participants below have nodules not reviewed yet. We should not have any participants printed below at the end. 
For now we do, since we manually deleted those in which AI detected a finding >30mm3 but manually measured <30mm3.




In [123]:
#Independent (deep) copies of the reviewed FP dictionaries; their slice numbers get replaced by nodule ids later on
(dict_FP_wrong_noemph_ids,
 dict_FP_correct_noemph_ids,
 dict_FP_wrong_emph_ids,
 dict_FP_correct_emph_ids)=map(copy.deepcopy,(dict_FP_wrong_noemph,
                                              dict_FP_correct_noemph,
                                              dict_FP_wrong_emph,
                                              dict_FP_correct_emph))

#The same for the dictionaries that hold lymph nodes only and nodules only
(lymph_FP_wrong_noemph_ids,
 lymph_FP_wrong_emph_ids,
 nod_FP_wrong_noemph_ids,
 nod_FP_wrong_emph_ids)=map(copy.deepcopy,(lymph_FP_wrong_noemph,
                                           lymph_FP_wrong_emph,
                                           nod_FP_wrong_noemph,
                                           nod_FP_wrong_emph))

In [124]:
#Create new dictionaries 'correct' and 'wrong' with FP indices:
#every reviewed slice number in the '_ids' dictionaries is replaced by the id (1-based position) of the
#AI finding whose slice range contains it.

pat_manual_check=[] #Participants in whom nodules should be filled in manually (overlapping slice ranges)

#Explicit lookup tables instead of eval()/exec() on assembled variable names
frames_by_degree={'noemph':noemph,'adv':adv,'mod':mod,'conf':conf}
ai_dicts_by_degree={'noemph':noemph_dict,'adv':adv_dict,'mod':mod_dict,'conf':conf_dict}
reviewed_ids_by_group={
    'noemph':(dict_FP_wrong_noemph_ids,dict_FP_correct_noemph_ids,lymph_FP_wrong_noemph_ids,nod_FP_wrong_noemph_ids),
    'emph':(dict_FP_wrong_emph_ids,dict_FP_correct_emph_ids,lymph_FP_wrong_emph_ids,nod_FP_wrong_emph_ids),
}

def _slices_of(finding):
    """Return the slice numbers of a 'first-last' finding as a range (the last number itself is excluded).

    Raises ValueError or IndexError when the text is not of that form.
    """
    bounds=finding.split('-')
    return range(int(bounds[0]),int(bounds[1]))

for deg in emph_deg: #Loop over emphysema degree

    deg=deg[:-3] #Keep name of emphysema degree only, without '_fp'
    temp_deg='emph' if deg!='noemph' else deg #The reviewed dictionaries only know 'emph' and 'noemph'

    for pat in frames_by_degree[deg]['participant_id']: #Loop over participants in each emphysema degree

        #Only the first 6 digits of a textual participant_id are kept; a numeric one is assumed to be the 6 digit id.
        #Values that cannot be converted (e.g. nan) are left untouched and simply find no match below.
        try:
            pat=int(pat[:6]) if isinstance(pat,str) else int(pat)
        except (TypeError,ValueError,OverflowError):
            pass

        #A participant might not exist in the FP dictionaries - only TP and/or FNs
        findings=ai_dicts_by_degree[deg].get(str(pat))
        if findings is None:
            continue

        #First pass: the slice ranges of one participant must not overlap, otherwise slices
        #cannot be mapped to ids in a unique way
        seen_slices=set()
        try:
            for elem in findings: #Loop over all findings of the participant
                if elem!='-': #If there is a value in that finding
                    for i in _slices_of(elem):
                        if i not in seen_slices:
                            seen_slices.add(i)
                        elif pat not in pat_manual_check: #Overlap: remember the participant once
                            pat_manual_check.append(pat)
        except (ValueError,IndexError):
            #Used to be swallowed silently; the remaining findings of this participant are still skipped
            print('Could not read the slice range of a finding of participant',pat,'- remaining findings skipped')

        if pat in pat_manual_check: #No 1-to-1 mapping, has to be done by hand
            continue

        #Second pass: replace every reviewed slice number by the id of the finding that contains it
        try:
            for AI_ind,elem in enumerate(findings):
                if elem!='-':
                    for i in _slices_of(elem): #Loop over the range of slices
                        for reviewed_ids in reviewed_ids_by_group[temp_deg]:
                            #The participant may be missing from any of the four dictionaries
                            slice_list=reviewed_ids.get(str(pat),[])
                            for ind,reviewed_slice in enumerate(slice_list):
                                if reviewed_slice==i: #If we found that slice replace it with id
                                    slice_list[ind]=AI_ind+1
        except (ValueError,IndexError):
            pass #Already reported in the first pass; findings before the unreadable one stay mapped

print("The following participants have to be checked manually to map slices to ids:",pat_manual_check)

The following participants have to be checked manually to map slices to ids: [673634]


Check whether the FPs in the dictionaries are as expected so far

In [125]:
#A finding must not appear in both the 'correct' and the 'wrong' dictionary of the same participant.
#Each entry: (text printed first, dictionary that is scanned, dictionary it is compared with, text printed last)
#The texts are kept exactly as they were (the first one has a leading space).
noemph_overlap_checks=[
    ("No emphysema and FP_correct",dict_FP_correct_noemph_ids,dict_FP_wrong_noemph_ids," should be checked manually"),
    ("No emphysema and FP_wrong",dict_FP_wrong_noemph_ids,dict_FP_correct_noemph_ids,"should be checked manually"),
]
emph_overlap_checks=[
    ("Emphysema FP_correct",dict_FP_correct_emph_ids,dict_FP_wrong_emph_ids,"should be checked manually"),
    ("Emphysema FP_wrong",dict_FP_wrong_emph_ids,dict_FP_correct_emph_ids,"should be checked manually"),
]

for group_number,overlap_checks in enumerate([noemph_overlap_checks,emph_overlap_checks]):
    if group_number>0:
        print("\n") #Separates the no emphysema report from the emphysema report

    for first_text,scanned,compared,last_text in overlap_checks:
        for pat,nodule_ids in scanned.items(): #Loop over participants
            #.get instead of try/except: the participant might not exist in the other dictionary
            other_ids=compared.get(pat,[])
            for nodule_id in nodule_ids: #'nodule_id' rather than 'id', which would shadow the builtin
                if nodule_id in other_ids: #This ID is in both dictionaries
                    print(first_text,pat,nodule_id,last_text) #Should fix that manually





In [126]:
#Delete participants with volumes <30mm3 or >300mm3 or those cases above that mapping is not possible

#Values above this limit are treated as slice numbers that were never replaced by an AI nodule id
#(presumably because there are at most 10 AI nodule columns - TODO confirm)
MAX_AI_NODULE_ID=10

def _blank_unmapped(ids_dict,text_before_key,text_after_key):
    """Replace by '-' every list entry of ``ids_dict`` that is larger than MAX_AI_NODULE_ID.

    ids_dict -- {participant: [values]}; the lists are modified in place.
    text_before_key, text_after_key -- printed around the participant for every entry that cannot be
        read as an integer (e.g. a '-' that is already in the list).
    """
    for key,values in ids_dict.items():
        for ind,val in enumerate(values):
            try:
                is_unmapped=int(val)>MAX_AI_NODULE_ID
            except (TypeError,ValueError,OverflowError):
                print(text_before_key+str(key)+text_after_key)
                continue
            if is_unmapped: #For cases with errors
                values[ind]='-'

def _drop_blanked(ids_dict,dict_name,summary_label=None):
    """Delete every participant of ``ids_dict`` whose list contains at least one '-'.

    Note that one '-' is enough (this is also what the former np.unique(values)[0]=='-' test did);
    an empty list is kept, whereas the former test raised an IndexError on it.

    dict_name -- name printed in the 'deleted' message.
    summary_label -- when given, the length of each affected list is printed first under this label.
    """
    flagged=[key for key,values in ids_dict.items() if '-' in values]

    if summary_label is not None:
        for key in flagged:
            print('For '+summary_label+' we have',len(ids_dict[key]),'"-" values for',key)

    for key in flagged: #Collected first: a dictionary cannot be changed while looping over it
        del ids_dict[key]
        print(dict_name+"['"+str(key)+"']"+'deleted')

#Explicit lookup table instead of eval()/exec(): (FP wrong, FP correct, lymph nodes only, nodules only)
ids_dicts_by_group={
    'noemph':(dict_FP_wrong_noemph_ids,dict_FP_correct_noemph_ids,lymph_FP_wrong_noemph_ids,nod_FP_wrong_noemph_ids),
    'emph':(dict_FP_wrong_emph_ids,dict_FP_correct_emph_ids,lymph_FP_wrong_emph_ids,nod_FP_wrong_emph_ids),
}

for deg in emph_deg: #Loop over emphysema degree
    print("Below are for ",deg[:-3],"emphysema")

    deg=deg[:-3] #Keep name of emphysema degree only, without '_fp'
    temp_deg='emph' if deg!='noemph' else deg #This is used below for dictionary names

    #The 'emph' dictionaries are visited once per emphysema degree; after the first visit nothing is left to change
    wrong_ids,correct_ids,lymph_ids,nod_ids=ids_dicts_by_group[temp_deg]
    wrong_name='dict_FP_wrong_'+temp_deg+'_ids'
    correct_name='dict_FP_correct_'+temp_deg+'_ids'
    lymph_name='lymph_FP_wrong_'+temp_deg+'_ids'
    nod_name='nod_FP_wrong_'+temp_deg+'_ids'

    #FP wrong / FP correct: mark unmapped values, then delete the participants that have any
    _blank_unmapped(wrong_ids,wrong_name+"['","']not deleted")
    _blank_unmapped(correct_ids,correct_name+"['","']not deleted")
    _drop_blanked(wrong_ids,wrong_name,'dict_FP_wrong')
    _drop_blanked(correct_ids,correct_name,'dict_FP_correct')

    #Similar for lymph nodes only and nod only dictionaries
    _blank_unmapped(lymph_ids,lymph_name+"[","] not deleted")
    _blank_unmapped(nod_ids,nod_name+"[","] not deleted")
    _drop_blanked(lymph_ids,lymph_name)
    _drop_blanked(nod_ids,nod_name)

Below are for  noemph emphysema
For dict_FP_wrong we have 1 "-" values for 673634
dict_FP_wrong_noemph_ids['673634']deleted
For dict_FP_correct we have 1 "-" values for 673634
dict_FP_correct_noemph_ids['673634']deleted
nod_FP_wrong_noemph_ids['673634']deleted
Below are for  adv emphysema
Below are for  mod emphysema
Below are for  conf emphysema


In [127]:
pat_manual_check #Display the participants collected above whose slices have to be mapped to nodule ids manually

[673634]

In [128]:
#Checks again after the clean-up: no finding may be in both the 'correct' and the 'wrong' dictionary of a participant.
#(The groups are no emphysema / emphysema; earlier comments mentioning BMI were left over from another analysis.)
#Each entry: (text printed first, dictionary that is scanned, dictionary it is compared with, text printed last)
noemph_overlap_checks=[
    ("No emphysema and FP_correct",dict_FP_correct_noemph_ids,dict_FP_wrong_noemph_ids," should be checked manually"),
    ("No emphysema and FP_wrong",dict_FP_wrong_noemph_ids,dict_FP_correct_noemph_ids,"should be checked manually"),
]
emph_overlap_checks=[
    ("Emphysema FP_correct",dict_FP_correct_emph_ids,dict_FP_wrong_emph_ids,"should be checked manually"),
    ("Emphysema FP_wrong",dict_FP_wrong_emph_ids,dict_FP_correct_emph_ids,"should be checked manually"),
]

for group_number,overlap_checks in enumerate([noemph_overlap_checks,emph_overlap_checks]):
    if group_number>0:
        print("\n") #Separates the no emphysema report from the emphysema report

    for first_text,scanned,compared,last_text in overlap_checks:
        for pat,nodule_ids in scanned.items(): #Loop over participants
            #.get instead of try/except: the participant might not exist in the other dictionary
            other_ids=compared.get(pat,[])
            for nodule_id in nodule_ids: #'nodule_id' rather than 'id', which would shadow the builtin
                if nodule_id in other_ids: #This ID is in both dictionaries
                    print(first_text,pat,nodule_id,last_text) #Should fix that manually





Further checks based on number of nodules, lymph nodes, and non-nodules

In [129]:
#Number of reviewed FP findings per category for the emphysema group.
#The totals no longer live in a variable called 'all', which replaced the builtin all() for the rest of the session.
print("Emphysema Cases")

non_nodules=sum(len(findings) for findings in dict_FP_correct_emph.values())
print('Non-nodules:',non_nodules)

FP_lymph_wrong=sum(len(findings) for findings in lymph_FP_wrong_emph.values())
print("Lymph nodes:",FP_lymph_wrong)

FP_nod_wrong=sum(len(findings) for findings in nod_FP_wrong_emph.values())
print("Nodules",FP_nod_wrong)

#All findings = non-nodules (FP correct) + everything in FP wrong
all_findings=non_nodules+sum(len(findings) for findings in dict_FP_wrong_emph.values())
print("All findings:",all_findings)

Emphysema Cases
Non-nodules: 20
Lymph nodes: 3
Nodules 19
All findings: 42


In [130]:
#Number of reviewed FP findings per category for the group without emphysema.
#The totals no longer live in a variable called 'all', which replaced the builtin all() for the rest of the session.
print("Non-emphysema cases")

non_nodules=sum(len(findings) for findings in dict_FP_correct_noemph.values())
print('Non-nodules:',non_nodules)

FP_lymph_wrong=sum(len(findings) for findings in lymph_FP_wrong_noemph.values())
print("Lymph nodes:",FP_lymph_wrong)

FP_nod_wrong=sum(len(findings) for findings in nod_FP_wrong_noemph.values())
print("Nodules",FP_nod_wrong)

#All findings = non-nodules (FP correct) + everything in FP wrong
all_findings=non_nodules+sum(len(findings) for findings in dict_FP_wrong_noemph.values())
print("All findings:",all_findings)

Non-emphysema cases
Non-nodules: 18
Lymph nodes: 7
Nodules 20
All findings: 45


In [131]:
#Total number of findings stored in each of the four reviewed FP dictionaries
FP_wrong_emph=np.sum(list(map(len,dict_FP_wrong_emph.values())))
FP_wrong_noemph=np.sum(list(map(len,dict_FP_wrong_noemph.values())))
FP_correct_emph=np.sum(list(map(len,dict_FP_correct_emph.values())))
FP_correct_noemph=np.sum(list(map(len,dict_FP_correct_noemph.values())))

print("Total FP wrong for emph are:",FP_wrong_emph)
print("Total FP wrong for noemph are:",FP_wrong_noemph)
print("Total FP correct for emph are:",FP_correct_emph)
print("Total FP correct for noemph are:",FP_correct_noemph)

Total FP wrong for emph are: 22
Total FP wrong for noemph are: 27
Total FP correct for emph are: 20
Total FP correct for noemph are: 18


Manually add participants for whom a unique mapping wasn't possible - Atypical lymph nodes considered as nodules below

In [132]:
#Hand-entered findings for the participants for who a unique mapping wasn't possible.
#Plain dictionaries hold slice numbers, the '_ids' dictionaries the matching nodule ids.
#'correct' = radiologists said 'not nodule' (but not lymph node); 'wrong' = radiologists said 'nodule' or lymph node.

#--- Participant 673634 (no emphysema) ---
#Slices 287,306,224 (ids 5,6,7) are left out of the 'correct' lists: they were moved here from the 'wrong'
#lists because they should be treated as FP (nonods) due to manually measured vol<30mm3
dict_FP_correct_noemph['673634']=[287]
dict_FP_correct_noemph_ids['673634']=[1]
dict_FP_wrong_noemph['673634']=[254]
dict_FP_wrong_noemph_ids['673634']=[4]
#Nodules only dictionary - same change as above due to manually measured vol<30mm3
nod_FP_wrong_noemph['673634']=[254]
nod_FP_wrong_noemph_ids['673634']=[4]

#--- Participant 971099 (emphysema) ---
dict_FP_wrong_emph['971099']=[89,352,382]
dict_FP_wrong_emph_ids['971099']=[8,6,5]
dict_FP_correct_emph['971099']=[129,233,255,363]
dict_FP_correct_emph_ids['971099']=[3,4,2,7]
#These are atypical lymph nodes that are considered as nodules, so they are stored in the nodules only
#dictionary and not in lymph_FP_wrong_emph / lymph_FP_wrong_emph_ids
nod_FP_wrong_emph['971099']=[89,352,382]
nod_FP_wrong_emph_ids['971099']=[8,6,5]

#--- Participant 845594 (emphysema) ---
dict_FP_correct_emph['845594']=[128,99,337,369]
dict_FP_correct_emph_ids['845594']=[3,4,5,6]

In [133]:
#Checks again after the manual additions.
#(The groups are no emphysema / emphysema; earlier comments mentioning BMI were left over from another analysis.)

#1) No finding may be in both the 'correct' and the 'wrong' dictionary of a participant.
#Each entry: (text printed first, dictionary that is scanned, dictionary it is compared with, text printed last)
noemph_overlap_checks=[
    ("No emphysema and FP_correct",dict_FP_correct_noemph_ids,dict_FP_wrong_noemph_ids," should be checked manually"),
    ("No emphysema and FP_wrong",dict_FP_wrong_noemph_ids,dict_FP_correct_noemph_ids,"should be checked manually"),
]
emph_overlap_checks=[
    ("Emphysema FP_correct",dict_FP_correct_emph_ids,dict_FP_wrong_emph_ids,"should be checked manually"),
    ("Emphysema FP_wrong",dict_FP_wrong_emph_ids,dict_FP_correct_emph_ids,"should be checked manually"),
]

for group_number,overlap_checks in enumerate([noemph_overlap_checks,emph_overlap_checks]):
    if group_number>0:
        print("\n") #Separates the no emphysema report from the emphysema report

    for first_text,scanned,compared,last_text in overlap_checks:
        for pat,nodule_ids in scanned.items(): #Loop over participants
            #.get instead of try/except: the participant might not exist in the other dictionary
            other_ids=compared.get(pat,[])
            for nodule_id in nodule_ids: #'nodule_id' rather than 'id', which would shadow the builtin
                if nodule_id in other_ids: #This ID is in both dictionaries
                    print(first_text,pat,nodule_id,last_text) #Should fix that manually


#2) For lymph nodes and nodules: no finding may be both a lymph node and a nodule.
#NOTE(review): only the emphysema dictionaries are compared here, the no emphysema ones are not - confirm this is intended
lymph_nodule_checks=[
    ("Emphysema lymph_FP_wrong",lymph_FP_wrong_emph_ids,nod_FP_wrong_emph_ids),
    ("Emphysema nod_FP_wrong",nod_FP_wrong_emph_ids,lymph_FP_wrong_emph_ids),
]

for first_text,scanned,compared in lymph_nodule_checks:
    for pat,nodule_ids in scanned.items(): #Loop over participants
        other_ids=compared.get(pat,[]) #The participant might not exist in the other dictionary
        for nodule_id in nodule_ids:
            if nodule_id in other_ids: #This ID is in both dictionaries
                print(first_text,pat,'with ID',nodule_id,"should be checked manually")





In [134]:
#Number of reviewed FP findings per category for the emphysema group (after the manual additions)
#NOTE(review): 'all' hides the builtin all(); the name is kept in case later cells read it
all=0
for pat,findings in dict_FP_correct_emph.items():
    all+=len(findings)
print('Non-nodules:',all)

FP_lymph_wrong=0
for pat,findings in lymph_FP_wrong_emph.items():
    FP_lymph_wrong+=len(findings)
print("Lymph nodes:",FP_lymph_wrong)

FP_nod_wrong=0
for pat,findings in nod_FP_wrong_emph.items():
    FP_nod_wrong+=len(findings)
print("Nodules",FP_nod_wrong)

#The findings in FP wrong are added on top of the non-nodules to get the overall total
for pat,findings in dict_FP_wrong_emph.items():
    all+=len(findings)
print("All findings:",all)

Non-nodules: 20
Lymph nodes: 3
Nodules 19
All findings: 42


In [135]:
#Totals of the reviewed FP dictionaries once more (after the manual additions): findings summed over all participants
FP_wrong_emph=np.sum([len(findings) for findings in dict_FP_wrong_emph.values()])
FP_wrong_noemph=np.sum([len(findings) for findings in dict_FP_wrong_noemph.values()])
FP_correct_emph=np.sum([len(findings) for findings in dict_FP_correct_emph.values()])
FP_correct_noemph=np.sum([len(findings) for findings in dict_FP_correct_noemph.values()])

print("Total FP wrong for emph are:",FP_wrong_emph)
print("Total FP wrong for noemph are:",FP_wrong_noemph)
print("Total FP correct for emph are:",FP_correct_emph)
print("Total FP correct for noemph are:",FP_correct_noemph)

Total FP wrong for emph are: 22
Total FP wrong for noemph are: 27
Total FP correct for emph are: 20
Total FP correct for noemph are: 18


In [136]:
#List of the dictionaries (participant -> nodule ids) of every finding category.
#Order: no emphysema FN, no emphysema FP, emphysema FN, emphysema FP - ten categories each.
all_categories=(
    #No emphysema - FN
    [atyp_FN_noemph,per_FN_noemph,pleural_FN_noemph,calcif_FN_noemph,sub_ground_FN_noemph,
     cancer_FN_noemph,other_nodules_FN_noemph,other_nonodules_FN_noemph,fibrosis_FN_noemph,
     bronchioperi_FN_noemph]
    #No emphysema - FP
    +[atyp_FP_noemph,per_FP_noemph,pleural_FP_noemph,calcif_FP_noemph,sub_ground_FP_noemph,
      cancer_FP_noemph,other_nodules_FP_noemph,other_nonodules_FP_noemph,fibrosis_FP_noemph,
      bronchioperi_FP_noemph]
    #Emphysema - FN
    +[atyp_FN_emph,per_FN_emph,pleural_FN_emph,calcif_FN_emph,sub_ground_FN_emph,
      cancer_FN_emph,other_nodules_FN_emph,other_nonodules_FN_emph,fibrosis_FN_emph,
      bronchioperi_FN_emph]
    #Emphysema - FP
    +[atyp_FP_emph,per_FP_emph,pleural_FP_emph,calcif_FP_emph,sub_ground_FP_emph,
      cancer_FP_emph,other_nodules_FP_emph,other_nonodules_FP_emph,fibrosis_FP_emph,
      bronchioperi_FP_emph]
)

In [137]:
#Names (as strings) of the category dictionaries: no emphysema FN, no emphysema FP, emphysema FN, emphysema FP,
#ten categories each.
#The names are generated from their parts so that a single one cannot be mistyped any more
#(the hand-written list contained 'pleural_FP_noemp' instead of 'pleural_FP_noemph').
_category_prefixes=['atyp','per','pleural','calcif','sub_ground','cancer','other_nodules','other_nonodules',
                    'fibrosis','bronchioperi']

name_cats=[prefix+'_'+finding_type+'_'+group
           for group in ('noemph','emph')       #Until index 19 no emphysema, from index 20 emphysema
           for finding_type in ('FN','FP')
           for prefix in _category_prefixes]

In [138]:
#Save dictionaries to be used from 'patient_selection_emphysema_experiment.ipynb' file to match slices with ids
#Every dictionary is written to its own pickle file in the current working directory.
fn_dicts_to_save=[
    ('dict_FN_wrong_emph.pickle',dict_FN_wrong_emph),
    ('dict_FN_correct_emph.pickle',dict_FN_correct_emph),
    ('dict_FN_wrong_noemph.pickle',dict_FN_wrong_noemph),
    ('dict_FN_correct_noemph.pickle',dict_FN_correct_noemph),
    #Same for lymph nodes only and nodules only dictionaries
    ('lymph_FN_correct_emph.pickle',lymph_FN_correct_emph),
    ('lymph_FN_correct_noemph.pickle',lymph_FN_correct_noemph),
    ('nod_FN_correct_emph.pickle',nod_FN_correct_emph),
    ('nod_FN_correct_noemph.pickle',nod_FN_correct_noemph),
]

for pickle_name,fn_dict in fn_dicts_to_save:
    with open(pickle_name,'wb') as f:
        pickle.dump(fn_dict,f)

### Get volume subgroups for nodules/non-nodules for each of emphysema/non-emphysema

##### AI found, reader missed

In [139]:
#Counters of findings that AI found and the reader missed, per category, group and volume subgroup.
#All of them start at 0 (chained assignment is safe here because integers are immutable).
#Nodules (+lymph nodes) can also be found by just adding nodules only + lymph nodes only.

#Non-nodules
ai_nonods_noemph_30_100=ai_nonods_noemph_100_300=ai_nonods_noemph_300=0
ai_nonods_emph_30_100=ai_nonods_emph_100_300=ai_nonods_emph_300=0

#Nodules only
ai_only_nods_noemph_30_100=ai_only_nods_noemph_100_300=ai_only_nods_noemph_300=0
ai_only_nods_emph_30_100=ai_only_nods_emph_100_300=ai_only_nods_emph_300=0

#Lymph nodes only
ai_lymph_noemph_30_100=ai_lymph_noemph_100_300=ai_lymph_noemph_300=0
ai_lymph_emph_30_100=ai_lymph_emph_100_300=ai_lymph_emph_300=0

In [140]:
#Lists that collect the volume of every finding, per category, group and volume subgroup
#(needed if the groups are compared with the Mann-Whitney U test below).
#Every name gets its own empty list, so appending to one never affects another.
#Nodules (+lymph nodes) can also be found by just adding nodules only + lymph nodes only.

#Non-nodules
ai_nonods_noemph_30_100_vols,ai_nonods_noemph_100_300_vols,ai_nonods_noemph_300_vols=[],[],[]
ai_nonods_emph_30_100_vols,ai_nonods_emph_100_300_vols,ai_nonods_emph_300_vols=[],[],[]

#Nodules only
ai_only_nods_noemph_30_100_vols,ai_only_nods_noemph_100_300_vols,ai_only_nods_noemph_300_vols=[],[],[]
ai_only_nods_emph_30_100_vols,ai_only_nods_emph_100_300_vols,ai_only_nods_emph_300_vols=[],[],[]

#Lymph nodes only
ai_lymph_noemph_30_100_vols,ai_lymph_noemph_100_300_vols,ai_lymph_noemph_300_vols=[],[],[]
ai_lymph_emph_30_100_vols,ai_lymph_emph_100_300_vols,ai_lymph_emph_300_vols=[],[],[]

Get the number of findings in each volume subgroup for the lymph-node-only, nodule-only, and non-nodule categories, in the emphysema and non-emphysema groups

In [141]:
#For lymph node subgroup with/without emphysema
#Fills the 'ai_lymph_<group>_<subgroup>' counters and the matching '_vols' lists defined in the cells above

#Look-up table instead of eval()-built variable names
_lymph_fp_ids={'emph':lymph_FP_wrong_emph_ids,'noemph':lymph_FP_wrong_noemph_ids}

for emph in ['emph','noemph']:
    total=0 #count total number of findings with volume in 30-300mm3

    for pat,nod_ids in _lymph_fp_ids[emph].items(): #loop over participants

        for nod_id in nod_ids: #Loop over nodule ids (id k is stored at position k-1 of the volume list)

            if emph=='noemph':
                #Plain float() as in the nodule/non-nodule cells - no eval() of the stored value
                vol=float(noemph_dict_vol[pat][nod_id-1]) #Get volume of that nodule id
            else: #For emphysema groups volume will be taken from the corresponding degree of that participant
                for _vol_dict in (mod_dict_vol,conf_dict_vol):
                    try:
                        vol=float(_vol_dict[pat][nod_id-1])
                        break
                    except Exception: #participant/nodule not available in this degree - try the next one
                        continue
                else: #last candidate: an error here is a real problem and is allowed to surface
                    vol=float(adv_dict_vol[pat][nod_id-1])

            #Select the volume subgroup of the finding
            if vol>=30 and vol<=100:
                subgroup='30_100'
            elif vol>100 and vol<=300:
                subgroup='100_300'
            elif vol>300:
                subgroup='300' #Should stay empty
            else:
                print('For participant {} volume is smaller than 30mm3'.format(pat))
                continue

            #Increase the counter of that subgroup and store the volume in the corresponding list
            _counter_name='ai_lymph_'+emph+'_'+subgroup
            globals()[_counter_name]+=1
            globals()[_counter_name+'_vols'].append(vol)
            if subgroup!='300': #findings above 300mm3 are not part of the total
                total=total+1

    print('Total lymph nodes in {} group is {}'.format(emph,total))

Total lymph nodes in emph group is 3
Total lymph nodes in noemph group is 7


In [142]:
#Similarly for nodule only group with/without emphysema - from discrepancies
#Fills the 'ai_only_nods_<group>_<subgroup>' counters and the matching '_vols' lists defined in the cells above

#Look-up table instead of eval()-built variable names
_nod_fp_ids={'emph':nod_FP_wrong_emph_ids,'noemph':nod_FP_wrong_noemph_ids}

for emph in ['emph','noemph']:

    total=0 #number of findings with volume in 30-300mm3

    for pat,nod_ids in _nod_fp_ids[emph].items(): #loop over participants

        for nod_id in nod_ids: #nodule id k is stored at position k-1 of the volume list

            if emph=='noemph':
                vol=float(noemph_dict_vol[pat][nod_id-1])
            else: #volume comes from the emphysema degree in which the participant is found
                for _vol_dict in (mod_dict_vol,conf_dict_vol):
                    try:
                        vol=float(_vol_dict[pat][nod_id-1])
                        break
                    except Exception: #not available in this degree - try the next one
                        continue
                else: #last candidate: an error here is allowed to surface
                    vol=float(adv_dict_vol[pat][nod_id-1])

            if vol>=30 and vol<=100:
                subgroup='30_100'
            elif vol>100 and vol<=300:
                subgroup='100_300'
            elif vol>300:
                subgroup='300' #Should stay empty
            else:
                print('For participant {} volume is smaller than 30mm3'.format(pat))
                continue

            #Increase the counter of that subgroup and store the volume in the corresponding list
            _counter_name='ai_only_nods_'+emph+'_'+subgroup
            globals()[_counter_name]+=1
            globals()[_counter_name+'_vols'].append(vol)
            if subgroup!='300': #findings above 300mm3 are not part of the total
                total=total+1

    print('Total nodules in {} group is {}'.format(emph,total))

Total nodules in emph group is 19
Total nodules in noemph group is 20


In [143]:
#Similarly for non-nodule emphysema/non-emphysema groups
#Fills the 'ai_nonods_<group>_<subgroup>' counters and the matching '_vols' lists defined in the cells above

#Look-up table instead of eval()-built variable names
_nonod_fp_ids={'emph':dict_FP_correct_emph_ids,'noemph':dict_FP_correct_noemph_ids}

for emph in ['emph','noemph']:
    total=0 #number of findings with volume in 30-300mm3

    for pat,nod_ids in _nonod_fp_ids[emph].items(): #loop over participants

        for nod_id in nod_ids: #nodule id k is stored at position k-1 of the volume list

            if emph=='noemph':
                vol=float(noemph_dict_vol[pat][nod_id-1])
            else: #volume comes from the emphysema degree in which the participant is found
                for _vol_dict in (mod_dict_vol,conf_dict_vol):
                    try:
                        vol=float(_vol_dict[pat][nod_id-1])
                        break
                    except Exception: #not available in this degree - try the next one
                        continue
                else: #last candidate: an error here is allowed to surface
                    vol=float(adv_dict_vol[pat][nod_id-1])

            if vol>=30 and vol<=100:
                subgroup='30_100'
            elif vol>100 and vol<=300:
                subgroup='100_300'
            elif vol>300:
                subgroup='300' #Should stay empty
            else:
                print('For participant {} volume is smaller than 30mm3'.format(pat))
                continue

            #Increase the counter of that subgroup and store the volume in the corresponding list
            _counter_name='ai_nonods_'+emph+'_'+subgroup
            globals()[_counter_name]+=1
            globals()[_counter_name+'_vols'].append(vol)
            if subgroup!='300': #findings above 300mm3 are not part of the total
                total=total+1

    print('Total non-nodules in {} group is {}'.format(emph,total))

Total non-nodules in emph group is 20
Total non-nodules in noemph group is 18


In [144]:
#Confirm that no participant id is a key of more than one of the mod/conf/adv volume dictionaries:
#the unique key counts of the three dictionaries must add up to the unique key count of all keys pooled together
_degree_keys=[list(vol_dict.keys()) for vol_dict in (mod_dict_vol,conf_dict_vol,adv_dict_vol)]
_pooled_keys=_degree_keys[0]+_degree_keys[1]+_degree_keys[2]
assert sum(len(np.unique(keys)) for keys in _degree_keys)==len(np.unique(_pooled_keys))

In [145]:
#Nodules of a group = lymph nodes + nodules only of that group, computed per volume subgroup

#Without emphysema
ai_nods_noemph_30_100=ai_lymph_noemph_30_100+ai_only_nods_noemph_30_100
ai_nods_noemph_100_300=ai_lymph_noemph_100_300+ai_only_nods_noemph_100_300
ai_nods_noemph_300=ai_lymph_noemph_300+ai_only_nods_noemph_300 #Should be 0

#With emphysema
ai_nods_emph_30_100=ai_lymph_emph_30_100+ai_only_nods_emph_30_100
ai_nods_emph_100_300=ai_lymph_emph_100_300+ai_only_nods_emph_100_300
ai_nods_emph_300=ai_lymph_emph_300+ai_only_nods_emph_300 #Should be 0

In [146]:
#No finding of the AI (nodule or non-nodule, with or without emphysema) may be larger than 300mm3
assert all(count==0 for count in (ai_nods_emph_300,ai_nods_noemph_300,ai_nonods_emph_300,ai_nonods_noemph_300))

Statistics for FPs

In [147]:
#Remove the columns that hold nothing but nans (the sheet comes with many of them - some strange error)
noemph=noemph.dropna(how='all',axis='columns')

In [148]:
#Select rows with participant ids and create new cols with the total number of FPs and FNs for each participant
#('_all' columns) and with their numbers in the 30-100mm3 and 100-300mm3 volume subgroups

_COUNT_COLUMNS=['fp_all','fn_all','fp_30_100','fp_100_300','fn_30_100','fn_100_300']

def _participants_with_counters(sheet):
    """Return the rows of `sheet` that have a participant_id, with the count columns added and set to 0.

    The selection is copied explicitly so the new columns are written to an independent
    DataFrame and not to a slice of `sheet` (which triggers pandas' SettingWithCopyWarning).
    `sheet` itself is left untouched.
    """
    with_ids=sheet[sheet['participant_id'].notna()].copy()
    for column in _COUNT_COLUMNS:
        with_ids[column]=0
    return with_ids

mod_all=_participants_with_counters(mod)
adv_all=_participants_with_counters(adv)
conf_all=_participants_with_counters(conf)
noemph_all=_participants_with_counters(noemph)

In [149]:
#Rows whose participant_id contains 'below'/'BELOW' do not hold a participant ID - remove them
#('==True' turns the nan that str.contains gives for non-string entries into False)
_rows_without_id=noemph_all['participant_id'].str.contains('below|BELOW')==True
noemph_all=noemph_all.drop(index=noemph_all[_rows_without_id].index)

#Keep only valid IDs: the first 6 characters of every entry, converted to integer
noemph_all['participant_id']=[int(str(raw_id)[:6]) for raw_id in noemph_all['participant_id']]

In [150]:
#Loop through all participants and add the number of FPs for AI for each participant (based on consensus review)
#The count of a participant is the number of its findings in the 'other non-nodules' and in the 'fibrosis' FP dictionaries
#Each entry below: (sheet as read from excel, its '_all' frame to update, dictionaries {participant id: findings} to count)
_fp_sources=[
    (mod,mod_all,(other_nonodules_FP_emph,fibrosis_FP_emph)),
    (adv,adv_all,(other_nonodules_FP_emph,fibrosis_FP_emph)),
    (conf,conf_all,(other_nonodules_FP_emph,fibrosis_FP_emph)),
    (noemph,noemph_all,(other_nonodules_FP_noemph,fibrosis_FP_noemph)),
]

for _sheet,_counts_df,_finding_dicts in _fp_sources:
    #items() yields the row label, which is what .loc needs (a running position is only correct for a default index)
    for ind,pat in _sheet['participant_id'].items():
        try:
            pat_id=int(str(pat)[:6]) #participant id = first 6 characters of the entry
        except ValueError: #nan or text rows do not hold a participant id
            continue
        if ind not in _counts_df.index: #row is not part of the '_all' frame
            continue
        for _findings in _finding_dicts:
            if pat_id in _findings:
                _counts_df.loc[ind,'fp_all']+=len(_findings[pat_id])

In [151]:
#Loop through all participants and add the number of FPs for the reader for each participant (based on consensus review)
#The count of a participant is the number of its findings in the 'other non-nodules' and in the 'fibrosis' FN dictionaries
#and is stored in the 'fn_all' column
#Each entry below: (sheet as read from excel, its '_all' frame to update, dictionaries {participant id: findings} to count)
_fn_sources=[
    (mod,mod_all,(other_nonodules_FN_emph,fibrosis_FN_emph)),
    (adv,adv_all,(other_nonodules_FN_emph,fibrosis_FN_emph)),
    (conf,conf_all,(other_nonodules_FN_emph,fibrosis_FN_emph)),
    (noemph,noemph_all,(other_nonodules_FN_noemph,fibrosis_FN_noemph)),
]

for _sheet,_counts_df,_finding_dicts in _fn_sources:
    #items() yields the row label, which is what .loc needs (a running position is only correct for a default index)
    for ind,pat in _sheet['participant_id'].items():
        try:
            pat_id=int(str(pat)[:6]) #participant id = first 6 characters of the entry
        except ValueError: #nan or text rows do not hold a participant id
            continue
        if ind not in _counts_df.index: #row is not part of the '_all' frame
            continue
        for _findings in _finding_dicts:
            if pat_id in _findings:
                _counts_df.loc[ind,'fn_all']+=len(_findings[pat_id])

In [152]:
#Merge all emphysema dfs to one, in the order advanced, conf, moderate
#pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0
emph_all=pd.concat([adv_all,conf_all,mod_all])
emph_all['participant_id']=[int(str(pat)[:6]) for pat in emph_all['participant_id']] #id = first 6 characters

#Reset index so that row labels are 0..n-1 in both frames (the cells below look rows up by that index)
emph_all=emph_all.reset_index(drop=True)
noemph_all=noemph_all.reset_index(drop=True)

#'fp_all' holds the counts added for the AI, 'fn_all' those added for the reader
print("Num of FPs for AI in emphysema cases:",np.sum(emph_all['fp_all']))
print("Num of FPs for AI in non-emphysema cases:",np.sum(noemph_all['fp_all']))

print("Num of FPs for reader in emphysema cases:",np.sum(emph_all['fn_all']))
print("Num of FPs for reader in non-emphysema cases:",np.sum(noemph_all['fn_all']))

Num of FPs for AI in emphysema cases: 20
Num of FPs for AI in non-emphysema cases: 18
Num of FPs for reader in emphysema cases: 6
Num of FPs for reader in non-emphysema cases: 22


In [153]:
# #Not used for now
# print('Paired T-test')
# print("Non-emphysema Reader vs AI:",stats.ttest_rel(noemph_all['fn_all'], noemph_all['fp_all']).pvalue)
# print("Emphysema Reader vs AI:",stats.ttest_rel(emph_all['fn_all'], emph_all['fp_all']).pvalue) 
# print('\n')

# print('T-test of independent samples')
# print("Non-emphysema Reader vs AI:",stats.ttest_ind(noemph_all['fn_all'], noemph_all['fp_all']).pvalue)
# print("Emphysema Reader vs AI:",stats.ttest_ind(emph_all['fn_all'], emph_all['fp_all']).pvalue) 
# print('\n')

# print("Below only possible is independent samples t-test. Paired t-test does not make sense here.")
# print("Emphysema vs non-emphysema for reader",stats.ttest_ind(noemph_all['fn_all'], emph_all['fn_all']).pvalue)
# print("Emphysema vs non-emphysema for AI",stats.ttest_ind(noemph_all['fp_all'], emph_all['fp_all']).pvalue)

In [154]:
# # conduct the Wilcoxon-Signed Rank Test
# print("Non-emphysema Reader vs AI:",stats.wilcoxon(noemph_all['fn_all'], noemph_all['fp_all']).pvalue)
# print("Emphysema Reader vs AI:",stats.wilcoxon(emph_all['fn_all'], emph_all['fp_all']).pvalue) 
# print('\n')

#The two groups have different numbers of participants, so the Mann-Whitney U test is used
#'fn_all' holds the per-participant counts of the reader, 'fp_all' those of the AI
for _label,_column in (("Emphysema vs non-emphysema for reader",'fn_all'),("Emphysema vs non-emphysema for AI",'fp_all')):
    print(_label,stats.mannwhitneyu(noemph_all[_column], emph_all[_column]).pvalue)

Emphysema vs non-emphysema for reader 0.2336660600138144
Emphysema vs non-emphysema for AI 0.028342555964237917


Statistics for FP for AI volume subgroups

In [155]:
#Per participant, count the findings of the 'dict_FP_correct_<group>_ids' dictionaries in each volume subgroup
#and store them in the 'fp_30_100' / 'fp_100_300' columns of emph_all / noemph_all

#Look-up tables instead of exec()/eval()-built statements
_group_frames={'emph':emph_all,'noemph':noemph_all}
_fp_correct_ids={'emph':dict_FP_correct_emph_ids,'noemph':dict_FP_correct_noemph_ids}

for emph in ['emph','noemph']:
    print(emph)
    group_df=_group_frames[emph] #the very same object as emph_all/noemph_all, so it is updated in place

    for pat,nod_ids in _fp_correct_ids[emph].items():

        for nod_id in nod_ids: #nodule id k is stored at position k-1 of the volume list

            if emph=='noemph':
                vol=float(noemph_dict_vol[pat][nod_id-1])
            else: #volume comes from the emphysema degree in which the participant is found
                for _vol_dict in (mod_dict_vol,conf_dict_vol):
                    try:
                        vol=float(_vol_dict[pat][nod_id-1])
                        break
                    except Exception: #not available in this degree - try the next one
                        continue
                else: #last candidate: an error here is allowed to surface
                    vol=float(adv_dict_vol[pat][nod_id-1])

            if vol>=30 and vol<=100:
                column='fp_30_100'
            elif vol>100 and vol<=300:
                column='fp_100_300'
            else: #volumes outside 30-300mm3 are not counted per participant
                continue

            #First row of that participant; .loc is given the row label itself, so this does not rely on label==position
            index=group_df[group_df['participant_id']==int(pat)].index[0]
            group_df.loc[index,column]+=1

emph
noemph


##### AI missed, reader found

Before running the part below, the other notebook ('patient_selection_emphysema_experiment.ipynb') must be executed to get the dictionaries containing information about the ids of FNs. REDCap information is needed to extract those.

Up until here there are 8 files generated that will be used by the other notebook

In [6]:
#Run other notebook and continue execution on next cell if it gives error
#This requires the path 'emph_csv' that contains 6 different excel files, one for each degree of emphysema. Those are the REDCap exports
#NOTE(review): per the markdown above, the other notebook provides the dictionaries with the ids of FNs used below - confirm which names/files it creates
#NOTE(review): the bare except hides every failure of the other notebook; a failed run only shows up later as missing or stale variables
#NOTE(review): '--no-raise-error' is placed after the file name - confirm that %run treats it as an option and not as an argument of the notebook

try: #To ignore error and continue in next cell we need try-except and 'no raise error' flag
    %run ./patient_selection_emphysema_experiment.ipynb --no-raise-error
except:
    pass

FP for reader's volume subgroups

In [157]:
#Per participant, count the findings of the 'dict_FN_wrong_<group>_ids' dictionaries in each volume subgroup
#and store them in the 'fn_30_100' / 'fn_100_300' columns of emph_all / noemph_all
#The volume of the finding at position k of '..._ids[pat]' is read from position k of '..._vols[pat]'

#Look-up tables instead of exec()/eval()-built statements
_group_frames={'emph':emph_all,'noemph':noemph_all}
_fn_wrong_ids={'emph':dict_FN_wrong_emph_ids,'noemph':dict_FN_wrong_noemph_ids}
_fn_wrong_vols={'emph':dict_FN_wrong_emph_vols,'noemph':dict_FN_wrong_noemph_vols}

for emph in ['emph','noemph']:
    print(emph)
    group_df=_group_frames[emph] #the very same object as emph_all/noemph_all, so it is updated in place

    for pat,nod_ids in _fn_wrong_ids[emph].items():

        for nod_id,_ in enumerate(nod_ids): #nod_id is the position in the list, not the id itself

            vol=float(_fn_wrong_vols[emph][pat][nod_id])

            if vol>=30 and vol<=100:
                column='fn_30_100'
            elif vol>100 and vol<=300:
                column='fn_100_300'
            else: #volumes outside 30-300mm3 are not counted per participant
                continue

            #First row of that participant; .loc is given the row label itself, so this does not rely on label==position
            index=group_df[group_df['participant_id']==int(pat)].index[0]
            group_df.loc[index,column]+=1

emph
noemph


In [158]:
#For every participant the total count must be the sum of its two volume subgroups
for _frame in (emph_all,noemph_all):
    for _kind in ('fp','fn'):
        assert list(_frame[_kind+'_all'])==list(_frame[_kind+'_30_100']+_frame[_kind+'_100_300'])

#The same must hold for the totals over all participants
for _frame in (emph_all,noemph_all):
    for _kind in ('fp','fn'):
        assert np.sum(_frame[_kind+'_all'])==np.sum(_frame[_kind+'_30_100'])+np.sum(_frame[_kind+'_100_300'])

#Totals over all participants of each group
for _label,_frame,_column in (
    ("FP AI emph",emph_all,'fp_all'),
    ("FP AI noemph",noemph_all,'fp_all'),
    ("FN read emph",emph_all,'fn_all'),
    ("FN read noemph",noemph_all,'fn_all'),
):
    print(_label,np.sum(_frame[_column]))

FP AI emph 20
FP AI noemph 18
FN read emph 6
FN read noemph 22


In [159]:
print("Volume subgroup 30-100mm3")
#Alternatives that were tried and are kept for reference only:
# print('Paired T-test')
# print("Non-emphysema Reader vs AI:",stats.ttest_rel(noemph_all['fn_30_100'], noemph_all['fp_30_100']).pvalue)
# print("Emphysema Reader vs AI:",stats.ttest_rel(emph_all['fn_30_100'], emph_all['fp_30_100']).pvalue)
# print('\n')

# print('T-test of independent samples')
# print("Non-emphysema Reader vs AI:",stats.ttest_ind(noemph_all['fn_30_100'], noemph_all['fp_30_100']).pvalue)
# print("Emphysema Reader vs AI:",stats.ttest_ind(emph_all['fn_30_100'], emph_all['fp_30_100']).pvalue)
# print('\n')

# print("Below only possible is independent samples t-test. Paired t-test does not make sense here.")
# print("Emphysema vs non-emphysema for reader",stats.ttest_ind(noemph_all['fn_30_100'], emph_all['fn_30_100']).pvalue)
# print("Emphysema vs non-emphysema for AI",stats.ttest_ind(noemph_all['fp_30_100'], emph_all['fp_30_100']).pvalue)
# print('\n')

#Reader ('fn_30_100') and AI ('fp_30_100') counts are columns of the same frame, i.e. paired per participant
print("Below Wilcoxon-Signed Rank Test is used")
for _label,_frame in (("Non-emphysema Reader vs AI:",noemph_all),("Emphysema Reader vs AI:",emph_all)):
    print(_label,stats.wilcoxon(_frame['fn_30_100'], _frame['fp_30_100']).pvalue)
print('\n')

# print("For unequal sample size Mann-Whitney U test is used")
# print("Emphysema vs non-emphysema for reader",stats.mannwhitneyu(noemph_all['fn_30_100'], emph_all['fn_30_100']).pvalue)
# print("Emphysema vs non-emphysema for AI",stats.mannwhitneyu(noemph_all['fp_30_100'], emph_all['fp_30_100']).pvalue)

Volume subgroup 30-100mm3
Below Wilcoxon-Signed Rank Test is used
Non-emphysema Reader vs AI: 0.007027589183477798
Emphysema Reader vs AI: 0.4536952997039291




In [160]:
print("Volume subgroup 100-300mm3")
#Alternatives that were tried and are kept for reference only:
# print('Paired T-test')
# print("Non-emphysema Reader vs AI:",stats.ttest_rel(noemph_all['fn_100_300'], noemph_all['fp_100_300']).pvalue)
# print("Emphysema Reader vs AI:",stats.ttest_rel(emph_all['fn_100_300'], emph_all['fp_100_300']).pvalue)
# print('\n')

# print('T-test of independent samples')
# print("Non-emphysema Reader vs AI:",stats.ttest_ind(noemph_all['fn_100_300'], noemph_all['fp_100_300']).pvalue)
# print("Emphysema Reader vs AI:",stats.ttest_ind(emph_all['fn_100_300'], emph_all['fp_100_300']).pvalue)
# print('\n')

# print("Below only possible is independent samples t-test. Paired t-test does not make sense here.")
# print("Emphysema vs non-emphysema for reader",stats.ttest_ind(noemph_all['fn_100_300'], emph_all['fn_100_300']).pvalue)
# print("Emphysema vs non-emphysema for AI",stats.ttest_ind(noemph_all['fp_100_300'], emph_all['fp_100_300']).pvalue)
# print('\n')

#Reader ('fn_100_300') and AI ('fp_100_300') counts are columns of the same frame, i.e. paired per participant
print("Below Wilcoxon-Signed Rank Test is used")
for _label,_frame in (("Non-emphysema Reader vs AI:",noemph_all),("Emphysema Reader vs AI:",emph_all)):
    print(_label,stats.wilcoxon(_frame['fn_100_300'], _frame['fp_100_300']).pvalue)
print('\n')

# print("For unequal sample size Mann-Whitney U test is used")
# print("Emphysema vs non-emphysema for reader",stats.mannwhitneyu(noemph_all['fn_100_300'], emph_all['fn_100_300']).pvalue)
# print("Emphysema vs non-emphysema for AI",stats.mannwhitneyu(noemph_all['fp_100_300'], emph_all['fp_100_300']).pvalue)

Volume subgroup 100-300mm3
Below Wilcoxon-Signed Rank Test is used
Non-emphysema Reader vs AI: 0.011831666473446547
Emphysema Reader vs AI: 0.020999326278937417




Load dictionaries

In [161]:
def _load_pickle(filename):
    'Return the object stored with pickle in filename (file is read in binary mode and closed afterwards).'
    #pickle can run code while loading - only open files written by these notebooks
    with open(filename,'rb') as f:
        return pickle.load(f)

#Load ids of FNs
dict_FN_wrong_emph_ids=_load_pickle('dict_FN_wrong_emph_ids.pickle')
dict_FN_correct_emph_ids=_load_pickle('dict_FN_correct_emph_ids.pickle')
dict_FN_wrong_noemph_ids=_load_pickle('dict_FN_wrong_noemph_ids.pickle')
dict_FN_correct_noemph_ids=_load_pickle('dict_FN_correct_noemph_ids.pickle')

#Same for their vols
dict_FN_wrong_emph_vols=_load_pickle('dict_FN_wrong_emph_vols.pickle')
dict_FN_correct_emph_vols=_load_pickle('dict_FN_correct_emph_vols.pickle')
dict_FN_wrong_noemph_vols=_load_pickle('dict_FN_wrong_noemph_vols.pickle')
dict_FN_correct_noemph_vols=_load_pickle('dict_FN_correct_noemph_vols.pickle')

In [162]:
#Similarly for lymph nodes and nodules only and of their ids and volumes

def _read_pickle_file(filename):
    'Open filename in binary mode and return the unpickled object; the file is closed on return.'
    #pickle can run code while loading - only open files written by these notebooks
    with open(filename,'rb') as f:
        return pickle.load(f)

#Dictionaries of the findings
lymph_FN_correct_emph=_read_pickle_file('lymph_FN_correct_emph.pickle')
lymph_FN_correct_noemph=_read_pickle_file('lymph_FN_correct_noemph.pickle')
nod_FN_correct_emph=_read_pickle_file('nod_FN_correct_emph.pickle')
nod_FN_correct_noemph=_read_pickle_file('nod_FN_correct_noemph.pickle')

#Their ids
lymph_FN_correct_emph_ids=_read_pickle_file('lymph_FN_correct_emph_ids.pickle')
lymph_FN_correct_noemph_ids=_read_pickle_file('lymph_FN_correct_noemph_ids.pickle')
nod_FN_correct_emph_ids=_read_pickle_file('nod_FN_correct_emph_ids.pickle')
nod_FN_correct_noemph_ids=_read_pickle_file('nod_FN_correct_noemph_ids.pickle')

#Their volumes
lymph_FN_correct_emph_vols=_read_pickle_file('lymph_FN_correct_emph_vols.pickle')
lymph_FN_correct_noemph_vols=_read_pickle_file('lymph_FN_correct_noemph_vols.pickle')
nod_FN_correct_emph_vols=_read_pickle_file('nod_FN_correct_emph_vols.pickle')
nod_FN_correct_noemph_vols=_read_pickle_file('nod_FN_correct_noemph_vols.pickle')

In [163]:
#Counters of the reader per volume subgroup (30-100, 100-300, >300 mm3) and emph/non-emph group, all starting at 0
#All '_300' counters should still be 0 after the loops below

#Non-nodules
reader_nonods_noemph_30_100=reader_nonods_noemph_100_300=reader_nonods_noemph_300=0
reader_nonods_emph_30_100=reader_nonods_emph_100_300=reader_nonods_emph_300=0

#Nodules only
reader_only_nods_noemph_30_100=reader_only_nods_noemph_100_300=reader_only_nods_noemph_300=0
reader_only_nods_emph_30_100=reader_only_nods_emph_100_300=reader_only_nods_emph_300=0

#Lymph nodes
reader_lymph_noemph_30_100=reader_lymph_noemph_100_300=reader_lymph_noemph_300=0
reader_lymph_emph_30_100=reader_lymph_emph_100_300=reader_lymph_emph_300=0

In [164]:
#Lists that collect the volumes of the reader's findings of each subgroup (if Mann-Whitney U test is used below)
#Each name gets its own new empty list

#Non-nodules
reader_nonods_noemph_30_100_vols,reader_nonods_noemph_100_300_vols,reader_nonods_noemph_300_vols=[],[],[]
reader_nonods_emph_30_100_vols,reader_nonods_emph_100_300_vols,reader_nonods_emph_300_vols=[],[],[]

#Nodules only
reader_only_nods_noemph_30_100_vols,reader_only_nods_noemph_100_300_vols,reader_only_nods_noemph_300_vols=[],[],[]
reader_only_nods_emph_30_100_vols,reader_only_nods_emph_100_300_vols,reader_only_nods_emph_300_vols=[],[],[]

#Lymph nodes
reader_lymph_noemph_30_100_vols,reader_lymph_noemph_100_300_vols,reader_lymph_noemph_300_vols=[],[],[]
reader_lymph_emph_30_100_vols,reader_lymph_emph_100_300_vols,reader_lymph_emph_300_vols=[],[],[]

Get numbers of reader nodules for lymph nodes only, nodules only, and non-nodule categories in emphysema/non-emphysema groups

In [165]:
#Similarly for non-nodule emphysema/non-emphysema groups for FNs
#Fills the 'reader_nonods_<group>_<subgroup>' counters and the matching '_vols' lists defined in the cells above

#Look-up tables instead of eval()-built variable names
_fn_wrong_ids={'emph':dict_FN_wrong_emph_ids,'noemph':dict_FN_wrong_noemph_ids}

for emph in ['emph','noemph']:
    tot=0 #number of findings with volume in 30-300mm3
    for pat,nod_ids in _fn_wrong_ids[emph].items():
        for ind,nod_id in enumerate(nod_ids): #volume of the finding at position ind is at the same position of the '_vols' list

            if emph=='noemph':
                vol=float(dict_FN_wrong_noemph_vols[pat][ind])
            else:
                try:
                    #It might not exist there since only FP - AI nods exist in dicts like 'mod_dict_vol[pat][nod_id-1]'
                    vol=float(dict_FN_wrong_emph_vols[pat][ind])
                except Exception: #We should never get in here
                    print("We shouldn't be here")
                    try:
                        vol=float(conf_dict_vol[pat][nod_id-1])
                    except Exception:
                        vol=float(adv_dict_vol[pat][nod_id-1])

            if vol>=30 and vol<=100:
                subgroup='30_100'
            elif vol>100 and vol<=300:
                subgroup='100_300'
            elif vol>300:
                subgroup='300' #Should stay empty
            else:
                print('For participant {} volume is smaller than 30mm3'.format(pat))
                continue

            #Increase the counter of that subgroup and store the volume in the corresponding list
            _counter_name='reader_nonods_'+emph+'_'+subgroup
            globals()[_counter_name]+=1
            globals()[_counter_name+'_vols'].append(vol)
            if subgroup!='300': #findings above 300mm3 are not part of the total
                tot=tot+1

    print('Total non-nodules in {} group is {}'.format(emph,tot))

Total non-nodules in emph group is 6
Total non-nodules in noemph group is 22


In [166]:
#Similarly for lymph nodes emphysema/non-emphysema groups for FNs
#Fills the 'reader_lymph_<group>_<subgroup>' counters and the matching '_vols' lists defined in the cells above

#Look-up tables instead of eval()-built variable names
_lymph_fn_ids={'emph':lymph_FN_correct_emph_ids,'noemph':lymph_FN_correct_noemph_ids}

for emph in ['emph','noemph']:
    tot=0 #number of findings with volume in 30-300mm3
    for pat,nod_ids in _lymph_fn_ids[emph].items():
        for ind,nod_id in enumerate(nod_ids): #volume of the finding at position ind is at the same position of the '_vols' list

            if emph=='noemph':
                vol=float(lymph_FN_correct_noemph_vols[pat][ind])
            else:
                try:
                    vol=float(lymph_FN_correct_emph_vols[pat][ind])
                except Exception: #We should never get in here
                    print("We shouldn't be here")
                    try:
                        vol=float(conf_dict_vol[pat][nod_id-1])
                    except Exception:
                        vol=float(adv_dict_vol[pat][nod_id-1])

            if vol>=30 and vol<=100:
                subgroup='30_100'
            elif vol>100 and vol<=300:
                subgroup='100_300'
            elif vol>300:
                subgroup='300' #Should stay empty
            else:
                print('For participant {} volume is smaller than 30mm3'.format(pat))
                continue

            #Increase the counter of that subgroup and store the volume in the corresponding list
            _counter_name='reader_lymph_'+emph+'_'+subgroup
            globals()[_counter_name]+=1
            globals()[_counter_name+'_vols'].append(vol)
            if subgroup!='300': #findings above 300mm3 are not part of the total
                tot=tot+1

    print('Total lymphs in {} group is {}'.format(emph,tot))

Total lymphs in emph group is 16
Total lymphs in noemph group is 19


In [167]:
#Similarly for nodules only emphysema/non-emphysema groups for FNs
#Fills the 'reader_only_nods_<group>_<subgroup>' counters and the matching '_vols' lists defined in the cells above

#Look-up tables instead of eval()-built variable names
_nod_fn_ids={'emph':nod_FN_correct_emph_ids,'noemph':nod_FN_correct_noemph_ids}

for emph in ['emph','noemph']:
    tot=0 #number of findings with volume in 30-300mm3
    for pat,nod_ids in _nod_fn_ids[emph].items():
        for ind,nod_id in enumerate(nod_ids): #volume of the finding at position ind is at the same position of the '_vols' list

            if emph=='noemph':
                vol=float(nod_FN_correct_noemph_vols[pat][ind])
            else:
                try:
                    vol=float(nod_FN_correct_emph_vols[pat][ind])
                except Exception: #We should never get in here
                    print("We shouldn't be here")
                    try:
                        vol=float(conf_dict_vol[pat][nod_id-1])
                    except Exception:
                        vol=float(adv_dict_vol[pat][nod_id-1])

            if vol>=30 and vol<=100:
                subgroup='30_100'
            elif vol>100 and vol<=300:
                subgroup='100_300'
            elif vol>300:
                subgroup='300' #Should stay empty
            else:
                print('For participant {} volume is smaller than 30mm3'.format(pat))
                continue

            #Increase the counter of that subgroup and store the volume in the corresponding list
            _counter_name='reader_only_nods_'+emph+'_'+subgroup
            globals()[_counter_name]+=1
            globals()[_counter_name+'_vols'].append(vol)
            if subgroup!='300': #findings above 300mm3 are not part of the total
                tot=tot+1

    print('Total nodules only in {} group is {}'.format(emph,tot))

Total nodules only in emph group is 13
Total nodules only in noemph group is 21


In [168]:
#Nodules of a group = nodules only + lymph nodes of that group, computed per volume subgroup

#Without emphysema
reader_nods_noemph_30_100=reader_only_nods_noemph_30_100+reader_lymph_noemph_30_100
reader_nods_noemph_100_300=reader_only_nods_noemph_100_300+reader_lymph_noemph_100_300
reader_nods_noemph_300=reader_only_nods_noemph_300+reader_lymph_noemph_300

#With emphysema
reader_nods_emph_30_100=reader_only_nods_emph_30_100+reader_lymph_emph_30_100
reader_nods_emph_100_300=reader_only_nods_emph_100_300+reader_lymph_emph_100_300
reader_nods_emph_300=reader_only_nods_emph_300+reader_lymph_emph_300

In [7]:
#Inspection only: volumes collected by the nodules-only loop above for the non-emphysema group, 30-100mm3 subgroup
reader_only_nods_noemph_30_100_vols

In [170]:
#Inspection only: number of findings counted by the nodules-only loop above for the non-emphysema group, 30-100mm3 subgroup
reader_only_nods_noemph_30_100

19

In [171]:
#No finding of the reader may be larger than 300mm3. Besides the nodule totals (lymph nodes + nodules only),
#the non-nodule counters are checked too, in line with the same check on the AI counters
assert reader_nods_noemph_300==reader_nods_emph_300==reader_nonods_emph_300==reader_nonods_noemph_300==0

## Create Tables & Statistics

##### Based on the current definition the following equations hold true:
1. AI found nodules, reader missed = FN reader
2. AI found non-nodules, reader missed = FP AI
3. AI missed nodules, reader found = FN AI
4. AI missed non-nodules, reader found = FP reader

#### Emphysema non-nodules

In [172]:
#Below are the non-nodule categories for the emphysema group. In these variable names FP denotes a non-nodule that was
#detected only by the AI (missed by the reader), whereas FN denotes a non-nodule detected only by the reader (missed by the AI)


def _count_findings(findings):
    """Return the total number of findings stored in a dictionary of per-participant lists.

    `findings` is expected to map each participant id to the list of finding ids of one category.
    A value that is already an integer is returned unchanged: the names below are rebound from
    dictionaries to counts, so without this guard the cell would fail when executed a second time.
    """
    if isinstance(findings,int):
        return findings
    return sum(len(ids) for ids in findings.values())


#Transform above dictionaries to numbers to be used below
fibrosis_FP_emph=_count_findings(fibrosis_FP_emph)
other_nonodules_FP_emph=_count_findings(other_nonodules_FP_emph)
fibrosis_FN_emph=_count_findings(fibrosis_FN_emph)
other_nonodules_FN_emph=_count_findings(other_nonodules_FN_emph)
other_nonodules_FN_lung_emph=_count_findings(other_nonodules_FN_lung_emph)
other_nonodules_FN_nolung_emph=_count_findings(other_nonodules_FN_nolung_emph)
other_nonodules_FP_lung_emph=_count_findings(other_nonodules_FP_lung_emph)
other_nonodules_FP_nolung_emph=_count_findings(other_nonodules_FP_nolung_emph)

#Print the above
print('Fibrosis/scar emphysema FP: '+str(fibrosis_FP_emph))
print('Other non-nodules emphysema FP: '+str(other_nonodules_FP_emph))
print('Other non-nodules emphysema FP (lung): '+str(other_nonodules_FP_lung_emph))
print('Other non-nodules emphysema FP (non-lung): '+str(other_nonodules_FP_nolung_emph))
print('Fibrosis/scar emphysema FN: '+str(fibrosis_FN_emph))
print('Other non-nodules emphysema FN: '+str(other_nonodules_FN_emph))
print('Other non-nodules emphysema FN (lung): '+str(other_nonodules_FN_lung_emph))
print('Other non-nodules emphysema FN (non-lung): '+str(other_nonodules_FN_nolung_emph))

Fibrosis/scar emphysema FP: 13
Other non-nodules emphysema FP: 7
Other non-nodules emphysema FP (lung): 5
Other non-nodules emphysema FP (non-lung): 1
Fibrosis/scar emphysema FN: 1
Other non-nodules emphysema FN: 5
Other non-nodules emphysema FN (lung): 1
Other non-nodules emphysema FN (non-lung): 2


In [173]:
#Comparison of the non-nodule categories of the emphysema group (no volume subgroups):
#number of non-nodules incorrectly detected by the AI and by the reader, with percentages of all such findings

df_categories=pd.DataFrame(columns=['Incorrectly detected by AI','Incorrectly detected by reader'], #below index with the correct order as above
                          index=['fibrosis/scar emphysema','other non-nodules emphysema'])

df_categories.index.name = 'GT by radiologists for discrepancies'

df_categories['Incorrectly detected by AI']=[fibrosis_FP_emph,other_nonodules_FP_emph]

df_categories['Incorrectly detected by reader']=[fibrosis_FN_emph,other_nonodules_FN_emph]

df_categories['All findings']=df_categories['Incorrectly detected by AI']+df_categories['Incorrectly detected by reader'] #Sum of AI and reader findings for each category

df_categories.loc['Total']= df_categories.sum() #Column totals for AI/reader/all findings

all_findings=df_categories.iloc[:-1,:-1].sum().sum() #All findings ('Total' row and 'All findings' column excluded)

#Add percentages (of all findings) next to the number of each category.
#Counts and percentages are paired by position with zip. The previous 'percentage_fp[index]' used an integer key on a
#Series indexed by category names, which relies on a positional fallback that pandas has deprecated (the warning is
#silenced by the global warnings filter of this notebook)
percentage_fp=np.round((df_categories['Incorrectly detected by AI']/all_findings)*100,1)
df_categories['Incorrectly detected by AI']=[str(count)+' ('+str(percentage)+'%)'
                                             for count,percentage in zip(df_categories['Incorrectly detected by AI'],percentage_fp)]

percentage_fn=np.round((df_categories['Incorrectly detected by reader']/all_findings)*100,1)
df_categories['Incorrectly detected by reader']=[str(count)+' ('+str(percentage)+'%)'
                                                 for count,percentage in zip(df_categories['Incorrectly detected by reader'],percentage_fn)]

df_categories['All findings']=[str(val)+' ('+str(np.round(100*val/all_findings,1))+'%)' for val in df_categories['All findings'].values]

df_categories
# ‘Other’ could be bone, tissue, mucus, arthrosis, vessel, consolidation, infection, fat, or atelectasis.

Unnamed: 0_level_0,Incorrectly detected by AI,Incorrectly detected by reader,All findings
GT by radiologists for discrepancies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
fibrosis/scar emphysema,13 (50.0%),1 (3.8%),14 (53.8%)
other non-nodules emphysema,7 (26.9%),5 (19.2%),12 (46.2%)
Total,20 (76.9%),6 (23.1%),26 (100.0%)


In [174]:
# df_categories.style.to_latex() #Just as a starting point - Need to be modified manually

In [175]:
#Save the current 'df_categories' table to an Excel file in the working directory
df_categories.to_excel('non_nodules_emphysema.xlsx')

In [176]:
#Detailed comparison of the non-nodule categories of the emphysema group (no volume subgroups):
#'other non-nodules' are split into lung, non-lung and those without a description

df_categories=pd.DataFrame(columns=['Incorrectly detected by AI','Incorrectly detected by reader'], #below index with the correct order as above
                          index=['fibrosis/scar emphysema','other non-nodules lung emphysema','other non-nodules nolung emphysema','other non-nodules (no description)'])

df_categories.index.name = 'GT by radiologists for discrepancies'

#'Other' non-nodules that are neither in the lung nor in the non-lung subcategory
rest_no_desc_fp=other_nonodules_FP_emph-other_nonodules_FP_lung_emph-other_nonodules_FP_nolung_emph
df_categories['Incorrectly detected by AI']=[fibrosis_FP_emph,other_nonodules_FP_lung_emph, other_nonodules_FP_nolung_emph, rest_no_desc_fp]

rest_no_desc_fn=other_nonodules_FN_emph-other_nonodules_FN_lung_emph-other_nonodules_FN_nolung_emph
df_categories['Incorrectly detected by reader']=[fibrosis_FN_emph,other_nonodules_FN_lung_emph, other_nonodules_FN_nolung_emph, rest_no_desc_fn]

df_categories['All findings']=df_categories['Incorrectly detected by AI']+df_categories['Incorrectly detected by reader'] #Sum of AI and reader findings for each category

df_categories.loc['Total']= df_categories.sum() #Column totals for AI/reader/all findings

all_findings=df_categories.iloc[:-1,:-1].sum().sum() #All findings ('Total' row and 'All findings' column excluded)

#Add percentages (of all findings) next to the number of each category.
#Counts and percentages are paired by position with zip. The previous 'percentage_fp[index]' used an integer key on a
#Series indexed by category names, which relies on a positional fallback that pandas has deprecated (the warning is
#silenced by the global warnings filter of this notebook)
percentage_fp=np.round((df_categories['Incorrectly detected by AI']/all_findings)*100,1)
df_categories['Incorrectly detected by AI']=[str(count)+' ('+str(percentage)+'%)'
                                             for count,percentage in zip(df_categories['Incorrectly detected by AI'],percentage_fp)]

percentage_fn=np.round((df_categories['Incorrectly detected by reader']/all_findings)*100,1)
df_categories['Incorrectly detected by reader']=[str(count)+' ('+str(percentage)+'%)'
                                                 for count,percentage in zip(df_categories['Incorrectly detected by reader'],percentage_fn)]

df_categories['All findings']=[str(val)+' ('+str(np.round(100*val/all_findings,1))+'%)' for val in df_categories['All findings'].values]

df_categories
# ‘Other’ could be bone, tissue, mucus, arthrosis, vessel, consolidation, infection, fat, or atelectasis.

Unnamed: 0_level_0,Incorrectly detected by AI,Incorrectly detected by reader,All findings
GT by radiologists for discrepancies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
fibrosis/scar emphysema,13 (50.0%),1 (3.8%),14 (53.8%)
other non-nodules lung emphysema,5 (19.2%),1 (3.8%),6 (23.1%)
other non-nodules nolung emphysema,1 (3.8%),2 (7.7%),3 (11.5%)
other non-nodules (no description),1 (3.8%),2 (7.7%),3 (11.5%)
Total,20 (76.9%),6 (23.1%),26 (100.0%)


In [177]:
# df_categories.to_excel('non_nodules_types_emphysema.xlsx')

#### Non-emphysema non-nodules

In [178]:
#Non-nodule categories for the non-emphysema group. In these variable names FP denotes a non-nodule that was detected
#only by the AI (missed by the reader), whereas FN denotes a non-nodule detected only by the reader (missed by the AI)


def _count_findings(findings):
    """Return the total number of findings stored in a dictionary of per-participant lists.

    `findings` is expected to map each participant id to the list of finding ids of one category.
    A value that is already an integer is returned unchanged: the names below are rebound from
    dictionaries to counts, so without this guard the cell would fail when executed a second time.
    """
    if isinstance(findings,int):
        return findings
    return sum(len(ids) for ids in findings.values())


#Transform the dictionaries to numbers to be used below
fibrosis_FP_noemph=_count_findings(fibrosis_FP_noemph)
other_nonodules_FP_noemph=_count_findings(other_nonodules_FP_noemph)
other_nonodules_FP_lung_noemph=_count_findings(other_nonodules_FP_lung_noemph)
other_nonodules_FP_nolung_noemph=_count_findings(other_nonodules_FP_nolung_noemph)
fibrosis_FN_noemph=_count_findings(fibrosis_FN_noemph)
other_nonodules_FN_noemph=_count_findings(other_nonodules_FN_noemph)
other_nonodules_FN_lung_noemph=_count_findings(other_nonodules_FN_lung_noemph)
other_nonodules_FN_nolung_noemph=_count_findings(other_nonodules_FN_nolung_noemph)

#Print the above
print('Fibrosis/scar non-emphysema FP: '+str(fibrosis_FP_noemph))
print('Other non-nodules non-emphysema FP: '+str(other_nonodules_FP_noemph))
print('Other non-nodules non-emphysema FP (lung): '+str(other_nonodules_FP_lung_noemph))
print('Other non-nodules non-emphysema FP (non-lung): '+str(other_nonodules_FP_nolung_noemph))
print('Fibrosis/scar non-emphysema FN: '+str(fibrosis_FN_noemph))
print('Other non-nodules non-emphysema FN: '+str(other_nonodules_FN_noemph))
print('Other non-nodules non-emphysema FN (lung): '+str(other_nonodules_FN_lung_noemph))
print('Other non-nodules non-emphysema FN (non-lung): '+str(other_nonodules_FN_nolung_noemph))

Fibrosis/scar non-emphysema FP: 9
Other non-nodules non-emphysema FP: 9
Other non-nodules non-emphysema FP (lung): 6
Other non-nodules non-emphysema FP (non-lung): 2
Fibrosis/scar non-emphysema FN: 3
Other non-nodules non-emphysema FN: 19
Other non-nodules non-emphysema FN (lung): 4
Other non-nodules non-emphysema FN (non-lung): 11


In [179]:
#Comparison of the non-nodule categories of the non-emphysema group (no volume subgroups):
#number of non-nodules incorrectly detected by the AI and by the reader, with percentages of all such findings

df_categories=pd.DataFrame(columns=['Incorrectly detected by AI','Incorrectly detected by reader'], #below index with the correct order as above
                          index=['fibrosis/scar non-emphysema',
                                 'other non-nodules non-emphysema'
                                ])

df_categories.index.name = 'GT by radiologists for discrepancies'#\initial reading (emph+non-emph)'

df_categories['Incorrectly detected by AI']=[fibrosis_FP_noemph,other_nonodules_FP_noemph]

df_categories['Incorrectly detected by reader']=[fibrosis_FN_noemph,other_nonodules_FN_noemph]

df_categories['All findings']=df_categories['Incorrectly detected by AI']+df_categories['Incorrectly detected by reader'] #Sum of AI and reader findings for each category

df_categories.loc['Total']= df_categories.sum() #Column totals for AI/reader/all findings

all_findings=df_categories.iloc[:-1,:-1].sum().sum() #All findings ('Total' row and 'All findings' column excluded)

#Add percentages (of all findings) next to the number of each category.
#Counts and percentages are paired by position with zip. The previous 'percentage_fp[index]' used an integer key on a
#Series indexed by category names, which relies on a positional fallback that pandas has deprecated (the warning is
#silenced by the global warnings filter of this notebook)
percentage_fp=np.round((df_categories['Incorrectly detected by AI']/all_findings)*100,1)
df_categories['Incorrectly detected by AI']=[str(count)+' ('+str(percentage)+'%)'
                                             for count,percentage in zip(df_categories['Incorrectly detected by AI'],percentage_fp)]

percentage_fn=np.round((df_categories['Incorrectly detected by reader']/all_findings)*100,1)
df_categories['Incorrectly detected by reader']=[str(count)+' ('+str(percentage)+'%)'
                                                 for count,percentage in zip(df_categories['Incorrectly detected by reader'],percentage_fn)]

df_categories['All findings']=[str(val)+' ('+str(np.round(100*val/all_findings,1))+'%)' for val in df_categories['All findings'].values]

df_categories
# ‘Other’ could be bone, tissue, mucus, arthrosis, vessel, consolidation, infection, fat, or atelectasis.

Unnamed: 0_level_0,Incorrectly detected by AI,Incorrectly detected by reader,All findings
GT by radiologists for discrepancies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
fibrosis/scar non-emphysema,9 (22.5%),3 (7.5%),12 (30.0%)
other non-nodules non-emphysema,9 (22.5%),19 (47.5%),28 (70.0%)
Total,18 (45.0%),22 (55.0%),40 (100.0%)


In [180]:
#Save the current 'df_categories' table to an Excel file in the working directory
df_categories.to_excel('non_nodules_nonemphysema.xlsx')

In [181]:
#Detailed comparison of the non-nodule categories of the non-emphysema group (no volume subgroups):
#'other non-nodules' are split into lung, non-lung and those without a description

df_categories=pd.DataFrame(columns=['Incorrectly detected by AI','Incorrectly detected by reader'], #below index with the correct order as above
                          index=['fibrosis/scar non-emphysema',
                                 'other non-nodules lung non-emphysema','other non-nodules nolung non-emphysema','other non-nodules (no description)'
                                ])

df_categories.index.name = 'GT by radiologists for discrepancies'#\initial reading (emph+non-emph)'

#'Other' non-nodules that are neither in the lung nor in the non-lung subcategory
rest_no_desc_fp=other_nonodules_FP_noemph-other_nonodules_FP_lung_noemph-other_nonodules_FP_nolung_noemph
df_categories['Incorrectly detected by AI']=[fibrosis_FP_noemph,other_nonodules_FP_lung_noemph,other_nonodules_FP_nolung_noemph,rest_no_desc_fp]

rest_no_desc_fn=other_nonodules_FN_noemph-other_nonodules_FN_lung_noemph-other_nonodules_FN_nolung_noemph
df_categories['Incorrectly detected by reader']=[fibrosis_FN_noemph,other_nonodules_FN_lung_noemph,other_nonodules_FN_nolung_noemph,rest_no_desc_fn]

df_categories['All findings']=df_categories['Incorrectly detected by AI']+df_categories['Incorrectly detected by reader'] #Sum of AI and reader findings for each category

df_categories.loc['Total']= df_categories.sum() #Column totals for AI/reader/all findings

all_findings=df_categories.iloc[:-1,:-1].sum().sum() #All findings ('Total' row and 'All findings' column excluded)

#Add percentages (of all findings) next to the number of each category.
#Counts and percentages are paired by position with zip. The previous 'percentage_fp[index]' used an integer key on a
#Series indexed by category names, which relies on a positional fallback that pandas has deprecated (the warning is
#silenced by the global warnings filter of this notebook)
percentage_fp=np.round((df_categories['Incorrectly detected by AI']/all_findings)*100,1)
df_categories['Incorrectly detected by AI']=[str(count)+' ('+str(percentage)+'%)'
                                             for count,percentage in zip(df_categories['Incorrectly detected by AI'],percentage_fp)]

percentage_fn=np.round((df_categories['Incorrectly detected by reader']/all_findings)*100,1)
df_categories['Incorrectly detected by reader']=[str(count)+' ('+str(percentage)+'%)'
                                                 for count,percentage in zip(df_categories['Incorrectly detected by reader'],percentage_fn)]

df_categories['All findings']=[str(val)+' ('+str(np.round(100*val/all_findings,1))+'%)' for val in df_categories['All findings'].values]

df_categories

Unnamed: 0_level_0,Incorrectly detected by AI,Incorrectly detected by reader,All findings
GT by radiologists for discrepancies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
fibrosis/scar non-emphysema,9 (22.5%),3 (7.5%),12 (30.0%)
other non-nodules lung non-emphysema,6 (15.0%),4 (10.0%),10 (25.0%)
other non-nodules nolung non-emphysema,2 (5.0%),11 (27.5%),13 (32.5%)
other non-nodules (no description),1 (2.5%),4 (10.0%),5 (12.5%)
Total,18 (45.0%),22 (55.0%),40 (100.0%)


In [182]:
# df_categories.to_excel('non_nodules_types_nonemphysema.xlsx')

### Metrics Calculation

In [183]:
#Load number of nodules and lymph nodes for each of the emphysema/non-emphysema groups - These are the TPs
#The counts are read from pickle files created in 'patient_selection_emphysema_experiment.ipynb' notebook

#Define nodule group names
nod_groups_only=['sub_ground','pleural', 'calcified','other_all', 'atypical_triangular'] 
lymph_groups=['per_fisu','peri_bronch'] 

#Suffixes of the pickled counts: '' is the whole group, the others are the volume subgroups
volume_suffixes=['','_30_100','_100_300','_300']


def _load_pickled_count(name):
    """Load '<name>.pickle' from the working directory and return its content.

    The loaded value is also stored as the module-level variable `name` (e.g. 'pleural_emph_nod_only_30_100'),
    which is the variable that used to be created with exec().
    """
    #NOTE: pickle.load can execute arbitrary code - only load pickle files created by our own notebooks
    with open(name+'.pickle','rb') as f:
        value=pickle.load(f)
    globals()[name]=value
    return value


#Running totals of TP, keyed by the name of the variable they are assigned to below. All initialized to 0
#These can also be the sum of the volume subgroups below - Kept as is for now
tp_totals={'TP_'+kind+group+suffix:0
           for kind in ['nod','lymph'] for group in ['_emph','_noemph'] for suffix in volume_suffixes}

for emph in ['_emph','_noemph']: #Loop over emphysema/non-emphysema

    for nod_group in nod_groups_only: #Loop over nodule groups
        for vol in volume_suffixes: #Whole group first and then each volume subgroup
            key='TP_nod'+emph+vol
            tp_totals[key]=tp_totals[key]+_load_pickled_count(nod_group+emph+'_nod_only'+vol)

    for lymph_group in lymph_groups: #Similar as above for lymph node groups
        for vol in volume_suffixes:
            key='TP_lymph'+emph+vol
            tp_totals[key]=tp_totals[key]+_load_pickled_count(lymph_group+emph+'_lymph'+vol)

#Number of TP nodules (lymph nodes excluded) for the whole groups and for each volume subgroup
TP_nod_emph=tp_totals['TP_nod_emph']
TP_nod_noemph=tp_totals['TP_nod_noemph']
TP_nod_emph_30_100=tp_totals['TP_nod_emph_30_100']
TP_nod_noemph_30_100=tp_totals['TP_nod_noemph_30_100']
TP_nod_emph_100_300=tp_totals['TP_nod_emph_100_300']
TP_nod_noemph_100_300=tp_totals['TP_nod_noemph_100_300']
TP_nod_emph_300=tp_totals['TP_nod_emph_300']
TP_nod_noemph_300=tp_totals['TP_nod_noemph_300']

#Number of TP lymph nodes for the whole groups and for each volume subgroup
TP_lymph_emph=tp_totals['TP_lymph_emph']
TP_lymph_noemph=tp_totals['TP_lymph_noemph']
TP_lymph_emph_30_100=tp_totals['TP_lymph_emph_30_100']
TP_lymph_noemph_30_100=tp_totals['TP_lymph_noemph_30_100']
TP_lymph_emph_100_300=tp_totals['TP_lymph_emph_100_300']
TP_lymph_noemph_100_300=tp_totals['TP_lymph_noemph_100_300']
TP_lymph_emph_300=tp_totals['TP_lymph_emph_300']
TP_lymph_noemph_300=tp_totals['TP_lymph_noemph_300']

In [184]:
#Sanity check: no TP nodule or lymph node above 300mm3 is expected in either group
assert TP_nod_emph_300==TP_nod_noemph_300==TP_lymph_emph_300==TP_lymph_noemph_300==0

#### Below definition of TP depends on reader/AI

In [185]:
#Get total number of nodules (nodules+lymph nodes) for the whole emphysema/non-emphysema groups and for volume subgroups
#The 300+ totals are kept here even though all of them should be 0 (checked with the assert above) - If not, then delete them
TP_emph=TP_nod_emph+TP_lymph_emph
TP_noemph=TP_nod_noemph+TP_lymph_noemph

TP_emph_30_100=TP_nod_emph_30_100+TP_lymph_emph_30_100
TP_emph_100_300=TP_nod_emph_100_300+TP_lymph_emph_100_300
TP_emph_300=TP_nod_emph_300+TP_lymph_emph_300
TP_noemph_30_100=TP_nod_noemph_30_100+TP_lymph_noemph_30_100
TP_noemph_100_300=TP_nod_noemph_100_300+TP_lymph_noemph_100_300
TP_noemph_300=TP_nod_noemph_300+TP_lymph_noemph_300

In [186]:
#Consistency check: the total of each group must equal the sum of its 30-100 and 100-300 volume subgroups
assert TP_emph==TP_emph_30_100+TP_emph_100_300
assert TP_noemph==TP_noemph_30_100+TP_noemph_100_300

#### Confidence Interval Calculations

In [187]:
#Code below taken from https://gist.github.com/maidens/29939b3383a5e57935491303cf0d8e0b
#For F1 score there was a suggestion on https://github.com/sousanunes/confidence_intervals/blob/master/propagation_confidence_interval.py
#This will not be used since it assumes a normal distribution

def _proportion_confidence_interval(r, n, z): 
    """Compute confidence interval for a proportion.
    https://real-statistics.com/binomial-and-related-distributions/proportion-distribution/proportion-parameter-confidence-interval/
    Follows notation described on pages 46--47 of [1]. 
    
    References
    ----------
    [1] R. G. Newcombe and D. G. Altman, Proportions and their differences, in Statisics
    with Confidence: Confidence intervals and statisctical guidelines, 2nd Ed., D. G. Altman, 
    D. Machin, T. N. Bryant and M. J. Gardner (Eds.), pp. 45-57, BMJ Books, 2000. 

    Based on the book, r is the observed number of subjects with some feature in a sample of size n. z is a percentile from the norm distribution.
    The formula in the link of the code is the same as in https://real-statistics.com/binomial-and-related-distributions/proportion-distribution/proportion-parameter-confidence-interval/
    There is no continuity correction here. This is used in http://stats.org.uk/statistical-inference/Newcombe1998.pdf
    The actual implementation used continuity correction. This is recommended for small sample sizes:  
    https://towardsdatascience.com/five-confidence-intervals-for-proportions-that-you-should-know-about-7ff5484c024f
    """
    
    A = 2*r + z**2
    # B = z*np.sqrt(z**2 + 4*r*(1 - r/n))
    B_low=1+z*np.sqrt(z**2 + 4*r*(1 - r/n) + (((4*r)-(2*n)-1)/n))
    # if B_low<0:
    #     B_low=0
    
    B_high=1+z*np.sqrt(z**2 + 4*r*(1 - r/n) - (((4*r)-(2*n)+1)/n))
    # if B_high>1:
    #     B_high=1

    C = 2*(n + z**2)
    return ((A-B_low)/C, (A+B_high)/C)


def sensitivity_and_specificity_with_confidence_intervals(TP, FP, FN, TN, alpha=0.95):
    """Compute confidence intervals for sensitivity, PPV and F1 score using Wilson's score method.

    Despite its name, this function does not return an interval for specificity, and TN is not used
    in any of the calculations.

    All three intervals are obtained from '_proportion_confidence_interval', which implements the Wilson score
    interval WITH continuity correction. Background on the Wilson interval (with and without correction):
    https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
    https://www.statskingdom.com/doc_confidence_interval.html
    https://statisticaloddsandends.wordpress.com/2019/06/09/wilson-score-and-agresti-coull-intervals-for-binomial-proportions/
    Based on the first link, of the possible approximations, the Wilson score interval methods (with or without
    continuity correction) have been shown to be the most accurate and the most robust, also for small sample
    sizes, though some prefer the Agresti–Coull approach for larger sample sizes.

    Parameters
    ----------
    TP : int
        Number of true positives
    FP : int 
        Number of false positives
    FN : int
        Number of false negatives
    TN : int
        Number of true negatives (not used)
    alpha : float, optional
        Desired confidence. Defaults to 0.95, which yields a 95% confidence interval. 

    Returns
    -------
    sensitivity_confidence_interval : Tuple (float, float)
        Lower and upper bounds on the alpha confidence interval for sensitivity
    PPV_confidence_interval : Tuple (float, float)
        Lower and upper bounds on the alpha confidence interval for PPV
    F1_confidence_interval : Tuple (float, float)
        Lower and upper bounds on the alpha confidence interval for F1 score

    References
    ----------
    [1] R. G. Newcombe and D. G. Altman, Proportions and their differences, in Statistics
    with Confidence: Confidence intervals and statistical guidelines, 2nd Ed., D. G. Altman, 
    D. Machin, T. N. Bryant and M. J. Gardner (Eds.), pp. 45-57, BMJ Books, 2000. 
    [2] E. B. Wilson, Probable inference, the law of succession, and statistical inference,
    J Am Stat Assoc 22:209-12, 1927. 
    """

    # Percentile of the standard normal distribution for a two-sided interval with confidence alpha
    z_crit = -ndtri((1.0-alpha)/2)

    # Sample sizes of the three proportions
    n_sensitivity = TP + FN
    n_ppv = TP + FP
    # For F1 the proportion is 2*TP out of 2*TP+FP+FN. If n=TP+FP+FN is used we get nan errors.
    # F1 is not a binomial outcome (like e.g. accuracy, which is num of correct over num of predicted), so a
    # binomial interval is probably only an approximation here. Check also:
    # https://stats.stackexchange.com/questions/363382/confidence-interval-of-precision-recall-and-f1-score
    # https://stats.stackexchange.com/questions/563582/calculate-confidence-intervals-on-accuracy-metrics
    # If data are not normally distributed we also cannot use simple formulas like those in
    # https://aegis4048.github.io/comprehensive_confidence_intervals_for_python_developers
    n_f1 = 2*TP + (FP+FN)

    return (
        _proportion_confidence_interval(TP, n_sensitivity, z_crit),
        _proportion_confidence_interval(TP, n_ppv, z_crit),
        _proportion_confidence_interval(2*TP, n_f1, z_crit),
    )

In [188]:
def sensitivity(TP,FN): #same as recall
    """Return the sensitivity TP/(TP+FN), i.e. the fraction of actual positives that were detected."""
    actual_positives=TP+FN
    return TP/actual_positives

def PPV(TP,FP): #Same as precision
    """Return the positive predictive value TP/(TP+FP), i.e. the fraction of detections that are correct."""
    detections=TP+FP
    return TP/detections

def F1score(TP,FP,FN):
    """Return the F1 score 2*TP/(2*TP+FP+FN), i.e. the harmonic mean of sensitivity and PPV."""
    doubled_tp=2*TP
    errors=FP+FN
    return doubled_tp/(doubled_tp+errors)

#Metrics with TN in their definition can't be used

##### Example of CI calculation

In [189]:
#Demonstration of the CI calculation: 'a' is passed as 'alpha', the desired confidence (0.5 gives a 50% interval)
for a in [0.5]: #Can also set other values of a to check the CI

    sensitivity_confidence_interval, PPV_confidence_interval, F1_confidence_interval\
    = sensitivity_and_specificity_with_confidence_intervals(37, 20, 2, 0, alpha=a) #Here TP, FP, FN, TN were set based on an example below - just for demonstration

    print("Sensitivity: %f, PPV: %f, F1 score: %f" %(sensitivity(37,2), PPV(37,20),F1score(37,20,2)))
    print("alpha = %f CI for sensitivity:"%a, sensitivity_confidence_interval)
    print("alpha = %f CI for PPV:"%a, PPV_confidence_interval)
    print("alpha = %f CI for F1 score:"%a, F1_confidence_interval)    
    #Confidence intervals of proportions were calculated using the Wilson method (with the continuity correction applied in '_proportion_confidence_interval'). 

Sensitivity: 0.948718, PPV: 0.649123, F1 score: 0.770833
alpha = 0.500000 CI for sensitivity: (0.9040941124792334, 0.9775414611018507)
alpha = 0.500000 CI for PPV: (0.5965219235920692, 0.6988783015915245)
alpha = 0.500000 CI for F1 score: (0.7352529801795313, 0.8034004651312406)


From the intervals above we can conclude that we won't get the same results if we use normal approximations (z=1.96 and mean between lower and upper bound of CI)

In [190]:
#Get total number of nodules/non-nodules that were detected only by AI/human reader for each of the emph/non-emph groups
#Naming: 'FP_*' are findings detected only by the AI and 'FN_*' findings detected only by the reader,
#'nods' are nodules and 'nonods' non-nodules. Each total is the sum of the three volume subgroups

#nodules+Lymph nodes included in the right part of the equations below - if only nodules comments below should be activated
FP_nods_emph=ai_nods_emph_30_100+ai_nods_emph_100_300+ai_nods_emph_300 #-(ai_lymph_emph_30_100+ai_lymph_emph_100_300+ai_lymph_emph_300)
FP_nods_noemph=ai_nods_noemph_30_100+ai_nods_noemph_100_300+ai_nods_noemph_300 #-(ai_lymph_noemph_30_100+ai_lymph_noemph_100_300+ai_lymph_noemph_300)

FP_nonods_emph=ai_nonods_emph_30_100+ai_nonods_emph_100_300+ai_nonods_emph_300
FP_nonods_noemph=ai_nonods_noemph_30_100+ai_nonods_noemph_100_300+ai_nonods_noemph_300

FN_nods_emph=reader_nods_emph_30_100+reader_nods_emph_100_300+reader_nods_emph_300 #-(reader_lymph_emph_30_100+reader_lymph_emph_100_300+reader_lymph_emph_300)
FN_nods_noemph=reader_nods_noemph_30_100+reader_nods_noemph_100_300+reader_nods_noemph_300 #-(reader_lymph_noemph_30_100+reader_lymph_noemph_100_300+reader_lymph_noemph_300)

FN_nonods_emph=reader_nonods_emph_30_100+reader_nonods_emph_100_300+reader_nonods_emph_300
FN_nonods_noemph=reader_nonods_noemph_30_100+reader_nonods_noemph_100_300+reader_nonods_noemph_300

#Similar only for lymph nodes (these are subtracted later when lymph nodes should not count as nodules)
lymph_reader_emph=reader_lymph_emph_30_100+reader_lymph_emph_100_300+reader_lymph_emph_300
lymph_reader_noemph=reader_lymph_noemph_30_100+reader_lymph_noemph_100_300+reader_lymph_noemph_300
lymph_AI_emph=ai_lymph_emph_30_100+ai_lymph_emph_100_300+ai_lymph_emph_300
lymph_AI_noemph=ai_lymph_noemph_30_100+ai_lymph_noemph_100_300+ai_lymph_noemph_300

The explanation below assumes that the ground truth (GT) consists only of the findings that were detected by the AI and/or the reader! For TN, this might be incorrect! We also assumed that the TPs (in REDCap) are always nodules, even though sometimes this might not be correct.

To calculate metrics for AI we consider the following (demonstrated for emphysema - same for non-emphysema cases):

1. TP_AI=TP_both+FP_nods_emph (nodules found as nodules) 
2. FP_AI=FP_nonods_emph (non-nodules found as nodules)
3. FN_AI=FN_nods_emph (nodules missed by AI)

Similarly, for reader metrics:

1. TP_read=TP_both+FN_nods_emph
2. FP_read=FN_nonods_emph
3. FN_read=FP_nods_emph

'AI found and reader found' can be seen from TP in REDCap

'AI missed and reader missed' does not exist: it would require the consensus to have found extra nodules, whereas they only reviewed the discrepancies

In [191]:
#Lymph nodes detected only by the AI / only by the reader, over both the emphysema and non-emphysema groups
AI_found_lymph=lymph_AI_emph+lymph_AI_noemph
read_found_lymph=lymph_reader_emph+lymph_reader_noemph

#Nodules detected only by the reader / only by the AI over both groups ('FN_nods'/'FP_nods' include lymph nodes, so these are subtracted)
reader_found_only=FN_nods_emph+FN_nods_noemph - read_found_lymph
AI_found_only=FP_nods_emph+FP_nods_noemph-AI_found_lymph
print("Actual number of nodules among discrepancies is",reader_found_only+AI_found_only)
print("From those {} were detected by the AI only and {} from reader only".format(AI_found_only,reader_found_only))
print("\n")
print("Actual number of lymph nodes among discrepancies is",read_found_lymph+AI_found_lymph)
print("From those {} were detected by the AI only and {} from reader only".format(AI_found_lymph,read_found_lymph))

Actual number of nodules among discrepancies is 73
From those 39 were detected by the AI only and 34 from reader only


Actual number of lymph nodes among discrepancies is 45
From those 10 were detected by the AI only and 35 from reader only


#### Table only for nodules (lymph nodes not included in calculations - considered as non-existent) in emphysema cases. Statistical tests based on it

In [192]:
#TP/FP/FN of the AI and of the reader for the emphysema group, with lymph nodes excluded from all nodule counts
#AI: nodules found by both + nodules found only by the AI
TP_AI=TP_emph+FP_nods_emph-(TP_lymph_emph) -lymph_AI_emph #'FP_nods' include lymph and that's why we subtract 'lymph_AI_emph'
FP_AI=FP_nonods_emph
FN_AI=TP_read_only=FN_nods_emph-lymph_reader_emph #nodules detected only by the reader, excluding lymph nodes

#Reader: nodules found by both + nodules found only by the reader
TP_read=TP_emph+FN_nods_emph-(TP_lymph_emph) - lymph_reader_emph
FP_read=FN_nonods_emph
FN_read=TP_AI_only=FP_nods_emph-lymph_AI_emph #nodules detected only by AI, excluding lymph nodes

TP_both=TP_emph-(TP_lymph_emph) #Common nodules detected by both AI and reader

#Print the above
print("Emphysema numbers")
print("TP_AI",TP_AI)
print("FP_AI",FP_AI)
print("FN_AI",FN_AI)
print("TP_read",TP_read)
print("FP_read",FP_read)
print("FN_read",FN_read)
print("TP_both",TP_both)

Emphysema numbers
TP_AI 53
FP_AI 20
FN_AI 13
TP_read 47
FP_read 6
FN_read 19
TP_both 34


In [193]:
#Two tables: one for emphysema and one for non-emphysema, having also percentages.
#Assessing detection performance for emph/non-emph groups - For nodules only, we treat lymph nodes as non-existent
#
#This cell builds the emphysema table. Every cell is written with a single .loc call:
#chained assignment (df[col].iloc[i]=...) writes to a temporary object under pandas Copy-on-Write
#and the table would silently keep its old values.

metric_cols=['sensitivity (95% CI)','PPV (95% CI)','F1 score (95% CI)']
count_cols=['nodules detected','non-nodules detected','nodules missed']

#(TP, FP, FN) for each row of the table
group_counts={'AI, emphysema':(TP_AI,FP_AI,FN_AI),
              'reader, emphysema':(TP_read,FP_read,FN_read)}

df_all_new=pd.DataFrame(columns=metric_cols+count_cols,index=list(group_counts))

for row_label,(tp,fp,fn) in group_counts.items():
    point_estimates=(sensitivity(tp,fn),PPV(tp,fp),F1score(tp,fp,fn))

    #Calculate CIs for sensitivity, PPV, and F1score (the fourth argument, TN, is passed as 0)
    ci_sens,ci_ppv,ci_f1=sensitivity_and_specificity_with_confidence_intervals(tp,fp,fn,0,alpha=0.95)

    #Metric cells hold 'estimate (lower, upper)', everything rounded to 2 digits
    for col,estimate,ci in zip(metric_cols,point_estimates,(ci_sens,ci_ppv,ci_f1)):
        rounded_ci=tuple([np.round(x,2) for x in ci])
        df_all_new.loc[row_label,col]=str(np.round(estimate,2))+' '+str(rounded_ci)

    #Raw counts first; percentages are appended further below, after the totals are computed
    for col,count in zip(count_cols,(tp,fp,fn)):
        df_all_new.loc[row_label,col]=count

df_all_new['All findings']=df_all_new['nodules detected']+df_all_new['non-nodules detected']+df_all_new['nodules missed']

#'Total' row: sums of the numeric columns only, metric columns are left blank
column_totals=df_all_new[count_cols+['All findings']].sum()
df_all_new.loc['Total']=['']*len(metric_cols)+column_totals.tolist()

#Kept from the original cell (not used within this cell): all findings over both groups, excluding the 'Total' row
all_findings=df_all_new.iloc[:-1,3:-1].sum().sum()

for row_label,(tp,fp,fn) in group_counts.items(): #Add percentages to df
    row_all=tp+fp+fn #all findings of this group

    for col,count in zip(count_cols,(tp,fp,fn)):
        percentage=np.round((count/row_all)*100,1) #% of TP / FP / FN within the group
        df_all_new.loc[row_label,col]=str(count)+' ('+str(percentage)+'%)'

    df_all_new.loc[row_label,'All findings']=str(row_all)+' (100%)'

df_all_new

Unnamed: 0,sensitivity (95% CI),PPV (95% CI),F1 score (95% CI),nodules detected,non-nodules detected,nodules missed,All findings
"AI, emphysema","0.8 (0.68, 0.89)","0.73 (0.61, 0.82)","0.76 (0.68, 0.83)",53 (61.6%),20 (23.3%),13 (15.1%),86 (100%)
"reader, emphysema","0.71 (0.59, 0.81)","0.89 (0.76, 0.95)","0.79 (0.7, 0.86)",47 (65.3%),6 (8.3%),19 (26.4%),72 (100%)
Total,,,,100,26,32,158


In [194]:
#Save the emphysema nodules-only table (current contents of df_all_new) to an Excel file; the relative path resolves against the working directory
df_all_new.to_excel('nodules_only_emphysema.xlsx')

#### McNemar's test

- If we want the test with continuity correction we should use 'exact=False, correction=True' (this is what the cells below use). We can compare it with the uncorrected result ('exact=False, correction=False') to see if the two p-values are on different sides of the traditional 0.05 cutoff. If they are, we would have to check the 'exact=True' method (no corrections at all) to decide which to keep. Taken from https://cran.r-project.org/web/packages/exact2x2/vignettes/exactMcNemar.pdf
- Continuity corrections are no longer recommended according to https://stats.stackexchange.com/questions/6448/continuity-correction-for-pearson-and-mcnemars-chi-square-test, but the statistician suggested using one due to the small sample size.
- McNemar's test is used when we want to know whether there is a statistically significant difference in the proportion of nodules detected by AI and reader (paraphrased from https://www.geeksforgeeks.org/how-to-perform-mcnemars-test-in-python/).
- Other useful sources: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8182550/ (paper used it for similar topic), https://stats.stackexchange.com/questions/358101/statistical-significance-p-value-for-comparing-two-classifiers-with-respect-to

In [195]:
#McNemar's test to compare Reader vs AI (using consensus panel)
#The chi-square version with continuity correction is used (exact=False, correction=True).
#Each entry below pairs the message to print with the 2x2 table given to mcnemar:
# - nodules: [[found by both AI and reader, found by AI and missed by reader], [found by reader and missed by AI, 0]]
# - FPs:     [[0, FPs of AI], [FPs of reader, 0]]
for message, data in (
    ("McNemar's test (nodules only), AI_vs_Reader with continuity correction (not exact) p value is",
     [[TP_both, FN_read],
      [FN_AI, 0]]),
    ("For FP findings, with continuity correction (not exact) p value is",
     [[0, FP_AI],
      [FP_read, 0]]),
):
    print(message, mcnemar(data, exact=False, correction=True).pvalue)

McNemar's test (nodules only), AI_vs_Reader with continuity correction (not exact) p value is 0.3767591178115821
For FP findings, with continuity correction (not exact) p value is 0.010787449254670376


#### Cohen's Kappa

- According to https://en.wikipedia.org/wiki/Fleiss%27_kappa, Fleiss' kappa must be used when assessing the agreement between three or more raters or the intra-rater reliability (for one appraiser versus themself). To calculate this, the fleiss_kappa() function from the statsmodels library can be used. Cohen's kappa can be used for two readers and this is what we use below (https://www.statology.org/cohens-kappa-python/). We should only have 0 or 1 labels, since otherwise it is considered a multiclass problem.
- Other useful sources https://www.statology.org/cohens-kappa-statistic/, https://vitalflux.com/cohen-kappa-score-python-example-machine-learning/
- Based on the last one, in the contingency table we have reader 1 (actual results) horizontally and reader 2 (predictions) vertically. For this to hold, it is reasonable to assume that reader 1 is the GT by radiologists and reader 2 is either the reader or AI.

In [196]:
# #AI vs GT, TP_both included
# # Table looks like below:
# #              GT
# #             Yes                       No
# # AI   Yes    TP_both+TP_AI_only      FP_AI
# #      No     FN_AI                   0

# list_of_lists=[[1 for x in range(TP_both)],[1 for x in range(TP_AI_only)],[0 for x in range(FP_AI) ],[1 for x in range(FN_AI) ] ] 
# rater_GT=[item for sublist in list_of_lists for item in sublist]

# list_of_lists=[[1 for x in range(TP_both)], [1 for x in range(TP_AI_only)],[1 for x in range(FP_AI) ],[0 for x in range(FN_AI) ]]
# rater_AI=[item for sublist in list_of_lists for item in sublist]

# print("AI vs consensus (for nodules only), kappa is ",cohen_kappa_score(rater_GT, rater_AI))

# #Reader vs GT, TP_both included
# # Table looks like below:
# #                    GT
# #                   Yes                       No
# # Reader   Yes    TP_both+TP_read_only      FP_read
# #           No     FN_read                   0

# list_of_lists=[[1 for x in range(TP_both)],[1 for x in range(TP_read_only)],[0 for x in range(FP_read) ],[1 for x in range(FN_read) ] ] 
# rater_GT=[item for sublist in list_of_lists for item in sublist]

# list_of_lists=[[1 for x in range(TP_both)],[1 for x in range(TP_read_only)],[1 for x in range(FP_read) ],[0 for x in range(FN_read) ]]
# rater_read=[item for sublist in list_of_lists for item in sublist]

# print("Reader vs consensus (for non-nodules only), kappa is ",cohen_kappa_score(rater_GT, rater_read))

#### Nodule only table with metrics for non-emphysema - Statistical tests based on it

In [197]:
#Nodule-only counts for the non-emphysema group; lymph nodes are subtracted from every term

TP_both=TP_noemph-TP_lymph_noemph #Common nodules detected by both AI and reader

#'FP_nods' include lymph and that's why we subtract 'lymph_AI_noemph'
TP_AI_only=FP_nods_noemph-lymph_AI_noemph #nodules detected only by AI, excluding lymph nodes
TP_read_only=FN_nods_noemph-lymph_reader_noemph #nodules detected only by the reader, excluding lymph nodes

#AI: its true positives are the common nodules plus the AI-only ones; it misses what only the reader found
TP_AI=TP_both+TP_AI_only
FP_AI=FP_nonods_noemph
FN_AI=TP_read_only

#Reader: mirror image of the AI counts
TP_read=TP_both+TP_read_only
FP_read=FN_nonods_noemph
FN_read=TP_AI_only

#Print the above
print("Non-emphysema numbers")
for label,value in (("TP_AI",TP_AI),
                    ("FP_AI",FP_AI),
                    ("FN_AI",FN_AI),
                    ("TP_read",TP_read),
                    ("FP_read",FP_read),
                    ("FN_read",FN_read),
                    ("TP_both",TP_both)):
    print(label,value)

Non-emphysema numbers
TP_AI 77
FP_AI 18
FN_AI 21
TP_read 78
FP_read 22
FN_read 20
TP_both 57


In [198]:
# Second part of table split for non-emphysema

#Assessing detection performance - For nodules only, we treat lymph nodes as non-existent - for emph/non-emph groups
#
#Every cell is written with a single .loc call: chained assignment (df[col].iloc[i]=...) writes to a
#temporary object under pandas Copy-on-Write and the table would silently keep its old values.

metric_cols=['sensitivity (95% CI)','PPV (95% CI)','F1 score (95% CI)']
count_cols=['nodules detected','non-nodules detected','nodules missed']

#(TP, FP, FN) for each row of the table
group_counts={'AI, nonemphysema':(TP_AI,FP_AI,FN_AI),
              'reader, nonemphysema':(TP_read,FP_read,FN_read)}

df_all_new=pd.DataFrame(columns=metric_cols+count_cols,index=list(group_counts))

for row_label,(tp,fp,fn) in group_counts.items():
    point_estimates=(sensitivity(tp,fn),PPV(tp,fp),F1score(tp,fp,fn))

    #Calculate CIs for sensitivity, PPV, and F1score (the fourth argument, TN, is passed as 0)
    ci_sens,ci_ppv,ci_f1=sensitivity_and_specificity_with_confidence_intervals(tp,fp,fn,0,alpha=0.95)

    #Metric cells hold 'estimate (lower, upper)', everything rounded to 2 digits
    for col,estimate,ci in zip(metric_cols,point_estimates,(ci_sens,ci_ppv,ci_f1)):
        rounded_ci=tuple([np.round(x,2) for x in ci])
        df_all_new.loc[row_label,col]=str(np.round(estimate,2))+' '+str(rounded_ci)

    #Raw counts first; percentages are appended further below, after the totals are computed
    for col,count in zip(count_cols,(tp,fp,fn)):
        df_all_new.loc[row_label,col]=count

df_all_new['All findings']=df_all_new['nodules detected']+df_all_new['non-nodules detected']+df_all_new['nodules missed']

#'Total' row: sums of the numeric columns only, metric columns are left blank
column_totals=df_all_new[count_cols+['All findings']].sum()
df_all_new.loc['Total']=['']*len(metric_cols)+column_totals.tolist()

#Kept from the original cell (not used within this cell): all findings over both groups, excluding the 'Total' row
all_findings=df_all_new.iloc[:-1,3:-1].sum().sum()

for row_label,(tp,fp,fn) in group_counts.items(): #Add percentages to df
    row_all=tp+fp+fn #all findings of this group

    for col,count in zip(count_cols,(tp,fp,fn)):
        percentage=np.round((count/row_all)*100,1) #% of TP / FP / FN within the group
        df_all_new.loc[row_label,col]=str(count)+' ('+str(percentage)+'%)'

    df_all_new.loc[row_label,'All findings']=str(row_all)+' (100%)'

df_all_new

Unnamed: 0,sensitivity (95% CI),PPV (95% CI),F1 score (95% CI),nodules detected,non-nodules detected,nodules missed,All findings
"AI, nonemphysema","0.79 (0.69, 0.86)","0.81 (0.71, 0.88)","0.8 (0.73, 0.85)",77 (66.4%),18 (15.5%),21 (18.1%),116 (100%)
"reader, nonemphysema","0.8 (0.7, 0.87)","0.78 (0.68, 0.85)","0.79 (0.72, 0.84)",78 (65.0%),22 (18.3%),20 (16.7%),120 (100%)
Total,,,,155,40,41,236


In [199]:
#Save the non-emphysema nodules-only table (current contents of df_all_new) to an Excel file; the relative path resolves against the working directory
df_all_new.to_excel('nodules_only_nonemphysema.xlsx')

##### McNemar's test

In [200]:
#McNemar's test (chi-square version with continuity correction: exact=False, correction=True), non-emphysema group
#Each entry below pairs the message to print with the 2x2 table given to mcnemar:
# - nodules: [[found by both AI and reader, found by AI and missed by reader], [found by reader and missed by AI, 0]]
# - FPs:     [[0, FPs of AI], [FPs of reader, 0]]
for message, data in (
    ("For nodules only (AI vs reader) with continuity correction (not exact) p value is",
     [[TP_both, FN_read],
      [FN_AI, 0]]),
    ("For FP findings, with continuity correction (not exact) p value is",
     [[0, FP_AI],
      [FP_read, 0]]),
):
    print(message, mcnemar(data, exact=False, correction=True).pvalue)

For nodules only (AI vs reader) with continuity correction (not exact) p value is 1.0
For FP findings, with continuity correction (not exact) p value is 0.6352562959972483


##### Cohen's Kappa

In [201]:
# #AI vs GT, TP_both included - 
# list_of_lists=[[1 for x in range(TP_both)],[1 for x in range(TP_AI_only)],[0 for x in range(FP_AI) ],[1 for x in range(FN_AI) ] ] 
# rater_GT=[item for sublist in list_of_lists for item in sublist]

# list_of_lists=[[1 for x in range(TP_both)], [1 for x in range(TP_AI_only)],[1 for x in range(FP_AI) ],[0 for x in range(FN_AI) ]]
# rater_AI=[item for sublist in list_of_lists for item in sublist]

# print("AI vs consensus (for nodules only), kappa is ",cohen_kappa_score(rater_GT, rater_AI))

# #Reader vs GT, TP_both included
# list_of_lists=[[1 for x in range(TP_both)],[1 for x in range(TP_read_only)],[0 for x in range(FP_read) ],[1 for x in range(FN_read) ] ] 
# rater_GT=[item for sublist in list_of_lists for item in sublist]

# list_of_lists=[[1 for x in range(TP_both)],[1 for x in range(TP_read_only)],[1 for x in range(FP_read) ],[0 for x in range(FN_read) ]]
# rater_read=[item for sublist in list_of_lists for item in sublist]

# print("read vs consensus (for non-nodules only), kappa is ",cohen_kappa_score(rater_GT, rater_read))

#### Comparison of volume subgroups for emphysema - Statistical tests based on it

In [202]:
#Nodule-only counts per volume subgroup for the emphysema group; lymph nodes are subtracted from every term

#30-100mm3
TP_both_100=TP_emph_30_100-TP_lymph_emph_30_100 #common nodules of AI and reader
FN_AI_100=reader_nods_emph_30_100-reader_lymph_emph_30_100 #nodules of reader excluding lymph nodes
FN_read_100=ai_nods_emph_30_100-ai_lymph_emph_30_100 #nodules of AI excluding lymph nodes
TP_AI_100=TP_both_100+FN_read_100 #common nodules plus the ones only AI found
TP_read_100=TP_both_100+FN_AI_100 #common nodules plus the ones only the reader found
FP_AI_100=ai_nonods_emph_30_100
FP_read_100=reader_nonods_emph_30_100

#100-300mm3
TP_both_100_300=TP_emph_100_300-TP_lymph_emph_100_300 #common nodules of AI and reader
FN_AI_100_300=reader_nods_emph_100_300-reader_lymph_emph_100_300 #nodules of reader excluding lymph nodes
FN_read_100_300=ai_nods_emph_100_300-ai_lymph_emph_100_300 #nodules of AI excluding lymph nodes
TP_AI_100_300=TP_both_100_300+FN_read_100_300
TP_read_100_300=TP_both_100_300+FN_AI_100_300
FP_AI_100_300=ai_nonods_emph_100_300
FP_read_100_300=reader_nonods_emph_100_300

#Print the above
print("Emphysema numbers")
for label,value in (("TP_AI_100",TP_AI_100),
                    ("FP_AI_100",FP_AI_100),
                    ("FN_AI_100",FN_AI_100),
                    ("TP_read_100",TP_read_100),
                    ("FP_read_100",FP_read_100),
                    ("FN_read_100",FN_read_100),
                    ("TP_AI_100_300",TP_AI_100_300),
                    ("FP_AI_100_300",FP_AI_100_300),
                    ("FN_AI_100_300",FN_AI_100_300),
                    ("TP_read_100_300",TP_read_100_300),
                    ("FP_read_100_300",FP_read_100_300),
                    ("FN_read_100_300",FN_read_100_300),
                    ("TP_both_100",TP_both_100),
                    ("TP_both_100_300",TP_both_100_300)):
    print(label,value)

Emphysema numbers
TP_AI_100 30
FP_AI_100 8
FN_AI_100 10
TP_read_100 29
FP_read_100 5
FN_read_100 11
TP_AI_100_300 23
FP_AI_100_300 12
FN_AI_100_300 3
TP_read_100_300 18
FP_read_100_300 1
FN_read_100_300 8
TP_both_100 19
TP_both_100_300 15


In [203]:
#For emphysema only: comparison between reader and AI for the two volume subgroups (nodules only)
#
#Every cell is written with a single .iloc call: chained assignment (df[col].iloc[i]=...) writes to a
#temporary object under pandas Copy-on-Write and the table would silently keep its old values.

metric_cols=['sensitivity (95% CI)','PPV (95% CI)','F1 score (95% CI)']
count_cols=['nodules correctly detected','non-nodules incorrectly detected','nodules missed']

#The two '' rows (positions 2 and 5) hold the subtotal of the AI rows and of the reader rows.
#Their index labels are duplicates, so rows are always addressed by position and never by label.
df_all_new=pd.DataFrame(columns=metric_cols+count_cols,
                        index=['AI, emphysema 30-100mm3', 'AI, emphysema 100-300mm3','',
                               'reader, emphysema 30-100mm3', 'reader, emphysema 100-300mm3',''
                              ])

df_all_new.index.name = 'GT by radiologists for discrepancies'

#(row position, TP, FP, FN)
ai_rows=[(0,TP_AI_100,FP_AI_100,FN_AI_100),
         (1,TP_AI_100_300,FP_AI_100_300,FN_AI_100_300)]
reader_rows=[(3,TP_read_100,FP_read_100,FN_read_100),
             (4,TP_read_100_300,FP_read_100_300,FN_read_100_300)]

#All findings (TP+FP+FN over both volume subgroups) of AI and of the reader - denominators of the percentages
AI_all=sum(tp+fp+fn for pos,tp,fp,fn in ai_rows)
reader_all=sum(tp+fp+fn for pos,tp,fp,fn in reader_rows)

df_all_new['All findings']='' #every row of this column is filled below
all_findings_pos=df_all_new.columns.get_loc('All findings')

for group_rows,group_total,subtotal_pos in ((ai_rows,AI_all,2),(reader_rows,reader_all,5)):
    for pos,tp,fp,fn in group_rows:
        point_estimates=(sensitivity(tp,fn),PPV(tp,fp),F1score(tp,fp,fn))

        #CIs for sensitivity, PPV, and F1score (the fourth argument, TN, is passed as 0)
        ci_sens,ci_ppv,ci_f1=sensitivity_and_specificity_with_confidence_intervals(tp,fp,fn,0,alpha=0.95)

        #Metric cells hold 'estimate (lower, upper)', everything rounded to 2 digits
        for col_pos,(estimate,ci) in enumerate(zip(point_estimates,(ci_sens,ci_ppv,ci_f1))):
            rounded_ci=tuple([np.round(x,2) for x in ci])
            df_all_new.iloc[pos,col_pos]=str(np.round(estimate,2))+' '+str(rounded_ci)

        #Count cells hold 'count (% of all findings of AI / of the reader)'
        for col_pos,count in enumerate((tp,fp,fn),start=len(metric_cols)):
            percentage=np.round((count/group_total)*100,1)
            df_all_new.iloc[pos,col_pos]=str(count)+' ('+str(percentage)+'%)'

        row_total=tp+fp+fn
        df_all_new.iloc[pos,all_findings_pos]=str(row_total)+' ('+str(np.round(100*row_total/group_total,1))+'%)'

    #Subtotal row: blank everywhere except 'All findings'
    df_all_new.iloc[subtotal_pos,:all_findings_pos]=''
    df_all_new.iloc[subtotal_pos,all_findings_pos]=str(group_total)+' (100%)'

df_all_new #Detection performance comparison per volume subgroup (nodules only, lymph nodes excluded)

Unnamed: 0_level_0,sensitivity (95% CI),PPV (95% CI),F1 score (95% CI),nodules correctly detected,non-nodules incorrectly detected,nodules missed,All findings
GT by radiologists for discrepancies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"AI, emphysema 30-100mm3","0.75 (0.58, 0.87)","0.79 (0.62, 0.9)","0.77 (0.66, 0.85)",30 (34.9%),8 (9.3%),10 (11.6%),48 (55.8%)
"AI, emphysema 100-300mm3","0.88 (0.69, 0.97)","0.66 (0.48, 0.8)","0.75 (0.62, 0.85)",23 (26.7%),12 (14.0%),3 (3.5%),38 (44.2%)
,,,,,,,86 (100%)
"reader, emphysema 30-100mm3","0.72 (0.56, 0.85)","0.85 (0.68, 0.94)","0.78 (0.67, 0.87)",29 (40.3%),5 (6.9%),11 (15.3%),45 (62.5%)
"reader, emphysema 100-300mm3","0.69 (0.48, 0.85)","0.95 (0.72, 1.0)","0.8 (0.65, 0.9)",18 (25.0%),1 (1.4%),8 (11.1%),27 (37.5%)
,,,,,,,72 (100%)


In [204]:
#Save the emphysema volume-subgroup table (current contents of df_all_new) to an Excel file; the relative path resolves against the working directory
df_all_new.to_excel('nodules_only_volumes_emphysema_all.xlsx')

In [205]:
#McNemar's test (chi-square version with continuity correction: exact=False, correction=True)
#for each volume subgroup of the emphysema group.
#Each entry pairs the message to print with the 2x2 table given to mcnemar:
# - nodules: [[found by both AI and reader, found by AI and missed by reader], [found by reader and missed by AI, 0]]
# - FPs:     [[0, FPs of AI], [FPs of reader, 0]]
mcnemar_inputs=[
    ("For nodules only (AI vs reader) of 30-100mm3 with continuity correction (not exact) p value is",
     [[TP_both_100, FN_read_100],
      [FN_AI_100, 0]]),
    ("For FP findings of 30-100mm3, with continuity correction (not exact) p value is",
     [[0, FP_AI_100],
      [FP_read_100, 0]]),
    ("For nodules only (AI vs reader) of 100-300mm3 with continuity correction (not exact) p value is",
     [[TP_both_100_300, FN_read_100_300],
      [FN_AI_100_300, 0]]),
    ("For FP findings of 100-300mm3, with continuity correction (not exact) p value is",
     [[0, FP_AI_100_300],
      [FP_read_100_300, 0]]),
]

for position, (message, data) in enumerate(mcnemar_inputs):
    print(message, mcnemar(data, exact=False, correction=True).pvalue)
    if position == 1: #blank lines between the 30-100mm3 and the 100-300mm3 results
        print("\n")

For nodules only (AI vs reader) of 30-100mm3 with continuity correction (not exact) p value is 1.0
For FP findings of 30-100mm3, with continuity correction (not exact) p value is 0.5790997419539188


For nodules only (AI vs reader) of 100-300mm3 with continuity correction (not exact) p value is 0.22779999398822554
For FP findings of 100-300mm3, with continuity correction (not exact) p value is 0.0055456673152440615


#### Comparison of volume subgroups for no emphysema - Statistical tests based on it

In [206]:
#Nodule-only counts per volume subgroup for the non-emphysema group; lymph nodes are subtracted from every term

#30-100mm3
TP_both_100=TP_noemph_30_100-TP_lymph_noemph_30_100 #common nodules of AI and reader
FN_AI_100=reader_nods_noemph_30_100-reader_lymph_noemph_30_100 #nodules of reader excluding lymph nodes
FN_read_100=ai_nods_noemph_30_100-ai_lymph_noemph_30_100 #nodules of AI excluding lymph nodes
TP_AI_100=TP_both_100+FN_read_100 #common nodules plus the ones only AI found
TP_read_100=TP_both_100+FN_AI_100 #common nodules plus the ones only the reader found
FP_AI_100=ai_nonods_noemph_30_100
FP_read_100=reader_nonods_noemph_30_100

#100-300mm3
TP_both_100_300=TP_noemph_100_300-TP_lymph_noemph_100_300 #common nodules of AI and reader
FN_AI_100_300=reader_nods_noemph_100_300-reader_lymph_noemph_100_300 #nodules of reader excluding lymph nodes
FN_read_100_300=ai_nods_noemph_100_300-ai_lymph_noemph_100_300 #nodules of AI excluding lymph nodes
TP_AI_100_300=TP_both_100_300+FN_read_100_300
TP_read_100_300=TP_both_100_300+FN_AI_100_300
FP_AI_100_300=ai_nonods_noemph_100_300
FP_read_100_300=reader_nonods_noemph_100_300

#Print the above
print("Non-emphysema numbers")
for label,value in (("TP_AI_100",TP_AI_100),
                    ("FP_AI_100",FP_AI_100),
                    ("FN_AI_100",FN_AI_100),
                    ("TP_read_100",TP_read_100),
                    ("FP_read_100",FP_read_100),
                    ("FN_read_100",FN_read_100),
                    ("TP_AI_100_300",TP_AI_100_300),
                    ("FP_AI_100_300",FP_AI_100_300),
                    ("FN_AI_100_300",FN_AI_100_300),
                    ("TP_read_100_300",TP_read_100_300),
                    ("FP_read_100_300",FP_read_100_300),
                    ("FN_read_100_300",FN_read_100_300),
                    ("TP_both_100",TP_both_100),
                    ("TP_both_100_300",TP_both_100_300)):
    print(label,value)

Non-emphysema numbers
TP_AI_100 58
FP_AI_100 5
FN_AI_100 19
TP_read_100 62
FP_read_100 20
FN_read_100 15
TP_AI_100_300 19
FP_AI_100_300 13
FN_AI_100_300 2
TP_read_100_300 16
FP_read_100_300 2
FN_read_100_300 5
TP_both_100 43
TP_both_100_300 14


In [207]:
#For non-emphysema only: comparison between reader and AI for the two volume subgroups (nodules only)
#
#Every cell is written with a single .iloc call: chained assignment (df[col].iloc[i]=...) writes to a
#temporary object under pandas Copy-on-Write and the table would silently keep its old values.

metric_cols=['sensitivity (95% CI)','PPV (95% CI)','F1 score (95% CI)']
count_cols=['nodules correctly detected','non-nodules incorrectly detected','nodules missed']

#The two '' rows (positions 2 and 5) hold the subtotal of the AI rows and of the reader rows.
#Their index labels are duplicates, so rows are always addressed by position and never by label.
df_all_new=pd.DataFrame(columns=metric_cols+count_cols,
                        index=['AI, nonemphysema 30-100mm3', 'AI, nonemphysema 100-300mm3','',
                               'reader, nonemphysema 30-100mm3', 'reader, nonemphysema 100-300mm3',''
                              ])

df_all_new.index.name = 'GT by radiologists for discrepancies'

#(row position, TP, FP, FN)
ai_rows=[(0,TP_AI_100,FP_AI_100,FN_AI_100),
         (1,TP_AI_100_300,FP_AI_100_300,FN_AI_100_300)]
reader_rows=[(3,TP_read_100,FP_read_100,FN_read_100),
             (4,TP_read_100_300,FP_read_100_300,FN_read_100_300)]

#All findings (TP+FP+FN over both volume subgroups) of AI and of the reader - denominators of the percentages
AI_all=sum(tp+fp+fn for pos,tp,fp,fn in ai_rows)
reader_all=sum(tp+fp+fn for pos,tp,fp,fn in reader_rows)

df_all_new['All findings']='' #every row of this column is filled below
all_findings_pos=df_all_new.columns.get_loc('All findings')

for group_rows,group_total,subtotal_pos in ((ai_rows,AI_all,2),(reader_rows,reader_all,5)):
    for pos,tp,fp,fn in group_rows:
        point_estimates=(sensitivity(tp,fn),PPV(tp,fp),F1score(tp,fp,fn))

        #CIs for sensitivity, PPV, and F1score (the fourth argument, TN, is passed as 0)
        ci_sens,ci_ppv,ci_f1=sensitivity_and_specificity_with_confidence_intervals(tp,fp,fn,0,alpha=0.95)

        #Metric cells hold 'estimate (lower, upper)', everything rounded to 2 digits
        for col_pos,(estimate,ci) in enumerate(zip(point_estimates,(ci_sens,ci_ppv,ci_f1))):
            rounded_ci=tuple([np.round(x,2) for x in ci])
            df_all_new.iloc[pos,col_pos]=str(np.round(estimate,2))+' '+str(rounded_ci)

        #Count cells hold 'count (% of all findings of AI / of the reader)'
        for col_pos,count in enumerate((tp,fp,fn),start=len(metric_cols)):
            percentage=np.round((count/group_total)*100,1)
            df_all_new.iloc[pos,col_pos]=str(count)+' ('+str(percentage)+'%)'

        row_total=tp+fp+fn
        df_all_new.iloc[pos,all_findings_pos]=str(row_total)+' ('+str(np.round(100*row_total/group_total,1))+'%)'

    #Subtotal row: blank everywhere except 'All findings'
    df_all_new.iloc[subtotal_pos,:all_findings_pos]=''
    df_all_new.iloc[subtotal_pos,all_findings_pos]=str(group_total)+' (100%)'

df_all_new #Detection performance comparison per volume subgroup (nodules only, lymph nodes excluded)

Unnamed: 0_level_0,sensitivity (95% CI),PPV (95% CI),F1 score (95% CI),nodules correctly detected,non-nodules incorrectly detected,nodules missed,All findings
GT by radiologists for discrepancies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"AI, nonemphysema 30-100mm3","0.75 (0.64, 0.84)","0.92 (0.82, 0.97)","0.83 (0.75, 0.88)",58 (50.0%),5 (4.3%),19 (16.4%),82 (70.7%)
"AI, nonemphysema 100-300mm3","0.9 (0.68, 0.98)","0.59 (0.41, 0.76)","0.72 (0.57, 0.83)",19 (16.4%),13 (11.2%),2 (1.7%),34 (29.3%)
,,,,,,,116 (100%)
"reader, nonemphysema 30-100mm3","0.81 (0.7, 0.88)","0.76 (0.65, 0.84)","0.78 (0.71, 0.84)",62 (51.7%),20 (16.7%),15 (12.5%),97 (80.8%)
"reader, nonemphysema 100-300mm3","0.76 (0.52, 0.91)","0.89 (0.64, 0.98)","0.82 (0.66, 0.92)",16 (13.3%),2 (1.7%),5 (4.2%),23 (19.2%)
,,,,,,,120 (100%)


In [208]:
#Save the non-emphysema volume-subgroup table (current contents of df_all_new) to an Excel file; the relative path resolves against the working directory
df_all_new.to_excel('nodules_only_volumes_nonemphysema_all.xlsx')

In [209]:
#McNemar's test (chi-square version with continuity correction: exact=False, correction=True)
#for each volume subgroup of the non-emphysema group.
#Each entry pairs the message to print with the 2x2 table given to mcnemar:
# - nodules: [[found by both AI and reader, found by AI and missed by reader], [found by reader and missed by AI, 0]]
# - FPs:     [[0, FPs of AI], [FPs of reader, 0]]
mcnemar_inputs=[
    ("For nodules only (AI vs reader) of 30-100mm3 with continuity correction (not exact) p value is",
     [[TP_both_100, FN_read_100],
      [FN_AI_100, 0]]),
    ("For FP findings of 30-100mm3, with continuity correction (not exact) p value is",
     [[0, FP_AI_100],
      [FP_read_100, 0]]),
    ("For nodules only (AI vs reader) of 100-300mm3 with continuity correction (not exact) p value is",
     [[TP_both_100_300, FN_read_100_300],
      [FN_AI_100_300, 0]]),
    ("For FP findings of 100-300mm3, with continuity correction (not exact) p value is",
     [[0, FP_AI_100_300],
      [FP_read_100_300, 0]]),
]

for position, (message, data) in enumerate(mcnemar_inputs):
    print(message, mcnemar(data, exact=False, correction=True).pvalue)
    if position == 1: #blank lines between the 30-100mm3 and the 100-300mm3 results
        print("\n")

For nodules only (AI vs reader) of 30-100mm3 with continuity correction (not exact) p value is 0.6069054272179508
For FP findings of 30-100mm3, with continuity correction (not exact) p value is 0.005110260660855866


For nodules only (AI vs reader) of 100-300mm3 with continuity correction (not exact) p value is 0.4496917979688908
For FP findings of 100-300mm3, with continuity correction (not exact) p value is 0.009823274507519235


A volume-based analysis of the subcategories is not possible, since volume subgroups are only available for TPs

Nodule types

In [210]:
# #Further analysis for nodule/lymph node subcategories - Not kept for now
# #Detailed analysis of what detected or not from both AI and reader for each category in nodules & lymph nodes 

# df_categories=pd.DataFrame(columns=['TP','FP','FN'], #below index with the correct order as above
#                           index=['pleural nodules',
#                                  'calcified nodules',
#                                  'subsolid & ground glass nodules',
#                                  'other nodules',
#                                  'atypical PFNs',
#                                  'typical PFNs & periphysural lymph nodes',
#                                  'bronchiovascular lymph nodes'
#                                 ])

# df_categories.index.name = 'GT by radiologists for discrepancies'


# df_categories['FP']=[sum([len(x) for x in pleural_FP_emph.values()])+sum([len(x) for x in pleural_FP_noemph.values()]),
#                      sum([len(x) for x in calcif_FP_emph.values()])+sum([len(x) for x in calcif_FP_noemph.values()]),
#                      sum([len(x) for x in sub_ground_FP_emph.values()])+sum([len(x) for x in sub_ground_FP_noemph.values()]),
#                      sum([len(x) for x in other_nodules_FP_emph.values()])+sum([len(x) for x in other_nodules_FP_noemph.values()]),
#                      sum([len(x) for x in atyp_FP_emph.values()])+sum([len(x) for x in atyp_FP_noemph.values()]),
#                      sum([len(x) for x in per_FP_emph.values()])+sum([len(x) for x in per_FP_noemph.values()]),
#                      sum([len(x) for x in bronchioperi_FP_emph.values()])+sum([len(x) for x in bronchioperi_FP_noemph.values()])
# ]


# df_categories['FN']=[sum([len(x) for x in pleural_FN_emph.values()])+sum([len(x) for x in pleural_FN_noemph.values()]),
#                      sum([len(x) for x in calcif_FN_emph.values()])+sum([len(x) for x in calcif_FN_noemph.values()]),
#                      sum([len(x) for x in sub_ground_FN_emph.values()])+sum([len(x) for x in sub_ground_FN_noemph.values()]),
#                      sum([len(x) for x in other_nodules_FN_emph.values()])+sum([len(x) for x in other_nodules_FN_noemph.values()]),
#                      sum([len(x) for x in atyp_FN_emph.values()])+sum([len(x) for x in atyp_FN_noemph.values()]),
#                      sum([len(x) for x in per_FN_emph.values()])+sum([len(x) for x in per_FN_noemph.values()]),
#                      sum([len(x) for x in bronchioperi_FN_emph.values()])+sum([len(x) for x in bronchioperi_FN_noemph.values()])
# ]


# df_categories['TP']=[pleural_emph_nod_only+pleural_noemph_nod_only,
#                      calcified_emph_nod_only+calcified_noemph_nod_only,
#                      sub_ground_emph_nod_only+sub_ground_noemph_nod_only,
#                      other_all_emph_nod_only+other_all_noemph_nod_only,
#                      atypical_triangular_emph_lymph+atypical_triangular_noemph_lymph,
#                      per_fisu_emph_lymph+per_fisu_noemph_lymph,
#                      peri_bronch_emph_lymph+peri_bronch_noemph_lymph
# ]

# df_categories['All findings']=df_categories['FP']+df_categories['FN']+df_categories['TP']

# df_categories.loc['Total']= df_categories.sum()

# total_num_discrepancies_with_tp=df_categories.iloc[:-1,:-1].sum().sum() #To be used in next cells for percentages

# all_findings=df_categories.iloc[:-1,:-1].sum().sum()

# percentage_fp=np.round((df_categories['FP']/total_num_discrepancies_with_tp)*100,1) 
# df_categories['FP']=[str(value[1])+' ('+str(percentage_fp[index])+'%)' for index,value in enumerate(df_categories['FP'].items())]

# percentage_fn=np.round((df_categories['FN']/total_num_discrepancies_with_tp)*100,1) #sum(df_categories['emphysema'])
# df_categories['FN']=[str(value[1])+' ('+str(percentage_fn[index])+'%)' for index,value in enumerate(df_categories['FN'].items())]

# percentage_fn=np.round((df_categories['TP']/total_num_discrepancies_with_tp)*100,1) #sum(df_categories['emphysema'])
# df_categories['TP']=[str(value[1])+' ('+str(percentage_fn[index])+'%)' for index,value in enumerate(df_categories['TP'].items())]

# df_categories['All findings']=[str(val)+' ('+str(np.round(100*val/total_num_discrepancies_with_tp,1))+'%)' for val in df_categories['All findings'].values]

# #Rename columns
# df_categories.rename(columns={'FP': 'AI found, reader missed', 'FN': 'AI missed, reader found', 'TP':'Both found'}, inplace=True)

# df_categories

In [211]:
#Nodule subtypes - emphysema and non-emphysema participants together.
#For every subtype: findings detected by both AI and reader (TP), only by AI (FP) and only by the reader (FN),
#each shown as 'count (percentage of all findings in this table)'.

df_categories=pd.DataFrame(columns=['TP','FP','FN'], #row order must match the order of the lists assigned below
                          index=['pleural nodules',
                                 'calcified nodules',
                                 'subsolid & ground glass nodules',
                                 'other nodules'
                                ])

# df_categories.index.name = 'GT by radiologists for discrepancies'

#Count = total number of ids stored in the dictionary values (presumably participant id -> ids of findings),
#added up over the emphysema and the non-emphysema dictionary of each subtype
df_categories['FP']=[sum(len(ids) for ids in emph_ids.values())+sum(len(ids) for ids in noemph_ids.values())
                     for emph_ids,noemph_ids in [(pleural_FP_emph,pleural_FP_noemph),
                                                 (calcif_FP_emph,calcif_FP_noemph),
                                                 (sub_ground_FP_emph,sub_ground_FP_noemph),
                                                 (other_nodules_FP_emph,other_nodules_FP_noemph)]]

df_categories['FN']=[sum(len(ids) for ids in emph_ids.values())+sum(len(ids) for ids in noemph_ids.values())
                     for emph_ids,noemph_ids in [(pleural_FN_emph,pleural_FN_noemph),
                                                 (calcif_FN_emph,calcif_FN_noemph),
                                                 (sub_ground_FN_emph,sub_ground_FN_noemph),
                                                 (other_nodules_FN_emph,other_nodules_FN_noemph)]]

df_categories['TP']=[pleural_emph_nod_only+pleural_noemph_nod_only,
                     calcified_emph_nod_only+calcified_noemph_nod_only,
                     sub_ground_emph_nod_only+sub_ground_noemph_nod_only,
                     other_all_emph_nod_only+other_all_noemph_nod_only]

df_categories['All findings']=df_categories['FP']+df_categories['FN']+df_categories['TP']

df_categories.loc['Total']= df_categories.sum()

#Denominator of all percentages: every TP/FP/FN count, leaving out the 'Total' row and the 'All findings' column
total_num_discrepancies_with_tp=df_categories.iloc[:-1,:-1].sum().sum() #To be used in next cells for percentages
all_findings=total_num_discrepancies_with_tp #same quantity, name kept for compatibility

#Percentages are computed while the columns are still numeric...
percentage_fp=np.round((df_categories['FP']/total_num_discrepancies_with_tp)*100,1)
percentage_fn=np.round((df_categories['FN']/total_num_discrepancies_with_tp)*100,1)
percentage_tp=np.round((df_categories['TP']/total_num_discrepancies_with_tp)*100,1)

#...and then each count is replaced by the text 'count (xx.x%)'. Counts and percentages are paired with zip,
#so no integer is used as a position on the label-indexed Series (deprecated behaviour in pandas 2.x).
df_categories['FP']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['FP'],percentage_fp)]
df_categories['FN']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['FN'],percentage_fn)]
df_categories['TP']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['TP'],percentage_tp)]

df_categories['All findings']=[str(val)+' ('+str(np.round(100*val/total_num_discrepancies_with_tp,1))+'%)'
                               for val in df_categories['All findings'].values]

#Rename columns
df_categories=df_categories.rename(columns={'FP': 'AI found, reader missed', 'FN': 'AI missed, reader found', 'TP':'Both found'})

df_categories

Unnamed: 0,Both found,"AI found, reader missed","AI missed, reader found",All findings
pleural nodules,6 (5.7%),3 (2.9%),0 (0.0%),9 (8.6%)
calcified nodules,6 (5.7%),0 (0.0%),0 (0.0%),6 (5.7%)
subsolid & ground glass nodules,6 (5.7%),2 (1.9%),8 (7.6%),16 (15.2%)
other nodules,65 (61.9%),6 (5.7%),3 (2.9%),74 (70.5%)
Total,83 (79.0%),11 (10.5%),11 (10.5%),105 (100.0%)


In [212]:
#Save the current df_categories table as an Excel file in the current working directory
df_categories.to_excel(excel_writer='nodule_types_all.xlsx')

In [213]:
#Emphysema only
#Nodule subtypes: findings detected by both AI and reader (TP), only by AI (FP) and only by the reader (FN),
#each shown as 'count (percentage of all findings in this table)'.

df_categories=pd.DataFrame(columns=['TP','FP','FN'], #row order must match the order of the lists assigned below
                          index=['pleural nodules',
                                 'calcified nodules',
                                 'subsolid & ground glass nodules',
                                 'other nodules'
                                ])

# df_categories.index.name = 'GT by radiologists for discrepancies'

#Count = total number of ids stored in the dictionary values (presumably participant id -> ids of findings)
df_categories['FP']=[sum(len(ids) for ids in ids_by_key.values())
                     for ids_by_key in (pleural_FP_emph,
                                        calcif_FP_emph,
                                        sub_ground_FP_emph,
                                        other_nodules_FP_emph)]

df_categories['FN']=[sum(len(ids) for ids in ids_by_key.values())
                     for ids_by_key in (pleural_FN_emph,
                                        calcif_FN_emph,
                                        sub_ground_FN_emph,
                                        other_nodules_FN_emph)]

df_categories['TP']=[pleural_emph_nod_only,
                     calcified_emph_nod_only,
                     sub_ground_emph_nod_only,
                     other_all_emph_nod_only]

df_categories['All findings']=df_categories['FP']+df_categories['FN']+df_categories['TP']

df_categories.loc['Total']= df_categories.sum()

#Denominator of all percentages: every TP/FP/FN count, leaving out the 'Total' row and the 'All findings' column
total_num_discrepancies_with_tp=df_categories.iloc[:-1,:-1].sum().sum() #To be used in next cells for percentages
all_findings=total_num_discrepancies_with_tp #same quantity, name kept for compatibility

#Percentages are computed while the columns are still numeric...
percentage_fp=np.round((df_categories['FP']/total_num_discrepancies_with_tp)*100,1)
percentage_fn=np.round((df_categories['FN']/total_num_discrepancies_with_tp)*100,1)
percentage_tp=np.round((df_categories['TP']/total_num_discrepancies_with_tp)*100,1)

#...and then each count is replaced by the text 'count (xx.x%)'. Counts and percentages are paired with zip,
#so no integer is used as a position on the label-indexed Series (deprecated behaviour in pandas 2.x).
df_categories['FP']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['FP'],percentage_fp)]
df_categories['FN']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['FN'],percentage_fn)]
df_categories['TP']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['TP'],percentage_tp)]

df_categories['All findings']=[str(val)+' ('+str(np.round(100*val/total_num_discrepancies_with_tp,1))+'%)'
                               for val in df_categories['All findings'].values]

#Rename columns
df_categories=df_categories.rename(columns={'FP': 'AI found, reader missed', 'FN': 'AI missed, reader found', 'TP':'Both found'})

df_categories

Unnamed: 0,Both found,"AI found, reader missed","AI missed, reader found",All findings
pleural nodules,1 (2.6%),2 (5.1%),0 (0.0%),3 (7.7%)
calcified nodules,3 (7.7%),0 (0.0%),0 (0.0%),3 (7.7%)
subsolid & ground glass nodules,1 (2.6%),0 (0.0%),1 (2.6%),2 (5.1%)
other nodules,28 (71.8%),2 (5.1%),1 (2.6%),31 (79.5%)
Total,33 (84.6%),4 (10.3%),2 (5.1%),39 (100.0%)


In [214]:
# df_categories.to_excel('nodule_types_emph.xlsx')

In [215]:
#Non-emphysema
#Nodule subtypes: findings detected by both AI and reader (TP), only by AI (FP) and only by the reader (FN),
#each shown as 'count (percentage of all findings in this table)'.

df_categories=pd.DataFrame(columns=['TP','FP','FN'], #row order must match the order of the lists assigned below
                          index=['pleural nodules',
                                 'calcified nodules',
                                 'subsolid & ground glass nodules',
                                 'other nodules'
                                ])

# df_categories.index.name = 'GT by radiologists for discrepancies'

#Count = total number of ids stored in the dictionary values (presumably participant id -> ids of findings)
df_categories['FP']=[sum(len(ids) for ids in ids_by_key.values())
                     for ids_by_key in (pleural_FP_noemph,
                                        calcif_FP_noemph,
                                        sub_ground_FP_noemph,
                                        other_nodules_FP_noemph)]

df_categories['FN']=[sum(len(ids) for ids in ids_by_key.values())
                     for ids_by_key in (pleural_FN_noemph,
                                        calcif_FN_noemph,
                                        sub_ground_FN_noemph,
                                        other_nodules_FN_noemph)]

df_categories['TP']=[pleural_noemph_nod_only,
                     calcified_noemph_nod_only,
                     sub_ground_noemph_nod_only,
                     other_all_noemph_nod_only]

df_categories['All findings']=df_categories['FP']+df_categories['FN']+df_categories['TP']

df_categories.loc['Total']= df_categories.sum()

#Denominator of all percentages: every TP/FP/FN count, leaving out the 'Total' row and the 'All findings' column
total_num_discrepancies_with_tp=df_categories.iloc[:-1,:-1].sum().sum() #To be used in next cells for percentages
all_findings=total_num_discrepancies_with_tp #same quantity, name kept for compatibility

#Percentages are computed while the columns are still numeric...
percentage_fp=np.round((df_categories['FP']/total_num_discrepancies_with_tp)*100,1)
percentage_fn=np.round((df_categories['FN']/total_num_discrepancies_with_tp)*100,1)
percentage_tp=np.round((df_categories['TP']/total_num_discrepancies_with_tp)*100,1)

#...and then each count is replaced by the text 'count (xx.x%)'. Counts and percentages are paired with zip,
#so no integer is used as a position on the label-indexed Series (deprecated behaviour in pandas 2.x).
df_categories['FP']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['FP'],percentage_fp)]
df_categories['FN']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['FN'],percentage_fn)]
df_categories['TP']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['TP'],percentage_tp)]

df_categories['All findings']=[str(val)+' ('+str(np.round(100*val/total_num_discrepancies_with_tp,1))+'%)'
                               for val in df_categories['All findings'].values]

#Rename columns
df_categories=df_categories.rename(columns={'FP': 'AI found, reader missed', 'FN': 'AI missed, reader found', 'TP':'Both found'})

df_categories

Unnamed: 0,Both found,"AI found, reader missed","AI missed, reader found",All findings
pleural nodules,5 (7.6%),1 (1.5%),0 (0.0%),6 (9.1%)
calcified nodules,3 (4.5%),0 (0.0%),0 (0.0%),3 (4.5%)
subsolid & ground glass nodules,5 (7.6%),2 (3.0%),7 (10.6%),14 (21.2%)
other nodules,37 (56.1%),4 (6.1%),2 (3.0%),43 (65.2%)
Total,50 (75.8%),7 (10.6%),9 (13.6%),66 (100.0%)


In [216]:
# df_categories.to_excel('nodule_types_noemph.xlsx')

Same as above without TP

In [217]:
# #Emphysema only
# #Further analysis for nodule/lymph node subcategories - Not kept for now
# #Detailed analysis of what detected or not from both AI and reader for each category in nodules & lymph nodes 

# df_categories=pd.DataFrame(columns=['FP','FN'], #below index with the correct order as above
#                           index=['pleural nodules',
#                                  'calcified nodules',
#                                  'subsolid & ground glass nodules',
#                                  'other nodules'
#                                 ])

# # df_categories.index.name = 'GT by radiologists for discrepancies'

# df_categories['FP']=[sum([len(x) for x in pleural_FP_emph.values()]),
#                      sum([len(x) for x in calcif_FP_emph.values()]),
#                      sum([len(x) for x in sub_ground_FP_emph.values()]),
#                      sum([len(x) for x in other_nodules_FP_emph.values()])]

# df_categories['FN']=[sum([len(x) for x in pleural_FN_emph.values()]),
#                      sum([len(x) for x in calcif_FN_emph.values()]),
#                      sum([len(x) for x in sub_ground_FN_emph.values()]),
#                      sum([len(x) for x in other_nodules_FN_emph.values()])]

# all_findings_fp=df_categories['FP'].sum()
# all_findings_fn=df_categories['FN'].sum()

# percentage_fp=np.round((df_categories['FP']/all_findings_fp)*100,1) 
# df_categories['FP']=[str(value[1])+' ('+str(percentage_fp[index])+'%)' for index,value in enumerate(df_categories['FP'].items())]

# percentage_fn=np.round((df_categories['FN']/all_findings_fn)*100,1) #sum(df_categories['emphysema'])
# df_categories['FN']=[str(value[1])+' ('+str(percentage_fn[index])+'%)' for index,value in enumerate(df_categories['FN'].items())]


# df_categories.loc['Total']= [str(all_findings_fp)+ ' (100%)',str(all_findings_fn)+' (100%)']

# #Rename columns
# df_categories.rename(columns={'FP': 'AI found, reader missed', 'FN': 'AI missed, reader found'}, inplace=True)

# df_categories

In [218]:
# df_categories.to_excel('nodule_types_emph_notp.xlsx')

In [219]:
# #Non-emphysema
# #Further analysis for nodule/lymph node subcategories - Not kept for now
# #Detailed analysis of what detected or not from both AI and reader for each category in nodules & lymph nodes 

# df_categories=pd.DataFrame(columns=['FP','FN'], #below index with the correct order as above
#                           index=['pleural nodules',
#                                  'calcified nodules',
#                                  'subsolid & ground glass nodules',
#                                  'other nodules'
#                                 ])

# # df_categories.index.name = 'GT by radiologists for discrepancies'

# df_categories['FP']=[sum([len(x) for x in pleural_FP_noemph.values()]),
#                      sum([len(x) for x in calcif_FP_noemph.values()]),
#                      sum([len(x) for x in sub_ground_FP_noemph.values()]),
#                      sum([len(x) for x in other_nodules_FP_noemph.values()])]

# df_categories['FN']=[sum([len(x) for x in pleural_FN_noemph.values()]),
#                      sum([len(x) for x in calcif_FN_noemph.values()]),
#                      sum([len(x) for x in sub_ground_FN_noemph.values()]),
#                      sum([len(x) for x in other_nodules_FN_noemph.values()])]


# all_findings_fp=df_categories['FP'].sum()
# all_findings_fn=df_categories['FN'].sum()

# percentage_fp=np.round((df_categories['FP']/all_findings_fp)*100,1) 
# df_categories['FP']=[str(value[1])+' ('+str(percentage_fp[index])+'%)' for index,value in enumerate(df_categories['FP'].items())]

# percentage_fn=np.round((df_categories['FN']/all_findings_fn)*100,1) #sum(df_categories['emphysema'])
# df_categories['FN']=[str(value[1])+' ('+str(percentage_fn[index])+'%)' for index,value in enumerate(df_categories['FN'].items())]


# df_categories.loc['Total']= [str(all_findings_fp)+ ' (100%)',str(all_findings_fn)+' (100%)']

# #Rename columns
# df_categories.rename(columns={'FP': 'AI found, reader missed', 'FN': 'AI missed, reader found'}, inplace=True) #, 'TP':'Both found'

# df_categories

In [220]:
# df_categories.to_excel('nodule_types_noemph_notp.xlsx')

### Lymph node types (atypical_triangular_emph_lymph transformed to atypical_triangular_emph_nod_only etc.)

In [221]:
#Lymph node subtypes - emphysema and non-emphysema participants together.
#For every subtype: findings detected by both AI and reader (TP), only by AI (FP) and only by the reader (FN),
#each shown as 'count (percentage of all findings in this table)'.

df_categories=pd.DataFrame(columns=['TP','FP','FN'], #row order must match the order of the lists assigned below
                          index=['atypical PFNs',
                                 'typical PFNs & periphysural lymph nodes',
                                 'bronchiovascular lymph nodes'
                                ])

# df_categories.index.name = 'GT by radiologists for discrepancies'

#Count = total number of ids stored in the dictionary values (presumably participant id -> ids of findings),
#added up over the emphysema and the non-emphysema dictionary of each subtype
df_categories['FP']=[sum(len(ids) for ids in emph_ids.values())+sum(len(ids) for ids in noemph_ids.values())
                     for emph_ids,noemph_ids in [(atyp_FP_emph,atyp_FP_noemph),
                                                 (per_FP_emph,per_FP_noemph),
                                                 (bronchioperi_FP_emph,bronchioperi_FP_noemph)]]

df_categories['FN']=[sum(len(ids) for ids in emph_ids.values())+sum(len(ids) for ids in noemph_ids.values())
                     for emph_ids,noemph_ids in [(atyp_FN_emph,atyp_FN_noemph),
                                                 (per_FN_emph,per_FN_noemph),
                                                 (bronchioperi_FN_emph,bronchioperi_FN_noemph)]]

#Note: the atypical PFN count comes from the *_nod_only variables, the other two from the *_lymph variables
df_categories['TP']=[atypical_triangular_emph_nod_only+atypical_triangular_noemph_nod_only,
                     per_fisu_emph_lymph+per_fisu_noemph_lymph,
                     peri_bronch_emph_lymph+peri_bronch_noemph_lymph]

df_categories['All findings']=df_categories['FP']+df_categories['FN']+df_categories['TP']

df_categories.loc['Total']= df_categories.sum()

#Denominator of all percentages: every TP/FP/FN count, leaving out the 'Total' row and the 'All findings' column
total_num_discrepancies_with_tp=df_categories.iloc[:-1,:-1].sum().sum() #To be used in next cells for percentages
all_findings=total_num_discrepancies_with_tp #same quantity, name kept for compatibility

#Percentages are computed while the columns are still numeric...
percentage_fp=np.round((df_categories['FP']/total_num_discrepancies_with_tp)*100,1)
percentage_fn=np.round((df_categories['FN']/total_num_discrepancies_with_tp)*100,1)
percentage_tp=np.round((df_categories['TP']/total_num_discrepancies_with_tp)*100,1)

#...and then each count is replaced by the text 'count (xx.x%)'. Counts and percentages are paired with zip,
#so no integer is used as a position on the label-indexed Series (deprecated behaviour in pandas 2.x).
df_categories['FP']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['FP'],percentage_fp)]
df_categories['FN']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['FN'],percentage_fn)]
df_categories['TP']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['TP'],percentage_tp)]

df_categories['All findings']=[str(val)+' ('+str(np.round(100*val/total_num_discrepancies_with_tp,1))+'%)'
                               for val in df_categories['All findings'].values]

#Rename columns
df_categories=df_categories.rename(columns={'FP': 'AI found, reader missed', 'FN': 'AI missed, reader found', 'TP':'Both found'})

df_categories

Unnamed: 0,Both found,"AI found, reader missed","AI missed, reader found",All findings
atypical PFNs,8 (6.6%),28 (23.0%),23 (18.9%),59 (48.4%)
typical PFNs & periphysural lymph nodes,18 (14.8%),8 (6.6%),19 (15.6%),45 (36.9%)
bronchiovascular lymph nodes,0 (0.0%),2 (1.6%),16 (13.1%),18 (14.8%)
Total,26 (21.3%),38 (31.1%),58 (47.5%),122 (100.0%)


In [222]:
#Save the current df_categories table as an Excel file in the current working directory
df_categories.to_excel(excel_writer='lymph_types_all.xlsx')

In [223]:
#Emphysema
#Lymph node subtypes: findings detected by both AI and reader (TP), only by AI (FP) and only by the reader (FN),
#each shown as 'count (percentage of all findings in this table)'.

df_categories=pd.DataFrame(columns=['TP','FP','FN'], #row order must match the order of the lists assigned below
                          index=['atypical PFNs',
                                 'typical PFNs & periphysural lymph nodes',
                                 'bronchiovascular lymph nodes'
                                ])

# df_categories.index.name = 'GT by radiologists for discrepancies'

#Count = total number of ids stored in the dictionary values (presumably participant id -> ids of findings)
df_categories['FP']=[sum(len(ids) for ids in ids_by_key.values())
                     for ids_by_key in (atyp_FP_emph,
                                        per_FP_emph,
                                        bronchioperi_FP_emph)]

df_categories['FN']=[sum(len(ids) for ids in ids_by_key.values())
                     for ids_by_key in (atyp_FN_emph,
                                        per_FN_emph,
                                        bronchioperi_FN_emph)]

#Note: the atypical PFN count comes from the *_nod_only variable, the other two from the *_lymph variables
df_categories['TP']=[atypical_triangular_emph_nod_only,
                     per_fisu_emph_lymph,
                     peri_bronch_emph_lymph]

df_categories['All findings']=df_categories['FP']+df_categories['FN']+df_categories['TP']

df_categories.loc['Total']= df_categories.sum()

#Denominator of all percentages: every TP/FP/FN count, leaving out the 'Total' row and the 'All findings' column
total_num_discrepancies_with_tp=df_categories.iloc[:-1,:-1].sum().sum() #To be used in next cells for percentages
all_findings=total_num_discrepancies_with_tp #same quantity, name kept for compatibility

#Percentages are computed while the columns are still numeric...
percentage_fp=np.round((df_categories['FP']/total_num_discrepancies_with_tp)*100,1)
percentage_fn=np.round((df_categories['FN']/total_num_discrepancies_with_tp)*100,1)
percentage_tp=np.round((df_categories['TP']/total_num_discrepancies_with_tp)*100,1)

#...and then each count is replaced by the text 'count (xx.x%)'. Counts and percentages are paired with zip,
#so no integer is used as a position on the label-indexed Series (deprecated behaviour in pandas 2.x).
df_categories['FP']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['FP'],percentage_fp)]
df_categories['FN']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['FN'],percentage_fn)]
df_categories['TP']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['TP'],percentage_tp)]

df_categories['All findings']=[str(val)+' ('+str(np.round(100*val/total_num_discrepancies_with_tp,1))+'%)'
                               for val in df_categories['All findings'].values]

#Rename columns
df_categories=df_categories.rename(columns={'FP': 'AI found, reader missed', 'FN': 'AI missed, reader found', 'TP':'Both found'})

df_categories

Unnamed: 0,Both found,"AI found, reader missed","AI missed, reader found",All findings
atypical PFNs,1 (1.9%),15 (28.8%),11 (21.2%),27 (51.9%)
typical PFNs & periphysural lymph nodes,6 (11.5%),1 (1.9%),10 (19.2%),17 (32.7%)
bronchiovascular lymph nodes,0 (0.0%),2 (3.8%),6 (11.5%),8 (15.4%)
Total,7 (13.5%),18 (34.6%),27 (51.9%),52 (100.0%)


In [224]:
# df_categories.to_excel('lymph_types_emph.xlsx')

In [225]:
#Non-emphysema
#Lymph node subtypes: findings detected by both AI and reader (TP), only by AI (FP) and only by the reader (FN),
#each shown as 'count (percentage of all findings in this table)'.

df_categories=pd.DataFrame(columns=['TP','FP','FN'], #row order must match the order of the lists assigned below
                          index=['atypical PFNs',
                                 'typical PFNs & periphysural lymph nodes',
                                 'bronchiovascular lymph nodes'
                                ])

# df_categories.index.name = 'GT by radiologists for discrepancies'

#Count = total number of ids stored in the dictionary values (presumably participant id -> ids of findings)
df_categories['FP']=[sum(len(ids) for ids in ids_by_key.values())
                     for ids_by_key in (atyp_FP_noemph,
                                        per_FP_noemph,
                                        bronchioperi_FP_noemph)]

df_categories['FN']=[sum(len(ids) for ids in ids_by_key.values())
                     for ids_by_key in (atyp_FN_noemph,
                                        per_FN_noemph,
                                        bronchioperi_FN_noemph)]

#Note: the atypical PFN count comes from the *_nod_only variable, the other two from the *_lymph variables
df_categories['TP']=[atypical_triangular_noemph_nod_only,
                     per_fisu_noemph_lymph,
                     peri_bronch_noemph_lymph]

df_categories['All findings']=df_categories['FP']+df_categories['FN']+df_categories['TP']

df_categories.loc['Total']= df_categories.sum()

#Denominator of all percentages: every TP/FP/FN count, leaving out the 'Total' row and the 'All findings' column
total_num_discrepancies_with_tp=df_categories.iloc[:-1,:-1].sum().sum() #To be used in next cells for percentages
all_findings=total_num_discrepancies_with_tp #same quantity, name kept for compatibility

#Percentages are computed while the columns are still numeric...
percentage_fp=np.round((df_categories['FP']/total_num_discrepancies_with_tp)*100,1)
percentage_fn=np.round((df_categories['FN']/total_num_discrepancies_with_tp)*100,1)
percentage_tp=np.round((df_categories['TP']/total_num_discrepancies_with_tp)*100,1)

#...and then each count is replaced by the text 'count (xx.x%)'. Counts and percentages are paired with zip,
#so no integer is used as a position on the label-indexed Series (deprecated behaviour in pandas 2.x).
df_categories['FP']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['FP'],percentage_fp)]
df_categories['FN']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['FN'],percentage_fn)]
df_categories['TP']=[str(count)+' ('+str(pct)+'%)' for count,pct in zip(df_categories['TP'],percentage_tp)]

df_categories['All findings']=[str(val)+' ('+str(np.round(100*val/total_num_discrepancies_with_tp,1))+'%)'
                               for val in df_categories['All findings'].values]

#Rename columns
df_categories=df_categories.rename(columns={'FP': 'AI found, reader missed', 'FN': 'AI missed, reader found', 'TP':'Both found'})

df_categories

Unnamed: 0,Both found,"AI found, reader missed","AI missed, reader found",All findings
atypical PFNs,7 (10.0%),13 (18.6%),12 (17.1%),32 (45.7%)
typical PFNs & periphysural lymph nodes,12 (17.1%),7 (10.0%),9 (12.9%),28 (40.0%)
bronchiovascular lymph nodes,0 (0.0%),0 (0.0%),10 (14.3%),10 (14.3%)
Total,19 (27.1%),20 (28.6%),31 (44.3%),70 (100.0%)


In [226]:
# df_categories.to_excel('lymph_types_noemph.xlsx')

Same as above without TP

In [227]:
# #Further analysis for nodule/lymph node subcategories - Not kept for now
# #Detailed analysis of what detected or not from both AI and reader for each category in nodules & lymph nodes 

# df_categories=pd.DataFrame(columns=['FP','FN'], #below index with the correct order as above
#                           index=['atypical PFNs',
#                                  'typical PFNs & periphysural lymph nodes',
#                                  'bronchiovascular lymph nodes'
#                                 ])

# # df_categories.index.name = 'GT by radiologists for discrepancies'

# df_categories['FP']=[sum([len(x) for x in atyp_FP_emph.values()])+sum([len(x) for x in atyp_FP_noemph.values()]),
#                      sum([len(x) for x in per_FP_emph.values()])+sum([len(x) for x in per_FP_noemph.values()]),
#                      sum([len(x) for x in bronchioperi_FP_emph.values()])+sum([len(x) for x in bronchioperi_FP_noemph.values()])]

# df_categories['FN']=[sum([len(x) for x in atyp_FN_emph.values()])+sum([len(x) for x in atyp_FN_noemph.values()]),
#                      sum([len(x) for x in per_FN_emph.values()])+sum([len(x) for x in per_FN_noemph.values()]),
#                      sum([len(x) for x in bronchioperi_FN_emph.values()])+sum([len(x) for x in bronchioperi_FN_noemph.values()])]


# all_findings_fp=df_categories['FP'].sum()
# all_findings_fn=df_categories['FN'].sum()

# percentage_fp=np.round((df_categories['FP']/all_findings_fp)*100,1) 
# df_categories['FP']=[str(value[1])+' ('+str(percentage_fp[index])+'%)' for index,value in enumerate(df_categories['FP'].items())]

# percentage_fn=np.round((df_categories['FN']/all_findings_fn)*100,1) #sum(df_categories['emphysema'])
# df_categories['FN']=[str(value[1])+' ('+str(percentage_fn[index])+'%)' for index,value in enumerate(df_categories['FN'].items())]

# df_categories.loc['Total']= [str(all_findings_fp)+ ' (100%)',str(all_findings_fn)+' (100%)']

# #Rename columns
# df_categories.rename(columns={'FP': 'AI found, reader missed', 'FN': 'AI missed, reader found'}, inplace=True)

# df_categories

In [228]:
# df_categories.to_excel('lymph_types_emph_notp.xlsx')

In [229]:
# #Non-emphysema
# #Further analysis for nodule/lymph node subcategories - Not kept for now
# #Detailed analysis of what detected or not from both AI and reader for each category in nodules & lymph nodes 

# df_categories=pd.DataFrame(columns=['FP','FN'], #below index with the correct order as above
#                           index=['atypical PFNs',
#                                  'typical PFNs & periphysural lymph nodes',
#                                  'bronchiovascular lymph nodes'
#                                 ])


# df_categories['FP']=[sum([len(x) for x in atyp_FP_noemph.values()]),
#                      sum([len(x) for x in per_FP_noemph.values()]),
#                      sum([len(x) for x in bronchioperi_FP_noemph.values()])]

# df_categories['FN']=[sum([len(x) for x in atyp_FN_noemph.values()]),
#                      sum([len(x) for x in per_FN_noemph.values()]),
#                      sum([len(x) for x in bronchioperi_FN_noemph.values()])]


# all_findings_fp=df_categories['FP'].sum()
# all_findings_fn=df_categories['FN'].sum()


# percentage_fp=np.round((df_categories['FP']/all_findings_fp)*100,1) 
# df_categories['FP']=[str(value[1])+' ('+str(percentage_fp[index])+'%)' for index,value in enumerate(df_categories['FP'].items())]

# percentage_fn=np.round((df_categories['FN']/all_findings_fn)*100,1) #sum(df_categories['emphysema'])
# df_categories['FN']=[str(value[1])+' ('+str(percentage_fn[index])+'%)' for index,value in enumerate(df_categories['FN'].items())]

# df_categories.loc['Total']= [str(all_findings_fp)+ ' (100%)',str(all_findings_fn)+' (100%)']

# #Rename columns
# df_categories.rename(columns={'FP': 'AI found, reader missed', 'FN': 'AI missed, reader found'}, inplace=True)

# df_categories

In [230]:
# df_categories.to_excel('lymph_types_noemph_notp.xlsx')

### Similar analysis to the one performed above (emphysema/non-emphysema, volume subgroups), but this time including lymph nodes

Emphysema (nodules and lymph nodes)

In [231]:
#Emphysema participants, nodules and lymph nodes together.
#A finding detected by only one of AI/reader is a TP for that one and a FN for the other.

TP_both=TP_emph #Common nodules and lymph nodes detected by both AI and reader
TP_AI_only=FP_nods_emph #nodules and lymph nodes detected only by AI
TP_read_only=FN_nods_emph #nodules and lymph nodes detected only by the reader

#AI counts
TP_AI_emph=TP_emph+FP_nods_emph
FP_AI_emph=FP_nonods_emph
FN_AI_emph=TP_read_only
TP_AI,FP_AI,FN_AI=TP_AI_emph,FP_AI_emph,FN_AI_emph

#Reader counts
TP_read_emph=TP_emph+FN_nods_emph
FP_read_emph=FN_nonods_emph
FN_read_emph=TP_AI_only
TP_read,FP_read,FN_read=TP_read_emph,FP_read_emph,FN_read_emph

#Print the above
print("Emphysema numbers")
for emph_label,emph_count in [('TP_AI: ',TP_AI),
                              ('FP_AI: ',FP_AI),
                              ('FN_AI: ',FN_AI),
                              ('TP_read: ',TP_read),
                              ('FP_read: ',FP_read),
                              ('FN_read: ',FN_read),
                              ('TP_both: ',TP_both)]:
    print(emph_label,emph_count)

Emphysema numbers
TP_AI:  62
FP_AI:  20
FN_AI:  29
TP_read:  69
FP_read:  6
FN_read:  22
TP_both:  40


In [232]:
#Table split in two tables: one for emphysema only and one for non-emphysema having also percentages.

#Assessing detection performance - For nodules + lymph nodes 
#Row 0 holds the AI results, row 1 the reader results; a 'Total' row is appended further down.
#All writes use a single .iloc[row, col] call: chained forms such as df[col].iloc[i] = ... assign
#into a temporary object and do not update the frame once pandas Copy-on-Write is active.
df_all_new=pd.DataFrame(columns=['sensitivity (95% CI)','PPV (95% CI)','F1 score (95% CI)','nodules detected','non-nodules detected','nodules missed'],
                        index=['AI, emphysema', 'reader, emphysema'])

#AI, emphysema (nodules and lymph nodes): point estimates and raw counts
df_all_new.iloc[0,0]=np.round(sensitivity(TP_AI,FN_AI),2)
df_all_new.iloc[0,1]=np.round(PPV(TP_AI,FP_AI),2)
df_all_new.iloc[0,2]=np.round(F1score(TP_AI,FP_AI,FN_AI),2)
df_all_new.iloc[0,3]=TP_AI
df_all_new.iloc[0,4]=FP_AI
df_all_new.iloc[0,5]=FN_AI

#The fourth positional argument is passed as 0, as in the other calls of this helper in the notebook
sensitivity_confidence_interval_AI, PPV_confidence_interval_AI, F1_confidence_interval_AI \
= sensitivity_and_specificity_with_confidence_intervals(TP_AI, FP_AI, FN_AI, 0, alpha=0.95)

ci_sens_ai=[np.round(x,2) for x in sensitivity_confidence_interval_AI]
ci_ppv_ai=[np.round(x,2) for x in PPV_confidence_interval_AI]
ci_f1_ai=[np.round(x,2) for x in F1_confidence_interval_AI]

#Append the interval to each point estimate, e.g. '0.68 (0.57, 0.77)'.
#float() keeps the text free of 'np.float64(...)' wrappers, which NumPy >= 2 puts in scalar reprs.
for _col,_ci in enumerate((ci_sens_ai,ci_ppv_ai,ci_f1_ai)):
    df_all_new.iloc[0,_col]=str(df_all_new.iloc[0,_col])+' '+str(tuple(float(x) for x in _ci))

#Reader, emphysema (nodules and lymph nodes): point estimates and raw counts
df_all_new.iloc[1,0]=np.round(sensitivity(TP_read,FN_read),2)
df_all_new.iloc[1,1]=np.round(PPV(TP_read,FP_read),2)
df_all_new.iloc[1,2]=np.round(F1score(TP_read,FP_read,FN_read),2)
df_all_new.iloc[1,3]=TP_read
df_all_new.iloc[1,4]=FP_read
df_all_new.iloc[1,5]=FN_read

sensitivity_confidence_interval_read, PPV_confidence_interval_read, F1_confidence_interval_read\
    = sensitivity_and_specificity_with_confidence_intervals(TP_read, FP_read, FN_read, 0, alpha=0.95)

ci_sens_read=[np.round(x,2) for x in sensitivity_confidence_interval_read]
ci_ppv_read=[np.round(x,2) for x in PPV_confidence_interval_read]
ci_f1_read=[np.round(x,2) for x in F1_confidence_interval_read]

for _col,_ci in enumerate((ci_sens_read,ci_ppv_read,ci_f1_read)):
    df_all_new.iloc[1,_col]=str(df_all_new.iloc[1,_col])+' '+str(tuple(float(x) for x in _ci))


#Row totals and a 'Total' row; the metric cells of 'Total' are blanked because summing them is meaningless
df_all_new['All findings']=df_all_new['nodules detected']+df_all_new['non-nodules detected']+df_all_new['nodules missed']
df_all_new.loc['Total']= df_all_new.sum()
df_all_new.iloc[-1,0:3]=''


all_findings=df_all_new.iloc[:-1,3:-1].sum().sum()

#Turn the raw counts of each rater into 'count (percentage of that rater's findings)'
for i in range(2):
    row_all=np.sum(df_all_new.iloc[i,3:6].values)

    percentage_tp=np.round((df_all_new.iloc[i,3]/row_all)*100,1) 
    df_all_new.iloc[i,3]=str(df_all_new.iloc[i,3])+' ('+str(percentage_tp)+'%)'
    percentage_fp=np.round((df_all_new.iloc[i,4]/row_all)*100,1) 
    df_all_new.iloc[i,4]=str(df_all_new.iloc[i,4])+' ('+str(percentage_fp)+'%)'
    percentage_fn=np.round((df_all_new.iloc[i,5]/row_all)*100,1) 
    df_all_new.iloc[i,5]=str(df_all_new.iloc[i,5])+' ('+str(percentage_fn)+'%)'

    df_all_new.iloc[i,6]=str(df_all_new.iloc[i,6])+' (100%)'

df_all_new

Unnamed: 0,sensitivity (95% CI),PPV (95% CI),F1 score (95% CI),nodules detected,non-nodules detected,nodules missed,All findings
"AI, emphysema","0.68 (0.57, 0.77)","0.76 (0.65, 0.84)","0.72 (0.64, 0.78)",62 (55.9%),20 (18.0%),29 (26.1%),111 (100%)
"reader, emphysema","0.76 (0.66, 0.84)","0.92 (0.83, 0.97)","0.83 (0.76, 0.88)",69 (71.1%),6 (6.2%),22 (22.7%),97 (100%)
Total,,,,131,26,51,208


In [233]:
# Write the table currently held in df_all_new to an Excel file. The file name is relative, so it
# is created in the current working directory.
# NOTE(review): df_all_new is reused for several tables; this presumably exports the emphysema
# nodules + lymph nodes performance table built in the preceding cell - run the cells in order.
df_all_new.to_excel('nodules_lymph_emphysema.xlsx')

In [234]:
#McNemar's test to compare Reader vs AI (using consensus panel)
#2x2 layout: [[found by both AI and reader, found by AI but missed by reader],
#             [found by reader but missed by AI, 0]]
#The bottom-right cell (missed by both) is unknown and entered as 0.

#Nodules and lymph nodes
data=[[TP_both, FN_read],
      [FN_AI, 0]]
# print(data)

#Chi-square version of McNemar's test WITH continuity correction (exact=False, correction=True)
#NOTE(review): with few discordant pairs the exact binomial version (exact=True) is often preferred - confirm the choice.
_mcnemar_pvalue=mcnemar(data, exact=False, correction=True).pvalue
print("McNemar's test (nodules and lymphs), AI_vs_Reader with continuity correction (not exact) p value is",_mcnemar_pvalue) 


# #For FPs
# data=[[0, FP_AI], 
#         [FP_read, 0]]
# # print(data)

# # McNemar's Test with continuity correction
# print("For FP findings, with continuity correction (not exact) p value is",mcnemar(data, exact=False,correction=True).pvalue)

McNemar's test (nodules and lymphs), AI_vs_Reader with continuity correction (not exact) p value is 0.40081416938293424


In [235]:
# #AI vs GT, TP_both included
# # Table looks like below:
# #              GT
# #             Yes                       No
# # AI   Yes    TP_both+TP_AI_only      FP_AI
# #      No     FN_AI                   0

# list_of_lists=[[1 for x in range(TP_both)],[1 for x in range(TP_AI_only)],[0 for x in range(FP_AI) ],[1 for x in range(FN_AI) ] ] 
# rater_GT=[item for sublist in list_of_lists for item in sublist]

# list_of_lists=[[1 for x in range(TP_both)], [1 for x in range(TP_AI_only)],[1 for x in range(FP_AI) ],[0 for x in range(FN_AI) ]]
# rater_AI=[item for sublist in list_of_lists for item in sublist]

# print("AI vs consensus (for nodules and lymphs), kappa is ",cohen_kappa_score(rater_GT, rater_AI))

# #Reader vs GT, TP_both included
# # Table looks like below:
# #                    GT
# #                   Yes                       No
# # Reader   Yes    TP_both+TP_read_only      FP_read
# #           No     FN_read                   0

# list_of_lists=[[1 for x in range(TP_both)],[1 for x in range(TP_read_only)],[0 for x in range(FP_read) ],[1 for x in range(FN_read) ] ] 
# rater_GT=[item for sublist in list_of_lists for item in sublist]

# list_of_lists=[[1 for x in range(TP_both)],[1 for x in range(TP_read_only)],[1 for x in range(FP_read) ],[0 for x in range(FN_read) ]]
# rater_read=[item for sublist in list_of_lists for item in sublist]

# print("Reader vs consensus (for non-nodules only), kappa is ",cohen_kappa_score(rater_GT, rater_read))



# #Due to small numbers of TP, FP and FN, we cannot calculate the kappa for consensus vs AI (or reader). Better for AI vs reader.

# #Reader vs AI, TP_both included

# # Table looks like below (for nodule and lymph nodes (not FP)):
# #                    AI
# #                   Yes                       No
# # Reader   Yes    TP_both                  FN_AI
# #           No     FN_read                   0

# list_of_lists=[[1 for x in range(TP_both)],[1 for x in range(FN_read) ],[0 for x in range(FN_AI)]] 
# rater_AI=[item for sublist in list_of_lists for item in sublist]

# list_of_lists=[[1 for x in range(TP_both)],[0 for x in range(FN_read) ],[1 for x in range(FN_AI)]]
# rater_read=[item for sublist in list_of_lists for item in sublist]

# print("Reader vs AI (for nodules/lymph nodes only), kappa is ",cohen_kappa_score(rater_AI, rater_read))


# # Table looks like below (for FP):
# #                    AI
# #                   Yes                       No
# # Reader   Yes        0                     FP_read
# #           No     FP_AI                       0

# list_of_lists=[[0 for x in range(FP_read) ],[1 for x in range(FP_AI)]] 
# rater_AI=[item for sublist in list_of_lists for item in sublist]

# list_of_lists=[[1 for x in range(FP_read) ],[0 for x in range(FP_AI)]]
# rater_read=[item for sublist in list_of_lists for item in sublist]

# print("Reader vs AI (for non-nodules only), kappa is ",cohen_kappa_score(rater_AI, rater_read))

# #For both nodules/lymphs and non-nodules:
# list_of_lists=[[0 for x in range(FP_read) ],[1 for x in range(FP_AI)],[1 for x in range(TP_both)],[1 for x in range(FN_read) ],[0 for x in range(FN_AI)]] 
# rater_AI=[item for sublist in list_of_lists for item in sublist]

# list_of_lists=[[1 for x in range(FP_read) ],[0 for x in range(FP_AI)],[1 for x in range(TP_both)],[0 for x in range(FN_read) ],[1 for x in range(FN_AI)]]
# rater_read=[item for sublist in list_of_lists for item in sublist]

# print("Reader vs AI (for both nods/lymphs and non-nodules), kappa is ",cohen_kappa_score(rater_AI, rater_read))

Non-emphysema (nodules and lymph nodes)

In [236]:
#Non-emphysema group, nodules and lymph nodes pooled.
#Inputs (defined in earlier cells): TP_noemph = findings made by both AI and reader,
#FP_nods_noemph / FN_nods_noemph = AI-only / reader-only findings kept as nodules or lymph nodes,
#FP_nonods_noemph / FN_nonods_noemph = AI-only / reader-only findings that are not nodules.

#AI counts
TP_AI_noemph = TP_noemph + FP_nods_noemph
FP_AI_noemph = FP_nonods_noemph
FN_AI_noemph = FN_nods_noemph      #nodules and lymph nodes detected only by the reader

#Reader counts
TP_read_noemph = TP_noemph + FN_nods_noemph
FP_read_noemph = FN_nonods_noemph
FN_read_noemph = FP_nods_noemph    #nodules and lymph nodes detected only by AI

#Group-agnostic aliases consumed by the cells that follow (they overwrite the emphysema values)
TP_AI, FP_AI, FN_AI = TP_AI_noemph, FP_AI_noemph, FN_AI_noemph
TP_read, FP_read, FN_read = TP_read_noemph, FP_read_noemph, FN_read_noemph
TP_read_only = FN_AI
TP_AI_only = FN_read
TP_both = TP_noemph                #common nodules and lymph nodes detected by both AI and reader

#Print the above
print("Non-emphysema numbers")
for _label, _count in (('TP_AI: ', TP_AI),
                       ('FP_AI: ', FP_AI),
                       ('FN_AI: ', FN_AI),
                       ('TP_read: ', TP_read),
                       ('FP_read: ', FP_read),
                       ('FN_read: ', FN_read),
                       ('TP_both: ', TP_both)):
    print(_label, _count)

Non-emphysema numbers
TP_AI:  96
FP_AI:  18
FN_AI:  40
TP_read:  109
FP_read:  22
FN_read:  27
TP_both:  69


In [237]:
#Assessing detection performance - For nodules and lymph nodes 
#Row 0 holds the AI results, row 1 the reader results; a 'Total' row is appended further down.
#All writes use a single .iloc[row, col] call: chained forms such as df[col].iloc[i] = ... assign
#into a temporary object and do not update the frame once pandas Copy-on-Write is active.
df_all_new=pd.DataFrame(columns=['sensitivity (95% CI)','PPV (95% CI)','F1 score (95% CI)','nodules detected','non-nodules detected','nodules missed'],
                        index=['AI, nonemphysema', 'reader, nonemphysema' ])


#AI, non-emphysema (nodules and lymph nodes): point estimates and raw counts
df_all_new.iloc[0,0]=np.round(sensitivity(TP_AI,FN_AI),2)
df_all_new.iloc[0,1]=np.round(PPV(TP_AI,FP_AI),2)
df_all_new.iloc[0,2]=np.round(F1score(TP_AI,FP_AI,FN_AI),2)
df_all_new.iloc[0,3]=TP_AI
df_all_new.iloc[0,4]=FP_AI
df_all_new.iloc[0,5]=FN_AI

#The fourth positional argument is passed as 0, as in the other calls of this helper in the notebook
sensitivity_confidence_interval_AI, PPV_confidence_interval_AI, F1_confidence_interval_AI \
= sensitivity_and_specificity_with_confidence_intervals(TP_AI, FP_AI, FN_AI, 0, alpha=0.95)

ci_sens_ai=[np.round(x,2) for x in sensitivity_confidence_interval_AI]
ci_ppv_ai=[np.round(x,2) for x in PPV_confidence_interval_AI]
ci_f1_ai=[np.round(x,2) for x in F1_confidence_interval_AI]

#Append the interval to each point estimate, e.g. '0.71 (0.62, 0.78)'.
#float() keeps the text free of 'np.float64(...)' wrappers, which NumPy >= 2 puts in scalar reprs.
for _col,_ci in enumerate((ci_sens_ai,ci_ppv_ai,ci_f1_ai)):
    df_all_new.iloc[0,_col]=str(df_all_new.iloc[0,_col])+' '+str(tuple(float(x) for x in _ci))

#Reader, non-emphysema (nodules and lymph nodes): point estimates and raw counts
df_all_new.iloc[1,0]=np.round(sensitivity(TP_read,FN_read),2)
df_all_new.iloc[1,1]=np.round(PPV(TP_read,FP_read),2)
df_all_new.iloc[1,2]=np.round(F1score(TP_read,FP_read,FN_read),2)
df_all_new.iloc[1,3]=TP_read
df_all_new.iloc[1,4]=FP_read
df_all_new.iloc[1,5]=FN_read

sensitivity_confidence_interval_read, PPV_confidence_interval_read, F1_confidence_interval_read\
    = sensitivity_and_specificity_with_confidence_intervals(TP_read, FP_read, FN_read, 0, alpha=0.95)

ci_sens_read=[np.round(x,2) for x in sensitivity_confidence_interval_read]
ci_ppv_read=[np.round(x,2) for x in PPV_confidence_interval_read]
ci_f1_read=[np.round(x,2) for x in F1_confidence_interval_read]

for _col,_ci in enumerate((ci_sens_read,ci_ppv_read,ci_f1_read)):
    df_all_new.iloc[1,_col]=str(df_all_new.iloc[1,_col])+' '+str(tuple(float(x) for x in _ci))

#Row totals and a 'Total' row; the metric cells of 'Total' are blanked because summing them is meaningless
df_all_new['All findings']=df_all_new['nodules detected']+df_all_new['non-nodules detected']+df_all_new['nodules missed']
df_all_new.loc['Total']= df_all_new.sum()
df_all_new.iloc[-1,0:3]=''

all_findings=df_all_new.iloc[:-1,3:-1].sum().sum()

#Turn the raw counts of each rater into 'count (percentage of that rater's findings)'
for i in range(2):
    row_all=np.sum(df_all_new.iloc[i,3:6].values)
    percentage_fp=np.round((df_all_new.iloc[i,4]/row_all)*100,1) 
    df_all_new.iloc[i,4]=str(df_all_new.iloc[i,4])+' ('+str(percentage_fp)+'%)'
    percentage_tp=np.round((df_all_new.iloc[i,3]/row_all)*100,1) 
    df_all_new.iloc[i,3]=str(df_all_new.iloc[i,3])+' ('+str(percentage_tp)+'%)'
    percentage_fn=np.round((df_all_new.iloc[i,5]/row_all)*100,1) 
    df_all_new.iloc[i,5]=str(df_all_new.iloc[i,5])+' ('+str(percentage_fn)+'%)'

    df_all_new.iloc[i,6]=str(df_all_new.iloc[i,6])+' (100%)'

df_all_new

Unnamed: 0,sensitivity (95% CI),PPV (95% CI),F1 score (95% CI),nodules detected,non-nodules detected,nodules missed,All findings
"AI, nonemphysema","0.71 (0.62, 0.78)","0.84 (0.76, 0.9)","0.77 (0.71, 0.82)",96 (62.3%),18 (11.7%),40 (26.0%),154 (100%)
"reader, nonemphysema","0.8 (0.72, 0.86)","0.83 (0.75, 0.89)","0.82 (0.76, 0.86)",109 (69.0%),22 (13.9%),27 (17.1%),158 (100%)
Total,,,,205,40,67,312


In [238]:
# Write the table currently held in df_all_new to an Excel file. The file name is relative, so it
# is created in the current working directory.
# NOTE(review): df_all_new is reused for several tables; this presumably exports the non-emphysema
# nodules + lymph nodes performance table built in the preceding cell - run the cells in order.
df_all_new.to_excel('nodules_lymph_nonemphysema.xlsx')

In [239]:
#McNemar's test to compare Reader vs AI (using consensus panel)
#2x2 layout: [[found by both AI and reader, found by AI but missed by reader],
#             [found by reader but missed by AI, 0]]
#The bottom-right cell (missed by both) is unknown and entered as 0.

#Nodules and lymph nodes
data=[[TP_both, FN_read],
      [FN_AI, 0]]
# print(data)

#Chi-square version of McNemar's test WITH continuity correction (exact=False, correction=True)
#NOTE(review): with few discordant pairs the exact binomial version (exact=True) is often preferred - confirm the choice.
_mcnemar_pvalue=mcnemar(data, exact=False, correction=True).pvalue
print("McNemar's test (nodules and lymphs), AI_vs_Reader with continuity correction (not exact) p value is",_mcnemar_pvalue) 


# #For FPs
# data=[[0, FP_AI], 
#         [FP_read, 0]]
# # print(data)

# # McNemar's Test with continuity correction
# print("For FP findings, with continuity correction (not exact) p value is",mcnemar(data, exact=False,correction=True).pvalue)

McNemar's test (nodules and lymphs), AI_vs_Reader with continuity correction (not exact) p value is 0.14263920633056812


In [240]:
# #AI vs GT, TP_both included
# # Table looks like below:
# #              GT
# #             Yes                       No
# # AI   Yes    TP_both+TP_AI_only      FP_AI
# #      No     FN_AI                   0

# list_of_lists=[[1 for x in range(TP_both)],[1 for x in range(TP_AI_only)],[0 for x in range(FP_AI) ],[1 for x in range(FN_AI) ] ] 
# rater_GT=[item for sublist in list_of_lists for item in sublist]

# list_of_lists=[[1 for x in range(TP_both)], [1 for x in range(TP_AI_only)],[1 for x in range(FP_AI) ],[0 for x in range(FN_AI) ]]
# rater_AI=[item for sublist in list_of_lists for item in sublist]

# print("AI vs consensus (for nodules and lymphs), kappa is ",cohen_kappa_score(rater_GT, rater_AI))

# #Reader vs GT, TP_both included
# # Table looks like below:
# #                    GT
# #                   Yes                       No
# # Reader   Yes    TP_both+TP_read_only      FP_read
# #           No     FN_read                   0

# list_of_lists=[[1 for x in range(TP_both)],[1 for x in range(TP_read_only)],[0 for x in range(FP_read) ],[1 for x in range(FN_read) ] ] 
# rater_GT=[item for sublist in list_of_lists for item in sublist]

# list_of_lists=[[1 for x in range(TP_both)],[1 for x in range(TP_read_only)],[1 for x in range(FP_read) ],[0 for x in range(FN_read) ]]
# rater_read=[item for sublist in list_of_lists for item in sublist]

# print("Reader vs consensus (for non-nodules only), kappa is ",cohen_kappa_score(rater_GT, rater_read))

# #Due to small numbers of TP, FP and FN, we cannot calculate the kappa for consensus vs AI (or reader). Better for AI vs reader.

# #Reader vs AI, TP_both included

# # Table looks like below (for nodule and lymph nodes (not FP)):
# #                    AI
# #                   Yes                       No
# # Reader   Yes    TP_both                  FN_AI
# #           No     FN_read                   0

# list_of_lists=[[1 for x in range(TP_both)],[1 for x in range(FN_read) ],[0 for x in range(FN_AI)]] 
# rater_AI=[item for sublist in list_of_lists for item in sublist]

# list_of_lists=[[1 for x in range(TP_both)],[0 for x in range(FN_read) ],[1 for x in range(FN_AI)]]
# rater_read=[item for sublist in list_of_lists for item in sublist]

# print("Reader vs AI (for nodules/lymph nodes only), kappa is ",cohen_kappa_score(rater_AI, rater_read))


# # Table looks like below (for FP):
# #                    AI
# #                   Yes                       No
# # Reader   Yes        0                     FP_read
# #           No     FP_AI                       0

# list_of_lists=[[0 for x in range(FP_read) ],[1 for x in range(FP_AI)]] 
# rater_AI=[item for sublist in list_of_lists for item in sublist]

# list_of_lists=[[1 for x in range(FP_read) ],[0 for x in range(FP_AI)]]
# rater_read=[item for sublist in list_of_lists for item in sublist]

# print("Reader vs AI (for non-nodules only), kappa is ",cohen_kappa_score(rater_AI, rater_read))


# #For both nodules/lymphs and non-nodules:
# list_of_lists=[[0 for x in range(FP_read) ],[1 for x in range(FP_AI)],[1 for x in range(TP_both)],[1 for x in range(FN_read) ],[0 for x in range(FN_AI)]] 
# rater_AI=[item for sublist in list_of_lists for item in sublist]

# list_of_lists=[[1 for x in range(FP_read) ],[0 for x in range(FP_AI)],[1 for x in range(TP_both)],[0 for x in range(FN_read) ],[1 for x in range(FN_AI)]]
# rater_read=[item for sublist in list_of_lists for item in sublist]

# print("Reader vs AI (for both nods/lymphs and non-nodules), kappa is ",cohen_kappa_score(rater_AI, rater_read))

Note: McNemar's test is a test for paired data, so it should only be used within the same group of participants. We should not use it to compare the emphysema and non-emphysema groups with each other!

#### Same analysis for volume subgroups (nodules and lymph nodes)

Emphysema volume subgroups

In [241]:
#Emphysema group split by volume: suffix _100 = 30-100mm3, suffix _100_300 = 100-300mm3.
#NOTE(review): the previous comments described the reader-only / AI-only counts as "excluding lymph
#nodes", but this section pools nodules and lymph nodes and the printed subgroup counts add up to the
#pooled emphysema totals (FN_AI 26+3=29, FN_read 13+9=22), so lymph nodes appear to be included - confirm.

#30-100mm3
TP_both_100=TP_emph_30_100                 #found by both AI and reader
FN_AI_100=reader_nods_emph_30_100          #found by the reader only
FN_read_100=ai_nods_emph_30_100            #found by AI only
TP_AI_100=TP_both_100+FN_read_100
TP_read_100=TP_both_100+FN_AI_100
FP_AI_100=ai_nonods_emph_30_100            #AI findings that are not nodules
FP_read_100=reader_nonods_emph_30_100      #reader findings that are not nodules

#100-300mm3
TP_both_100_300=TP_emph_100_300
FN_AI_100_300=reader_nods_emph_100_300
FN_read_100_300=ai_nods_emph_100_300
TP_AI_100_300=TP_both_100_300+FN_read_100_300
TP_read_100_300=TP_both_100_300+FN_AI_100_300
FP_AI_100_300=ai_nonods_emph_100_300
FP_read_100_300=reader_nonods_emph_100_300

#Print the above
print("Emphysema numbers")
for _label, _count in (('TP_AI_100: ', TP_AI_100),
                       ('FP_AI_100: ', FP_AI_100),
                       ('FN_AI_100: ', FN_AI_100),
                       ('TP_read_100: ', TP_read_100),
                       ('FP_read_100: ', FP_read_100),
                       ('FN_read_100: ', FN_read_100),
                       ('TP_AI_100_300: ', TP_AI_100_300),
                       ('FP_AI_100_300: ', FP_AI_100_300),
                       ('FN_AI_100_300: ', FN_AI_100_300),
                       ('TP_read_100_300: ', TP_read_100_300),
                       ('FP_read_100_300: ', FP_read_100_300),
                       ('FN_read_100_300: ', FN_read_100_300),
                       ('TP_both_100: ', TP_both_100),
                       ('TP_both_100_300: ', TP_both_100_300)):
    print(_label, _count)

Emphysema numbers
TP_AI_100:  37
FP_AI_100:  8
FN_AI_100:  26
TP_read_100:  50
FP_read_100:  5
FN_read_100:  13
TP_AI_100_300:  25
FP_AI_100_300:  12
FN_AI_100_300:  3
TP_read_100_300:  19
FP_read_100_300:  1
FN_read_100_300:  9
TP_both_100:  24
TP_both_100_300:  16


In [242]:
#Table with Reader, AI and consensus findings for nodules and lymph nodes for emphysema.
#Rows: non-nodule findings, then accepted findings per volume subgroup.
#The 'Consensus' column counts only the discrepant findings (AI-only + reader-only), not those found by both.
df_all_new=pd.DataFrame(
    {'Reader': [FP_read_100+FP_read_100_300,
                TP_both_100+FN_AI_100,
                TP_both_100_300+FN_AI_100_300],
     'AI': [FP_AI_100+FP_AI_100_300,
            TP_both_100+FN_read_100,
            TP_both_100_300+FN_read_100_300],
     'Consensus': [FP_AI_100+FP_AI_100_300+FP_read_100+FP_read_100_300,
                   FN_AI_100+FN_read_100,
                   FN_AI_100_300+FN_read_100_300]},
    index=pd.Index(['Non-nodules','TP 30-100mm3','TP 100-300mm3'], name='Emphysema cases'))

df_all_new

Unnamed: 0_level_0,Reader,AI,Consensus
Emphysema cases,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Non-nodules,6,20,26
TP 30-100mm3,50,37,39
TP 100-300mm3,19,25,12


In [243]:
# Write the table currently held in df_all_new to an Excel file. The file name is relative, so it
# is created in the current working directory.
# NOTE(review): df_all_new is reused for several tables; this presumably exports the emphysema
# Reader / AI / Consensus summary built in the preceding cell - run the cells in order.
df_all_new.to_excel('non_nodules_and_TP_emphysema.xlsx')

In [244]:
#For emphysema only comparison between reader and AI for volume subgroups
#Layout: rows 0-1 = AI (30-100mm3, 100-300mm3), row 2 = AI total line,
#        rows 3-4 = reader (30-100mm3, 100-300mm3), row 5 = reader total line.
#Rows 2 and 5 share the empty label '', so every write is positional: a single .iloc[row, col] call.
#Chained forms such as df[col].iloc[i] = ... assign into a temporary object and do not update the
#frame once pandas Copy-on-Write is active.
df_all_new=pd.DataFrame(columns=['sensitivity (95% CI)','PPV (95% CI)','F1 score (95% CI)','nodules correctly detected','non-nodules incorrectly detected','nodules missed'], 
                        index=['AI, emphysema 30-100mm3', 'AI, emphysema 100-300mm3','',
                               'reader, emphysema 30-100mm3', 'reader, emphysema 100-300mm3',''
                              ])

df_all_new.index.name = 'GT by radiologists for discrepancies' 

#(row position, TP, FP, FN) for each rater/subgroup. The order 0, 3, 1, 4 is the order in which the
#rows were originally filled, so the scratch variables below end up holding the same (last) values.
_rows_and_counts=[(0,TP_AI_100,FP_AI_100,FN_AI_100),
                  (3,TP_read_100,FP_read_100,FN_read_100),
                  (1,TP_AI_100_300,FP_AI_100_300,FN_AI_100_300),
                  (4,TP_read_100_300,FP_read_100_300,FN_read_100_300)]

for _row,_tp,_fp,_fn in _rows_and_counts:
    #Point estimates rounded to two decimals
    _sens=np.round(sensitivity(_tp,_fn),2)
    _ppv=np.round(PPV(_tp,_fp),2)
    _f1=np.round(F1score(_tp,_fp,_fn),2)

    #The '_AI' / '_ai' names are kept for backward compatibility; they are also used for the reader rows.
    #The fourth positional argument is passed as 0, as in the other calls of this helper in the notebook.
    sensitivity_confidence_interval_AI, PPV_confidence_interval_AI, F1_confidence_interval_AI \
    = sensitivity_and_specificity_with_confidence_intervals(_tp, _fp, _fn, 0, alpha=0.95)

    ci_sens_ai=[np.round(x,2) for x in sensitivity_confidence_interval_AI]
    ci_ppv_ai=[np.round(x,2) for x in PPV_confidence_interval_AI]
    ci_f1_ai=[np.round(x,2) for x in F1_confidence_interval_AI]

    #'estimate (lower, upper)'; float() keeps the text free of 'np.float64(...)' wrappers,
    #which NumPy >= 2 puts in scalar reprs.
    df_all_new.iloc[_row,0]=str(_sens)+' '+str(tuple(float(x) for x in ci_sens_ai))
    df_all_new.iloc[_row,1]=str(_ppv)+' '+str(tuple(float(x) for x in ci_ppv_ai))
    df_all_new.iloc[_row,2]=str(_f1)+' '+str(tuple(float(x) for x in ci_f1_ai))
    df_all_new.iloc[_row,3]=_tp
    df_all_new.iloc[_row,4]=_fp
    df_all_new.iloc[_row,5]=_fn

#Number of findings per rater (TP + FP + FN over both volume subgroups); used as percentage denominator
AI_all=np.sum(df_all_new.iloc[0:2,3:6].values)
reader_all=np.sum(df_all_new.iloc[3:5,3:6].values)

df_all_new['All findings']=df_all_new['nodules correctly detected']+df_all_new['non-nodules incorrectly detected']+df_all_new['nodules missed']

#Total lines: blank everywhere except 'All findings', which holds the sum of the two rows above.
#(No zero-fill is needed first: both total cells are overwritten here.)
for _total_row,_first,_last in ((5,3,5),(2,0,2)):
    df_all_new.iloc[_total_row,0:6]=''
    df_all_new.iloc[_total_row,6]=np.sum(df_all_new['All findings'].iloc[_first:_last])

# print(df_all_new['sensitivity (95% CI)'])
#Turn the raw counts into 'count (percentage of all findings of that rater)'
for i,sum_all in ((0,AI_all),(1,AI_all),(3,reader_all),(4,reader_all)):
    percentage_tp=np.round((df_all_new.iloc[i,3]/sum_all)*100,1) 
    df_all_new.iloc[i,3]=str(df_all_new.iloc[i,3])+' ('+str(percentage_tp)+'%)'

    percentage_fp=np.round((df_all_new.iloc[i,4]/sum_all)*100,1) 
    df_all_new.iloc[i,4]=str(df_all_new.iloc[i,4])+' ('+str(percentage_fp)+'%)'

    percentage_fn=np.round((df_all_new.iloc[i,5]/sum_all)*100,1) 
    df_all_new.iloc[i,5]=str(df_all_new.iloc[i,5])+' ('+str(percentage_fn)+'%)'

    df_all_new.iloc[i,6]=str(df_all_new.iloc[i,6])+' ('+str(np.round(100*df_all_new.iloc[i,6]/sum_all,1))+'%)'

    
df_all_new.iloc[2,6]=str(df_all_new.iloc[2,6])+' (100%)'
df_all_new.iloc[5,6]=str(df_all_new.iloc[5,6])+' (100%)'

df_all_new #Detection performance comparison for nodules and lymph nodes

Unnamed: 0_level_0,sensitivity (95% CI),PPV (95% CI),F1 score (95% CI),nodules correctly detected,non-nodules incorrectly detected,nodules missed,All findings
GT by radiologists for discrepancies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"AI, emphysema 30-100mm3","0.59 (0.46, 0.71)","0.82 (0.67, 0.91)","0.69 (0.59, 0.77)",37 (33.3%),8 (7.2%),26 (23.4%),71 (64.0%)
"AI, emphysema 100-300mm3","0.89 (0.71, 0.97)","0.68 (0.5, 0.81)","0.77 (0.65, 0.86)",25 (22.5%),12 (10.8%),3 (2.7%),40 (36.0%)
,,,,,,,111 (100%)
"reader, emphysema 30-100mm3","0.79 (0.67, 0.88)","0.91 (0.79, 0.97)","0.85 (0.77, 0.9)",50 (51.5%),5 (5.2%),13 (13.4%),68 (70.1%)
"reader, emphysema 100-300mm3","0.68 (0.48, 0.83)","0.95 (0.73, 1.0)","0.79 (0.65, 0.89)",19 (19.6%),1 (1.0%),9 (9.3%),29 (29.9%)
,,,,,,,97 (100%)


In [245]:
# Write the table currently held in df_all_new to an Excel file. The file name is relative, so it
# is created in the current working directory.
# NOTE(review): df_all_new is reused for several tables; this presumably exports the emphysema
# volume-subgroup performance table built in the preceding cell - run the cells in order.
df_all_new.to_excel('nodules_lymph_volumes_emphysema.xlsx')

In [246]:
#McNemar's test, AI vs reader, per volume subgroup (nodules and lymph nodes).
#2x2 layout: [[found by both, found by AI but missed by reader],
#             [found by reader but missed by AI, 0]]
#Chi-square version WITH continuity correction (exact=False, correction=True).
#NOTE(review): with few discordant pairs the exact binomial version (exact=True) is often preferred - confirm the choice.

#30-100mm3
data=[[TP_both_100, FN_read_100],
      [FN_AI_100, 0]]
# print(data)

_pvalue_100=mcnemar(data, exact=False, correction=True).pvalue
print("For nodules and lymph nodes (AI vs reader) of 30-100mm3 with continuity correction (not exact) p value is",_pvalue_100)
# print("\n")

# #For FPs
# data=[[0, FP_AI_100], 
#         [FP_read_100, 0]]
# # print(data)

# # McNemar's Test with continuity correction
# print("For FP findings of 30-100mm3, with continuity correction (not exact) p value is",mcnemar(data, exact=False,correction=True).pvalue)
# print("\n")



#100-300mm3
data=[[TP_both_100_300, FN_read_100_300],
      [FN_AI_100_300, 0]]
# print(data)

_pvalue_100_300=mcnemar(data, exact=False, correction=True).pvalue
print("For nodules and lymph nodes (AI vs reader) of 100-300mm3 with continuity correction (not exact) p value is",_pvalue_100_300)
# print("\n")

# #For FPs
# data=[[0, FP_AI_100_300], 
#         [FP_read_100_300, 0]]
# # print(data)

# # McNemar's Test with continuity correction
# print("For FP findings of 100-300mm3, with continuity correction (not exact) p value is",mcnemar(data, exact=False,correction=True).pvalue)

For nodules and lymph nodes (AI vs reader) of 30-100mm3 with continuity correction (not exact) p value is 0.05466393589167511
For nodules and lymph nodes (AI vs reader) of 100-300mm3 with continuity correction (not exact) p value is 0.14891467317876161


Nonemphysema volume subgroups

In [247]:
#Non-emphysema group split by volume: suffix _100 = 30-100mm3, suffix _100_300 = 100-300mm3.
#These assignments overwrite the emphysema values held by the same names.
#NOTE(review): the previous comments described the reader-only / AI-only counts as "excluding lymph
#nodes", but this section pools nodules and lymph nodes and the printed subgroup counts add up to the
#pooled non-emphysema totals (FN_AI 38+2=40, FN_read 21+6=27), so lymph nodes appear to be included - confirm.

#30-100mm3
TP_both_100=TP_noemph_30_100                 #found by both AI and reader
FN_AI_100=reader_nods_noemph_30_100          #found by the reader only
FN_read_100=ai_nods_noemph_30_100            #found by AI only
TP_AI_100=TP_both_100+FN_read_100
TP_read_100=TP_both_100+FN_AI_100
FP_AI_100=ai_nonods_noemph_30_100            #AI findings that are not nodules
FP_read_100=reader_nonods_noemph_30_100      #reader findings that are not nodules

#100-300mm3
TP_both_100_300=TP_noemph_100_300
FN_AI_100_300=reader_nods_noemph_100_300
FN_read_100_300=ai_nods_noemph_100_300
TP_AI_100_300=TP_both_100_300+FN_read_100_300
TP_read_100_300=TP_both_100_300+FN_AI_100_300
FP_AI_100_300=ai_nonods_noemph_100_300
FP_read_100_300=reader_nonods_noemph_100_300

#Print the above
print("Non-emphysema numbers")
for _label, _count in (('TP_AI_100: ', TP_AI_100),
                       ('FP_AI_100: ', FP_AI_100),
                       ('FN_AI_100: ', FN_AI_100),
                       ('TP_read_100: ', TP_read_100),
                       ('FP_read_100: ', FP_read_100),
                       ('FN_read_100: ', FN_read_100),
                       ('TP_AI_100_300: ', TP_AI_100_300),
                       ('FP_AI_100_300: ', FP_AI_100_300),
                       ('FN_AI_100_300: ', FN_AI_100_300),
                       ('TP_read_100_300: ', TP_read_100_300),
                       ('FP_read_100_300: ', FP_read_100_300),
                       ('FN_read_100_300: ', FN_read_100_300),
                       ('TP_both_100: ', TP_both_100),
                       ('TP_both_100_300: ', TP_both_100_300)):
    print(_label, _count)

Non-emphysema numbers
TP_AI_100:  70
FP_AI_100:  5
FN_AI_100:  38
TP_read_100:  87
FP_read_100:  20
FN_read_100:  21
TP_AI_100_300:  26
FP_AI_100_300:  13
FN_AI_100_300:  2
TP_read_100_300:  22
FP_read_100_300:  2
FN_read_100_300:  6
TP_both_100:  49
TP_both_100_300:  20


In [248]:
#Table with Reader, AI and consensus findings for nodules and lymph nodes for non-emphysema.
#Rows: non-nodule findings, then accepted findings per volume subgroup.
#The 'Consensus' column counts only the discrepant findings (AI-only + reader-only), not those found by both.
df_all_new=pd.DataFrame(
    {'Reader': [FP_read_100+FP_read_100_300,
                TP_both_100+FN_AI_100,
                TP_both_100_300+FN_AI_100_300],
     'AI': [FP_AI_100+FP_AI_100_300,
            TP_both_100+FN_read_100,
            TP_both_100_300+FN_read_100_300],
     'Consensus': [FP_AI_100+FP_AI_100_300+FP_read_100+FP_read_100_300,
                   FN_AI_100+FN_read_100,
                   FN_AI_100_300+FN_read_100_300]},
    index=pd.Index(['Non-nodules','TP 30-100mm3','TP 100-300mm3'], name='Non-Emphysema cases'))

df_all_new

Unnamed: 0_level_0,Reader,AI,Consensus
Non-Emphysema cases,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Non-nodules,22,18,40
TP 30-100mm3,87,70,59
TP 100-300mm3,22,26,8


In [249]:
# Write the table currently held in df_all_new to an Excel file. The file name is relative, so it
# is created in the current working directory.
# NOTE(review): df_all_new is reused for several tables; this presumably exports the non-emphysema
# Reader / AI / Consensus summary built in the preceding cell - run the cells in order.
df_all_new.to_excel('non_nodules_and_TP_nonemphysema.xlsx')

In [250]:
#Non-emphysema only: detection performance of AI and reader for the two volume subgroups
def _volume_subgroup_performance_table(ai_counts, reader_counts):
    """Build the AI-vs-reader detection performance table for the volume subgroups.

    Parameters
    ----------
    ai_counts, reader_counts : list of (row_label, TP, FP, FN)
        One entry per volume subgroup, in the order the rows should appear.

    Returns
    -------
    (table, ai_total, reader_total)
        `table` has one row per subgroup followed by a blank-labelled total row for
        each group. Metric cells read "value (low, high)" rounded to 2 decimals; count
        cells read "n (p%)" where p is the share of all findings (TP+FP+FN) of that
        group, rounded to 1 decimal. `ai_total` / `reader_total` are those group totals.

    Every cell is written exactly once with its final value, so the result does not
    depend on chained assignment (`df[col].iloc[i] = ...`, which does not update the
    frame under pandas Copy-on-Write) nor on positional `Series[int]` lookups.
    """
    metric_cols=['sensitivity (95% CI)','PPV (95% CI)','F1 score (95% CI)']
    count_cols=['nodules correctly detected','non-nodules incorrectly detected','nodules missed']

    def with_ci(estimate, interval):
        #Plain floats keep the tuple readable as "(0.55, 0.74)" on every NumPy version
        #(NumPy >= 2 would otherwise print "np.float64(0.55)" inside the tuple)
        bounds=tuple(float(np.round(bound,2)) for bound in interval)
        return str(np.round(estimate,2))+' '+str(bounds)

    def with_share(count, share):
        return str(count)+' ('+str(np.round(share,1))+'%)'

    row_labels=[]
    row_cells=[]
    group_totals=[]
    for group in (ai_counts, reader_counts):
        #All findings of this group over both subgroups; denominator of every percentage
        group_total=sum(n_tp+n_fp+n_fn for _,n_tp,n_fp,n_fn in group)
        group_totals.append(group_total)

        for row_label,n_tp,n_fp,n_fn in group:
            sens_ci,ppv_ci,f1_ci=sensitivity_and_specificity_with_confidence_intervals(
                n_tp,n_fp,n_fn,0,alpha=0.95)
            n_findings=n_tp+n_fp+n_fn

            row_labels.append(row_label)
            row_cells.append([
                with_ci(sensitivity(n_tp,n_fn),sens_ci),
                with_ci(PPV(n_tp,n_fp),ppv_ci),
                with_ci(F1score(n_tp,n_fp,n_fn),f1_ci),
                with_share(n_tp,(n_tp/group_total)*100),
                with_share(n_fp,(n_fp/group_total)*100),
                with_share(n_fn,(n_fn/group_total)*100),
                with_share(n_findings,100*n_findings/group_total),
            ])

        #Blank-labelled row closing the group: only the total number of findings is shown
        row_labels.append('')
        row_cells.append(['']*6+[str(group_total)+' (100%)'])

    table=pd.DataFrame(row_cells,
                       columns=metric_cols+count_cols+['All findings'],
                       index=row_labels,
                       dtype=object)
    table.index.name='GT by radiologists for discrepancies'
    return table,group_totals[0],group_totals[1]


df_all_new,AI_all,reader_all=_volume_subgroup_performance_table(
    ai_counts=[('AI, nonemphysema 30-100mm3',TP_AI_100,FP_AI_100,FN_AI_100),
               ('AI, nonemphysema 100-300mm3',TP_AI_100_300,FP_AI_100_300,FN_AI_100_300)],
    reader_counts=[('reader, nonemphysema 30-100mm3',TP_read_100,FP_read_100,FN_read_100),
                   ('reader, nonemphysema 100-300mm3',TP_read_100_300,FP_read_100_300,FN_read_100_300)],
)

df_all_new #Detection performance comparison for nodules and lymph nodes

Unnamed: 0_level_0,sensitivity (95% CI),PPV (95% CI),F1 score (95% CI),nodules correctly detected,non-nodules incorrectly detected,nodules missed,All findings
GT by radiologists for discrepancies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"AI, nonemphysema 30-100mm3","0.65 (0.55, 0.74)","0.93 (0.84, 0.98)","0.77 (0.7, 0.82)",70 (45.5%),5 (3.2%),38 (24.7%),113 (73.4%)
"AI, nonemphysema 100-300mm3","0.93 (0.75, 0.99)","0.67 (0.5, 0.8)","0.78 (0.65, 0.87)",26 (16.9%),13 (8.4%),2 (1.3%),41 (26.6%)
,,,,,,,154 (100%)
"reader, nonemphysema 30-100mm3","0.81 (0.72, 0.87)","0.81 (0.72, 0.88)","0.81 (0.75, 0.86)",87 (55.1%),20 (12.7%),21 (13.3%),128 (81.0%)
"reader, nonemphysema 100-300mm3","0.79 (0.59, 0.91)","0.92 (0.72, 0.99)","0.85 (0.71, 0.93)",22 (13.9%),2 (1.3%),6 (3.8%),30 (19.0%)
,,,,,,,158 (100%)


In [251]:
#Save the volume-subgroup performance table shown above to an Excel file in the working directory
df_all_new.to_excel(excel_writer='nodules_lymph_volumes_nonemphysema.xlsx')

In [252]:
#McNemar's test (chi-square version, WITH continuity correction) of AI vs reader per volume subgroup.
#Each 2x2 table is [[TP of both, reader misses], [AI misses, 0]]; the last cell is fixed to 0.
for _subgroup,data in (
    ('30-100mm3',[[TP_both_100,FN_read_100],
                  [FN_AI_100,0]]),
    ('100-300mm3',[[TP_both_100_300,FN_read_100_300],
                   [FN_AI_100_300,0]]),
):
    _p_value=mcnemar(data,exact=False,correction=True).pvalue
    print("For nodules and lymph nodes (AI vs reader) of "+_subgroup+" with continuity correction (not exact) p value is",_p_value)

#Disabled: the same test on the false positives of each subgroup, e.g.
#   data=[[0, FP_AI_100],
#         [FP_read_100, 0]]
#   print("For FP findings of 30-100mm3, with continuity correction (not exact) p value is",mcnemar(data, exact=False,correction=True).pvalue)
#and likewise with FP_AI_100_300 / FP_read_100_300 for 100-300mm3.

For nodules and lymph nodes (AI vs reader) of 30-100mm3 with continuity correction (not exact) p value is 0.03724916580683402
For nodules and lymph nodes (AI vs reader) of 100-300mm3 with continuity correction (not exact) p value is 0.2888443663464818


#### Mann-Whitney U test to check for differences in volumes between reader and AI, and between emphysema and non-emphysema cases (not used)

This is an unpaired test, meaning that each nodule is treated as independent of the others. It can also be used with unequal sample sizes.
Bland-Altman is not a good choice here, since it can only be performed on nodules detected by both AI and reader, to assess agreement in the volume measurement.

In [253]:
# #Perform the Mann-Whitney U test for nodules only
# # stats.mannwhitneyu(group1, group2, alternative='two-sided')

# #Compare all non-emphysema reader vs AI volumes
# print('non emphysema reader vs AI volumes p value is', stats.mannwhitneyu(reader_only_nods_noemph_30_100_vols+reader_only_nods_noemph_100_300_vols+reader_only_nods_noemph_300_vols,
#                    ai_only_nods_noemph_30_100_vols+ai_only_nods_noemph_100_300_vols+ai_only_nods_noemph_300_vols).pvalue)

# #Compare all emphysema reader vs AI volumes
# print('emphysema reader vs AI volumes p value is',stats.mannwhitneyu(reader_only_nods_emph_30_100_vols+reader_only_nods_emph_100_300_vols+reader_only_nods_emph_300_vols,
#                    ai_only_nods_emph_30_100_vols+ai_only_nods_emph_100_300_vols+ai_only_nods_emph_300_vols).pvalue)

# #Compare all emphysema vs non-emphysema volumes for nodules, for reader only
# print('emphysema vs non-emphysema for reader p value is',stats.mannwhitneyu(reader_only_nods_emph_30_100_vols+reader_only_nods_emph_100_300_vols+reader_only_nods_emph_300_vols,
#                    reader_only_nods_noemph_30_100_vols+reader_only_nods_noemph_100_300_vols+reader_only_nods_noemph_300_vols).pvalue)

# #Compare all emphysema vs non-emphysema volumes for nodules, for AI only
# print('emphysema vs non-emphysema for AI p value is',stats.mannwhitneyu(ai_only_nods_noemph_30_100_vols+ai_only_nods_noemph_100_300_vols+ai_only_nods_noemph_300_vols,
#                    ai_only_nods_emph_30_100_vols+ai_only_nods_emph_100_300_vols+ai_only_nods_emph_300_vols).pvalue)

In [254]:
# #Similarly as above for non-nodule findings

# #Compare all non-emphysema reader vs AI volumes for non-nodule findings
# print('non-emphysema reader vs AI volumes for non-nodule findings p value is',stats.mannwhitneyu(reader_nonods_noemph_30_100_vols+reader_nonods_noemph_100_300_vols+reader_nonods_noemph_300_vols,
#                    ai_nonods_noemph_30_100_vols+ai_nonods_noemph_100_300_vols+ai_nonods_noemph_300_vols).pvalue)

# #Compare all emphysema reader vs AI volumes for non-nodule findings
# print('emphysema reader vs AI volumes for non-nodule findings p value is',stats.mannwhitneyu(reader_nonods_emph_30_100_vols+reader_nonods_emph_100_300_vols+reader_nonods_emph_300_vols,
#                    ai_nonods_emph_30_100_vols+ai_nonods_emph_100_300_vols+ai_nonods_emph_300_vols).pvalue)

# #Compare all emphysema vs non-emphysema volumes for reader only, for non-nodule findings
# print('emphysema vs non-emphysema volumes for reader only, for non-nodule findings p value is',stats.mannwhitneyu(reader_nonods_emph_30_100_vols+reader_nonods_emph_100_300_vols+reader_nonods_emph_300_vols,
#                    reader_nonods_noemph_30_100_vols+reader_nonods_noemph_100_300_vols+reader_nonods_noemph_300_vols).pvalue)

# #Compare all emphysema vs non-emphysema volumes for AI only, for non-nodule findings
# print('emphysema vs non-emphysema volumes for AI only, for non-nodule findings p value is',stats.mannwhitneyu(ai_nonods_noemph_30_100_vols+ai_nonods_noemph_100_300_vols+ai_nonods_noemph_300_vols,
#                    ai_nonods_emph_30_100_vols+ai_nonods_emph_100_300_vols+ai_nonods_emph_300_vols).pvalue)

In [255]:
# #Similarly as above for nodules and lymph nodes

# #Compare all non-emphysema reader vs AI volumes for nodules and lymph nodes
# print('non-emphysema reader vs AI volumes for nodules and lymph nodes p value is',stats.mannwhitneyu(reader_only_nods_noemph_30_100_vols+reader_only_nods_noemph_100_300_vols+reader_only_nods_noemph_300_vols+
#                    reader_lymph_noemph_30_100_vols+ reader_lymph_noemph_100_300_vols+ reader_lymph_noemph_300_vols,
#                    ai_only_nods_noemph_30_100_vols+ai_only_nods_noemph_100_300_vols+ai_only_nods_noemph_300_vols+
#                   ai_lymph_noemph_30_100_vols+ ai_lymph_noemph_100_300_vols+ ai_lymph_noemph_300_vols).pvalue)

# #Compare all emphysema reader vs AI volumes for nodules and lymph nodes
# print('emphysema reader vs AI volumes for nodules and lymph nodes p value is',stats.mannwhitneyu(reader_only_nods_emph_30_100_vols+reader_only_nods_emph_100_300_vols+reader_only_nods_emph_300_vols+
#                    reader_lymph_emph_30_100_vols+ reader_lymph_emph_100_300_vols+ reader_lymph_emph_300_vols,
#                    ai_only_nods_emph_30_100_vols+ai_only_nods_emph_100_300_vols+ai_only_nods_emph_300_vols+
#                   ai_lymph_emph_30_100_vols+ ai_lymph_emph_100_300_vols+ ai_lymph_emph_300_vols).pvalue)

# #Compare all emphysema vs non-emphysema volumes for reader only for nodules and lymph nodes
# print('emphysema vs non-emphysema volumes for reader only for nodules and lymph nodes p value is',stats.mannwhitneyu(reader_only_nods_emph_30_100_vols+reader_only_nods_emph_100_300_vols+reader_only_nods_emph_300_vols+
#                    reader_lymph_emph_30_100_vols+ reader_lymph_emph_100_300_vols+ reader_lymph_emph_300_vols,
#                    reader_only_nods_noemph_30_100_vols+reader_only_nods_noemph_100_300_vols+reader_only_nods_noemph_300_vols+
#                   reader_lymph_noemph_30_100_vols+ reader_lymph_noemph_100_300_vols+ reader_lymph_noemph_300_vols).pvalue)

# #Compare all emphysema vs non-emphysema volumes for AI only for nodules and lymph nodes
# print('emphysema vs non-emphysema volumes for AI only for nodules and lymph nodes p value is',stats.mannwhitneyu(ai_only_nods_noemph_30_100_vols+ai_only_nods_noemph_100_300_vols+ai_only_nods_noemph_300_vols+
#                    ai_lymph_noemph_30_100_vols+ ai_lymph_noemph_100_300_vols+ ai_lymph_noemph_300_vols,
#                    ai_only_nods_emph_30_100_vols+ai_only_nods_emph_100_300_vols+ai_only_nods_emph_300_vols+
#                   ai_lymph_emph_30_100_vols+ ai_lymph_emph_100_300_vols+ ai_lymph_emph_300_vols).pvalue)