In [1]:
import os
import numpy as np
import pydicom

import pandas as pd
import sys
sys.path.append('/mnt/fast-disk1/mjc/utils_codes/read_weasis_raw_v0.96/')

import weasis_raw_data_api as wr
sys.path.append('/mnt/fast-disk1/mjc/utils_codes/')
from utils_test import *
from utils_metrics_3d import *

# Cache of the header tables built so far. Tables built with the default
# label list are keyed by the directory path; tables built with a
# caller-supplied label list are keyed by (directory path, tuple(labels)).
D_dir2header_df = {}

# Columns extracted when the caller does not pass its own label list.
_DEFAULT_HEADER_LABELS = (
    'ImageName', 'InstanceNumber',
    'BitsAllocated', 'BitsStored', 'Columns', 'HighBit',
    'ImageOrientationPatient_0', 'ImageOrientationPatient_1', 'ImageOrientationPatient_2',
    'ImageOrientationPatient_3', 'ImageOrientationPatient_4', 'ImageOrientationPatient_5',
    'ImagePositionPatient_0', 'ImagePositionPatient_1', 'ImagePositionPatient_2',
    'Modality', 'PatientID', 'PhotometricInterpretation', 'PixelRepresentation',
    'PixelSpacing_0', 'PixelSpacing_1', 'RescaleIntercept', 'RescaleSlope', 'Rows', 'SOPInstanceUID',
    'SamplesPerPixel', 'SeriesInstanceUID', 'StudyID', 'StudyInstanceUID',
    'WindowCenter', 'WindowWidth',
)

# Multi-valued tags that are always inspected; their components are stored in
# "<tag>_<i>" columns.
_VECTOR_TAGS = ('ImageOrientationPatient', 'ImagePositionPatient', 'PixelSpacing')

# Tags that may be multi-valued but of which only the first value is kept.
_FIRST_VALUE_TAGS = ('WindowCenter', 'WindowWidth')

# Files smaller than this many bytes are reported and skipped.
_MIN_DICOM_BYTES = 5 * 1024


def _dicom_header_row(ds, image_name, labels):
    """Collect the requested header values of one dataset into a dict.

    Every label starts out as None, so a tag that is absent from ``ds`` yields
    a missing value instead of a shorter column.
    """
    row = dict.fromkeys(labels)
    if 'ImageName' in row:
        row['ImageName'] = image_name

    for tag in ds.dir():
        if tag == 'PixelData':
            continue
        if tag not in row and tag not in _VECTOR_TAGS:
            continue

        value = getattr(ds, tag)
        if not isinstance(value, pydicom.multival.MultiValue):
            if tag in _VECTOR_TAGS:
                # A vector tag that is not multi-valued cannot be spread over
                # its "<tag>_<i>" columns; those columns stay None.
                print( 'error of loading key: {}'.format(tag) )
            else:
                row[tag] = value
        elif tag in _FIRST_VALUE_TAGS:
            row[tag] = value[0] if len(value) else None
        else:
            # Only components that were actually requested get a column.
            for i, component in enumerate(value):
                column = f"{tag}_{i}"
                if column in row:
                    row[column] = component
            # A multi-valued tag requested under its own name keeps all values.
            if tag in row:
                row[tag] = list(value)
    return row


def get_dicom_header_df(image_dir , labels = None):
    """Return a table of DICOM header values for the files in ``image_dir``.

    The directory listing is sorted by file name. Entries whose name does not
    contain '.dcm' are ignored, and files smaller than 5 KiB are reported on
    stdout and skipped. Each remaining file is read with ``pydicom.dcmread``
    and contributes one row.

    Parameters
    ----------
    image_dir : str
        Directory containing the DICOM files.
    labels : sequence of str, optional
        Columns to extract. When empty or None the default label list is used.
        The list must contain 'InstanceNumber', which becomes the index.

    Returns
    -------
    pandas.DataFrame
        One row per accepted file, indexed by 'InstanceNumber'. Values a file
        does not provide are None/NaN.

    Raises
    ------
    KeyError
        If 'InstanceNumber' is not among the labels.

    Notes
    -----
    Results are cached in the module-level ``D_dir2header_df``, so the
    directory is read once per distinct label list.
    """
    global D_dir2header_df
    use_custom_labels = bool(labels)
    labels = list(labels) if use_custom_labels else list(_DEFAULT_HEADER_LABELS)

    # Default-label tables keep the plain directory key; a custom label list
    # gets its own entry so it never receives a table built for other labels.
    cache_key = (image_dir, tuple(labels)) if use_custom_labels else image_dir
    if cache_key in D_dir2header_df:
        return D_dir2header_df[cache_key]

    data = {label: [] for label in labels}

    for image in sorted(os.listdir(image_dir)):
        if '.dcm' not in image:
            continue
        image_path = os.path.join(image_dir, image)
        if os.path.getsize(image_path) < _MIN_DICOM_BYTES:
            print('%s size < 5kb skiped!'%image_path )
            continue

        ds = pydicom.dcmread(image_path)
        row = _dicom_header_row(ds, image, labels)
        # Append exactly one value per column so all columns stay aligned.
        for label in data:
            data[label].append(row[label])

    df_image = pd.DataFrame(data).set_index("InstanceNumber")
    D_dir2header_df[cache_key] = df_image
    return df_image

In [2]:
def pd_str_replace(df , col, ori, new, regex = True):
    """Case-insensitively replace ``ori`` with ``new`` in string column(s) of ``df``, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column(s) are overwritten.
    col : str or list/tuple of str
        One column name, or several names that are processed one after another.
    ori : str
        Pattern to search for.
    new : str
        Replacement text.
    regex : bool, default True
        Whether ``ori`` is a regular expression. It is passed explicitly
        because the default of ``Series.str.replace`` differs between pandas
        versions; True keeps patterns such as r"\\" matching a single backslash.

    Raises
    ------
    TypeError
        If ``col`` is neither a string nor a list/tuple.

    Notes
    -----
    The replacement is best-effort: a column that does not exist, or that does
    not hold strings, is silently left untouched.
    """
    if isinstance(col , str):
        try:
            df[col] = df[col].str.replace(ori, new, case = False, regex = regex)
        except (KeyError, AttributeError):
            # KeyError: column absent; AttributeError: no .str accessor for
            # this column. Both are skipped on purpose.
            pass

    elif isinstance(col, (list, tuple)):
        for one in col:
            pd_str_replace(df , one, ori, new, regex = regex)
    else:
        raise TypeError('col instance should be str or list')


def str_Xdrive2mnt(df_all):
    """Rewrite the path columns of ``df_all`` in place, from X:-drive paths to local mounts.

    The substitutions are applied through ``pd_str_replace`` in the order
    listed below. The order matters: "X:" and the backslashes are rewritten
    first, and a longer "/mnt/Y-drive/..." prefix is always handled before
    any shorter prefix it starts with.
    """
    path_rewrites = (
        # columns, text to find, replacement
        (['Image File Path' , 'Contour File Path'], "X:" , "/mnt/Y-drive"),
        (['Image File Path' , 'Contour File Path'], r"\\" , "/"),
        (['Image File Path'],
         "/mnt/Y-drive/ClinicalTrials/FNIH_VOLPACK",
         "/mnt/fast-disk1/mjc/AutoRecist/Inputs"),
        (['Image File Path'],
         "/mnt/Y-drive/ClinicalTrialDone/FNIH_VOLPACK",
         "/mnt/fast-disk1/mjc/AutoRecist/Inputs"),
        (['Image File Path'],
         "/mnt/Y-drive/ClinicalTrials",
         "/mnt/fast-disk1/mjc/AutoRecist/Inputs"),
        (['Contour File Path'],
         "/mnt/Y-drive/ConvWeasisToRaw/PDS_AUTO_RECIST_Modified_By_Yen",
         "/mnt/fast-disk1/mjc/AutoRecist/Inputs/ConvWeasisToRaw/PDS_AUTO_RECIST_Modified_By_Yen"),
        (['Contour File Path'],
         "/mnt/Y-drive/ConvWeasisToRaw/PDS_AUTO_RECIST",
         "/mnt/fast-disk1/mjc/AutoRecist/Inputs/ConvWeasisToRaw/PDS_AUTO_RECIST_RAW"),
        (['Contour File Path'],
         "/mnt/Y-drive/ConvWeasisToRaw",
         "/mnt/fast-disk1/mjc/AutoRecist/Inputs/ConvWeasisToRaw"),
        (['Contour File Path'],
         "/mnt/Y-drive/ConvWeasisToMatlab",
         "/mnt/fast-disk1/mjc/AutoRecist/Inputs/ConvWeasisToRaw"),
    )
    for columns, old_text, new_text in path_rewrites:
        pd_str_replace(df_all, columns, old_text, new_text)
    
def get_onect_from_list(df_list , ct):
    """Find the rows for CT series ``ct`` in the first frame of ``df_list`` that has any.

    For each frame, rows are selected whose "Image File Path" equals ``ct``.
    When the frame has a 'Location' column, only rows whose location is
    'liver' are kept.

    Returns
    -------
    (DataFrame, int) or (None, None)
        The matching rows and the position of their frame in ``df_list``.
        If no frame has a match, a warning is printed and (None, None) is
        returned.
    """
    for position, frame in enumerate(df_list):
        selector = frame["Image File Path"] == ct
        if 'Location' in frame.columns:
            selector = selector & frame['Location'].isin(['liver'])
        matched = frame[selector]
        if matched.shape[0] > 0:
            return matched , position

    print("warning! no CT was found")
    return None ,None

def raws2mask(raws , D_z_index, mask_vol = None):
    """Accumulate the masks of several contour files into one volume.

    Parameters
    ----------
    raws : iterable
        Contour file paths, each opened with ``wr.read``.
    D_z_index : mapping
        Maps an instance number to the index of its slice in the volume.
    mask_vol : array-like, optional
        Volume to add to. When None, it is created with
        ``initialize_mask_vol`` from the first contour that is read.

    Returns
    -------
    The accumulated volume, or None when ``raws`` is empty and no volume was
    passed in. Masks are added, not overwritten, so slices covered by more
    than one contour hold the sum of their masks.
    """
    for raw_path in raws:
        contour = wr.read(raw_path)
        instance_numbers = contour.get_instance_number_array()

        # Lazily create the volume from the first contour read.
        if mask_vol is None:
            mask_vol = initialize_mask_vol(contour , D_z_index)

        for slice_pos, instance_number in enumerate(instance_numbers):
            slice_mask = contour.get_mask_image(slice_pos)
            mask_vol[D_z_index[instance_number]] += slice_mask

    return mask_vol

In [3]:
# Load the CT series list and keep only the rows of one dataset.
subsetname = 'Amgen'
folder = '/mnt/fast-data/mjc/AutoRECIST/Inputs/'

df_CTs = pd.read_excel(
    os.path.join(folder, 'AutoRECIST_List_LesionSize_20220602_JM_SingleCTSeries.xlsx')
)
# Rewrites the path columns of df_CTs in place.
str_Xdrive2mnt(df_CTs)

df_CTs = df_CTs.loc[df_CTs['dataset'].eq(subsetname)]
print(df_CTs)

                                Comments Patient ID  \
0                         minor revision      BAIJC   
1                         minor revision      BAIJD   
2    minor revision(lesion not in liver)      BAIJG   
3                         minor revision      BAIJL   
4                         major revision      BAIJM   
..                                   ...        ...   
148                       minor revision      BAITS   
149                       minor revision      BAITU   
150                       minor revision      BAITV   
151                       minor revision      BAITW   
152                       major revision      BAITX   

                                       Image File Path  \
0    /mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20...   
1    /mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20...   
2    /mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20...   
3    /mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20...   
4    /mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/

In [4]:
# Compare the AI contours with the gold-standard contours of every CT series
# in df_CTs and collect the rows returned by vols_seg_results in Metrics_vol.

# Imported once here; it used to be re-executed inside the per-CT loop.
from skimage import measure

df_Yen = pd.read_excel(folder+'PDS_AUTO_RECIST CIA-LAB Testing Dataset Gold Standard_Yen_2022-06-13.xlsx')
str_Xdrive2mnt(df_Yen)

AI_raw_list = ['ScaleNAS9SlicesAccLiverToRaw_Test353.csv',]
df_AIs = []
for one in AI_raw_list:
    df = pd.read_csv(one)
    str_Xdrive2mnt(df)
    df_AIs.append(df)

# Column names given to the rows of Metrics_vol whenever they are tabulated.
METRIC_COLUMNS = ['file_name','igt','merge','#gt','#pred',
                  'iou_score', 'dice_score', 'over_seg' , 'under_seg',
                  'area_gt','area_pred','intersection','union']

# Forwarded to skimage.measure.label; loop-invariant, so set once.
connectivity = 2

Metrics_vol = []
metrics_save_path = 'Metrics_%s_vs_Yen_%s.csv'%('ScaleNAS9SlicesAccLiver' ,subsetname )
CTs = df_CTs["Image File Path"].values.tolist()

# CTs that are skipped below add no rows to Metrics_vol. Their counts are kept
# here so they are not lost once the printed log scrolls away.
# NOTE(review): any summary computed from Metrics_vol alone excludes these
# CTs - confirm whether they should enter the sensitivity / FP accounting.
CT_FN_counts = {}   # ct -> number of gold-standard rows, when no AI rows exist
CT_FP_counts = {}   # ct -> number of AI rows, when no gold-standard rows exist

for ct in CTs:
    df_image = get_dicom_header_df( ct )
    instanceNumber_list = df_image.index.to_list()
    D_z_index = instanceNumber2Matrix_z_index(instanceNumber_list)

    df_ct_Yen = df_Yen[df_Yen["Image File Path"]==ct]
    df_ct_AI , dataset_id = get_onect_from_list(df_AIs , ct)

    if (df_ct_AI is None):
        # No AI contour for this CT: every gold-standard row is a miss.
        if df_ct_Yen.shape[0]:
            fn = df_ct_Yen.shape[0]
            CT_FN_counts[ct] = fn
            print('{} has {} FNs!'.format(ct , fn))
        continue

    if not df_ct_Yen.shape[0]:
        # AI contours but no gold-standard row for this CT.
        fp = df_ct_AI.shape[0]
        CT_FP_counts[ct] = fp
        print( '{} has {} FPs'.format(ct , fp)  )
        continue
    else:
        print(ct)

    # Merge all AI contours of this CT into a single prediction volume.
    raws = df_ct_AI["Contour File Path"].values.tolist()
    vol_pred = raws2mask(raws , D_z_index, mask_vol = None)

    # Count the connected components of the prediction volume; a count that
    # differs from the number of contour files triggers the warning below.
    labels_pred=measure.label(vol_pred,connectivity=connectivity)
    l_pred,c_pred = np.unique(labels_pred , return_counts=True)
    ix2 = l_pred>0
    l_pred = l_pred[ix2] #background pixels are labeled as 0, so we exclude them
    c_pred = c_pred[ix2]
    if len(l_pred)!= len(raws):
        print("warning! raws overlaped on {}".format(ct))

    for _ , row in df_ct_Yen.iterrows():

        # Build the volume of one gold-standard contour.
        Yen_raw = wr.read(row['Contour File Path'])
        gt_vol = initialize_mask_vol(Yen_raw , D_z_index)

        slice_list = Yen_raw.get_instance_number_array()
        for j, one in enumerate(slice_list):
            mask = Yen_raw.get_mask_image(j)
            gt_vol[D_z_index[one]] = mask

        hit = vols_seg_results(gt_vol , vol_pred, CTname=row['Contour File Path'], gt_keep_largest=1)
        Metrics_vol.extend(hit)

        # Checkpoint the metrics to disk at a few early row counts and then at
        # every multiple of 100 rows.
        _n = len(Metrics_vol)
        if _n%100==0 or _n in [1,2,5,10,30,50]:
            df_metrics = pd.DataFrame(Metrics_vol, columns = METRIC_COLUMNS)
            df_metrics.to_csv(metrics_save_path)
        

/mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20020408/BAIJC/D2004_02_27/E20040227/CT/S0002
/mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20020408/BAIJD/D2004_02_07/E20040207/CT/S0002
/mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20020408/BAIJG/D2004_02_02/E20040202/CT/S0013
/mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20020408/BAIJL/D2004_02_24/E20040224/CT/S3464
/mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20020408/BAIJM/D2004_01_31/E20040131/CT/S0003
/mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20020408/BAIJN/D2004_02_03/E20040203/CT/S0004
/mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20020408/BAIJO/D2004_02_19/E20040219/CT/S0005
/mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20020408/BAIJP/D2004_02_26/E20040226/CT/S0002
/mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20020408/BAIJQ/D2004_04_27/E20040427/CT/S0007
/mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20020408/BAIJR/D2004_02_11/E20040211/CT/S0006
/mnt/fast-disk1/mjc/AutoRecist/Inputs/AMGEN/20020408/BAIJT/D2004_02_27/E20040227/CT/S0002
/mnt/fast-

In [5]:
# Tabulate all collected metric rows and write the final CSV.
metric_columns = [
    'file_name', 'igt', 'merge', '#gt', '#pred',
    'iou_score', 'dice_score', 'over_seg', 'under_seg',
    'area_gt', 'area_pred', 'intersection', 'union',
]
df_metrics = pd.DataFrame(data=Metrics_vol, columns=metric_columns)
df_metrics.to_csv(metrics_save_path)

In [6]:
# Show floats with three decimals, then summarise every metric column.
pd.set_option("display.float_format", "{:.3f}".format)

metric_columns = [
    'file_name', 'igt', 'merge', '#gt', '#pred',
    'iou_score', 'dice_score', 'over_seg', 'under_seg',
    'area_gt', 'area_pred', 'intersection', 'union',
]
df_metrics = pd.DataFrame(data=Metrics_vol, columns=metric_columns)
df_metrics.describe(percentiles=[0.05, 0.25, 0.5, 0.75, 0.95])

Unnamed: 0,igt,merge,#gt,#pred,iou_score,dice_score,over_seg,under_seg,area_gt,area_pred,intersection,union
count,1090.0,1090.0,1090.0,1090.0,1090.0,1090.0,1090.0,1090.0,1090.0,1090.0,1090.0,1090.0
mean,1.014,0.0,1.0,9.486,0.389,0.484,38.856,0.438,23419.305,33775.913,17651.561,39543.657
std,0.117,0.0,0.0,5.426,0.305,0.35,600.935,0.392,87794.384,125347.834,69261.368,137624.538
min,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,18.0
5%,1.0,0.0,1.0,3.0,0.0,0.0,0.0,0.007,63.45,0.0,0.0,73.45
25%,1.0,0.0,1.0,5.0,0.0,0.0,0.0,0.076,295.0,33.0,10.25,451.25
50%,1.0,0.0,1.0,8.0,0.444,0.615,0.122,0.289,1292.5,1227.5,629.5,2035.5
75%,1.0,0.0,1.0,15.0,0.665,0.799,0.451,0.972,6429.25,8022.75,4369.5,10766.25
95%,1.0,0.0,1.0,19.0,0.82,0.901,4.301,1.0,112814.25,154369.95,91023.6,213754.4
max,2.0,0.0,1.0,21.0,0.906,0.951,17835.566,1.0,926709.0,954095.0,806591.0,1061881.0


In [7]:
# Same summary, restricted to rows whose dice score exceeds 0.25.
df_metrics.loc[df_metrics["dice_score"] > 0.25].describe(percentiles=[0.05, 0.25, 0.5, 0.75, 0.95])

Unnamed: 0,igt,merge,#gt,#pred,iou_score,dice_score,over_seg,under_seg,area_gt,area_pred,intersection,union
count,738.0,738.0,738.0,738.0,738.0,738.0,738.0,738.0,738.0,738.0,738.0,738.0
mean,1.012,0.0,1.0,9.549,0.568,0.704,0.469,0.229,32914.678,35103.703,25837.659,42180.722
std,0.11,0.0,0.0,5.552,0.192,0.17,0.714,0.226,103742.592,115146.079,82925.494,133161.978
min,1.0,0.0,1.0,1.0,0.145,0.253,0.0,0.0,24.0,24.0,15.0,62.0
5%,1.0,0.0,1.0,3.0,0.22,0.36,0.009,0.004,166.25,163.55,102.85,255.4
25%,1.0,0.0,1.0,5.0,0.434,0.606,0.072,0.043,722.5,715.5,476.75,1005.75
50%,1.0,0.0,1.0,8.0,0.586,0.739,0.214,0.15,2601.0,2751.5,1853.0,3489.5
75%,1.0,0.0,1.0,15.0,0.732,0.845,0.568,0.35,11014.75,12332.5,9136.0,14595.25
95%,1.0,0.0,1.0,19.0,0.835,0.91,1.593,0.725,187556.55,191437.05,142872.45,226919.85
max,2.0,0.0,1.0,21.0,0.906,0.951,5.625,0.853,926709.0,954095.0,806591.0,1061881.0


In [8]:
# Attach to every metric row the gold-standard row with the same contour file.
dfinnermerge = df_metrics.merge(df_Yen, how='inner', left_on='file_name', right_on='Contour File Path')

# Number of distinct values in each column of the merged frame.
for col in dfinnermerge.columns:
    print(col, len(set(dfinnermerge[col].tolist())))

# '#pred' must be constant within a CT series; collect it once per series.
pts = dfinnermerge["Image File Path"].values.tolist()
FPs = []
for onept in set(pts):
    df_onept = dfinnermerge[dfinnermerge["Image File Path"] == onept]
    preds_onept = df_onept["#pred"]
    assert min(preds_onept) == max(preds_onept)
    fp = max(preds_onept)
    FPs.append(fp)

print("=" * 80)
print("In total, {} CT series; {} AI detections ".format(len(FPs), sum(FPs)))

dices = dfinnermerge["dice_score"].tolist()

# A row counts as detected when its dice score exceeds the threshold.
for th in (0, 0.1, 0.2, 0.25, 0.5):
    TP = [dice > th for dice in dices]
    assert len(TP) == len(dices)
    fprate = (sum(FPs) - sum(TP)) / len(FPs)
    print(f"sensitivity is {sum(TP)/len(dices):.3f}({sum(TP)}/{len(dices)}) FP-rate is {fprate:.1f} per CT-serie at threshold {th}")

file_name 1090
igt 2
merge 1
#gt 1
#pred 20
iou_score 820
dice_score 820
over_seg 794
under_seg 787
area_gt 936
area_pred 715
intersection 743
union 989
Image File Path 152
Contour File Path 1090
Raw File Name 1090
Uni 1053
Perp 1041
Bi 1087
Volume 1084
In total, 152 CT series; 984 AI detections 
sensitivity is 0.752(820/1090) FP-rate is 1.1 per CT-serie at threshold 0
sensitivity is 0.706(770/1090) FP-rate is 1.4 per CT-serie at threshold 0.1
sensitivity is 0.693(755/1090) FP-rate is 1.5 per CT-serie at threshold 0.2
sensitivity is 0.677(738/1090) FP-rate is 1.6 per CT-serie at threshold 0.25
sensitivity is 0.581(633/1090) FP-rate is 2.3 per CT-serie at threshold 0.5


In [9]:
# Subgroup analysis: keep only rows with 'Uni' >= 10 (gt_lesion >= 10mm).
Uni_thresh = 10
df_subgroup = dfinnermerge.loc[dfinnermerge['Uni'].ge(Uni_thresh)]
df_subgroup.describe(percentiles=[0.05, 0.25, 0.5, 0.75, 0.95])


Unnamed: 0,igt,merge,#gt,#pred,iou_score,dice_score,over_seg,under_seg,area_gt,area_pred,intersection,union,Uni,Perp,Bi,Volume
count,887.0,887.0,887.0,887.0,887.0,887.0,887.0,887.0,887.0,887.0,887.0,887.0,887.0,887.0,887.0,887.0
mean,1.011,0.0,1.0,9.244,0.438,0.542,15.558,0.385,28750.879,38878.567,21678.726,45950.72,38.896,26.032,1731.282,63726.243
std,0.106,0.0,0.0,5.386,0.295,0.331,204.936,0.369,96545.691,130938.219,76217.278,145025.26,35.872,21.468,3681.914,200706.331
min,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,58.0,0.0,0.0,58.0,10.012,3.232,40.788,151.447
5%,1.0,0.0,1.0,3.0,0.0,0.0,0.0,0.007,225.9,0.0,0.0,268.3,11.18,8.193,91.08,540.416
25%,1.0,0.0,1.0,5.0,0.146,0.254,0.017,0.065,734.0,422.5,270.5,1061.5,16.491,12.19,203.92,1650.13
50%,1.0,0.0,1.0,8.0,0.494,0.661,0.145,0.229,2319.0,2214.0,1167.0,3371.0,27.105,19.022,527.357,5851.04
75%,1.0,0.0,1.0,13.0,0.698,0.822,0.452,0.727,9453.5,11445.0,6391.5,14169.0,45.664,31.499,1358.826,22118.1
95%,1.0,0.0,1.0,19.0,0.828,0.906,4.007,1.0,158755.0,219949.8,106785.8,254870.6,110.41,73.097,8436.398,324567.3
max,2.0,0.0,1.0,21.0,0.906,0.951,4610.371,1.0,926709.0,954095.0,806591.0,1061881.0,254.088,146.833,36132.81,1979810.0


In [10]:
# Subgroup summary, restricted to rows whose dice score exceeds 0.25.
df_subgroup.loc[df_subgroup["dice_score"] > 0.25].describe(percentiles=[0.05, 0.25, 0.5, 0.75, 0.95])

Unnamed: 0,igt,merge,#gt,#pred,iou_score,dice_score,over_seg,under_seg,area_gt,area_pred,intersection,union,Uni,Perp,Bi,Volume
count,666.0,666.0,666.0,666.0,666.0,666.0,666.0,666.0,666.0,666.0,666.0,666.0,666.0,666.0,666.0,666.0
mean,1.012,0.0,1.0,9.333,0.578,0.712,0.442,0.228,36453.342,38871.587,28615.77,46709.159,43.519,29.083,2095.921,80055.35
std,0.109,0.0,0.0,5.477,0.192,0.17,0.705,0.226,108624.332,120616.744,86844.387,139432.663,38.709,23.072,4082.336,220979.188
min,1.0,0.0,1.0,1.0,0.145,0.253,0.0,0.0,160.0,46.0,46.0,170.0,10.012,5.959,61.36,335.854
5%,1.0,0.0,1.0,3.0,0.219,0.359,0.009,0.005,321.75,317.25,232.0,469.0,11.787,8.823,111.488,719.897
25%,1.0,0.0,1.0,5.0,0.445,0.616,0.068,0.044,1027.75,1104.75,738.25,1449.5,18.575,13.822,253.356,2312.655
50%,1.0,0.0,1.0,8.0,0.599,0.749,0.199,0.148,3549.5,3372.0,2501.5,4776.0,30.631,21.771,660.003,8244.655
75%,1.0,0.0,1.0,14.0,0.739,0.85,0.507,0.343,13284.75,14404.75,10728.5,17681.75,50.856,34.728,1763.729,38542.0
95%,1.0,0.0,1.0,19.0,0.838,0.912,1.543,0.73,208804.0,220184.0,159127.25,255364.5,133.763,84.866,9656.741,448836.5
max,2.0,0.0,1.0,21.0,0.906,0.951,5.625,0.853,926709.0,954095.0,806591.0,1061881.0,254.088,146.833,36132.81,1979810.0


In [11]:
def detection_performance(dfinnermerge, thresholds=(0, 0.1, 0.2, 0.25, 0.5)):
    """Print sensitivity and false-positive rate per CT series at several dice thresholds.

    Parameters
    ----------
    dfinnermerge : pandas.DataFrame
        One row per evaluated lesion, with the columns "Image File Path",
        "#pred" and "dice_score". "#pred" must be constant within a CT series.
    thresholds : iterable of float, optional
        Dice thresholds; a row counts as detected when its dice score is
        strictly greater than the threshold.

    For every threshold the function prints
        sensitivity = detected rows / all rows
        FP-rate     = (sum of "#pred" over CT series - detected rows) / CT series

    Returns
    -------
    None. The results are only printed.

    Raises
    ------
    AssertionError
        If "#pred" differs between rows of the same CT series.
    """
    # One "#pred" value per CT series, computed in a single grouped pass.
    preds_by_ct = dfinnermerge.groupby("Image File Path", sort=False)["#pred"]
    preds_per_ct = preds_by_ct.max()
    inconsistent = preds_per_ct.index[preds_by_ct.min() != preds_per_ct]
    assert len(inconsistent) == 0, (
        "'#pred' differs between rows of the same CT series: {}".format(list(inconsistent))
    )

    n_ct = len(preds_per_ct)
    n_pred = preds_per_ct.sum()
    n_lesions = len(dfinnermerge)

    print("="*80)
    print( "In total, {} CT series; {} AI detections ".format( n_ct , n_pred ) )

    # Both ratios below divide by these counts; an empty frame has no rates.
    if n_lesions == 0 or n_ct == 0:
        print("no lesions to evaluate; sensitivity and FP-rate are undefined")
        return

    dices = dfinnermerge["dice_score"]
    for th in thresholds:
        n_tp = int((dices > th).sum())
        fprate = ( n_pred - n_tp ) / n_ct
        print(f"sensitivity is {n_tp/n_lesions:.3f}({n_tp}/{n_lesions}) FP-rate is {fprate:.1f} per CT-serie at threshold {th}")

In [None]:
# Print the detection summary (sensitivity and FP-rate per threshold) for df_subgroup.
detection_performance(df_subgroup)

In total, 151 CT series; 978 AI detections 
sensitivity is 0.823(730/887) FP-rate is 1.6 per CT-serie at threshold 0
sensitivity is 0.782(694/887) FP-rate is 1.9 per CT-serie at threshold 0.1
sensitivity is 0.768(681/887) FP-rate is 2.0 per CT-serie at threshold 0.2
sensitivity is 0.751(666/887) FP-rate is 2.1 per CT-serie at threshold 0.25
sensitivity is 0.651(577/887) FP-rate is 2.7 per CT-serie at threshold 0.5
