In [3]:
from scipy.stats import mannwhitneyu
import numpy as np
import pandas as pd

In [36]:
df = pd.read_excel(r'C:\Users\z004b0je\OneDrive - Siemens Healthineers\Projects\01_Prostate MR\03_FDA_Reader_Study\FDA_Original_Raw\Case Overview for CN w distributions 20240216.xlsx')
df.head(3)
df2 = pd.read_excel(r'C:\Users\z004b0je\OneDrive - Siemens Healthineers\Projects\01_Prostate MR\03_FDA_Reader_Study\FDA_Original_Raw\validation\case_level.xlsx')
df2.columns

Index(['FDA-#', 'InstitutionName', 'PatientAge', 'StudyDate',
       'ManufacturerModelName', 'MagneticFieldStrength', 'PatientPosition',
       'StudyInstanceUID', 'PSA', 'Ethnicity', 'Original PI-RADS',
       'GleasonTargetedMax', 'GleasonSystematicMax', 'GT_GGG1', 'GT_GGG2',
       'AI_LOS', 'T01', 'T02', 'T03', 'R01_LOS', 'R02_LOS', 'R03_LOS',
       'R04_LOS', 'R05_LOS', 'R06_LOS', 'R07_LOS', 'R08_LOS', 'R09_LOS',
       'R10_LOS', 'R11_LOS', 'R12_LOS', 'R01_wAI_LOS', 'R02_wAI_LOS',
       'R03_wAI_LOS', 'R04_wAI_LOS', 'R05_wAI_LOS', 'R06_wAI_LOS',
       'R07_wAI_LOS', 'R08_wAI_LOS', 'R09_wAI_LOS', 'R10_wAI_LOS',
       'R11_wAI_LOS', 'R12_wAI_LOS', 'AI', 'T01.1', 'T02.1', 'T03.1',
       'R01_pirads', 'R02_pirads', 'R03_pirads', 'R04_pirads', 'R05_pirads',
       'R06_pirads', 'R07_pirads', 'R08_pirads', 'R09_pirads', 'R10_pirads',
       'R11_pirads', 'R12_pirads', 'R01_wAI_pirads', 'R02_wAI_pirads',
       'R03_wAI_pirads', 'R04_wAI_pirads', 'R05_wAI_pirads', 'R06_wAI_pirads'

# basic statistical analysis

### Age

In [13]:
age_1 = df2[df2['InstitutionName']=='SITE_1']['PatientAge'].values
age_2 = df2[(df2['InstitutionName']=='SITE_2') | (df2['InstitutionName']=='SITE_3')]['PatientAge'].values
age = df2['PatientAge'].values

In [14]:
age_median_1 = np.median(age_1)
age_median_2 = np.median(age_2)
age_median = np.median(age)
age_min = np.min(age)
age_max = np.max(age)
print(f'Median patient age is {age_median}, range({age_min}-{age_max})')
print(f'Site_1 median age {age_median_1}, Site_2 median age {age_median_2}')

stat, p = mannwhitneyu(age_1,age_2)
print(f'Statistics: {stat}, P-value: {round(p,5)}')

Median patient age is 65.0, range(44-87)
Site_1 median age 63.0, Site_2 median age 66.0
Statistics: 10905.5, P-value: 0.00278


### PSA

In [15]:
psa_1 = df2[df2['InstitutionName']=='SITE_1']['PSA'].values
psa_2 = df2[(df2['InstitutionName']=='SITE_2') | (df2['InstitutionName']=='SITE_3')]['PSA'].values
psa = df2['PSA'].values

In [16]:
psa_median = np.median(psa)
psa_min = np.min(psa)
psa_max = np.max(psa)
psa_mean_1 = round(np.mean(psa_1),1)
psa_mean_2 = round(np.mean(psa_2),1)
print(f'PSA median is {psa_median}, range({psa_min}-{psa_max})')
print(f' Site_1 mean psa {psa_mean_1}, Site_2 mean psa {psa_mean_2}')

stat, p = mannwhitneyu(psa_1,psa_2)
print(f'Statistics: {stat}, P-value: {round(p,5)}')

PSA median is 5.7, range(0.6-53.0)
 Site_1 mean psa 6.8, Site_2 mean psa 6.8
Statistics: 14894.0, P-value: 0.11866


### PI-RADS

In [17]:
pirads_1 = df2[df2['InstitutionName']=='SITE_1']['Consensus PI-RADS'].values
pirads_2 = df2[(df2['InstitutionName']=='SITE_2') | (df2['InstitutionName']=='SITE_3')]['Consensus PI-RADS'].values

In [18]:
stat, p = mannwhitneyu(pirads_1,pirads_2)
print(f'Statistics: {stat}, P-value: {round(p,5)}')

Statistics: 13251.0, P-value: 0.73711


### AI LOS

In [21]:
ai_los_1 = df2[df2['InstitutionName']=='SITE_1']['AI_LOS'].values
ai_los_2 = df2[(df2['InstitutionName']=='SITE_2') | (df2['InstitutionName']=='SITE_3')]['AI_LOS'].values

In [22]:
stat, p = mannwhitneyu(ai_los_1,ai_los_2)
print(f'Statistics: {stat}, P-value: {round(p,5)}')

Statistics: 12431.5, P-value: 0.2104


## Performance Validation

### Sensitivity and Specificity

In [52]:
def get_sens_spec(pred, gt, thres):
    tp, fp, tn,fn = 0,0,0,0
    for idx in range(len(pred)):
        if pred[idx] >= thres:
            if gt[idx] >= thres:
                tp += 1
            else:
                fp += 1
        else:
            if gt[idx] >= thres:
                fn += 1
            else:
                tn += 1
    sens = round(tp / (tp + fn), 3)
    spec = round(tn / (tn + fp), 3)
    return sens, spec

reader_info = dict()
reader_info_cols = ['modality','reader', 'Sensitivity', 'Specificity']
for col in reader_info_cols:
    reader_info.setdefault(col, [])

def get_sens_spec2(pred, gt, thres):
    tp, fp, tn,fn = 0,0,0,0
    for idx in range(len(pred)):
        if pred[idx] >= thres:
            if gt[idx] == 1:
                tp += 1
            else:
                fp += 1
        else:
            if gt[idx] == 1:
                fn += 1
            else:
                tn += 1
    sens = round(tp / (tp + fn), 3)
    spec = round(tn / (tn + fp), 3)
    return sens, spec

reader_info = dict()
reader_info_cols = ['modality','reader', 'Sensitivity', 'Specificity']
for col in reader_info_cols:
    reader_info.setdefault(col, [])

for modality in ['ind','wAI']:
    for idx in range(1,13):
        if len(str(idx)) == 1:
            reader = f'0{idx}'
        if modality == 'ind':
            reader_col = f'R{reader}_LOS'
        else:
            reader_col = f'R{reader}_wAI_pirads'
        if idx < 7:
            reader_result = df2.iloc[:170][reader_col].values
            gt_result = df2.iloc[:170]['Rounded GT_GGG1'].values
        else:
            reader_result = df2.iloc[170:][reader_col].values
            gt_result = df2.iloc[170:]['Rounded GT_GGG1'].values
        sens, spec = get_sens_spec2(reader_result, gt_result, 3)
        reader_info['modality'].append(modality)
        reader_info['reader'].append(reader_col)
        reader_info['Sensitivity'].append(sens)
        reader_info['Specificity'].append(spec)
result_df = pd.DataFrame(reader_info)        
ind_sens_result = result_df[result_df['modality'] == 'ind']['Sensitivity'].values
ind_spec_result = result_df[result_df['modality'] == 'ind']['Specificity'].values
wAI_sens_result = result_df[result_df['modality'] == 'wAI']['Sensitivity'].values
wAI_spec_result = result_df[result_df['modality'] == 'wAI']['Specificity'].values
ind_sens_mean = round(np.mean(ind_sens_result),2)
ind_spec_mean = round(np.mean(ind_spec_result),2)
wAI_sens_mean = round(np.mean(wAI_sens_result),2)
wAI_spec_mean = round(np.mean(wAI_spec_result),2)
print(ind_sens_mean,  ind_spec_mean, wAI_sens_mean, wAI_spec_mean)


for modality in ['ind','wAI']:
    for idx in range(1,13):
        if len(str(idx)) == 1:
            reader = f'0{idx}'
        if modality == 'ind':
            reader_col = f'R{reader}_pirads'
        else:
            reader_col = f'R{reader}_wAI_pirads'
        if idx < 7:
            reader_result = df2.iloc[:170][reader_col].values
            gt_result = df2.iloc[:170]['Consensus PI-RADS'].values
        else:
            reader_result = df2.iloc[170:][reader_col].values
            gt_result = df2.iloc[170:]['Consensus PI-RADS'].values
        sens, spec = get_sens_spec(reader_result, gt_result, 3)
        reader_info['modality'].append(modality)
        reader_info['reader'].append(reader_col)
        reader_info['Sensitivity'].append(sens)
        reader_info['Specificity'].append(spec)
result_df = pd.DataFrame(reader_info)        
ind_sens_result = result_df[result_df['modality'] == 'ind']['Sensitivity'].values
ind_spec_result = result_df[result_df['modality'] == 'ind']['Specificity'].values
wAI_sens_result = result_df[result_df['modality'] == 'wAI']['Sensitivity'].values
wAI_spec_result = result_df[result_df['modality'] == 'wAI']['Specificity'].values
ind_sens_mean = round(np.mean(ind_sens_result),2)
ind_spec_mean = round(np.mean(ind_spec_result),2)
wAI_sens_mean = round(np.mean(wAI_sens_result),2)
wAI_spec_mean = round(np.mean(wAI_spec_result),2)
print(ind_sens_mean,  ind_spec_mean, wAI_sens_mean, wAI_spec_mean)



0.62 0.68 0.67 0.65
0.71 0.67 0.77 0.66


In [None]:
df2.iloc[:170]['R01_pirads'].values

In [44]:
df2['Rounded GT_GGG1']

0      1
1      1
2      0
3      1
4      1
      ..
335    1
336    1
337    1
338    1
339    1
Name: Rounded GT_GGG1, Length: 340, dtype: int64

In [56]:
est = 0.6983
stde = 0.03769
cil = est - 1.96 * stde
print(cil)

0.6244276
