In [None]:
import os
import pydicom.data
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re

### Get slice thickness and pixel spacing from DICOM metadata

In [None]:
# Root folder: one sub-folder per patient id, each holding a 'DICOM' folder.
path = 'path/to/folder/where/each/subfolder/isthepatientid/and/inthose/arethe/dicom/folders'
pids = os.listdir(path)

# One [pid, voxel volume] record per patient.
pixel_volumes = []

for pid in pids:
    dicom_path = os.path.join(path, pid, 'DICOM')

    # Take the first file listed for the series. os.listdir order is arbitrary;
    # NOTE(review): this assumes every slice of a series shares the same
    # PixelSpacing / SliceThickness -- confirm.
    files = os.listdir(dicom_path)
    # Join the path directly instead of going through pydicom's test-data
    # lookup helper (data_manager.get_files), which is not meant for user data.
    filename = os.path.join(dicom_path, files[0])
    # Only header tags are needed, so skip decoding the pixel data.
    ds = pydicom.dcmread(filename, stop_before_pixels=True)
    # Volume of one voxel = in-plane spacing (row x column) x slice thickness.
    pixel_volume = float(ds.PixelSpacing[0]) * float(ds.PixelSpacing[1]) * float(ds.SliceThickness)
    pixel_volumes.append([pid, pixel_volume])

df_volumes = pd.DataFrame(pixel_volumes, columns=['pid', 'pixel_volume'])
# Folder names are strings; cast so they can be merged with the 'Patient' column.
df_volumes['pid'] = df_volumes['pid'].astype(int)

# Load mask pixel counts, prediction pixel counts, dice and pid for every fold.
# Frames are collected in a list and concatenated once, rather than growing a
# DataFrame inside the loop.
fold_frames = []

for fold in range(3):
    file_path = f'pathto/dices_fold-{fold}.csv'
    fold_df = pd.read_csv(file_path)
    fold_df['fold'] = fold
    fold_frames.append(fold_df)

df = pd.concat(fold_frames)

# Attach the per-patient voxel volume and turn pixel counts into volumes.
df = pd.merge(df, df_volumes, left_on='Patient', right_on='pid')
df['mask_volume'] = df['Mask_Pixel_Count'] * df['pixel_volume']
df['pred_volume'] = df['Pred_Pixel_Count'] * df['pixel_volume']

# Sum the per-row volumes into one annotated / predicted total per patient.
total_volumes = df.groupby('pid')[['mask_volume','pred_volume']].sum().reset_index()

### Get model performance

In [None]:
# Display name -> output folder (under outputs/) of each trained model.
models = {
    'Shallow' : 'path_to_shallow_model',
    'Deep' : 'path_to_deep_model',
    'Encoding' : 'path_to_encoding_model',
    'Decoding' : 'path_to_decoding_model',
    'Unet' : 'path_to_unet_model',
    'Decoding + Tucker' : 'path_to_decoding_tucker_model',
    'Decoding + CP' : 'path_to_decoding_cp_model',
    'Decoding + TT' : 'path_to_decoding_tt_model'
}

# Raw strings so that \s and \d reach the regex engine instead of being parsed
# as (invalid) string escapes; \d+ requires at least one digit for the id.
patient_pattern = re.compile(r'Patient\s(\d+)')
dice_pattern = re.compile(r'is\s(.*)$')

# [model, pid, dice] records gathered from every model's log; the DataFrame is
# built once at the end instead of concatenating inside the loop.
pid_dice = []

for name, file in models.items():
    # Named log_path so the DICOM root `path` from the previous cell is not overwritten.
    log_path = f'outputs/{file}/Running.log'

    with open(log_path, 'r') as f:
        for line in f:
            # Only lines reporting a per-patient Dice score are of interest.
            if 'Dice Score for Patient' in line:
                pid = int(patient_pattern.findall(line)[0])
                dice = float(dice_pattern.findall(line)[0])
                pid_dice.append([name, pid, dice])

df_m = pd.DataFrame(pid_dice, columns=['model', 'pid', 'dice'])


# One row per (patient, model) with the patient's total volumes attached.
data = pd.merge(total_volumes, df_m, on='pid')
# Divide by 1000: mm^3 -> mL.
# NOTE(review): assumes PixelSpacing / SliceThickness are in mm -- confirm.
data['mask_volume'] = data['mask_volume'] / 1000
data['pred_volume'] = data['pred_volume'] / 1000
# Right-closed bins (0,25], (25,50], (50,100], (100,225]; a volume of exactly 0
# or above 225 falls outside every bin and gets NaN.
data['bin'] = pd.cut(data['mask_volume'], bins=[0,25,50,100,225])

### Visualizations

In [None]:
# Volume plot: predicted vs. annotated volume for the 'Decoding + Tucker' model.
data_top = data[data['model'] == 'Decoding + Tucker'].reset_index(drop=True)

# Shared axis limit: the largest volume, bumped up to the next multiple of 10.
# Named axis_max (not `max`) so the builtin max() stays usable in later cells.
axis_max = data_top[['mask_volume','pred_volume']].max().max()
axis_max = axis_max + 10 - (axis_max % 10)

fig, ax = plt.subplots()

# Scatter of patients plus a linear fit with a 95% confidence band.
sns.regplot(data=data_top, x='mask_volume', y='pred_volume',
            ci=95, line_kws=dict(color="grey"), scatter_kws={'s':15,'color':'black'},
            ax=ax)

# Dashed identity line (perfect agreement) for reference.
ax.plot([0, axis_max], [0, axis_max], color='grey', linestyle='--')

ax.set_xlabel('Annotated Blood Volume (mL)', fontsize=14)
ax.set_ylabel('Predicted Blood Volume (mL)', fontsize=14)
ax.tick_params(axis='both', labelsize=12)
ax.set_xlim(0, axis_max)
ax.set_ylim(0, axis_max)
fig.savefig('volume.png', dpi=300, bbox_inches='tight')

In [None]:
# Summary statistics of the Dice score per model, best mean first.
dice_summary = df_m.groupby('model')['dice'].describe()
dice_summary.sort_values('mean', ascending=False)

In [None]:
# Split names such as 'Decoding + Tucker' into the module part and the
# decomposition part; names without '+' get a missing Decomposition.
# n=1 keeps the result at two columns even if a name contained several '+'.
data[['Modules','Decomposition']] = data['model'].str.split('+', n=1, expand=True)
data['Modules'] = data['Modules'].str.strip()
data['Decomposition'] = data['Decomposition'].str.strip()

# NOTE(review): none of these labels matches a key of `models` (which uses
# 'Unet', not 'U-Net'), so this filter currently removes nothing -- confirm
# whether the U-Net rows were meant to be excluded.
module_rows = data[~data['Modules'].isin(['All','None','U-Net'])]

# Mean and standard deviation of Dice per (module, decomposition);
# dropna=False keeps the models that have no decomposition.
module_stats = module_rows.groupby(['Modules','Decomposition'], dropna=False)['dice'].describe()[['mean','std']]

# Build the formatted column as a new frame instead of adding a column to a
# column-subset of `describe()` (which triggers SettingWithCopyWarning).
mean_pm_std = module_stats['mean'].round(3).astype(str) + ' +/-' + module_stats['std'].round(2).astype(str)
module_table = mean_pm_std.to_frame('Mean')
module_table

In [None]:
# Dice statistics per model and annotated-volume bin.
# 'bin' is categorical: observed=False is passed explicitly so that every
# (model, bin) combination is kept regardless of the pandas default.
bin_stats = (
    data.groupby(['model','bin'], observed=False)['dice']
    .describe()
    .reset_index()[['model','bin','mean','std','count']]
)

# .assign returns a new frame, avoiding SettingWithCopyWarning on the subset.
bin_stats = bin_stats.assign(
    value=bin_stats['mean'].round(3).astype(str) + ' (+/-' + bin_stats['std'].round(3).astype(str) + ')'
)

# Number of scores in each volume bin.
print(bin_stats[['bin','count']].drop_duplicates())

# Wide table: one row per model, one column per volume bin, 'mean (+/-std)' cells.
dice_by_bin = bin_stats.pivot_table(index='model', columns='bin', values='value',
                                    aggfunc=lambda values: ' '.join(values), observed=False)
dice_by_bin