In [None]:
import pandas as pd
from pydicom import dcmread
from pathlib import Path
from itertools import chain

In [None]:
import warnings

# Install a blanket 'ignore' filter: from here on, warnings of every category
# are suppressed for the rest of the kernel session.
warnings.simplefilter('ignore')

In [None]:
# Notebook-wide settings read by the cells below.

# True  -> rebuild the table from the DICOM files and pickle it.
# False -> load the previously pickled table instead.
save_data = False

# Root directory of the input data and of the pickled table.
path_data = Path('..') / 'data'

# Render floats with a thousands separator and two decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)

In [None]:
# Block: build pandas dataframe with relevant properties
#
# Runs only when `save_data` is True. For every path matched by
# 'anonymized/*/*' and 'controls/*/*' under `path_data`, the first entry
# yielded by iterating that directory is read as a DICOM file and a handful of
# header attributes are collected. Rows are indexed by AccessionNumber.
# Directories that cannot be processed are reported and skipped.

if save_data:
    columns = ['modality', 'pixel_spacing', 'slice_thickness', 'rows', 'cols', 'institution_name', 'manufacturer_model_name']
    records = []     # one dict per successfully read directory
    accessions = []  # AccessionNumber of each record; becomes the index

    for dcm in chain(path_data.glob('anonymized/*/*'), path_data.glob('controls/*/*')):
        print(dcm)

        try:
            # Reading happens inside the try so that an empty directory
            # (StopIteration), a stray non-directory or an unreadable file is
            # skipped like any other failure instead of aborting the scan.
            # Only header attributes are used, so the pixel data is not loaded.
            ds = dcmread(next(dcm.iterdir()), stop_before_pixels=True)

            record = {
                'modality': ds.Modality,
                'pixel_spacing': ds.PixelSpacing[0], # assuming equal values for both indexes in array
                'slice_thickness': ds.SliceThickness,
                'rows': ds.Rows,
                'cols': ds.Columns,
                'institution_name': ds.InstitutionName,
                'manufacturer_model_name': ds.ManufacturerModelName
            }
            accession = ds.AccessionNumber
        except Exception as err:
            # Best effort: keep scanning, but say why this one was skipped.
            # `Exception` (not a bare except) lets Ctrl-C / SystemExit through.
            print(str(dcm.parent) + " failed" + f" ({type(err).__name__}: {err})")
            continue

        records.append(record)
        accessions.append(accession)

    # Build the frame once from the collected rows. DataFrame.append was
    # removed in pandas 2.0 and growing a frame row by row is quadratic.
    df = pd.DataFrame(records, index=accessions, columns=columns)

In [None]:
# Either persist the table built above or restore the previously pickled one.
# NOTE(review): read_pickle executes whatever the pickle contains; this assumes
# the file was written by this notebook. Confirm before loading a pickle from
# any other source.
pickle_file = path_data / 'data_analyzed.pkl'
if not save_data:
    df = pd.read_pickle(pickle_file)
else:
    df.to_pickle(pickle_file)

# Count distinct index labels (the index is the AccessionNumber).
n_unique = len(df.index.unique())
print(f"{n_unique} patients were loaded\n") # should be 1705

# (heading, modality code, column): one value_counts table per entry.
reports = [
    ("CT slice thickness:", 'CT', 'slice_thickness'),
    ("\nPET slice thickness:", 'PT', 'slice_thickness'),
    ("\nCT pixel_spacing", 'CT', 'pixel_spacing'),
    ("\nPET pixel_spacing", 'PT', 'pixel_spacing'),
]
for heading, modality, column in reports:
    print(heading)
    print(df[df['modality'] == modality][column].value_counts())