## resample images
All images now have a standardized slice thickness and slice increment, but they can still differ in resolution in the x/y-plane. While manual annotations etc. were all done on these 3 mm images, the images and calcium masks are internally resampled again to the average resolution of 0.66 x 0.66 x 1.5 mm³. For better performance, we can do this once and store the resampled images (this requires around 100 GB):

In [2]:
from os import path
import os, tqdm
from glob import glob
from utils import io, resampling
# Source data set and scratch location that receives the resampled copies.
input_dir = 'D:/data/CT_CCTA/mixCT'
scratch_dir = 'D:/data/CT_CCTA/resampled_mixCT'
input_imagedir = path.join(input_dir, 'images')

standard_resolution = (1.5, 0.66, 0.66)  # z, y, x
standard_imagedir = path.join(scratch_dir, 'images_resampled')

# exist_ok makes re-runs a no-op and avoids the check-then-create race.
os.makedirs(standard_imagedir, exist_ok=True)


# Iterate over all images and resample them to the standard resolution
n_resampled = 0
image_files = glob(path.join(input_imagedir, '*.mhd'))

print("{} scans in image dir".format(len(image_files)))
for image_file in tqdm.tqdm(image_files):
    # The file name without its extension is used as the series UID.
    series_uid = path.splitext(path.basename(image_file))[0]
    standard_image_file = path.join(standard_imagedir, series_uid + '.mhd')

    # Skip scans that already have an output file, so an interrupted run
    # can be resumed without redoing finished scans.
    if path.exists(standard_image_file):
        continue

    # Load image, resample to standard resolution, save again
    image, spacing, origin = io.read_image(image_file)
    image = resampling.resample_image(image, spacing, standard_resolution)
    io.write_image(standard_image_file, image, standard_resolution, origin)

    n_resampled += 1

# NOTE(review): the message mentions masks, but this cell only processes the
# files found in the images directory -- confirm whether masks are handled
# elsewhere.
print('{} images and masks resampled to standard resolution'.format(n_resampled))

400 scans in image dir
400 images and masks resampled to standard resolution


100%|██████████| 400/400 [33:59<00:00,  5.10s/it]


## prepare data set csv for CT and CCTA

In [13]:
import csv
from glob import glob
import pandas as pd
from sklearn.utils import shuffle
from os import path
import SimpleITK as sitk

# Column names of the data set CSV; each row below supplies its values in
# this order.
header = ["Patient ID","Series Instance UID","FBP Reconstruction Kernel","Original Slice Thickness","Original Slice Order","Observer","Subset"]
ctDir = 'D:/data/CT_CCTA/mixCT'
files = glob("{}/images/*.mhd".format(ctDir))

data = []
N = len(files)

# Only the spacing is needed from each scan, so read the image header
# instead of loading the full volume into memory.
reader = sitk.ImageFileReader()

# Build one CSV row per scan.
for i, f in enumerate(tqdm.tqdm(files)):
    # The file name without the 4-character '.mhd' extension is the series UID.
    uid = path.basename(f)[:-4]

    reader.SetFileName(f)
    reader.ReadImageInformation()

    # Scans whose UID contains 'Contrast' go to the 'ccta' subset,
    # everything else to 'ct'.
    subset = 'ccta' if 'Contrast' in uid else 'ct'

    # The running index serves as the patient ID; kernel and slice order are
    # written as 0 and the observer as 'A' for every scan. The last spacing
    # component is stored as the original slice thickness (SimpleITK orders
    # spacing as x, y, z).
    row = [i, uid, 0, reader.GetSpacing()[-1], 0, 'A', subset]
    data.append(row)

with open("{}/dataset.csv".format(ctDir), 'w', encoding='cp1252', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(data)



100%|██████████| 400/400 [04:28<00:00,  1.49it/s]
