In [2]:
import os
import pandas as pd
import numpy as np
import SimpleITK as sitk

In [4]:
landmarks_path = "data/train/landmarks.xls"

# Column key used in the combined table -> worksheet holding that landmark.
landmark_sheets = {
    'Landmark1': 'Landmark 1 - FKC',
    'Landmark2': 'Landmark 2 - FME',
    'Landmark3': 'Landmark 3 - FLE',
    'Landmark4': 'Landmark 4 - FTP',
    'Landmark5': 'Landmark 5 - TKC',
}

# Passing a list of sheet names returns a dict {sheet name: DataFrame}, so the
# workbook is opened once. Each sheet has a two-row header and the image file
# name in its first column.
landmark_frames = pd.read_excel(landmarks_path, sheet_name=list(landmark_sheets.values()),
                                header=[0, 1], index_col=0)
df_landmark1, df_landmark2, df_landmark3, df_landmark4, df_landmark5 = (
    landmark_frames[sheet] for sheet in landmark_sheets.values()
)

# Side-by-side concatenation; `keys` adds an outer column level naming the landmark.
df = pd.concat([df_landmark1, df_landmark2, df_landmark3, df_landmark4, df_landmark5],
               axis=1, keys=list(landmark_sheets))
df # df['Landmark1']

Unnamed: 0_level_0,Landmark1,Landmark1,Landmark1,Landmark1,Landmark1,Landmark1,Landmark1,Landmark1,Landmark1,Landmark1,...,Landmark5,Landmark5,Landmark5,Landmark5,Landmark5,Landmark5,Landmark5,Landmark5,Landmark5,Landmark5
Unnamed: 0_level_1,Amy,Amy,Amy,Ben,Ben,Ben,Marta,Marta,Marta,Katie,...,Ben,Marta,Marta,Marta,Katie,Katie,Katie,Average,Average,Average
Image,X,Y,Z,X,Y,Z,X,Y,Z,X,...,Z,X,Y,Z,X,Y,Z,X,Y,Z
100063_ct.nii,178,354,2132,179,357,2131,179,356,2132,179,...,2198,176,368,2172,178,367,2167,177.0,368.0,2177.0
100084_ct.nii,183,377,1953,187,298,1962,173,276,1992,180,...,2044,176,300,2013,172,306,2022,175.75,301.75,2026.25
100114_ct.nii,161,328,1901,180,320,2003,173,311,2018,180,...,2061,173,320,2039,174,332,2045,169.5,327.0,2022.0
100115_ct.nii,136,304,2021,134,305,2031,125,289,2026,138,...,2068,136,311,2057,137,311,2061,136.25,311.75,2066.25
100125_ct.nii,247,363,1886,249,366,1875,241,342,1890,250,...,1920,253,360,1907,252,362,1915,249.0,368.75,1915.75
100131_ct.nii,149,341,2036,152,345,2027,144,331,2044,149,...,2083,149,346,2062,148,340,2058,146.75,346.25,2070.75
100139_ct.nii,199,361,1875,195,359,1878,186,347,1889,193,...,1933,198,360,1906,198,359,1905,194.5,363.5,1919.25
100155_ct.nii,242,395,2057,239,390,2079,235,381,2085,242,...,2132,244,394,2105,245,390,2118,242.0,395.25,2121.75
100164_ct.nii,338,249,1886,341,249,1891,337,241,1891,339,...,1951,328,265,1912,325,261,1930,325.25,264.75,1935.25
100193_ct.nii,172,334,2160,171,334,2160,169,327,2165,171,...,2197,177,337,2179,174,339,2183,173.5,340.25,2189.0


In [5]:
class BinaryMask:
    """Build a binary landmark mask on the voxel grid of a reference image.

    The mask is 0 everywhere except around the annotated landmarks, where a
    ball of value 1 is drawn by dilating the single landmark voxels.
    """

    def create_empty_image(self, image_ref):
        """Return a zero-filled UInt8 image on the same grid as `image_ref`.

        Size, spacing, origin and direction are copied from `image_ref`; the
        pixel type is always `sitk.sitkUInt8`, whatever the reference uses.
        """
        image = sitk.Image(image_ref.GetSize(), sitk.sitkUInt8)
        image.SetSpacing(image_ref.GetSpacing())
        image.SetOrigin(image_ref.GetOrigin())
        image.SetDirection(image_ref.GetDirection())

        print("---------Mask creation---------")
        print(f"Image size: {image_ref.GetSize()}")
        print(f"Image mask size: {image.GetSize()}")

        return image

    def apply_landmarks(self, image, df, image_name, observer='Average', n_landmarks=5):
        """Set the voxel at each landmark position to 1 (modifies `image` in place).

        Parameters
        ----------
        image : object exposing `SetPixel(index, value)` (a SimpleITK image).
        df : DataFrame indexed by image name, with three column levels:
            ('Landmark<i>', observer, 'X' | 'Y' | 'Z').
        image_name : row label of the image in `df`.
        observer : which annotator's coordinates to use ('Average', 'Amy', ...).
        n_landmarks : landmarks 1..n_landmarks are applied.

        Returns
        -------
        The same `image` object, for chaining.

        Notes
        -----
        The coordinates are rounded and passed to `SetPixel` as voxel indices.
        NOTE(review): this assumes the spreadsheet stores 0-based voxel
        indices in the image's own grid -- confirm against the annotation tool.
        """
        print("------Applying landmarks------")
        for i in range(1, n_landmarks + 1):
            landmark_n = 'Landmark' + str(i)
            # One .loc lookup per axis instead of chained indexing. int() makes
            # sure SetPixel receives plain Python ints even when the column
            # holds numpy integers (the per-observer columns).
            landmark = tuple(
                int(round(df.loc[image_name, (landmark_n, observer, axis)]))
                for axis in ('X', 'Y', 'Z')
            )

            print(f"Landmark {i}: {landmark}")
            image.SetPixel(landmark, 1) # Set voxel value to 1

        return image

    def binary_dilate(self, image, radius=3):
        """Grow every voxel of value 1 into a ball of `radius` voxels.

        The radius is expressed in voxels, not millimetres.
        """
        dilate_filter = sitk.BinaryDilateImageFilter()
        dilate_filter.SetKernelRadius(radius)
        dilate_filter.SetKernelType(sitk.sitkBall)
        dilate_filter.SetForegroundValue(1)

        return dilate_filter.Execute(image)

    def execute(self, image, df, image_name, observer='Average'):
        """Run the full pipeline: empty mask -> landmark voxels -> dilation.

        `image` is only used as the geometric reference for the mask.
        Returns the dilated mask image.
        """
        image_mask = self.create_empty_image(image)
        image_mask_annot = self.apply_landmarks(image_mask, df, image_name, observer=observer)
        image_mask_dilated = self.binary_dilate(image_mask_annot)

        return image_mask_dilated

In [7]:
class PreProcessing:
    """Geometric pre-processing (resampling and cropping) for SimpleITK images."""

    def resample(self, image, new_spacing=(1.0, 1.0, 1.0), interpolator=None):
        """Resample `image` onto a grid with the requested voxel spacing.

        Parameters
        ----------
        image : SimpleITK image to resample.
        new_spacing : target spacing per axis (defaults to 1.0 on every axis).
        interpolator : SimpleITK interpolator constant. `None` keeps the
            filter's default (linear in SimpleITK). Pass
            `sitk.sitkNearestNeighbor` for label/mask images so that no
            intermediate values are produced at the label borders.

        Returns
        -------
        The resampled image; origin and direction are taken from `image`.
        """
        original_spacing = image.GetSpacing()
        original_size = image.GetSize()
        new_spacing = [float(spc) for spc in new_spacing]
        # Keep the physical extent: voxel count scales with old / new spacing.
        new_size = [int(sz * spc / new_spc) for sz, spc, new_spc in zip(original_size, original_spacing, new_spacing)]

        # Create resampling filter (identity transform: only the grid changes)
        resampler = sitk.ResampleImageFilter()
        resampler.SetSize(new_size)
        resampler.SetOutputSpacing(new_spacing)
        resampler.SetOutputDirection(image.GetDirection())
        resampler.SetOutputOrigin(image.GetOrigin())
        resampler.SetTransform(sitk.Transform())
        if interpolator is not None:
            resampler.SetInterpolator(interpolator)

        # Apply resampling filter to image
        image_res = resampler.Execute(image)

        print(f"----------Resampling----------")
        print(f"Original image size: {image.GetSize()}")
        print(f"Resampled image size: {image_res.GetSize()}")
        print(f"Original image spacing: {image.GetSpacing()}")
        print(f"Resampled image spacing: {image_res.GetSpacing()}")
        print(f"Original image origin: {image.GetOrigin()}")
        print(f"Resampled image origin: {image_res.GetOrigin()}")

        return image_res

    def crop(self, image, crop_size, offset=(-70, 50, 250)):
        """Crop a `crop_size` window out of `image`.

        The window is centred in the image and then shifted by `offset`
        voxels per axis; the default shift is the hand-tuned value meant to
        centre the window approximately around the knee.

        Parameters
        ----------
        image : SimpleITK image to crop.
        crop_size : window size in voxels, one entry per image axis.
        offset : shift of the window start relative to the centred position,
            in voxels, one entry per image axis.

        Raises
        ------
        ValueError
            If `crop_size` or `offset` has fewer entries than the image has
            axes, or if the window does not lie fully inside the image.
            Slicing would otherwise silently return a smaller or wrapped
            region instead of the requested size.
        """
        original_size = image.GetSize()
        n_dim = image.GetDimension()

        if len(crop_size) < n_dim or len(offset) < n_dim:
            raise ValueError(
                f"crop_size {tuple(crop_size)} and offset {tuple(offset)} need one entry "
                f"per image axis ({n_dim})"
            )

        start_index = tuple((original_size[i] - crop_size[i]) // 2 + offset[i] for i in range(n_dim))
        end_index = tuple(start_index[i] + crop_size[i] for i in range(n_dim))

        for i in range(n_dim):
            if start_index[i] < 0 or end_index[i] > original_size[i]:
                raise ValueError(
                    f"Crop window {start_index}..{end_index} does not fit inside an image "
                    f"of size {tuple(original_size)}"
                )

        # Get the region of interest using indexing
        cropped_image = image[tuple(slice(start, end) for start, end in zip(start_index, end_index))]

        print(f"----------Cropping----------")
        print(f"Original image size: {image.GetSize()}")
        print(f"Cropped image size: {cropped_image.GetSize()}")
        print(f"Original image spacing: {image.GetSpacing()}")
        print(f"Cropped image spacing: {cropped_image.GetSpacing()}")
        print(f"Original image origin: {image.GetOrigin()}")
        print(f"Cropped image origin: {cropped_image.GetOrigin()}")

        return cropped_image
    
    # Normalisation

In [None]:
##### Pre-processing loop
# For every original image: build the landmark mask, then resample and crop
# both image and mask, writing each result to disk.

image_dir = "data/train/images"
landmarks_path = "data/train/landmarks.xls"

crop_size = (256, 256, 256)

# Make sure the output folders exist so WriteImage does not fail on a fresh checkout
for output_subdir in ("orig/mask", "post/imag", "post/mask"):
    os.makedirs(os.path.join(image_dir, output_subdir), exist_ok=True)

# Loop through every image in "orig/imag"
image_names = [image_name for image_name in sorted(os.listdir(os.path.join(image_dir, "orig/imag")))
               if image_name.endswith('.nii')]
preprocessing = PreProcessing()
for image_name in image_names:

    print(f"Image {image_name}:")
    print("--------------------------------")

    # File name without the ".nii" extension, used to derive output names
    image_stem = os.path.splitext(image_name)[0]

    image = sitk.ReadImage(os.path.join(image_dir, "orig/imag", image_name))

    # Create binary mask based on manually annotated landmarks
    # (`df` is the combined landmark table; rows are keyed by image file name)
    image_mask = BinaryMask().execute(image, df, image_name)
    sitk.WriteImage(image_mask, os.path.join(image_dir, "orig/mask", image_stem + "_mask.nii"))

    # Apply pre-processing to image and mask
    print("-----Pre-processing image-----")
    image_post_ = preprocessing.resample(image)
    image_post = preprocessing.crop(image_post_, crop_size)
    print("-----Pre-processing mask-----")
    image_mask_post_ = preprocessing.resample(image_mask)
    image_mask_post = preprocessing.crop(image_mask_post_, crop_size)

    sitk.WriteImage(image_post, os.path.join(image_dir, "post/imag", image_stem + "_res.nii"))
    sitk.WriteImage(image_mask_post, os.path.join(image_dir, "post/mask", image_stem + "_mask_res.nii"))

    print("")

In [9]:
#### Converting pre-processed images to NumPy arrays

# Defined here as well so the cell does not depend on another cell having run
image_dir = "data/train/images"
n_train = 8  # the first n_train volumes (sorted by file name) form the training set

# List full paths of all files in the directory
post_image_dir = os.path.join(image_dir, "post/imag")
image_paths = [os.path.join(post_image_dir, image_name) for image_name in sorted(os.listdir(post_image_dir))
               if image_name.endswith('.nii')]

# Convert sitk images to arrays
image_arrays_ = []
for image_path in image_paths:
    image = sitk.ReadImage(image_path)
    image_array = sitk.GetArrayFromImage(image)
    image_arrays_.append(image_array)

# Combine the volumes into a single array with a new leading axis.
# np.stack raises a ValueError if the volumes do not all share one shape
# (or if no volume was found) instead of silently building a ragged array.
image_arrays = np.stack(image_arrays_)

# Split along the first axis: [0, n_train) -> train, [n_train, end) -> test
subarrays = np.split(image_arrays, [n_train])
x_train = subarrays[0]
x_test = subarrays[1]

print(f"x_train created with shape {x_train.shape}")
print(f"x_test created with shape {x_test.shape}")

np.save("data/train/x_train.npy", x_train)
np.save("data/train/x_test.npy", x_test)

x_train created with shape (8, 256, 256, 256)
x_test created with shape (2, 256, 256, 256)


In [10]:
#### Converting pre-processed masks to NumPy arrays

# List full paths of all files in the directory
image_dir = "data/train/images"
n_train = 8  # the first n_train masks (sorted by file name) form the training set

post_mask_dir = os.path.join(image_dir, "post/mask")
mask_paths = [os.path.join(post_mask_dir, mask_name) for mask_name in sorted(os.listdir(post_mask_dir))
              if mask_name.endswith('.nii')]

# Convert sitk masks to arrays
mask_arrays_ = []
for mask_path in mask_paths:
    mask = sitk.ReadImage(mask_path)
    # Convert the mask that was just read (previously a leftover `image`
    # variable from another cell was converted here by mistake)
    mask_array = sitk.GetArrayFromImage(mask)
    mask_arrays_.append(mask_array)

# Combine the masks into a single array with a new leading axis.
# np.stack raises a ValueError if the masks do not all share one shape
# (or if no mask was found) instead of silently building a ragged array.
mask_arrays = np.stack(mask_arrays_)

# Split along the first axis: [0, n_train) -> train, [n_train, end) -> test
subarrays = np.split(mask_arrays, [n_train])
y_train = subarrays[0]
y_test = subarrays[1]

print(f"y_train created with shape {y_train.shape}")
print(f"y_test created with shape {y_test.shape}")

np.save("data/train/y_train.npy", y_train)
np.save("data/train/y_test.npy", y_test)

y_train created with shape (8, 256, 256, 256)
y_test created with shape (2, 256, 256, 256)
