In [3]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import SimpleITK as sitk

# Image Pre-processing

In [4]:
landmarks_path = "data/train/landmarks.xls"

# One worksheet per landmark; the list order fixes the column-group order in `df`.
landmark_sheets = [
    'Landmark 1 - FKC',
    'Landmark 2 - FME',
    'Landmark 3 - FLE',
    'Landmark 4 - FTP',
    'Landmark 5 - TKC',
]

# Passing a list of sheet names parses the workbook in a single call and
# returns a dict of DataFrames keyed by sheet name. Every sheet is read with
# a two-row column header and the first column (the image file name) as the
# row index.
sheets = pd.read_excel(landmarks_path, sheet_name=landmark_sheets, header=[0, 1], index_col=0)

# Keep the per-landmark frames available under their original names.
df_landmark1, df_landmark2, df_landmark3, df_landmark4, df_landmark5 = (
    sheets[sheet] for sheet in landmark_sheets
)

# Place the sheets side by side under a new outermost column level
# ('Landmark1' .. 'Landmark5'), so a single coordinate is addressed by the
# full column key (landmark, observer, axis).
df = pd.concat(
    [sheets[sheet] for sheet in landmark_sheets],
    axis=1,
    keys=[f'Landmark{n}' for n in range(1, len(landmark_sheets) + 1)],
)
df

Unnamed: 0_level_0,Landmark1,Landmark1,Landmark1,Landmark1,Landmark1,Landmark1,Landmark1,Landmark1,Landmark1,Landmark1,...,Landmark5,Landmark5,Landmark5,Landmark5,Landmark5,Landmark5,Landmark5,Landmark5,Landmark5,Landmark5
Unnamed: 0_level_1,Amy,Amy,Amy,Ben,Ben,Ben,Marta,Marta,Marta,Katie,...,Ben,Marta,Marta,Marta,Katie,Katie,Katie,Average,Average,Average
Image,X,Y,Z,X,Y,Z,X,Y,Z,X,...,Z,X,Y,Z,X,Y,Z,X,Y,Z
100063_ct.nii,178,354,2132,179,357,2131,179,356,2132,179,...,2198,176,368,2172,178,367,2167,177.0,368.0,2177.0
100084_ct.nii,183,377,1953,187,298,1962,173,276,1992,180,...,2044,176,300,2013,172,306,2022,175.75,301.75,2026.25
100114_ct.nii,161,328,1901,180,320,2003,173,311,2018,180,...,2061,173,320,2039,174,332,2045,169.5,327.0,2022.0
100115_ct.nii,136,304,2021,134,305,2031,125,289,2026,138,...,2068,136,311,2057,137,311,2061,136.25,311.75,2066.25
100125_ct.nii,247,363,1886,249,366,1875,241,342,1890,250,...,1920,253,360,1907,252,362,1915,249.0,368.75,1915.75
100131_ct.nii,149,341,2036,152,345,2027,144,331,2044,149,...,2083,149,346,2062,148,340,2058,146.75,346.25,2070.75
100139_ct.nii,199,361,1875,195,359,1878,186,347,1889,193,...,1933,198,360,1906,198,359,1905,194.5,363.5,1919.25
100155_ct.nii,242,395,2057,239,390,2079,235,381,2085,242,...,2132,244,394,2105,245,390,2118,242.0,395.25,2121.75
100164_ct.nii,338,249,1886,341,249,1891,337,241,1891,339,...,1951,328,265,1912,325,261,1930,325.25,264.75,1935.25
100193_ct.nii,172,334,2160,171,334,2160,169,327,2165,171,...,2197,177,337,2179,174,339,2183,173.5,340.25,2189.0


In [7]:
img_dir = "data/train/images"
landmarks_path = "data/train/landmarks.xls"
mask_dir = os.path.join(img_dir, "masks")

# Which annotation column group to use: Average, Amy, Katie, etc
observer = 'Average'

# Remove MacOS auto-generated file so the directory listing below only
# contains image files
file_path = os.path.join(img_dir, '.DS_Store')
if os.path.exists(file_path):
    os.remove(file_path)

# Make sure the output folder exists before any mask is written to it
os.makedirs(mask_dir, exist_ok=True)

# Binary dilation with a ball-shaped kernel grows every marked voxel into a
# small sphere. The configuration does not depend on the image, so the filter
# is built once and reused for every mask.
dilate_filter = sitk.BinaryDilateImageFilter()
dilate_filter.SetKernelRadius(3)
dilate_filter.SetKernelType(sitk.sitkBall)
dilate_filter.SetForegroundValue(1)

# Loop through each image in the directory (sub-directories such as the mask
# folder are skipped by the isfile check)
files = [img_path for img_path in sorted(os.listdir(img_dir)) if os.path.isfile(os.path.join(img_dir, img_path))]
for img_path in files:

    image = sitk.ReadImage(os.path.join(img_dir, img_path))

    # By default, SimpleITK represents images with an axis order of 'XYZ', whereas NumPy uses 'ZYX'
    # Convert all to XYZ
    image_arr = np.transpose(sitk.GetArrayFromImage(image), axes=(2, 1, 0))
    image_size = image.GetSize()

    print(f"Image {img_path}:")
    print("------------------")
    print(f"Size: {image_size}")
    print(f"Intensity (min-max): {np.min(image_arr)}-{np.max(image_arr)}")

    # Create an all-zero 8-bit mask with the same size and the same physical
    # geometry (spacing, origin, direction) as the source image
    mask = sitk.Image(image_size, sitk.sitkUInt8)
    mask.SetSpacing(image.GetSpacing())
    mask.SetOrigin(image.GetOrigin())
    mask.SetDirection(image.GetDirection())

    print(f"Mask size: {mask.GetSize()}")

    # Set voxel value to 1 at each landmark
    for i in range(1, 6):
        landmark_n = 'Landmark' + str(i)
        # Look each coordinate up with one .loc call on the full
        # (landmark, observer, axis) column key instead of chained indexing.
        # int() guarantees plain Python ints even when the selected column
        # holds integers, for which round() may return a NumPy scalar.
        x, y, z = (
            int(round(df.loc[img_path, (landmark_n, observer, axis)]))
            for axis in ('X', 'Y', 'Z')
        )
        landmark = (x, y, z)

        print(f"Landmark {i}: {landmark}")
        mask.SetPixel(landmark, 1) # Set pixel value to 1

    # Every landmark voxel carries the filter's foreground value (1), so a
    # single pass dilates all of them at once.
    dilated_mask = dilate_filter.Execute(mask)

    # Derive the output name from the image name without its extension
    # ('.nii', or '.nii.gz' for compressed volumes)
    if img_path.endswith(".nii.gz"):
        stem = img_path[:-len(".nii.gz")]
    else:
        stem = os.path.splitext(img_path)[0]

    # Save image
    sitk.WriteImage(dilated_mask, os.path.join(mask_dir, stem + "_mask.nii"))

    print("")



Image 100063_ct.nii:
------------------
Size: (587, 587, 3311)
Intensity (min-max): 0-4000
Mask size: (587, 587, 3311)
Landmark 1: (179, 356, 2132)
Landmark 2: (218, 338, 2107)
Landmark 3: (144, 378, 2113)
Landmark 4: (167, 343, 2124)
Landmark 5: (177, 368, 2177)

Image 100084_ct.nii:
------------------
Size: (543, 517, 3003)
Intensity (min-max): 0-4000
Mask size: (543, 517, 3003)
Landmark 1: (181, 310, 1973)
Landmark 2: (226, 276, 1974)
Landmark 3: (140, 309, 1956)
Landmark 4: (174, 274, 1978)
Landmark 5: (176, 302, 2026)

Image 100114_ct.nii:
------------------
Size: (578, 573, 3194)
Intensity (min-max): 0-4000
Mask size: (578, 573, 3194)
Landmark 1: (174, 320, 1981)
Landmark 2: (227, 308, 1956)
Landmark 3: (131, 334, 1952)
Landmark 4: (168, 302, 1977)
Landmark 5: (170, 327, 2022)

Image 100115_ct.nii:
------------------
Size: (550, 596, 3083)
Intensity (min-max): 0-4000
Mask size: (550, 596, 3083)
Landmark 1: (133, 301, 2024)
Landmark 2: (175, 288, 2007)
Landmark 3: (102, 321, 2002)