In [None]:
import pydicom as py
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd

# import numba
# from numba import jit, cuda

from skimage import measure, draw
# from skimage.measure import find_contours
from skimage import morphology


In [None]:
#Image and Masks read from folders

root= "the root/path of the folders"
all_files= os.listdir(root)

# Names iterated by the later cells (one entry per patient folder under `root`).
# NOTE(review): `folders` is used throughout the notebook but was never defined,
# so a fresh "Restart & Run All" failed with NameError. It is assumed to be the
# entries of `root` -- confirm. Sorted so patient indices are stable between runs.
folders= sorted(all_files)

def read_image(path):
    """Interactively load one DICOM series plus all masks for a patient.

    The files in ``path`` are scanned for their ``SeriesDescription``, the
    available series are listed, and the user is prompted (at most twice) for
    the index of the series to load. Masks are read from the directory
    ``<parent of path>/New folder/ScalarVolume``.

    Parameters
    ----------
    path : str
        Directory holding the patient's DICOM image files.

    Returns
    -------
    tuple of (list, list), or None
        ``(datasets, masks)``: the pydicom datasets of the chosen series
        ordered by ``InstanceNumber``, and the ``pixel_array`` of every file
        in the mask directory (in ``os.listdir`` order). ``None`` is returned
        when no valid series index was entered.

    Raises
    ------
    OSError
        If ``path`` or the mask directory cannot be listed.
    """
    print("***********************************************")

    # The mask directory is a sibling of the image directory. Deriving it from
    # the parent directory (instead of chopping two characters off the string)
    # works for any image folder name, with or without a trailing separator.
    patient_dir = os.path.dirname(os.path.normpath(path))
    mask_path = os.path.join(patient_dir, "New folder", "ScalarVolume")

    # Count masks
    mask_files = os.listdir(mask_path)
    print(f"Number of masks: {len(mask_files)}")

    files = os.listdir(path)

    # Only the header is needed to build the menu, so skip the pixel data here.
    series = [
        py.dcmread(os.path.join(path, f), stop_before_pixels=True).SeriesDescription
        for f in files
    ]

    # Sorted so the menu indices are the same on every run (set order is not).
    unique_series = sorted(set(series), key=str)

    print("\nNumber of images available for each SeriesDescription:")
    for num, s in enumerate(unique_series):
        count = series.count(s)
        print(f"{num}: {s} - {count} images")

    # Two attempts to pick a series; only bad input is treated as "wrong input",
    # file-reading errors are no longer masked by the retry logic.
    chosen_series = None
    have_choice = False
    for attempt in range(2):
        inp = input("\nChoose series to work: ")
        try:
            chosen_series = unique_series[int(inp)]
            have_choice = True
            break
        except (ValueError, IndexError):
            print("Wrong Input.\n" if attempt == 0 else "Wrong Input again.")

    if not have_choice:
        print("No images or masks read.")
        return None

    # Fully read only the files that belong to the chosen series.
    imgs = [
        py.dcmread(os.path.join(path, f))
        for f, description in zip(files, series)
        if description == chosen_series
    ]

    if len(imgs) == 0:
        print("No images or masks read.")
        return None

    # Sorting the selected files as per their Serial/Instance number.
    # Keyed by InstanceNumber, so a later file replaces an earlier one that
    # carries the same number; this is now reported instead of being silent.
    files_dict = {ds.InstanceNumber: ds for ds in imgs}
    if len(files_dict) != len(imgs):
        print(f"Warning: {len(imgs) - len(files_dict)} file(s) share an InstanceNumber and were dropped.")
    files_f = [files_dict[k] for k in sorted(files_dict)]

    # Extraction of masks
    # NOTE(review): masks are returned in os.listdir order, which is not
    # guaranteed to match the InstanceNumber order of the images -- confirm.
    masks = [py.dcmread(os.path.join(mask_path, m)).pixel_array for m in mask_files]

    print("Extraction of files done!")
    return files_f, masks

In [None]:
#Read images and masks for every patient folder; keep only patients that have both.

files= []
for f in folders:
    print("\nPatient=", f)
    try:
        # The trailing "" keeps the path ending in "A" + separator, which is
        # what read_image expects when it locates the mask directory.
        result= read_image(os.path.join(root, str(f), "A", ""))
    except Exception as err:
        # Best-effort loading: report why the patient is skipped and move on.
        # (KeyboardInterrupt is no longer swallowed.)
        print(f"Skipping patient {f}: {type(err).__name__}: {err}")
        continue

    # read_image returns None when no series was selected.
    if result is not None:
        files.append(result)

# Filter images and masks together so both lists stay index-aligned.
# NOTE(review): later cells zip `folders` with these lists; if any patient was
# skipped above, the folder names printed there no longer match -- confirm.
complete= [(patient_imgs, patient_masks) for patient_imgs, patient_masks in files
           if patient_imgs and patient_masks]

imgs= [pair[0] for pair in complete]
masks= [pair[1] for pair in complete]

In [None]:
#Check if number of images and masks are the same for every file.

def is_equal(images, masks, names=None):
    """Print one line for every patient whose image and mask counts differ.

    Parameters
    ----------
    images : sequence of sequences
        Per-patient collections of images.
    masks : sequence of sequences
        Per-patient collections of masks, in the same order as ``images``.
    names : sequence, optional
        Labels used in the report, indexed like ``images``. When omitted, the
        notebook-level ``folders`` variable is used if it exists.

    Returns
    -------
    None
        The result is reported through ``print`` only.
    """
    if names is None:
        names= globals().get("folders")
        if names is None:
            names= []

    # zip() below stops at the shorter list, so say so when the lists differ.
    if len(images) != len(masks):
        print(f"Number of image sets ({len(images)}) and mask sets ({len(masks)}) differ; "
              f"only the first {min(len(images), len(masks))} are compared.")

    not_equal= [i for i, (img, mask) in enumerate(zip(images, masks))
                if len(img) != len(mask)]

    for i in not_equal:
        label= names[i] if i < len(names) else "<unknown>"
        print(f"Images and Masks are not equal in file= {label}, with index= {i}.")



# Run the count check on the loaded data; any mismatch is printed by is_equal.
is_equal(imgs, masks)

In [None]:
#Cropping images which are greater than 512*512

target_shape = (512, 512)

def crop_image(image, target_shape):
    """Return the centred ``target_shape`` window of the first two axes of ``image``.

    Parameters
    ----------
    image : numpy.ndarray
        Array with at least two dimensions.
    target_shape : tuple of int
        Requested ``(rows, columns)`` of the result.

    Returns
    -------
    numpy.ndarray
        A slice (view) of ``image``. An axis that is already smaller than the
        requested size is returned unchanged along that axis.
    """
    # Clamp at 0: a negative start would be read as "count from the end" by
    # the slice and return a wrong (or empty) window for small images.
    start_x = max((image.shape[0] - target_shape[0]) // 2, 0)
    start_y = max((image.shape[1] - target_shape[1]) // 2, 0)

    cropped_image = image[start_x:start_x + target_shape[0], start_y:start_y + target_shape[1]]
    return cropped_image

In [None]:
#Slope-Intercept transform

def sl_inter(image):
    """Rescale every slice with its slope/intercept and crop it to ``target_shape``.

    Parameters
    ----------
    image : list
        Slices of one patient; each item must provide ``pixel_array`` and the
        first item must provide ``RescaleSlope`` and ``RescaleIntercept``.

    Returns
    -------
    list of numpy.ndarray
        One integer array per slice, ``pixel_array * slope + intercept``,
        cropped with ``crop_image`` when its shape differs from the
        notebook-level ``target_shape``. Empty input gives an empty list.
    """
    if len(image) == 0:
        return []

    print(f"Before Cropping: Slice shape= {np.shape(image[0].pixel_array)}")

    # float(), not int(): the rescale values may be fractional and int() would
    # silently truncate them (a slope of 0.5 would become 0).
    default_sl= float(image[0].RescaleSlope)
    default_inter= float(image[0].RescaleIntercept)

    x= []
    for i in image:
        # Use each slice's own rescale values; fall back to the first slice's
        # when a slice does not carry them.
        sl= float(getattr(i, "RescaleSlope", default_sl))
        inter= float(getattr(i, "RescaleIntercept", default_inter))

        img= (i.pixel_array * sl) + inter
        # Round before the integer cast so fractional results are not biased
        # towards zero; whole-number results are unaffected.
        img= np.rint(img).astype(int)

        # Compare the array's shape itself (np.shape of a shape tuple is (2,),
        # which never equals the target).
        if img.shape[:2] != tuple(target_shape):
            img= crop_image(img, target_shape)

        x.append(img)

    print(f"After Cropping: Slice shape= {np.shape(x[0])}")
    return x

In [None]:
# Apply the slope/intercept rescaling to each patient's slices and collect the results.

images= list()

print("\n")

for f, image in zip(folders, imgs):
    rescaled_slices= sl_inter(image)
    images += [rescaled_slices]

    print(f"{f}) Rescaling of files done.")

In [None]:
#Cropping Masks

for f, mask in zip(folders, masks):
    print(f"\n\nPatient= {f}\nBefore Cropping: Mask shape= {np.shape(mask[0])}")

    for i, m in enumerate(mask):
        # Compare the mask's own shape with the target. The previous test used
        # np.shape(m.shape), which is always (2,) and therefore never matched.
        if m.shape[:2] != tuple(target_shape):
            mask[i]= crop_image(m, target_shape)

    print(f"After Cropping: Mask shape= {np.shape(mask[0])}")

In [None]:
# Report the shape of the first slice, then of the first mask, for every patient.
for f, image in zip(folders, images):
    slice_shape= np.shape(image[0])
    print(f"{f}) Slice shape= {slice_shape}")

for f, mask in zip(folders, masks):
    mask_shape= np.shape(mask[0])
    print(f"{f}) Mask shape= {mask_shape}")

In [None]:
#Display image and corresponding mask

i= 7            # position of the patient in `images` / `masks`
slice_idx= 20   # slice shown for that patient (same index for image and mask)

#(figsize) is basically (width inches, height inches)
fig, axes= plt.subplots(1, 2, sharey= True, figsize= (7, 6))


plt.subplots_adjust(left=0.1, right=0.9, bottom=0.1, top=0.9, wspace=0.1, hspace=0.5)

axes[0].imshow((images[i][slice_idx]), cmap= "gray")
axes[0].set_title(f"Image (patient index {i}, slice {slice_idx})")

axes[1].imshow((masks[i][slice_idx]), cmap= "gray")
axes[1].set_title("Mask")

plt.show()

In [None]:
#To find the desired ROI (connected component) using measure.label

def label_th(image):
    """Keep, for every slice, the thresholded pixels of the selected labelled region.

    For each slice the pixels with ``-500 < value < 1000`` are labelled with
    ``measure.label``. The labels are then visited from the most to the least
    frequent, and the first one whose region contains more than 10000 pixels
    above 10 is selected. Inside that region only values in ``[-100, 200]``
    are kept; every other pixel is set to ``-350``.

    Parameters
    ----------
    image : iterable of numpy.ndarray
        Slices of one patient.

    Returns
    -------
    list of numpy.ndarray
        One array per input slice. A slice for which no label qualifies, or
        whose processing raises, is returned filled with ``-350``.
    """
    proc_images= []

    for i, sl in enumerate(image):
        try:
            # Initial thresholding mask
            mask1 = np.logical_and(sl > -500, sl < 1000)

            # Label the connected regions
            l = measure.label(mask1)

            # Find unique labels and counts
            a, b = np.unique(l, return_counts=True)

            # Positions in `a`/`b`, ordered from the largest count to the smallest
            sorted_ind_b = (np.argsort(b))[::-1]

            selected = None

            # For the labels, go through to find the ROI.
            for j in sorted_ind_b:

                # Binary mask of the label at position j. `j` already is a
                # position in `a`; indexing sorted_ind_b with it again applied
                # the ordering twice and visited the labels in the wrong order.
                # NOTE(review): label 0 (pixels outside the initial threshold)
                # is also a candidate here, as before -- confirm this is wanted.
                fc = (l == a[j])

                # Binary masked image
                fc_sl = np.multiply(fc, sl)

                # To separate 0 HU from nearby HU values (e.g., 1, -1 etc.)
                # NOTE(review): this also overwrites pixels inside the region
                # whose value is exactly 0 -- confirm this is intended.
                fc_sl[fc_sl == 0] = np.min(sl)

                if np.count_nonzero(fc_sl > 10) > 10000:  # Maximum no. of pixels in the ROI >10 HU.

                    # Thresholding mask
                    fc_sl_th = np.logical_and(fc_sl >= -100, fc_sl <= 200)

                    # Thresholded final image
                    fc_img_th = np.multiply(fc_sl_th, sl)
                    fc_img_th[fc_img_th == 0] = -350

                    selected = fc_img_th
                    break

            if selected is None:
                selected = np.full_like(sl, -350)

            proc_images.append(selected)

        except Exception as err:
            # Best-effort: keep the output aligned with the input, but say
            # which slice failed instead of hiding the error.
            print(f"Slice {i}: processing failed ({type(err).__name__}: {err}); filled with -350.")
            proc_images.append(np.full_like(sl, -350))

    return proc_images

In [None]:
# Run label_th on every patient's rescaled slices and collect the results.

images_th= list()

print("\n")
for f, img in zip(folders, images):
    processed_slices= label_th(img)
    images_th += [processed_slices]
    print(f"Folder-{f}) Processing of files done.")

In [None]:
#Modify masks to exclude negatives and bones (within thresholds).
#image is processed image, mask is mask read originally.

def mod_masks(image, mask):
    """Dilate each non-empty mask and keep only pixels whose image value is >= -100.

    Parameters
    ----------
    image : iterable of numpy.ndarray
        Processed slices of one patient.
    mask : iterable of numpy.ndarray
        Original masks, paired with ``image`` by position.

    Returns
    -------
    list of numpy.ndarray
        One entry per (slice, mask) pair. A mask whose sum is not positive is
        returned as is; otherwise the entry is an integer 0/1 array. If
        processing a pair raises, the original mask is kept for that pair.
    """
    proc_masks= []
    kernel= None  # structuring element, built once on first use

    for idx, (sl, m) in enumerate(zip(image, mask)):
        # Errors are handled per pair: one bad slice no longer cuts the
        # returned list short for all the slices that follow it.
        try:
            if(np.sum(m)> 0):
                if kernel is None:
                    kernel= morphology.disk(2)
                new_mask_d= morphology.dilation(m, kernel)

                # Same result as multiplying and thresholding for 0/1 masks,
                # but the image values are not scaled when the mask uses
                # another "on" value (e.g. 255). Pixels whose image value is
                # exactly 0 stay excluded, as before.
                new_mask= (new_mask_d != 0) & (sl != 0) & (sl >= -100)

                proc_masks.append(new_mask.astype(int))

            else:
                proc_masks.append(m)
        except Exception as err:
            print(f"Mask {idx}: processing failed ({type(err).__name__}: {err}); original mask kept.")
            proc_masks.append(m)

    return proc_masks

In [None]:
# Run mod_masks on every patient's processed slices and original masks; collect the results.

masks_th= list()

print("\n")

for f, image, mask in zip(folders, images_th, masks):
    m= mod_masks(image, mask)
    masks_th += [m]
    n_images, n_masks= len(image), len(m)
    print(f"Folder-{f}) Processing of masks done. Images, Masks= {n_images},{n_masks}")