<a href="https://colab.research.google.com/github/matan034/Knee-Bone-Segmentation/blob/main/KneeProject_preprocess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preprocess



In [None]:
import os, glob
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
import cv2
from google.colab import drive
import itertools
from sklearn.model_selection import train_test_split
from skimage.segmentation import find_boundaries

In [None]:
 #CONSTANTS!!!

# STEP 1 - Load and visualize data

# Axes along which every volume is cut into 2D slices.
SLICE_X = True
SLICE_Y = False
SLICE_Z = True

# Which parts of the data set the driver cell generates.
TRAIN_AND_TEST = True
VALIDATION = True

# Mask handling switches.
FIND_BOUNDARY = False                # passed to saveSlice as only_boundary for mask slices
IS_BOUNDARY_PATH = False             # selects the *Boundary output folders below
REMOVE_BLACK_NO_BONE_IMAGES = False  # enables the non-zero pixel filter in check_data_in_image


drive.mount('/content/drive')
dataInputPath = '/content/drive/My Drive/KneeProject/volume_data'

# Regular images go to training/testing/validation; boundary images go to
# the matching *Boundary folders.
if not IS_BOUNDARY_PATH:
  trainingOutputPath = '/content/drive/My Drive/KneeProject/training/'
  testingOutputPath = '/content/drive/My Drive/KneeProject/testing/'
  validationOutputPath = '/content/drive/My Drive/KneeProject/validation/'
else:
  trainingOutputPath = '/content/drive/My Drive/KneeProject/trainingBoundary/'
  testingOutputPath = '/content/drive/My Drive/KneeProject/testingBoundary/'
  validationOutputPath = '/content/drive/My Drive/KneeProject/validationBoundary/'

# Each split keeps its image slices and mask slices in sibling sub-folders.
trainingImageSliceOutput = os.path.join(trainingOutputPath, 'img/femur')
trainingMaskSliceOutput = os.path.join(trainingOutputPath, 'mask/femur')

testingImageSliceOutput = os.path.join(testingOutputPath, 'img/femur')
testingMaskSliceOutput = os.path.join(testingOutputPath, 'mask/femur')

validationImageSliceOutput = os.path.join(validationOutputPath, 'img/femur')
validationMaskSliceOutput = os.path.join(validationOutputPath, 'mask/femur')

# STEP 2 - Image normalization
# Intensity window used by normalizeImageIntensityRange.
HOUNSFIELD_MIN = -1000
HOUNSFIELD_MAX = 2000
HOUNSFIELD_RANGE = HOUNSFIELD_MAX - HOUNSFIELD_MIN

# Zero-padding width of the slice index in the saved file names.
SLICE_DECIMATE_IDENTIFIER = 3

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Collect the img/ and mask/ sub-folder path of every entry in the data folder.
imgPaths, maskPaths = [], []
for folderName in os.listdir(dataInputPath):
    imgPaths.append(os.path.join(dataInputPath, f"{folderName}/img/"))
    maskPaths.append(os.path.join(dataInputPath, f"{folderName}/mask/"))

**Read img/mask functions**




In [None]:
# Normalize image
def normalizeImageIntensityRange(img, hu_min=None, hu_max=None):
    """Clip intensities to a window and rescale them linearly to [0, 1].

    The input is left untouched; the clipped and rescaled values are
    returned as a new array.

    Parameters
    ----------
    img : array-like
        Intensity values to normalize.
    hu_min, hu_max : number, optional
        Lower / upper bound of the intensity window. When omitted the
        module-level HOUNSFIELD_MIN / HOUNSFIELD_MAX are used.

    Returns
    -------
    numpy.ndarray
        ``(clip(img, lower, upper) - lower) / (upper - lower)``.

    Raises
    ------
    ValueError
        If the upper bound is not greater than the lower bound.
    """
    lower = HOUNSFIELD_MIN if hu_min is None else hu_min
    upper = HOUNSFIELD_MAX if hu_max is None else hu_max
    if upper <= lower:
        raise ValueError(
            f'intensity window is empty: min={lower!r}, max={upper!r}')
    # np.clip returns a new array, so the caller's data is not modified.
    clipped = np.clip(img, lower, upper)
    return (clipped - lower) / (upper - lower)

# Read image or mask volume
def readImageVolume(imgPath, normalize=False):
    """Load the volume stored at ``imgPath`` and return its voxel data.

    The file is opened with nibabel and its data is fetched through
    ``get_fdata()``. When ``normalize`` is true the data is passed through
    normalizeImageIntensityRange before being returned; otherwise it is
    returned as loaded.
    """
    volume = nib.load(imgPath).get_fdata()
    return normalizeImageIntensityRange(volume) if normalize else volume

# Save volume slice to file
def saveSlice(img, fname, path, only_boundary=False):
    """Write one 2D slice to ``<path>/<fname>.png``.

    Parameters
    ----------
    img : array
        Slice to save. Values are multiplied by 255 and cast to uint8, so
        they are expected to lie in [0, 1] - TODO confirm for mask labels.
    fname : str
        File name without extension.
    path : str
        Output folder; created when it does not exist yet.
    only_boundary : bool
        When true, the slice is replaced by the result of
        ``find_boundaries(img, mode='thick')`` before saving.

    A progress line is printed for every written file; a failed write is
    reported on its own line instead of being silently ignored.
    """
    if only_boundary:
        img = find_boundaries(img, mode='thick')
    img = np.uint8(img * 255)
    # cv2.imwrite only returns False when the target folder is missing,
    # so make sure it exists before writing.
    os.makedirs(path, exist_ok=True)
    fout = os.path.join(path, f'{fname}.png')
    if cv2.imwrite(fout, img):
        print(f'[+] Slice saved: {fout}', end='\r')
    else:
        print(f'[!] Failed to save slice: {fout}')


def check_data_in_image(img, minAmountOfPixelsInImg=1):
    """Decide whether a slice should be kept.

    With REMOVE_BLACK_NO_BONE_IMAGES switched off every slice is accepted.
    Otherwise the slice is accepted only when its number of non-zero
    pixels is strictly greater than ``minAmountOfPixelsInImg``.
    """
    if REMOVE_BLACK_NO_BONE_IMAGES:
        return bool(np.count_nonzero(img) > minAmountOfPixelsInImg)
    return True


#gets main folder name
def getFolderName(path, key = 'volume_data'):
    """Return the path component that directly follows ``key``.

    Parameters
    ----------
    path : str
        Path whose components are separated by ``os.path.sep``.
    key : str
        Name of the component that precedes the wanted folder.

    Raises
    ------
    ValueError
        If ``key`` is not one of the path components.
    IndexError
        If ``key`` is the last component of the path.
    """
    parts = path.split(os.path.sep)
    # Honour the caller-supplied key instead of a hard-coded folder name.
    return parts[parts.index(key) + 1]



#reads normalizes and saves slices
def readAndProcessVolumes(img_tuple, mask_tuple):
    """Load the volumes of one folder pair and save their slices as PNGs.

    Parameters
    ----------
    img_tuple, mask_tuple : tuple
        ``(folder, firstOutputDir, secondOutputDir, normalize)`` where
        ``folder`` is a path prefix that is globbed for ``*.nii`` files,
        the two output folders are forwarded to sliceAndSaveVolumeImage,
        and ``normalize`` is forwarded to readImageVolume.

    The image and mask files are sorted and paired by position (first
    image with first mask, and so on). Files without a partner are
    skipped.

    Returns
    -------
    int
        Total number of slices reported by sliceAndSaveVolumeImage over
        all processed pairs; 0 when no pair was found.
    """
    img_files = sorted(glob.iglob(img_tuple[0] + '*.nii'))
    mask_files = sorted(glob.iglob(mask_tuple[0] + '*.nii'))
    if len(img_files) != len(mask_files):
        print(f'[!] {len(img_files)} image file(s) but {len(mask_files)} mask file(s) '
              f'in {img_tuple[0]} / {mask_tuple[0]}; unpaired files are skipped')
    if not img_files or not mask_files:
        return 0

    img_folderName = getFolderName(img_tuple[0])
    mask_folderName = getFolderName(mask_tuple[0])

    totalSlices = 0
    for idx, (img_filename, mask_filename) in enumerate(zip(img_files, mask_files)):
        img = readImageVolume(img_filename, img_tuple[3])
        mask = readImageVolume(mask_filename, mask_tuple[3])
        numOfSlices = sliceAndSaveVolumeImage(
            img_tuple=(img, img_folderName + str(idx), img_tuple[1], img_tuple[2]),
            mask_tuple=(mask, mask_folderName + str(idx), mask_tuple[1], mask_tuple[2]))
        print(f'\n{img_filename + mask_filename}, {numOfSlices} slices created \n')
        totalSlices += numOfSlices
    return totalSlices


# Slices one axis of an image/mask volume pair and saves the kept slices.
def _sliceVolumeAlongAxis(img_tuple, mask_tuple, axis, axisLabel, minMaskFraction):
    """Save the slices along ``axis`` and return how many were kept.

    A slice is kept when check_data_in_image accepts the mask slice with a
    threshold of ``minMaskFraction`` times the mask slice size. Kept
    slices whose index is a multiple of 5 are written to the folders at
    tuple position 3, all others to the folders at tuple position 2.
    """
    kept = 0
    for i in range(img_tuple[0].shape[axis]):
        # Equivalent to vol[i,:,:], vol[:,i,:] or vol[:,:,i] depending on axis.
        index = [slice(None)] * 3
        index[axis] = i
        index = tuple(index)

        mask_slice = mask_tuple[0][index]
        minPixelsInMask = int(mask_slice.size * minMaskFraction)
        if not check_data_in_image(mask_slice, minPixelsInMask):
            continue
        kept += 1
        img_slice = img_tuple[0][index]

        out = 3 if i % 5 == 0 else 2
        sliceName = f'-slice{str(i).zfill(SLICE_DECIMATE_IDENTIFIER)}_{axisLabel}'
        saveSlice(img_slice, img_tuple[1] + sliceName, img_tuple[out])
        saveSlice(mask_slice, mask_tuple[1] + sliceName, mask_tuple[out], FIND_BOUNDARY)
    return kept


#tuple - vol, fname, trainPath, testPath
def sliceAndSaveVolumeImage(img_tuple, mask_tuple):
    """Cut an image volume and its mask volume into PNG slices.

    Parameters
    ----------
    img_tuple, mask_tuple : tuple
        ``(volume, fileNamePrefix, outputDir, outputDirForEvery5thSlice)``.
        The number of slices per axis is taken from the image volume.

    Slicing is done along the axes enabled by SLICE_X / SLICE_Y / SLICE_Z.
    The mask threshold fraction is 0.05 for the x axis and 0.1 for the y
    and z axes.

    Returns
    -------
    int
        Number of slice positions that were kept, summed over all enabled
        axes.
    """
    print(img_tuple[0].shape, "image dims")
    print(mask_tuple[0].shape, "mask dims")
    cnt = 0
    if SLICE_X:
        cnt += _sliceVolumeAlongAxis(img_tuple, mask_tuple, 0, 'x', 0.05)
    if SLICE_Y:
        cnt += _sliceVolumeAlongAxis(img_tuple, mask_tuple, 1, 'y', 0.1)
    if SLICE_Z:
        cnt += _sliceVolumeAlongAxis(img_tuple, mask_tuple, 2, 'z', 0.1)
    return cnt


def clear_dir(path):
  """Delete every entry directly inside ``path`` that is not a directory.

  Sub-directories and their contents are left alone. Nothing happens
  when ``path`` does not exist.
  """
  if not os.path.exists(path):
    return
  for name in os.listdir(path):
    entry = os.path.join(path, name)
    if os.path.isdir(entry):
      continue
    os.remove(entry)

**Save png of all slices**

In [None]:
  # Remove the files in every slice output folder.
  for outputDir in (trainingImageSliceOutput,
                    testingImageSliceOutput,
                    trainingMaskSliceOutput,
                    testingMaskSliceOutput,
                    validationImageSliceOutput,
                    validationMaskSliceOutput):
    clear_dir(outputDir)

In [None]:
# All listed folders except the last one feed the training/testing output;
# the folder at index NUM_OF_FOLDERS is used for validation.
NUM_OF_FOLDERS = len(os.listdir(dataInputPath)) - 1
#NUM_OF_FOLDERS=1

if TRAIN_AND_TEST:
  for i in range(NUM_OF_FOLDERS):
    # read all img volumes and generate pngs from slices for Training and testing images
    numOfSlicesImgs = readAndProcessVolumes(
        (imgPaths[i], trainingImageSliceOutput, testingImageSliceOutput, True),
        (maskPaths[i], trainingMaskSliceOutput, testingMaskSliceOutput, False),
    )

if VALIDATION:
    # Both output slots point at the validation folders, so every slice lands there.
    numOfSlicesImgs = readAndProcessVolumes(
        (imgPaths[NUM_OF_FOLDERS], validationImageSliceOutput, validationImageSliceOutput, True),
        (maskPaths[NUM_OF_FOLDERS], validationMaskSliceOutput, validationMaskSliceOutput, False),
    )

(512, 512, 570) image dims
(512, 512, 570) mask dims
[+] Slice saved: /content/drive/My Drive/KneeProject/training/mask/femur/ALMar0-slice569_z.png
/content/drive/My Drive/KneeProject/volume_data/ALMar/img/301 3D_VIEW_PD.nii/content/drive/My Drive/KneeProject/volume_data/ALMar/mask/Segmentation.ALMar-Femur-label.nii, 1082 slices created 

(512, 512, 570) image dims
(512, 512, 570) mask dims
[+] Slice saved: /content/drive/My Drive/KneeProject/training/mask/femur/FK0-slice569_z.png
/content/drive/My Drive/KneeProject/volume_data/FK/img/301 3D_VIEW_PD_1.nii/content/drive/My Drive/KneeProject/volume_data/FK/mask/Segmentation_FK-Femur-label.nii, 1082 slices created 

(512, 512, 570) image dims
(512, 512, 570) mask dims
[+] Slice saved: /content/drive/My Drive/KneeProject/training/mask/femur/BMS0-slice569_z.png
/content/drive/My Drive/KneeProject/volume_data/BMS/img/201 3D_VIEW_PD.nii/content/drive/My Drive/KneeProject/volume_data/BMS/mask/Segmentation_BMS-Femur-label.nii, 1082 slices creat