In [2]:
!gdown --id 1989z-UY70MMNdDI9jKzlOOxv80MMvGaX

Downloading...
From: https://drive.google.com/uc?id=1989z-UY70MMNdDI9jKzlOOxv80MMvGaX
To: /ssd_scratch/cvit/sashank.sridhar/ACDC.zip
100%|██████████████████████████████████████| 2.45G/2.45G [01:32<00:00, 26.5MB/s]


In [3]:
!mkdir 'data'

In [4]:
!unzip ACDC.zip -d data

Archive:  ACDC.zip
 extracting: data/ACDC/database/testing/patient101/Info.cfg  
 extracting: data/ACDC/database/testing/patient101/MANDATORY_CITATION.md  
 extracting: data/ACDC/database/testing/patient101/patient101_4d.nii.gz  
 extracting: data/ACDC/database/testing/patient101/patient101_frame01.nii.gz  
 extracting: data/ACDC/database/testing/patient101/patient101_frame01_gt.nii.gz  
 extracting: data/ACDC/database/testing/patient101/patient101_frame14.nii.gz  
 extracting: data/ACDC/database/testing/patient101/patient101_frame14_gt.nii.gz  
 extracting: data/ACDC/database/testing/patient102/Info.cfg  
 extracting: data/ACDC/database/testing/patient102/MANDATORY_CITATION.md  
 extracting: data/ACDC/database/testing/patient102/patient102_4d.nii.gz  
 extracting: data/ACDC/database/testing/patient102/patient102_frame01.nii.gz  
 extracting: data/ACDC/database/testing/patient102/patient102_frame01_gt.nii.gz  
 extracting: data/ACDC/database/testing/patient102/patient102_frame13.nii.gz

In [6]:
DATA_ROOT = 'data'

ACDC = {
    'id': 0,
    'image_size': (192, 192),
    'raw_path': DATA_ROOT + '/ACDC/database/training',
    'training_set': {
        'path': DATA_ROOT + '/ACDC/training',
        'path_normalized': DATA_ROOT + '/ACDC/training_normalized',
        'path_normalized_images': DATA_ROOT + '/ACDC/training_normalized' + '/images',
        'path_normalized_masks': DATA_ROOT + '/ACDC/training_normalized' + '/masks'
    },
    'testing_set': {
        'path': DATA_ROOT + '/ACDC/database/testing',
        'path_normalized': DATA_ROOT + '/ACDC/testing_normalized',
        'path_normalized_images': DATA_ROOT + '/ACDC/testing_normalized' + '/images',
        'path_normalized_masks': DATA_ROOT + '/ACDC/testing_normalized' + '/masks'
    }
}

In [8]:
import os
import random
import shutil
import numpy as np

In [9]:
def create_directory(path):
    if os.path.exists(path):
        shutil.rmtree(path)
    os.mkdir(path)

    return None

In [16]:
os.remove('data/ACDC/database/training/MANDATORY_CITATION.md')

In [17]:
def split_training_testing_set(config, percentage):
    """
    Split the raw directory in a training and a testing set.
    :param config: json, access path.
    :param percentage: int, percentage of the raw directory given to the training set.
    :return: None
    """
    patients = os.listdir(config['raw_path'])
    print(patients)
    nb_patient = 0
    for _ in patients:
        nb_patient += 1
    testing_set_count = round(nb_patient - ((nb_patient / 100) * percentage))
    testing_set_images = random.sample(os.listdir(config['raw_path']), testing_set_count)
    training_set_images = np.setdiff1d(patients, testing_set_images)

    create_directory(config['testing_set']['path'])
    create_directory(config['training_set']['path'])

    for patient in testing_set_images:
        print(patient)
        shutil.copytree(config['raw_path'] + "/" + patient, config['testing_set']['path'] +"/"+ patient)
    for patient in training_set_images:
        print(patient)
        shutil.copytree(config['raw_path'] + "/" + patient, config['training_set']['path'] + "/" + patient)

    return None

In [18]:
split_training_testing_set(ACDC, percentage=80)

['patient073', 'patient071', 'patient008', 'patient015', 'patient023', 'patient010', 'patient013', 'patient028', 'patient049', 'patient036', 'patient047', 'patient080', 'patient006', 'patient086', 'patient031', 'patient037', 'patient026', 'patient075', 'patient076', 'patient087', 'patient089', 'patient038', 'patient048', 'patient064', 'patient055', 'patient039', 'patient060', 'patient017', 'patient016', 'patient051', 'patient034', 'patient005', 'patient079', 'patient093', 'patient082', 'patient077', 'patient097', 'patient018', 'patient040', 'patient072', 'patient011', 'patient029', 'patient059', 'patient058', 'patient066', 'patient024', 'patient027', 'patient003', 'patient100', 'patient014', 'patient084', 'patient062', 'patient095', 'patient056', 'patient004', 'patient081', 'patient092', 'patient061', 'patient091', 'patient019', 'patient001', 'patient063', 'patient068', 'patient043', 'patient044', 'patient054', 'patient032', 'patient021', 'patient098', 'patient012', 'patient088', 'pati

In [20]:
!pip install nibabel

Collecting nibabel
  Downloading nibabel-5.1.0-py3-none-any.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 1.2 MB/s eta 0:00:01
Installing collected packages: nibabel
Successfully installed nibabel-5.1.0


In [21]:
import os
import nibabel as nib
import cv2
import numpy as np
import shutil

In [26]:
class NormalizationACDC:
    """
    Normalize ACDC dataset:
    Select frames of end systole and end diastole in short axis view.
    Break down frames into slices.
    Transform slices into NumPy arrays.
    Rescale images (spatial resolution of 1mm*2 / pixel).
    Crop images in a square shape (192 pixels) around the center.
    Apply CLAHE transformation.
    Store images in a new directory.
    """
    def __init__(self, config):
        self.path = config['path']
        self.path_normalized = config['path_normalized']
        self.path_normalized_images = config['path_normalized_images']
        self.path_normalized_masks = config['path_normalized_masks']

    def normalize(self):
        self.create_normalized_directory()
        patients = os.listdir(self.path)
        for patient in patients:
            files = os.listdir(self.path +"/"+ patient)
            for file in files:
                file_path = self.path +"/"+ patient +"/"+ file
                new_name = file[:-7]
                if self.get_file_type(file) == 0:
                    images = self.transform_img_to_numpy_arrays(file_path, 0)
                    pixdim = self.get_spatial_resolution(file_path)
                    images_rescaled = self.rescale(images, pixdim)
                    images_cropped = self.crop(images_rescaled)
                    images_clahe = self.clahe(images_cropped)
                    self.save_numpy_images(images_clahe, new_name, 0)

                elif self.get_file_type(file) == 1:
                    images = self.transform_img_to_numpy_arrays(file_path, 1)
                    pixdim = self.get_spatial_resolution(file_path)
                    images_rescaled = self.rescale(images, pixdim)
                    images_cropped = self.crop(images_rescaled)
                    self.save_numpy_images(images_cropped, new_name, 1)

        self.filter_empty_mask()

        return None

    def filter_empty_mask(self):
        """
        After the normalization process, images who does not contain the RV are dropped.
        :return: None
        """
        masks = os.listdir(self.path_normalized_masks)
        total, droped = len(masks), 0
        for mask in masks:
            data = np.load(self.path_normalized_masks +"/"+ mask)
            min, max = np.min(data), np.max(data)
            if max == 0:
                os.remove(self.path_normalized_masks +"/"+ mask)
                os.remove(self.path_normalized_images +"/"+ mask)
                droped += 1

        return None

    def clahe(self, img_arrays, tile_size=(1, 1)):
        """
        Apply the Contrast Limited Adaptative Histogram Equalization transformation to every slices of a frame.
        :param img_arrays: numpy arrays of slices of a frame
        :param tile_size: size of the tiles
        :return: array containing images with CLAHE transformation.
        """
        images_clahe = []
        clahe = cv2.createCLAHE(tileGridSize=tile_size, clipLimit=2.5)
        for img in img_arrays:
            img_8bit = self.convert_16bit_8bit(img)
            images_clahe.append(clahe.apply(img_8bit))

        return images_clahe

    def convert_16bit_8bit(self, img):
        """
        Convert a 16bit image to a 8bit image.
        :param img: numpy array of the 16 bit image
        :return: numpy image of the 8 bit image
        """
        min_16 = np.min(img)
        max_16 = np.max(img)
        img_8 = np.array(np.rint((255 * (img - min_16)) / float(max_16 - min_16)), dtype=np.uint8)

        return img_8

    def crop(self, img_arrays, crop_size=192):
        """
        Crop the image around center in a square shape.
        :param img_arrays: numpy arrays of slices of a frame
        :param crop_size: int
        :return: array containing newly cropped images for every slices of a frame.
        """
        images_croped = []
        for img in img_arrays:
            height, width = img.shape
            crop_x = (width - crop_size) // 2
            crop_y = (height - crop_size) // 2
            image_croped = img[crop_y:(crop_y + crop_size), crop_x:(crop_x + crop_size)]
            images_croped.append(image_croped)

        return images_croped

    def create_normalized_directory(self):
        if os.path.exists(self.path_normalized):
            shutil.rmtree(self.path_normalized)
        os.mkdir(self.path_normalized)
        os.mkdir(self.path_normalized_images)
        os.mkdir(self.path_normalized_masks)

        return None

    def get_file_type(self, file_name):
        """
        Label the type of file. 4 options possible.
        Type 0 = unlabeled ES and ED frames
        Type 1 = labeled ES and ED frames
        Type 2 = Complete 4D image
        Type 3 = Info file
        :param file_name: str
        :return: int
        """
        if file_name[-9:] == "gt.nii.gz":
            return 1
        elif file_name[-9:] == "4d.nii.gz":
            return 2
        elif file_name == 'Info.cfg' or file_name == 'MANDATORY_CITATION.md':
            return 3
        else:
            return 0

    def filter_right_ventricle(self, img):
        """
        Only select the RV labeled class
        :param img: numpy array of the image
        :return: numpy array of the filtered image
        """
        img[img > 1] = 0

        return img

    def transform_img_to_numpy_arrays(self, file_path, file_type):
        img_obj = nib.load(file_path)
        img_data = img_obj.get_fdata()

        if file_type == 1:
            img_data = self.filter_right_ventricle(img_data)

        depth = img_data.shape[2]
        images = []
        for i in range(depth):
            images.append(np.array(img_data[:, :, i]))

        return images

    def get_spatial_resolution(self, file_path):
        img_obj = nib.load(file_path)
        header_infos = img_obj.header

        return header_infos.get_zooms()

    def rescale(self, images, pixdim):
        """
        Rescale every images in order to obtain a spatial resolution of 1mm^2 / pixel
        :param images: numpy arrays of slices of a frame
        :param pixdim: spatial resolution of the images
        :return: numpy arrays of rescaled images
        """
        images_rescaled = []
        for image in images:
            images_rescaled.append(
                cv2.resize(image, (0, 0), fx=pixdim[0], fy=pixdim[1])
            )

        return images_rescaled

    def save_numpy_images(self, images_normalized, new_name, img_type):
        if img_type == 1:
            path = self.path_normalized_masks
        else:
            path = self.path_normalized_images
        i = 0
        for image in images_normalized:
            if new_name[-3:] == '_gt':
                new_name = new_name[:-3]
            n = new_name + '_' + str(i)
            np.save(path +"/"+ n, image)
            i += 1

        return 0


In [27]:
norm_training = NormalizationACDC(ACDC['training_set'])
norm_training.normalize()

In [28]:
norm_testing = NormalizationACDC(ACDC['testing_set'])
norm_testing.normalize()