In [21]:
# Set the notebook's working directory to the project root (IPython `%cd` magic).
# NOTE(review): this is an absolute, machine-specific path; adjust it before running elsewhere.
%cd /Users/vitthal/Documents/Research/DataScience/MedicalResearch/mgmt

/Users/vitthal/Documents/Research/DataScience/MedicalResearch/mgmt


In [22]:
# Notebook-wide configuration

# Project root; the trailing slash matters because later cells build paths by concatenation.
path = '/Users/vitthal/Documents/Research/DataScience/MedicalResearch/mgmt/'

# Names of the class folders handled by the pipeline.
CATEGORIES = [
    'MGMT_negative',
    'MGMT_positive',
]

# Height and width (in pixels) of the generated images.
block_h = 224
block_w = 224
# stride = 3

# Number of Case_<n> copies of the dataset to create.
Case_Num = 5

# MRI modality to target (used to build the '<patient>_<modality>.nii.gz' file name).
modality = 'flair'

# A slice is kept only if it has more than this many non-zero pixels after masking.
pix_thresh = 300

Process to be followed:
1. load the vgg19 model
2. load the data
3. clean the data
4. save the data in a numpy array
5. save the data in a pickle file

In [23]:
# Function Definition --> Generate_images()
import os
import numpy as np
import cv2
import nibabel as nib
from PIL import Image
from tqdm import tqdm

def _subdirs(directory):
    """Return the names of the sub-directories of ``directory``.

    Regular files (for example macOS ``.DS_Store``) are skipped so that the
    traversal never tries to list a file as if it were a folder.
    """
    return [name for name in os.listdir(directory)
            if os.path.isdir(os.path.join(directory, name))]


def _tumor_crops(mod_data, seg_mask_data, min_pixels):
    """Cut the tumour bounding box out of every sufficiently large slice.

    Parameters
    ----------
    mod_data : numpy.ndarray
        3-D modality volume.
    seg_mask_data : numpy.ndarray
        3-D segmentation volume; non-zero voxels mark tumour. It is multiplied
        element-wise with ``mod_data`` (via a derived mask), so the two are
        expected to have the same shape. The array is NOT modified.
    min_pixels : int
        A slice is kept only if, after masking, it has more than this many
        non-zero pixels (all-zero slices are always dropped).

    Returns
    -------
    list of numpy.ndarray
        One 2-D float64 array per kept slice, cropped to the bounding box of
        its non-zero pixels and rescaled so that its maximum is 255.
    """
    # z-indices of the slices that contain any segmentation label
    labelled = np.any(seg_mask_data, axis=(0, 1))
    if not labelled.any():
        # No tumour annotated at all: nothing to export.
        return []
    zmin, zmax = np.where(labelled)[0][[0, -1]]

    # Rectangular mask: 1 inside each slice's tumour bounding box, 0 elsewhere.
    box_mask = np.zeros(seg_mask_data.shape)
    for layer in range(zmin, zmax + 1):
        rows, cols = np.nonzero(seg_mask_data[:, :, layer])
        if rows.size == 0:
            continue
        box_mask[rows.min():rows.max() + 1, cols.min():cols.max() + 1, layer] = 1

    # Keep only the modality intensities inside the bounding boxes.
    tumor = np.multiply(mod_data, box_mask)

    # Drop empty slices and slices with no more than `min_pixels` non-zero pixels.
    counts = np.count_nonzero(tumor, axis=(0, 1))
    kept_layers = np.flatnonzero(counts > max(min_pixels, 0))

    crops = []
    for lay in kept_layers:
        layer = tumor[:, :, lay]
        coords = np.argwhere(layer)
        x_min, y_min = coords.min(axis=0)
        x_max, y_max = coords.max(axis=0)
        # Copy so that rescaling does not write back into `tumor`.
        cropped = np.array(layer[x_min:x_max + 1, y_min:y_max + 1],
                           dtype=np.float64, order='C')
        peak = cropped.max()
        if peak <= 0:
            # Cannot normalise a slice without positive intensities.
            continue
        cropped *= 255.0 / peak
        crops.append(cropped)
    return crops


def _export_patient_images(patient_dir, patient):
    """Write the PNG slices for one patient, then delete its ``.gz`` volumes.

    Raises on any load/processing/write problem so that the caller can skip
    the patient; the source volumes are only deleted after every image has
    been written successfully.
    """
    mod_file = os.path.join(patient_dir, '{}_{}.nii.gz'.format(patient, modality))
    seg_file = os.path.join(patient_dir, '{}_seg.nii.gz'.format(patient))
    mod_data = nib.load(mod_file).get_fdata()       # nii -> 3-D numpy array
    seg_mask_data = nib.load(seg_file).get_fdata()  # nii -> 3-D numpy array

    for frame, crop in enumerate(_tumor_crops(mod_data, seg_mask_data, pix_thresh)):
        # cv2.resize expects dsize as (width, height).
        im_resized = cv2.resize(crop, (block_w, block_h),
                                interpolation=cv2.INTER_CUBIC)
        out_file = os.path.join(patient_dir, "{}_img_{}.png".format(patient, frame))
        if not cv2.imwrite(out_file, im_resized):
            raise IOError('Could not write {}'.format(out_file))

    # Removing the now unwanted nii files of this patient
    for item in os.listdir(patient_dir):
        try:
            if item.startswith(patient) and item.endswith(".gz"):
                os.remove(os.path.join(patient_dir, item))
        except Exception as e:
            print(e)


def Generate_images():
    """Convert every patient's tumour region into 2-D PNG slices.

    Walks ``Work_Dir/<case>/<split>/<category>/<patient>/`` and, for each
    patient folder, loads ``<patient>_<modality>.nii.gz`` and
    ``<patient>_seg.nii.gz``, masks the modality volume with the per-slice
    bounding box of the segmentation, keeps slices with more than
    ``pix_thresh`` non-zero pixels, crops/normalises/resizes them to
    ``block_h`` x ``block_w`` and saves them as
    ``<patient>_img_<frame>.png`` inside the patient folder.

    WARNING: after a patient has been exported, every file in its folder whose
    name starts with the patient id and ends in ``.gz`` is deleted.

    Reads the notebook globals ``Work_Dir``, ``modality``, ``pix_thresh``,
    ``block_h`` and ``block_w``. A failure for one patient is reported and the
    remaining patients are still processed. On return the current working
    directory is ``Work_Dir``.
    """
    print("Generating images")
    try:
        for case in _subdirs(Work_Dir):  # Case = Case_1 etc.
            Case_Path = os.path.join(Work_Dir, case)
            for split in _subdirs(Case_Path):  # Split = train, test
                Split_Path = os.path.join(Case_Path, split)
                for category in _subdirs(Split_Path):  # Category = MGMT_positive, MGMT_negative
                    Category_Path = os.path.join(Split_Path, category)
                    for patient in tqdm(_subdirs(Category_Path)):  # one folder per patient
                        Patient_path = os.path.join(Category_Path, patient)
                        try:
                            _export_patient_images(Patient_path, patient)
                        except Exception as e:
                            # Keep going: one broken patient must not stop the run.
                            print('Skipping {}: {}'.format(Patient_path, e))
    except Exception as e:
        print(e)
    os.chdir(Work_Dir)
    print('Images Generated')

In [24]:
# Function Definition --> Creating_Cases
#  Dividing Datasets into number of cases for Cross Validation

def Creating_Cases(Case_Num):
    """Duplicate the working data into ``Case_1`` .. ``Case_<Case_Num>`` folders.

    The whole content of ``Work_Dir`` is copied into ``Work_Dir/Case_1``, which
    is then copied to the remaining case folders. Only when every copy
    succeeded are the original folders named in ``CATEGORIES`` removed from
    ``Work_Dir``; on a copy error they are kept so that no data is lost.

    Parameters
    ----------
    Case_Num : int
        Number of case folders to create; values below 1 do nothing.

    Side effects: appends the case names to the global list ``CASES`` (each
    name at most once) and reads the globals ``Work_Dir`` and ``CATEGORIES``.
    """
    import shutil
    import os
    print('Creating Cases')
    if Case_Num < 1:
        print('Case_Num must be at least 1; nothing to do')
        return

    for case in range(Case_Num):
        case_name = 'Case_' + str(case + 1)
        if case_name not in CASES:  # avoid duplicates when the cell is re-run
            CASES.append(case_name)

    first_case = os.path.join(Work_Dir, 'Case_1')
    created = True
    try:
        # Creating Case-1 (copytree lists the source before creating the
        # destination, so Case_1 is not copied into itself)
        shutil.copytree(Work_Dir, first_case)
        # Creating the remaining cases from Case-1
        for case in range(Case_Num - 1):
            shutil.copytree(first_case,
                            os.path.join(Work_Dir, 'Case_' + str(case + 2)))
    except Exception as e:
        print(e)
        created = False

    if not created:
        # Do not delete the only remaining copy of the data.
        print('Cases could not be created; original category folders were kept')
        return

    # Deleting the folders listed in CATEGORIES now that the cases hold the data
    for cate in CATEGORIES:
        try:
            shutil.rmtree(os.path.join(Work_Dir, cate))
        except Exception as e:
            print(e)
    print('Cases Created')

In [25]:
# Function Definition --> Support function for Separating_Test_Data
def seperate_dir_ratio(input_folder, output_folder, ratio, seed=None):
    """Split the patient folders of every category into ``train`` and ``test``.

    ``input_folder`` is expected to contain one sub-folder per category, each
    holding one folder per patient. The patients of each category are
    shuffled, the first ``ratio[0]`` share is copied to
    ``output_folder/train/<category>/`` and the rest to
    ``output_folder/test/<category>/``.

    Parameters
    ----------
    input_folder : str
        Folder containing the category sub-folders.
    output_folder : str
        Existing folder in which ``train`` and ``test`` are created. It may be
        the same as ``input_folder``.
    ratio : sequence of float
        ``ratio[0]`` is the train share; everything else goes to test
        (``ratio[1]`` is not read).
    seed : int, optional
        When given, the shuffle is reproducible; when ``None`` (default) the
        global ``random`` state is used, as before.

    Errors (for example ``train`` already existing) are printed, not raised.
    """
    import os
    import random
    import shutil
    try:
        # Category folders only; stray files such as .DS_Store are skipped.
        # Listed before train/test are created so they are never picked up
        # when input_folder and output_folder are the same.
        categories = [name for name in os.listdir(input_folder)
                      if os.path.isdir(os.path.join(input_folder, name))]

        # Create the train and test folders with the MGMT class folders inside.
        # os.mkdir (not makedirs) is kept on purpose: an existing split must
        # raise instead of being silently mixed with a new one.
        for split in ('train', 'test'):
            os.mkdir(os.path.join(output_folder, split))
            for category in ('MGMT_positive', 'MGMT_negative'):
                os.mkdir(os.path.join(output_folder, split, category))

        shuffler = random.Random(seed) if seed is not None else random

        for case in categories:
            patient_path = os.path.join(input_folder, case)
            # Sorted first so that a given seed always yields the same split.
            patient_list = sorted(
                name for name in os.listdir(patient_path)
                if os.path.isdir(os.path.join(patient_path, name)))
            shuffler.shuffle(patient_list)

            # First ratio[0] share -> train, remainder -> test
            split_at = int(len(patient_list) * ratio[0])
            assignments = (('train', patient_list[:split_at]),
                           ('test', patient_list[split_at:]))
            for split, patients in assignments:
                for patient in patients:
                    shutil.copytree(
                        os.path.join(patient_path, patient),
                        os.path.join(output_folder, split, case, patient))
    except Exception as e:
        print(e)

In [26]:
# Function Definition --> Separating_Test_Data to perform K-fold Cross Validation

def _split_is_complete(case_path, category):
    """Return True when every patient of ``category`` exists in train or test.

    Used as a safety check before the original category folder is deleted.
    Raises ``OSError`` if the category folder cannot be listed.
    """
    import os
    source = os.path.join(case_path, category)
    for patient in os.listdir(source):
        if patient == '.DS_Store':
            continue
        copied = any(
            os.path.exists(os.path.join(case_path, split, category, patient))
            for split in ('train', 'test'))
        if not copied:
            return False
    return True


def Separating_Test_Data():
    """Split every case folder of ``Work_Dir`` into ``train`` and ``test``.

    For each entry of ``Work_Dir`` (``.DS_Store`` excluded) the patients are
    divided 80/20 with ``seperate_dir_ratio``. Afterwards the original
    folders named in ``CATEGORIES`` are removed from the case folder, but only
    when every patient in them is present under ``train`` or ``test``;
    otherwise the folder is kept and a message is printed.

    Reads the globals ``Work_Dir`` and ``CATEGORIES``.
    """
    import os
    import shutil
    from tqdm import tqdm
    print('Seprating Test Data')
    workdir = os.listdir(Work_Dir)
    if '.DS_Store' in workdir:
        # Only dropped from the listing; the file itself is left on disk.
        workdir.remove('.DS_Store')
        print("Removed .DS_Store")
    for case in tqdm(workdir):
        Case_Path = os.path.join(Work_Dir, case + '/')
        try:
            # call the function to seperate the data
            seperate_dir_ratio(Case_Path, Case_Path, [0.8, 0.2])
        except Exception as e:
            print("Error in Seperating_Test_Data")
            print(e)
            continue  # never delete the originals of a case that failed

        # deleting the folders in CATEGORIES after seperating the data
        for cate in CATEGORIES:
            try:
                if _split_is_complete(Case_Path, cate):
                    shutil.rmtree(Case_Path + cate)
                else:
                    print('Keeping {}: not every patient was copied to '
                          'train/test'.format(Case_Path + cate))
            except Exception as e:
                print(e)
    print('Test Data Seprated')


In [27]:
# Function Definition --> removing .DS_Store files to avoid errors in feature labelling
def remove_ds_store():
    """Delete every ``.DS_Store`` file found anywhere below ``Work_Dir``.

    The tree is walked recursively, so the function does not depend on how
    many folder levels the working data has. The current working directory
    is left untouched. Problems while listing or deleting are printed and the
    walk continues. Reads the global ``Work_Dir``.
    """
    import os
    print('Removing .DS_Store')
    # onerror=print reports folders that cannot be listed instead of hiding them
    for folder, _subfolders, files in os.walk(Work_Dir, onerror=print):
        if '.DS_Store' not in files:
            continue
        try:
            os.remove(os.path.join(folder, '.DS_Store'))
            print("Removed .DS_Store from : {}".format(folder))
        except OSError as e:
            print(e)


In [28]:
# Function Definition --> Reconstructing the Working Data
def reconstruct():
    """Reset the working data to a fresh copy of the backup.

    Deletes ``Work_Dir`` if it exists and then copies ``Org_Dir`` to
    ``Work_Dir``. Failures of the copy are printed (not raised) and
    'Reconstruction complete' is only reported when the copy succeeded.

    Reads the globals ``Work_Dir`` and ``Org_Dir``.
    """
    import os
    import shutil
    print('Reconstructing')
    # Deleting working directory. Work_Dir is used directly so that the folder
    # removed is exactly the one recreated below (also on case-sensitive
    # file systems), and a missing folder on the first run is not an error.
    if os.path.isdir(Work_Dir):
        shutil.rmtree(Work_Dir)
    # Copying data from backup
    try:
        shutil.copytree(Org_Dir, Work_Dir)
    except FileExistsError:
        print('Working directory already exists')
        return
    except OSError as e:  # includes shutil.Error and a missing backup folder
        print('Reconstruction failed: {}'.format(e))
        return

    print('Reconstruction complete')


In [29]:
# Function Definition --> Finding empty folders
def find_empty_folders():
    """Print every patient folder that holds nothing besides ``.DS_Store``.

    Scans ``Work_Dir/<case>/<split>/<category>/<patient>/`` (``Work_Dir`` is a
    notebook global). An error inside one case is printed and the scan moves
    on to the next case.
    """
    import os

    def visible(folder):
        # Directory listing without the macOS metadata entry.
        return [entry for entry in os.listdir(folder) if entry != '.DS_Store']

    print('Finding empty folders')
    for case in visible(Work_Dir):
        try:
            case_dir = os.path.join(Work_Dir, case + '/')
            for split in visible(case_dir):
                split_dir = os.path.join(case_dir, split + '/')
                for category in visible(split_dir):
                    category_dir = os.path.join(split_dir, category + '/')
                    for patient in visible(category_dir):
                        patient_dir = os.path.join(category_dir, patient + '/')
                        if not visible(patient_dir):
                            print('Empty folder found at : {}'.format(patient_dir))
        except Exception as e:
            print(e)

In [30]:
# All Function Calls
import os
import numpy as np
import nibabel as nib
from PIL import Image
import cv2

# Data locations, all derived from the project root `path` defined in the configuration cell.
PATH = path + 'Data/BRATS/mod_data1/'
# Untouched backup that reconstruct() copies from.
Org_Dir = PATH + 'Original_Data_Backup/'
# Scratch copy that every step below reads and modifies.
Work_Dir = PATH + 'Working_Data/'

# Function Call --> Backup Original Data For Safety Purpose
# NOTE(review): Backup() is not defined in the visible part of the notebook.
# Backup()

# Function Call --> Reconstructing the folder structure
# (deletes the working directory and copies it again from the backup)
reconstruct()

# Function Call --> Dividing Datasets into number of cases for Cross Validation
# CASES must exist before the call: Creating_Cases appends the case names to it.
CASES = []
Creating_Cases(Case_Num)

# Function Call --> Separating_Test_Data to perform K-fold Cross Validation
Separating_Test_Data()

# Function Call --> Generate Tumor Images of Patients
Generate_images()

# Function Call --> remove .DS_Store files to avoid errors in feature labelling
# remove_ds_store()

# Function Call --> Find empty folders
find_empty_folders()

# Return to the project root once the pipeline has finished.
os.chdir(path)
print('All Done')
# Spoken completion notice. NOTE(review): `say` is presumably the macOS
# text-to-speech command; the cell's displayed value is os.system's exit status.
os.system('say "Data cleaning is done"')

Reconstructing
Reconstruction complete
Creating Cases
Cases Created
Seprating Test Data
Removed .DS_Store


100%|██████████| 5/5 [00:37<00:00,  7.53s/it]


Test Data Seprated
Generating images


100%|██████████| 40/40 [00:06<00:00,  5.75it/s]
100%|██████████| 40/40 [00:07<00:00,  5.44it/s]
100%|██████████| 160/160 [00:28<00:00,  5.58it/s]
100%|██████████| 160/160 [00:29<00:00,  5.52it/s]
100%|██████████| 40/40 [00:06<00:00,  5.96it/s]
100%|██████████| 40/40 [00:07<00:00,  5.58it/s]
100%|██████████| 160/160 [00:28<00:00,  5.55it/s]
100%|██████████| 160/160 [00:28<00:00,  5.60it/s]
100%|██████████| 40/40 [00:07<00:00,  5.69it/s]
100%|██████████| 40/40 [00:07<00:00,  5.55it/s]
100%|██████████| 160/160 [00:28<00:00,  5.60it/s]
100%|██████████| 160/160 [00:28<00:00,  5.60it/s]
100%|██████████| 40/40 [00:06<00:00,  5.89it/s]
100%|██████████| 40/40 [00:07<00:00,  5.61it/s]
100%|██████████| 160/160 [00:27<00:00,  5.77it/s]
100%|██████████| 160/160 [00:28<00:00,  5.64it/s]
100%|██████████| 40/40 [00:06<00:00,  5.93it/s]
100%|██████████| 40/40 [00:07<00:00,  5.51it/s]
100%|██████████| 160/160 [00:28<00:00,  5.59it/s]
100%|██████████| 160/160 [00:28<00:00,  5.58it/s]


Images Generated
Finding empty folders
All Done


0