#General Information
The list of all the cases in the BraTS 2020 dataset is included in a CSV file. This code reads that CSV to get the class of each case (LGG vs. HGG), reads the T1-sequence MRI and its corresponding segmentation mask, and then stores only the tumorous slices in an array. Finally, this array is saved as a .npy file.

#General Instructions
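Please extract the MICCAI_BraTS2020_TrainingData zip file to the folder Datasets/BraTS2020 (located under base_path), so that name_mapping.csv and the per-subject folders sit directly inside that folder.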

In [None]:
#import the necessary libraries
import numpy as np
import os
import nibabel as nib
import pandas as pd

In [None]:
#Mount Google Drive inside the Colab runtime. Run this cell only if your data
#resides on Google Drive.
from google.colab import drive

gdrive_mount_point = '/content/gdrive'
drive.mount(gdrive_mount_point)

Mounted at /content/gdrive


In [None]:
#Set base_path to the location where the data and results of your project
#reside. Keep the trailing '/': the other cells build file paths by plain
#string concatenation (base_path + 'Datasets/...').
base_path = '/content/gdrive/MyDrive/HPT/'

In [None]:
#Load the name-mapping CSV that lists every patient of the dataset.
mapping_csv_path = base_path + 'Datasets/BraTS2020/name_mapping.csv'
pd_files_mappings = pd.read_csv(mapping_csv_path)

In [None]:
#Split the subject IDs by tumour grade into two separate arrays
#(one for HGG patients, one for LGG patients).
grade_column = pd_files_mappings['Grade']
id_column = pd_files_mappings['BraTS_2020_subject_ID']
pd_HGG = id_column[grade_column == 'HGG'].values
pd_LGG = id_column[grade_column == 'LGG'].values

In [None]:
#Show how many subject IDs belong to each grade (HGG first, then LGG).
for grade_ids in (pd_HGG, pd_LGG):
    print(len(grade_ids))

#Create lists of folder paths for all the patients belonging to HGG and LGG class.

In [None]:
#This function returns a list of all the paths for HGG and LGG class depending
#upon the parameter Grade
def getListOfFiles(dirName, Grade, subject_ids=None):
    """Return the paths of the subject folders in ``dirName`` for one grade.

    Args:
        dirName: Directory whose immediate sub-directories are the
            per-subject folders.
        Grade: ``"HGG"`` or ``"LGG"``. Any other value yields an empty list.
        subject_ids: Optional iterable of folder names to keep. When omitted,
            the notebook-level arrays ``pd_HGG`` / ``pd_LGG`` are used,
            selected by ``Grade``.

    Returns:
        List of full paths (sorted by folder name) of the sub-directories of
        ``dirName`` whose name is one of the selected subject IDs.
    """
    if Grade not in ("HGG", "LGG"):
        return []

    if subject_ids is None:
        # The subject-ID arrays built from the mapping CSV are named
        # pd_HGG / pd_LGG; the previously referenced HGG_list / LGG_list
        # do not exist anywhere in the notebook and raised NameError.
        subject_ids = pd_HGG if Grade == "HGG" else pd_LGG

    # A set gives O(1) membership tests while scanning the directory.
    wanted = set(subject_ids)

    allFiles = []
    # Sorting makes the subject order (and hence the order of the stored
    # slices) reproducible; os.listdir returns entries in arbitrary order.
    for entry in sorted(os.listdir(dirName)):
        fullPath = os.path.join(dirName, entry)
        if entry in wanted and os.path.isdir(fullPath):
            allFiles.append(fullPath)

    return allFiles


In [None]:
#call the function getListOfFiles to get the list of LGG and HGG cases' paths
#(the closing quote of the folder string was missing, which made this cell a
#SyntaxError)
BraTS2020_folder = base_path + 'Datasets/BraTS2020'
HGG_subjects = getListOfFiles(BraTS2020_folder, "HGG")
LGG_subjects = getListOfFiles(BraTS2020_folder, "LGG")

In [None]:
#Show how many subject folders were found for each grade (HGG, then LGG).
for subject_paths in (HGG_subjects, LGG_subjects):
    print(len(subject_paths))

#Read and Save HGG Tumorous Slices

In [None]:
#Pre-allocate the buffer for the HGG tumorous T1 slices: one 240x240 float16
#slice per row. The number of rows (19496) is hard-coded.
hgg_buffer_shape = (19496, 240, 240)
HGG_tumorous_slices_t1 = np.zeros(hgg_buffer_shape, dtype=np.float16)

In [None]:
#Run this code cell to store all the HGG tumorous slices to the array
#HGG_tumorous_slices_t1

#number of slices inspected along the third axis of every volume
num_slices = 155

#index of the next free row in HGG_tumorous_slices_t1, i.e. the number of
#tumorous slices stored so far
tumorous_counter_t1 = 0

subject_counter = 0

for subject_counter, subject_dir in enumerate(HGG_subjects, start=1):
    print ('Subject No.:' + str(subject_counter))

    #getting the list of files inside the current subject
    listOfFile = os.listdir(subject_dir)

    #flagging tumorous slices: a slice is tumorous when its segmentation
    #mask contains at least one value greater than zero
    tumorous_indices = np.zeros(num_slices, dtype=bool)
    for item in listOfFile:
        if '_seg.nii' in item:
            seg_data = nib.load(os.path.join(subject_dir, item)).get_fdata()
            tumorous_indices |= (seg_data[:, :, :num_slices].max(axis=(0, 1)) > 0)

    #getting t1 slices which are tumorous
    for sequence in listOfFile:
        if '_t1.nii' in sequence:
            t1_data = nib.load(os.path.join(subject_dir, sequence)).get_fdata()

            #normalize the scan by its intensity range
            #NOTE(review): min_norm is not subtracted before dividing, so the
            #result lies in [0, 1] only when the scan minimum is 0 - confirm
            #that this is intended.
            max_norm = np.amax(t1_data)
            min_norm = np.amin(t1_data)
            t1_data = t1_data/(max_norm - min_norm)

            #extract tumorous slices; move the slice axis to the front so the
            #selection has the same (slice, row, column) layout as the buffer
            selected = np.moveaxis(t1_data[:, :, :num_slices][:, :, tumorous_indices], -1, 0)
            next_free = tumorous_counter_t1 + selected.shape[0]
            if next_free > HGG_tumorous_slices_t1.shape[0]:
                raise IndexError(
                    'HGG_tumorous_slices_t1 has room for '
                    + str(HGG_tumorous_slices_t1.shape[0])
                    + ' slices but at least ' + str(next_free)
                    + ' are needed; allocate a larger array.')
            HGG_tumorous_slices_t1[tumorous_counter_t1:next_free] = selected
            tumorous_counter_t1 = next_free

#Drop the rows that were never filled, so that the array which is saved later
#does not end with all-zero slices when fewer slices were found than the
#pre-allocated capacity.
HGG_tumorous_slices_t1 = HGG_tumorous_slices_t1[:tumorous_counter_t1]
    
  


In [None]:
#print the no. of tumorous slices in HGG patients, i.e. the number of rows of
#HGG_tumorous_slices_t1 that were filled by the extraction loop
print (tumorous_counter_t1)

In [None]:
#Write the HGG tumorous slices (the array HGG_tumorous_slices_t1) to a .npy
#file inside the dataset folder.
hgg_output_path = base_path + 'Datasets/BraTS2020/BraTS2020_Tumorous_HGG_T1_f16.npy'
np.save(hgg_output_path, HGG_tumorous_slices_t1)

#Read and Save LGG Tumorous Slices

In [None]:
#Pre-allocate the buffer for the LGG tumorous T1 slices: one 240x240 float16
#slice per row. The number of rows (4926) is hard-coded.
lgg_buffer_shape = (4926, 240, 240)
LGG_tumorous_slices_t1 = np.zeros(lgg_buffer_shape, dtype=np.float16)

In [None]:
#Run this code cell to store all the LGG tumorous slices to the array
#LGG_tumorous_slices_t1

#number of slices inspected along the third axis of every volume
num_slices = 155

#index of the next free row in LGG_tumorous_slices_t1, i.e. the number of
#tumorous slices stored so far
tumorous_counter_t1 = 0

subject_counter = 0

for subject_counter, subject_dir in enumerate(LGG_subjects, start=1):
    print ('Subject No.:' + str(subject_counter))

    #getting the list of files inside the current subject
    listOfFile = os.listdir(subject_dir)

    #flagging tumorous slices: a slice is tumorous when its segmentation
    #mask contains at least one value greater than zero
    tumorous_indices = np.zeros(num_slices, dtype=bool)
    for item in listOfFile:
        if '_seg.nii' in item:
            seg_data = nib.load(os.path.join(subject_dir, item)).get_fdata()
            tumorous_indices |= (seg_data[:, :, :num_slices].max(axis=(0, 1)) > 0)

    #getting t1 slices which are tumorous
    for sequence in listOfFile:
        if '_t1.nii' in sequence:
            t1_data = nib.load(os.path.join(subject_dir, sequence)).get_fdata()

            #normalize the scan by its intensity range
            #NOTE(review): min_norm is not subtracted before dividing, so the
            #result lies in [0, 1] only when the scan minimum is 0 - confirm
            #that this is intended.
            max_norm = np.amax(t1_data)
            min_norm = np.amin(t1_data)
            t1_data = t1_data/(max_norm - min_norm)

            #extract tumorous slices; move the slice axis to the front so the
            #selection has the same (slice, row, column) layout as the buffer
            selected = np.moveaxis(t1_data[:, :, :num_slices][:, :, tumorous_indices], -1, 0)
            next_free = tumorous_counter_t1 + selected.shape[0]
            if next_free > LGG_tumorous_slices_t1.shape[0]:
                raise IndexError(
                    'LGG_tumorous_slices_t1 has room for '
                    + str(LGG_tumorous_slices_t1.shape[0])
                    + ' slices but at least ' + str(next_free)
                    + ' are needed; allocate a larger array.')
            LGG_tumorous_slices_t1[tumorous_counter_t1:next_free] = selected
            tumorous_counter_t1 = next_free

#Drop the rows that were never filled, so that the array which is saved later
#does not end with all-zero slices when fewer slices were found than the
#pre-allocated capacity.
LGG_tumorous_slices_t1 = LGG_tumorous_slices_t1[:tumorous_counter_t1]
    
  


In [None]:
#print the no. of tumorous slices in LGG patients, i.e. the number of rows of
#LGG_tumorous_slices_t1 that were filled by the extraction loop
print (tumorous_counter_t1)

In [None]:
#Write the LGG tumorous slices (the array LGG_tumorous_slices_t1) to a .npy
#file inside the dataset folder.
lgg_output_path = base_path + 'Datasets/BraTS2020/BraTS2020_Tumorous_LGG_T1_f16.npy'
np.save(lgg_output_path, LGG_tumorous_slices_t1)