In [1]:
# Mount Google Drive at /content/drive; the dataset archive is read from
# /content/drive/MyDrive in the next cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# save the zip file name as "BraTS2020_Data.zip"
!unzip /content/drive/MyDrive/BraTS2020_Data_V2.zip

Archive:  /content/drive/MyDrive/BraTS2020_Data_V2.zip
   creating: Data/
  inflating: __MACOSX/._Data         
  inflating: Data/.DS_Store          
  inflating: __MACOSX/Data/._.DS_Store  
   creating: Data/Test/
  inflating: __MACOSX/Data/._Test    
   creating: Data/Train/
  inflating: __MACOSX/Data/._Train   
   creating: Data/Validation/
  inflating: __MACOSX/Data/._Validation  
  inflating: Data/Test/.DS_Store     
  inflating: __MACOSX/Data/Test/._.DS_Store  
   creating: Data/Test/BraTS20_Test_011/
  inflating: __MACOSX/Data/Test/._BraTS20_Test_011  
   creating: Data/Test/BraTS20_Test_013/
  inflating: __MACOSX/Data/Test/._BraTS20_Test_013  
   creating: Data/Test/BraTS20_Test_012/
  inflating: __MACOSX/Data/Test/._BraTS20_Test_012  
  inflating: Data/Train/.DS_Store    
  inflating: __MACOSX/Data/Train/._.DS_Store  
   creating: Data/Train/BraTS20_Training_008/
  inflating: __MACOSX/Data/Train/._BraTS20_Training_008  
   creating: Data/Train/BraTS20_Training_006/
  inflating

In [3]:
!pip install nibabel h5py numpy



In [4]:
import os
# Pre-create one HDF5 output folder per dataset split; folders that already
# exist are left as they are.
for h5_dir in (
    '/content/BraTS2020_ValidationData_h5',
    '/content/BraTS2020_TestData_h5',
    '/content/BraTS2020_TrainData_h5',
):
    os.makedirs(h5_dir, exist_ok=True)

In [7]:
from PIL import Image
import numpy as np
import nibabel as nib
import cv2
import h5py
import os

# Settings for each dataset split:
#   (input directory, output directory, case-folder prefix)
# To convert a different split, change SPLIT below instead of editing the
# three path variables by hand, and set S / E to the case numbers to process.
SPLIT_SETTINGS = {
    'Train': (
        '/content/Data/Train',
        '/content/BraTS2020_TrainData_h5',
        'BraTS20_Training_',
    ),
    'Test': (
        '/content/Data/Test',
        '/content/BraTS2020_TestData_h5',
        'BraTS20_Test_',
    ),
    'Validation': (
        '/content/Data/Validation',
        '/content/BraTS2020_ValidationData_h5',
        'BraTS20_Val_',
    ),
}

# Which split this run converts.
SPLIT = 'Train'

path_to_file, path_to_output, base_name = SPLIT_SETTINGS[SPLIT]

# First case number to convert (inclusive)
S = 1
# Last case number to convert (inclusive)
E = 10

# Make sure the destination folder is there before any slice is written
os.makedirs(path_to_output, exist_ok=True)

for i in range(S, E + 1):
    # Case numbers are zero-padded to three digits in the folder and file names
    patient_id = f"{i:03d}"

    # Every file of a case sits at <path_to_file>/<case>/<case><suffix>
    case_name = base_name + patient_id
    case_dir = os.path.join(path_to_file, case_name)

    # Open the four MRI modalities and the segmentation of this case
    flair, t2, t1, t1c, seg = (
        nib.load(os.path.join(case_dir, case_name + suffix))
        for suffix in ("_flair.nii", "_t2.nii", "_t1.nii", "_t1ce.nii", "_seg.nii")
    )

    # Read the voxel data of each image as floating-point arrays
    FLAIR, T2, T1, T1c, SEG = (
        image.get_fdata() for image in (flair, t2, t1, t1c, seg)
    )

    # Relabel class 4 as 3 so the mask labels become 0, 1, 2, 3
    # (as per BRATS convention)
    SEG[SEG == 4] = 3

    # Normalize intensities with foreground (voxel > 0) statistics:
    # (I - mean) / (3 * std / 128) + 127, clipped to [0, 255]; background stays 0.
    def norm_img(img):
        """Rescale an image volume to the 0-255 range using foreground statistics.

        Voxels greater than 0 are treated as foreground. They are centred on
        the foreground mean, scaled by ``3 * std / 128``, shifted by 127 and
        clipped to [0, 255]. All other voxels are set to 0.

        The input array is left untouched; earlier versions overwrote it in
        place, which also altered the array the caller still held.

        Args:
            img: numeric array-like of any shape.

        Returns:
            A new float64 array with the same shape as ``img``.
        """
        source = np.asarray(img, dtype=np.float64)
        result = np.zeros(source.shape, dtype=np.float64)

        foreground = source > 0
        if not foreground.any():
            # Nothing to normalise; also avoids mean/std of an empty selection.
            return result

        values = source[foreground]
        iMean = np.mean(values)
        iSD = (np.std(values) * 3) / 128

        if iSD > 0:
            result[foreground] = np.clip(((values - iMean) / iSD) + 127, 0, 255)
        else:
            # Constant foreground: every voxel equals the mean, which maps to 127.
            # Dividing by the zero spread would produce NaN instead.
            result[foreground] = 127
        return result

    nFLAIR = norm_img(FLAIR)
    nT2 = norm_img(T2)
    nT1 = norm_img(T1)
    nT1c = norm_img(T1c)

    H = 240
    W = 240
    num_slices = FLAIR.shape[2]

    for j in range(num_slices):
        # Create a separate HDF5 file for each slice
        h5_filename = os.path.join(path_to_output, f"{base_name}{patient_id}_slice_{j:03d}.h5")

        with h5py.File(h5_filename, 'w') as h5f:
            # Prepare image data
            slice_data = np.zeros([H, W, 4], dtype=np.uint8)
            slice_data[:, :, 0] = cv2.resize(nFLAIR[:, :, j].astype(np.uint8), (H, W))
            slice_data[:, :, 1] = cv2.resize(nT1c[:, :, j].astype(np.uint8), (H, W))
            slice_data[:, :, 2] = cv2.resize(nT1[:, :, j].astype(np.uint8), (H, W))
            slice_data[:, :, 3] = cv2.resize(nT2[:, :, j].astype(np.uint8), (H, W))

            # Prepare mask data
            mask_data = np.zeros([H, W, 3], dtype=np.uint8)
            resized_mask = cv2.resize(SEG[:, :, j].astype(np.uint8), (H, W))
            # Channel 0: Whole tumor (labels 1, 2, 3)
            mask_data[:, :, 0] = (resized_mask > 0).astype(np.uint8) * 255
            # Channel 1: Tumor core (labels 1, 3)
            mask_data[:, :, 1] = ((resized_mask == 1) | (resized_mask == 3)).astype(np.uint8) * 255
            # Channel 2: Enhancing tumor (label 3)
            mask_data[:, :, 2] = (resized_mask == 3).astype(np.uint8) * 255

            # Store slice and mask in the HDF5 file
            h5f.create_dataset("slice", data=slice_data, compression="gzip")
            h5f.create_dataset("mask", data=mask_data, compression="gzip")

            # Add some metadata
            h5f.attrs["patient_id"] = patient_id
            h5f.attrs["slice_num"] = j
            h5f.attrs["image_size"] = f"{H}x{W}"

        print(f"Saved slice {j} for patient {patient_id} to {h5_filename}")

print("Conversion to HDF5 completed successfully.")

Saved slice 0 for patient 001 to /content/BraTS2020_TrainData_h5/BraTS20_Training_001_slice_000.h5
Saved slice 1 for patient 001 to /content/BraTS2020_TrainData_h5/BraTS20_Training_001_slice_001.h5
Saved slice 2 for patient 001 to /content/BraTS2020_TrainData_h5/BraTS20_Training_001_slice_002.h5
Saved slice 3 for patient 001 to /content/BraTS2020_TrainData_h5/BraTS20_Training_001_slice_003.h5
Saved slice 4 for patient 001 to /content/BraTS2020_TrainData_h5/BraTS20_Training_001_slice_004.h5
Saved slice 5 for patient 001 to /content/BraTS2020_TrainData_h5/BraTS20_Training_001_slice_005.h5
Saved slice 6 for patient 001 to /content/BraTS2020_TrainData_h5/BraTS20_Training_001_slice_006.h5
Saved slice 7 for patient 001 to /content/BraTS2020_TrainData_h5/BraTS20_Training_001_slice_007.h5
Saved slice 8 for patient 001 to /content/BraTS2020_TrainData_h5/BraTS20_Training_001_slice_008.h5
Saved slice 9 for patient 001 to /content/BraTS2020_TrainData_h5/BraTS20_Training_001_slice_009.h5
Saved slic

In [8]:
# Zip a folder (replace 'folder_name' with your actual folder name)
!zip -r /content/BraTS2020_h5_Data_v6.zip /content/BraTS2020_h5_Data_v6

  adding: content/BraTS2020_h5_Data_v6/ (stored 0%)
  adding: content/BraTS2020_h5_Data_v6/BraTS2020_TestData_h5/ (stored 0%)
  adding: content/BraTS2020_h5_Data_v6/BraTS2020_TestData_h5/BraTS20_Test_013_slice_091.h5 (deflated 18%)
  adding: content/BraTS2020_h5_Data_v6/BraTS2020_TestData_h5/BraTS20_Test_011_slice_075.h5 (deflated 18%)
  adding: content/BraTS2020_h5_Data_v6/BraTS2020_TestData_h5/BraTS20_Test_013_slice_064.h5 (deflated 20%)
  adding: content/BraTS2020_h5_Data_v6/BraTS2020_TestData_h5/BraTS20_Test_011_slice_020.h5 (deflated 42%)
  adding: content/BraTS2020_h5_Data_v6/BraTS2020_TestData_h5/BraTS20_Test_012_slice_082.h5 (deflated 17%)
  adding: content/BraTS2020_h5_Data_v6/BraTS2020_TestData_h5/BraTS20_Test_013_slice_008.h5 (deflated 49%)
  adding: content/BraTS2020_h5_Data_v6/BraTS2020_TestData_h5/BraTS20_Test_012_slice_070.h5 (deflated 17%)
  adding: content/BraTS2020_h5_Data_v6/BraTS2020_TestData_h5/BraTS20_Test_013_slice_053.h5 (deflated 22%)
  adding: content/BraTS202