# Import Required Libraries
Import the necessary libraries, including h5py and numpy.

In [9]:
# Import Required Libraries
import pandas as pd
import h5py
import numpy as np
import nibabel as nib
# h5py is used for handling HDF5 files
# numpy is used for numerical operations

# Load h5 File
Use h5py to load an h5 file from a specified path.

In [6]:
# Load h5 File
file_path = 'data/BraTS2020_training_data/content/data/volume_1_slice_0.h5'  # specify the path to your h5 file

# Open the h5 file in read mode
with h5py.File(file_path, 'r') as h5_file:
    # Explore the structure of the H5 file
    def print_structure(name, obj):
        print(name)
    h5_file.visititems(print_structure)
    
    # Access the 'image' dataset
    if 'image' in h5_file:
        image_data = h5_file['image']
        print("Image shape:", image_data.shape)
    else:
        print("Dataset 'image' not found in the file.")
    
    # Access the 'mask' dataset
    if 'mask' in h5_file:
        mask_data = h5_file['mask']
        print("Mask shape:", mask_data.shape)
    else:
        print("Dataset 'mask' not found in the file.")

image
mask
Image shape: (240, 240, 4)
Mask shape: (240, 240, 3)


In [10]:

# Load the CSV file
csv_path = 'data/BraTS2020_training_data/content/data/meta_data.csv'
df = pd.read_csv(csv_path)

# Group by volume to process each volume separately
grouped = df.groupby('volume')

for volume_id, group in grouped:
    # Initialize lists to store image and mask slices
    image_slices = []
    mask_slices = []

    for _, row in group.iterrows():
        h5_path = "data/BraTS2020_training_data" + row['slice_path']
        
        with h5py.File(h5_path, 'r') as h5_file:
            # Assuming the datasets are named 'image' and 'mask'
            image_slices.append(h5_file['image'][:])
            mask_slices.append(h5_file['mask'][:])
    # Stack slices to form 3D volumes
    image_slices = np.array(image_slices)  # shape (num_slices, h, w, 4)
    mask_slices = np.array(mask_slices)    # shape (num_slices, h, w, 3)
    
    # Transpose to get shape (h, w, num_slices, 4) for images and (h, w, num_slices, 3) for masks
    image_volumes = np.transpose(image_slices, (1, 2, 0, 3))
    mask_volumes = np.transpose(mask_slices, (1, 2, 0, 3))
    
    # Save each MRI type and mask type as separate NIfTI images
    mri_types = ['type1', 'type2', 'type3', 'type4']
    mask_types = ['mask1', 'mask2', 'mask3']
    
    for i, mri_type in enumerate(mri_types):
        image_nii = nib.Nifti1Image(image_volumes[..., i], np.eye(4))
        image_nii_path = f'data/Brats/volume_{volume_id}_{mri_type}.nii.gz'
        nib.save(image_nii, image_nii_path)
        print(f'Saved {image_nii_path}')
    
    for j, mask_type in enumerate(mask_types):
        mask_nii = nib.Nifti1Image(mask_volumes[..., j], np.eye(4))
        mask_nii_path = f'data/Brats/volume_{volume_id}_{mask_type}.nii.gz'
        nib.save(mask_nii, mask_nii_path)
        print(f'Saved {mask_nii_path}')


FileNotFoundError: [Errno 2] No such file or directory: 'data/Brats/volume_1_type1.nii.gz'

# Explore h5 File Structure
Explore the structure of the h5 file, including groups and datasets.

In [None]:
# Explore h5 File Structure

# Open the h5 file in read mode
with h5py.File(file_path, 'r') as h5_file:
    # Function to recursively explore the structure of the h5 file
    def explore_h5_structure(name, obj):
        if isinstance(obj, h5py.Group):
            print(f"Group: {name}")
        elif isinstance(obj, h5py.Dataset):
            print(f"Dataset: {name}, shape: {obj.shape}, dtype: {obj.dtype}")

    # Visit all items in the file
    h5_file.visititems(explore_h5_structure)

# Extract Data from h5 File
Extract specific datasets from the h5 file and convert them to numpy arrays for further analysis.

In [None]:
# Extract Data from h5 File

# Open the h5 file in read mode
with h5py.File(file_path, 'r') as h5_file:
    # Extract specific datasets
    dataset_1 = h5_file['dataset_1'][:]  # replace 'dataset_1' with your actual dataset name
    dataset_2 = h5_file['dataset_2'][:]  # replace 'dataset_2' with your actual dataset name

# Convert datasets to numpy arrays
array_1 = np.array(dataset_1)
array_2 = np.array(dataset_2)

# Display the extracted data
print("Dataset 1:", array_1)
print("Dataset 2:", array_2)