In [5]:
import os
import nibabel as nib
import numpy as np
import h5py
import matplotlib.pyplot as plt

In [6]:
def make_groups(hdf5_file):
    """Create the group hierarchy used by the OASIS conversion in an HDF5 file.

    The resulting layout is::

        training/{image, seg}
        validation/{image, seg}
        test_seg/{image, seg}
        test_lm/{image, landmarks}

    The file is opened in append mode, so an existing file is extended rather
    than truncated. Groups are obtained with ``require_group``, which means
    calling this function again on a file that already contains them is a
    no-op instead of an error (``create_group`` raises when the name exists).

    Parameters
    ----------
    hdf5_file : str
        Path of the HDF5 file to create or extend.

    Raises
    ------
    TypeError
        Raised by h5py if one of the names above already exists in the file
        but is not a group.
    """
    # Split name -> names of the sub-groups it holds; order matches creation order.
    layout = {
        'training': ('image', 'seg'),
        'validation': ('image', 'seg'),
        'test_seg': ('image', 'seg'),
        'test_lm': ('image', 'landmarks'),
    }
    with h5py.File(hdf5_file, 'a') as f:
        for split, children in layout.items():
            parent = f.require_group(split)
            for child in children:
                parent.require_group(child)


In [7]:

def _sorted_walk(top):
    """Walk ``top`` like ``os.walk`` but in sorted order; yield ``(root, files)``.

    ``os.walk`` lists entries in whatever order the filesystem returns them,
    which would make the dataset indices depend on the machine.
    """
    for root, dirs, files in os.walk(top):
        dirs.sort()  # in place, so os.walk also descends in sorted order
        yield root, sorted(files)


def _load_oriented_volume(path):
    """Load a NIfTI volume as an array, swap axes 1 and 2, then flip axes 0 and 2."""
    img = nib.load(path).get_fdata()
    img = np.transpose(img, (0, 2, 1))
    return img[::-1, :, ::-1]


def _write_image_seg_pairs(f, top, locate, record_shape=False):
    """Store every ``norm``/``seg35`` volume found under ``top`` in open file ``f``.

    ``locate(i)`` maps the running subject index ``i`` to a
    ``(group name, dataset index)`` pair. When ``record_shape`` is true the
    shape of each loaded volume is written to the file-level ``shape``
    attribute. Returns the number of ``seg35`` files written.
    """
    i = 0
    for root, files in _sorted_walk(top):
        for file in files:
            if not file.endswith("nii.gz"):
                continue
            img = _load_oriented_volume(os.path.join(root, file))
            if record_shape:
                f.attrs['shape'] = img.shape
            group, k = locate(i)
            if file.endswith("norm.nii.gz"):
                f[group + '/image'].create_dataset(str(k), data=img)
            elif file.endswith("seg35.nii.gz"):
                f[group + '/seg'].create_dataset(str(k), data=img)
                # The counter advances on the segmentation, so the matching
                # image must have been visited first. Sorted file names give
                # that order when both files share a prefix ("norm" < "seg35").
                i += 1
            else:
                raise ValueError('File name not recognized')
    return i


def convert_to_h5py(directory, hdf5_file, val_size=30, seg_dim=36,
                    max_val=279.82879638671875):
    """Copy the OASIS volumes and landmarks found under ``directory`` into an HDF5 file.

    Three sub-trees are read:

    * ``<directory>/train``: ``*norm.nii.gz`` / ``*seg35.nii.gz`` pairs. The
      first ``val_size`` subjects go to ``validation``, the rest to
      ``training`` (re-indexed from 0).
    * ``<directory>/test/neurite``: the same pairs, stored in ``test_seg``.
    * ``<directory>/test/sub-lms-oasis``: ``*T1w_oasis.nii.gz`` images
      (divided by ``max_val``) and ``*lms.txt`` landmark tables, stored in
      ``test_lm``.

    Datasets are named by their integer index as a string. Each split group
    receives an ``N`` attribute with its subject count; the three
    segmentation splits also receive ``seg_dim``. The target groups must
    already exist in the file (see ``make_groups``).

    Directories and files are visited in sorted order so that indices are
    reproducible and each image is written before its segmentation/landmarks.

    Parameters
    ----------
    directory : str
        Root folder containing ``train`` and ``test``.
    hdf5_file : str
        Path of the HDF5 file, opened once in append mode.
    val_size : int, optional
        Number of leading training subjects assigned to ``validation``.
    seg_dim : int, optional
        Value stored in the ``seg_dim`` attribute of the segmentation splits.
    max_val : float, optional
        Divisor applied to the ``test_lm`` images; the default is the maximum
        value of the training set according to the original author's note.

    Raises
    ------
    ValueError
        If a file ending in ``nii.gz`` under ``train`` or ``test/neurite``
        ends in neither ``norm.nii.gz`` nor ``seg35.nii.gz``.
    """
    def train_or_val(i):
        # The first val_size subjects form the validation split.
        if i < val_size:
            return 'validation', i
        return 'training', i - val_size

    # One handle for the whole conversion instead of reopening per volume.
    with h5py.File(hdf5_file, 'a') as f:
        # training + validation
        n_total = _write_image_seg_pairs(
            f, os.path.join(directory, 'train'), train_or_val, record_shape=True)
        # Clamp so that fewer than val_size subjects cannot yield a negative
        # training count or an overstated validation count.
        n_val = min(n_total, val_size)
        f['training'].attrs['N'] = n_total - n_val
        f['training'].attrs['seg_dim'] = seg_dim
        f['validation'].attrs['N'] = n_val
        f['validation'].attrs['seg_dim'] = seg_dim

        # test_seg
        n_test_seg = _write_image_seg_pairs(
            f, os.path.join(directory, 'test', 'neurite'),
            lambda i: ('test_seg', i))
        f['test_seg'].attrs['N'] = n_test_seg
        f['test_seg'].attrs['seg_dim'] = seg_dim

        # test_lm
        i = 0
        lm_root = os.path.join(directory, 'test', 'sub-lms-oasis')
        for root, files in _sorted_walk(lm_root):
            for file in files:
                if file.endswith("T1w_oasis.nii.gz"):
                    img = nib.load(os.path.join(root, file)).get_fdata()
                    # scale by the fixed maximum rather than a per-image one
                    img = img / max_val
                    f['test_lm/image'].create_dataset(str(i), data=img)
                elif file.endswith("lms.txt"):
                    landmarks = np.genfromtxt(os.path.join(root, file), delimiter=' ')
                    f['test_lm/landmarks'].create_dataset(str(i), data=landmarks)
                    # landmarks close the subject; relies on the T1w file
                    # sorting before the lms.txt file of the same subject
                    i += 1
        f['test_lm'].attrs['N'] = i


In [8]:
# Output container for the converted dataset, written to the working directory.
hdf5_file = "OASIS.h5"

# First lay out the empty group hierarchy, then fill it from the data that
# lives under the current directory.
make_groups(hdf5_file=hdf5_file)
convert_to_h5py(directory=".", hdf5_file=hdf5_file)