In [1]:
import numpy as np
import nibabel as nib
import matplotlib.pyplot as plt
import os
import gzip
import shutil

In [2]:
'Functions'

def preprocess_nifti_data(data_folder):
    """
    Loads all .nii.gz files in a folder, rotates each volume, and concatenates
    them into a single volume along the slice (third) axis.

    Files are processed in sorted filename order so that the slice layout of
    the result is reproducible (os.listdir returns entries in arbitrary
    order). The sort is lexicographic, so e.g. "run-10_" sorts before "run-1_".

    The compressed files are read directly by nibabel; nothing is written to
    or deleted from data_folder.

    Parameters:
        data_folder (str): Path to the folder containing .nii.gz files.

    Returns:
        numpy.ndarray: The final array of shape (Nx, Ny, Nz * num_files, t),
        where Nx and Ny are the in-plane dimensions after the rotation.

    Raises:
        ValueError: If data_folder contains no .nii.gz files, or if the
            volumes cannot be concatenated along axis 2 (numpy raises this
            when the remaining dimensions differ between files).
    """

    # Sort for a deterministic concatenation order across platforms
    nii_gz_files = sorted(f for f in os.listdir(data_folder) if f.endswith('.nii.gz'))

    if not nii_gz_files:
        raise ValueError(f"No .nii.gz files found in {data_folder!r}")

    volumes = []

    for file in nii_gz_files:
        file_path = os.path.join(data_folder, file)

        # nibabel decompresses .nii.gz on the fly, so no temporary .nii file
        # has to be extracted into (and later removed from) the data folder
        data = nib.load(file_path).get_fdata()

        # Rotate 90 degrees counterclockwise in the xy-plane
        volumes.append(np.rot90(data, k=1, axes=(0, 1)))

    # Concatenate along the z-axis
    final_data = np.concatenate(volumes, axis=2)

    print(f"Final shape of ground truth: {final_data.shape}; dimensions: (Nx, Ny, Nz*Nvol, t)")
    return final_data


def add_gaussian_noise(data, noise_level, rng=None):
    """
    Adds zero-mean Gaussian noise to each slice and timeframe in the dataset,
    drawing a different noise level for every (slice, timeframe) pair.

    For each 2D image data[:, :, z, t] the noise standard deviation is
    u * max(image), with u drawn uniformly from [0, noise_level).

    Parameters:
        data (numpy.ndarray): The full dataset of shape (Nx, Ny, Nz*Nvol, t).
        noise_level (float): Maximum fraction of image intensity to use as noise
            (intended range: between 0 and 1). Must not be negative.
        rng (numpy.random.Generator, optional): Random source providing
            `uniform` and `normal`. When None (default) the global
            `numpy.random` state is used, so `np.random.seed(...)` still
            controls the result.

    Returns:
        numpy.ndarray: The noisy dataset with the same shape as the input.
        Floating-point (and complex) inputs keep their dtype; any other dtype
        is returned as float64 so the added noise is not truncated.

    Raises:
        ValueError: If noise_level is negative.
    """
    if noise_level < 0:
        raise ValueError(f"noise_level must be non-negative, got {noise_level}")

    data = np.asarray(data)

    # An integer output buffer would silently truncate the noise on assignment,
    # so fall back to float64 whenever the input is not already inexact.
    out_dtype = data.dtype if np.issubdtype(data.dtype, np.inexact) else np.float64
    noisy_data = np.zeros_like(data, dtype=out_dtype)

    sampler = np.random if rng is None else rng

    # Iterate over time dimension; the loop order fixes the order of random
    # draws, which keeps seeded results stable.
    for t in range(data.shape[-1]):
        for z in range(data.shape[2]):  # Iterate over slices in the 3rd dimension
            image = data[:, :, z, t]

            # Compute slice-specific noise level as a fraction of its max intensity
            max_intensity = np.max(image)
            noise_std = sampler.uniform(0, noise_level) * max_intensity  # Scale noise

            # Add Gaussian noise to this slice
            noise = sampler.normal(0, noise_std, image.shape)
            noisy_data[:, :, z, t] = image + noise

    print(f"Final shape of noisy dataset: {noisy_data.shape}; dimensions: (Nx, Ny, Nz*Nvol, t)")

    return noisy_data


def get_run_and_slice(slice_index, slices_per_run=39):
    """
    Map a slice index of the concatenated volume back to the run it came from
    and to its position inside that run.

    Parameters:
        slice_index: Slice index from the total (concatenated) volume.
        slices_per_run: Number of slices contributed by each run (default 39).

    Returns:
        run: Run from which slice_index originates (runs are 1-based).

        slice_within_run: Corresponding slice index inside that run.
    """
    completed_runs, slice_within_run = divmod(slice_index, slices_per_run)
    return completed_runs + 1, slice_within_run


def organize_data(source_folder, target_folder, train_subs, test_subs):
    """
    Copies .nii.gz files from a flat source folder into train/ and test/
    subfolders of the target folder, routed by subject prefix.

    A file whose name starts with a train prefix goes to train/ (train takes
    precedence if it also matches a test prefix); otherwise a file starting
    with a test prefix goes to test/; any other file is skipped. One line is
    printed per file describing what happened.

    Parameters:
        source_folder (str): Path to the folder containing all .nii.gz files
        target_folder (str): Path to the target folder where files will be copied
        train_subs (list): List of subject prefixes to use for training (e.g., ['sub-01', 'sub-02'])
        test_subs (list): List of subject prefixes to use for testing (e.g., ['sub-03'])
    """
    # Destination directory for each split, created up front
    split_dirs = {
        'train': os.path.join(target_folder, 'train'),
        'test': os.path.join(target_folder, 'test'),
    }
    for split_dir in split_dirs.values():
        os.makedirs(split_dir, exist_ok=True)

    for name in os.listdir(source_folder):
        # Only compressed NIfTI files are considered
        if not name.endswith('.nii.gz'):
            continue

        matches_train = any(map(name.startswith, train_subs))
        matches_test = any(map(name.startswith, test_subs))

        if matches_train:
            split = 'train'
        elif matches_test:
            split = 'test'
        else:
            print(f"Skipped {name} - not in train or test subjects")
            continue

        # copy2 keeps file metadata alongside the contents
        shutil.copy2(os.path.join(source_folder, name),
                     os.path.join(split_dirs[split], name))
        print(f"Copied {name} to {split} set")

# Generate ground truth dataset

Train/test split of the functional data (anatomical data not included)

In [4]:
# Create the train/test split for the functional data
# (anatomical data is not part of this split).
source_folder = r'../all_data/func'  # flat folder scanned for .nii.gz files
target_folder = r'../split_data/func'  # train/ and test/ subfolders are created here
train_subs = ['sub-01', 'sub-dd', 'sub-gg']  # filename prefixes copied to train/
test_subs = ['sub-uu']  # filename prefixes copied to test/
organize_data(source_folder, target_folder, train_subs, test_subs)

Copied sub-01_ses-1_task-motor_run-10_bold (2).nii.gz to train set
Copied sub-01_ses-1_task-motor_run-1_bold (1).nii.gz to train set
Copied sub-01_ses-1_task-motor_run-2_bold (1).nii.gz to train set
Copied sub-01_ses-1_task-motor_run-3_bold.nii.gz to train set
Copied sub-01_ses-1_task-motor_run-4_bold.nii.gz to train set
Copied sub-01_ses-1_task-motor_run-5_bold.nii.gz to train set
Copied sub-01_ses-1_task-motor_run-6_bold.nii.gz to train set
Copied sub-01_ses-1_task-motor_run-7_bold.nii.gz to train set
Copied sub-01_ses-1_task-motor_run-8_bold.nii.gz to train set
Copied sub-01_ses-1_task-motor_run-9_bold.nii.gz to train set
Copied sub-01_ses-2_task-agency_run-10_bold.nii.gz to train set
Copied sub-01_ses-2_task-agency_run-1_bold.nii.gz to train set
Copied sub-01_ses-2_task-agency_run-2_bold.nii.gz to train set
Copied sub-01_ses-2_task-agency_run-3_bold.nii.gz to train set
Copied sub-01_ses-2_task-agency_run-4_bold.nii.gz to train set
Copied sub-01_ses-2_task-agency_run-5_bold.nii.gz t

Train/test split of the anatomical data

In [5]:
# Create the train/test split for the anatomical data.
source_folder = r'../all_data/anat'  # flat folder scanned for .nii.gz files
target_folder = r'../split_data/anat'  # train/ and test/ subfolders are created here
train_subs = ['sub-01', 'sub-dd', 'sub-gg']  # filename prefixes copied to train/
test_subs = ['sub-uu']  # filename prefixes copied to test/
organize_data(source_folder, target_folder, train_subs, test_subs)

Copied sub-01_ses-1_T1w.nii.gz to train set
Copied sub-dd_ses-1_T1w.nii.gz to train set
Copied sub-gg_ses-1_T1w.nii.gz to train set
Copied sub-uu_ses-1_T1w.nii.gz to test set
