# Data loading & normalization

## Imports

In [1]:
import numpy as np
import nibabel as nib
import matplotlib.pyplot as plt
import os
import gzip
import shutil

## Defining normalization

In [2]:
# Normalization
def zscore_normalize_time_series(fmri_data):
    """
    Standardize each time course to zero mean and unit standard deviation.

    The mean and standard deviation are computed along the last axis (time),
    independently for every position in the leading axes.

    Parameters:
        fmri_data (np.ndarray): array whose last axis is time, e.g. a 4D
            array of shape (Nx, Ny, Nz*Nvol, T)

    Returns:
        np.ndarray: z-score normalized array of the same shape as the input
    """
    temporal_mean = fmri_data.mean(axis=-1, keepdims=True)
    temporal_std = fmri_data.std(axis=-1, keepdims=True)

    # Constant time courses have zero spread; divide those by 1 instead so
    # they come out as 0 rather than NaN/inf.
    np.putmask(temporal_std, temporal_std == 0, 1.0)

    centered = fmri_data - temporal_mean
    return centered / temporal_std

## Preprocessing functions

In [3]:
'Functions'

def preprocess_nifti_data(data_folder, subject):
    """
    Load, rotate, stack and z-score all NIfTI files of one subject.

    Every file in ``data_folder`` whose name ends with ``.nii`` and contains
    ``subject`` is loaded with nibabel, rotated 90 degrees counterclockwise in
    the xy-plane and concatenated with the others along the z-axis (axis 2).
    The stacked volume is then z-score normalized along its last axis.

    Parameters:
        data_folder (str): Path to the folder containing the .nii files.
        subject (str): Substring a file name must contain to be loaded,
            e.g. "sub-01".

    Returns:
        numpy.ndarray: The normalized array, expected to be 4D with shape
            (Nx, Ny, Nz * num_files, t).

    Raises:
        ValueError: If no file in ``data_folder`` matches the filter.
    """
    # os.listdir returns entries in arbitrary order. Sort the names so the
    # files are always stacked in the same (lexicographic) order; otherwise
    # the position of a run along the z-axis could change between executions.
    nii_files = sorted(
        name for name in os.listdir(data_folder)
        if name.endswith('.nii') and subject in name
    )

    # Fail with an explicit message instead of the generic
    # "need at least one array to concatenate" raised by np.concatenate.
    if not nii_files:
        raise ValueError(
            f"No .nii files containing '{subject}' found in '{data_folder}'"
        )

    volumes = []
    for file_name in nii_files:
        file_path = os.path.join(data_folder, file_name)

        # Load the .nii file as a floating-point array
        nii = nib.load(file_path)
        data = nii.get_fdata()

        # Rotate 90 degrees counterclockwise in the xy-plane
        volumes.append(np.rot90(data, k=1, axes=(0, 1)))

    # Concatenate along the z-axis
    final_data = np.concatenate(volumes, axis=2)

    print(f"Final shape of ground truth: {final_data.shape}; dimensions: (Nx, Ny, Nz*Nvol, t)")
    return zscore_normalize_time_series(final_data)


def add_gaussian_noise(data, noise_level):
    """
    Adds zero-mean Gaussian noise to every (slice, timeframe) image, drawing a
    separate noise strength for each one.

    For each 2D image ``data[:, :, z, t]`` the noise standard deviation is a
    random fraction, uniform in [0, noise_level), of the magnitude of that
    image's maximum value.

    Parameters:
        data (numpy.ndarray): The full dataset of shape (Nx, Ny, Nz*Nvol, t).
        noise_level (float): Maximum fraction of image intensity to use as noise (between 0 and 1).

    Returns:
        numpy.ndarray: The noisy dataset with the same shape as the input.
            Floating-point (and complex) input keeps its dtype; any other
            dtype is returned as float64 so the noise is not truncated.
    """
    # Writing float noise into an integer buffer would silently truncate it,
    # so only reuse the input dtype when it can represent fractional values.
    out_dtype = data.dtype if np.issubdtype(data.dtype, np.inexact) else np.float64
    noisy_data = np.zeros_like(data, dtype=out_dtype)

    n_slices = data.shape[2]
    n_timeframes = data.shape[-1]

    # The loop is kept (rather than drawing one full-size noise array) so that
    # only one 2D noise image is held in memory at a time.
    for t in range(n_timeframes):
        for z in range(n_slices):
            image = data[:, :, z, t]

            # A z-scored image can have a negative maximum; take the magnitude
            # because np.random.normal rejects a negative standard deviation.
            max_intensity = abs(np.max(image))
            noise_std = np.random.uniform(0, noise_level) * max_intensity

            # Add Gaussian noise to this slice
            noise = np.random.normal(0, noise_std, image.shape)
            noisy_data[:, :, z, t] = image + noise

    print(f"Final shape of noisy dataset: {noisy_data.shape}; dimensions: (Nx, Ny, Nz*Nvol, t)")

    return noisy_data


def get_run_and_slice(slice_index, slices_per_run=39):
    """
    Map a slice index of the concatenated volume back to its run and to the
    slice position inside that run.

    Parameters:
        slice_index: Slice index from total volume (0-based).
        slices_per_run: Number of slices each run contributes (default 39).

    Returns:
        run: specific run from which given slice_index originates (1-based).

        slice_within_run: corresponding slice in run (0-based).
    """
    # The quotient counts the full runs that precede this slice; the
    # remainder is the offset inside the run it belongs to.
    preceding_runs, slice_within_run = divmod(slice_index, slices_per_run)
    return preceding_runs + 1, slice_within_run

## Load ground truth & noisy dataset
Converts the raw NIfTI files to `.npy` ground-truth arrays and adds Gaussian noise to them to create the matching noisy datasets.

In [4]:
# Base output directory on the Kaggle working disk.
output_base_dir = "/kaggle/working/data"

# Create the folder tree up front; exist_ok=True lets this cell be re-run.
# NOTE(review): this creates "<output_base_dir>/data" (and output_base_dir
# itself as its parent), while the func cells below save directly into
# /kaggle/working/data -- confirm the nested folder is intended.
nested_output_dir = f"{output_base_dir}/data"
os.makedirs(nested_output_dir, exist_ok=True)

### Anat data

In [5]:
# 'Load raw data and save processed ground truth dataset'

# # Creating train dataset (anat data)
# data_folder = r'/kaggle/input/fmri-data-splitted/split_data/anat/train'
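# data = preprocess_nifti_data(data_folder)  # NOTE: preprocess_nifti_data also requires a `subject` argument; add it before re-enabling this cell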
# gt_path = rf"/kaggle/working/processed_data/anat/gt_anat_train.npy"
# np.save(gt_path, data)
# print(f'GT train dataset (anat) saved as "{gt_path}"')

# # Select value between 0 and 1 for maximum percentage of image highest intensity to generate noise
# noisy_data = add_gaussian_noise(data, noise_level=0.1)
# noisy_path = rf"/kaggle/working/processed_data/anat/noisy_anat_train.npy"
# np.save(noisy_path, noisy_data)
# print(f'Noisy train dataset (anat) saved as "{noisy_path}"')



# # Creating test dataset (anat data)
# data_folder = r'/kaggle/input/fmri-data-splitted/split_data/anat/test'
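# data = preprocess_nifti_data(data_folder)  # NOTE: preprocess_nifti_data also requires a `subject` argument; add it before re-enabling this cell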
# gt_path = rf"/kaggle/working/processed_data/anat/gt_anat_test.npy"
# np.save(gt_path, data)
# print(f'GT test dataset (anat) saved as "{gt_path}"')

# # Select value between 0 and 1 for maximum percentage of image highest intensity to generate noise
# noisy_data = add_gaussian_noise(data, noise_level=0.1)
# noisy_path = rf"/kaggle/working/processed_data/anat/noisy_anat_test.npy"
# np.save(noisy_path, noisy_data)
# print(f'Noisy test dataset (anat) saved as "{noisy_path}"')

### Func data

Train

In [6]:
# Load raw data and save processed ground truth dataset

# Creating train dataset (func data): stack and z-score every "sub-01" file
data_folder = r'/kaggle/input/fmri-data-splitted/split_data/func/train'
data = preprocess_nifti_data(data_folder, "sub-01")

# Build the save path from output_base_dir (defined in the cell above) rather
# than repeating the directory literal, so the two cannot drift apart.
gt_path = f"{output_base_dir}/gt_func_train_1.npy"
np.save(gt_path, data)
print(f'GT train dataset (func) saved as "{gt_path}"')

# Select value between 0 and 1 for maximum percentage of image highest intensity to generate noise
noisy_data = add_gaussian_noise(data, noise_level=0.1)
noisy_path = f"{output_base_dir}/noisy_func_train_1.npy"
np.save(noisy_path, noisy_data)
print(f'Noisy train dataset (func) saved as "{noisy_path}"')

Final shape of ground truth: (64, 64, 156, 300); dimensions: (Nx, Ny, Nz*Nvol, t)
GT train dataset (func) saved as "/kaggle/working/data/gt_func_train_1.npy"
Final shape of noisy dataset: (64, 64, 156, 300); dimensions: (Nx, Ny, Nz*Nvol, t)
Noisy train dataset (func) saved as "/kaggle/working/data/noisy_func_train_1.npy"


In [7]:
# 'Load raw data and save processed ground truth dataset'

# # Creating train dataset (func data)
# data_folder = r'/kaggle/input/fmri-data-splitted/split_data/func/train'
# data = preprocess_nifti_data(data_folder, "sub-dd")
# gt_path = rf"/kaggle/working/data/gt_func_train_2.npy"
# np.save(gt_path, data)
# print(f'GT train dataset (func) saved as "{gt_path}"')

# # Select value between 0 and 1 for maximum percentage of image highest intensity to generate noise
# noisy_data = add_gaussian_noise(data, noise_level=0.1)
# noisy_path = rf"/kaggle/working/data/noisy_func_train_2.npy"
# np.save(noisy_path, noisy_data)
# print(f'Noisy train dataset (func) saved as "{noisy_path}"')

In [8]:
# 'Load raw data and save processed ground truth dataset'

# # Creating train dataset (func data)
# data_folder = r'/kaggle/input/fmri-data-splitted/split_data/func/train'
# data = preprocess_nifti_data(data_folder, "sub-gg")
# gt_path = rf"/kaggle/working/data/gt_func_train_3.npy"
# np.save(gt_path, data)
# print(f'GT train dataset (func) saved as "{gt_path}"')

# # Select value between 0 and 1 for maximum percentage of image highest intensity to generate noise
# noisy_data = add_gaussian_noise(data, noise_level=0.1)
# noisy_path = rf"/kaggle/working/data/noisy_func_train_3.npy"
# np.save(noisy_path, noisy_data)
# print(f'Noisy train dataset (func) saved as "{noisy_path}"')

Test

In [9]:
# # Creating test dataset (func data)
# data_folder = r'/kaggle/input/fmri-data-splitted/split_data/func/test'
# data = preprocess_nifti_data(data_folder, "sub-uu")
# gt_path = rf"/kaggle/working/data/gt_func_test.npy"
# np.save(gt_path, data)
# print(f'GT test dataset (func) saved as "{gt_path}"')

# # Select value between 0 and 1 for maximum percentage of image highest intensity to generate noise
# noisy_data = add_gaussian_noise(data, noise_level=0.1)
# noisy_path = rf"/kaggle/working/data/noisy_func_test.npy"
# np.save(noisy_path, noisy_data)
# print(f'Noisy test dataset (func) saved as "{noisy_path}"')

# Plots

In [10]:
# 'Plot example slices'

# # Choose slice and timeframe to plot
# slice_index = 70
# timeframe = 200

# # Get run and corresponding slice within that run
# run, slice_within_run = get_run_and_slice(slice_index)

# # Create figure with two subplots
# fig, axes = plt.subplots(1, 2, figsize=(10, 5))

# # Plot ground truth image
# axes[0].imshow(data[:, :, slice_index, timeframe], cmap='gray')
# axes[0].set_title(f'Ground Truth\nRun {run}, Slice {slice_within_run}, Time {timeframe}')
# axes[0].axis("off")

# # Plot noisy image
# axes[1].imshow(noisy_data[:, :, slice_index, timeframe], cmap='gray')
# axes[1].set_title(f'Noisy Image\nRun {run}, Slice {slice_within_run}, Time {timeframe}')
# axes[1].axis("off")

# # Show the plots
# plt.tight_layout()
# plt.show()

In [11]:
# 'Plot example voxel timeseries'

# # Choose voxel and slice to plot time series
# vx, vy = 30, 30

# # Get run and corresponding slice within that run
# run, slice_within_run = get_run_and_slice(slice_index) 

# # Create figure
# plt.figure(figsize=(8, 5))

# # Plot time series for ground truth and noisy data
# plt.plot(data[vx, vy, slice_index, :], label="Ground Truth", color='b')
# plt.plot(noisy_data[vx, vy, slice_index, :], label="Noisy Image", color='r', alpha=0.7)

# # Add labels and title
# plt.xlabel("Timeframe")
# plt.ylabel("Intensity")
# plt.title(f'Run {run}, Voxel ({vx},{vy},{slice_within_run}) Time Series')

# # Add legend
# plt.legend()

# # Show plot
# plt.show()