This notebook is used to compare the old vs. the new resampling function.

The old one used nested for loops, which was inefficient.

The new one uses NumPy methods, which are much faster.

The comparison showed that the results are the same, and therefore the resample function was updated.

In [16]:
import numpy as np
import pandas as pd
import os
import nibabel as nib

# add path to search for functions
import sys
sys.path.append("../")

from functions.function_get_label_df import get_label_df
from functions.function_get_component_array import get_component_array
from functions.function_get_best_features import get_best_features


In [17]:
# When run from this Jupyter notebook, the imported functions would be called
# from the wrong directory, so they are copied here (shadowing the imports
# above) to be able to run the notebook.

def get_label_df():
    """
    Load the label table and return it as a dataframe.

    Only the first three columns of the Excel sheet are read. The ``Cond``
    column is recoded from 1/2 (Sham/Verum) to 0/1 and converted to a
    categorical column.
    """
    # location of the excel-file holding the labels
    excel_path = os.path.join("../../data/", "Conn_IDs_Matching_90_subjects.xlsx")

    labels = pd.read_excel(excel_path, usecols=[0, 1, 2])

    # recode in two passes, in this order: Sham 1 -> 0, then Verum 2 -> 1
    labels = labels.replace({"Cond": {1: 0}})
    labels = labels.replace({"Cond": {2: 1}})

    labels["Cond"] = labels["Cond"].astype("category")
    return labels

def get_component_array(components, print_info=False):
    """
    Returns an array with the stacked post-minus-pre difference volumes
    of the MVPA data.

    Parameters
    ----------
    components : list of component numbers
        Components to load; they are processed in sorted order.
    print_info : bool, optional
        If True, print the number of pre/post samples per component and
        the shapes of the resulting arrays.

    Returns
    -------
    numpy.ndarray
        Stacked differences, indexed as [component, sample, x, y, z].
    """

    # read MVPA data
    data_path = "../../data/"
    mvpa_dir = os.path.join(data_path, "Denoised_MVPA_8mm")
    path_content = os.listdir(mvpa_dir)

    components = sorted(components)
    comp_diff_list = []
    # stays None when no volume was loaded, so the info printout below is safe
    diff_vol_data = None

    for component in components:
        sample_diff_list = []

        # Zero-pad the component number to three digits so that components
        # >= 10 are matched as well (the previous hard-coded "00" prefix
        # only produced a three-digit tag for single-digit components).
        # NOTE(review): assumes file names use a three-digit component
        # field throughout -- confirm against the data folder.
        component_tag = f"Component{str(component).zfill(3)}"

        # make two lists with pre (Condition001) and post (Condition002)
        # data of the component in this loop iteration
        pre_files = sorted(
            x for x in path_content
            if component_tag in x and "Condition001" in x
        )
        post_files = sorted(
            x for x in path_content
            if component_tag in x and "Condition002" in x
        )

        if print_info:
            print(f"there are {len(pre_files)} pre and {len(post_files)} post samples for component {component}")

        # loop over pre and post samples and calculate the difference;
        # files are paired by their position in the sorted lists, and zip
        # stops at the shorter list if the counts differ
        for pre_file, post_file in zip(pre_files, post_files):
            pre_vol_data = nib.load(os.path.join(mvpa_dir, pre_file)).get_fdata()
            post_vol_data = nib.load(os.path.join(mvpa_dir, post_file)).get_fdata()
            diff_vol_data = post_vol_data - pre_vol_data  # type = array
            sample_diff_list.append(diff_vol_data)

        comp_diff_list.append(sample_diff_list)

    # this stacks the per-component lists on top of each other
    # resulting in [component, sample, x, y, z]
    inpt_diff_stacked = np.stack(comp_diff_list, axis=0)

    if print_info:
        if diff_vol_data is not None:
            print(f"type of single volume array {type(diff_vol_data)}")
            print(f"shape of single volume array {diff_vol_data.shape}")
        print(f"shape of stacked array {inpt_diff_stacked.shape}")

    return inpt_diff_stacked

In [18]:
# load the label dataframe
df_label = get_label_df()

# load the MVPA difference volumes for component 1 only;
# the returned array is indexed as [component, sample, x, y, z]
component_array_5d = get_component_array([1])
print(f"shape of component_array_5d: {component_array_5d.shape}")

  df_label = (pd.read_excel(os.path.join(data_path, label_file),


shape of component_array_5d: (1, 90, 91, 109, 91)


In [19]:
def reshape_3d_array(big_array, volume_size):
    """
    Downsample a 3D array by averaging non-overlapping blocks.

    This is the loop-based reference implementation.

    Parameters
    ----------
    big_array : 3D numpy array to downsample
    volume_size : sequence of three ints, block edge length per axis

    Returns
    -------
    3D numpy array holding the mean of each block.

    Notes
    -----
    The array is zero-padded by the same amount on both sides of each
    axis. When the amount missing to the next multiple of the block size
    is odd, this symmetric padding falls one short and the trailing
    planes that do not fill a complete block are left out of the result.
    """
    # per axis: half of what is missing to reach the next multiple of the block size
    half_pad = []
    for axis in range(3):
        block_len = volume_size[axis]
        missing = (block_len - big_array.shape[axis] % block_len) % block_len
        half_pad.append(missing // 2)

    # zero-pad symmetrically on every axis
    padded = np.pad(big_array, [(amount, amount) for amount in half_pad])

    # number of complete blocks that fit along each axis
    out_shape = tuple(padded.shape[axis] // volume_size[axis] for axis in range(3))
    out = np.zeros(out_shape)

    step_x, step_y, step_z = volume_size[0], volume_size[1], volume_size[2]

    # visit every output voxel and average the block it represents
    for ix, iy, iz in np.ndindex(*out_shape):
        block = padded[ix * step_x:(ix + 1) * step_x,
                       iy * step_y:(iy + 1) * step_y,
                       iz * step_z:(iz + 1) * step_z]
        out[ix, iy, iz] = np.mean(block)

    return out

In [31]:
def resample_3d_array(big_array, volume_size):
    """
    Downsample a 3D array by averaging non-overlapping blocks (vectorised).

    Parameters
    ----------
    big_array : 3D numpy array to downsample
    volume_size : sequence of three ints (tuple, list, ...), block edge
        length per axis

    Returns
    -------
    3D numpy array holding the mean of each block.

    Notes
    -----
    The array is zero-padded by the same amount on both sides of each
    axis. When the amount missing to the next multiple of the block size
    is odd, this symmetric padding falls one short and the trailing
    planes that do not fill a complete block are left out of the result.
    """
    # Normalise to a tuple: the view shape below is built by tuple
    # concatenation, which raised a TypeError for list input.
    volume_size = tuple(volume_size)
    big_array_size = big_array.shape

    # Calculate the padding needed to make the big array a multiple of the volume size
    padding = [((volume_size[i] - big_array_size[i] % volume_size[i]) % volume_size[i]) // 2
               for i in range(3)]

    # Pad the big array with zeros, symmetrically on every axis
    padded_big_array = np.pad(big_array, [(p, p) for p in padding])

    # Number of complete blocks along each axis (= shape of the result)
    small_array_size = tuple(padded_big_array.shape[i] // volume_size[i] for i in range(3))

    # Build a 6D view [block_x, block_y, block_z, in_x, in_y, in_z]:
    # the first three strides jump from block to block, the last three
    # walk through the voxels inside a block.
    strides = (tuple(padded_big_array.strides[i] * volume_size[i] for i in range(3))
               + padded_big_array.strides)
    window_view = np.lib.stride_tricks.as_strided(
        padded_big_array,
        shape=small_array_size + volume_size,
        strides=strides,
        writeable=False,  # the view is only read; guard against accidental writes
    )

    # Averaging over the three within-block axes leaves an array of shape small_array_size
    return np.mean(window_view, axis=(-3, -2, -1))


In [33]:
# vectorised version on the first sample of the first component;
# with a block size of 1 every block is a single voxel
a = resample_3d_array(component_array_5d[0, 0], (1, 1, 1))

In [34]:
# loop-based version on the first sample of the first component;
# with a block size of 1 every block is a single voxel
b = reshape_3d_array(component_array_5d[0, 0], (1, 1, 1))

In [35]:
# check that a and b have the same shape and exactly equal elements
np.array_equal(a, b)

True

In [40]:
# edge length of the cubic averaging block used for this comparison
resample_cube = 3

# run both implementations on the first sample of the first component
b = reshape_3d_array(component_array_5d[0, 0], (resample_cube, resample_cube, resample_cube))
a = resample_3d_array(component_array_5d[0, 0], (resample_cube, resample_cube, resample_cube))

# check that a and b have the same shape and exactly equal elements
np.array_equal(a, b)

True