In [3]:
%load_ext autoreload
%autoreload 2

from pathlib import Path
import numpy as np
import nibabel as nib
from scipy import stats
import h5py
from datetime import datetime

from mfs_tools.library.cifti_stuff import get_cortical_indices, get_subcortical_indices
from mfs_tools.library.distance_stuff import regress_adjacent_cortex
from mfs_tools.library.utility_stuff import compare_mats


save_to = Path("/mnt/cache/pfm_python/")
reference_cifti_path = (
    save_to /
    "sub-ME01_task-rest_concatenated_and_demeaned_32k_fsLR.dtseries.nii"
)
distance_matrix_path = save_to / "dist_complete.npy"


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Load data
bold_cifti = nib.Cifti2Image.from_filename(reference_cifti_path)
distance_matrix = np.load(distance_matrix_path)


In [4]:
# The whole notebook in a few lines, with some help from the library

print(datetime.now())
adjusted_img = regress_adjacent_cortex(
    bold_cifti, distance_matrix, distance_threshold=20, verbose=True
)
print(datetime.now())
adjusted_img.to_filename(
    save_to /
    "sub-ME01_task-rest_concatenated_demeaned_and_regressed_32k_fsLR.dtseries.nii"
)
print(datetime.now())


2025-01-12 11:20:07.017480
Filtered distance matrix down to 25,647 sub-cortical voxels by 59,412 cortical vertices
Found 19,137 voxels within 20mm of a cortical vertex.
Adjustments to 19,137 subcortical voxels near cortex complete. New Cifti2 image (2560, 85059).
2025-01-12 11:22:09.268434
2025-01-12 11:22:18.500896


The above code is all that's necessary to just run the regression function
from the library. You can do that here, and it's done. The next few cells
compare these results from the same step in Lynch's matlab code. The rest of
this notebook is simply spelling out th `regress_adjacent_cortex` function.

In [7]:
# Were the inputs the same as those in the matlab version?
# Load matlab versions of each matrix
ml_base_path = Path("/mnt/cache/pfm_matlab/")
ml_bold_cifti_file = ml_base_path / reference_cifti_path.name
ml_bold_cifti_img = nib.Cifti2Image.from_filename(ml_bold_cifti_file)

ml_distance_file = ml_base_path / "DistanceMatrix.mat"
ml_distance_dict = h5py.File(ml_distance_file, 'r')
ml_distance_matrix = np.array(ml_distance_dict.get('D'), dtype=np.uint8)

In [8]:

ml_adjusted_file = ml_base_path / "sub-ME01_task-rest_concatenated_demeaned_and_regressed_32k_fsLR.dtseries.nii"
ml_adjusted_img = nib.Cifti2Image.from_filename(ml_adjusted_file)


In [27]:
# Are the output BOLD data the same?
# **THIS IS THE ONLY TEST THAT REALLY MATTERS FOR THIS NOTEBOOK**
# Note the tolerance. There are differences, probably due to float
# encoding or minor regression fitting between matlab and python.
# These differences exist beyond the fifth decimal point, but for
# this, we don't really care about that. If BOLD values match to
# the hundred-thousandth place, that's way more than close enough for us.
compare_mats(ml_adjusted_img.get_fdata(), adjusted_img.get_fdata(),
             a_name="matlab adj BOLD", b_name="python adj BOLD",
             tolerance=0.00001, verbose=True, preview=True)


[1;32m  The matrices 'matlab adj BOLD' and 'python adj BOLD' are equal, with tolerance of 1e-05.[0m
  Mem before 25,182.0MB; Mem after 25,182.0MB; delta 0.0


True

In [16]:
# Are the input BOLD data the same?
compare_mats(ml_bold_cifti_img.get_fdata(), bold_cifti.get_fdata(),
             a_name="matlab BOLD", b_name="python BOLD",
             verbose=True, preview=True)


[1;32m  The matrices 'matlab BOLD' and 'python BOLD' are equal.[0m
  Mem before 21,831.1MB; Mem after 21,831.1MB; delta 0.0


True

In [30]:
# Are the distance matrices the same?
# These things are so damn huge, we need to compare them in pieces.
# Also, we already did these comparisons in the distance notebook,
# so we know they're going to have 1mm differences we can't do
# anything about. But that's fine. It's good to check everything
# twice while debugging and rule out stupid typos and errors.
ctx_idx = get_cortical_indices(ml_bold_cifti_img)
subcort_idx = get_subcortical_indices(ml_bold_cifti_img)
for row_idx, col_idx, desc in [
    (ctx_idx, ctx_idx, "ctx-ctx"),
    (subcort_idx, ctx_idx, "subcort-ctx"),
    (ctx_idx, subcort_idx, "ctx-subcort"),
    (subcort_idx, subcort_idx, "subcort-subcort"),
]:
    compare_mats(
        ml_distance_matrix[row_idx, :][:, col_idx],
        distance_matrix[row_idx, :][:, col_idx],
        a_name=f"matlab {desc} distance",
        b_name=f"python {desc} distance",
        verbose=True, preview=True)


  There are mismatches between 'matlab ctx-ctx distance' (uint8)  and 'python ctx-ctx distance' (uint8).
  Top left corners, for a small preview:
|    0.0000,  67.0000, 136.0000,  93.0000, 111.0000 |    |    0.0000,  67.0000, 136.0000,  93.0000, 111.0000 |
|   67.0000,   0.0000,  84.0000, 104.0000, 110.0000 |    |   67.0000,   0.0000,  84.0000, 104.0000, 110.0000 |
|  136.0000,  84.0000,   0.0000,  89.0000, 174.0000 | vs |  136.0000,  84.0000,   0.0000,  89.0000, 174.0000 |
|   93.0000, 104.0000,  89.0000,   0.0000, 200.0000 |    |   93.0000, 104.0000,  89.0000,   0.0000, 200.0000 |
|  111.0000, 110.0000, 174.0000, 200.0000,   0.0000 |    |  111.0000, 110.0000, 174.0000, 200.0000,   0.0000 |
[1;32m  Only 1 in 120308 values differ (14,670 of 1,764,922,578). [0m
[0;31m  The largest difference is 1.0 == 1.000000000 [0m
  Mem before 35,279.9MB; Mem after 36,963.0MB; delta 1,683.2
  There are mismatches between 'matlab subcort-ctx distance' (uint8)  and 'python subcort-ctx distance' (ui

----

The remainder of this notebook just re-implements the `regress_adjacent_cortex`
function one piece at a time.

----

In [40]:
# Only calculate distance to real cortical vertices that may get used.
cort_idx = get_cortical_indices(bold_cifti)
subcort_idx = get_subcortical_indices(bold_cifti)
relevant_distances = distance_matrix[subcort_idx, :][:, cort_idx]


In [41]:
print(f"Filtered distance matrix down to {relevant_distances.shape[0]:,} "
      f"sub-cortical voxels by {relevant_distances.shape[1]:,} "
      f"cortical vertices")


Filtered distance matrix down to 25,647 sub-cortical voxels by 59,412 cortical vertices


In [80]:
# Determine which subcortical voxels are within 20mm of a cortical vertex.
smallest_distances = np.min(relevant_distances, axis=1)
outer_voxel_indices = np.where(smallest_distances <= 20)[0]


This generates a list of 19,137 voxels within 20mm of a cortical vertex.
This is exactly the same as the 19,137 voxels in Lynch's matlab code/data.
Next, we loop over each voxel near cortex, extract the BOLD from all
voxels within the 20mm threshold, and regress the cortical BOLD signal
from it.

In [78]:
def print_array_summary(a, desc):
    print(f"{desc} is shaped {a.shape}:")
    print("  [" + "".join(
        [", ".join([f"{v:0.2f}" for v in a[:5]])] +
        [", ..., "] +
        [", ".join([f"{v:0.2f}" for v in a[-5:]])]
    ) + "]")


In [75]:
i = 5

# Lynch's code for the first voxel results in [550 x 2560] BOLD, meaning
# that 550 voxels are within 20mm of the first voxel in our outer_voxel list.
# Our cifti data are transposed, [2560 x 550], but otherwise identical.
nearby_bold = bold_cifti.get_fdata()[:, distance_matrix[outer_voxel_indices[i], :] <= 20]

# Average the signal from all nearby voxels into a single time series
if nearby_bold.shape[1] > 1:
    nearby_bold = np.mean(nearby_bold, axis=1)



In [76]:
# We could use statsmodels or scikit-learn, but for a simple linear regression,
# we'll just stick with numpy.
# Regression outcome is this voxel's BOLD time series
voxel_index = subcort_idx[outer_voxel_indices[i]]
y = bold_cifti.get_fdata()[:, voxel_index]
# Regression data are the surrounding voxels' BOLD time series, with an intercept
# X = np.vstack([np.ones((1, len(nearby_bold))), nearby_bold.reshape(1, -1)])
X = nearby_bold

print(f"y is shaped {y.shape}; X is shaped {nearby_bold.shape}")

y is shaped (2560,); X is shaped (2560,)


In [79]:
# scipy's linregress function adds a column of ones internally,
# so we don't have to do it ourselves.
results = stats.linregress(nearby_bold, y)
predicted_y = results.intercept + results.slope * nearby_bold
residuals = y - predicted_y

print_array_summary(y, "Original BOLD")
print_array_summary(nearby_bold, "Regional average BOLD")
print_array_summary(residuals, "Residualized BOLD")


Original BOLD is shaped (2560,):
  [5.18, -36.65, -52.11, -5.04, -52.56, ..., 27.71, -10.79, -0.26, 45.62, 27.19]
Regional average BOLD is shaped (2560,):
  [-1.37, 9.72, 14.87, 19.99, 15.47, ..., -66.99, -69.48, -54.87, -28.57, -11.06]
Residualized BOLD is shaped (2560,):
  [4.98, -35.22, -49.92, -2.10, -50.28, ..., 17.85, -21.02, -8.34, 41.41, 25.56]


Above is an example of the voxel-wise regression processing. Below is exactly the same thing within a loop that will regress surrounding signal from each voxel near cortex.

In [81]:
adjusted_data = bold_cifti.get_fdata().copy()
for cifti_locus_index in outer_voxel_indices:
    # Extract all BOLD data within 20mm of this voxel
    nearby_bold = bold_cifti.get_fdata()[:, distance_matrix[cifti_locus_index, :] <= 20]
    if nearby_bold.shape[1] > 1:
        nearby_bold = np.mean(nearby_bold, axis=1)

    # Regress surrounding BOLD from this voxel's BOLD
    voxel_index = subcort_idx[cifti_locus_index]
    y = bold_cifti.get_fdata()[:, voxel_index]
    results = stats.linregress(nearby_bold, y)
    predicted_y = results.intercept + results.slope * nearby_bold
    residuals = y - predicted_y

    # Replace the BOLD data with residuals
    adjusted_data[:, voxel_index] = residuals

# We should now have a copy of the BOLD data,
# with each voxel near cortex cleaned of surrounding signal


In [82]:
# The adjusted data need to be packaged into a new Cifti2 file,
# just like the input file, and saved to disk.

adjusted_img = nib.Cifti2Image(
    adjusted_data, header=bold_cifti.header,
)
adjusted_img.to_filename(
    save_to /
    "sub-ME01_task-rest_concatenated_demeaned_and_regressed_32k_fsLR.dtseries.nii"
)
