In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
from pathlib import Path
from mfs_tools.library.distance_stuff import make_distance_matrix
import h5py
import seaborn as sns
import pandas as pd


# Directory that receives the cached distance matrices (kept as a plain string).
save_to = "/mnt/cache/pfm_python/"

# CIFTI dense time series whose brain-model axis decides which vertices and
# voxels take part in the distance matrix.
reference_cifti_path = (
    Path(save_to)
    / "sub-ME01_task-rest_concatenated_and_demeaned_32k_fsLR.dtseries.nii"
)

# Midthickness surface (GIFTI) for each hemisphere, keyed by 'lh' / 'rh'.
_surface_dir = Path(
    "/mnt/brunodata/open_data/ds005118/derivatives/sub-ME01/fs_LR/fsaverage_LR32k"
)
surface_files = {
    hemi: _surface_dir / f"ME01.{letter}.midthickness.32k_fs_LR.surf.gii"
    for hemi, letter in (("lh", "L"), ("rh", "R"))
}

# Location of the Connectome Workbench command-line binary.
wb_command_path = "/usr/local/workbench/2.0.1/bin_linux64/wb_command"
work_dir = None


In [None]:
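# We need to build a complete 85059 x 85059 distance matrix in parts.
# A complete fsLR 32k surface can have up to 32492 vertices per hemisphere,
# but Lynch's actual data leaves out the medial wall, so only 29696 (lh) and
# 29716 (rh) cortical vertices plus 25647 subcortical voxels are present.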

#                   cortex                 subcortex
#           left hem       right hem
#     🭽                                                🭾
#       🭽    (1)     🭾 🭽    (2)     🭾 🭽            🭾
#   lh   29696 x 29696   29696 x 29716   29696 x 25647
#       🭼            🭿 🭼            🭿 🭼            🭿
#       🭽    (2)     🭾 🭽    (1)     🭾 🭽            🭾
#   rh   29716 x 29696   29716 x 29716   29716 x 25647
#       🭼            🭿 🭼            🭿 🭼            🭿
#       🭽            🭾 🭽            🭾 🭽            🭾
#   sc   25647 x 29696   25647 x 29716   25647 x 25647
#       🭼            🭿 🭼            🭿 🭼            🭿
#     🭼                                                🭿

# If we built it all at once, it would have 7 billion 32-bit floats,
# requiring 28GB just to hold it. 

----

## Calculate left and right cortical distances

----

In [None]:
# 1. Obtain one distance matrix per hemisphere: reuse the cached .npy file
#    when it is already on disk, otherwise build the matrix and cache it.
distance_matrices = {}
for hemi in ("lh", "rh"):
    cache_file = Path(save_to) / f"dist_{hemi}.npy"
    if not cache_file.exists():
        # Cache miss: compute the matrix, report its shape, then store it.
        distance_matrices[hemi] = make_distance_matrix(
            reference_cifti_path,
            surface_files[hemi],
            save_to,
            num_procs=12,
            wb_command_path=wb_command_path,
            work_dir=Path(save_to) / "tmp",
        )
        print(f"built {distance_matrices[hemi].shape}-shaped distance matrix")
        np.save(cache_file, distance_matrices[hemi])
    else:
        distance_matrices[hemi] = np.load(cache_file)

# Short aliases used by the comparison and stitching cells.
py_lh, py_rh = distance_matrices["lh"], distance_matrices["rh"]


Matlab
- Running with 5 workers finished in 27:53

Python
- Running with num_procs == 3 finished in 19:38.5
- Running with num_procs == 15 finished in 7:44.9
- Running with num_procs == 12 finished in 8:32 & 8:28; 7:44 & 7:42; 7:21 & 7:16
- Running with 5 workers finished in 12:57 and 13:30


In [None]:
# Load the matrices from matlab

# Ensure our lh and rh match the lh and rh from matlab.
# Then ensure our combined cortical d matches, too.

matlab_outdir = Path("/mnt/cache/ds005118_sub-ME01/pfm/")

# Open each .mat file (HDF5) in a context manager so the handle is closed as
# soon as its dataset has been copied into memory; np.array() materializes the
# data, so ml_lh / ml_rh stay valid after the file is closed.
# Indexing with ['lh'] / ['rh'] raises a KeyError naming the missing dataset
# instead of passing None on to np.array().
with h5py.File(matlab_outdir / "lh.mat", 'r') as lh_matlab_file:
    ml_lh = np.array(lh_matlab_file['lh'], dtype=np.uint8)
with h5py.File(matlab_outdir / "rh.mat", 'r') as rh_matlab_file:
    ml_rh = np.array(rh_matlab_file['rh'], dtype=np.uint8)

print(f"The lh.mat from matlab contains {ml_lh.shape} {str(ml_lh.dtype)}s.")
print(f"The rh.mat from matlab contains {ml_rh.shape} {str(ml_rh.dtype)}s.")


In [None]:

# This comparison consumes about 30GB RAM, I assume because it compares floats.
# So ensure there's memory available for it.
if np.allclose(py_lh, ml_lh) and np.allclose(py_rh, ml_rh):
    print("The lh and rh matrices from python and matlab are equal.")
else:
    print("There are mismatches between python and matlab.")
    print("Python's top left corners:")
    print(np.hstack([py_lh[:6, :6], py_rh[:6, :6]]))
    print("Matlab's top left corners:")
    print(np.hstack([ml_lh[:6, :6], ml_rh[:6, :6]]))

    # Only the lower triangle (diagonal included) of each matrix is inspected.
    # The index arrays are large, so build them once per hemisphere and reuse.
    lh_tril = np.tril_indices_from(py_lh)
    rh_tril = np.tril_indices_from(py_rh)

    # Extract just the values that differ between methods and compare them.
    lh_eq = (py_lh == ml_lh)[lh_tril]
    rh_eq = (py_rh == ml_rh)[rh_tril]

    different_ml_lh_vals = ml_lh[lh_tril][~lh_eq]
    different_py_lh_vals = py_lh[lh_tril][~lh_eq]
    different_ml_rh_vals = ml_rh[rh_tril][~rh_eq]
    different_py_rh_vals = py_rh[rh_tril][~rh_eq]

    print(f"Left hemi : {len(different_ml_lh_vals):,} / {len(lh_eq):,} differ.")
    print(f"Right hemi: {len(different_ml_rh_vals):,} / {len(rh_eq):,} differ.")

    # Cast to float32 before subtracting: the matlab values are uint8, and an
    # unsigned subtraction would wrap around instead of going negative.
    lh_diff_vals = pd.DataFrame({
        "py_lh": different_py_lh_vals.astype(np.float32),
        "ml_lh": different_ml_lh_vals.astype(np.float32),
    })
    rh_diff_vals = pd.DataFrame({
        "py_rh": different_py_rh_vals.astype(np.float32),
        "ml_rh": different_ml_rh_vals.astype(np.float32),
    })
    lh_diff_vals['delta'] = lh_diff_vals['py_lh'] - lh_diff_vals['ml_lh']
    rh_diff_vals['delta'] = rh_diff_vals['py_rh'] - rh_diff_vals['ml_rh']

    # Report the extreme signed deltas (python minus matlab) per hemisphere.
    print("The largest difference in lh is "
          f"{lh_diff_vals['delta'].min():0.2f} or "
          f"{lh_diff_vals['delta'].max():0.2f}")
    print("The largest difference in rh is "
          f"{rh_diff_vals['delta'].min():0.2f} or "
          f"{rh_diff_vals['delta'].max():0.2f}")


----

## Calculate subcortical, and subcortex to left and right cortical distances

----

In [None]:
# Get gifti coordinates to calculate Euclidean distance between them.
import nibabel as nib
from nibabel.affines import apply_affine


# Extract the 3D Cartesian coordinates of all surface vertices
lh_surface_img = nib.gifti.gifti.GiftiImage.from_filename(surface_files['lh'])
rh_surface_img = nib.gifti.gifti.GiftiImage.from_filename(surface_files['rh'])
lh_vertex_coordinates = lh_surface_img.darrays[0].data
rh_vertex_coordinates = rh_surface_img.darrays[0].data
surface_coordinates = np.vstack([
    lh_vertex_coordinates, rh_vertex_coordinates,
])
print("Gifti Surface coordinates: "
      f"[{lh_vertex_coordinates.shape} + {rh_vertex_coordinates.shape}]"
      f" = {surface_coordinates.shape}")

# Extract the vertex indices into the mapped BOLD data
img = nib.cifti2.Cifti2Image.from_filename(reference_cifti_path)
brain_ax = img.header.get_axis(1)
print(f"Length of cifti2 brain_axis: {len(brain_ax)}")
# Translate the gifti 'AnatomicalStructurePrimary' value into the structure
# name used on the cifti brain-model axis.
anat_map = {
    'CortexLeft': 'CIFTI_STRUCTURE_CORTEX_LEFT',
    'CortexRight': 'CIFTI_STRUCTURE_CORTEX_RIGHT',
}
lh_surf_anat = lh_surface_img.darrays[0].metadata.get('AnatomicalStructurePrimary', '')
lh_surf_idx = brain_ax[brain_ax.name == anat_map[lh_surf_anat]]
print(f"Just vertices in {str(type(lh_surf_idx))} {lh_surf_anat}: {len(lh_surf_idx)}")
rh_surf_anat = rh_surface_img.darrays[0].metadata.get('AnatomicalStructurePrimary', '')
rh_surf_idx = brain_ax[brain_ax.name == anat_map[rh_surf_anat]]
print(f"Just vertices in {str(type(rh_surf_idx))} {rh_surf_anat}: {len(rh_surf_idx)}")
used_surf_axis = lh_surf_idx + rh_surf_idx

# The cifti vertex indices are local to each hemisphere's own surface, so each
# hemisphere must index its own coordinate array. Indexing the stacked
# lh+rh array with the right-hemisphere indices would return left-hemisphere
# coordinates, because the lh rows come first in that stack.
used_surface_coordinates = np.vstack([
    lh_vertex_coordinates[lh_surf_idx.vertex, :],
    rh_vertex_coordinates[rh_surf_idx.vertex, :],
])
assert used_surface_coordinates.shape[0] == len(used_surf_axis)

# Get the subcortical voxels, too, from a volumetric grid rather than vertices.
# Every brain-axis entry that is not one of the two cortical structures is
# treated as subcortical; its voxel indices are mapped to coordinates through
# the axis affine.
ctx_labels = list(anat_map.values())
subcortical_coordinates = apply_affine(
    brain_ax.affine,
    brain_ax.voxel[~np.isin(brain_ax.name, ctx_labels)],
)
print("Nifti subcortical coordinates: "
      f" = {subcortical_coordinates.shape}")

# Row order: lh cortex, rh cortex, then subcortex.
whole_brain_coordinates = np.vstack([
    used_surface_coordinates, subcortical_coordinates
])
print("Whole brain coordinates: "
      f" = {whole_brain_coordinates.shape}")

# Note that python's voxel locations are consistently shifted relative to
# matlab's. Python's x values are ml+2mm, y=ml+2mm, z=ml-2mm.
# Maybe 0-based vs 1-based indexing, then multiplied by the affine?
# Maybe it's start of voxel vs end of voxel, not center?
# It's all relative, so the effect is only between subcortical and cortical.

In [None]:
# Euclidean distances between points, tried on a handful of rows only.
from scipy.spatial.distance import cdist

# Six hand-picked row indices into whole_brain_coordinates.
spot_check_rows = [1, 2, 3, 40000, 84860, 84861]
spot_check_coordinates = whole_brain_coordinates[spot_check_rows, :]
euclid_dist = cdist(spot_check_coordinates, spot_check_coordinates)
# This works, but running it on every row at once would produce an enormous
# matrix and exhaust memory. The full computation has to be done in pieces,
# each converted to uint8, and the pieces stitched together afterwards.



----

## Put all cortical distances together

----

In [None]:
# Start pasting lh-lh and rh-rh distances into a complete distance matrix
# where anything between lh and rh is "large".
# The largest uint8 is 2^8 - 1 == 255, which is big enough to get masked later.
# [ [ lh ] [255s] ]
# [ [255s] [ rh ] ]

# np.full writes the filler value directly, avoiding the throwaway array that
# np.ones(...) * 255 would allocate for each of these large blocks.
top_right_lh_x_rh = np.full((py_lh.shape[0], py_rh.shape[1]), 255, dtype=np.uint8)
bottom_left_rh_x_lh = np.full((py_rh.shape[0], py_lh.shape[1]), 255, dtype=np.uint8)

# Assemble the 2 x 2 block layout in one step.
py_ctx = np.block([
    [py_lh, top_right_lh_x_rh],
    [bottom_left_rh_x_lh, py_rh],
])
np.save(Path(save_to) / "dist_ctx.npy", py_ctx)
