Chuck Lynch published the PFM code supporting his 2024 Nature paper, but it's written in MATLAB.
I would like my Python-based code to generate the same data.
Here, I check that it does.

In [1]:
from pathlib import Path
import numpy as np
import nibabel as nib
import pandas as pd

from mfs_tools.library.concat_stuff import concat_dtseries

# TODO: Add --set-mean and --set-sd to concat_dtseries


In [2]:
# Gather the same files as the matlab code.
# I could just glob, but I need to loop the same way as in matlab.

subject = "ME01"
data_dir = Path("/mnt/brunodata/open_data/ds005118/")
derivatives_dir = data_dir.joinpath("derivatives", f"sub-{subject}")
processed_dir = derivatives_dir.joinpath("processed_restingstate_timecourses")

# Count the session directories; later cells loop over range(n_sessions).
# NOTE(review): the pattern only matches entries whose name ends in "1"
# (ses-func01, ses-func11, ...) -- presumably a deliberate restriction to the
# first session; confirm before running on a subject with more sessions.
session_dirs = [entry for entry in processed_dir.glob("ses-func*1")]
n_sessions = len(session_dirs)
print(n_sessions)


1


In [3]:


# Load every run of every session in the same order as the matlab code,
# keep only the frames whose temporal mask is 1, de-mean each locus across
# time, and stack all runs into a single [time x location] matrix.

run_data_mats = list()
a_cifti_template = None  # ends up holding the last image loaded

# NOTE(review): this pattern only matches directory names ending in "1";
# presumably deliberate to restrict to session 01 -- confirm.
n_sessions = len(list(
    processed_dir.glob("ses-func*1")
))
print(n_sessions)

for i_ses in range(n_sessions):
    # Resolve the session directory once instead of rebuilding it per use.
    session_dir = processed_dir / f"ses-func{i_ses + 1:02d}"
    n_runs = len(list(session_dir.glob("*run-*.dtseries.nii")))
    print(f"Session {i_ses + 1:02d}, {n_runs:02d} runs")

    # Runs are addressed by index (01..n_runs) rather than by globbed name,
    # to loop the same way the matlab code does.
    for i_run in range(n_runs):
        dt_file = f"sub-{subject}_ses-func{i_ses + 1:02d}_task-rest_run-{i_run + 1:02d}_bold_32k_fsLR.dtseries.nii"
        a_cifti_template = nib.load(session_dir / dt_file)
        run_data = a_cifti_template.get_fdata()
        print(f"  data {run_data.shape} mean {np.mean(run_data):0.2f} +- {np.std(run_data):0.2f}")

        tmask_file = dt_file.replace(".dtseries.nii", "_tmask.txt")
        tmask = pd.read_csv(
            session_dir / tmask_file,
            header=None, index_col=None
        )
        print(f"  tmask is shaped {tmask.values.shape} with {np.sum(tmask.values)} ones.")

        # Fail with a clear message if the mask does not line up with the frames.
        keep_frames = tmask.values.ravel() == 1
        if keep_frames.shape[0] != run_data.shape[0]:
            raise ValueError(
                f"{tmask_file} has {keep_frames.shape[0]} entries, "
                f"but {dt_file} has {run_data.shape[0]} frames."
            )

        # De-meaning occurs across time at each locus.
        # These matrices are [time x location], which differs from matlab's [location x time]
        run_data = run_data[keep_frames, :]
        run_data = run_data - np.mean(run_data, axis=0)
        print(f"  data {run_data.shape} mean {np.mean(run_data):0.2f} +- {np.std(run_data):0.2f}")

        run_data_mats.append(run_data)

# np.concatenate on an empty list raises an unhelpful error; say what went wrong.
if not run_data_mats:
    raise ValueError(f"No runs found under {processed_dir}; nothing to concatenate.")

concatenated_data = np.concatenate(run_data_mats, axis=0)
print(f"  data {concatenated_data.shape} mean {np.mean(concatenated_data):0.2f} +- {np.std(concatenated_data):0.2f}")


1
Session 01, 04 runs
  data (640, 85059) mean 2480.96 +- 544.24
  tmask is shaped (640, 1) with 640 ones.
  data (640, 85059) mean 0.00 +- 27.95
  data (640, 85059) mean 2456.04 +- 541.37
  tmask is shaped (640, 1) with 640 ones.
  data (640, 85059) mean 0.00 +- 23.27
  data (640, 85059) mean 2427.18 +- 530.81
  tmask is shaped (640, 1) with 640 ones.
  data (640, 85059) mean 0.00 +- 28.88
  data (640, 85059) mean 2444.67 +- 547.28
  tmask is shaped (640, 1) with 640 ones.
  data (640, 85059) mean -0.00 +- 27.19
  data (2560, 85059) mean 0.00 +- 26.91


In [4]:
# Build a new Cifti2Image from concatenated data

# Take the TR (the step of axis 0) and the axis-1 layout from the last image
# that was loaded; only the length of axis 0 changes after concatenation.
template_axis_0 = a_cifti_template.header.get_axis(0)
tr_len = template_axis_0.step
concatenated_cifti_axis_1 = a_cifti_template.header.get_axis(1)
concatenated_cifti_axis_0 = nib.cifti2.SeriesAxis(
    start=0, step=tr_len, size=concatenated_data.shape[0]
)
concatenated_img = nib.cifti2.Cifti2Image(
    concatenated_data, (concatenated_cifti_axis_0, concatenated_cifti_axis_1)
)
concatenated_img.update_headers()
py_concat_0_path = (
        Path("/mnt/cache/pfm_python/") /
        f"sub-{subject}_task-rest_concatenated_and_demeaned_32k_fsLR.dtseries.nii"
)
# Create the output directory if needed so the write works on a fresh machine.
py_concat_0_path.parent.mkdir(parents=True, exist_ok=True)
concatenated_img.to_filename(py_concat_0_path)


## Compare data from pairs of files.

Ensure my python code did the same thing as Charles Lynch's matlab code.

In [5]:
# Compare my concatenated data to Lynch's

python_img = nib.cifti2.Cifti2Image.from_filename(py_concat_0_path)

ml_concat_0_path = (
        Path("/mnt/cache/ds005118_sub-ME01/pfm/") /
        f"sub-{subject}_task-rest_concatenated_and_demeaned_32k_fsLR.dtseries.nii"
)
matlab_img = nib.cifti2.Cifti2Image.from_filename(ml_concat_0_path)

python_data = python_img.get_fdata()
matlab_data = matlab_img.get_fdata()

# np.allclose broadcasts its inputs, so differently shaped arrays could raise
# or even compare as equal; check the shapes explicitly first.
if python_data.shape != matlab_data.shape:
    print(f"Shapes differ: python {python_data.shape} vs matlab {matlab_data.shape}")
    print("I guess we missed.")
elif np.allclose(python_data, matlab_data):
    print("They match!")
else:
    print("I guess we missed.")


They match!


In [6]:
# Compare templateflow gifti surface data to Lynch's

# He used his own brain for the gii; I'm using a template.
# They won't match.



----
The cells below are individual sanity checks; they don't need to be run.
----

In [7]:
# Verify row-wise vs column-wise averaging

# Small 3x4 example whose row and column means are easy to check by hand.
test_array = np.array([
    [3, 4, 5, 6],
    [1, 3, 2, 4],
    [7, 8, 6, 7],
])
column_means = test_array.mean(axis=0)  # collapses rows: one value per column
row_means = test_array.mean(axis=1)  # collapses columns: one value per row
print("axis=0; mean of each column", column_means)
print("axis=1; mean of each row", row_means)

axis=0; mean of each column [3.66666667 5.         4.33333333 5.66666667]
axis=1; mean of each row [4.5 2.5 7. ]


In [8]:
# Review one cifti to ensure I'm reading it right.

# First run of the first session for sub-ME01.
example_dtseries_path = "/mnt/brunodata/open_data/ds005118/derivatives/sub-ME01/processed_restingstate_timecourses/ses-func01/sub-ME01_ses-func01_task-rest_run-01_bold_32k_fsLR.dtseries.nii"
cdata = nib.load(example_dtseries_path)
print(cdata.shape)


(640, 85059)


In [9]:
# Review one mask to ensure I'm reading it right.

# Temporal mask that accompanies the first run of the first session.
example_tmask_path = "/mnt/brunodata/open_data/ds005118/derivatives/sub-ME01/processed_restingstate_timecourses/ses-func01/sub-ME01_ses-func01_task-rest_run-01_bold_32k_fsLR_tmask.txt"
# The file has no header row and no index column.
tmask = pd.read_csv(
    example_tmask_path,
    index_col=None,
    header=None,
)
print(f"tmask is shaped {tmask.values.shape} with {np.sum(tmask.values)} ones.")


tmask is shaped (640, 1) with 640 ones.


In [10]:
# Artificially set one value to high-motion to test filtering
tmask.iloc[12, 0] = 0
# The 640-row matrix should drop row 12 and result in 639
# Use the ndarray.astype method: the module-level np.astype function is not
# available in NumPy 1.x and would raise AttributeError there.
cdata.get_fdata()[tmask.values.ravel().astype(bool), :].shape

(639, 85059)