In [1]:
# Import necessary libraries
import heartpy as hp
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import glob, os, gzip, json
from scipy.stats import zscore
import seaborn as sns
import neurokit2 as nk

# Set the base directory containing your data
# Change this to the directory where your data is stored
base_dir = '/data/MoodGroup/17M0060/bids/sourcedata'

# Use glob to find all .tsv.gz files that match the pattern in the base directory
# Adjust the pattern if your file naming convention is different
tsv_fns = glob.glob(os.path.join(base_dir,'*','*','func', '*rest*physio*.tsv.gz'))

# Initialize lists to store data
all_srate = []
all_bpm = []
all_dat = []
all_nope = []

# Loop through each found file
for idx, fn in enumerate(tsv_fns):
    base_fn = os.path.basename(fn)
    print(f"Processing file: {base_fn}")

    # If you sampling rate is not consistent read the corresponding JSON file to get the sampling frequency
    with open(fn.replace('tsv.gz', 'json'), 'r') as fid:
        json_dat = json.load(fid)

    #If the sampling rate is consistent then comment out the earlier lines and place the value
    srate = json_dat['SamplingFrequency']
    all_srate.append(srate)

    # Read the .tsv.gz file into a DataFrame
    dat = pd.read_csv(fn, compression='gzip', sep='\t', header=None, names=["ppg", "rsp", "trig"])

    try:
        # Process the PPG and RSP signals
        ppg_signals, ppg_info = nk.ppg_process(dat["ppg"], sampling_rate=srate)
        rsp_signals, rsp_info = nk.rsp_process(dat["rsp"][1:], sampling_rate=srate)

        # Extract interval-related measurements
        ppg_meas = nk.ppg_intervalrelated(ppg_signals)
        rsp_meas = nk.rsp_intervalrelated(rsp_signals)

        all_dat.append((ppg_signals, rsp_signals))

        # Parse the file name to get subject, session, and task information
        bids_bits = base_fn.split('_')

        # Handle the measurements
        tmp = [ii if isinstance(ii, np.float64) else ii[0] for idx, ii in enumerate(rsp_meas.values[0])]
        all_bpm.append(bids_bits[:-1] + [srate] + list(ppg_meas.values[0]) + tmp)

        # Print the results for the current file
        print(f"{idx}: {base_fn} - PPG Rate Mean: {ppg_meas['PPG_Rate_Mean'].values[0]}, RSP Rate Mean: {rsp_meas['RSP_Rate_Mean'].values[0]}")

    except Exception as e:
        print(f"{idx}: {base_fn} - Error: {e}")
        all_nope.append((fn, dat))

# Create a DataFrame of the collected measurements
# The columns are derived from the keys of the PPG and RSP measurement dictionaries
columns = ['sub', 'ses', 'task', 'run', 'echo', 'srate'] + list(ppg_meas.keys()) + list(rsp_meas.keys())
df = pd.DataFrame(all_bpm, columns=columns)

# Print the final DataFrame
print("Final DataFrame:")
print(df.head())


Processing file: sub-RD120_ses-20210511_task-rest_run-01_echo-01_physio.tsv.gz
0: sub-RD120_ses-20210511_task-rest_run-01_echo-01_physio.tsv.gz - PPG Rate Mean: 57.352979607965516, RSP Rate Mean: 7.4905882918865165
Processing file: sub-RD120_ses-20210518_task-rest_run-101_echo-01_physio.tsv.gz
1: sub-RD120_ses-20210518_task-rest_run-101_echo-01_physio.tsv.gz - PPG Rate Mean: 58.260704658592104, RSP Rate Mean: 5.9658373029894225
Processing file: sub-RD120_ses-20210518_task-rest_run-001_echo-01_physio.tsv.gz
2: sub-RD120_ses-20210518_task-rest_run-001_echo-01_physio.tsv.gz - PPG Rate Mean: 58.84331309498293, RSP Rate Mean: 5.907970364988176
Processing file: sub-RD120_ses-20210518_task-rest_run-201_echo-01_physio.tsv.gz
3: sub-RD120_ses-20210518_task-rest_run-201_echo-01_physio.tsv.gz - PPG Rate Mean: 58.89674172591544, RSP Rate Mean: 6.517228742161686
Processing file: sub-RD120_ses-20210518_task-rest_run-102_echo-01_physio.tsv.gz
4: sub-RD120_ses-20210518_task-rest_run-102_echo-01_physio

  warn(
  warn(
  warn(


27: sub-RD119_ses-20210521_task-rest_run-01_echo-01_physio.tsv.gz - PPG Rate Mean: 64.40723505754946, RSP Rate Mean: 18.04605113784032
Processing file: sub-RD119_ses-20210504_task-rest_run-201_echo-01_physio.tsv.gz
28: sub-RD119_ses-20210504_task-rest_run-201_echo-01_physio.tsv.gz - PPG Rate Mean: 58.33241505020791, RSP Rate Mean: 17.531278788500313
Processing file: sub-RD119_ses-20210504_task-rest_run-101_echo-01_physio.tsv.gz
29: sub-RD119_ses-20210504_task-rest_run-101_echo-01_physio.tsv.gz - PPG Rate Mean: 56.629344699769945, RSP Rate Mean: 18.077315736004618
Processing file: sub-RD119_ses-20210504_task-rest_run-001_echo-01_physio.tsv.gz
30: sub-RD119_ses-20210504_task-rest_run-001_echo-01_physio.tsv.gz - PPG Rate Mean: 56.06747038737779, RSP Rate Mean: 17.571213835240208
Processing file: sub-RD119_ses-20210427_task-rest_run-101_echo-01_physio.tsv.gz
31: sub-RD119_ses-20210427_task-rest_run-101_echo-01_physio.tsv.gz - PPG Rate Mean: 77.73643049266545, RSP Rate Mean: 20.075054714348

  warn(
  warn(
  warn(
  output["RSP_Rate_Mean"] = np.nanmean(data["RSP_Rate"].values)
  return _methods._mean(a, axis=axis, dtype=dtype,
  out["MeanBB"] = np.nanmean(bbi)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


58: sub-RD115_ses-20191203_task-rest_run-101_echo-01_physio.tsv.gz - Error: ('NeuroKit error: complexity_embedding(): dimension * delay should be lower than', ' the length of the signal.')
Processing file: sub-RD115_ses-20191203_task-rest_run-102_echo-01_physio.tsv.gz
59: sub-RD115_ses-20191203_task-rest_run-102_echo-01_physio.tsv.gz - PPG Rate Mean: 74.62224300129851, RSP Rate Mean: 9.10491410672664
Processing file: sub-RD115_ses-20191126_task-rest_run-201_echo-01_physio.tsv.gz
60: sub-RD115_ses-20191126_task-rest_run-201_echo-01_physio.tsv.gz - PPG Rate Mean: 80.34885915103396, RSP Rate Mean: 14.894524825814146
Processing file: sub-RD115_ses-20191126_task-rest_run-102_echo-01_physio.tsv.gz
61: sub-RD115_ses-20191126_task-rest_run-102_echo-01_physio.tsv.gz - PPG Rate Mean: 89.63380047836007, RSP Rate Mean: 8.499447240905097
Processing file: sub-RD115_ses-20191126_task-rest_run-001_echo-01_physio.tsv.gz
62: sub-RD115_ses-20191126_task-rest_run-001_echo-01_physio.tsv.gz - PPG Rate Mean:

  warn(
  warn(
  warn(
  output["RSP_Rate_Mean"] = np.nanmean(data["RSP_Rate"].values)
  return _methods._mean(a, axis=axis, dtype=dtype,
  out["MeanBB"] = np.nanmean(bbi)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


76: sub-RD127_ses-20240206_task-rest_run-102_echo-01_physio.tsv.gz - Error: ('NeuroKit error: complexity_embedding(): dimension * delay should be lower than', ' the length of the signal.')
Processing file: sub-RD127_ses-20240206_task-rest_run-201_echo-01_physio.tsv.gz
77: sub-RD127_ses-20240206_task-rest_run-201_echo-01_physio.tsv.gz - PPG Rate Mean: 67.06835009776583, RSP Rate Mean: 9.086695456776642
Processing file: sub-RD127_ses-20240206_task-rest_run-103_echo-01_physio.tsv.gz


  warn(


78: sub-RD127_ses-20240206_task-rest_run-103_echo-01_physio.tsv.gz - PPG Rate Mean: 62.39555322521193, RSP Rate Mean: 10.454000914071646
Processing file: sub-RD127_ses-20240206_task-rest_run-104_echo-01_physio.tsv.gz
79: sub-RD127_ses-20240206_task-rest_run-104_echo-01_physio.tsv.gz - PPG Rate Mean: 65.90410096361126, RSP Rate Mean: 10.37275970941531
Processing file: sub-RD127_ses-20240206_task-rest_run-001_echo-01_physio.tsv.gz
80: sub-RD127_ses-20240206_task-rest_run-001_echo-01_physio.tsv.gz - PPG Rate Mean: 63.36704078976148, RSP Rate Mean: 9.84817000140879
Processing file: sub-RD127_ses-20240206_task-rest_run-101_echo-01_physio.tsv.gz
81: sub-RD127_ses-20240206_task-rest_run-101_echo-01_physio.tsv.gz - PPG Rate Mean: 62.591739031535134, RSP Rate Mean: 10.170358054005101
Processing file: sub-RD127_ses-20240123_task-rest_run-01_echo-01_physio.tsv.gz
82: sub-RD127_ses-20240123_task-rest_run-01_echo-01_physio.tsv.gz - PPG Rate Mean: 63.41625519442403, RSP Rate Mean: 9.650725474139769


  warn(


86: sub-RD111_ses-20190702_task-rest_run-101_echo-01_physio.tsv.gz - PPG Rate Mean: 64.92600307980503, RSP Rate Mean: 12.750408301020242
Processing file: sub-RD111_ses-20190702_task-rest_run-001_echo-01_physio.tsv.gz
87: sub-RD111_ses-20190702_task-rest_run-001_echo-01_physio.tsv.gz - PPG Rate Mean: 56.134111845665466, RSP Rate Mean: 12.561980348874185
Processing file: sub-RD111_ses-20190709_task-rest_run-001_echo-01_physio.tsv.gz


  warn(
  warn(
  warn(


88: sub-RD111_ses-20190709_task-rest_run-001_echo-01_physio.tsv.gz - Error: SVD did not converge in Linear Least Squares
Processing file: sub-RD111_ses-20190709_task-rest_run-201_echo-01_physio.tsv.gz
89: sub-RD111_ses-20190709_task-rest_run-201_echo-01_physio.tsv.gz - PPG Rate Mean: 53.23756598284074, RSP Rate Mean: 10.311567941385968
Processing file: sub-RD111_ses-20190709_task-rest_run-002_echo-01_physio.tsv.gz
90: sub-RD111_ses-20190709_task-rest_run-002_echo-01_physio.tsv.gz - PPG Rate Mean: 54.19497132217097, RSP Rate Mean: 10.922716618357423
Processing file: sub-RD111_ses-20190709_task-rest_run-101_echo-01_physio.tsv.gz
91: sub-RD111_ses-20190709_task-rest_run-101_echo-01_physio.tsv.gz - PPG Rate Mean: 52.28505149771759, RSP Rate Mean: 10.844158179726085
Processing file: sub-RD111_ses-20190813_task-rest_run-02_echo-01_physio.tsv.gz
92: sub-RD111_ses-20190813_task-rest_run-02_echo-01_physio.tsv.gz - PPG Rate Mean: 60.01125268429182, RSP Rate Mean: 12.63786132798996
Processing fil

126: sub-RD101_ses-20170718_task-rest_run-001_echo-01_physio.tsv.gz - PPG Rate Mean: 51.63430911876491, RSP Rate Mean: 17.11795383981119
Processing file: sub-RD101_ses-20170718_task-rest_run-201_echo-01_physio.tsv.gz
127: sub-RD101_ses-20170718_task-rest_run-201_echo-01_physio.tsv.gz - PPG Rate Mean: 53.1942143827571, RSP Rate Mean: 18.527970908281876
Processing file: sub-RD101_ses-20170829_task-rest_run-01_echo-01_physio.tsv.gz
128: sub-RD101_ses-20170829_task-rest_run-01_echo-01_physio.tsv.gz - PPG Rate Mean: 58.16691786213409, RSP Rate Mean: 18.337419961003995
Processing file: sub-RD101_ses-20170725_task-rest_run-001_echo-01_physio.tsv.gz
129: sub-RD101_ses-20170725_task-rest_run-001_echo-01_physio.tsv.gz - PPG Rate Mean: 55.660481059380764, RSP Rate Mean: 17.00939259954707
Processing file: sub-RD101_ses-20170725_task-rest_run-102_echo-01_physio.tsv.gz
130: sub-RD101_ses-20170725_task-rest_run-102_echo-01_physio.tsv.gz - PPG Rate Mean: 62.50829681630087, RSP Rate Mean: 16.8187351985

  warn(


136: sub-RD124_ses-20230321_task-rest_run-101_echo-01_physio.tsv.gz - PPG Rate Mean: 53.397669411067056, RSP Rate Mean: 11.570734281609889
Processing file: sub-RD124_ses-20230328_task-rest_run-102_echo-01_physio.tsv.gz
137: sub-RD124_ses-20230328_task-rest_run-102_echo-01_physio.tsv.gz - PPG Rate Mean: 56.06424175171267, RSP Rate Mean: 7.134316081400716
Processing file: sub-RD124_ses-20230328_task-rest_run-101_echo-01_physio.tsv.gz
138: sub-RD124_ses-20230328_task-rest_run-101_echo-01_physio.tsv.gz - PPG Rate Mean: 51.72393659870025, RSP Rate Mean: 7.194195087393667
Processing file: sub-RD124_ses-20230328_task-rest_run-001_echo-01_physio.tsv.gz
139: sub-RD124_ses-20230328_task-rest_run-001_echo-01_physio.tsv.gz - PPG Rate Mean: 52.43354312294168, RSP Rate Mean: 9.795263988532886
Processing file: sub-RD124_ses-20230328_task-rest_run-201_echo-01_physio.tsv.gz
140: sub-RD124_ses-20230328_task-rest_run-201_echo-01_physio.tsv.gz - PPG Rate Mean: 59.1665316069593, RSP Rate Mean: 5.2390185974

174: sub-RD104_ses-20180213_task-rest_run-001_echo-01_physio.tsv.gz - PPG Rate Mean: 71.55730728648223, RSP Rate Mean: 11.901005453406446
Processing file: sub-RD104_ses-20180213_task-rest_run-201_echo-01_physio.tsv.gz
175: sub-RD104_ses-20180213_task-rest_run-201_echo-01_physio.tsv.gz - PPG Rate Mean: 78.99227430396354, RSP Rate Mean: 10.767515013967454
Processing file: sub-RD104_ses-20180213_task-rest_run-101_echo-01_physio.tsv.gz
176: sub-RD104_ses-20180213_task-rest_run-101_echo-01_physio.tsv.gz - PPG Rate Mean: 70.25703146954442, RSP Rate Mean: 11.460174989165646
Processing file: sub-RD104_ses-20180403_task-rest_run-02_echo-01_physio.tsv.gz
177: sub-RD104_ses-20180403_task-rest_run-02_echo-01_physio.tsv.gz - PPG Rate Mean: 65.50616668950327, RSP Rate Mean: 12.296995097362819
Processing file: sub-RD104_ses-20180206_task-rest_run-001_echo-01_physio.tsv.gz
178: sub-RD104_ses-20180206_task-rest_run-001_echo-01_physio.tsv.gz - PPG Rate Mean: 73.53064328857772, RSP Rate Mean: 15.33687124

  warn(
  warn(
  warn(
  output["RSP_Rate_Mean"] = np.nanmean(data["RSP_Rate"].values)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


205: sub-RD113_ses-20190924_task-rest_run-001_echo-01_physio.tsv.gz - PPG Rate Mean: 66.84829263226901, RSP Rate Mean: 19.089480962537277
Processing file: sub-RD113_ses-20190924_task-rest_run-201_echo-01_physio.tsv.gz
206: sub-RD113_ses-20190924_task-rest_run-201_echo-01_physio.tsv.gz - PPG Rate Mean: 74.73129183063232, RSP Rate Mean: 17.80506939612077
Processing file: sub-RD113_ses-20190917_task-rest_run-01_echo-01_physio.tsv.gz
207: sub-RD113_ses-20190917_task-rest_run-01_echo-01_physio.tsv.gz - PPG Rate Mean: 59.549981069763035, RSP Rate Mean: 8.036664684678108
Final DataFrame:
         sub           ses       task      run     echo  srate  PPG_Rate_Mean  \
0  sub-RD120  ses-20210511  task-rest   run-01  echo-01  500.0      57.352980   
1  sub-RD120  ses-20210518  task-rest  run-101  echo-01  500.0      58.260705   
2  sub-RD120  ses-20210518  task-rest  run-001  echo-01  500.0      58.843313   
3  sub-RD120  ses-20210518  task-rest  run-201  echo-01  500.0      58.896742   
4  sub-