<a href="https://colab.research.google.com/github/robml/MultiDeepRLNeuroGambling/blob/main/fmri_baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Baseline for HCP Gambling fMRI reading prediction task based on average of win/loss/neutral

In [3]:
import os, requests
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# The data shared for NMA projects is a subset of the full HCP dataset
N_SUBJECTS = 100

# The data have already been aggregated into ROIs from the Glasser parcellation
N_PARCELS = 360

# The acquisition parameters for all tasks were identical
TR = 0.72  # Time resolution, in seconds

# The parcels are matched across hemispheres with the same order
HEMIS = ["Right", "Left"]

# Each experiment was repeated twice in each subject
RUNS   = ['LR','RL']
N_RUNS = 2

# There are 7 tasks. Each has a number of 'conditions'
# TIP: look inside the data folders for more fine-graned conditions

EXPERIMENTS = {
    'GAMBLING'   : {'cond':['loss','win']},
}

In [4]:
#@title Loading Data and Configs
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.style.use("https://raw.githubusercontent.com/NeuromatchAcademy/course-content/main/nma.mplstyle")

fname = "hcp_task.tgz"
url = "https://osf.io/2y3fw/download"

if not os.path.isfile(fname):
  try:
    r = requests.get(url)
  except requests.ConnectionError:
    print("!!! Failed to download data !!!")
  else:
    if r.status_code != requests.codes.ok:
      print("!!! Failed to download data !!!")
    else:
      with open(fname, "wb") as fid:
        fid.write(r.content)

# The download cells will store the data in nested directories starting here:
HCP_DIR = "./hcp_task"

# importing the "tarfile" module
import tarfile

# open file
with tarfile.open(fname) as tfile:
  # extracting file
  tfile.extractall('.')

subjects = np.loadtxt(os.path.join(HCP_DIR, 'subjects_list.txt'), dtype='str')

In [5]:
# Load Regions
regions = np.load(f"{HCP_DIR}/regions.npy").T
region_info = dict(
    name=regions[0].tolist(),
    network=regions[1],
    hemi=['Right']*int(N_PARCELS/2) + ['Left']*int(N_PARCELS/2),
)

In [6]:
# Helper functions: REPLACE WITH INTERVAL DATA WHEN AVAILABLE
def load_single_timeseries(subject, experiment, run, remove_mean=True):
  """Load timeseries data for a single subject and single run.

  Args:
    subject (str):      subject ID to load
    experiment (str):   Name of experiment
    run (int):          (0 or 1)
    remove_mean (bool): If True, subtract the parcel-wise mean (typically the mean BOLD signal is not of interest)

  Returns
    ts (n_parcel x n_timepoint array): Array of BOLD data values

  """
  bold_run  = RUNS[run]
  bold_path = f"{HCP_DIR}/subjects/{subject}/{experiment}/tfMRI_{experiment}_{bold_run}"
  bold_file = "data.npy"
  ts = np.load(f"{bold_path}/{bold_file}")
  if remove_mean:
    ts -= ts.mean(axis=1, keepdims=True)
  return ts


def load_evs(subject, experiment, run):
  """Load EVs (explanatory variables) data for one task experiment.

  Args:
    subject (str): subject ID to load
    experiment (str) : Name of experiment
    run (int): 0 or 1

  Returns
    evs (list of lists): A list of frames associated with each condition

  """
  frames_list = []
  task_key = f'tfMRI_{experiment}_{RUNS[run]}'
  for cond in EXPERIMENTS[experiment]['cond']:
    ev_file  = f"{HCP_DIR}/subjects/{subject}/{experiment}/{task_key}/EVs/{cond}.txt"
    ev_array = np.loadtxt(ev_file, ndmin=2, unpack=True)
    ev       = dict(zip(["onset", "duration", "amplitude"], ev_array))
    # Determine when trial starts, rounded down
    start = np.floor(ev["onset"] / TR).astype(int)
    # Use trial duration to determine how many frames to include for trial
    duration = np.ceil(ev["duration"] / TR).astype(int)
    # Take the range of frames that correspond to this specific trial
    frames = [s + np.arange(0, d) for s, d in zip(start, duration)]
    frames_list.append(frames)

  return frames_list

In [16]:
my_exp = 'GAMBLING'
my_subj = subjects[1]
my_run = 1 # two runs (0 or 1)

data = load_single_timeseries(subject=my_subj,
                              experiment=my_exp,
                              run=my_run,
                              remove_mean=True)
evs = load_evs(subject=my_subj, experiment=my_exp, run=my_run)

In [17]:
def average_frames(data, evs, experiment, cond):
  idx = EXPERIMENTS[experiment]['cond'].index(cond)
  return np.mean(np.concatenate([np.mean(data[:, evs[idx][i]], axis=1, keepdims=True) for i in range(len(evs[idx]))], axis=-1), axis=1)

def baseline():
  group_win, group_loss = 0,0
  for s in subjects:
    for r in [0, 1]:
      data = load_single_timeseries(subject=s, experiment=my_exp,
                                    run=r, remove_mean=True)
      evs = load_evs(subject=s, experiment=my_exp,run=r)

      wins = average_frames(data, evs, my_exp, 'win')
      losses = average_frames(data, evs, my_exp, 'loss')
      group_win += wins; group_loss += losses
  group_win /= len(subjects); group_loss /= len(subjects)
  return group_win, group_loss

In [18]:
baseline_win, baseline_loss = baseline()

In [19]:
baseline_win

array([ 3.45420287e+01,  1.57255672e+01,  5.42124974e+00,  2.80552703e+01,
        2.77239145e+01,  3.63234303e+01,  4.17304550e+01, -6.00795777e+00,
       -8.44747917e+00,  1.58053212e+01,  1.87631810e+01,  5.08136786e+00,
        3.10737251e+01, -4.84366442e+00, -6.59078970e-01,  2.11500280e+01,
        2.04361421e+01,  2.30403565e+01,  3.13398541e+01,  3.45528081e+01,
        3.24738436e+01,  3.20211557e+01,  1.08461173e+01, -1.14394661e+01,
        4.17846200e-01, -5.70711332e-01,  2.09850011e+00,  4.40223488e-02,
        1.09267489e+01, -7.81592000e+00, -1.34147763e+01, -6.47874278e+00,
       -2.00732945e+01, -7.79187758e+00, -1.48985640e+01, -2.90939743e+00,
       -4.67669527e+00, -4.67031075e+00, -6.01807870e+00, -7.98795466e+00,
       -1.03005151e+01, -6.73398112e+00,  1.30146718e+01,  5.23014252e+00,
        2.15798738e+00,  1.13823790e+01,  1.59526530e+00,  2.26739864e+01,
        7.86446552e+00,  2.11407777e+01, -6.97443934e+00, -4.12120383e+00,
       -6.54441484e+00, -

In [20]:
baseline_loss

array([ 29.8158509 ,  14.27385443,   4.99848769,  29.35321436,
        23.06376298,  34.61698724,  36.85784708,  -6.01880475,
       -10.19246615,  12.1984566 ,  18.57787828,   4.94850886,
        17.06484127,  -0.40969455,   5.24737141,  15.37798892,
        15.71775703,  22.68270497,  26.48269274,  24.91185361,
        28.15236007,  29.3524067 ,  12.82689194,  -0.49774543,
         2.415352  ,   6.24431383,  -0.17772339,   3.77998371,
        11.27513036,  -2.50546261,  -8.22096039,  -5.280605  ,
       -11.21563466,  -7.56458175, -14.49050574,  -3.37347247,
        -9.44202669,  -7.69447007,  -5.2473409 ,  -8.77305054,
        -5.76292045, -14.71867539,  10.09134265,   5.32725531,
        -6.94794987,   5.59380631,  -6.42159015,  15.08954695,
         1.26028287,  16.61370334,  -6.92218731,  -6.78653318,
        -9.14423917,  -6.01511985, -10.73721387,   1.05460208,
        -3.90764868,   2.31472439,   2.60371694,  13.08449645,
        -6.17347221,   4.86261081,  15.95242901,  -4.93