# Prepare White-Matter Connectivity Blueprints

This notebook loads all non-redundant individual level HCP task contrasts, 
optionally residualizes them based on the group average task contrasts, and concatenates them into a single file.

**Inputs**

Individual task contrasts<br>
`HCP_1200/{subj}/MNINonLinear/Results/tfMRI_{para}/tfMRI_{para}_hp200_s2_level2_MSMAll.feat/{subj}_tfMRI_{para}_level2_hp200_s2_MSMAll.dscalar.nii`

**Outputs**

List of non-redundant HCP task contrasts<br>
`data/task_contrasts_47_unique_names.txt`

Individual task contrasts (254 x 47 contrasts in single file)<br>
`/scratch/users/robert.scholz2/acc_dists/all_47_tasks_254_full_unrelated.raw.npy`

Individual task contrasts, residualized on group average contrasts (254 x 47 contrasts in single file)<br>
`/scratch/users/robert.scholz2/acc_dists/all_47_tasks_254_full_unrelated.yresid.from_zscored.npy`<br>
\+ residualization parameters `/scratch/users/robert.scholz2/acc_dists/all_47_tasks_254_full_unrelated.yresid.from_zscored.params.npy`


<br>

In [2]:
from tqdm import tqdm
from IPython.display import clear_output
import numpy as np
import nibabel as nib
nib.imageglobals.logger.level = 40
import os

# Deferred f-string helper: wraps the given template text in f'...' and eval()s it.
# NOTE(review): relies on eval() and shadows the builtin `str`; only pass trusted templates.
# A template containing a single quote breaks the generated f'...' expression.
f= lambda str: eval("f'" + f"{str}" + "'")

import hcp_utils as hcp
import scipy.stats

  warn("Fetchers from the nilearn.datasets module will be "


In [7]:
from lib.default_config import contrast_info
# Subject IDs: parsed numerically by loadtxt, then cast to int and finally to str.
full_subjs = (
    np.loadtxt("data/subjs_hcp254_full_unrelated.txt")
    .astype(int)
    .astype(str)
)

# Collect all 47 non-redundant task contrasts

In [6]:
# Paradigm names: the distinct first fields of the contrast_info entries (np.unique also sorts them).
paradigms = np.unique([c[0] for c in contrast_info]).tolist()
print("Tasks:", paradigms)

# Long-form names of the contrasts that are kept; the reversed-order counterparts
# (e.g. SHAPES-FACES) are listed separately in `excluded_contrasts`.
unique_contrasts_long = ['tfMRI_EMOTION_level2_FACES_hp200_s2_MSMAll', 'tfMRI_EMOTION_level2_SHAPES_hp200_s2_MSMAll', 'tfMRI_EMOTION_level2_FACES-SHAPES_hp200_s2_MSMAll', 'tfMRI_GAMBLING_level2_PUNISH_hp200_s2_MSMAll', 'tfMRI_GAMBLING_level2_REWARD_hp200_s2_MSMAll', 'tfMRI_GAMBLING_level2_REWARD-PUNISH_hp200_s2_MSMAll', 'tfMRI_LANGUAGE_level2_MATH_hp200_s2_MSMAll', 'tfMRI_LANGUAGE_level2_STORY_hp200_s2_MSMAll', 'tfMRI_LANGUAGE_level2_STORY-MATH_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_CUE_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_LF_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_LH_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_RF_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_RH_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_T_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_AVG_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_CUE-AVG_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_LF-AVG_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_LH-AVG_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_RF-AVG_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_RH-AVG_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_T-AVG_hp200_s2_MSMAll', 'tfMRI_RELATIONAL_level2_MATCH_hp200_s2_MSMAll', 'tfMRI_RELATIONAL_level2_REL_hp200_s2_MSMAll', 'tfMRI_RELATIONAL_level2_REL-MATCH_hp200_s2_MSMAll', 'tfMRI_SOCIAL_level2_RANDOM_hp200_s2_MSMAll', 'tfMRI_SOCIAL_level2_TOM_hp200_s2_MSMAll', 'tfMRI_SOCIAL_level2_TOM-RANDOM_hp200_s2_MSMAll', 'tfMRI_WM_level2_2BK_BODY_hp200_s2_MSMAll', 'tfMRI_WM_level2_2BK_FACE_hp200_s2_MSMAll', 'tfMRI_WM_level2_2BK_PLACE_hp200_s2_MSMAll', 'tfMRI_WM_level2_2BK_TOOL_hp200_s2_MSMAll', 'tfMRI_WM_level2_0BK_BODY_hp200_s2_MSMAll', 'tfMRI_WM_level2_0BK_FACE_hp200_s2_MSMAll', 'tfMRI_WM_level2_0BK_PLACE_hp200_s2_MSMAll', 'tfMRI_WM_level2_0BK_TOOL_hp200_s2_MSMAll', 'tfMRI_WM_level2_2BK_hp200_s2_MSMAll', 'tfMRI_WM_level2_0BK_hp200_s2_MSMAll', 'tfMRI_WM_level2_2BK-0BK_hp200_s2_MSMAll', 'tfMRI_WM_level2_BODY_hp200_s2_MSMAll', 'tfMRI_WM_level2_FACE_hp200_s2_MSMAll', 'tfMRI_WM_level2_PLACE_hp200_s2_MSMAll', 'tfMRI_WM_level2_TOOL_hp200_s2_MSMAll', 'tfMRI_WM_level2_BODY-AVG_hp200_s2_MSMAll', 
'tfMRI_WM_level2_FACE-AVG_hp200_s2_MSMAll', 'tfMRI_WM_level2_PLACE-AVG_hp200_s2_MSMAll', 'tfMRI_WM_level2_TOOL-AVG_hp200_s2_MSMAll'];
# Short names: drop the 6-character 'tfMRI_' prefix, the 16-character '_hp200_s2_MSMAll'
# suffix and the '_level2' infix, e.g.
# 'tfMRI_EMOTION_level2_FACES_hp200_s2_MSMAll' -> 'EMOTION_FACES'.
unique_contrasts = [uc[6:-16].replace("_level2","") for uc in unique_contrasts_long]
# One-off export of the short names (left disabled):
#np.savetxt("data/task_contrasts_47_unique_names.txt", unique_contrasts, fmt="%s")
print("Contrasts:", len(unique_contrasts), unique_contrasts[:14])

Tasks: ['EMOTION', 'GAMBLING', 'LANGUAGE', 'MOTOR', 'RELATIONAL', 'SOCIAL', 'WM']
Contrasts: 47 ['EMOTION_FACES', 'EMOTION_SHAPES', 'EMOTION_FACES-SHAPES', 'GAMBLING_PUNISH', 'GAMBLING_REWARD', 'GAMBLING_REWARD-PUNISH', 'LANGUAGE_MATH', 'LANGUAGE_STORY', 'LANGUAGE_STORY-MATH', 'MOTOR_CUE', 'MOTOR_LF', 'MOTOR_LH', 'MOTOR_RF', 'MOTOR_RH']


In [22]:
# Excluded contrasts (because they are redundant): each one is the reversed-order
# counterpart of a contrast kept in `unique_contrasts_long` (e.g. SHAPES-FACES vs FACES-SHAPES).
# The list is kept for reference only; no code visible in this notebook reads it.
excluded_contrasts=[ 'tfMRI_EMOTION_level2_SHAPES-FACES_hp200_s2_MSMAll', 'tfMRI_GAMBLING_level2_PUNISH-REWARD_hp200_s2_MSMAll', 'tfMRI_LANGUAGE_level2_MATH-STORY_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_AVG-CUE_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_AVG-LF_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_AVG-LH_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_AVG-RF_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_AVG-RH_hp200_s2_MSMAll', 'tfMRI_MOTOR_level2_AVG-T_hp200_s2_MSMAll',  'tfMRI_RELATIONAL_level2_MATCH-REL_hp200_s2_MSMAll', 'tfMRI_SOCIAL_level2_RANDOM-TOM_hp200_s2_MSMAll', 'tfMRI_WM_level2_0BK-2BK_hp200_s2_MSMAll', 'tfMRI_WM_level2_AVG-BODY_hp200_s2_MSMAll', 'tfMRI_WM_level2_AVG-FACE_hp200_s2_MSMAll', 'tfMRI_WM_level2_AVG-PLACE_hp200_s2_MSMAll', 'tfMRI_WM_level2_AVG-TOOL_hp200_s2_MSMAll']

In [7]:
def _short_contrast_name(map_name, subj):
  """Reduce '{subj}_tfMRI_{PARA}_level2_{NAME}_hp200_s2_MSMAll' to '{PARA}_{NAME}'."""
  name = map_name
  for lead in (subj + "_", "tfMRI_"):
    if name.startswith(lead): name = name[len(lead):]
  tail = "_hp200_s2_MSMAll"
  if name.endswith(tail): name = name[:-len(tail)]
  return name.replace("_level2", "")


def load_subj_full_task_data(subj, contrast_ids, paradigms, dtype=np.float32, v=False, ret_keys=False, \
                            local_dir = "/scratch/users/robert.scholz2/", bma_slice=slice(0, 29696)):
  """Load the level-2 task contrast maps of one subject into a single array.

  For every paradigm the subject's `..._level2_hp200_s2_MSMAll.dscalar.nii` file is read
  and each named map in it is stored under its map name, restricted to `bma_slice`.

  Parameters
  ----------
  subj : subject ID (converted with str()).
  contrast_ids : the string "all" (every loaded map, in load order) or an iterable of
      contrast names. A name is resolved first with the original rule (map name starts
      with f"{subj}_{cid[:-1]}", which fits long names such as
      'tfMRI_EMOTION_level2_FACES_hp200_s2_MSMAll') and otherwise by an exact match on the
      shortened map name (e.g. 'EMOTION_FACES').
  paradigms : iterable of paradigm names used to build the file paths.
  dtype : dtype of the returned array.
  v : if truthy, print progress and the resolved map names.
  ret_keys : if truthy, also return the list of selected map names.
  local_dir : directory prefix that contains `HCP_1200/`.
  bma_slice : slice applied to the second axis of each file's data array.

  Returns
  -------
  Array of shape (len(bma_slice range), n_selected_contrasts), plus the list of map
  names when `ret_keys` is truthy.

  Raises
  ------
  KeyError if a requested contrast cannot be matched to any loaded map.
  """
  subj = str(subj)
  data = {}
  for para in paradigms:
    if v: print(para, end=" ")
    fn = local_dir + f"HCP_1200/{subj}/MNINonLinear/Results/tfMRI_{para}/tfMRI_{para}_hp200_s2_level2_MSMAll.feat/{subj}_tfMRI_{para}_level2_hp200_s2_MSMAll.dscalar.nii"
    cifti = nib.load(fn)
    cnames = cifti.header.get_axis(0).name
    darr = cifti.get_fdata()[:, bma_slice]
    data.update({name: darr[i] for i, name in enumerate(cnames)})

  if v: print(len(data))
  dkeys = list(data)

  # isinstance guard: comparing a numpy array of names with "all" is elementwise and
  # cannot be used as a plain truth value.
  if isinstance(contrast_ids, str) and contrast_ids == "all":
    contained = list(dkeys)
    if v:
      for key in contained: print(key)
  else:
    # shortened name -> first map carrying it (fallback for short contrast ids)
    by_short_name = {}
    for key in dkeys: by_short_name.setdefault(_short_contrast_name(key, subj), key)

    contained = []
    for cid in contrast_ids:
      wanted = subj + "_" + cid[:-1]
      key = next((k for k in dkeys if k.startswith(wanted)), None)
      if key is None: key = by_short_name.get(str(cid))
      if key is None:
        raise KeyError(f"No task map found for contrast {cid!r} of subject {subj}")
      if v: print(wanted, "\t", key)
      contained.append(key)

  tdata = np.array([data[key] for key in contained]).T.astype(dtype)
  if ret_keys: return tdata, contained
  return tdata


In [19]:
# Sanity check on the first subject: which map name was picked for each requested contrast.
subj0_task_names = load_subj_full_task_data(
    full_subjs[0], unique_contrasts, paradigms, ret_keys=True
)[1]
(len(subj0_task_names), subj0_task_names[:10])

(47,
 ['100206_tfMRI_EMOTION_level2_FACES_hp200_s2_MSMAll',
  '100206_tfMRI_EMOTION_level2_SHAPES_hp200_s2_MSMAll',
  '100206_tfMRI_EMOTION_level2_FACES-SHAPES_hp200_s2_MSMAll',
  '100206_tfMRI_GAMBLING_level2_PUNISH_hp200_s2_MSMAll',
  '100206_tfMRI_GAMBLING_level2_REWARD_hp200_s2_MSMAll',
  '100206_tfMRI_GAMBLING_level2_REWARD-PUNISH_hp200_s2_MSMAll',
  '100206_tfMRI_LANGUAGE_level2_MATH_hp200_s2_MSMAll',
  '100206_tfMRI_LANGUAGE_level2_STORY_hp200_s2_MSMAll',
  '100206_tfMRI_LANGUAGE_level2_STORY-MATH_hp200_s2_MSMAll',
  '100206_tfMRI_MOTOR_level2_CUE_hp200_s2_MSMAll'])

In [20]:
# Build a dict {subject ID: contrast array} over all subjects (progress shown via tqdm).
data = {
    subj: load_subj_full_task_data(subj, unique_contrasts, paradigms, v=False)
    for subj in tqdm(full_subjs)
}

100%|██████████| 254/254 [16:39<00:00,  3.93s/it]


In [21]:
# Write the accumulated dict once; an already existing file is left untouched.
fn = "/scratch/users/robert.scholz2/acc_dists/all_47_tasks_254_full_unrelated.raw.npy"
if not os.path.exists(fn):
    np.save(fn, data)

In [32]:
!ls -ash {fn}

1.4G /scratch/users/robert.scholz2/acc_dists/all_47_tasks_254_full_unrelated.raw.npy


# Create residualized task maps

In [None]:
from lib.stats import residualize

def residualize_subj_task_data(sdata, y_mean_task_maps):
    """Residualize each task column of one subject against the matching group-mean column.

    For column index t, `residualize(sdata[:, t], y_mean_task_maps[:, t], return_reg=1)`
    is called; its first return value is stored as the residual column and the
    returned regression object supplies `intercept_[0]` and `coef_[0, 0]`.

    Parameters
    ----------
    sdata : 2-D array, one column per task contrast.
    y_mean_task_maps : 2-D array whose last axis length gives the number of tasks.

    Returns
    -------
    (residuals, fit_params): `residuals` has the shape and dtype of `sdata`;
    `fit_params` has shape (n_tasks, 2) with columns (intercept, coefficient).
    """
    task_count = y_mean_task_maps.shape[-1]
    residuals = np.zeros_like(sdata)
    fit_params = np.zeros((task_count, 2))
    for col in range(task_count):
        col_resid, model = residualize(sdata[:, col], y_mean_task_maps[:, col], return_reg=1)
        fit_params[col, 0] = model.intercept_[0]
        fit_params[col, 1] = model.coef_[0, 0]
        residuals[:, col] = col_resid
    return residuals, fit_params


In [None]:
# `yfull_task_data` is not created anywhere earlier in this notebook, so a fresh kernel
# would fail here. Reload it from disk when it is missing.
# NOTE(review): assumes it is the {subject: array} dict written to the *.raw.npy file
# in the collection section — confirm. allow_pickle is needed because np.save stores
# a dict as a pickled object array; only load files you produced yourself.
if "yfull_task_data" not in globals():
    yfull_task_data = np.load(
        "/scratch/users/robert.scholz2/acc_dists/all_47_tasks_254_full_unrelated.raw.npy",
        allow_pickle=True,
    ).item()

print("Sample subject data:", yfull_task_data['100206'].shape)
# stack the per-subject arrays along a new leading (subject) axis
y_task_maps_raw = np.array(list(yfull_task_data.values()))
print(y_task_maps_raw.shape)
# z-score along axis 1 of the stacked array, then average over subjects (axis 0)
y_task_maps_zsc = scipy.stats.zscore(y_task_maps_raw, axis=1)
y_mean_task_maps = y_task_maps_zsc.mean(0)
print(y_mean_task_maps.shape)

In [None]:
# Residualize every subject's maps against the group-mean maps;
# results are keyed by subject ID.
resdata, resparam = {}, {}
for subj, sdata in tqdm(yfull_task_data.items()):
    # the subject maps need to be z-scored (along axis 0) before residualizing
    sdataz = scipy.stats.zscore(sdata, axis=0)
    resid, mparams = residualize_subj_task_data(sdataz, y_mean_task_maps)
    resdata[subj], resparam[subj] = resid, mparams

In [None]:
# Persist the residual maps ...
fn = "/scratch/users/robert.scholz2/acc_dists/all_47_tasks_254_full_unrelated.yresid.from_zscored.npy"
np.save(fn, resdata)

# ... and the per-subject (intercept, coefficient) parameters in a companion file.
fn = "/scratch/users/robert.scholz2/acc_dists/all_47_tasks_254_full_unrelated.yresid.from_zscored.params.npy"
np.save(fn, resparam)

In [None]:
def recreate_full_ymap(y_resid, y_mean_task_maps, params):
  """Rebuild maps from residuals: y_resid + (y_mean_task_maps * coefficient + intercept).

  `params[:, 0]` holds the intercepts and `params[:, 1]` the coefficients; both are
  broadcast against the last axis of `y_mean_task_maps`.
  """
  intercepts = params[:, 0]
  slopes = params[:, 1]
  fitted = (y_mean_task_maps * slopes) + intercepts
  return y_resid + fitted

In [None]:
# Round-trip check for subject 100206, column 6: plot the z-scored original map next to
# the map rebuilt from the residuals and the stored regression parameters.
# NOTE(review): plot_29k is neither defined nor imported in this notebook — confirm
# which module provides it and import it before running this cell.
data_ = recreate_full_ymap(resdata["100206"], y_mean_task_maps, resparam["100206"])
plot_29k(scipy.stats.zscore(yfull_task_data["100206"][:, 6]), title="original")
plot_29k(data_[:, 6], title="recreated")

# Prepare them for BrainSurfCNN (optional)

In [None]:
import scipy
from lib.default_config import task_names, tmsmall, smooth_lv, tmap_type, full_subj_path 

def get_hcp_task_contrast(subj, task_cope_ctr = ('SOCIAL', '6', 'TOM-RANDOM'), 
                          supfolder="HCP_1200/", local_dir = "/scratch/users/robert.scholz2/",
                          bma_slice=slice(None), scalar_slice=slice(None), zscore=False, v=False):
  """Load one level-2 cope map of a subject from its `.dtseries.nii` file.

  The path is built from the module-level settings `smooth_lv`, `tmsmall` and
  `tmap_type` together with the task name and cope number in `task_cope_ctr`
  (a 3-tuple: task, cope number, contrast label; the label is not used for the path).

  Parameters
  ----------
  subj : subject ID used in the path.
  task_cope_ctr : (task, cope_num, contrast_label).
  supfolder, local_dir : path prefixes, concatenated as local_dir + supfolder + file.
  bma_slice : slice applied to the second axis of the loaded data.
  scalar_slice : slice applied to the first axis of the loaded data.
  zscore : if truthy, z-score the result along its last axis.
  v : if truthy, print the relative file path.

  Returns
  -------
  The sliced (and optionally z-scored) data array.
  """
  task, cope_num, _contrast_label = task_cope_ctr
  rel_path = f'{subj}/MNINonLinear/Results/tfMRI_{task}/tfMRI_{task}_hp200_s{smooth_lv}_level2{tmsmall}.feat/GrayordinatesStats/cope{cope_num}.feat/{tmap_type}.dtseries.nii'
  if v: print(rel_path)
  image = nib.load(local_dir + supfolder + rel_path)
  values = image.get_fdata()[scalar_slice, bma_slice]
  if not zscore:
    return values
  return scipy.stats.zscore(values, axis=-1)

In [None]:
#https://github.com/ngohgia/brain-surf-cnn/blob/master/preprocess/4_join_all_task_contrasts.py

contrasts_dir = "/scratch/users/robert.scholz2/hcp/joint_left_task_contrasts"
# np.save does not create missing directories; make sure the target exists up front
os.makedirs(contrasts_dir, exist_ok=True)

# NOTE(review): all_subjs is not defined anywhere in this notebook — confirm which
# subject list is intended before running this cell.
for subj in tqdm(all_subjs, desc="Concat and save task maps"):
  subj_task_data_file = os.path.join(contrasts_dir, "%s_joint_L_task_contrasts.npy" % subj)

  if os.path.exists(subj_task_data_file):
    print(f"Skipping subject {subj} as the task maps file already seem to exist")
    continue

  # each call returns the rows selected by scalar_slice, restricted to the first 29696
  # columns (the left hemisphere); pass zscore=True to standardize each map instead
  tdata = [get_hcp_task_contrast(subj, ci, bma_slice=slice(0, 29696), zscore=False) for ci in contrast_info]
  # stack the contrasts along axis 0: one block of rows per entry of contrast_info
  subj_task_data = np.concatenate(tdata, axis=0)
  np.save(subj_task_data_file, subj_task_data)
  