# Downloading of the HCP task data

This notebook shows how to download the relevant files from the HCP-YA dataset stored on AWS

**Inputs**

List of HCP subjects<br>
`data/subjs_hcp254_full_unrelated.txt`

**Downloaded Files**

Individual brain surfaces (aligned to FSLR32k; with and without medial wall)<br>
`{subj}/T1w/fsaverage_LR32k/{subj}.L.midthickness_MSMAll.32k_fs_LR.surf.gii`<br>
`{subj}/T1w/fsaverage_LR32k/{subj}.{H}.midthickness_MSMAll.29k.noMW.surf.gii`

Individual task contrasts<br>
`HCP_1200/{subj}/MNINonLinear/Results/tfMRI_{para}/tfMRI_{para}_hp200_s2_level2_MSMAll.feat/{subj}_tfMRI_{para}_level2_hp200_s2_MSMAll.dscalar.nii`

HCP Resting State Group Connectivity Matrix <br>
`HCP_Resources/GroupAvg/HCP_S1200_1003_rfMRI_MSMAll_groupPCA_d4500ROW_zcorr.dconn.nii`

All 4 HCP resting state runs for each subject <br>
reduced to the left hemisphere: shape 4x (29696, 1200) ~ (29696, 1200) <br>
`$SUBJ/MNINonLinear/Results/$SESSION/${SESSION}_Atlas_MSMAll_hp2000_clean.dtseries.nii`

Individual subject ICA component timeseries, shape (200, 4800) <br>
based on group-level ICA components<br>
`HCP_Resources/GroupAvg/HCP_PTN1200/NodeTimeseries_3T_HCP1200_MSMAll_ICAd200_ts2.tar.gz`<br>

<br>

Requires the `awscli` Python package to be installed (files are fetched via `python -m awscli s3 cp`).

In [4]:
from tqdm import tqdm
from IPython.display import clear_output
import numpy as np
import os

from importlib import reload 
import lib.aws_tools
reload(lib.aws_tools)
from lib.aws_tools import download_aws_file, exists_locally_or_on_aws

# download_aws_file:
# calls: python -m awscli s3 cp {aws_bucket}{file} {local_dir}{file}


def f(template):
  """Render `template` as if it had been written as an f-string literal.

  The `{...}` fields of the template are evaluated against the current
  module-level (notebook) variables, e.g. `subj`, `H`, `task`, `cope_num`.
  That is why the loops in this notebook must bind exactly those names
  before calling `f`.

  Parameters
  ----------
  template : str
      Path template containing f-string style replacement fields.

  Returns
  -------
  str
      The template with every replacement field evaluated and substituted.

  Notes
  -----
  This uses `eval`, so only pass templates from trusted sources (here: the
  project's own configuration), never user-supplied text.
  """
  # repr() produces a correctly quoted and escaped string literal, so templates
  # that contain quote characters no longer break the generated source.
  return eval("f" + repr(str(template)), globals())

import hcp_utils as hcp

  warn("Fetchers from the nilearn.datasets module will be "
pixdim[1,2,3] should be non-zero; setting 0 dims to 1


In [5]:
# Hemisphere of interest; substituted for {H} when the templates below are rendered.
H = "L"

# Path templates of the individual midthickness surfaces.
# 32k fs_LR mesh:
nativ_32k_surf = "{subj}/T1w/fsaverage_LR32k/{subj}.{H}.midthickness_MSMAll.32k_fs_LR.surf.gii"
# 29k mesh without the medial wall ("noMW"):
nativ_29k_surf_noMW = "{subj}/T1w/fsaverage_LR32k/{subj}.{H}.midthickness_MSMAll.29k.noMW.surf.gii"

In [15]:
# Path template for the task contrasts, plus the list of task paradigms.
from lib.default_config import (
    task_fmri_sam, tmap_type, smooth_lv, tmsmall, contrast_info,
)

# Each contrast entry unpacks as (task, cope_num, contr); collect the distinct
# task names. np.unique returns them sorted.
paradigms = np.unique([entry[0] for entry in contrast_info]).tolist()
print(paradigms)

# Preview the rendered template for the first contrast, using a placeholder
# subject id. f(...) reads task / cope_num / subj from the notebook globals.
task, cope_num, contr = contrast_info[0]
subj = "[subj]"
f(task_fmri_sam)

['EMOTION', 'GAMBLING', 'LANGUAGE', 'MOTOR', 'RELATIONAL', 'SOCIAL', 'WM']


'[subj]/MNINonLinear/Results/tfMRI_WM/tfMRI_WM_hp200_s2_level2_MSMAll.feat/GrayordinatesStats/cope19.feat/tstat1.dtseries.nii'

# Checking the existence of data on AWS

In [None]:
# Determine which candidate subjects have all required files (locally or on AWS).
# A subject is kept only if its 32k surface and every task contrast exist.
subjs = np.loadtxt("data/subjs_hcp255_full_unrelated.txt").astype(int).astype(str)

failed_subjs = []

for subj in tqdm(subjs, desc="Checking the existence of files"):
  # f(...) renders the templates from the notebook globals, so the loop
  # variables must keep the names used inside the templates (subj, task, cope_num, ...).
  if not exists_locally_or_on_aws("HCP_1200/" + f(nativ_32k_surf)):
    failed_subjs.append(subj)
    continue

  for (task, cope_num, contr) in contrast_info:
    if not exists_locally_or_on_aws("HCP_1200/" + f(task_fmri_sam)):
      failed_subjs.append(subj)
      # One missing contrast is enough to exclude the subject: stop checking
      # the remaining contrasts so the subject is recorded exactly once.
      break

failed_lookup = set(failed_subjs)
full_subjs = [subj for subj in subjs if subj not in failed_lookup]
len(full_subjs)
# To persist the filtered list that the download cells read, uncomment:
#np.savetxt("data/subjs_hcp254_full_unrelated.txt", full_subjs, fmt="%s")
#!cat data/subjs_hcp254_full_unrelated.txt

# Download the data from AWS

In [None]:
# Read the curated subject list. The ids are cast to int and then to str so
# they can be substituted into the path templates.
full_subjs = (
    np.loadtxt("data/subjs_hcp254_full_unrelated.txt")
    .astype(int)
    .astype(str)
)
# Show the number of subjects and the first few ids as a sanity check.
(len(full_subjs), full_subjs[:4])

(254, array(['100206', '100610', '101006', '101309'], dtype='<U21'))

In [None]:
# Download of the native 32k surface and all task contrasts for every subject.

# Files whose download did not succeed. The other download cells in this
# notebook treat a `False` return value of download_aws_file as failure; the
# same convention is assumed here (confirm against lib.aws_tools).
failed_task_downloads = []

for subj in tqdm(full_subjs, desc="Downloading subject data"):
  # f(...) renders the templates from the notebook globals, so the loop
  # variables must keep the names used inside the templates.
  file = "HCP_1200/" + f(nativ_32k_surf)
  local_path = download_aws_file(file)
  if local_path == False:
    failed_task_downloads.append(file)

  for (task, cope_num, contr) in contrast_info:
    file = "HCP_1200/" + f(task_fmri_sam)
    local_path = download_aws_file(file)
    if local_path == False:
      failed_task_downloads.append(file)

  # Keep the cell output short: only the progress of the current subject stays visible.
  clear_output(wait=True)

# Displayed as the cell result; an empty list means every file was fetched.
failed_task_downloads
  

In [None]:
# Fetch the resting-state runs of every subject.

from lib.default_config import rest_file_stub as rs_file_stub, rest_sessions as sessions
# A previous version of this cell listed the sessions inline:
#sessions = ["rfMRI_REST1_LR", "rfMRI_REST1_RL", "rfMRI_REST2_LR", "rfMRI_REST2_RL" ]

# Files that could not be downloaded and the subject each one belongs to
# (a subject appears once per failed file).
failed, failed_subjs = [], []

for subj in tqdm(full_subjs, desc="Downloading subject data"):
  for session in sessions:
    # f(...) fills the template from the notebook globals (subj, session).
    # NOTE(review): unlike the other download cells no "HCP_1200/" prefix is
    # added here - presumably the stub already contains it; confirm.
    file = f(rs_file_stub)
    print(file)
    local_path = download_aws_file(file)
    if local_path == False:
      failed.append(file)
      failed_subjs.append(subj)

  # Drop the printed file names of this subject before moving on.
  clear_output(wait=True)

print(failed_subjs)

In [5]:
# Download other structural files (e.g. cortical thickness, myelin, etc)

from lib.default_config import structural_file_templates

# Notes kept from the original cell:
#
# corrthickness is the thickness with curvature regressed out.  It is appropriate
# for looking at differences between cortical areas in thickness.  The regular
# thickness is appropriate for morphometrics.
#
# Templates as noted in the original cell (the list actually used is the one
# imported from lib.default_config above):
#
# structural_file_templates = [
#   "{subj}/MNINonLinear/fsaverage_LR32k/{subj}.MyelinMap_BC_MSMAll.32k_fs_LR.dscalar.nii",
#   "{subj}/MNINonLinear/fsaverage_LR32k/{subj}.corrThickness_MSMAll.32k_fs_LR.dscalar.nii",
#   "{subj}/MNINonLinear/fsaverage_LR32k/{subj}.curvature_MSMAll.32k_fs_LR.dscalar.nii",
#   "{subj}/MNINonLinear/fsaverage_LR32k/{subj}.sulc_MSMAll.32k_fs_LR.dscalar.nii",
#   "{subj}/MNINonLinear/fsaverage_LR32k/{subj}.thickness_MSMAll.32k_fs_LR.dscalar.nii"]

# Start from empty lists so this cell neither depends on nor appends to the
# failure lists left behind by a previously executed download cell.
failed = []; failed_subjs = []

for subj in tqdm(full_subjs, desc="Downloading subject data"):
  for sf in structural_file_templates:
    # f(...) fills the template from the notebook globals (subj).
    file = f(sf)
    local_path = download_aws_file("HCP_1200/" + file)
    if local_path == False:
      failed.append(file)
      failed_subjs.append(subj)
  clear_output(wait=True)

# Displayed as the cell result: subjects with at least one failed download
# (a subject appears once per failed file).
failed_subjs

5

In [38]:
# Fetch, for every subject and paradigm, the level-2 MSMAll dscalar file.

# Failed files and the subject each one belongs to (one entry per failed file).
failed, failed_subjs = [], []

for subj in tqdm(full_subjs, desc="Downloading full subject task data"):
  for para in paradigms:
    # Adjacent f-string literals are concatenated into a single path.
    file = (
        f"HCP_1200/{subj}/MNINonLinear/Results/tfMRI_{para}/"
        f"tfMRI_{para}_hp200_s2_level2_MSMAll.feat/"
        f"{subj}_tfMRI_{para}_level2_hp200_s2_MSMAll.dscalar.nii"
    )
    local_path = download_aws_file(file)
    if local_path == False:
      failed.append(file)
      failed_subjs.append(subj)
  # Keep only the progress bar visible between subjects.
  clear_output(wait=True)

failed_subjs

Downloading full subject task data: 100%|██████████| 254/254 [3:13:03<00:00, 45.60s/it]


[]