In [None]:
!pip install gdown

This script can be used to unpack the tar files.

The only catch is that we need each file's ID, because the data are hosted on Google Drive. To get it, right-click the file you want to access and copy its link.

For example, for 'NodeTimeseries_3T_HCP1200_MSMAll_ICAd100_ts2.tar.gz', the copied link is:

https://drive.google.com/file/d/1OIGg484nlEmcYsyt_2EbotZ3tpoIRYDA/view?usp=drive_link

The ID is the string of characters between `d/` and `/view`, so in this case '1OIGg484nlEmcYsyt_2EbotZ3tpoIRYDA'.

In [None]:
import gdown, tarfile, os, numpy as np

# Google Drive file ID of the archive to fetch. Known IDs:
#   groupICA_3T_HCP1200_MSMAll.tar.gz              = 1_Pv_qDjYmwwsX0EbUhQ3_LiZtrTOtuM3
#   netmats_3T_HCP1200_MSMAll_ICAd15_ts2.tar.gz    = 1upmYDKsORPIWAPiVyek4XkceOaW7AP2c
#   NodeTimeseries_3T_HCP1200_MSMAll_ICAd15_ts2.tar = 1pF_zYU6BmH4qZayHS-Goiw5u4UgPJiWC
file_id = "1_Pv_qDjYmwwsX0EbUhQ3_LiZtrTOtuM3"

# Working directory for everything this notebook downloads or extracts.
# It lives on the local / Colab filesystem, not in Google Drive.
DATA_DIR = "./hcp_task"
os.makedirs(DATA_DIR, exist_ok=True)

# Step 1: where the downloaded archive is saved. The local file name is only
# a label; it has no effect on which file is downloaded.
output_file = os.path.join(DATA_DIR, "groupICA_3T_HCP1200_MSMAll.tar.gz")

After you put that ID in, the next cell should be able to access the file, and it will download it into a local directory 'hcp_task' (not into Google Drive, but into your coding workspace's directory; if you're running it here in Colab, open the file browser on the left to see it).

This should produce an 'Extracted_files' directory, and a zipped version of it which can be downloaded.

In [None]:
import shutil

# Download the archive from Google Drive into `output_file`.
# Some gdown releases report a refused download (quota exceeded, file not
# shared) by returning None instead of raising, so check the result here
# rather than letting the extraction step fail with an unrelated tar error.
if gdown.download(f"https://drive.google.com/uc?id={file_id}", output_file, quiet=False) is None:
    raise RuntimeError(
        f"Download failed for Google Drive file ID {file_id!r}; "
        "check the ID and the file's sharing settings."
    )

# Step 2: unpack the tar archive.
# Files are extracted into a subdirectory of DATA_DIR on the local / Colab
# filesystem (nothing is written to Google Drive).
extracted_path = os.path.join(DATA_DIR, "Extracted_files")
os.makedirs(extracted_path, exist_ok=True)

# 'r:*' lets tarfile detect the compression itself, so gzip-compressed
# (.tar.gz) and uncompressed (.tar) archives both open.
with tarfile.open(output_file, 'r:*') as tar:
    if hasattr(tarfile, "data_filter"):
        # The archive comes from the network: where this Python supports
        # extraction filters, reject members that would escape
        # `extracted_path` (absolute paths, '..' components, unsafe links).
        tar.extractall(path=extracted_path, filter="data")
    else:
        tar.extractall(path=extracted_path)
print(f"Files extracted to: {extracted_path}")

# Optional: zip the extracted directory so it can be downloaded in one piece.
zip_file_path = os.path.join(DATA_DIR, "Extracted_files.zip")
# make_archive appends ".zip" itself, so pass the path without its extension.
# splitext strips only the final suffix, whatever DATA_DIR contains.
shutil.make_archive(base_name=os.path.splitext(zip_file_path)[0],
                    format="zip",
                    root_dir=extracted_path)

print(f"Extracted_files directory zipped to: {zip_file_path}")


Downloading...
From (original): https://drive.google.com/uc?id=1_Pv_qDjYmwwsX0EbUhQ3_LiZtrTOtuM3
From (redirected): https://drive.google.com/uc?id=1_Pv_qDjYmwwsX0EbUhQ3_LiZtrTOtuM3&confirm=t&uuid=fe5c98ac-93c0-4ba8-be99-6de4058266c7
To: /content/hcp_task/groupICA_3T_HCP1200_MSMAll.tar.gz
100%|██████████| 992M/992M [00:08<00:00, 116MB/s]


Files extracted to: ./hcp_task/Extracted_files
Extracted_files directory zipped to: ./hcp_task/Extracted_files.zip


Subject data is more straightforward to set up, and we shouldn't have to change the ID. This file has 1003 entries, but there are two other subject-ID text files, so which one we need depends on what they represent.

In [None]:
# Step 3: download the subject list.
# NOTE: this reuses the name `file_id`, overwriting the archive ID set in the
# configuration cell. Re-run that cell before re-running the archive download.
file_id = "1Evo506Nx7AXCbq3XNvw5o1qHo5xayUT1"  # Google Drive file ID of the subject list
subject_list_file = os.path.join(DATA_DIR, "subject_lists.txt")  # saved inside DATA_DIR

# Some gdown releases return None instead of raising when the download is
# refused; stop here so np.loadtxt does not fail on a missing file.
if gdown.download(f"https://drive.google.com/uc?id={file_id}", subject_list_file, quiet=False) is None:
    raise RuntimeError(
        f"Download failed for Google Drive file ID {file_id!r}; "
        "check the ID and the file's sharing settings."
    )

# Step 4: load the subject IDs as a NumPy array of strings.
# ndmin=1 keeps the result one-dimensional even for a single-entry file, so
# slicing and len() below always work.
subjects = np.loadtxt(subject_list_file, dtype='str', ndmin=1)
print(f"Subjects loaded: {subjects[:5]}")  # first five IDs as a sanity check
print(f"Number of subjects: {len(subjects)}")  # compare against N_SUBJECTS

Downloading...
From: https://drive.google.com/uc?id=1Evo506Nx7AXCbq3XNvw5o1qHo5xayUT1
To: /content/hcp_task/subject_lists.txt
100%|██████████| 7.02k/7.02k [00:00<00:00, 3.00MB/s]

Subjects loaded: ['100206' '100307' '100408' '100610' '101006']





The scripts tar file seems to contain a heap of MATLAB and other code for working with the data, which may be very helpful.

In [None]:
import shutil

# Step 5: download and unpack the scripts archive.
# NOTE: this reuses the names `file_id` and `output_file` from earlier cells.
file_id = "1fHgoP-NMQQOQBkyY9djWy4GwZ4wa31wJ"  # Google Drive file ID of the scripts archive
# Give the download its own file name. The original name "scripts" differs
# only by case from the "Scripts" directory created below, which makes
# os.makedirs fail on case-insensitive filesystems.
output_file = os.path.join(DATA_DIR, "scripts.tar.gz")

# Some gdown releases return None instead of raising when the download is
# refused; stop here rather than failing later with an unrelated tar error.
if gdown.download(f"https://drive.google.com/uc?id={file_id}", output_file, quiet=False) is None:
    raise RuntimeError(
        f"Download failed for Google Drive file ID {file_id!r}; "
        "check the ID and the file's sharing settings."
    )

# Extract the scripts into their own subdirectory of DATA_DIR.
extracted_path = os.path.join(DATA_DIR, "Scripts")
os.makedirs(extracted_path, exist_ok=True)

# 'r:*' lets tarfile detect the compression itself (gzip or none).
with tarfile.open(output_file, 'r:*') as tar:
    if hasattr(tarfile, "data_filter"):
        # The archive comes from the network: where this Python supports
        # extraction filters, reject members that would escape
        # `extracted_path` (absolute paths, '..' components, unsafe links).
        tar.extractall(path=extracted_path, filter="data")
    else:
        tar.extractall(path=extracted_path)
print(f"Files extracted to: {extracted_path}")

# Optional: zip the scripts directory so it can be downloaded in one piece.
zip_file_path = os.path.join(DATA_DIR, "Scripts.zip")
# make_archive appends ".zip" itself, so pass the path without its extension.
shutil.make_archive(base_name=os.path.splitext(zip_file_path)[0],
                    format="zip",
                    root_dir=extracted_path)

print(f"Scripts directory zipped to: {zip_file_path}")

Downloading...
From: https://drive.google.com/uc?id=1fHgoP-NMQQOQBkyY9djWy4GwZ4wa31wJ
To: /content/hcp_task/scripts
100%|██████████| 9.35k/9.35k [00:00<00:00, 11.9MB/s]

Files extracted to: ./hcp_task/Scripts
Scripts directory zipped to: ./hcp_task/Scripts.zip





Below are parameters taken from the NMA workbook - we can't quite construct a matrix from these until we find the EV files though

In [None]:
# Constants copied from the NMA workbook; they may need adapting to this dataset.

# Number of subjects; needs to be matched to the actual subject count.
N_SUBJECTS = 1003

# Number of parcels (if using the Glasser 360-ROI parcellation).
N_PARCELS = 360

# Time resolution in seconds (the acquisition parameters were identical for all tasks).
TR = 0.72

# The parcels are matched across hemispheres, in the same order.
HEMIS = ["Right", "Left"]

# Each experiment was repeated twice in each subject.
RUNS = ['LR', 'RL']
N_RUNS = 2

# The 7 tasks, each mapped to a dictionary whose 'cond' entry lists its conditions.
EXPERIMENTS = {
    task: {'cond': conditions}
    for task, conditions in (
        ('MOTOR', ['lf', 'rf', 'lh', 'rh', 't', 'cue']),
        ('WM', ['0bk_body', '0bk_faces', '0bk_places', '0bk_tools',
                '2bk_body', '2bk_faces', '2bk_places', '2bk_tools']),
        ('EMOTION', ['fear', 'neut']),
        ('GAMBLING', ['loss', 'win']),
        ('LANGUAGE', ['math', 'story']),
        ('RELATIONAL', ['match', 'relation']),
        ('SOCIAL', ['ment', 'rnd']),
    )
}