### Download and import necessary packages

In [1]:
# Name of the S3 bucket used by the S3 calls in this notebook.
BUCKET_NAME = "YOUR_BUCKET"
# Replace "YOUR_BUCKET" with the name of your own bucket.

In [2]:
%%capture
!pip install -r /home/neuro/codes/questionnaire_brain/requirements_multivariate_prediction.txt;

In [3]:
import glob
import os
import pickle
import posixpath
import random
import shutil
import sys
import time
from multiprocessing import Pool, cpu_count

import boto3
import nibabel as nib
import numpy as np
import pandas as pd
from nltools.data import Brain_Data
# Suppress warnings so they are not displayed
import warnings
warnings.filterwarnings('ignore')
# Prevent global variables
from noglobal import NoGlobal
noglobal = NoGlobal(globals()).noglobal

In [4]:
# Show the interpreter version in the cell output.
print(sys.version)
# Write the installed package versions to a requirements file in the current working directory.
!pip freeze > requirements_my_concat_social_isolation.txt

3.9.15 | packaged by conda-forge | (main, Nov 22 2022, 08:45:29) 
[GCC 10.4.0]


In [5]:
# Here, we decide your working directory
initial_directory = "/home/neuro/mount/"
data_directory = os.path.join(initial_directory, "run_concat_social_isolation_data")
# Create data_directory (and any missing parents); exist_ok makes this a no-op when it
# already exists and avoids the race between a separate existence check and the creation.
os.makedirs(data_directory, exist_ok=True)

### Set random seeds

In [6]:
def fix_seed(seed):
    """Seed Python's `random` module and NumPy's global RNG with the same value."""
    # Order: the stdlib generator first, then NumPy.
    for seed_rng in (random.seed, np.random.seed):
        seed_rng(seed)


# Use 42 as the seed for both generators
SEED = 42
fix_seed(SEED)

### Set up AWS S3

In [7]:
# Create the AWS config directory if it doesn't exist already.
# The same location is used for the directory and the credentials file (for root, ~ is /root).
aws_dir = os.path.expanduser('~/.aws')
aws_credentials = os.path.join(aws_dir, 'credentials')
os.makedirs(aws_dir, exist_ok=True)
# Put credential on host into guest.
# Copy whenever the credentials file is missing, not only when the directory was just
# created; an existing credentials file is left untouched.
if not os.path.exists(aws_credentials):
    shutil.copy('/home/neuro/credential/your_s3_credentials', aws_credentials)

In [8]:
# Download or Upload to s3 bucket.
@noglobal
def cp_s3(BUCKET_NAME, FROM_PATH, TO_PATH, FILE_NAME, download_or_upload):
    """Copy one file between the local filesystem and an S3 bucket.

    Parameters
    ----------
    BUCKET_NAME : str
        Name of the S3 bucket.
    FROM_PATH : str
        Source directory: an S3 key prefix when downloading, a local directory when uploading.
    TO_PATH : str
        Destination directory: a local directory when downloading, an S3 key prefix when uploading.
    FILE_NAME : str
        File name appended to both FROM_PATH and TO_PATH.
    download_or_upload : str
        Either "download" or "upload".

    Raises
    ------
    ValueError
        If download_or_upload is neither "download" nor "upload".
    """
    # Validate the mode first so a typo fails before any AWS object is created.
    if download_or_upload not in ("download", "upload"):
        raise ValueError('Please specify download or upload.')

    bucket = boto3.resource('s3').Bucket(BUCKET_NAME)

    if download_or_upload == "download":
        # S3 keys always use "/" as the separator, independent of the local OS.
        s3_key = posixpath.join(FROM_PATH, FILE_NAME)
        local_path = os.path.join(TO_PATH, FILE_NAME)
        print("Downloading from S3 bucket!")
        bucket.download_file(s3_key, local_path)
    else:
        local_path = os.path.join(FROM_PATH, FILE_NAME)
        s3_key = posixpath.join(TO_PATH, FILE_NAME)
        print("Uplaoding to S3 bucket!")
        bucket.upload_file(local_path, s3_key)

### Create the list of session IDs from the file keys in the bucket

In [9]:
# Fetches all the file keys in a given bucket using pagination.
def paginate_objects(bucket_name):
    """Return the keys of all objects in an S3 bucket.

    Parameters
    ----------
    bucket_name : str
        Name of the bucket to list.

    Returns
    -------
    list
        The 'Key' of every object, in the order the pages return them;
        an empty list when the bucket holds no objects.
    """
    s3 = boto3.client('s3')
    paginator = s3.get_paginator('list_objects_v2')

    file_list = []
    for page in paginator.paginate(Bucket=bucket_name):
        # A page without objects (e.g. an empty bucket) has no 'Contents' entry,
        # so indexing it directly would raise KeyError.
        file_list.extend(obj['Key'] for obj in page.get('Contents', []))
    return file_list

# List every object key in the bucket once; the filtering cells below reuse this list.
file_list = paginate_objects(BUCKET_NAME)

# Removes the prefix and suffix from a key and returns the remainder.
def rem_str_func(val, prefix=None, suffix=None):
    """Strip a leading prefix and a trailing suffix from `val`.

    Parameters
    ----------
    val : str
        The string (here: an S3 key) to trim.
    prefix : str, optional
        Prefix to remove. Defaults to the notebook-level `prefix_str`.
    suffix : str, optional
        Suffix to remove. Defaults to the notebook-level `suffix_str`.

    Returns
    -------
    str
        `val` without the prefix and suffix; a part that is not present is left as is.
    """
    # Falling back to the globals keeps `map(rem_str_func, keys)` working unchanged,
    # while explicit arguments avoid depending on whichever cell last set them.
    if prefix is None:
        prefix = prefix_str
    if suffix is None:
        suffix = suffix_str
    return val.removeprefix(prefix).removesuffix(suffix)

In [10]:
# Task: CIC

# Key prefix and suffix used to pick the CIC beta images out of the bucket listing.
prefix_str = 'outputdir/glmsingle_output/'
suffix_str = '/beta_img.nii.gz'

# Keep the keys that start with the prefix and show how many there are.
l_start = list(filter(lambda key: key.startswith(prefix_str), file_list))
display(len(l_start))
# Among those, keep the keys that end with the suffix and show how many there are.
l_end = list(filter(lambda key: key.endswith(suffix_str), l_start))
display(len(l_end))

# Strip prefix and suffix from every remaining key to get the CIC session ids.
session_ids_CIC = [rem_str_func(key) for key in l_end]

192

96

In [11]:
# Task: midloc

# Key prefix and suffix used to pick the midloc beta images out of the bucket listing.
prefix_str = 'outputdir/glmsingle_output_midloc/'
suffix_str = '/beta_img.nii.gz'

# Keep the keys that start with the prefix and show how many there are.
l_start = list(filter(lambda key: key.startswith(prefix_str), file_list))
display(len(l_start))
# Among those, keep the keys that end with the suffix and show how many there are.
l_end = list(filter(lambda key: key.endswith(suffix_str), l_start))
display(len(l_end))

# Strip prefix and suffix from every remaining key to get the midloc session ids.
session_ids_midloc = [rem_str_func(key) for key in l_end]

64

32

### Download GLMsingle output

In [16]:
# Downloads the brain data and corresponding event data from S3, and returns it as a Brain_Data object.
@noglobal
def make_brain_data(session_id, BUCKET_NAME, data_directory, s3_dir):
    """Download one session's beta image and event file from S3 and load both.

    Parameters
    ----------
    session_id : str
        Session identifier; used as the folder name both under `s3_dir` and `data_directory`.
    BUCKET_NAME : str
        Name of the S3 bucket to download from.
    data_directory : str
        Local directory under which the session folder is created.
    s3_dir : str
        S3 key prefix that contains the session folder.

    Returns
    -------
    tuple
        (Brain_Data built from "beta_img.nii.gz", DataFrame read from "events.csv").
    """
    # Will be downloaded from... (S3 keys always use "/" as the separator)
    FROM_PATH = posixpath.join(s3_dir, session_id)
    # Will be downloaded to...
    TO_PATH = os.path.join(data_directory, session_id)
    # Make sure the download destination exists; exist_ok avoids a separate existence check.
    os.makedirs(TO_PATH, exist_ok=True)

    # Downloads betamap from s3 bucket.
    print(f"Downloading betamap of {session_id}!")
    beta_file = "beta_img.nii.gz"
    cp_s3(BUCKET_NAME, FROM_PATH, TO_PATH, beta_file, "download")
    # Load it as a nibabel image.
    img = nib.load(os.path.join(TO_PATH, beta_file))
    print("Converting into Brain_Data!")
    # Convert it to a Brain_Data object.
    data = Brain_Data(img)

    # Downloads eventdata from s3 bucket.
    print(f"Downloading eventfile of {session_id}!")
    events_file = "events.csv"
    cp_s3(BUCKET_NAME, FROM_PATH, TO_PATH, events_file, "download")
    # Convert it into a dataframe
    design_df = pd.read_csv(os.path.join(TO_PATH, events_file))

    return data, design_df

### Perform concatenation

In [17]:
# Do concatenation and save concat data
@noglobal
def concat_func(session_ids_CIC, session_ids_midloc, BUCKET_NAME, data_directory):
    """Concatenate the beta maps of all CIC and midloc sessions and upload the result.

    For every session the beta image and event table are downloaded via
    `make_brain_data`, appended to one Brain_Data object / one DataFrame, and the
    'session' and 'trial_type' columns are stored as `Brain_Data.Y`. The result is
    pickled to "<data_directory>/concat_output/braindata.pickle", uploaded to
    "outputdir/concat_output" in the bucket, and `data_directory` is deleted afterwards.

    Parameters
    ----------
    session_ids_CIC : list
        Session ids whose data lives under 'outputdir/glmsingle_output/'.
    session_ids_midloc : list
        Session ids whose data lives under 'outputdir/glmsingle_output_midloc/'.
    BUCKET_NAME : str
        Name of the S3 bucket.
    data_directory : str
        Local working directory; it is removed (with everything in it) at the end.

    Returns
    -------
    Brain_Data
        The concatenated data with `.Y` set to the session / trial_type table.
    """
    print(f"Starting concatenation for {session_ids_CIC} and {session_ids_midloc}!")

    FROM_PATH = os.path.join(data_directory, 'concat_output')
    TO_PATH = "outputdir/concat_output"

    os.makedirs(FROM_PATH, exist_ok=True)

    braindata = Brain_Data()
    df = pd.DataFrame()

    # One entry per task: (session ids, S3 prefix, drop the "Rati" rows from the event table?)
    # NOTE(review): "Rati" rows are removed from the event table only, not from the beta
    # maps; this presumably relies on the CIC beta images not containing those trials - confirm.
    task_specs = [
        (session_ids_CIC, 'outputdir/glmsingle_output/', True),
        (session_ids_midloc, 'outputdir/glmsingle_output_midloc/', False),
    ]

    # Concatenating Brain_Data and design_df of all sessions, CIC first, then midloc
    for session_ids, s3_dir, drop_rati in task_specs:
        for session_id in session_ids:
            # Obtain brain data and event data for each session
            braindata_current, design_df_concat = make_brain_data(session_id, BUCKET_NAME, data_directory, s3_dir)
            # Add a column named "session" which includes the particular session_id
            design_df_concat['session'] = session_id
            if drop_rati:
                design_df_concat = design_df_concat[design_df_concat['trial_type'] != "Rati"].reset_index(drop=True)
            # Concatenate the brain_data
            braindata = braindata.append(braindata_current)
            print("braindata shape: " + str(braindata.shape()))
            # Concatenate the design_df
            df = pd.concat([df, design_df_concat])
            print("df shape: " + str(df.shape))

    print("Concatenation finished!")

    # Apply trial_type and session_id to braindata.Y
    braindata.Y = pd.DataFrame(df[['session', 'trial_type']])
    print(f"Braindata.data shape: {braindata.shape()}")
    print(f"Braindata.Y: {braindata.Y}")

    print("Saving your cocatenation outputs!")
    # Serializes and saves braindata as a pickle file at FROM_PATH
    FILE_NAME = "braindata.pickle"
    with open(os.path.join(FROM_PATH, FILE_NAME), mode="wb") as f:
        pickle.dump(braindata, f)

    # Uploads the pickled braindata to S3 bucket
    cp_s3(BUCKET_NAME, FROM_PATH, TO_PATH, FILE_NAME, "upload")

    # Deletes the concatenation outputs as well as GLMsingle files.
    # shutil.rmtree receives the path as a single argument, so a path containing spaces or
    # shell metacharacters cannot be split or expanded the way an unquoted `rm -rf $var` can.
    # ignore_errors=True mirrors `rm -f` (a missing directory is not an error).
    shutil.rmtree(FROM_PATH, ignore_errors=True)
    shutil.rmtree(data_directory, ignore_errors=True)

    return braindata

In [18]:
# Checks the disk usage before the operations.
# Sizes are shown in 1M blocks, and --total adds a grand-total row at the end.
!df -h -m --total

Filesystem     1M-blocks  Used Available Use% Mounted on
overlay            99189 38932     60242  40% /
tmpfs                 64     0        64   0% /dev
tmpfs              31639     0     31639   0% /sys/fs/cgroup
shm                   64     0        64   0% /dev/shm
/dev/root          99189 38932     60242  40% /etc/hosts
/dev/nvme1n1      451821  7709    444113   2% /home/neuro/mount
tmpfs              31639     0     31639   0% /proc/acpi
tmpfs              31639     0     31639   0% /proc/scsi
tmpfs              31639     0     31639   0% /sys/firmware
total             776880 85571    691277  12% -


In [19]:
# Start the wall-clock timer
time_sta = time.time()

# Download every session, concatenate, pickle and upload the result
braindata = concat_func(session_ids_CIC, session_ids_midloc, BUCKET_NAME, data_directory)

# Stop the timer and compute the elapsed time in seconds
time_end = time.time()
tim = time_end - time_sta

# Report the elapsed time, rounded, in seconds and in minutes
print(f"{round(tim)} seconds")
print(f"{round(tim / 60)} minutes")

Starting concatenation for ['sub-SAXSISO01b', 'sub-SAXSISO01f', 'sub-SAXSISO01s', 'sub-SAXSISO02b', 'sub-SAXSISO02f', 'sub-SAXSISO02s', 'sub-SAXSISO03b', 'sub-SAXSISO03f', 'sub-SAXSISO03s', 'sub-SAXSISO04b', 'sub-SAXSISO04f', 'sub-SAXSISO04s', 'sub-SAXSISO08b', 'sub-SAXSISO08f', 'sub-SAXSISO08s', 'sub-SAXSISO09b', 'sub-SAXSISO09f', 'sub-SAXSISO09s', 'sub-SAXSISO10b', 'sub-SAXSISO10f', 'sub-SAXSISO10s', 'sub-SAXSISO11b', 'sub-SAXSISO11f', 'sub-SAXSISO11s', 'sub-SAXSISO12b', 'sub-SAXSISO12f', 'sub-SAXSISO12s', 'sub-SAXSISO13b', 'sub-SAXSISO13f', 'sub-SAXSISO13s', 'sub-SAXSISO14b', 'sub-SAXSISO14f', 'sub-SAXSISO14s', 'sub-SAXSISO15b', 'sub-SAXSISO15f', 'sub-SAXSISO15s', 'sub-SAXSISO17b', 'sub-SAXSISO17f', 'sub-SAXSISO17s', 'sub-SAXSISO18b', 'sub-SAXSISO18f', 'sub-SAXSISO18s', 'sub-SAXSISO19b', 'sub-SAXSISO19f', 'sub-SAXSISO19s', 'sub-SAXSISO21b', 'sub-SAXSISO21f', 'sub-SAXSISO21s', 'sub-SAXSISO22b', 'sub-SAXSISO22f', 'sub-SAXSISO22s', 'sub-SAXSISO24b', 'sub-SAXSISO24f', 'sub-SAXSISO24s', 

Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO13b!
Downloading from S3 bucket!
braindata shape: (1503, 238955)
df shape: (1503, 8)
Downloading betamap of sub-SAXSISO13f!
Downloading from S3 bucket!
Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO13f!
Downloading from S3 bucket!
braindata shape: (1557, 238955)
df shape: (1557, 8)
Downloading betamap of sub-SAXSISO13s!
Downloading from S3 bucket!
Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO13s!
Downloading from S3 bucket!
braindata shape: (1611, 238955)
df shape: (1611, 8)
Downloading betamap of sub-SAXSISO14b!
Downloading from S3 bucket!
Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO14b!
Downloading from S3 bucket!
braindata shape: (1665, 238955)
df shape: (1665, 8)
Downloading betamap of sub-SAXSISO14f!
Downloading from S3 bucket!
Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO14f!
Downloading from S3 bucket!
braindata shape: (1719, 238955)
df shap

Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO30s!
Downloading from S3 bucket!
braindata shape: (3555, 238955)
df shape: (3555, 8)
Downloading betamap of sub-SAXSISO32b!
Downloading from S3 bucket!
Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO32b!
Downloading from S3 bucket!
braindata shape: (3609, 238955)
df shape: (3609, 8)
Downloading betamap of sub-SAXSISO32f!
Downloading from S3 bucket!
Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO32f!
Downloading from S3 bucket!
braindata shape: (3663, 238955)
df shape: (3663, 8)
Downloading betamap of sub-SAXSISO32s!
Downloading from S3 bucket!
Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO32s!
Downloading from S3 bucket!
braindata shape: (3717, 238955)
df shape: (3717, 8)
Downloading betamap of sub-SAXSISO33b!
Downloading from S3 bucket!
Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO33b!
Downloading from S3 bucket!
braindata shape: (3771, 238955)
df shap

Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO11b!
Downloading from S3 bucket!
braindata shape: (6437, 238955)
df shape: (6437, 8)
Downloading betamap of sub-SAXSISO12b!
Downloading from S3 bucket!
Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO12b!
Downloading from S3 bucket!
braindata shape: (6597, 238955)
df shape: (6597, 8)
Downloading betamap of sub-SAXSISO13b!
Downloading from S3 bucket!
Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO13b!
Downloading from S3 bucket!
braindata shape: (6757, 238955)
df shape: (6757, 8)
Downloading betamap of sub-SAXSISO14b!
Downloading from S3 bucket!
Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO14b!
Downloading from S3 bucket!
braindata shape: (6917, 238955)
df shape: (6917, 8)
Downloading betamap of sub-SAXSISO15b!
Downloading from S3 bucket!
Converting into Brain_Data!
Downloading eventfile of sub-SAXSISO15b!
Downloading from S3 bucket!
braindata shape: (7077, 238955)
df shap