In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so that the
# following cells can read and write files stored on the drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd /content/drive/MyDrive/Thesis/ASD/

/content/drive/MyDrive/Thesis/ASD


In [6]:
import csv
import os
import urllib
import urllib.request

from docopt import docopt



def collect_and_download(derivative, pipeline, strategy, out_dir,
                         pheno_file="./Phenotypes/Phenotypic_V1_0b_preprocessed1.csv"):
    """Download one derivative file per subject listed in a phenotype CSV.

    For every row of the phenotype file whose ``FILE_ID`` column holds a real
    identifier, the URL
    ``<s3_prefix>/Outputs/<pipeline>/<strategy>/<derivative>/<FILE_ID>_<derivative><ext>``
    is fetched into ``out_dir``. Files that already exist in ``out_dir`` are
    skipped, so the function can be re-run to resume a download.

    Parameters
    ----------
    derivative : str
        Derivative name (lower-cased before use). Names containing ``"roi"``
        are downloaded with a ``.1D`` extension, everything else as ``.nii.gz``.
    pipeline : str
        Pipeline name used as a URL path component (lower-cased before use).
    strategy : str
        Strategy name used as a URL path component (lower-cased before use).
    out_dir : str
        Directory that receives the downloaded files; created if missing.
    pheno_file : str, optional
        Path of the phenotype CSV. It must have a header row containing a
        ``FILE_ID`` column. Defaults to the path the notebook has always used.

    Raises
    ------
    ValueError
        If the phenotype file is empty or has no ``FILE_ID`` column.
    Exception
        Any error raised by ``urllib.request.urlretrieve`` is propagated after
        the partially written file has been removed.
    """
    s3_prefix = "https://s3.amazonaws.com/fcp-indi/data/Projects/ABIDE_Initiative"

    derivative = derivative.lower()
    pipeline = pipeline.lower()
    strategy = strategy.lower()

    extension = ".1D" if "roi" in derivative else ".nii.gz"

    os.makedirs(out_dir, exist_ok=True)

    # Build the (remote URL, local path) pairs first so that the progress
    # percentage can be computed against the total number of files.
    downloads = []
    # The csv module copes with quoted fields that contain commas and strips
    # line terminators, both of which a plain str.split(",") gets wrong.
    with open(pheno_file, "r", newline="") as pheno_handle:
        reader = csv.reader(pheno_handle)
        header = next(reader, None)
        if header is None:
            raise ValueError("Phenotype file %s is empty" % pheno_file)
        file_idx = [column.strip() for column in header].index("FILE_ID")

        for row in reader:
            # Blank or truncated lines carry no file id; ignore them.
            if len(row) <= file_idx:
                continue
            row_file_id = row[file_idx].strip()
            # "no_filename" is the marker the phenotype file uses for rows
            # without an associated file.
            if not row_file_id or row_file_id == "no_filename":
                continue
            filename = row_file_id + "_" + derivative + extension
            s3_path = "/".join([s3_prefix, "Outputs", pipeline, strategy, derivative, filename])
            downloads.append((s3_path, os.path.join(out_dir, filename)))

    total_num_files = len(downloads)
    for path_idx, (s3_path, download_file) in enumerate(downloads):
        if os.path.exists(download_file):
            print("File %s already exists, skipping..." % download_file)
            continue

        print("Retrieving: %s" % download_file)
        # Download under a temporary name and rename on success. Otherwise an
        # interrupted transfer would leave a truncated file that the
        # "already exists" check above silently accepts on the next run.
        partial_file = download_file + ".part"
        try:
            urllib.request.urlretrieve(s3_path, partial_file)
            os.replace(partial_file, download_file)
        except BaseException:
            # Also covers KeyboardInterrupt (kernel interrupt) mid-download.
            if os.path.exists(partial_file):
                os.remove(partial_file)
            raise
        print("%.3f%% percent complete" % (100*(float(path_idx+1)/total_num_files)))

In [None]:
if __name__ == "__main__":
    # Derivatives to fetch in this run. Other names noted by the author:
    # 'rois_aal', 'rois_cc200', 'rois_dosenbach160', 'rois_ho', 'rois_cc400'
    derivatives = ['rois_dosenbach160']

    # Relative path, resolved against the current working directory.
    out_dir = os.path.abspath("Datasets/CPAC")

    # Each derivative is stored in its own sub-folder of out_dir, requested
    # with the 'cpac' pipeline and the 'filt_global' strategy.
    for derivative in derivatives:
        collect_and_download(
            derivative=derivative,
            pipeline='cpac',
            strategy='filt_global',
            out_dir=os.path.join(out_dir, derivative),
        )

Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CPAC/rois_dosenbach160/Pitt_0050003_rois_dosenbach160.1D
0.097% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CPAC/rois_dosenbach160/Pitt_0050004_rois_dosenbach160.1D
0.193% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CPAC/rois_dosenbach160/Pitt_0050005_rois_dosenbach160.1D
0.290% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CPAC/rois_dosenbach160/Pitt_0050006_rois_dosenbach160.1D
0.386% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CPAC/rois_dosenbach160/Pitt_0050007_rois_dosenbach160.1D
0.483% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CPAC/rois_dosenbach160/Pitt_0050008_rois_dosenbach160.1D
0.580% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CPAC/rois_dosenbach160/Pitt_0050009_rois_dosenbach160.1D
0.676% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Da

In [None]:
if __name__ == "__main__":
    # Derivatives to fetch in this run.
    derivatives = ['rois_cc200']

    # Relative path, resolved against the current working directory.
    out_dir = os.path.abspath("Datasets/DPARSF/")

    # Each derivative is stored in its own sub-folder of out_dir, requested
    # with the 'dparsf' pipeline and the 'filt_global' strategy.
    for derivative in derivatives:
        collect_and_download(
            derivative=derivative,
            pipeline='dparsf',
            strategy='filt_global',
            out_dir=os.path.join(out_dir, derivative),
        )

Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/DPARSF/rois_cc200/Pitt_0050003_rois_cc200.1D
0.097% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/DPARSF/rois_cc200/Pitt_0050004_rois_cc200.1D
0.193% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/DPARSF/rois_cc200/Pitt_0050005_rois_cc200.1D
0.290% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/DPARSF/rois_cc200/Pitt_0050006_rois_cc200.1D
0.386% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/DPARSF/rois_cc200/Pitt_0050007_rois_cc200.1D
0.483% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/DPARSF/rois_cc200/Pitt_0050008_rois_cc200.1D
0.580% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/DPARSF/rois_cc200/Pitt_0050009_rois_cc200.1D
0.676% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/DPARSF/rois_cc200/Pitt_0050010_rois_cc200.1D
0.773% percent complete
Retrievi

In [None]:
if __name__ == "__main__":
    # Derivatives to fetch in this run.
    derivatives = ['rois_cc200']

    # Relative path, resolved against the current working directory.
    out_dir = os.path.abspath("Datasets/NIAK/")

    # Each derivative is stored in its own sub-folder of out_dir, requested
    # with the 'niak' pipeline and the 'filt_global' strategy.
    for derivative in derivatives:
        collect_and_download(
            derivative=derivative,
            pipeline='niak',
            strategy='filt_global',
            out_dir=os.path.join(out_dir, derivative),
        )

Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/NIAK/rois_cc200/Pitt_0050003_rois_cc200.1D
0.097% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/NIAK/rois_cc200/Pitt_0050004_rois_cc200.1D
0.193% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/NIAK/rois_cc200/Pitt_0050005_rois_cc200.1D
0.290% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/NIAK/rois_cc200/Pitt_0050006_rois_cc200.1D
0.386% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/NIAK/rois_cc200/Pitt_0050007_rois_cc200.1D
0.483% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/NIAK/rois_cc200/Pitt_0050008_rois_cc200.1D
0.580% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/NIAK/rois_cc200/Pitt_0050009_rois_cc200.1D
0.676% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/NIAK/rois_cc200/Pitt_0050010_rois_cc200.1D
0.773% percent complete
Retrieving: /content/dri

In [None]:
if __name__ == "__main__":
    # Derivatives to fetch in this run.
    derivatives = ['rois_cc200']

    # Relative path, resolved against the current working directory.
    out_dir = os.path.abspath("Datasets/CCS/")

    # Each derivative is stored in its own sub-folder of out_dir, requested
    # with the 'ccs' pipeline and the 'filt_global' strategy.
    for derivative in derivatives:
        collect_and_download(
            derivative=derivative,
            pipeline='ccs',
            strategy='filt_global',
            out_dir=os.path.join(out_dir, derivative),
        )

Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CCS/rois_cc200/Pitt_0050003_rois_cc200.1D
0.097% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CCS/rois_cc200/Pitt_0050004_rois_cc200.1D
0.193% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CCS/rois_cc200/Pitt_0050005_rois_cc200.1D
0.290% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CCS/rois_cc200/Pitt_0050006_rois_cc200.1D
0.386% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CCS/rois_cc200/Pitt_0050007_rois_cc200.1D
0.483% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CCS/rois_cc200/Pitt_0050008_rois_cc200.1D
0.580% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CCS/rois_cc200/Pitt_0050009_rois_cc200.1D
0.676% percent complete
Retrieving: /content/drive/MyDrive/Thesis/ASD/Datasets/CCS/rois_cc200/Pitt_0050010_rois_cc200.1D
0.773% percent complete
Retrieving: /content/drive/MyDri