In [None]:
# Download files from Flywheel
# Written by David Parker - updated Oct 2022
# Adapted by Lisa Bruckert Nov 2021
# Adapted by Rocio Poblaciones Apr 2022

"""
Changelog:
10/31/2022 - Parker
 - Added regular expression filter ability to analysis label
 - Added gear version filter to analysis search
 - reorganized code/ added blocks
 - removed API key references.

"""
import flywheel
from pathlib import Path
import shutil
import pathvalidate as pv
from tqdm import tqdm
import os
import re


System settings used throughout the script

In [None]:
# Local directory that will hold everything this notebook downloads.
# Example: work_dir = Path.home() / 'Documents' / 'Flywheel_QCreport'
# NOTE(review): '//' looks like a placeholder - point this at your own directory before running.
work_dir = Path('//')
# Create the directory (and any missing parents). exist_ok=True makes the cell
# safe to re-run and avoids the gap between an exists() check and mkdir().
work_dir.mkdir(parents=True, exist_ok=True)


Flywheel settings used throughout the script

In [None]:
# Flywheel ID of the project to download from.
project_id = "5eafb9fd788701016978097d"

# Labels of the subjects whose sessions should be processed.
subject_ids_to_download = ["21346"]

# Name of the gear that produced the analyses we want.
# Example: gear = 'bids-freesurfer'
gear = "rtp-pipeline"

# Gear version to match. "latest" will use the latest version of the gear
# installed on the site.
version = "latest"

# Only analyses whose label matches this regular expression are downloaded.
# Use regex101.com to validate regular expressions; use r".*" to accept
# every label.
analysis_label_regex = r"example_regex"
aregex = re.compile(analysis_label_regex)


# Files to extract from each analysis output archive. Every entry must be the
# exact file name as it appears on Flywheel. Each name is listed once:
# duplicate entries only cause redundant look-ups in the download loop.
file_name_list = [
    't1.nii.gz',
    'CC_Mot_wbt_noEval_clean.tck',
    'CC_Occ_wbt_noEval_clean.tck',
    'CC_OrbFron_wbt_noEval_clean.tck',
    'CC_PostPar_wbt_noEval_clean.tck',
    'CC_SupFron_wbt_noEval_clean.tck',
    'CC_SupPar_wbt_noEval_clean.tck',
    'CC_Temp_wbt_noEval_clean.tck',
    'CC_AntFron_wbt_noEval_clean.tck',
    'RTP_fa.csv',
    'RTP_ad.csv',
    'RTP_rd.csv',
    'RTP_md.csv',
    'RTP_cl.csv',
    'RTP_C2ROIad.csv',
    'RTP_C2ROIcl.csv',
    'RTP_C2ROImd.csv',
    'RTP_C2ROIrd.csv',
    'RTP_C2ROIfa.csv',
    'CFMaj_wbt_noEval_clean.tck',
    'CFMin_wbt_noEval_clean.tck',
    'LAF_wbt_noEval_clean.tck',
    'LATR_wbt_noEval_clean.tck',
    'LCC_wbt_noEval_clean.tck',
    'LCH_wbt_noEval_clean.tck',
    'LCST_wbt_noEval_clean.tck',
    'LICP_wbt_noEval_clean.tck',
    'LIFOF_wbt_noEval_clean.tck',
    'LILF_wbt_noEval_clean.tck',
    'LSCP_wbt_noEval_clean.tck',
    'LSLF_wbt_noEval_clean.tck',
    'LUF_wbt_noEval_clean.tck',
    'MCP_wbt_noEval_clean.tck',
    'RAF_wbt_noEval_clean.tck',
    'RATR_wbt_noEval_clean.tck',
    'RCC_wbt_noEval_clean.tck',
    'RCH_wbt_noEval_clean.tck',
    'RCST_wbt_noEval_clean.tck',
    'RICP_wbt_noEval_clean.tck',
    'RIFOF_wbt_noEval_clean.tck',
    'RILF_wbt_noEval_clean.tck',
    'RSCP_wbt_noEval_clean.tck',
    'RSLF_wbt_noEval_clean.tck',
    'RUF_wbt_noEval_clean.tck',
    'dwi.nii.gz',
    'dwi_wmCsd_autolmax.mif',
]


Initialize the Flywheel client, resolve the gear version, and create the project download directory

In [1]:
# Connect to Flywheel, resolve the gear version to search for, and create the
# local download directory for this project.
#
# project_id (set in the settings cell) is the project ID for PT_NeonateBrain.
#
# The API key is read from the STAN_API environment variable so it never
# appears in the notebook. Set it before launching Jupyter, e.g. in bash:
#   export STAN_API="<my_api_key>"
try:
    api_key = os.environ['STAN_API']
except KeyError:
    # Same exception type as before, but with a message that says what to do.
    raise KeyError(
        'STAN_API environment variable is not set; '
        'export your Flywheel API key before running this cell'
    ) from None

fw = flywheel.Client(api_key)
project = fw.get_project(project_id)

# Replace the "latest" placeholder with a concrete version string so that the
# analysis search below can compare versions by equality.
if version == "latest":
    gears = fw.get_all_gears(filter=f"gear.name={gear}")
    if not gears:
        print(f'INVALID GEAR NAME {gear}')
        raise ValueError("Invalid gear name")

    # NOTE(review): assumes the first gear returned is the latest installed version - confirm.
    version = gears[0].gear.version

# Create a custom path for our project (we may run this on other projects in
# the future). exist_ok=True keeps the cell safe to re-run.
project_path = pv.sanitize_filepath(work_dir/project.label)
project_path.mkdir(parents=True, exist_ok=True)

NameError: name 'flywheel' is not defined

Loop over the project's sessions, find the matching analysis for each requested subject, and download the listed files

In [None]:
# For every session of the requested subjects: find the most recent completed
# analysis produced by `gear` at `version` whose label matches `aregex`, then
# download each file named in `file_name_list` from that analysis' output zip
# into <project_path>/<subject label>/<analysis label>_<file name>.
#
# We can loop over sessions (and skip subjects), because the subject parent
# info is stored on the session if we need it, AND the analysis of interest is
# stored on the session itself.

# Setting subject_ids_to_download to False (or None) disables the subject
# filter, so every subject in the project is processed.
filter_by_subject = (
    subject_ids_to_download is not False
    and subject_ids_to_download is not None
)

for ses in tqdm(project.sessions.iter()):

    ses_label = ses.label
    sub_label = str(ses.subject.label)

    # Skip sessions that belong to subjects we were not asked for. `and`
    # short-circuits, so the membership test is never run against False/None.
    if filter_by_subject and sub_label not in subject_ids_to_download:
        continue
    print(f"Analyzing Subject id: {sub_label} Session id: {ses_label}")

    # Make sure we have all our analyses, since we got the session through an
    # iterator and not "fw.get()".
    ses = ses.reload()
    analyses = ses.analyses

    # If there are no analysis containers, we know that this gear was not run.
    if not analyses:
        continue
    print(f'{ses.label} has analysis')

    # Keep only the analyses generated by our gear/version with a matching label.
    # NOTE(review): gear_info is presumably None for analyses that were not
    # produced by a gear, hence the guard - confirm against the SDK.
    matches = [
        asys for asys in analyses
        if asys.gear_info is not None
        and asys.gear_info.get('name') == gear
        and asys.gear_info.get('version') == version
        and aregex.search(asys.label)
    ]
    print(f'{len(matches)} matches in {[asys.label for asys in analyses]}')

    # If there are no matches, the gear didn't run on this session.
    if not matches:
        continue

    # Only successful runs are usable. If nothing completed, move on instead
    # of asking max() for the newest element of an empty list.
    completed = [asys for asys in matches if asys.job.get('state') == 'complete']
    print(f'{len(completed)} completed matches')
    if not completed:
        continue

    # With reruns, take the most recent one. max() returns the first of
    # equally recent runs; two runs created at the same instant are not expected.
    # This behavior may be modified to whatever suits your needs.
    match = max(completed, key=lambda asys: asys.created)

    # Locate the output archive and list its members. Any API error here
    # makes the whole session unusable, so report it and go to the next one.
    try:
        # Reload the match so that its files are populated.
        match = match.reload()
        files = match.files

        # In case there are more files (there shouldn't be), keep the zip archives.
        if len(files) > 1:
            files = [f for f in files if f.mimetype == 'application/zip']
        if not files:
            print(f'No output archive found for {sub_label} {ses_label} {match.label}')
            continue

        fname = files[0].name
        print("fname")
        print(fname)

        # The directory layout inside the zip may differ from subject to
        # subject, so files are located by name among the zip members.
        zip_info = match.get_file_zip_info(fname)['members']

        # All files of one subject go to the same directory.
        download_dir = pv.sanitize_filepath(project_path/sub_label)
        download_dir.mkdir(parents=True, exist_ok=True)

    # Alert the user of any exceptions.
    except Exception as e:
        print('Error Downloading File')
        print(e)
        continue

    for file_name in file_name_list:
        # Match on the exact base name: either a member at the zip root or one
        # whose path ends in "/<file_name>". A plain substring test would also
        # accept e.g. "<file_name>.bak".
        file_of_interest = [
            a['path'] for a in zip_info
            if a['path'] == file_name or a['path'].endswith('/' + file_name)
        ]
        print(file_of_interest)

        # If we found some (there should be one), take the first as our target.
        if not file_of_interest:
            print(f'No File of Interest found for {sub_label} {ses_label} {match.label}')
            continue
        member_path = file_of_interest[0]

        # Prefix the analysis label so files from different analyses never collide.
        download_name = Path(pv.sanitize_filename(f'{match.label}' + '_' + file_name))
        download_path = download_dir/download_name
        print('download_path')
        print(download_path)

        # Existing files are kept. Remove this check to overwrite them instead.
        if download_path.exists():
            print('File exists')
            continue

        print('downloading file')
        # Errors are handled per file so that one failed member does not
        # prevent the remaining files of this session from being downloaded.
        try:
            match.download_file_zip_member(fname, member_path, download_path)
        except Exception as e:
            print('Error Downloading File')
            print(e)
            # The file did not exist before this attempt, so anything left at
            # the path is an incomplete download. Remove it so that the next
            # run retries instead of reporting 'File exists'.
            if download_path.exists():
                download_path.unlink()
