# MRI data analysis with `cloudknot`

This example demonstrates analysis of MRI data using software that depends on the scipy stack,
as well as on smaller open-source software projects.

The data for each participant in the study is non-trivial in size (about 140 MB per subject),
and complexity (4D arrays representing MRI measurements in every location in the brain with 
diffusion gradients in multiple directions). The analysis requires non-trivial computations 
(e.g., fitting a linear model over directions in every spatial location). 

The data we will use is publicly accessible through a URL pointing to the Stanford Digital Repository.
We will pull down each participant's data using a URL that the function constructs based on its
input.

In [31]:
import cloudknot as ck

In [None]:
def process_subject_data(sub):
    """
    Download one subject's diffusion MRI data, run AFQ on it, and upload FA.

    The subject's ``.nii.gz``, ``.bvals`` and ``.bvecs`` files are fetched
    into ``~/data/sub-01/sess-01/dwi``, the AFQ API is pointed at ``~/data``,
    and the first entry of its ``dti_fa`` result is uploaded to the
    ``escience.washington.edu.public`` S3 bucket under the key
    ``<sub>_FA.nii.gz``.

    Parameters
    ----------
    sub : str
        The ID string for one of the subjects in this data-set.
        One of {'SUB1', 'SUB2', 'SUB3', 'SUB4', 'SUB5', 'SUB6'}

    Raises
    ------
    requests.HTTPError
        If any of the data files cannot be downloaded (non-2xx response).
    """
    # All imports are kept function-local, as in the original, so that the
    # function body is self-contained (presumably required when cloudknot
    # ships the function for remote execution -- confirm against its docs).
    import os
    import os.path as op

    import boto3
    import requests
    from AFQ.api import AFQ

    # Helper function to download files and save them:
    def download_file(url, fname):
        # Skip files that were already fully downloaded.
        if op.exists(fname):
            return
        # Write to a temporary name first so that an interrupted download
        # is never mistaken for a complete file by the check above.
        partial = fname + '.part'
        # Stream the body instead of holding the whole file in memory.
        r = requests.get(url, stream=True)
        try:
            # Fail loudly rather than saving an HTTP error page as data.
            r.raise_for_status()
            with open(partial, 'wb') as fd:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    fd.write(chunk)
        finally:
            r.close()
        os.rename(partial, fname)

    # Create the folder structure AFQ expects. The paths are computed
    # unconditionally so they are defined even when ~/data already exists.
    base_folder = op.join(op.expanduser('~'), 'data')
    sess_folder = op.join(base_folder, 'sub-01', 'sess-01')
    anat_folder = op.join(sess_folder, 'anat')
    dwi_folder = op.join(sess_folder, 'dwi')
    for folder in (anat_folder, dwi_folder):
        if not op.exists(folder):
            os.makedirs(folder)

    # Grab the data from the Stanford Digital Repository:
    data_url = 'https://stacks.stanford.edu/file/druid:rt034xr8593/'
    sub_data = data_url + sub
    fbvals_url = sub_data + "_1.bvals"
    fbvecs_url = sub_data + "_1.bvecs"
    fnii_url = sub_data + "_1.nii.gz"

    # Download and save into the designated location:
    download_file(fnii_url, op.join(dwi_folder, 'dwi.nii.gz'))
    download_file(fbvals_url, op.join(dwi_folder, 'dwi.bvals'))
    download_file(fbvecs_url, op.join(dwi_folder, 'dwi.bvecs'))

    # AFQ knows how to find the files:
    my_afq = AFQ(preproc_path=base_folder, sub_prefix='sub')

    # This triggers the analysis:
    fa_file = my_afq.dti_fa[0]

    # Upload to S3:
    client = boto3.resource('s3')
    bucket_name = 'escience.washington.edu.public'
    b = client.Bucket(bucket_name)
    b.upload_file(fa_file, '%s_FA.nii.gz' % sub)

In [None]:
# Use a single name for both branches so that the fallback retrieves the same
# knot that the first branch creates.
knot_name = 'write_to_s3_bucket'
try:
    knot = ck.Knot(name=knot_name,
                   func=process_subject_data,
                   pars_policies=('AmazonS3FullAccess',))
except ValueError:
    # If you previously created this knot but didn't clobber it, then just supply
    # the name in order to retrieve the knot info from the cloudknot config file
    knot = ck.Knot(name=knot_name)

In [None]:
# Map the processing function over every subject ID in the data-set and keep
# whatever `knot.map` returns so the results can be inspected later.
subject_ids = ['SUB1', 'SUB2', 'SUB3', 'SUB4', 'SUB5', 'SUB6']
result_futures = knot.map(subject_ids)

In [None]:
# Inspect the jobs associated with this knot (presumably the ones submitted by
# the `knot.map` call above -- see the cloudknot docs for the exact output).
knot.view_jobs()