In [1]:
import cloudknot as ck
import itertools
ck.set_region('us-west-2')

In [2]:
import AFQ.data as afqd
study = afqd.S3BIDSStudy(
    "hbn-curated",
    "fcp-indi",
    "data/Projects/HBN/BIDS_curated/derivatives/qsiprep",
    subjects="all",
)

  data = yaml.load(f.read()) or {}


Retrieving subject S3 keys
[########################################] | 100% Completed |  4min 51.9s


In [41]:
def afq_hbn(subject):
    import AFQ
    print(AFQ.__version__)
    
    import AFQ.data as afqd
    import AFQ.api as api
    import AFQ.definitions.mask as afm
    import os.path as op
    import os
    import s3fs
    import shutil
    import packaging
    import cython
    
    local_bids_folder = "hbn"

    input_bucket = "fcp-indi"
    input_s3_prefix = "data/Projects/HBN/BIDS_curated"

    output_bucket = "hbn-afq"
    output_s3_prefix = ""
    output_deriv_name = "afq_like_hcp"

    # select subjects from qsiprep
    study = afqd.S3BIDSStudy(
        "hbn-curated",
        input_bucket,
        input_s3_prefix,
        subjects=[subject],
        random_seed=42
    )

    study.download(local_bids_folder, include_derivs=True)
    
    session = [
        d for d in os.listdir(op.join(local_bids_folder, "derivatives", "qsiprep", subject))
        if d.startswith("ses-")
    ][0]
    
    for fname in os.listdir(op.join(
        local_bids_folder, "derivatives", "qsiprep", subject, "anat"
    )):
        shutil.copy2(
            op.join(local_bids_folder, "derivatives", "qsiprep", subject, "anat", fname),
            op.join(local_bids_folder, "derivatives", "qsiprep", subject, session, "anat"),
        )

    fs = s3fs.S3FileSystem()

    # Only do it if the output file doesn't already exist:    
    if not fs.exists(f"hbn-afq/derivatives/{output_deriv_name}/{subject}/"
                     f"{session}/{subject}_{session}_"
                     "acq-64dir_space-T1w_desc-preproc_dwi_space-RASMM_model-CSD_"
                     "desc-prob-afq_profiles.csv"):
        # Configuration:
        # session = "Retest"
        seg_algo = "afq"
        reuse_tractography = False
        bundle_info = api.BUNDLES + api.CALLOSUM_BUNDLES
        shell = "multi"

        tracking_params = {
            'seed_mask': afm.ScalarMask('dki_fa'),
            'stop_mask': afm.ScalarMask('dki_fa'),
            "odf_model": "CSD",
            "directions": "prob"
        }
        kwargs = {
            "scalars": ["dki_fa", "dki_md", "dki_mk", "dki_awf"]
        }

        # Whether to reuse a previous tractography that has already been
        # uploaded to s3 by another run of this function. Useful if you want to
        # try new parameters that do not change the tractography.
        custom_tractography_bids_filters = None

#         if reuse_tractography:
#             rpath = (f"profile-hcp-west/hcp_reliability/multi_shell/"
#                      f"hcp_{session.lower()}_reco80_csd_azure/sub-{subject}"
#                      f"/ses-01/sub-{subject}_dwi_space-RASMM"
#                      f"_model-CSD_desc-prob_tractography.trk")
#             #  rpath=(
#             #      f"{my_hcp_key}/{shell}_shell/"
#             #      f"hcp_{session.lower()}_afq/sub-{subject}/ses-01/"
#             #      f"sub-{subject}_dwi_space-RASMM_model-"
#             #      f"{tracking_params['odf_model']}_desc-prob_tractography.trk")
#             lpath = (
#                 f"derivatives/dmriprep/sub-{subject}/"
#                 f"ses-01/sub-{subject}_customtrk.trk")
#             if fs.exists(rpath):
#                 log.info(f"Gettng {rpath}")
#                 fs.get(
#                     rpath,
#                     op.join(hcp_bids, lpath))
#                 custom_tractography_bids_filters = {
#                     "suffix": "customtrk", "scope": "dmriprep"}

        # Initialize the AFQ object with all of the parameters we have set so far
        # Sets viz_backend='plotly' to make GIFs in addition to the default html
        # visualizations (this adds ~45 minutes)        
        myafq = api.AFQ(
            local_bids_folder,
            dmriprep="qsiprep",
            brain_mask=afm.MaskFile(
                "mask",
                {
                    "desc": "brain",
                    "space": None,
                }
            ),
            custom_tractography_bids_filters=custom_tractography_bids_filters,
            tracking_params=tracking_params,
            bundle_info=bundle_info,
            segmentation_params={
                "seg_algo": seg_algo, "reg_algo": "syn"},
            viz_backend='plotly',
            **kwargs)
        
        # run the AFQ objects
        print("Running the pyAFQ pipeline")
        myafq.export_all(afqbrowser=False, xforms=False)
        
        remote_export_path = op.join(
            output_bucket,
            output_s3_prefix,
            "derivatives",
            output_deriv_name
        )
        print(f"Uploading to {remote_export_path}")
        myafq.upload_to_s3(fs, remote_export_path)
    else:
        print(f"Already completed analysis for this subject")

In [44]:
di = ck.DockerImage(
    name="afq-hbn-like-hcp",
    func=afq_hbn,
    base_image="libglxvfb:1",
    github_installs="https://github.com/yeatmanlab/pyAFQ.git@master",
    overwrite=True
)



In [45]:
di.build(tags=["afq-hbn-like-hcp-20210730"])

In [46]:
repo = ck.aws.DockerRepo(name=ck.get_ecr_repo())
di.push(repo=repo)

In [26]:
knot = ck.Knot(
    name="afq-hbn-like-hcp-20210731-0",
    docker_image=di,
    pars_policies=("AmazonS3FullAccess"),
    bid_percentage=100,
    volume_size=60,
    memory=64000,
    job_def_vcpus=4,
    max_vcpus=512,
    retries=3,
    aws_resource_tags={"Project": "HBN-FCP-INDI"},
)

In [27]:
debug_subs = study._all_subjects[:5]
pilot_subs = study._all_subjects[5:100]
remaining_subs = study._all_subjects[50:]

In [28]:
len(debug_subs + pilot_subs)

100

In [29]:
ft = knot.map(debug_subs + pilot_subs)

In [30]:
ck.set_region('us-west-2')
knot.view_jobs()

Job ID              Name                        Status   
---------------------------------------------------------
66e897d9-99b8-4def-a99d-a53646fa7f45        afq-hbn-like-hcp-20210731-0-0        SUBMITTED


In [31]:
len(remaining_subs)

2086

In [32]:
production_knot = ck.Knot(
    name="afq-hbn-like-hcp-20210731-1",
    docker_image=di,
    pars_policies=("AmazonS3FullAccess"),
    bid_percentage=100,
    volume_size=60,
    memory=64000,
    job_def_vcpus=4,
    max_vcpus=8192,
    retries=3,
    aws_resource_tags={"Project": "HBN-FCP-INDI"},
)

In [38]:
ft = production_knot.map(remaining_subs)

In [40]:
ck.set_region('us-west-2')
production_knot.view_jobs()

Job ID              Name                        Status   
---------------------------------------------------------
80ff8a26-55c4-4274-9a4b-e7b528e68427        afq-hbn-like-hcp-20210731-1-0        PENDING  


In [47]:
production_knot.clobber(clobber_pars=True)