In [1]:
import json

import globus_automate_client
import mdf_toolbox

In [16]:
# Build the Flows client that the cells below use to deploy, run, and inspect the flow.
native_app_id = "417301b1-5101-456a-8a27-423e71a2ae26"  # Premade native app ID (from CFDE)
flows_client = globus_automate_client.create_flows_client(native_app_id)

In [4]:
# Principal URNs used for both `visible_to` and `runnable_by` in the flow definition below.
flow_permissions = ["urn:globus:auth:identity:117e8833-68f5-4cb2-afb3-05b25db69be1"]  # jgaff@uchicago.edu
# flow_permissions = ["urn:globus:groups:"]
# Action URL and scope of the curation sub-flow (used by the "CurateSubmission" state).
# NOTE(review): these two flow URLs use http:// while every other ActionUrl in this
# notebook uses https:// - confirm this is intentional.
curation_subflow_url = "http://flows.automate.globus.org/flows/a24d39dd-f8b7-4287-ba4d-cdd8e36fcee6"
curation_subflow_scope = "https://auth.globus.org/scopes/a24d39dd-f8b7-4287-ba4d-cdd8e36fcee6/flow_a24d39dd_f8b7_4287_ba4d_cdd8e36fcee6"
# Action URL and scope of the transfer-loop flow (used by "UserTransfer" and "DataDestTransfer").
transfer_loop_url = "http://flows.automate.globus.org/flows/c270840f-ba31-49db-9fd9-cd0aa67d9545"
transfer_loop_scope = "https://auth.globus.org/scopes/c270840f-ba31-49db-9fd9-cd0aa67d9545/flow_c270840f_ba31_49db_9fd9_cd0aa67d9545"


In [None]:
# Draft definition of the MDF dataset-processing flow.
#
# State order, following the "Next"/"Choices" links:
#   UserTransfer -> Xtraction -> FeedstockTransfer -> ChooseCuration
#     -> (CurateSubmission -> ChooseAcceptance)   when curation input is supplied
#     -> SearchIngest -> DataDestTransfer
#     -> ChoosePublish [-> MDFPublish] -> ChooseCitrine [-> CitrinePublish]
#     -> ChooseMRR [-> MRRPublish] -> SearchUpdate -> FinishSubmission
#   ChooseAcceptance routes "rejected" to FailCuration and anything else to ErrorState.
#
# Several states are still placeholders (empty ActionUrl/ActionScope/Parameters); they
# are marked with TODO comments and must be completed before the flow can be deployed.
# "FlowSuccess" and "ErrorFlowLog" are not referenced by any other state yet.
#
# NOTE: the "ErrorState" state reads `admin_email`, `sender_email`, `smtp_hostname`,
# `smtp_user` and `smtp_pass`. They are not defined in this cell and must be set in an
# earlier cell (load secrets from the environment; never hard-code them in the notebook).
mdf_flow_def = {
    "title": "The Materials Data Facility Dataset Processing Flow",
    "description": "Extract, process, and ingest a dataset into MDF Connect.",
    "visible_to": flow_permissions,
    "runnable_by": flow_permissions,
    "definition": {
        "StartAt": "UserTransfer",
        "States": {
            # There must be at least one data location
            "UserTransfer": {
                "Type": "Action",
                "ActionUrl": transfer_loop_url,
                "ActionScope": transfer_loop_scope,
                "ExceptionOnActionFailure": True,
                "RunAs": "Input",
                "Parameters": {
                    "action_inputs.$": "$.user_transfer_inputs"
                },
                "ResultPath": "$.UserTransferResult",
                "WaitTime": 86400,
                "Next": "Xtraction"
            },
            "Xtraction": {
                "Type": "Action",
                "ActionUrl": "https://xtract.materialsdatafacility.org/",
                "ActionScope": "https://auth.globus.org/scopes/34284fb1-2eea-4532-a04a-9c8ad1702856/xtract_crawl_and_extract",
                "ExceptionOnActionFailure": True,
                "Parameters": {
                    "metadata_storage_ep.$": "$.mdf_storage_ep",
                    "eid.$": "$.mdf_storage_ep",
                    "dir_path.$": "$.mdf_dataset_path",
                    "mapping": "match",  # ?
                    "dataset_mdata.$": "$.dataset_mdata",
                    "validator_params.$": "$.validator_params",
                    # options are 'directory/matio'
                    "grouper.=": "'directory' if `$.group_by_dir` else 'matio'"
                },
                "ResultPath": "$.XtractionResult",
                "WaitTime": 86400,
                "Next": "FeedstockTransfer"
            },
            "FeedstockTransfer": {
                "Type": "Action",
                "ActionUrl": "https://actions.globus.org/transfer/transfer",
                "ActionScope": "https://auth.globus.org/scopes/5fac2e64-c734-4e6b-90ea-ff12ddbf9653/transfer/transfer",
                "ExceptionOnActionFailure": True,
                "Parameters": {
                    "destination_endpoint_id.$": "$.feedstock_ep",
                    "source_endpoint_id.$": "$.mdf_storage_ep",
                    "transfer_items": [{
                        # Both operands are JSONPath references, so both are backtick-quoted
                        # like the other ".=" expressions in this definition.
                        "destination_path.=": "`$.feedstock_dir` + `$.source_id`",
                        # TODO: Xtract output path (the JSONPath below is incomplete - it
                        # ends with a trailing "." and still needs the result field name)
                        "source_path.$": "$.XtractionResult.details.",
                        "recursive": False
                    }]
                },
                "ResultPath": "$.FeedstockTransferResult",
                "WaitTime": 86400,
                "Next": "ChooseCuration"
            },
            "ChooseCuration": {
                # Skip curation only when `curation_input` is explicitly False.
                "Type": "Choice",
                "Choices": [{
                    "Variable": "$.curation_input",
                    "BooleanEquals": False,
                    "Next": "SearchIngest"
                }],
                "Default": "CurateSubmission"
            },
            "CurateSubmission": {
                "Type": "Action",
                "ActionUrl": curation_subflow_url,
                "ActionScope": curation_subflow_scope,
                "ExceptionOnActionFailure": True,
                "InputPath": "$.curation_input",
                "__Private_Parameters": ["send_credentials"],
                "ResultPath": "$.CurateResult",
                "WaitTime": 86400,
                "Next": "ChooseAcceptance"
            },
            "ChooseAcceptance": {
                "Type": "Choice",
                "Choices": [{
                    "Variable": "$.CurateResult.details.output.CurationResult.details.name",
                    "StringEquals": "accepted",
                    "Next": "SearchIngest"
                }, {
                    "Variable": "$.CurateResult.details.output.CurationResult.details.name",
                    "StringEquals": "rejected",
                    "Next": "FailCuration"
                }],
                "Default": "ErrorState"
            },
            "SearchIngest": {
                # TODO: Find docs for this
                # Placeholder no-op so the definition is valid Python and the state graph
                # stays connected; replace with the real search-ingest Action.
                # NOTE(review): "DataDestTransfer" is assumed to be the next step because
                # it follows in declaration order and nothing else leads to it - confirm.
                "Type": "Pass",
                "Next": "DataDestTransfer"
            },
            "DataDestTransfer": {
                "Type": "Action",
                "ActionUrl": transfer_loop_url,
                "ActionScope": transfer_loop_scope,
                "ExceptionOnActionFailure": True,
                "Parameters": {
                    "action_inputs.$": "$.data_destinations"
                },
                "ResultPath": "$.DataDestResult",
                "WaitTime": 86400,
                "Next": "ChoosePublish"
            },
            "ChoosePublish": {
                "Type": "Choice",
                "Choices": [{
                    "Variable": "$.mdf_publish",
                    "BooleanEquals": True,
                    "Next": "MDFPublish"
                }],
                "Default": "ChooseCitrine"
            },
            "MDFPublish": {
                # TODO: Mint DOI + ?
                "Type": "Action",
                "ActionUrl": "",
                "ActionScope": "",
                "ExceptionOnActionFailure": False,
                # TODO: fill in once the publish action is chosen
                "Parameters": {},
                "ResultPath": "$.MDFPublishResult",
                "WaitTime": 86400,
                "Next": "ChooseCitrine"
            },
            "ChooseCitrine": {
                "Type": "Choice",
                "Choices": [{
                    "Variable": "$.citrine",
                    "BooleanEquals": True,
                    "Next": "CitrinePublish"
                }],
                "Default": "ChooseMRR"
            },
            "CitrinePublish": {
                # FuncX function to publish to Citrine
                "Type": "Action",
                "ActionUrl": "",
                "ActionScope": "",
                "ExceptionOnActionFailure": False,
                # TODO: fill in once the FuncX function is registered
                "Parameters": {},
                "ResultPath": "$.CitrinePublishResult",
                "WaitTime": 86400,
                "Next": "ChooseMRR"
            },
            "ChooseMRR": {
                "Type": "Choice",
                "Choices": [{
                    "Variable": "$.mrr",
                    "BooleanEquals": True,
                    "Next": "MRRPublish"
                }],
                "Default": "SearchUpdate"
            },
            "MRRPublish": {
                # FuncX function to publish to MRR
                "Type": "Action",
                "ActionUrl": "",
                "ActionScope": "",
                "ExceptionOnActionFailure": False,
                # TODO: fill in once the FuncX function is registered
                "Parameters": {},
                "ResultPath": "$.MRRPublishResult",
                "WaitTime": 86400,
                "Next": "SearchUpdate"
            },
            "SearchUpdate": {
                # TODO: How to fetch previous dataset entry - Xtract results?
                # TODO: Apply services changes to dataset entry
                "Type": "Action",
                "ActionUrl": "https://actions.globus.org/search/ingest",
                "ActionScope": "https://auth.globus.org/scopes/5fac2e64-c734-4e6b-90ea-ff12ddbf9653/search/ingest",
                "ExceptionOnActionFailure": False,
                "Parameters": {
                    # TODO
                    "subject": "",
                    "content": {},
                    "visible_to": [],
                    "search_index": ""
                },
                "ResultPath": "$.SearchUpdateResult",
                "WaitTime": 86400,
                "Next": "FinishSubmission"
            },

            # TODO: Customize
            # NOTE(review): this state still reads "DerivaFinalACLResult", which no state
            # in this flow produces, and no state transitions here yet.
            "FlowSuccess": {
                "Type": "Action",
                "ActionUrl": "https://actions.globus.org/expression_eval",
                "ActionScope": "https://auth.globus.org/scopes/5fac2e64-c734-4e6b-90ea-ff12ddbf9653/expression",
                "ExceptionOnActionFailure": True,
                "Parameters": {
                    "expressions": [{
                        "expression": "catalog_link",
                        "arguments": {
                            "catalog_link.$": "$.DerivaFinalACLResult.details.deriva_link"
                        },
                        "result_path": "deriva_link"
                    }, {
                        "expression": "catalog_id",
                        "arguments": {
                            "catalog_id.$": "$.DerivaFinalACLResult.details.deriva_id"
                        },
                        "result_path": "deriva_id"
                    }, {
                        "expression": ("'Submission Flow succeeded. Your catalog ID is ' + str(catalog_id) + "
                                       "', and your submission can be viewed at this link: ' + catalog_link"),
                        "arguments": {
                            "catalog_link.$": "$.DerivaFinalACLResult.details.deriva_link",
                            "catalog_id.$": "$.DerivaFinalACLResult.details.deriva_id"
                        },
                        "result_path": "message"
                    }]
                },
                "ResultPath": "$.SuccessState",
                "WaitTime": 86400,
                # The only terminal state defined in this flow is "FinishSubmission".
                "Next": "FinishSubmission"
            },
            "FailCuration": {
                "Type": "Action",
                "ActionUrl": "https://actions.globus.org/expression_eval",
                "ActionScope": "https://auth.globus.org/scopes/5fac2e64-c734-4e6b-90ea-ff12ddbf9653/expression",
                "ExceptionOnActionFailure": True,
                "Parameters": {
                    "expressions": [{
                        "expression": "text",
                        "arguments": {
                            # NOTE(review): ".=" marks the value as an expression, but the
                            # message text is not quoted as a string literal - confirm this
                            # evaluates as intended.
                            "text.=": ("Your submission was rejected by a curator and did not complete the "
                                       "ingestion process. The curator gave the following reason for rejection: "
                                       "`$.CurateResult.details.output.CurationResult.details.parameters.user_input`")
                        },
                        "result_path": "error"
                    }]
                },
                "ResultPath": "$.FailureState",
                "WaitTime": 86400,
                "Next": "FinishSubmission"
            },
            "ErrorState": {
                "Type": "Action",
                "ActionUrl": "https://actions.globus.org/notification/notify",
                "ActionScope": "https://auth.globus.org/scopes/helloworld.actions.automate.globus.org/notification_notify",
                "ExceptionOnActionFailure": True,
                "Parameters": {
                    # "body_mimetype": "",
                    # TODO: the draft left the body template empty; replace this placeholder
                    # text (or switch to a "body_template.$" JSONPath) once decided.
                    "body_template": "An MDF submission failed during processing and requires attention.",
                    # Plain key (no ".$" suffix): the value comes from the Python variable
                    # `admin_email`, not from a JSONPath into the flow input.
                    # NOTE(review): assumes `admin_email` holds an address, not a JSONPath.
                    "destination": admin_email,
                    # "notification_method": "",
                    # "notification_priority": "low",
                    "send_credentials": [{
                        # "credential_method": "",
                        "credential_type": "smtp",
                        "credential_value": {
                            "hostname": smtp_hostname,
                            "username": smtp_user,
                            "password": smtp_pass
                        }
                    }],
                    "__Private_Parameters": ["send_credentials"],
                    "sender": sender_email,
                    "subject": "Submission Failed to Ingest"
                },
                "ResultPath": "$.FailDerivaIngestResult",
                "WaitTime": 86400,
                "Next": "FinishSubmission"
            },
            # NOTE(review): no state transitions to "ErrorFlowLog" yet, and its message
            # still names the CFDE team - confirm whether "ErrorState" should lead here.
            "ErrorFlowLog": {
                "Type": "Action",
                "ActionUrl": "https://actions.globus.org/expression_eval",
                "ActionScope": "https://auth.globus.org/scopes/5fac2e64-c734-4e6b-90ea-ff12ddbf9653/expression",
                "ExceptionOnActionFailure": True,
                "Parameters": {
                    "expressions": [{
                        "expression": "text",
                        "arguments": {
                            "text": ("A service error has occurred, and the CFDE team has been notified. "
                                     "You may be contacted with additional details.")
                        },
                        "result_path": "error"
                    }]
                },
                "ResultPath": "$.ErrorState",
                "WaitTime": 86400,
                "Next": "FinishSubmission"
            },

            # Single terminal state shared by the success and failure paths.
            "FinishSubmission": {
                "Type": "Pass",
                "End": True
            }
        }
    }
}

# Informal, documentation-only descriptions of the flow input. The values are
# human-readable type hints (or sample values), not an enforced schema.

# Shape of one transfer request, used for each entry of `user_transfer_inputs`
# and `data_destinations`.
transfer_input_schema = {
    # "deadline": "datetime str",
    "destination_endpoint_id": "str",
    "label": "str",
    "source_endpoint_id": "str",
    # "sync_level": "str 0-3",
    "transfer_items": [{
        "destination_path": "str",
        "recursive": "bool",
        "source_path": "str"
    }]
}
# Shape of `curation_input`, which the flow passes to the curation sub-flow.
curation_input_schema = {
    "curator_emails": "list of str, or False",
    "curator_template": "str or False",  # variables: $landing_page
    "curation_permissions": "list of str",
    "curation_text": "str or False",
    "author_email": "str or False",
    "author_template": "str or False",  # variables: $curation_task_id, $decision, $reason
    "email_sender": "str",
    "send_credentials": [{}]
}
# Example of the parameters the "Xtraction" state assembles from the flow input.
xtract_input_schema = {
    "metadata_storage_ep": "str",
    "eid": "str",
    "dir_path": "str",
    "mapping": "match",  # ?
    "dataset_mdata": {"test1": "test2"},
    "validator_params": {"schema_branch": "master", "validation_info": {"test1": "test2"}},
    "grouper": "matio"  # options are 'directory/matio'
}

# Top-level flow input. Every key is a plain field name; the flow definition reads
# each one through the JSONPath "$.<key>".
input_schema = {
    # Was "$.source_id": the JSONPath prefix belongs in the flow definition, not in
    # the name of the input field it refers to.
    "source_id": "str",
    "user_transfer_inputs": [transfer_input_schema],
    "data_destinations": [transfer_input_schema],

    "group_by_dir": "bool",
    "mdf_storage_ep": "str",
    "mdf_dataset_path": "str",
    "dataset_mdata": "dict",
    "validator_params": "dict",

    "feedstock_ep": "str",
    "feedstock_dir": "str",

    "curation_input": curation_input_schema,  # or False
    "mdf_publish": "bool",
    "citrine": "bool",
    "mrr": "bool",
}

In [None]:
# Deploy the flow. The state machine goes in as `flow_definition`; the remaining
# metadata fields are forwarded under their own names. The response is kept
# because later cells read the flow ID and scope from it.
flow_deploy_res = flows_client.deploy_flow(
    flow_definition=mdf_flow_def["definition"],
    **{field: mdf_flow_def[field] for field in ("title", "description", "visible_to", "runnable_by")},
)
flow_deploy_res

In [None]:
# Extract the deployed flow's ID and Globus Auth scope from the deploy response,
# then print each on its own line.
flow_id, flow_scope = (flow_deploy_res[field] for field in ("id", "globus_auth_scope"))
print(flow_id, flow_scope, sep="\n")

In [None]:
# Input document for the flow run.
# TODO: populate before running the flow; the expected fields are the ones the
# flow definition reads via "$.<name>" JSONPaths (e.g. "$.user_transfer_inputs").
flow_input = {}

In [None]:
# Start a run of the deployed flow with `flow_input`. The response is kept because
# the cells below read its "action_id" to query the run; `.data` is shown as cell output.
flow_res = flows_client.run_flow(flow_id, flow_scope, flow_input)
flow_res.data

In [None]:
# Fetch the status of the flow run started above and pretty-print it as JSON.
# Uses `json`, which must be imported in the notebook's import cell.
status = flows_client.flow_action_status(flow_id, flow_scope, flow_res["action_id"]).data
print(json.dumps(status, indent=4, sort_keys=True))

In [None]:
# Show up to 100 log entries for the flow run started above.
flows_client.flow_action_log(flow_id, flow_scope, flow_res["action_id"], limit=100).data