In [None]:
import time
import json
import sys
import os
import glob
import re
import logging
import traceback

from collections import deque

from globus_automate_client import (create_flows_client, graphviz_format, state_colors_for_log,
                                    create_action_client,
                                    create_flows_client)
from funcx.sdk.client import FuncXClient
from funcx.serialize import FuncXSerializer

# funcX service client ID (hidden in recent funcX versions).
# It is passed to create_flows_client() when the flow is deployed.
CLIENT_ID = "e6c75d97-532a-4c88-b031-8584a319fa3e"


In [None]:
# Globus Online endpoints: the two sides of the transfers performed by the flow.
src_endpoint = 'dd916908-0072-11e7-badc-22000b9a448b' #'hostel' #aps/workstation
dest_endpoint = '08925f04-569f-11e7-bef8-22000b9a448b' #'alcf#dtn_theta'

# funcX endpoints at ThetaGPU (ALCF) and Prisma (APS).
# theta_fx_endpoint runs the reconstruction function; prisma_fx_endpoint is
# used to list the input folders on the source side.
theta_fx_endpoint = '2ab22e1f-4cf1-47e9-a40c-dd8c58b41d73' #theta-ptycho-8w-1n
prisma_fx_endpoint = '4bf59543-3398-42d2-9416-c628e9f5635f' #prisma-ptycho

In [None]:
# funcX client shared by the notebook: used to register functions and to
# submit / fetch tasks.
fxc = FuncXClient()

In [None]:
def ptycho(data):
    """Run the ptychography reconstruction script as a subprocess.

    The function is registered with funcX, so every import lives inside the
    body (presumably so that it is self-contained when executed remotely).

    Parameters
    ----------
    data : dict
        Required keys: ``max_qsize``, ``rec_ngpu``, ``wid``, ``dataset_name``,
        ``python_path``, ``script_path``, ``ifpath``, ``ofpath``, ``rec_alg``,
        ``rec_nmodes``, ``rec_niter``, ``rec_output_freq``, ``rec_model`` and
        ``rec_gpu_id``.
        Optional keys, each adding the matching command-line switch when
        present and truthy: ``rec_recover_psi``, ``rec_recover_probe``,
        ``rec_recover_positions``, ``rec_use_mpi``, ``rec_overwrite`` and
        ``rec_auto_pin``.

    Returns
    -------
    str
        ``str()`` of the bytes captured from the command's stdout
        (i.e. text of the form ``"b'...'"``).

    Raises
    ------
    KeyError
        If a required key is missing from ``data``.
    Exception
        Whatever ``subprocess.run`` raises when the command cannot be
        launched; the error is logged and re-raised.
    """
    import os
    import subprocess
    import logging

    rec_max_qsize = data['max_qsize']
    rec_ngpu = data['rec_ngpu']
    wid = data['wid']
    dataset_name = data['dataset_name']

    # Log file of this wrapper function (one per dataset/worker/queue/gpu combination).
    log_file_name = "/grand/hp-ptycho/bicer/ptycho/comm_33IDD/globus_automate/logs/funcx-ptycho-{}-w{}-q{}-g{}.log".format(dataset_name, wid, rec_max_qsize, rec_ngpu)
    # NOTE: basicConfig does nothing if the root logger already has handlers.
    logging.basicConfig(filename=log_file_name,
                        filemode='a',
                        format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.INFO,
                        datefmt='%Y-%m-%d %H:%M:%S')

    logging.info("Starting ptycho funcx function.")

    # Log file handed to the reconstruction script through --log-file.
    rec_log_filename = "/grand/hp-ptycho/bicer/ptycho/comm_33IDD/globus_automate/logs/tike-wf-{}-w{}-q{}-g{}.log".format(dataset_name, wid, rec_max_qsize, rec_ngpu)

    python_path = data['python_path']
    script_path = data['script_path']

    def switch(key, flag):
        """Return ``flag`` when ``data[key]`` is present and truthy, else ''."""
        return flag if data.get(key) else ''

    # Reconstruction script parameters
    ifpath = data['ifpath']
    ofpath = data['ofpath']
    rec_alg = data['rec_alg']
    rec_nmodes = data['rec_nmodes']
    rec_niter = data['rec_niter']
    rec_output_freq = data['rec_output_freq']
    rec_recover_psi = switch('rec_recover_psi', '--recover-psi')
    rec_recover_probe = switch('rec_recover_probe', '--recover-probe')
    rec_recover_positions = switch('rec_recover_positions', '--recover-positions')
    rec_model = data['rec_model']
    rec_use_mpi = switch('rec_use_mpi', '--use-mpi')
    rec_overwrite = switch('rec_overwrite', '--overwrite')
    rec_auto_pin = switch('rec_auto_pin', '--auto-pin')
    rec_gpu_id = data['rec_gpu_id']

    # Best effort: an existing folder is fine, missing parents are created,
    # and a real failure is logged instead of being silently discarded.
    try:
        os.makedirs(ofpath, exist_ok=True)
    except OSError:
        logging.exception(f"Could not create output folder: {ofpath}")

    # NOTE(review): the command is assembled by string interpolation and run
    # through a shell; the values in `data` must come from a trusted source.
    cmd = f"{python_path} {script_path} --algorithm={rec_alg} --nmodes={rec_nmodes} --niter={rec_niter} --output-freq={rec_output_freq} {rec_recover_psi} {rec_recover_probe} {rec_recover_positions} --model={rec_model} --ngpu={rec_ngpu} {rec_use_mpi} --ifile='{ifpath}' {rec_overwrite} {rec_auto_pin} --gpu-id={rec_gpu_id} --folder='{ofpath}' --wf-max-qsize={rec_max_qsize} --log-file='{rec_log_filename}'"
    logging.info(f"Running command: {cmd}")

    try:
        res = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             shell=True, executable='/bin/bash')
    except Exception:
        # Previously swallowed, which left `res` unbound and surfaced as an
        # unrelated UnboundLocalError on the next line.
        logging.exception("Failed to launch the reconstruction command.")
        raise

    if res.returncode != 0:
        logging.error(f"Command exited with status {res.returncode}; stderr: {res.stderr}")

    outstr = f"{res.stdout}"
    return outstr

# Register ptycho with funcX. The returned id is what each flow input later
# passes as "fx_id".
func_ptycho_uuid = fxc.register_function(ptycho)
print(func_ptycho_uuid)

In [None]:
def get_folder_paths(path):
    """Return the paths matching glob pattern ``path`` in numeric order.

    The sort key is the first run of digits found in the text after the last
    '/' of each match, converted to ``int``. A match without '/' raises
    ``ValueError``; a last component without digits raises ``AttributeError``.
    """
    import glob
    import re

    def first_number(entry):
        # Everything after the final '/' (rindex raises ValueError if absent).
        last_component = entry[entry.rindex('/') + 1:]
        return int(re.search(r"(\d+)", last_component).group(0))

    matches = glob.glob(path, recursive=False)
    matches.sort(key=first_number)
    return matches


def get_file_paths(path):
    """Return the paths matching glob pattern ``path`` in numeric order.

    The sort key is the first run of digits in the file name (the text after
    the last '/'), looking only at the part before the file name's first '.'.
    Dots in directory names, names without an extension and matches without
    any '/' are all handled.

    Raises
    ------
    ValueError
        If a matched file name has no digits before its first '.'.
    """
    import glob
    import re

    def numeric_key(entry):
        # Work on the file name only: searching the whole path for '.' breaks
        # as soon as a directory contains a dot (e.g. ".../v1.0/scan3.h5").
        file_name = entry.rpartition('/')[2]
        stem = file_name.split('.', 1)[0]
        found = re.search(r"(\d+)", stem)
        if found is None:
            raise ValueError(f"No numeric id in file name: {entry!r}")
        return int(found.group(0))

    return sorted(glob.glob(path, recursive=False), key=numeric_key)

# Register both listing helpers with funcX and show the ids that were returned.
fx_func_get_file_paths_uuid = fxc.register_function(get_file_paths)
fx_func_get_folder_paths_uuid = fxc.register_function(get_folder_paths)
print(fx_func_get_file_paths_uuid)
print(fx_func_get_folder_paths_uuid)


In [None]:
# Globus Automate flow definition
#
# Three states executed in sequence: Transfer -> Analyze -> Transfer2.
# Every "<key>.$" parameter is a path into the flow input document (the
# "$.input...." values), so one deployed flow can be run with different inputs.
# NOTE(review): "WaitTime" is presumably in seconds (14400 = 4 hours) —
# confirm against the Globus Automate documentation.

flow_definition = {
  "Comment": "An analysis flow",
  "StartAt": "Transfer",
  "States": {
    # Step 1: copy source_path on source_endpoint to dest_path on dest_endpoint.
    "Transfer": {
      "Comment": "Initial transfer",
      "Type": "Action",
      "ActionUrl": "https://actions.automate.globus.org/transfer/transfer",
      "ActionScope": "https://auth.globus.org/scopes/actions.globus.org/transfer/transfer",
      "Parameters": {
        "source_endpoint_id.$": "$.input.source_endpoint",
        "destination_endpoint_id.$": "$.input.dest_endpoint",
        "transfer_items": [
          {
            "source_path.$": "$.input.source_path",
            "destination_path.$": "$.input.dest_path",
            "recursive": True
          }
        ]
      },
      "ResultPath": "$.Transfer1Result",
      "WaitTime": 14400,
      "Next": "Analyze"
    },
    # Step 2: run funcX function fx_id on endpoint fx_ep with payload params.
    "Analyze": {
      "Comment": "Run a funcX function",
      "Type": "Action",
      "ActionUrl": "https://api.funcx.org/automate",
      "ActionScope": "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/automate2",
      "Parameters": {
          "tasks": [{
            "endpoint.$": "$.input.fx_ep",
            "func.$": "$.input.fx_id",
            "payload.$": "$.input.params"
        }]
      },
      "ResultPath": "$.AnalyzeResult",
      "WaitTime": 14400,
      "Next": "Transfer2"
    },
    # Step 3: copy result_path on dest_endpoint back to source_result_path on
    # source_endpoint (the endpoints are swapped relative to step 1).
    "Transfer2": {
      "Comment": "Return transfer",
      "Type": "Action",
      "ActionUrl": "https://actions.automate.globus.org/transfer/transfer",
      "ActionScope": "https://auth.globus.org/scopes/actions.globus.org/transfer/transfer",
      "Parameters": {
        "source_endpoint_id.$": "$.input.dest_endpoint",
        "destination_endpoint_id.$": "$.input.source_endpoint",
        "transfer_items": [
          {
            "source_path.$": "$.input.result_path",
            "destination_path.$": "$.input.source_result_path",
            "recursive": True #False
          }
        ]
      },
      "ResultPath": "$.Transfer2Result",
      "WaitTime": 14400,
      "End": True
    },
  }
}


In [None]:
# Workflow folder layout on the source side ...
src_wf_root_path = '/prisma-data1/bicer/workflow'
src_input_folder_prefix = "input"
src_output_folder_prefix = "output"

# ... and on the destination side.
dest_wf_root_path = '/grand/hp-ptycho/bicer/ptycho/comm_33IDD/globus_automate'
dest_input_folder_prefix = "input"
dest_output_folder_prefix = "output"


def _wait_for_fx_result(client, task_id, poll_interval=2.0, timeout=600.0):
    """Poll ``client.get_result(task_id)`` until the task has finished.

    ``get_result`` raises while the task is still pending, so calling it once
    right after submission normally fails. An exception whose type name or
    message mentions "pending" is treated as "not finished yet"; any other
    exception, or a pending task once ``timeout`` seconds have elapsed, is
    re-raised to the caller.
    """
    deadline = time.monotonic() + timeout
    while True:
        try:
            return client.get_result(task_id)
        except Exception as exc:
            still_pending = "pending" in f"{type(exc).__name__} {exc}".lower()
            if not still_pending or time.monotonic() >= deadline:
                raise
        time.sleep(poll_interval)


# Despite its name this is a glob pattern (it is handed to glob.glob), not a regex.
src_input_folder_paths_regex = f"{src_wf_root_path}/{src_input_folder_prefix}/*"
rid = fxc.run(src_input_folder_paths_regex,
              endpoint_id=prisma_fx_endpoint,
              function_id=fx_func_get_folder_paths_uuid)
src_input_folder_paths = _wait_for_fx_result(fxc, rid)

# Derive the three companion folders of every input folder. The folder id is
# the last run of digits found in the source input path.
src_output_folder_paths = []
dest_output_folder_paths = []
dest_input_folder_paths = []
for src_input_folder_path in src_input_folder_paths:
    iid = re.findall(r'\d+', src_input_folder_path)
    src_output_folder_path = f"{src_wf_root_path}/{src_output_folder_prefix}/{iid[-1]}"
    src_output_folder_paths.append(src_output_folder_path)
    dest_input_folder_path = f"{dest_wf_root_path}/{dest_input_folder_prefix}/{iid[-1]}"
    dest_input_folder_paths.append(dest_input_folder_path)
    dest_output_folder_path = f"{dest_wf_root_path}/{dest_output_folder_prefix}/{iid[-1]}"
    dest_output_folder_paths.append(dest_output_folder_path)

# src_input_folder_paths: diffraction pattern files to be processed @ APS
# src_output_folder_paths: folders for reconstructed images after processing @ APS
# dest_input_folder_paths: diffraction pattern files to be processed @ ALCF
# dest_output_folder_paths: folders for reconstructed images after processing @ ALCF

for (src_input_folder_path, src_output_folder_path, dest_input_folder_path, dest_output_folder_path ) in zip(src_input_folder_paths, src_output_folder_paths, dest_input_folder_paths, dest_output_folder_paths):
    print(f"Source input folder: {src_input_folder_path}; Source output folder: {src_output_folder_path}")
    print(f"Dest. input folder: {dest_input_folder_path}; Dest. output folder: {dest_output_folder_path}")
    print()
    print()


In [None]:
# Ptycho recon params: interpreter and script that are launched on the compute side
script_path = '/home/bicer/projects/tike/scripts/tike-pinned-ptycho-recon.py'
python_path = "/home/bicer/projects/tyler/bin/python"

# Reconstruction options
rec_alg = 'cgrad'
rec_nmodes = 1
rec_upd_pos = False
rec_niter = 30
rec_output_freq = 5
rec_recover_psi = True
rec_recover_probe = True
rec_recover_positions = False
rec_model = 'gaussian'
rec_ngpu = 1
rec_use_mpi = False
rec_overwrite = True
rec_auto_pin = True

# Workflow sizing
dataset_name = "catalyst"
nnodes = 2
nworkers_per_node = 8
MAX_QSIZE = nnodes * nworkers_per_node

# Options that are identical for every flow; only the paths, the GPU id and
# the worker id differ from one flow input to the next.
shared_recon_params = {
    'script_path': script_path,
    'python_path': python_path,
    'rec_alg': rec_alg,
    'rec_nmodes': rec_nmodes,
    'rec_upd_pos': rec_upd_pos,
    'rec_niter': rec_niter,
    'rec_output_freq': rec_output_freq,
    'rec_recover_psi': rec_recover_psi,
    'rec_recover_probe': rec_recover_probe,
    'rec_recover_positions': rec_recover_positions,
    'rec_model': rec_model,
    'rec_ngpu': rec_ngpu,
    'rec_use_mpi': rec_use_mpi,
    'rec_overwrite': rec_overwrite,
    'rec_auto_pin': rec_auto_pin,
}

# One flow input per (source in, source out, dest in, dest out) folder group.
flow_inputs = []
gcounter = 0
folder_groups = zip(src_input_folder_paths, src_output_folder_paths,
                    dest_input_folder_paths, dest_output_folder_paths)
for (src_input_folder_path, src_output_folder_path,
     dest_input_folder_path, dest_output_folder_path) in folder_groups:

    # GPU ids cycle through 0 .. nworkers_per_node-1 as flows are created.
    rec_gpu_id = gcounter % nworkers_per_node

    flow_input = {
        "input": {
            "source_endpoint": str(src_endpoint),
            "source_path": str(src_input_folder_path),
            "dest_endpoint": dest_endpoint,
            "dest_path": str(dest_input_folder_path),

            "result_path": str(dest_output_folder_path),
            "source_result_path": str(src_output_folder_path),
            "fx_ep": str(theta_fx_endpoint),
            "fx_id": str(func_ptycho_uuid),
            "params": {
                'ifpath': str(dest_input_folder_path),
                'ofpath': f"{dest_output_folder_path}/",
                **shared_recon_params,
                'rec_gpu_id': rec_gpu_id,
                'max_qsize': MAX_QSIZE,
                'dataset_name': dataset_name,
                'wid': gcounter,
            },
        }
    }
    flow_inputs.append(flow_input)
    gcounter += 1

#print(f"transfer file from {src_endpoint}#{src_filepath}/{src_filename} to {dest_endpoint}#{dest_filepath}/")
#print(f"recon file:{dest_filepath}/{src_filename} output:{dest_resultpath}/")
#print(f"transfer file from {dest_endpoint}:{dest_resultpath} to {src_endpoint}#{src_result_path}/")

In [None]:
# Number of flow inputs prepared, i.e. how many flow runs will be started.
print(len(flow_inputs))

In [None]:
# Deploy the flow definition and keep the two values needed to run it and to
# query its actions: the flow id and its Globus Auth scope.
flows_client = create_flows_client(CLIENT_ID)
flow = flows_client.deploy_flow(flow_definition, title="Simple ptycho data analysis flow")
flow_id = flow['id']
flow_scope = flow['globus_auth_scope']

print(flow)

In [None]:
# Client-side workflow log.
# NOTE: basicConfig does nothing if the root logger already has handlers.
log_file_name = "/home/beams/TBICER/logs/funcx-ptycho-wf-{}-q{}-g{}.log".format(dataset_name, MAX_QSIZE, rec_ngpu)
logging.basicConfig(filename=log_file_name,
                    filemode='a',
                    format='%(asctime)s %(levelname)s %(message)s',
                    level=logging.INFO,
                    datefmt='%Y-%m-%d %H:%M:%S')

# q1 holds the flow actions that were started and are still being tracked.
# q0 holds flow inputs waiting to be started when a tracked flow succeeds.
# NOTE(review): nothing in this cell ever adds to q0, so every flow is started
# up front and the refill branch below never runs — confirm this is intended.
q0 = deque()
q1 = deque()

logging.info(f"Workflow information for: {log_file_name}")
logging.info(f"Deployed flow: {flow}")
# NOTE(review): MAX_QSIZE was computed earlier from a different nnodes value;
# these reassignments only affect the log messages in this cell — confirm.
nnodes = 8
nworkers_per_node = 8

counter = 0  # number of flows that finished with status SUCCEEDED
nflows = len(flow_inputs)
logging.info(f"Starting workflow {dataset_name}:{MAX_QSIZE}:{nnodes} left={nflows-counter} succeeded={counter}")

# Start one flow run per prepared input.
for i, pending_input in enumerate(flow_inputs):
    flow_action = flows_client.run_flow(flow_id, flow_scope, pending_input)
    q1.append(flow_action)
    lstr = f"Flow {i} initiated and added to q1: {flow_action['action_id']}"
    logging.info(lstr)
    print(lstr)

# Poll the tracked actions round-robin until none is left.
i = -1
while len(q1) > 0:
    i = (i+1) % len(q1)
    action_id = q1[i]['action_id']
    try:
        # Kept in its own name so the deployed `flow` dict is not overwritten
        # (re-running this cell would otherwise log a status as "Deployed flow").
        action_status = flows_client.flow_action_status(flow_id, flow_scope, action_id)
    except Exception as exc:
        err = traceback.format_exc()
        print(f"Exception:{exc}")
        logging.warning(err)
        # Back off briefly instead of spinning while the service keeps failing.
        time.sleep(1)
        continue

    lstr = f"len(q0)={len(q0)}; len(q1)={len(q1)}; i={i}"
    logging.info(lstr)
    print(lstr)

    lstr = f"Flow {i} status: {action_id}: {action_status['status']}"
    logging.info(lstr)
    print(lstr)

    # With only a few actions left, slow the polling down.
    if len(q1) < 4:
        time.sleep(1)

    if action_status['status'] == 'SUCCEEDED':
        logging.info(f"Flow succeeded: {q1[i]}")
        del q1[i]
        counter = counter+1
        if len(q0) > 0:
            flow_input = q0.popleft()
            flow_action = flows_client.run_flow(flow_id, flow_scope, flow_input)
            q1.append(flow_action)
            lstr = f"New flow initiated and added to the q1: {flow_action['action_id']}"
            logging.info(lstr)
            print(lstr)
            lstr=f"Copy from {flow_input['input']['params']['ifpath']} to {flow_input['input']['params']['ofpath']}"
            logging.info(lstr)
            print(lstr)
    elif action_status['status'] == 'FAILED':
        # Report the action that actually failed, not the most recently
        # submitted one (`flow_action`).
        lstr = f"Failed action, removing: {action_id}"
        print(lstr)
        logging.warning(lstr)

        del q1[i]

logging.info(f"Done workflow {dataset_name}:{MAX_QSIZE}:{nnodes} left={nflows-counter} succeeded={counter}")


In [None]:
# Show the Globus Auth scope of the deployed flow.
print(flow_scope)

In [None]:
# Cancel every flow action that is still tracked in q1. The monitoring loop
# only ends normally once q1 is empty, so this does something only when that
# loop was interrupted before finishing.
for flow_action in q1:
    flows_client.flow_action_cancel(flow_id, flow_scope, flow_action['action_id'])