In [2]:
import globus_sdk, os, json, time, h5py, pickle
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

In [None]:
# Follow the Globus SDK tutorial to register a native app and obtain a client ID:
# https://globus-sdk-python.readthedocs.io/en/stable/tutorial/

# The fallback ID below is an invalid placeholder. Export GLOBUS_CLIENT_ID with
# your own registered client ID (or edit the fallback) before running this cell.
CLIENT_ID = os.environ.get("GLOBUS_CLIENT_ID") or "02cd98fb-d0d3-444c-ba2b-cf8609a92c2b"
client = globus_sdk.NativeAppAuthClient(CLIENT_ID)
client.oauth2_start_flow()

# Interactive step: the user opens the URL, logs in, and pastes the code back.
authorize_url = client.oauth2_get_authorize_url()
print('Please go to this URL and login: {0}'.format(authorize_url))

# Use raw_input when the builtins expose it, otherwise fall back to input.
get_input = getattr(__builtins__, 'raw_input', input)
auth_code = get_input('Please enter the code you get after login here: ').strip()
token_response = client.oauth2_exchange_code_for_tokens(auth_code)

# The token response is keyed by resource server; pull out the two used here.
globus_auth_data = token_response.by_resource_server['auth.globus.org']
globus_transfer_data = token_response.by_resource_server['transfer.api.globus.org']

# Most specifically, you want these tokens as strings.
AUTH_TOKEN = globus_auth_data['access_token']
TRANSFER_TOKEN = globus_transfer_data['access_token']

# Transfer client used by every later cell in this notebook.
authorizer = globus_sdk.AccessTokenAuthorizer(TRANSFER_TOKEN)
tc = globus_sdk.TransferClient(authorizer=authorizer)

# Dump the client so later experiments can reuse it without logging in again
# (the original note says the token is valid for about 3 days).
# NOTE(review): the pickle presumably embeds the access token -- keep the file
# private, and only ever unpickle files you created yourself.
with open('globus-transfer-client.pkl', 'wb') as filehandler:
    pickle.dump(tc, filehandler)

In [None]:
# Map endpoint display names to endpoint IDs for every endpoint this account
# administers, echoing each pair so the available names are visible.
my_dtns = {}
for endpoint in tc.endpoint_search(filter_scope="administered-by-me"):
    endpoint_id = endpoint["id"]
    endpoint_name = endpoint["display_name"]
    print("{} {}".format(endpoint_id, endpoint_name))
    my_dtns[endpoint_name] = endpoint_id

# Endpoints that are not administered by you can be registered manually.
my_dtns['posix'] = '33c03015-2a4d-4165-848a-74ac2133b708'

In [None]:
# Once a transfer is done, this function reports the average throughput used in the regression analysis.
def avg_throughput(tc, tid, label=None):
    """Print and return the average throughput of a Globus transfer task.

    Parameters
    ----------
    tc : object
        Client exposing ``get_task(tid)`` and ``task_event_list(tid)``.
    tid : str
        ID of the transfer task to summarize.
    label : str, optional
        Text shown inside the leading brackets of the printed line
        (empty when omitted).

    Returns
    -------
    float
        Average rate in Mbps (1e6 bits per second), or NaN when the elapsed
        time is not a positive number and no rate can be computed.
    """
    task_info = tc.get_task(tid)
    trs_sz = task_info['bytes_transferred']

    task_list = list(tc.task_event_list(tid))
    # The first event is treated as the end and the last as the start, so a
    # clean run shows SUCCEEDED first and STARTED last. An empty event list
    # falls through to the task-level timestamps instead of raising IndexError.
    clean_run = (bool(task_list)
                 and task_list[0]['code'] == 'SUCCEEDED'
                 and task_list[-1]['code'] == 'STARTED')
    if clean_run:
        elapse = (pd.to_datetime(task_list[0]['time']) -
                  pd.to_datetime(task_list[-1]['time'])).total_seconds()
    else:
        print('[WARN] %s is not a normal started/completed task' % tid)
        elapse = (pd.to_datetime(task_info['completion_time']) -
                  pd.to_datetime(task_info['request_time'])).total_seconds()

    # Guard against zero, negative, or NaN durations rather than dividing by them.
    if elapse > 0:
        rate_mbps = 8 * trs_sz / elapse * 1e-6
    else:
        rate_mbps = float('nan')

    print('[%s] Avg. rate of transferring %.1f MiB from %s to %s is %.2f Mbps %s' % (
          '' if label is None else label,
          trs_sz * 2**-20, task_info['source_endpoint_display_name'],
          task_info['destination_endpoint_display_name'],
          rate_mbps, tid))
    return rate_mbps

In [None]:
def globus_trs_files(src_id, dst_id, tc, src_fns, dst_fns, label=None):
    """Submit one Globus transfer that copies each source file to its paired destination.

    Parameters
    ----------
    src_id, dst_id : str
        IDs of the source and destination endpoints.
    tc : globus_sdk.TransferClient
        Client used to build and submit the transfer.
    src_fns, dst_fns : sequence of str
        Source paths and destination paths, paired by position; both must
        have the same length.
    label : str, optional
        Task label; defaults to ``'gcon-exp-<N> files'`` with N the file count.

    Returns
    -------
    The response of ``tc.submit_transfer`` (callers read ``['task_id']`` from it).

    Raises
    ------
    ValueError
        If ``src_fns`` and ``dst_fns`` differ in length. Without this check
        ``zip`` would drop the unpaired paths and submit a partial transfer.
    """
    if len(src_fns) != len(dst_fns):
        raise ValueError(
            'Source and destination file names do not match (%d vs %d)'
            % (len(src_fns), len(dst_fns)))
    if label is None:
        label = 'gcon-exp-%d files' % (len(src_fns), )
    tdata = globus_sdk.TransferData(tc, src_id, dst_id, preserve_timestamp=False,
                                    label=label)
    for _sfn, _dfn in zip(src_fns, dst_fns):
        tdata.add_item(_sfn, _dfn, recursive=False)

    return tc.submit_transfer(tdata)

## upload to wasabi for regression test

In [None]:
# Upload one dataset per file count from the POSIX endpoint to Wasabi and
# report the average throughput of each run. The [1:] slice skips the
# single-file case at the head of the tuple.
for _nf in (1, 50, 100, 200, 400, 600, 800, 1000)[1:]:
    # Source and destination paths are paired by index.
    sfn_10g_cc = ["/data/ds-5g/%04d-files/%04d.bin" % (_nf, i) for i in range(_nf)]
    dfn_10g_ws = ["/boto3/5g-%04d-files/%04d.bin" % (_nf, i) for i in range(_nf)]

    gtrs_inst = globus_trs_files(
        my_dtns['posix'], my_dtns['WASABI-ZLIU'], tc,
        sfn_10g_cc, dfn_10g_ws,
        label='upload %d files to wasabi' % len(sfn_10g_cc),
    )

    # Keep calling task_wait until it reports a truthy result.
    while not tc.task_wait(gtrs_inst['task_id'], timeout=7200, polling_interval=10):
        pass

    avg_throughput(
        tc, gtrs_inst['task_id'],
        label='%d files uploaded to wasabi' % len(sfn_10g_cc),
    )

## download from wasabi for regression test

In [None]:
# Download each previously uploaded dataset from Wasabi back to the POSIX
# endpoint and report the average throughput of each run.
for _nf in (50, 100, 200, 400, 600, 800, 1000):
    # Every run writes to the same /data/dl-tmp/ file names, so each run
    # overwrites the files of the previous one.
    dfn_5g_cc = ["/data/dl-tmp/%04d.bin" % (i) for i in range(_nf)]
    sfn_5g_ws = ["/boto3/5g-%04d-files/%04d.bin" % (_nf, i) for i in range(_nf)]

    gtrs_inst = globus_trs_files(my_dtns['WASABI-ZLIU'], my_dtns['posix'], tc,
                                 sfn_5g_ws, dfn_5g_cc,
                                 label='download %d files from wasabi' % len(dfn_5g_cc))

    # Keep calling task_wait until it reports a truthy result.
    while not tc.task_wait(gtrs_inst['task_id'], timeout=7200, polling_interval=10):
        continue

    # Label fixed: the files are downloaded *from* Wasabi, not to it.
    avg_throughput(tc, gtrs_inst['task_id'],
                   label='%d files downloaded from wasabi' % len(dfn_5g_cc))

The process for all other connectors is the same; only the endpoint name needs to change.