In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob
from radical.entk import Profiler
import radical.analytics as ra
import radical.utils as ru
import radical.pilot as rp

%matplotlib



Using matplotlib backend: Qt5Agg


In [2]:
# Every pipeline count for which profiles are available

pipelines_list = [8, 16, 32]

In [3]:
# Generate task uids

def get_task_uids(num_pipelines, stages_per_pipeline=7, tasks_per_stage=1):
    """Generate the EnTK task uids for a workflow of the given shape.

    The uids are synthesised, not read from any profile: one
    'radical.entk.task.NNNN' string per task, numbered from 0 and
    zero-padded to four digits.

    Args:
        num_pipelines: number of pipelines in the workflow.
        stages_per_pipeline: number of stages in each pipeline (default 7).
        tasks_per_stage: number of tasks in each stage (default 1).

    Returns:
        A list with num_pipelines * stages_per_pipeline * tasks_per_stage
        uid strings; empty when that product is 0.
    """
    num_tasks = num_pipelines * stages_per_pipeline * tasks_per_stage
    return ['radical.entk.task.%04d' % t for t in range(num_tasks)]

In [4]:

# Get information from all the entk profiles

def get_entk_info(src, num_pipelines=None):
    """Return the EnTK-level duration of all tasks of one run.

    Args:
        src: path of the RP session directory. The EnTK profiler is pointed
            at its parent directory (src + '/../').
        num_pipelines: number of pipelines the run executed; it determines
            which task uids are queried. When omitted, the notebook-level
            variable `pipelines` is used, so existing calls of the form
            get_entk_info(src) keep working inside the driver loop.

    Returns:
        The value of Profiler.duration() over all task uids, measured from
        the 'SCHEDULING' state to the 'DONE' state.
    """
    if num_pipelines is None:
        # Fallback to the loop variable of the driver cell; pass
        # num_pipelines explicitly to avoid depending on that hidden state.
        num_pipelines = pipelines

    p = Profiler(src = src + '/../')

    task_uids = get_task_uids(num_pipelines)

    # Only the task-level interval feeds the result. Intervals of other
    # components (e.g. 'radical.entk.appmanager.0000') can be queried the
    # same way with p.duration(uid, events=[start_event, stop_event]).
    return p.duration(task_uids, states=['SCHEDULING','DONE'])

In [10]:
# Get information from the rp profile files and json file of one session

def get_rp_info(src):
    """Extract execution, RP and data-staging durations from an RP session.

    Args:
        src: path of the RP session directory. It is expected to contain a
            '<session id>.json' file; the first match of '*.json' is used.

    Returns:
        A tuple (exec_dur, rp_dur, data_dur) computed over all entities of
        type 'unit' in the session:
          * exec_dur: AGENT_EXECUTING -> AGENT_STAGING_OUTPUT_PENDING
          * rp_dur:   UMGR_SCHEDULING -> DONE
          * data_dur: sum of the four input/output staging intervals
        (0, 0, 0) is returned when src contains no '*.json' file.
    """
    json_files = glob.glob('%s/*.json'%src)
    print(json_files)
    if not json_files:
        # No session description found: report zero durations instead of
        # failing on json_files[0].
        return 0, 0, 0

    # The session id is the json file name without its '.json' suffix
    sid = os.path.basename(json_files[0])[:-5]

    session = ra.Session(sid, 'radical.pilot', src=src)
    units = session.filter(etype='unit', inplace=False)

    exec_dur = units.duration([rp.AGENT_EXECUTING, rp.AGENT_STAGING_OUTPUT_PENDING])
    rp_dur = units.duration([rp.UMGR_SCHEDULING, rp.DONE])

    # Each staging step is measured from its own state to the next state
    staging_intervals = [
        [rp.UMGR_STAGING_INPUT, rp.AGENT_STAGING_INPUT_PENDING],
        [rp.AGENT_STAGING_INPUT, rp.AGENT_SCHEDULING_PENDING],
        [rp.AGENT_STAGING_OUTPUT, rp.UMGR_STAGING_OUTPUT_PENDING],
        [rp.UMGR_STAGING_OUTPUT, rp.DONE],
    ]
    data_dur = sum(units.duration(interval) for interval in staging_intervals)

    return exec_dur, rp_dur, data_dur


In [11]:
df = pd.DataFrame(columns=['EnTK overhead', 'RP overhead', 'Execution duration'])

# Restrict the analysis to the 8-pipeline run (overrides the full list
# defined earlier in the notebook)
pipelines_list = [8]

for pipelines in pipelines_list:

    # NOTE(review): the glob pattern does not depend on `pipelines`, so every
    # iteration would pick up the same session -- confirm the directory
    # layout before adding more values to pipelines_list.
    # Matches ending in '.json' are dropped so that only session directories
    # remain.
    session_dirs = [path for path in glob.glob('./rp.session*')
                    if path.split('.')[-1] != 'json']
    if not session_dirs:
        raise RuntimeError('no rp.session* directory found in the current working directory')

    src = session_dirs[0]
    print(src)

    entk_dur = get_entk_info(src)
    print(entk_dur)

    exec_dur, rp_dur, data_dur = get_rp_info(src)
    print('%s %s %s' % (exec_dur, rp_dur, data_dur))

    # NOTE(review): df stays empty while the next line is disabled. With the
    # values recorded below, rp_dur - exec_dur would be negative -- confirm
    # the state intervals before re-enabling it.
    #df.loc[pipelines] = [entk_dur - rp_dur, rp_dur - exec_dur, exec_dur]

./rp.session.js-17-212.jetstream-cloud.org.vivek91.017465.0005
2640.26300001
['./rp.session.js-17-212.jetstream-cloud.org.vivek91.017465.0005/rp.session.js-17-212.jetstream-cloud.org.vivek91.017465.0005.json']
2586.2560997 2561.21889997 47.1306009293


In [None]:
# Render the figures of the following cells inline in the notebook
%matplotlib inline

In [None]:
# Plot only the overhead columns. errors='ignore' keeps this cell re-runnable:
# on a second run the column is already gone and a plain drop would raise.
df = df.drop('Execution duration', axis=1, errors='ignore')
ax1 = df.plot(kind='bar', title='Time taken by EnTK and RP to execute a workflow consisting of \n X Pipelines, 7 Stages per Pipeline and 1 Task per Stage on NCSA.BW (Task executable = "ESMACS", number of trials per data point = 1)')
ax1.set_xlabel('Number of Pipelines')
ax1.set_ylabel('Time Execution Duration (seconds)')

# Not referenced in this cell
FONTSIZE=12
# Take the figure from the axes that were just drawn instead of relying on
# pyplot's notion of the "current" figure
fig = ax1.get_figure()
fig.set_size_inches(18.5, 10.5)
# plt.show() works with both inline and GUI backends; Figure.show() only
# works when a GUI backend is active
plt.show()
#fig.savefig('namd_worload_devel.pdf', dpi=100)