In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob
from radical.entk import Profiler
import radical.analytics as ra
import radical.utils as ru
import radical.pilot as rp
import numpy as np
from math import sqrt
import os

# Provide a MongoDB URL for RADICAL-Pilot only when the surrounding
# environment has not already configured one, so that an exported
# RADICAL_PILOT_DBURL is no longer silently overwritten.
# NOTE(review): the fallback literal embeds a username and password in the
# notebook itself -- prefer exporting RADICAL_PILOT_DBURL outside the
# notebook and dropping this literal once nothing depends on it.
os.environ.setdefault('RADICAL_PILOT_DBURL', 'mongodb://user:user@ds153652.mlab.com:53652/test_08_08')

%matplotlib inline



In [38]:
def collapse_ranges(ranges):
    """
    Collapse a collection of ranges into the smallest equivalent set.

    `ranges` is an iterable of pairs [start, end] with 'start <= end'. The
    result covers exactly the same part of the domain as the input -- not
    more, not less -- using the smallest possible number of ranges.

    We first sort the ranges by their starting point. We then start with the
    range with the smallest starting point [start_1, end_1], and compare to
    the next following range [start_2, end_2], where we now know that
    start_1 <= start_2. We have now two cases:

    a) when start_2 <= end_1, then the ranges overlap (or touch), and we
       collapse them into range_1: range_1 = [start_1, max(end_1, end_2)]

    b) when start_2 > end_1, then the ranges don't overlap. Importantly, none
       of the other later ranges can ever overlap range_1. So we move range_1
       to the set of final ranges, and restart the algorithm with range_2
       being the smallest one.

    Termination condition is if only one range is left -- it is also moved to
    the list of final ranges then, and that list is returned.

    Parameters:
        ranges: iterable of [start, end] pairs (lists or tuples). The input
                and the pairs it contains are never modified.

    Returns:
        A list of new [start, end] lists, sorted by start, none of which
        overlap. An empty input yields an empty list.
    """

    START = 0
    END = 1

    # Sort *copies* of the input pairs: extending a range below writes into
    # the pair, which must neither leak into the caller's data nor fail when
    # the caller passed immutable tuples.
    _ranges = sorted((list(r) for r in ranges), key=lambda x: x[START])

    # nothing to collapse
    if not _ranges:
        return []

    final = []
    base = _ranges[0]  # range with the smallest starting point

    for _range in _ranges[1:]:

        if _range[START] <= base[END]:

            # ranges overlap -- extend the base
            base[END] = max(base[END], _range[END])

        else:

            # ranges don't overlap -- move base to final, and current _range
            # becomes the new base
            final.append(base)
            base = _range

    # termination: push last base to final
    final.append(base)

    return final

In [39]:
def get_duration_using_analytics(session):
    """
    Return the duration reported by the session's own `duration` method.

    The session is first narrowed to entities of type 'unit' (as a new
    object, `inplace=False`, so `session` itself is left untouched); the
    duration is then requested between the states
    rp.UMGR_SCHEDULING_PENDING and rp.DONE.
    """
    unit_view = session.filter(etype='unit', inplace=False)
    state_pair = [rp.UMGR_SCHEDULING_PENDING, rp.DONE]
    return unit_view.duration(state_pair)

In [40]:
def get_duration_using_minmax(session):
    """
    Return the span between the earliest start and the latest stop.

    The start of a unit is the timestamp of its rp.UMGR_SCHEDULING_PENDING
    state, the stop is the timestamp of its rp.DONE state. The result is
    simply (latest stop) - (earliest start), so any gap during which no unit
    was between those two states is counted as well.
    """
    pending = rp.UMGR_SCHEDULING_PENDING
    start_rp = []
    for unit in session.get(state=pending, etype='unit'):
        start_rp.append(unit.states[pending]['time'])

    done = rp.DONE
    stop_rp = []
    for unit in session.get(state=done, etype='unit'):
        stop_rp.append(unit.states[done]['time'])

    last_stop = max(stop_rp)
    first_start = min(start_rp)
    return last_stop - first_start

In [41]:
def get_duration_using_utils(session):
    """
    Return the summed length of the collapsed per-unit time ranges.

    Every unit contributes one range, from the timestamp of its
    rp.UMGR_SCHEDULING_PENDING state to the timestamp of its rp.DONE state.
    The ranges are passed through collapse_ranges() and the lengths of the
    resulting ranges are added up, starting from 0.0.
    """
    units = session.get(state=[rp.UMGR_SCHEDULING_PENDING, rp.DONE], etype='unit')

    # one [start, stop] pair per unit
    ranges = []
    for unit in units:
        started = unit.states[rp.UMGR_SCHEDULING_PENDING]['time']
        stopped = unit.states[rp.DONE]['time']
        ranges.append([started, stopped])

    # accumulate the length of every collapsed range
    covered = 0.0
    for span in collapse_ranges(ranges):
        covered += span[1] - span[0]

    return covered

In [43]:
def get_profiles(data_loc):

    json_files = glob.glob('{0}/*.json'.format(data_loc))
    json_file = json_files[0]
    json      = ru.read_json(json_file)
    sid       = os.path.basename(json_file)[:-5]

    session = ra.Session(sid, 'radical.pilot', src='{0}/'.format(data_loc))       
        
    print 'Duration using analytics: ',get_duration_using_analytics(session)
    print 'Duration using (max - min): ',get_duration_using_minmax(session)
    print 'Duration using utils functions: ',get_duration_using_utils(session)
    print '-------------------------------------'

In [46]:
data_loc_no_barrier = '../raw_data/tasks-1024-trial-5/'
data_loc_barrier = '../../../htbac-experiments/weak_scaling_null_workload_data-devel/null-ws-64cores/'

print 'Duration with all tasks executed concurrently'
get_profiles(data_loc_no_barrier)
print 'Duration with tasks executed concurrently'
get_profiles(data_loc_barrier)

Duration with all tasks executed concurrently
Duration using analytics:  651.08190012
Duration using (max - min):  651.08190012
Duration using utils functions:  651.08190012
-------------------------------------
Duration with tasks executed concurrently
Duration using analytics:  112.780900002
Duration using (max - min):  112.91260004
Duration using utils functions:  112.780900002
-------------------------------------
