# SDP HPSO Scheduling

Last run with Jupyter Notebook 5.0.0 running Python 3.5.2

In [None]:
# Imports
from __future__ import print_function
import sys
import matplotlib.pyplot as plt
sys.path += ['..']
from sdp_par_model import reports as iapi
from sdp_par_model import evaluate as imp
from sdp_par_model.config import PipelineConfig
from sdp_par_model.parameters.definitions import *
from sdp_par_model.parameters.definitions import Constants as c
import numpy as np
import collections
import warnings

%matplotlib inline
plt.rcParams['figure.figsize'] = 16, 8

## Define structures and methods for handling SDP tasks 

In [None]:
# Maps the A..G task letters onto HPSOs.
# TODO: needs refactoring; the idea would be to specify HPSOs directly instead of "letters".
hpso_lookup = {
    'A': HPSOs.hpso01,
    'B': HPSOs.hpso04c,  # TODO: This task not properly defined yet
    'C': HPSOs.hpso13,
    'D': HPSOs.hpso14,
    'E': HPSOs.hpso15,
    'F': HPSOs.hpso27,
    'G': HPSOs.hpso37c,
}

# The following results map was copied from examples used by Peter Wortmann. It defines the values we wish
# to calculate. Each row is (Title, Unit, Default?, Sum?, Expression).
results_map = [
    ('Total buffer ingest rate', 'TeraBytes/s', True, False,
     lambda tp: tp.Rvis_ingest*tp.Nbeam*tp.Npp*tp.Mvis/c.tera),
    ('Working (cache) memory', 'TeraBytes', True, True,
     lambda tp: tp.Mw_cache/c.tera),
    ('Visibility I/O Rate', 'TeraBytes/s', True, True,
     lambda tp: tp.Rio/c.tera),
    ('Total Compute Rate', 'PetaFLOP/s', True, True,
     lambda tp: tp.Rflop/c.peta),
    ('Comp Req Breakdown ->', 'PetaFLOP/s', True, True,
     lambda tp: tp.get_products('Rflop', scale=c.peta)),
]
# We actually don't care about the breakdown (the last row) for now; it is kept above only to show how to get it
del results_map[-1]


class SDPTask:
    """
    Plain record describing one unit of SDP work.

    All fields default to None at class level; callers assign the ones they need on the instance.
    """
    uid          = None  # Optional: unique ID; can be used for sequencing
    description  = None  # Provides a description
    t_min_start  = None  # Earliest wall clock time (in seconds) that this task can / may start
    prec_task    = None  # Preceding task (uid) that needs to complete before this one can start
    t_fixed      = None  # fixed minimum duration (in seconds) of this task (e.g. for an observation)
    data_in      = None  # Amount of data (in TB) this task requires *before* starting (usually read from hot buffer)
    memsize      = None  # The amount of SDP working memory (in TB) needed to perform this task
    flopcount    = None  # Number of operations (in PetaFLOP) required to complete this task
    data_out     = None  # Amount of data (in TB) this task stores *after* finishing (usually written to hot buffer)

    def __str__(self):
        """
        Returns a multi-line summary: a header line, then one line per attribute that has been set
        on this instance (class-level defaults are not listed). Long values are cut at 40 characters.
        """
        if self.description is None:
            header = "SDPTask (type undefined): "
        else:
            header = "SDPTask (%s):" % self.description

        lines = [header]
        for name, value in self.__dict__.items():
            text = str(value)
            if len(text) > 40:
                text = text[:40] + "... (truncated)"
            lines.append(" %s\t\t= %s" % (str(name), text))
        return "\n".join(lines)
            
def task_letters_to_SDPTask_list(letter_sequence, performance_dict):
    """
    Converts a list of task letters into a flat list of SDPTask objects.

    Every letter expands into one combined 'Ingest+RCal' task followed by one task for each remaining
    subtask of the corresponding HPSO. uids are assigned consecutively (starting at 0) in creation order,
    and each follow-on task names its own Ingest+RCal task as predecessor via prec_task.

    @param letter_sequence : a sequence of HPSOs, defined by Rosie's A..G lettering scheme (keys of hpso_lookup).
                             TODO: replace by actual HPSO names
    @param performance_dict : a dictionary of computational requirements for each HPSO; these need to be computed
                              only once. Read as performance_dict[hpso]['Tobs'] and
                              performance_dict[hpso][subtask]['cache' | 'compRate' | 'ingestRate']
    @return : list of SDPTask objects in creation order
    @raise AssertionError : if an HPSO has fewer than three subtasks, or if its first two subtasks are not
                            Ingest followed by RCal
    """
    tasks = []
    uid = -1
    for task_letter in letter_sequence:
        hpso = hpso_lookup[task_letter]
        hpso_subtasks = HPSOs.hpso_subtasks[hpso]
        hpso_perf = performance_dict[hpso]
        nr_subtasks = len(hpso_subtasks)
        assert nr_subtasks > 2

        # Both conditions must hold. The previous form `not (A) and (B)` bound as `(not A) and B`,
        # so the check could only fire when the first subtask was not Ingest *and* the second was RCal.
        starts_with_ingest = hpso_subtasks[0] in HPSOs.ingest_subtasks
        followed_by_rcal = hpso_subtasks[1] in HPSOs.rcal_subtasks
        if not (starts_with_ingest and followed_by_rcal):
            # this is assumed true for all HPSOs - hence raising an assertion error if not
            raise AssertionError("Assumption was wrong - some HPSO apparently doesn't involve Ingest + RCal")

        # Ingest and RCal are combined into a single task object, as they cannot be separated
        t = SDPTask()
        uid += 1  # the unique id of the combined Ingest+RCal task
        ingest_uid = uid  # remembered for later; some subtasks may only start once this one is completed
        t.uid = uid
        t.description = 'Ingest+RCal'
        subtask = hpso_subtasks[0]  # ingest
        t.t_fixed = hpso_perf['Tobs']
        t.data_in = 0  # data is acquired in real time; nothing read from buffer
        t.memsize = hpso_perf[subtask]['cache']
        t.flopcount = t.t_fixed * hpso_perf[subtask]['compRate']
        t.data_out = t.t_fixed * hpso_perf[subtask]['ingestRate']  # All ingested data gets written to buffer?

        subtask = hpso_subtasks[1]  # rcal
        t.memsize += hpso_perf[subtask]['cache']
        t.flopcount += t.t_fixed * hpso_perf[subtask]['compRate']
        t.data_out += 0  # What output does RCal generate? Just set it to zero for now. Are ingestRate and visRate relevant?
        ingest_rcal_data = t.data_out  # This is the data generated by this combined task; needed by subsequent tasks

        tasks.append(t)

        # Now handle the rest of the subtasks
        for i in range(2, nr_subtasks):
            subtask = hpso_subtasks[i]
            uid += 1
            t = SDPTask()
            t.uid = uid
            t.description = str(subtask)
            t.prec_task = ingest_uid  # must be after ingest+rcal, otherwise not in specific order rel to other subtasks
            t.data_in = ingest_rcal_data  # TODO: value? read from buffer at a later time than ingest happened at
            t.memsize = hpso_perf[subtask]['cache']
            # TODO: is the line below correct? Can probably define directly in terms of amount of data instead of via Tobs
            t.flopcount = hpso_perf['Tobs'] * hpso_perf[subtask]['compRate']
            t.data_out = 0  # TODO: No idea what gets output here. visRate? Temporarily set to zero

            tasks.append(t)
    return tasks

def add_delta(deltas, t, delta):
    """
    Records a buffer change of size "delta" at timestamp "t" in the "deltas" dictionary (modified in place).
    A timestamp that is already present has delta accumulated onto its existing value, and a warning is issued.
    """
    if t not in deltas:
        deltas[t] = delta
        return

    warnings.warn('Timestamp entry already exists in the timeline')  # warning may be omitted
    deltas[t] += delta

## Compute performance requirements for each HPSO using the parametric model
### We do this once, and store the results in a dictionary for lookup

In [None]:
# Nested lookup: performance_dict[hpso]['Tobs'] and performance_dict[hpso][subtask][<metric>].
# HPSO requirements are computed once here and only looked up afterwards.
performance_dict = {}

# As a test we loop over all HPSOs we wish to handle, computing results for each
for task_letter in sorted(hpso_lookup):
    hpso = hpso_lookup[task_letter]
    print('*** Processing task type %s => %s ***\n' % (task_letter, hpso))
    hpso_perf = performance_dict.setdefault(hpso, {})

    for subtask in HPSOs.hpso_subtasks[hpso]:
        print('subtask -> %s' % subtask)
        subtask_perf = hpso_perf.setdefault(subtask, {})

        cfg = PipelineConfig(hpso=hpso, hpso_subtask=subtask)
        valid, msgs = cfg.is_valid()
        if not valid:
            print("Invalid configuration!")
            for msg in msgs:
                print(msg)
            raise AssertionError("Invalid config")
        tp = cfg.calc_tel_params()
        # TODO - refactor this method's parameter sequence
        results = iapi._compute_results(cfg, False, results_map)

        # Results come back in results_map row order
        ingest_rate = results[0]
        cache_mem = results[1]
        vis_rate = results[2]
        comp_rate = results[3]

        hpso_perf['Tobs'] = tp.Tobs  # Observation time
        subtask_perf['ingestRate'] = ingest_rate
        subtask_perf['cache'] = cache_mem
        subtask_perf['visRate'] = vis_rate
        subtask_perf['compRate'] = comp_rate

        print('Buffer ingest rate\t= %g TB/s' % ingest_rate)
        print('Cache memory\t= %g TB' % cache_mem)
        print('Visibility IO rate\t= %g TB/s' % vis_rate)
        print('Compute Rate\t= %g PetaFLOP/s' % comp_rate)
        print()

print('done')

## Let's create and run a short test sequence

In [None]:
# Expand a short letter sequence into SDPTask objects and print each one for inspection.
# task_list is read again by the scheduling cell further down.
test_seq = ('A','A','B','B','B','A')
task_list = task_letters_to_SDPTask_list(test_seq, performance_dict)
for task in task_list:
    print(task)

# Notebook incomplete beyond this point - don't execute unless you know what you're doing!

## Now, simulate the execution of this sequence on the SDP (incomplete!)

In [None]:
# Scratch check of dict.pop: key 3 is removed again, so only the entry for key 1 remains
t = {}
t[1] = 'a'
t[3] = 'b'
t.pop(3)
t

In [None]:
sdp_FLOPS = 22.8  # NB: The processing capacity of the SDP in PetaFLOP/s

# First, assert that the SDP has enough FLOP/s capacity to handle the real-time tasks. If not, we can't continue.
# Every task is registered by uid along the way.
tasks_to_be_scheduled = {}  # TODO: we can replace this dictionary with a set once hash method for SDPTask is defined
for task in task_list:
    tasks_to_be_scheduled[task.uid] = task
    if task.t_fixed is None:
        continue  # no fixed duration, hence no minimum rate to check

    required_FLOPs = task.flopcount / task.t_fixed
    #print("Task %d requires a FLOPS rate of %g PetaFLOP/s" % (task.uid, required_FLOPs))
    if required_FLOPs > sdp_FLOPS:
        raise AssertionError("Task %d (%s) requires %g PetaFLOP/s. SDP capacity of %g PetaFLOP/s is insufficient!"
                             % (task.uid, task.description, required_FLOPs, sdp_FLOPS))
            
# Next, run through task list, determining start and end times for their execution and the effect on the buffer
# NOTE(review): this part of the cell is unfinished and does not parse as written (see the notes below).
wall_clock = 0       # Simulated wall clock time (seconds)
buffer_deltas = {}   # a dictionary that maps wall clock times to buffer allocation / deallocation (+/-) sizes
t_proc_end_last = 0  # The wall clock time that the last process completed
# NOTE(review): `tasks` is not assigned anywhere above this cell (the list built earlier is `task_list`);
# it is only assigned in a later cell - confirm which list is meant.
idle_time_durations = np.zeros(len(tasks))  # in seconds
i = 0
# NOTE(review): this is an empty dict, not a set
tasks_scheduled = {}  # set of task uids; TODO: replace by set of SDPTask objects when hash and equals methods defined

nr_iterations = 0
max_nr_iterations = 100  # Emergency termination if the loop fails to schedule all tasks
# Iteratively run through all tasks, scheduling them where possible. Repeat until all tasks are scheduled.
# NOTE(review): tasks_scheduled starts out empty, so this condition is False on entry and the body never runs;
# presumably the loop should continue while unscheduled tasks remain - TODO confirm.
while len(tasks_scheduled) > 0:
    nr_iterations += 1
    if nr_iterations > max_nr_iterations:
        warnings.warn('Maximum number of iterations exceeded; aborting!')
        break
    
    for task in task_list:
        # NOTE(review): incomplete statement (no right-hand operand for `not in`, no colon, no body) - syntax error.
        # SDPTask declares `prec_task`, not `prec_task_uid`.
        if (task.prec_task_uid is not None) and (task.uid not in )
    
    # NOTE(review): the statements below sit outside the for loop, so they would act on the last `task` only.
    # They read task.bufsize and task.t_obs, which SDPTask does not declare, and sdp_setup_time /
    # telecope_setup_time, which are only assigned in a later cell.
    task.t_obs_start = wall_clock
    add_delta(buffer_deltas, task.t_obs_start, task.bufsize)
    t_obs_end = task.t_obs_start + task.t_obs  # Time the observation completes
    task.t_proc_start = max(t_obs_end, t_proc_end_last + sdp_setup_time)
    task.t_proc_end   = task.t_proc_start + task.flopcount * task.t_obs / sdp_FLOPS
    add_delta(buffer_deltas, task.t_proc_end, -task.bufsize)
    t_proc_end_last = task.t_proc_end
    wall_clock = t_obs_end + telecope_setup_time
    idle_time_durations[i] = task.t_proc_start - t_obs_end
    i += 1


In [None]:
sdp_FLOPS = 22.8  # NB: The processing capacity of the SDP in PetaFLOP/s

# NOTE(review): unfinished statement below (hasattr is missing its attribute name, and the `if` has no colon
# or body) - syntax error as written.
for task in tasks:
    if hasattr(task, )

# Run through the list of tasks, determining start and end times for their execution and the effect on the buffer
# NOTE(review): the loop reads task.bufsize and task.t_obs, which SDPTask does not declare, and
# sdp_setup_time / telecope_setup_time, which are only assigned in a later cell.

wall_clock = 0       # Simulated wall clock time (seconds)
buffer_deltas = {}   # a dictionary mapping wall clock times to buffer allocation / deallocation (+/-) sizes
t_proc_end_last = 0  # The wall clock time that the last process completed
idle_time_durations = np.zeros(len(tasks))  # in seconds
i = 0
uids_completed = set()  # not used anywhere in this cell

for task in tasks:
    # The observation starts at the current wall clock time; its buffer space is claimed at that moment
    task.t_obs_start = wall_clock
    add_delta(buffer_deltas, task.t_obs_start, task.bufsize)
    t_obs_end = task.t_obs_start + task.t_obs  # Time the observation completes
    # Processing waits for both the end of the observation and the previous processing run (plus setup time)
    task.t_proc_start = max(t_obs_end, t_proc_end_last + sdp_setup_time)
    task.t_proc_end   = task.t_proc_start + task.flopcount * task.t_obs / sdp_FLOPS
    # The buffer space is released again once processing ends
    add_delta(buffer_deltas, task.t_proc_end, -task.bufsize)
    t_proc_end_last = task.t_proc_end
    wall_clock = t_obs_end + telecope_setup_time
    # Time between the end of the observation and the start of its processing
    idle_time_durations[i] = task.t_proc_start - t_obs_end
    i += 1

# Convert the {timestamp: delta} map into a step curve. Every timestamp contributes two points (the buffer
# level just before and just after its delta is applied), hence the arrays of length 2 * N.
buffer_evolution = collections.OrderedDict(sorted(buffer_deltas.items()))
time_vals   = np.zeros(2 * len(buffer_evolution))
buffer_vals = np.zeros(2 * len(buffer_evolution))

i = 0
buffer_val = 0
time_val   = 0
for k, delta in buffer_evolution.items():
    #print('(%.1f,\t%.2f)' % (k/3600, delta))
    time_val = k / 3600  # hours
    time_vals[i]     = time_val
    buffer_vals[i]   = buffer_val
    buffer_val += delta  # Adds the buffer delta to the buffer's stored contents
    time_vals[i+1]   = time_val  # we assume no time went by (writing being instantaneous)
    buffer_vals[i+1] = buffer_val  # TeraBytes
    i += 2

# Buffer usage over time (values are divided by 1e3 to plot PB instead of TB)
plt.plot(time_vals, buffer_vals / 1e3, 'b-')
plt.title('Evolution of the SDP Buffer while executing the supplied LOW sequence.\nObservation time = %.1f hrs.'
          ' Total execution time = %.1f hrs; Max buffer = %.1f PB'
          % (wall_clock / 3600, time_vals[-1], np.max(buffer_vals) / 1e3))
plt.xlabel('time (hours)')
plt.ylabel('buffer usage (PB)')
plt.xlim(0, time_vals[-1])

# Idle time per task, in sequence order
plt.figure()
plt.plot(np.array(range(len(idle_time_durations))), idle_time_durations / 3600, marker='s', color = 'r', linewidth=0)
plt.title('Idle time that tasks spend in the LOW buffer.\nSummed idle time for all tasks = %.1f hrs.' %
          (np.sum(idle_time_durations) / 3600))
# Was 'Task''s number in sequence': two adjacent literals, which Python concatenates to "Tasks number ..."
plt.xlabel("Task's number in sequence")
plt.ylabel('Time (hours)')


print('Done!')

## Hard-coded performance costs and requirements from Rosie's Excel sheet
### These were previously used in rev [3372fdd] to approximately replicate Rosie's results. Check (rerun) the notebook at that repository revision to regenerate those results - not repeated here.

In [None]:
# The following sets of values should be computed using the parametric model. Just hard-coded for now (from Excel)

# Ingest rates, in TeraByte/s
hpso_ingest_rates = dict(A=0.459, B=3e-3, C=0.117, D=0.112, E=0.0603, F=0.244, G=0.438)

# FLOPcounts below are the PetaFLOPs required to process one second of ingested data (i.e. in PetaFLOP/s)
hpso_flopcounts = dict(A=50.4, B=2.0, C=7.5, D=6.2, E=2.9833, F=17.689, G=27.698)

# Observation durations, in hours -- TODO check whether correct
hpso_durations = dict(A=6, B=0.17, C=6, D=6, E=4.4, F=0.1233, G=6)

# the minimum amount of time between processing tasks on the SDP (seconds)
sdp_setup_time = 60
# TODO is this correct?
telecope_setup_time = 0

## Reproduction of "Low" and "Mid" sequences from Rosie's Excel sheet
### Create lists of observation tasks as letter sequences

In [None]:
seqL = ('A','A','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','A','A','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','A','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B')
seqM = ('B','G','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','G','C','F','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','G','G','E','E','E','E','D')

# --- LOW: only task types A and B occur in the counts reported here ---
print('HPSO LOW task distribution (number of occurences) A..B = (%.0f, %.0f)' % (seqL.count('A'), seqL.count('B')))
# Observation time per task type = number of occurrences * duration of one observation
tA = seqL.count('A') * hpso_durations['A']
tB = seqL.count('B') * hpso_durations['B']
print('HPSO LOW task distribution (observation time) A..B = (%.1f%%, %.1f%%)' % (100 * tA / (tA + tB), 100 * tB / (tA + tB)))

# --- MID: task types A..G ---
mid_letters = ('A', 'B', 'C', 'D', 'E', 'F', 'G')
mid_counts = tuple(seqM.count(letter) for letter in mid_letters)
# Weight each count by the observation duration, exactly as done for LOW above. Previously the
# "observation time" percentages were plain count fractions (count / len(seqM)), which ignores that
# task types have very different durations.
mid_times = tuple(count * hpso_durations[letter] for letter, count in zip(mid_letters, mid_counts))
tt = sum(mid_times)  # total observation time of the MID sequence (same unit as hpso_durations)

print('\nHPSO MID task distribution (number of occurences) A..G = (%.0f, %.0f, %.0f, %.0f, %.0f, %.0f, %.0f)' % \
      mid_counts)
print('HPSO MID task distribution (observation time) A..G = (%.1f%%, %.1f%%, %.1f%%, %.1f%%, %.1f%%, %.1f%%, %.1f%%)' % \
      tuple(100 * t_obs / tt for t_obs in mid_times))

### Use the lists of letters to build lists of task objects

### Virtually execute the "seqL" task list for LOW

In [None]:
# NOTE(review): `task_letters_to_objects` is not defined in this notebook; the converter defined above is
# `task_letters_to_SDPTask_list`. The SDPTask objects it returns do not carry the `bufsize` / `t_obs`
# attributes read in the loop below, so this cell appears to predate the SDPTask refactoring - TODO confirm
# and port before re-running.
tasks = task_letters_to_objects(seqL, performance_dict)  # Set up the task list from the letter sequence

sdp_FLOPS = 22.8  # NB: The processing capacity of the SDP in PetaFLOP/s

# Run through the list of tasks, determining start and end times for their execution and the effect on the buffer

wall_clock = 0       # Simulated wall clock time (seconds)
buffer_deltas = {}   # a dictionary mapping wall clock times to buffer allocation / deallocation (+/-) sizes
t_proc_end_last = 0  # The wall clock time that the last process completed
idle_time_durations = np.zeros(len(tasks))  # in seconds
i = 0
uids_completed = set()  # not used anywhere in this cell

for task in tasks:
    # The observation starts at the current wall clock time; its buffer space is claimed at that moment
    task.t_obs_start = wall_clock
    add_delta(buffer_deltas, task.t_obs_start, task.bufsize)
    t_obs_end = task.t_obs_start + task.t_obs  # Time the observation completes
    # Processing waits for both the end of the observation and the previous processing run (plus setup time)
    task.t_proc_start = max(t_obs_end, t_proc_end_last + sdp_setup_time)
    task.t_proc_end   = task.t_proc_start + task.flopcount * task.t_obs / sdp_FLOPS
    # The buffer space is released again once processing ends
    add_delta(buffer_deltas, task.t_proc_end, -task.bufsize)
    t_proc_end_last = task.t_proc_end
    wall_clock = t_obs_end + telecope_setup_time
    # Time between the end of the observation and the start of its processing
    idle_time_durations[i] = task.t_proc_start - t_obs_end
    i += 1

# Convert the {timestamp: delta} map into a step curve. Every timestamp contributes two points (the buffer
# level just before and just after its delta is applied), hence the arrays of length 2 * N.
buffer_evolution = collections.OrderedDict(sorted(buffer_deltas.items()))
time_vals   = np.zeros(2 * len(buffer_evolution))
buffer_vals = np.zeros(2 * len(buffer_evolution))

i = 0
buffer_val = 0
time_val   = 0
for k, delta in buffer_evolution.items():
    #print('(%.1f,\t%.2f)' % (k/3600, delta))
    time_val = k / 3600  # hours
    time_vals[i]     = time_val
    buffer_vals[i]   = buffer_val
    buffer_val += delta  # Adds the buffer delta to the buffer's stored contents
    time_vals[i+1]   = time_val  # we assume no time went by (writing being instantaneous)
    buffer_vals[i+1] = buffer_val  # TeraBytes
    i += 2

# Buffer usage over time (values are divided by 1e3 to plot PB instead of TB)
plt.plot(time_vals, buffer_vals / 1e3, 'b-')
plt.title('Evolution of the SDP Buffer while executing the supplied LOW sequence.\nObservation time = %.1f hrs.'
          ' Total execution time = %.1f hrs; Max buffer = %.1f PB'
          % (wall_clock / 3600, time_vals[-1], np.max(buffer_vals) / 1e3))
plt.xlabel('time (hours)')
plt.ylabel('buffer usage (PB)')
plt.xlim(0, time_vals[-1])

# Idle time per task, in sequence order
plt.figure()
plt.plot(np.array(range(len(idle_time_durations))), idle_time_durations / 3600, marker='s', color = 'r', linewidth=0)
plt.title('Idle time that tasks spend in the LOW buffer.\nSummed idle time for all tasks = %.1f hrs.' %
          (np.sum(idle_time_durations) / 3600))
# Was 'Task''s number in sequence': two adjacent literals, which Python concatenates to "Tasks number ..."
plt.xlabel("Task's number in sequence")
plt.ylabel('Time (hours)')


print('Done!')

# Scratchpad

In [None]:
# Each row is (Table Row Title, Unit, Default?, Sum?, Expression)
results_map =[('Total buffer ingest rate','TeraBytes/s',True, False, lambda tp: tp.Rvis_ingest*tp.Nbeam*tp.Npp*tp.Mvis/c.tera),
              ('Working (cache) memory',  'TeraBytes',  True, True,  lambda tp: tp.Mw_cache/c.tera,   ),
              ('Visibility I/O Rate',     'TeraBytes/s',True, True,  lambda tp: tp.Rio/c.tera,        ),
              ('Total Compute Req',       'PetaFLOP/s', True, True,  lambda tp: tp.Rflop/c.peta,      ),
              ('Comp Req Breakdown ->',   'PetaFLOP/s', True, True,  lambda tp: tp.get_products('Rflop', scale=c.peta), )]
del results_map[4]  # We actually don't care about the breakdown for now; but it is useful to know how to get it

hpso = hpso_lookup['A']  # hpso01.ICAL

cfg = PipelineConfig(hpso=hpso)
# is_valid() is unpacked as a (valid, messages) pair elsewhere in this notebook. Asserting on the returned
# tuple itself would always pass, because a non-empty tuple is truthy - so check the flag explicitly.
(valid, msgs) = cfg.is_valid()
if not valid:
    raise AssertionError("Invalid config: %s" % "; ".join(str(msg) for msg in msgs))
tp = cfg.calc_tel_params()

results = iapi._compute_results(cfg, False, results_map)  #TODO - refactor this method's parameter sequence
print('Cache memory for hpso01.ICAL = %g TB' % results[1])
print('Visibility rate for hpso01.ICAL = %g TB/s' % results[2])
print('Rflop for hpso01.ICAL = %g PetaFLOPS' % results[3])

# Another, slightly more roundabout, way to do the same as _compute_results 
# (tsnap_opt, nfacet_opt) = imp.find_optimal_Tsnap_Nfacet(tp)
# result_expressions = iapi.get_result_expressions(results_map, tp)
# results_for_pipeline = imp.evaluate_expressions(result_expressions, tp, tsnap_opt, nfacet_opt)
# print(results_for_pipeline[3])

## Example code taken from computing parametric model results by Pipeline

In [None]:
# Telescopes and bands passed to the stack_bars_pipelines calls; `bands` and `parallel` are reused by the next cell
teles = (Telescopes.SKA1_Low, Telescopes.SKA1_Mid)
bands = (Bands.Low, 
         Bands.Mid1, Bands.Mid2, Bands.Mid5A, Bands.Mid5B, Bands.Mid5C,
         Bands.Sur1)
parallel = 0  # Set this to 0 if PyMP is absent

# One call per pipeline, each covering every telescope and band listed above
for pipeline in Pipelines.all:
    iapi.stack_bars_pipelines("%s Computational Requirements [PetaFLOP/s]" % pipeline, teles, bands, [pipeline],
                              parallel=parallel)

In [None]:
# One call per band, each covering all pipelines (uses `bands`, `teles` and `parallel` from the previous cell)
for band in bands:
    iapi.stack_bars_pipelines("%s Computational Requirements [PetaFLOP/s]" % band, teles, [band], Pipelines.all,
                              parallel = parallel)

In [None]:
# Use the shared `parallel` setting (defined with the pipeline charts above, 0 when PyMP is absent) instead
# of a hard-coded worker count of 16, so that this cell follows the same configuration as the other charts.
iapi.stack_bars_hpsos("HPSOs Computational Requirements [PetaFLOP/s]", HPSOs.hpsos,
                      parallel=parallel)