# SDP HPSO Scheduling

Last run with Jupyter Notebook 5.0.0 running Python 3.5.2

In [None]:
from __future__ import print_function
import sys
import matplotlib.pyplot as plt
sys.path += ['..']
from sdp_par_model import reports as iapi
from sdp_par_model.config import PipelineConfig
from sdp_par_model.parameters.definitions import *
%matplotlib inline
import numpy as np
import collections
import warnings

# Default size used for every figure created in this notebook (passed to matplotlib as width, height)
plt.rcParams['figure.figsize'] = (16, 8)

In [None]:
# TODO: the HPSO lookup refers to only one part of the HPSO pipeline, and that definition is itself duplicated
# in definitions.py. Needs some refactoring methinks
# Maps the single-letter codes used in the observation sequences onto HPSO identifiers.
hpso_lookup = {
    'A': HPSOs.hpso01ICAL,
    'B': HPSOs.hpso04c,
    'C': HPSOs.hpso13ICAL,
    'D': HPSOs.hpso14ICAL,
    'E': HPSOs.hpso15ICAL,
    'F': HPSOs.hpso27ICAL,
    'G': HPSOs.hpso37cICAL,
}

# The following sets of values should be computed using the parametric model. Just hard-coded for now (from Excel)

# Ingest rate per HPSO code, in TeraByte/s
hpso_ingest_rates = {
    'A': 0.459,
    'B': 3e-3,
    'C': 0.117,
    'D': 0.112,
    'E': 0.0603,
    'F': 0.244,
    'G': 0.438,
}

# FLOPcounts below are the PetaFLOPs required to process one second of ingested data (i.e. in PetaFLOP/s)
hpso_flopcounts = {
    'A': 50.4,
    'B': 2.0,
    'C': 7.5,
    'D': 6.2,
    'E': 2.9833,
    'F': 17.689,
    'G': 27.698,
}

# Observation duration per HPSO code, in hours -- TODO check whether correct
hpso_durations = {
    'A': 6,
    'B': 0.17,
    'C': 6,
    'D': 6,
    'E': 4.4,
    'F': 0.1233,
    'G': 6,
}

sdp_setup_time = 60  # the minimum amount of time between processing tasks on the SDP (seconds)
# NOTE: the name is a misspelling of "telescope", kept because the scheduling cells reference it as-is.
telecope_setup_time = 0  # TODO is this correct?

seqL = ('A','A','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','A','A','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','A','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B')
seqM = ('B','G','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','G','C','F','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','G','G','E','E','E','E','D')

# Summarise how the LOW sequence is split between HPSO 'A' and HPSO 'B', first by number of
# observations and then by observing time.
search_char = 'A'
acc = seqL.count(search_char)  # number of 'A' entries in the LOW sequence

# float() keeps this a true division on Python 2 as well (only print_function is imported from __future__).
fraction_a = float(acc) / len(seqL)

# Fractions of the sequence; every entry that is not 'A' is attributed to 'B'.
hpso_distribution_count = {'A': fraction_a, 'B': 1 - fraction_a}

print('HPSO Low division (number) A:B = %.0f : %.0f' % (hpso_distribution_count['A'] * len(seqL), hpso_distribution_count['B']  * len(seqL)))

# Weight each fraction by the per-observation duration (hours) to get the split in observing time.
tA = hpso_distribution_count['A'] * hpso_durations['A']
tB = hpso_distribution_count['B'] * hpso_durations['B']
print('HPSO Low division (time)   A:B = %.1f%% : %.1f%%' % (100 * tA / (tA + tB), 100 * tB / (tA + tB)))

## First simple reproduction of Rosie's "Low" sequence
### First, create a list of observation task objects

In [None]:
class Observation:
    """One observation together with the bookkeeping for its SDP processing.

    Every field defaults to ``None``. Fields may be supplied as keyword
    arguments to the constructor or assigned on the instance afterwards.
    """
    uid          = None  # Optional: unique ID; can be used for sequencing
    t_obs        = None  # duration of the observation
    t_obs_start  = None  # Wall clock time that this observation starts (in seconds)
    t_proc_start = None  # Wall clock time that this observation's processing starts (in seconds)
    t_proc_end   = None  # Wall clock time that this observation's processing ends (in seconds)
    bufsize      = None  # Amount of memory (in TB) that this operation needs to store in the buffer
    # Processing cost of this observation. The scheduling cells in this notebook store the
    # PetaFLOPs needed per second of ingested data here and multiply by t_obs to get the total.
    flopcount    = None

    # Field names in display order; used by __repr__.
    _FIELDS = ('uid', 't_obs', 't_obs_start', 't_proc_start', 't_proc_end', 'bufsize', 'flopcount')

    def __init__(self, uid=None, t_obs=None, t_obs_start=None, t_proc_start=None,
                 t_proc_end=None, bufsize=None, flopcount=None):
        """Create an observation; any field that is not given stays ``None``."""
        self.uid = uid
        self.t_obs = t_obs
        self.t_obs_start = t_obs_start
        self.t_proc_start = t_proc_start
        self.t_proc_end = t_proc_end
        self.bufsize = bufsize
        self.flopcount = flopcount

    def __repr__(self):
        """Return a readable ``Observation(field=value, ...)`` string for debugging."""
        shown = ', '.join('%s=%r' % (name, getattr(self, name)) for name in self._FIELDS)
        return 'Observation(%s)' % shown

def add_delta(deltas, t, delta):
    """Record a buffer size change ``delta`` at timestamp ``t`` in ``deltas``.

    ``deltas`` is a dict mapping timestamps to size changes and is modified
    in place. If ``t`` is already present, a warning is issued and ``delta``
    is added to the existing entry; otherwise a new entry is created.
    """
    if t not in deltas:
        deltas[t] = delta
        return
    # Two events share one timestamp: flag it, then merge them into a single entry.
    warnings.warn('Timestamp entry already exists in the timeline')
    deltas[t] += delta

### Virtually execute the task list for LOW

In [None]:
# --- Build the task list: one Observation per entry of the LOW sequence ---
tasks = []
for uid, task in enumerate(seqL):
    o = Observation()
    o.uid = uid
    p = ParameterContainer()
    apply_hpso_parameters(p, hpso_lookup[task])
    o.t_obs = p.Tobs  # treated as seconds below (it is added to the wall clock)
    o.bufsize = p.Tobs * hpso_ingest_rates[task]  # ingest rate (TB/s) times duration -> TB
    o.flopcount = hpso_flopcounts[task]  # PetaFLOPs per second of ingested data
    tasks.append(o)

sdp_FLOPS = 22.8  # NB: The processing capacity of the SDP in PetaFLOP/s

# --- Run through the list of tasks, determining start and end times for their execution
# --- and the effect on the buffer

wall_clock = 0       # Simulated wall clock time (seconds)
buffer_deltas = {}   # a dictionary mapping wall clock times to buffer allocation / deallocation (+/-) sizes
t_proc_end_last = 0  # The wall clock time that the last process completed
idle_time_durations = np.zeros(len(tasks))  # in seconds

for i, task in enumerate(tasks):
    # Observations run back to back; the buffer space is claimed when the observation starts.
    task.t_obs_start = wall_clock
    add_delta(buffer_deltas, task.t_obs_start, task.bufsize)
    t_obs_end = task.t_obs_start + task.t_obs  # Time the observation completes
    # Processing needs the observation to be finished and the SDP to be free again (plus setup time).
    task.t_proc_start = max(t_obs_end, t_proc_end_last + sdp_setup_time)
    # Total work is flopcount * t_obs; dividing by the SDP capacity gives the processing duration.
    task.t_proc_end   = task.t_proc_start + task.flopcount * task.t_obs / sdp_FLOPS
    # The buffer space is released once processing has finished.
    add_delta(buffer_deltas, task.t_proc_end, -task.bufsize)
    t_proc_end_last = task.t_proc_end
    wall_clock = t_obs_end + telecope_setup_time
    # Time the observed data sits in the buffer before its processing starts.
    idle_time_durations[i] = task.t_proc_start - t_obs_end

# --- Turn the time-sorted deltas into a step curve: two points per event (before / after) ---
buffer_evolution = collections.OrderedDict(sorted(buffer_deltas.items()))
time_vals   = np.zeros(2 * len(buffer_evolution))
buffer_vals = np.zeros(2 * len(buffer_evolution))

buffer_val = 0
time_val   = 0
for i, (k, delta) in enumerate(buffer_evolution.items()):
    time_val = k / 3600  # hours
    time_vals[2 * i]       = time_val
    buffer_vals[2 * i]     = buffer_val
    buffer_val += delta  # Adds the buffer delta to the buffer's stored contents
    time_vals[2 * i + 1]   = time_val  # we assume no time went by (writing being instantaneous)
    buffer_vals[2 * i + 1] = buffer_val  # TeraBytes

# --- Plot the buffer usage over time (TB converted to PB) ---
plt.plot(time_vals, buffer_vals / 1e3, 'b-')
plt.title('Evolution of the SDP Buffer while executing the supplied LOW sequence.\nObservation time = %.1f hrs.'
          ' Total execution time = %.1f hrs; Max buffer = %.1f PB'
          % (wall_clock / 3600, time_vals[-1], np.max(buffer_vals) / 1e3))
plt.xlabel('time (hours)')
plt.ylabel('buffer usage (PB)')
plt.xlim(0, time_vals[-1])

# --- Plot the per-task idle time ---
plt.figure()
plt.plot(np.arange(len(idle_time_durations)), idle_time_durations / 3600, marker='s', color='r', linewidth=0)
plt.title('Idle time that tasks spend in the LOW buffer.\nSummed idle time for all tasks = %.1f hrs.' %
          (np.sum(idle_time_durations) / 3600))
# Double quotes so the apostrophe is part of the label; 'Task''s ...' concatenated two
# literals and dropped it.
plt.xlabel("Task's number in sequence")
plt.ylabel('Time (hours)')


print('Done!')

### Repeat execution for MID's task list

In [None]:
# --- Build the task list: one Observation per entry of the MID sequence ---
tasks = []
for uid, task in enumerate(seqM):
    o = Observation()
    o.uid = uid
    p = ParameterContainer()
    apply_hpso_parameters(p, hpso_lookup[task])
    o.t_obs = p.Tobs  # treated as seconds below (it is added to the wall clock)
    o.bufsize = p.Tobs * hpso_ingest_rates[task]  # ingest rate (TB/s) times duration -> TB
    o.flopcount = hpso_flopcounts[task]  # PetaFLOPs per second of ingested data
    tasks.append(o)

sdp_FLOPS = 22.8  # NB: The processing capacity of the SDP in PetaFLOP/s

# --- Run through the list of tasks, determining start and end times for their execution
# --- and the effect on the buffer

wall_clock = 0       # Simulated wall clock time (seconds)
buffer_deltas = {}   # a dictionary mapping wall clock times to buffer allocation / deallocation (+/-) sizes
t_proc_end_last = 0  # The wall clock time that the last process completed
idle_time_durations = np.zeros(len(tasks))  # in seconds

for i, task in enumerate(tasks):
    # Observations run back to back; the buffer space is claimed when the observation starts.
    task.t_obs_start = wall_clock
    add_delta(buffer_deltas, task.t_obs_start, task.bufsize)
    t_obs_end = task.t_obs_start + task.t_obs  # Time the observation completes
    # Processing needs the observation to be finished and the SDP to be free again (plus setup time).
    task.t_proc_start = max(t_obs_end, t_proc_end_last + sdp_setup_time)
    # Total work is flopcount * t_obs; dividing by the SDP capacity gives the processing duration.
    task.t_proc_end   = task.t_proc_start + task.flopcount * task.t_obs / sdp_FLOPS
    # The buffer space is released once processing has finished.
    add_delta(buffer_deltas, task.t_proc_end, -task.bufsize)
    t_proc_end_last = task.t_proc_end
    wall_clock = t_obs_end + telecope_setup_time
    # Time the observed data sits in the buffer before its processing starts.
    idle_time_durations[i] = task.t_proc_start - t_obs_end

# --- Turn the time-sorted deltas into a step curve: two points per event (before / after) ---
buffer_evolution = collections.OrderedDict(sorted(buffer_deltas.items()))
time_vals   = np.zeros(2 * len(buffer_evolution))
buffer_vals = np.zeros(2 * len(buffer_evolution))

buffer_val = 0
time_val   = 0
for i, (k, delta) in enumerate(buffer_evolution.items()):
    time_val = k / 3600  # hours
    time_vals[2 * i]       = time_val
    buffer_vals[2 * i]     = buffer_val
    buffer_val += delta  # Adds the buffer delta to the buffer's stored contents
    time_vals[2 * i + 1]   = time_val  # we assume no time went by (writing being instantaneous)
    buffer_vals[2 * i + 1] = buffer_val  # TeraBytes

# --- Plot the buffer usage over time (TB converted to PB) ---
plt.plot(time_vals, buffer_vals / 1e3, 'b-')
plt.title('Evolution of the SDP Buffer while executing the supplied MID sequence.\nObservation time = %.1f hrs.'
          ' Total execution time = %.1f hrs; Max buffer = %.1f PB'
          % (wall_clock / 3600, time_vals[-1], np.max(buffer_vals) / 1e3))
plt.xlabel('time (hours)')
plt.ylabel('buffer usage (PB)')
plt.xlim(0, time_vals[-1])

# --- Plot the per-task idle time ---
plt.figure()
plt.plot(np.arange(len(idle_time_durations)), idle_time_durations / 3600, 'r*')
plt.title('Idle time that tasks spend in the MID buffer.\nSummed idle time for all tasks = %.1f hrs.' %
          (np.sum(idle_time_durations) / 3600))
# Double quotes so the apostrophe is part of the label; 'Task''s ...' concatenated two
# literals and dropped it.
plt.xlabel("Task's number in sequence")
plt.ylabel('Time (hours)')


print('Done!')

# Scratchpad
## Example code taken from computing parametric model results by Pipeline

In [None]:
# One stacked-bar chart per pipeline.
# NOTE(review): `teles`, `bands` and `parallel` are not defined in any cell visible here --
# confirm they are set before running this scratchpad cell.
for pipeline in Pipelines.all:
    plot_title = "%s Computational Requirements [PetaFLOP/s]" % pipeline
    iapi.stack_bars_pipelines(plot_title, teles, bands, [pipeline], parallel=parallel)

In [None]:
# One stacked-bar chart per band, covering all pipelines.
# NOTE(review): `teles`, `bands` and `parallel` are not defined in any cell visible here --
# confirm they are set before running this scratchpad cell.
for band in bands:
    plot_title = "%s Computational Requirements [PetaFLOP/s]" % band
    iapi.stack_bars_pipelines(plot_title, teles, [band], Pipelines.all, parallel=parallel)

In [None]:
# Stacked-bar chart of the computational requirements of all HPSOs.
hpso_plot_title = "HPSOs Computational Requirements [PetaFLOP/s]"
iapi.stack_bars_hpsos(hpso_plot_title, HPSOs.hpsos, parallel=16)