# SDP HPSO Scheduling

Last run with Jupyter Notebook 5.7.0 running Python 3.7.0

In [None]:
# Imports
import sys
import os
import pickle
import random
import csv

import numpy as np
import matplotlib.pyplot as plt

sys.path += ['..']
from sdp_par_model import reports as iapi
from sdp_par_model.parameters.definitions import *
from sdp_par_model.parameters.definitions import Constants as c
from sdp_par_model.scheduler import Scheduler

# IPython magic (not plain Python): render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size (width, height) applied to every plot created below.
plt.rcParams['figure.figsize'] = 16, 8

## Load precomputed performance dictionary (or compute it anew and save to file)

In [None]:
# The performance dictionary is cached as a pickle file one directory above this notebook.
performance_lookup_filename = os.path.join("..", "performance_dict.data")

if not os.path.isfile(performance_lookup_filename):
    # No cache on disk yet: compute the dictionary from scratch, then persist it
    # so that subsequent runs can take the loading branch below.
    performance_dict = Scheduler.compute_performance_dictionary()
    with open(performance_lookup_filename, "wb") as cache_file:
        pickle.dump(performance_dict, cache_file, protocol=pickle.HIGHEST_PROTOCOL)
else:
    # Cache found: read it back instead of recomputing.
    # NOTE(review): pickle.load can execute arbitrary code; only load a cache
    # file that was produced by this notebook / a trusted source.
    performance_dict = None
    with open(performance_lookup_filename, "rb") as cache_file:
        performance_dict = pickle.load(cache_file)

## Let's create a sequence of HPSOs 

In [None]:
# Compute capacity available to each telescope's SDP, in PetaFlops.
flops_capacity_low = 13.8
flops_capacity_mid = 12.1

# Buffer sizes are written as a number of "peta" units and converted to
# TeraBytes via the ratio of the two prefix constants.
tera_per_peta = c.peta / c.tera

cold_buffer_size_low = 60 * tera_per_peta  # TeraBytes
hot_buffer_size_low = 60 * tera_per_peta   # TeraBytes

cold_buffer_size_mid = 60 * tera_per_peta  # TeraBytes
hot_buffer_size_mid = 60 * tera_per_peta   # TeraBytes

# Active configuration used by the cells below: the SKA1-Low values.
flops_cap, coldbuf_cap, hotbuf_cap = (
    flops_capacity_low,
    cold_buffer_size_low,
    hot_buffer_size_low,
)
tel_str = 'SKA1-Low'  # telescope name used in the plot titles
keep_data_in_cold_buffer = False

### Option 1: using letters A, B, ..., G to define scheduling blocks - see Google Drive or the Python code for definitions

In [None]:
def _expand_runs(runs):
    """Expand (letter, count) pairs into a flat list of letters, preserving order."""
    return [letter for letter, count in runs for _ in range(count)]


# Scheduling-block letter sequences, written as run-length tables.
seqL = _expand_runs([
    ('B', 1), ('A', 2), ('B', 32), ('A', 2), ('B', 73), ('A', 1), ('B', 43),
])
seqM = _expand_runs([
    ('B', 1), ('G', 1), ('B', 34), ('G', 1), ('C', 1), ('F', 1),
    ('B', 110), ('F', 91), ('G', 2), ('E', 4), ('D', 1),
])

# Work on a copy so the reference sequence keeps its original order, then
# randomly shuffle the order of the processing blocks (letters) in place.
sequence_to_simulate = list(seqL)
random.shuffle(sequence_to_simulate)

keep_data_in_cold_buffer = False
# No explicit observation times with this option.
t_obs_list = None
hpso_list = Scheduler.hpso_letters_to_hpsos(sequence_to_simulate)

### or Option 2: using dynamic generation, loosely based on Mark Ashdown's scheduling code

In [None]:
def _hpsos_for(telescope):
    """Return the keys of HPSOs.hpso_telescopes mapped to `telescope` whose first four characters are 'hpso'."""
    return {name for name, tel in HPSOs.hpso_telescopes.items()
            if tel == telescope and name[:4] == 'hpso'}


# First, create sets of hpsos from which to build the sequences
hpsos_low = _hpsos_for(Telescopes.SKA1_Low)
hpsos_mid = _hpsos_for(Telescopes.SKA1_Mid)

# NOTE(review): this cell selects the Mid HPSO set, while the configuration
# cell above selects the SKA1-Low caps and title string - confirm the
# combination is intended before running the simulation cell.
hpso_set = hpsos_mid

seconds_per_hour = 3600.0
dt_block = 8.0 * seconds_per_hour        # duration of each scheduling block, in seconds
dt_seq = 10.0 * 24.0 * seconds_per_hour  # duration floor of the entire sequence, in seconds
allow_short_tobs = False

print('HPSOs are:', sorted(hpso_set))

hpso_list, t_obs_list = Scheduler.generate_sequence(
    hpso_set, performance_dict, dt_block, dt_seq, allow_short_tobs
)

# Cumulative observation time of the generated sequence, in seconds.
ttotal = np.sum(t_obs_list)
nr_of_hpsos = len(hpso_list)
print('Generated a list containing %d HPSOs, representing a cumulative observation time of %g hours.' 
      % (nr_of_hpsos, ttotal / 3600))
percent_of_target = 100 * ttotal / dt_seq
print('Cum. observation time is %g %% of the desired value' % percent_of_target)

## In either case, use the HPSOs list to generate a schedule and run simulation

In [None]:
# Convert the HPSO list (from Option 1 or Option 2 above) into SDP tasks.
task_list = Scheduler.hpsos_to_sdp_task_list(hpso_list, performance_dict, t_obs_list, keep_data_in_cold_buffer)
# To show how the tasks are created, can print the sequence of Task objects:
#for task in task_list:
#    print(task)

schedule = Scheduler.schedule(task_list, flops_cap, hotbuf_cap, coldbuf_cap,
                              assign_flops_fraction=0.5, assign_bw_fraction=0.5, max_nr_iterations=1000)

# The largest key in preserve_deltas is used as the completion time of the
# whole task sequence (presumably seconds, given the conversion to hours below).
max_t = max(schedule.preserve_deltas.keys())
print("SDP task sequence completes at t = %g hrs" % (max_t / 3600))

# Now we plot the results

xrange = [0.0, 1.05 * max_t]  # leave a 5% margin after the last event
xlabel = 'Time (hours)'

# Keyword arguments shared by every plot below.
common = dict(xrange=xrange, max_t=max_t, xlabel=xlabel)

# The buffer caps are held in TeraBytes (see where the *_buffer_size_* values
# are defined); convert them back to PetaBytes so that the number printed in
# the titles matches the "PetaByte" unit stated there.
tera_per_peta = c.peta / c.tera
coldbuf_cap_peta = coldbuf_cap / tera_per_peta
hotbuf_cap_peta = hotbuf_cap / tera_per_peta

iapi.plot_deltas(schedule.flops_deltas,
                 title='%s SDP compute load (capped at %.3g petaFLOPS)' % (tel_str, flops_cap),
                 ylabel='petaFLOPS', **common)

iapi.plot_deltas(schedule.memory_deltas,
                 title='%s SDP working memory usage' % tel_str,
                 ylabel='terabyte', **common)

iapi.plot_deltas(schedule.cold_buffer_deltas,
                 title='%s SDP cold buffer usage (capped at %.3g PetaByte)' % (tel_str, coldbuf_cap_peta),
                 ylabel='terabyte', **common)

iapi.plot_deltas(schedule.hot_buffer_deltas,
                 title='%s SDP hot Buffer usage (capped at %.0f PetaByte)' % (tel_str, hotbuf_cap_peta),
                 ylabel='terabyte', **common)

iapi.plot_deltas(schedule.preserve_deltas,
                 title='%s SDP preservation usage (uncapped)' % tel_str,
                 ylabel='terabyte', **common)

iapi.plot_deltas(schedule.ingest_pipe_deltas,
                 title='%s bandwidth (Ingest pipeline -> Cold Buffer)' % tel_str,
                 ylabel='terabyte/s', colour='c', **common)

# Disabled plot, kept for reference:
# Ingest -> Working memory pipeline is identical to Working Memory -> Cold Buffer (streaming)
#iapi.plot_deltas(schedule.mem_cold_pipe_deltas, xrange=xrange, max_t=max_t,
#                 title='%s bandwidth (Ingest working memory -> Cold Buffer)' % tel_str,
#                 xlabel=xlabel, ylabel='terabyte/s', colour='c')

iapi.plot_deltas(schedule.cold_hot_pipe_deltas,
                 title='%s bandwidth (Cold Buffer -> Hot Buffer)' % tel_str,
                 ylabel='terabyte/s', colour='c', **common)

# Disabled plots, kept for reference:
#iapi.plot_deltas(schedule.hot_mem_pipe_delta, xrange=xrange, max_t=max_t,
#                 title='%s bandwidth (hot buffer -> working memory)' % tel_str,
#                 xlabel=xlabel, ylabel='terabyte/s', colour='c')
#
#iapi.plot_deltas(schedule.mem_hot_pipe_delta, xrange=xrange, max_t=max_t,
#                 title='%s bandwidth (working memory -> hot buffer)' % tel_str,
#                 xlabel=xlabel, ylabel='terabyte/s', colour='c')

# NOTE(review): this attribute ends in "_delta" while the ones above end in
# "_deltas" - confirm the name against the Scheduler's schedule class.
iapi.plot_deltas(schedule.hot_preserve_pipe_delta,
                 title='%s bandwidth (Hot Buffer -> Preservation)' % tel_str,
                 ylabel='terabyte/s', colour='c', **common)

## Run a number of randomized sequences, looking at spread of results

In [None]:
# First, create sets of hpsos from which to build the sequences
hpsos_low = {k for k, v in HPSOs.hpso_telescopes.items() if v == Telescopes.SKA1_Low and k[0:4] == 'hpso'}
hpsos_mid = {k for k, v in HPSOs.hpso_telescopes.items() if v == Telescopes.SKA1_Mid and k[0:4] == 'hpso'}

nr_of_runs = 100
hpso_set = hpsos_low
dt_block = 6.0 * 3600.0        # duration of each scheduling block, in seconds
dt_seq = 10.0 * 24.0 * 3600.0  # duration floor of the entire sequence, in seconds
allow_short_tobs = False
keep_data_in_cold_buffer = False

# flops_cap is in PetaFlops; the buffer caps are in TeraBytes (see where the
# *_capacity_* and *_buffer_size_* values are defined).
flops_cap   = flops_capacity_low
coldbuf_cap = cold_buffer_size_low
hotbuf_cap  = hot_buffer_size_low

# Completion time of each randomized run (divided by 3600 below to get hours).
runtimes = np.zeros(nr_of_runs)
for i in range(nr_of_runs):
    # Generate random sequence
    (hpso_list, t_obs_list) = Scheduler.generate_sequence(hpso_set, performance_dict, dt_block, dt_seq, allow_short_tobs)

    task_list = Scheduler.hpsos_to_sdp_task_list(hpso_list, performance_dict, t_obs_list, keep_data_in_cold_buffer)
    # To show how the tasks are created, can print the sequence of Task objects:
    #for task in task_list:
    #    print(task)

    schedule = Scheduler.schedule(task_list, flops_cap, hotbuf_cap, coldbuf_cap,
                                  assign_flops_fraction=0.5, assign_bw_fraction=0.5, max_nr_iterations=1000)

    # The largest key in preserve_deltas is used as the completion time of the run.
    max_t = max(schedule.preserve_deltas.keys())
    runtimes[i] = max_t
    print("Run %d of %d : SDP task seq completed at t = %g hrs" % (i+1, nr_of_runs, (max_t / 3600)))

print("Done!")

# Histogram of the completion times, in hours.
runtimes_hrs = runtimes / 3600
plt.hist(runtimes_hrs)
# Text property names must be lower-case: current matplotlib no longer
# normalises mixed-case names such as "Fontsize" and rejects them.
plt.title('Distribution of execution times (median = %.1f hours)' % np.median(runtimes_hrs), fontsize=20)
plt.xlabel('Hours', fontsize=16)
plt.ylabel('Nr of occurrences', fontsize=16)
plt.show()

## Hard-coded performance costs and requirements from Rosie's Excel sheet
### These were previously used in rev [3372fdd] to approximately replicate Rosie's results. Check (rerun) the notebook at that repository revision to regenerate those results - not repeated here.

In [None]:
# The following sets of values should be computed using the parametric model. Just hard-coded for now (from Excel)
# All three dictionaries are keyed by the scheduling-block letters 'A'..'G'.
hpso_ingest_rates = {'A':0.459, 'B':3e-3, 'C':0.117, 'D':0.112, 'E':0.0603, 'F':0.244, 'G':0.438}  # in TeraByte/s
# FLOPcounts below are the PetaFLOPs required to process one second of ingested data
hpso_flopcounts = {'A':50.4, 'B':2.0, 'C':7.5, 'D':6.2, 'E':2.9833, 'F':17.689, 'G':27.698}  # in PetaFLOP/s
hpso_durations  = {'A':6, 'B':0.17, 'C':6, 'D':6, 'E':4.4, 'F':0.1233, 'G':6}  # in hours

sdp_setup_time = 60  # the minimum amount of time between processing tasks on the SDP (seconds)
telescope_setup_time = 0  # TODO is this correct?
# Backward-compatible alias for the original, misspelled name.
telecope_setup_time = telescope_setup_time