# SDP HPSO Scheduling

Last run with Jupyter Notebook 5.0.0 running Python 3.5.2

In [None]:
# Imports
import matplotlib.pyplot as plt
import sys
import os
import pickle

sys.path += ['..']
from sdp_par_model import reports as iapi
from sdp_par_model.parameters.definitions import *
from sdp_par_model.parameters.definitions import Constants as c

from sdp_par_model.scheduler import Definitions as sdefs
from sdp_par_model.scheduler import Scheduler

import collections
import warnings
import bisect

%matplotlib inline
# Use a wide 16 x 8 default figure size for every plot produced in this notebook.
plt.rcParams['figure.figsize'] = 16, 8

## Create a Scheduler object
At the moment, the Scheduler object contains the functionality of the SDP simulator, the scheduling code itself, as well as the generated schedule objects. In the future we may wish to split these into separate classes and objects.

In [None]:
# Single Scheduler instance used by the cells below: it receives/computes the
# performance dictionary, builds the task list and produces the schedule.
sdp_scheduler = Scheduler()

## Read the performance requirement lookup for all HPSOs
### If this lookup table does not exist, we create it and save it to disk (to avoid recomputing it next time)

In [None]:
# File in the working directory that caches the pickled performance lookup table.
performance_lookup_filename = "performance_dict.data"
if os.path.isfile(performance_lookup_filename):
    # Cache hit: load the previously computed table and hand it to the scheduler.
    # NOTE: unpickling can execute arbitrary code -- only load a cache file that was
    # written by this notebook, never one obtained from an untrusted source.
    with open(performance_lookup_filename, "rb") as f:
        performance_dict = pickle.load(f)
    sdp_scheduler.set_performance_dictionary(performance_dict)
else:
    # Cache miss: create a performance dictionary and write it to file.
    # NOTE(review): this branch never calls set_performance_dictionary(); presumably
    # compute_performance_dictionary() already stores the result on the scheduler --
    # confirm against the Scheduler implementation.
    performance_dict = sdp_scheduler.compute_performance_dictionary()
    with open(performance_lookup_filename, "wb") as f:
        pickle.dump(performance_dict, f, pickle.HIGHEST_PROTOCOL)

## Let's create a sequence of tasks
### This can be modified so that the list is read from e.g. a CSV file

In [None]:
seqL = ('A','A','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','A','A','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','A','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B')
seqM = ('B','G','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','G','C','F','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','B','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','F','G','G','E','E','E','E','D')
# Short sequence, handy for quick experiments.
test_seq = ('A','B','B','B','A')

# Sequence of task letters that is turned into SDP tasks below; point this at a
# different tuple (e.g. test_seq) to simulate another sequence.
sequence_to_simulate = seqM
# The original (misspelled) name is kept as an alias in case other cells still use it.
squence_to_simulate = sequence_to_simulate

task_list = sdp_scheduler.task_letters_to_sdp_task_list(sequence_to_simulate)

# To show how the tasks are created, the sequence of Task objects can be printed.
# (This is a real comment rather than a bare string literal, so that the cell does not
# echo the text as its output value.)
# for task in task_list:
#    print(task)

## Schedule the tasks on the SDP using the Scheduler

In [None]:
# Schedule the task list on the SDP with the chosen capacity and allocation settings.
schedule = sdp_scheduler.schedule(task_list, sdp_flops=22.8, assign_flops_fraction=0.5,
                                  assign_bandwidth_fraction=0.5, max_nr_iterations=1000)
# The largest key of preserve_deltas is used as the completion time of the sequence.
# max() finds it directly; sorting every key just to take the last one is unnecessary.
last_preservation_timestamp = max(schedule.preserve_deltas)
max_t = last_preservation_timestamp
# Dividing by 3600 reports the time in hours (keys are presumably seconds -- confirm).
print("SDP task sequence completes at t = %g hrs" % (max_t / 3600))

In [None]:
# Axis limits are derived from the preservation deltas: the largest key gives the
# end of the time axis and the largest value the top of the preservation y axis.
# max() replaces the original sort-then-take-last lookups.
max_preservation = max(schedule.preserve_deltas.values())
last_preservation_timestamp = max(schedule.preserve_deltas)
xrange = [0, last_preservation_timestamp * 1.05]  # 5% headroom on the time axis
preserv_yrange = [0, max(max_preservation * 1.05, 1)]  # 5% headroom, upper bound never below 1

max_t = last_preservation_timestamp
print("SDP task sequence completes at t = %g hrs" % (max_t / 3600))

time_axis_label = 'wall clock time (hours)'

# One row per figure: (schedule attribute, title, y-axis label, extra plot_deltas kwargs).
# Attributes are looked up by name inside the loop, so each one is only accessed right
# before its own plot is drawn, exactly as in a hand-written sequence of calls.
# NOTE(review): the last three pipe attributes end in "_delta" while all the others end
# in "_deltas"; the names are used as-is -- confirm they match the schedule object.
resource_plots = [
    ('flops_deltas', 'Evolution of SDP FLOP/s', 'PetaFLOP/s', {}),
    ('memory_deltas', 'Evolution of SDP working memory (RAM)', 'TeraByte', {}),
    ('cold_buffer_deltas', 'Evolution of SDP Cold buffer', 'TeraByte', {}),
    ('hot_buffer_deltas', 'Evolution of SDP Hot buffer', 'TeraByte', {}),
    ('preserve_deltas', 'Evolution of SDP Preservation storage', 'TeraByte',
     {'yrange': preserv_yrange}),
]
pipe_style = {'colour': 'c'}
pipe_plots = [
    ('ingest_cold_pipe_deltas', 'Bandwidth usage of pipeline flowing into cold buffer',
     'TeraByte/s', pipe_style),
    ('cold_hot_pipe_deltas', 'Bandwidth usage of pipeline flowing into hot buffer',
     'TeraByte/s', pipe_style),
    ('hot_mem_pipe_delta', 'Bandwidth usage of pipeline from hot buffer to working memory',
     'TeraByte/s', pipe_style),
    ('mem_hot_pipe_delta', 'Bandwidth usage of pipeline from working memory to hot buffer',
     'TeraByte/s', pipe_style),
    ('hot_preserve_pipe_delta', 'Bandwidth usage of pipeline flowing into Preservation',
     'TeraByte/s', pipe_style),
]

for attr_name, plot_title, y_label, extra_kwargs in resource_plots + pipe_plots:
    iapi.plot_deltas(getattr(schedule, attr_name), xrange=xrange, max_t=max_t,
                     title=plot_title, xlabel=time_axis_label, ylabel=y_label,
                     **extra_kwargs)

## Hard-coded performance costs and requirements from Rosie's Excel sheet
### These were previously used in rev [3372fdd] to approximately replicate Rosie's results. Check (rerun) the notebook at that repository revision to regenerate those results - not repeated here.

In [None]:
# The following sets of values should be computed using the parametric model.
# Just hard-coded for now (from Excel). All three dictionaries are keyed by HPSO letter.

# Ingest rates, in TeraByte/s
hpso_ingest_rates = {
    'A': 0.459,
    'B': 3e-3,
    'C': 0.117,
    'D': 0.112,
    'E': 0.0603,
    'F': 0.244,
    'G': 0.438,
}

# FLOPcounts below are the PetaFLOPs required to process one second of ingested data
# (i.e. in PetaFLOP/s)
hpso_flopcounts = {
    'A': 50.4,
    'B': 2.0,
    'C': 7.5,
    'D': 6.2,
    'E': 2.9833,
    'F': 17.689,
    'G': 27.698,
}

# Durations, in hours -- TODO check whether correct
hpso_durations = {
    'A': 6,
    'B': 0.17,
    'C': 6,
    'D': 6,
    'E': 4.4,
    'F': 0.1233,
    'G': 6,
}

sdp_setup_time = 60  # the minimum amount of time between processing tasks on the SDP (seconds)
telescope_setup_time = 0  # TODO is this correct?
# The original (misspelled) name is kept as an alias in case other code still refers to it.
telecope_setup_time = telescope_setup_time