# Scheduling (2019 refactoring)

In [None]:
import sys
import math
import random
import time

from matplotlib import pylab
sys.path.insert(0, "..")

from sdp_par_model import reports, config
from sdp_par_model.scheduling import graph, level_trace
from sdp_par_model.parameters import definitions
from sdp_par_model.parameters.definitions import Telescopes, Pipelines, Constants, HPSOs
from sdp_par_model import config

## Set observatory parameters

In [None]:
# Total compute capacity per telescope (used further down as a Flops/s budget).
telescope_flops = {
    Telescopes.SKA1_Low: int(13.8 * Constants.peta),
    Telescopes.SKA1_Mid: int(12.1 * Constants.peta),
}

# Total buffer capacity per telescope (used further down as a size in bytes).
buffer_size = {
    Telescopes.SKA1_Low: int(46.0 * Constants.peta),
    Telescopes.SKA1_Mid: int(39.0 * Constants.peta),
}

# The total buffer is split three ways: fixed fractions for the hot and the
# delivery buffer, with whatever remains going to the input buffer.
hot_buffer_size = {tel: int(total / 2.7) for tel, total in buffer_size.items()}
delivery_buffer_size = {tel: int(total / 30) for tel, total in buffer_size.items()}
input_buffer_size = {
    tel: total - hot_buffer_size[tel] - delivery_buffer_size[tel]
    for tel, total in buffer_size.items()
}

## Read HPSO performance characteristics

As generated by the export notebook. This always picks up the latest file checked into Git.

In [None]:
# Locate all candidate CSV exports, pick the newest one and parse it.
csv = reports.read_csv(
    reports.newest_csv(
        reports.find_csvs()
    )
)

## Determine computational capacity required for realtime processing

As SKA SDP needs to be able to change observation at arbitrary times, we need to always reserve enough computational resources to deal with the most expensive case. Here we figure this out automatically based on the calculated parameters.

In [None]:
# Work out, per telescope, how much compute must be permanently reserved for
# real-time processing: the largest sum of real-time pipeline requirements
# over all HPSOs observed with that telescope. The remainder of the
# telescope's compute budget is what batch processing can use.
hpsos = definitions.HPSOs.all_hpsos
realtime_flops = { tel: 0 for tel in definitions.Telescopes.available_teles }
realtime_flops_hpso = {}
for hpso in hpsos:
    # Sum FLOP rates over involved real-time pipelines. Batch pipelines are
    # skipped before the CSV lookup, as their value would be discarded anyway.
    rt_flops = 0
    for pipeline in definitions.HPSOs.hpso_pipelines[hpso]:
        if pipeline not in definitions.Pipelines.realtime:
            continue
        cfg_name = config.PipelineConfig(hpso=hpso, pipeline=pipeline).describe()
        # The value is scaled by Constants.peta and rounded up to an integer
        flops = int(math.ceil(float(reports.lookup_csv(csv, cfg_name, 'Total Compute Requirement')) * definitions.Constants.peta))
        rt_flops += flops
    # Dominates? (.get() tolerates a telescope missing from available_teles)
    telescope = definitions.HPSOs.hpso_telescopes[hpso]
    if rt_flops > realtime_flops.get(telescope, 0):
        realtime_flops[telescope] = rt_flops
        realtime_flops_hpso[telescope] = hpso

# Show
print("Realtime processing requirements:")
batch_flops = { tel : tel_flops - realtime_flops.get(tel, 0) for tel, tel_flops in telescope_flops.items() }
for tel in definitions.Telescopes.available_teles:
    # A telescope without any real-time requirement has no dominating HPSO
    # recorded, so fall back to a placeholder instead of raising KeyError.
    print(" {}: {:.3f} Pflop/s real-time (from {}), {:.3f} Pflop/s left for batch".format(
        tel, realtime_flops[tel] / definitions.Constants.peta,
        realtime_flops_hpso.get(tel, "n/a"), batch_flops[tel] / definitions.Constants.peta))

## Generate graph

Generate a sequence with all HPSOs appearing roughly as often as we expect them in a real-life schedule. We then shuffle this list and generate a (multi-)graph of tasks from it.

Note that in contrast to Francois' scheduler, the resource usage of every task is fixed up-front, therefore we need to declare certain key sizes here. Adjust as necessary in relation to the capacities (see below) to get the desired amount of parallelism between tasks.

In [None]:
# Which telescope to build a schedule for, and how long the sequence should be
telescope = Telescopes.SKA1_Low
Tsequence = 10 * 24 * 3600  # 10 days, in seconds
# Lower bound applied to every observation length. With a value of 0 no
# observation is lengthened (a 10 minute minimum would be 10 * 60).
Tobs_min = 0

# Fixed rates handed to the graph generation for sizing the tasks
offline_flop_rate = batch_flops[telescope]
cold_transfer_rate = 0.5 * definitions.Constants.tera  # Bytes/s
hot_buffer_rate = 9.5 * definitions.Constants.tera  # Bytes/s
delivery_rate = 100 / 8 * definitions.Constants.giga  # Bytes/s
if telescope == Telescopes.SKA1_Mid:
    # Mid gets six times the delivery rate
    delivery_rate = 6 * delivery_rate

In [None]:
# Collect per-HPSO timing parameters for the selected telescope
# (Rflop is initialised here but not filled in by this cell)
Texp = {}
Tobs = {}
Rflop = {}
for hpso in HPSOs.all_hpsos:
    if HPSOs.hpso_telescopes[hpso] != telescope:
        continue
    tp = config.PipelineConfig(hpso=hpso, pipeline=Pipelines.Ingest).calc_tel_params()
    Texp[hpso] = tp.Texp
    # Observation length, clamped from below by Tobs_min
    Tobs[hpso] = max(tp.Tobs, Tobs_min)

# Decide how often every HPSO appears: its share Texp / Texp_total of the
# sequence length, divided by the observation length and rounded up.
Texp_total = sum(Texp.values())
hpso_sequence = []
Rflop_sum = 0
Tobs_sum = 0
for hpso in HPSOs.all_hpsos:
    if HPSOs.hpso_telescopes[hpso] != telescope:
        continue
    count = int(math.ceil(Tsequence * Texp[hpso] / Tobs[hpso] / Texp_total))
    print("{} x {} (Tobs={:.1f}h, Texp={:.1f}h)".format(count, hpso, Tobs[hpso]/3600, Texp[hpso]/3600))
    hpso_sequence += [hpso] * count
    Tobs_sum += count * Tobs[hpso]
print("{:.3f} d total".format(Tobs_sum / 3600 / 24))

# Randomise the order in which the observations get scheduled
random.shuffle(hpso_sequence)

In [None]:
# Generate the task graph of every observation in the sequence and collect
# all resulting nodes in one flat list, timing how long that takes.
t = time.time()
nodes = []
for hpso in hpso_sequence:
    hnodes = graph.make_graph(
        csv, {'hpso': hpso},
        cold_transfer_rate, offline_flop_rate, hot_buffer_rate, delivery_rate,
    )
    nodes += hnodes
print("Multi-graph has {} nodes (generation took {:.3f}s)".format(len(nodes), time.time()-t))

## Set Capacities

These are the limits the scheduler is going to assume for every cost.

In [None]:
# Upper limit for every resource the scheduler keeps track of
capacities = {
    graph.Resources.Observatory: 1,
    # Compute (Flops/s)
    graph.Resources.BatchCompute: batch_flops[telescope],
    graph.Resources.RealtimeCompute: realtime_flops[telescope],
    # Buffer sizes (Byte)
    graph.Resources.InputBuffer: input_buffer_size[telescope],
    graph.Resources.HotBuffer: hot_buffer_size[telescope],
    graph.Resources.OutputBuffer: delivery_buffer_size[telescope],
    # Transfer rates (Byte/s)
    graph.Resources.IngestRate: int(0.7 * definitions.Constants.tera),
    graph.Resources.ColdBufferRate: int(2 * 0.7 * definitions.Constants.tera),
    graph.Resources.HotBufferRate: int(5.0 * definitions.Constants.tera),
    graph.Resources.DeliveryRate: int(100/8 * definitions.Constants.giga),
    graph.Resources.LTSRate: int(100/8 * definitions.Constants.giga),
}

# HACK: Adjustments to make things work
if telescope == Telescopes.SKA1_Mid:
    capacities[graph.Resources.OutputBuffer] = 3 * capacities[graph.Resources.OutputBuffer]
    capacities[graph.Resources.DeliveryRate] = 6 * capacities[graph.Resources.DeliveryRate]
# The hot buffer rate is doubled for either telescope
capacities[graph.Resources.HotBufferRate] = 2 * capacities[graph.Resources.HotBufferRate]

# Display unit and matching divisor for every resource that gets reported
units = {
    **dict.fromkeys(
        (graph.Resources.BatchCompute,
         graph.Resources.RealtimeCompute),
        ("PFLOP/s", Constants.peta)),
    **dict.fromkeys(
        (graph.Resources.InputBuffer,
         graph.Resources.HotBuffer,
         graph.Resources.OutputBuffer),
        ("PB", Constants.peta)),
    **dict.fromkeys(
        (graph.Resources.IngestRate,
         graph.Resources.ColdBufferRate,
         graph.Resources.HotBufferRate,
         graph.Resources.DeliveryRate,
         graph.Resources.LTSRate),
        ("TB/s", Constants.tera)),
}

## Sanity-check Capacities

We can do a number of consistency checks at this point: Clearly we should have enough capacity to run every task in isolation.

Furthermore, in order to keep up with observations we need to make sure that we are not over-using any resource on average. This is a pretty rough estimate of safety that especially under-estimates the cost of edges in high-pressure scenarios. For example, if something needs to be kept in the buffer for longer, it has a higher footprint than estimated here. Therefore especially the size of `input-buffer` and `output-buffer` should be quite generous here.

In [None]:
import warnings

# Per resource: sum over all tasks of (time the cost is held) x (amount)
cost_sum = dict.fromkeys(capacities, 0)
for task in nodes:
    for cost, amount in task.all_cost().items():
        # Every task must at least be able to run on its own
        assert cost in capacities, "No {} capacity defined, required by {}!".format(cost, task.name)
        assert amount <= capacities[cost], "Not enough {} capacity to run {} ({:g}<{:g}!)".format(
            cost, task.name, capacities[cost], amount)
        # Estimate for how long the cost is held. Edge costs are the uncertain
        # part: all we know is that they last for the task itself plus at
        # least the longest task depending on it.
        if cost in task.edge_cost and len(task.rev_deps) > 0:
            ttime = task.time + max(d.time for d in task.rev_deps)
        else:
            ttime = task.time
        cost_sum[cost] += ttime * amount

# Report the average load relative to the total observation time
print("Best-case average loads:")
for cost in graph.Resources.All:
    unit, mult = units[cost]
    avg = cost_sum[cost] / Tobs_sum
    cap = capacities[cost]
    print(" {}:\t{:.3f} {} ({:.1f}% of {:.3f} {})".format(cost, avg/mult, unit, avg/cap*100, cap/mult, unit))
    # Complain on stderr once the average load exceeds the full capacity
    if avg > cap:
        print('Likely insufficient {} capacity!'.format(cost), file=sys.stderr)

## Schedule tasks

Assign a task time to every node, and figure out resource usages and edge lengths along the way.

In [None]:
from sdp_par_model.scheduling import scheduler

# Run the scheduler over all nodes within the given capacities and time it
t = time.time()
usage, task_time, task_edge_end_time = scheduler.schedule(
    nodes, capacities, verbose=False,
)
print("Scheduling took {:.3f}s".format(time.time() - t))

# Share of the time up to the end of the observatory usage trace that was
# actually spent observing
print("Observing efficiency: {:.1f}%".format(
    Tobs_sum / usage[graph.Resources.Observatory].end() * 100))

In [None]:
# Plot the usage level of every resource over the entire schedule, one
# subplot per resource, with the time axis in days.

# Pass the values to max() as a single iterable. Star-unpacking them fails
# with a TypeError when there is exactly one entry, because max() then tries
# to iterate over that one number.
trace_end = max(task_edge_end_time.values())
pylab.figure(figsize=(16,40))
for n, cost in enumerate(graph.Resources.All):
    levels = usage[cost]
    avg = levels.average(0,trace_end)
    unit, mult = units[cost]
    # NOTE(review): the grid has len(usage) rows while the loop runs over
    # graph.Resources.All - presumably these have matching sizes; confirm.
    pylab.subplot(len(usage), 1, n+1)
    # Step plot of the raw trace, padded with a zero level at both ends
    pylab.step([0] + [ t/24/3600 for t in levels._trace.keys() ] + [trace_end],
               [0] + [ v/mult for v in  levels._trace.values() ] + [0],
               where='post')
    pylab.title("{}: {:.3f} {} average ({:.2f}%)".format(
        cost, avg/mult, unit, avg / capacities[cost] * 100))
    # One tick per day; the y axis reaches just past the resource's capacity
    pylab.xlim((0, trace_end/24/3600)); pylab.xticks(range(int(trace_end)//24//3600+1))
    pylab.ylim((0, capacities[cost] / mult * 1.01))
    pylab.ylabel(unit)
pylab.xlabel("Days")
pylab.show()