# Scheduling Model inputs for SV AA2 Low

Batch-generate scheduling and processing block data for use in scheduling experiments.

In [None]:
import json
import sys
import yaml
import math
import tempfile
 
sys.path.insert(0, "..")
from sdp_par_model.config import PipelineConfig
from sdp_par_model.parameters.definitions import Constants

## Specify output filenames

In [2]:
# Destination files for the generated configuration tables; both paths
# are relative to the notebook's working directory.
pipeline_config_output_path = (
    "../data/output/sv_aa2_low_pipelines_v1.json"
)
scheduling_block_types_config_output_path = (
    "../data/output/sv_aa2_low_scheduling_block_types_v1.json"
)

## Load parameters

The input file is expected to contain a list of parametric model configuration dictionaries.

It can be generated from Google Sheets (Sheets to JSON -> Array of rows -> Download/Generate).

In [3]:
# Load the list of pipeline configurations. The file is JSON, which the YAML
# loader accepts as well. The encoding is given explicitly so that the result
# does not depend on the platform's locale default.
with open("../data/configs/stage_1/svaa2_low_v1.json", encoding="utf-8") as stream:
    yamls = yaml.safe_load(stream)
# The loop variable is deliberately not named `yaml`, so the imported module
# is never shadowed, not even inside the generator expression.
print(f"Loaded {len(yamls)} pipeline configurations: {', '.join(cfg['name'] for cfg in yamls)}")

Loaded 49 pipeline configurations: DSC-015-1 ICAL, DSC-015-1 CIMG, DSC-020 ICAL, DSC-020 CIMG 1, DSC-020 CIMG 2, DSC-020 CIMG 3, DSC-020 SIMG 1, DSC-020 SIMG 2, DSC-020 SIMG 3, DSC-023 ICAL, DSC-023 CIMG, DSC-023 SIMG, SC-025 ICAL, SC-025 CIMG 1, SC-025 CIMG 2, SC-026 ICAL, SC-026 CIMG , SC-027 ICAL, SC-027 CIMG , SC-028 ICAL, SC-028 CIMG , SC-028 SIMG , SC-028-1 ICAL, SC-028-1 CIMG 1, SC-028-1 CIMG 2, SC-028-1 SIMG 1, SC-028-1 SIMG 2, SC-029 ICAL, SC-029 CIMG , SC-029 SIMG , SC-029-1 ICAL, SC-029-1 CIMG  , SC-029-1 SIMG  , SC-030 ICAL, SC-030 CIMG, SC-030 SIMG, SC-031 ICAL, SC-031 CIMG , SC-032 ICAL, SC-032 CIMG, SC-032 SIMG, SC-033 ICAL, SC-033 CIMG , Stretch-003 ICAL, Stretch-003 CIMG , Stretch-003 SIMG , Stretch-004 ICAL, Stretch-004 CIMG , Stretch-004 SIMG 


## Base assumptions

We will likely need to revise these quite a bit to calibrate the appropriate level of "optimism". For the moment, let's stick with conservative estimates.

In [4]:
# Base assumptions
data_retention_hrs = 24.0
node_hours_uncertainty = 0.1

# Per-pipeline settings, keyed by pipeline type. The generation loop looks
# these up with `pars.pipeline`.
pct_parallelism = dict(ICAL=50.0, DPrepA=50.0, DPrepB=90.0)
pct_parallelism_max = dict(ICAL=75.0, DPrepA=75.0, DPrepB=95.0)
num_nodes = dict(ICAL=25, DPrepA=25, DPrepB=50)

# Extrapolation data
# 30 + 8/60 + 26/3600 -- presumably a benchmark of 30 h 8 min 26 s,
# expressed in hours (TODO confirm).
benchmark_node_hours = 30 + 8 / 60 + 26 / 60 / 60  # Strictly speaking using 3 nodes
Rflop_reference = 4324483174870.51
Tobs_ref = 8 * 3600  # presumably 8 hours expressed in seconds

## Generate configurations

In [5]:
# Build two tables from the loaded configurations:
#   pipelines              -- one entry per pipeline, keyed by its full name
#   scheduling_block_types -- one entry per scheduling block definition, keyed
#                             by the first space-separated word of the name
pipelines = {}
scheduling_block_types = {}

for to_calculate in yamls:

    # Get name. Assume that everything up to the first space is the
    # scheduling block definition name
    pip_name = to_calculate['name']
    sbd_name = pip_name.split(' ')[0]

    # Reject duplicates up front with a real exception: an `assert` is
    # stripped under `python -O`, and failing here avoids evaluating the
    # parametric model for a configuration that cannot be stored anyway.
    if pip_name in pipelines:
        raise ValueError(f"Duplicated pipeline name {pip_name}!")

    # Calculate parameters. The configuration is passed to PipelineConfig
    # through a temporary YAML file that is removed when the `with` exits.
    print('\n===',pip_name,'===')
    with tempfile.NamedTemporaryFile('w') as f:
        yaml.dump(to_calculate, f)
        # The file is handed over by name while still open (presumably
        # PipelineConfig re-opens it), so make sure nothing is left in
        # the write buffer.
        f.flush()
        pars = PipelineConfig(use_yaml=True, yaml_path=f.name).calc_tel_params(verbose=True)

    # Scale the benchmark cost by this pipeline's Rflop * Tobs relative to
    # the reference run's Rflop * Tobs.
    node_hours = benchmark_node_hours * float(pars.Rflop * pars.Tobs) / (Rflop_reference * Tobs_ref)
    # Sizes are divided by 1e9 and rounded up to whole numbers (reported as GB).
    vis_size_gb = int(math.ceil(pars.Rvis_ingest*pars.Tobs/1e9))
    averaged_vis_size_gb = int(math.ceil(pars.Rvis(pars.Bmax, bcount=pars.Nbl_full)*pars.Tobs/1e9))
    output_size_gb = int(math.ceil(pars.Mout/1e9))

    # Print
    print(f"Compute time:             {node_hours:.7} Nh")
    print(f"Ingest visibility size:   {vis_size_gb} GB")
    print(f"Averaged visibility size: {averaged_vis_size_gb} GB")
    print(f"Output size:              {output_size_gb} GB")
    print(f"Fiducial frequency:       {pars.c / pars.wl_sb_min / Constants.mega} MHz")
    print(f"Beam size:                {pars.Theta_beam / Constants.arcsecond}\"")
    print(f"Pixel size:               {pars.Theta_pix / Constants.arcsecond}\"")

    # Generate data. The scheduling block entry is created from the first
    # pipeline seen for it; later pipelines of the same block only append
    # their name to "pipeline_steps".
    if sbd_name not in scheduling_block_types:
        scheduling_block_types[sbd_name] = {
            "description": sbd_name,
            "short_name": sbd_name,
            "sampling_weight": pars.Tpoint / 3600,
            "scheduling_block_instance_time_hrs": pars.Tobs / 3600,
            "integration_time_hrs": max(pars.Tobs, pars.Tpoint) / 3600,
            "pipeline_steps": [],
            "raw_vis_gb": vis_size_gb,
            "processed_vis_gb": averaged_vis_size_gb,
            "data_retention_hrs": data_retention_hrs
        }

    # Add pipeline. The per-type settings are looked up by pars.pipeline; a
    # type missing from the assumption tables raises KeyError.
    scheduling_block_types[sbd_name]['pipeline_steps'].append(pip_name)
    pipelines[pip_name] = {
        "description": pip_name,
        "node_hours": node_hours,
        "node_hours_uncertainty": node_hours_uncertainty,
        "pct_parallelism": pct_parallelism[pars.pipeline],
        "pct_parallelism_max": pct_parallelism_max[pars.pipeline],
        "data_product_storage_gb": output_size_gb,
        "num_nodes": num_nodes[pars.pipeline]
    }



=== DSC-015-1 ICAL ===
Baseline coalescing on
Evaluating Nfacet=1 -> DeltaW_stack=20.8109, Rflop=6.33891e+13
Evaluating Nfacet=2 -> DeltaW_stack=27.0809, Rflop=7.08659e+13
Evaluating Nfacet=3 -> DeltaW_stack=34.1028, Rflop=6.17989e+13
Evaluating Nfacet=4 -> DeltaW_stack=39.5711, Rflop=6.16194e+13
Evaluating Nfacet=5 -> DeltaW_stack=46.6814, Rflop=6.05364e+13
Evaluating Nfacet=6 -> DeltaW_stack=51.2456, Rflop=6.6001e+13
Evaluating Nfacet=7 -> DeltaW_stack=57.0999, Rflop=6.98287e+13
Nfacet=5, DeltaW_stack=46.6814  yielded the lowest value of Rflop=6.05364e+13
Compute time:             210.9616 Nh
Ingest visibility size:   337 GB
Averaged visibility size: 54 GB
Output size:              0 GB
Fiducial frequency:       135.461492394372 MHz
Beam size:                9.25314823204382"
Pixel size:               3.08438274401461"

=== DSC-015-1 CIMG ===
Baseline coalescing on
Evaluating Nfacet=1 -> DeltaW_stack=9.79775, Rflop=3.90199e+13
Evaluating Nfacet=2 -> DeltaW_stack=12.3375, Rflop=5.178

In [None]:
# Persist both generated tables as JSON indented by four spaces,
# pipelines first and scheduling block types second.
for out_path, payload in (
    (pipeline_config_output_path, pipelines),
    (scheduling_block_types_config_output_path, scheduling_block_types),
):
    with open(out_path, "w") as handle:
        json.dump(payload, handle, indent=4)