# Scheduling Model inputs for SV AA2 Mid

Batch-generate scheduling and processing block data for use in scheduling experiments

In [None]:
import json
import sys
import yaml
import math
import tempfile
sys.path.insert(0, "..")
from sdp_par_model.config import PipelineConfig
from sdp_par_model.parameters.definitions import Constants

## Specify output filenames

In [2]:
# JSON file that receives the generated pipeline definitions.
pipeline_config_output_path = "../data/output/sv_aas_mid_pipelines_v1.json"

# JSON file that receives the generated scheduling block type definitions.
scheduling_block_types_config_output_path = (
    "../data/output/sv_aas_mid_scheduling_block_types_v1.json"
)

## Load parameters

Expected to be a list of parametric model configuration dictionaries.

Can be generated from Google Sheets (Sheets to JSON -> Array of rows -> Download/Generate)

In [3]:
# The input has a .json extension but is parsed with yaml.safe_load; the
# result is expected to be a list of dictionaries, each carrying a 'name'.
config_path = "../data/configs/stage_2/svaas_mid_v1.json"
with open(config_path) as stream:
    yamls = yaml.safe_load(stream)

# Collect the names separately so the loop variable does not reuse the
# name of the `yaml` module.
pipeline_names = [entry['name'] for entry in yamls]
print(f"Loaded {len(yamls)} pipeline configurations: {', '.join(pipeline_names)}")

Loaded 73 pipeline configurations: SC-001 ICAL, SC-001 CIMG, SC-001 CIMG 2, SC-001 SIMG, SC-001 SIMG 2, SC-002 ICAL, SC-002 CIMG, SC-003 ICAL, SC-003 CIMG, SC-004 ICAL, SC-004 CIMG 1, SC-004 CIMG 2, SC-005 ICAL, SC-005 CIMG 1, SC-005 CIMG 2, SC-006 ICAL, SC-006 CIMG 1, SC-006 CIMG 2, SC-007 ICAL, SC-007 CIMG, SC-008 ICAL, SC-008 CIMG, SC-009 ICAL, SC-009 CIMG 1, SC-009 CIMG 2, SC-010 ICAL, SC-010 CIMG, SC-011 ICAL, SC-011 CIMG, SC-012 ICAL, SC-012 CIMG 1, SC-012 CIMG 2, SC-012 SIMG 1, SC-012 SIMG 2, SC-012 SIMG 3, SC-012 SIMG 4, SC-012 SIMG 5, SC-012 SIMG 6, SC-012 SIMG 7, SC-012 SIMG 8, SC-013 ICAL, SC-013 CIMG 1, SC-013 CIMG 2, SC-013 SIMG 1, SC-013 SIMG 2, SC-014 ICAL, SC-014 CIMG 1, SC-014 CIMG 2, SC-014 SIMG 1, SC-014 SIMG 2, SC-015 ICAL, SC-015 CIMG, SC-015 SIMG, SC-016 ICAL, SC-016 CIMG, SC-017 ICAL, SC-017 CIMG 1, SC-017 CIMG 2, SC-018 ICAL, SC-018 CIMG 1, SC-018 CIMG 2, SC-018 CIMG 3, SC-018 SIMG 1, SC-018 SIMG 2, SC-018 SIMG 3, SC-019 ICAL, SC-019 CIMG 1, SC-019 CIMG 2, SC-01

## Base assumptions

We will likely need to revise this quite a bit to calibrate the appropriate level of "optimism". For the moment, let's stick with conservative estimates.

In [4]:
# Base assumptions
# These two values are copied unchanged into every generated scheduling block
# type / pipeline entry respectively.
data_retention_hrs = 24.0
node_hours_uncertainty = 0.1

# Per-pipeline settings, looked up by the pipeline type reported by the
# parametric model (pars.pipeline).
pct_parallelism = {"ICAL": 50.0, "DPrepA": 50.0, "DPrepB": 90.0}
pct_parallelism_max = {"ICAL": 75.0, "DPrepA": 75.0, "DPrepB": 95.0}
num_nodes = {"ICAL": 25, "DPrepA": 25, "DPrepB": 50}

# Extrapolation data
# Node hours are scaled from this benchmark in proportion to Rflop * Tobs
# relative to Rflop_reference * Tobs_ref.
# 30 h 8 min 26 s expressed in hours. Strictly speaking using 3 nodes.
benchmark_node_hours = 30+8/60+26/60/60
Rflop_reference = 4324483174870.51
Tobs_ref = 8 * 3600  # presumably 8 hours in seconds -- confirm

## Generate configurations

In [5]:
# Results, filled in by the loop below:
#   pipelines              -- pipeline name -> pipeline description
#   scheduling_block_types -- scheduling block name -> block description,
#                             including the ordered list of its pipeline names
pipelines = {}
scheduling_block_types = {}


def _size_in_gb(size):
    """Return ``size / 1e9`` rounded up to a whole number, as a plain int."""
    return int(math.ceil(size / 1e9))


for to_calculate in yamls:

    # The scheduling block definition name is taken to be everything before
    # the first blank of the pipeline name (e.g. "SC-001 ICAL" -> "SC-001").
    pip_name = to_calculate['name']
    sbd_name = pip_name.partition(' ')[0]

    # Evaluate the parametric model. PipelineConfig is handed a file path, so
    # the entry is written to a temporary YAML file that only needs to exist
    # while the parameters are being calculated.
    print('\n===',pip_name,'===')
    with tempfile.NamedTemporaryFile('w') as yaml_file:
        yaml.dump(to_calculate, yaml_file)
        config = PipelineConfig(use_yaml=True, yaml_path=yaml_file.name)
        pars = config.calc_tel_params(verbose=True)

    # Derived quantities: compute cost is extrapolated from the benchmark in
    # proportion to Rflop * Tobs; data sizes are rounded up to whole GB.
    model_work = float(pars.Rflop * pars.Tobs)
    reference_work = Rflop_reference * Tobs_ref
    node_hours = benchmark_node_hours * model_work / reference_work
    vis_size_gb = _size_in_gb(pars.Rvis_ingest*pars.Tobs)
    averaged_vis_size_gb = _size_in_gb(pars.Rvis(pars.Bmax, bcount=pars.Nbl_full)*pars.Tobs)
    output_size_gb = _size_in_gb(pars.Mout)

    # Report
    print(f"Compute time:             {node_hours:.7} Nh")
    print(f"Ingest visibility size:   {vis_size_gb} GB")
    print(f"Averaged visibility size: {averaged_vis_size_gb} GB")
    print(f"Output size:              {output_size_gb} GB")
    print(f"Fiducial frequency:       {pars.c / pars.wl_sb_min / Constants.mega} MHz")
    #print(3 * (pars.c / 135e6) / 2 / 39e3 / 3 / Constants.arcsecond)
    print(f"Beam size:                {pars.Theta_beam / Constants.arcsecond}\"")
    print(f"Pixel size:               {pars.Theta_pix / Constants.arcsecond}\"")

    # Register the scheduling block type the first time it is seen; its
    # sizes and times therefore come from the first pipeline listed for it.
    if sbd_name not in scheduling_block_types:
        scheduling_block_types[sbd_name] = {
            "description": sbd_name,
            "short_name": sbd_name,
            "sampling_weight": pars.Tpoint / 3600,
            "scheduling_block_instance_time_hrs": pars.Tobs / 3600,
            "integration_time_hrs": max(pars.Tobs, pars.Tpoint) / 3600,
            "pipeline_steps": [],
            "raw_vis_gb": vis_size_gb,
            "processed_vis_gb": averaged_vis_size_gb,
            "data_retention_hrs": data_retention_hrs
        }

    # Record the pipeline; names must be unique across the whole input.
    assert pip_name not in pipelines, f"Duplicated pipeline name {pip_name}!"
    block_type = scheduling_block_types[sbd_name]
    block_type['pipeline_steps'].append(pip_name)

    pipeline_type = pars.pipeline
    pipelines[pip_name] = {
        "description": pip_name,
        "node_hours": node_hours,
        "node_hours_uncertainty": node_hours_uncertainty,
        "pct_parallelism": pct_parallelism[pipeline_type],
        "pct_parallelism_max": pct_parallelism_max[pipeline_type],
        "data_product_storage_gb": output_size_gb,
        "num_nodes": num_nodes[pipeline_type]
    }



=== SC-001 ICAL ===
Baseline coalescing on
Evaluating Nfacet=1 -> DeltaW_stack=2433.53, Rflop=4.57172e+13
Evaluating Nfacet=2 -> DeltaW_stack=3272.86, Rflop=5.52245e+13
Evaluating Nfacet=3 -> DeltaW_stack=3934.87, Rflop=6.26995e+13
Nfacet=1, DeltaW_stack=2433.53  yielded the lowest value of Rflop=4.57172e+13
Compute time:             318.6377 Nh
Ingest visibility size:   8858 GB
Averaged visibility size: 771 GB
Output size:              0 GB
Fiducial frequency:       6572.19847297220 MHz
Beam size:                0.392034171911236"
Pixel size:               0.130678057303745"

=== SC-001 CIMG ===
Baseline coalescing on
Evaluating Nfacet=1 -> DeltaW_stack=933.05, Rflop=4.56337e+13
Evaluating Nfacet=2 -> DeltaW_stack=1215.74, Rflop=7.273e+13
Evaluating Nfacet=3 -> DeltaW_stack=1472.18, Rflop=1.07843e+14
Nfacet=1, DeltaW_stack=933.05  yielded the lowest value of Rflop=4.56337e+13
Compute time:             318.0552 Nh
Ingest visibility size:   8858 GB
Averaged visibility size: 771 GB
Outp

In [None]:
# Write both result dictionaries as indented JSON, pipelines first.
for output_path, payload in (
    (pipeline_config_output_path, pipelines),
    (scheduling_block_types_config_output_path, scheduling_block_types),
):
    with open(output_path, "w") as out_file:
        json.dump(payload, out_file, indent=4)