In [1]:
%load_ext lab_black
%load_ext pycodestyle_magic
%flake8_on --ignore E501,W505

Import the required python modules:

In [2]:
import rubin_sim
from rubin_sim import maf

Note the environment of this execution of this notebook:

In [3]:
from astropy.time import Time
import sys

print(f"Notebook last executed: {Time.now().iso}")
print(f"Python version: {sys.version}")
print(f"rubin_sim version: {rubin_sim.__version__}")

Notebook last executed: 2021-11-05 17:52:53.326
Python version: 3.9.5 | packaged by conda-forge | (default, Jun 19 2021, 00:32:32) 
[GCC 9.3.0]
rubin_sim version: 0.5.2.dev2+ge3367ea


This notebook will produce output files, which need to go somewhere.
These files will later be read and examined using the `Comparing Runs` notebook.

The following cell will create a directory to work in as a subdirectory of the current directory. Change `data_dir` to put the data somewhere else.

In [4]:
from os import path
from os import getcwd

data_dir = path.join(getcwd(), "bulk_metric_data")

If the directory doesn't already exist, create it:

In [5]:
from os import makedirs

# exist_ok=True creates the directory only when it is missing, in a single
# step: the cell stays safe to re-run, and there is no window between
# checking for the directory and creating it.
makedirs(data_dir, exist_ok=True)

In [6]:
# Names of the opsim runs to process. Each name is used as a key into the
# URL and file-name dictionaries, and as the base of the ".db" file name.
opsim_runs = (
    "baseline_nexp2_v1.7_10yrs",
    "baseline_2snaps_v1.5_10yrs",
    "baseline_v2.0_10yrs",
)

If you already have the opsim databases to work with, uncomment these lines and set the path and file names to use:

In [7]:
# my_opsim_run_path = '/my/path'
# opsim_run_fnames = {run: path.join(my_opsim_run_path, run + '.db') for run in opsim_runs}

Otherwise, the databases will be downloaded from the URLs specified here:

In [8]:
# Download location of the database for each run, keyed by run name.
# A run is looked up here only when its database file has to be downloaded.
opsim_run_urls = {
    "baseline_nexp2_v1.7_10yrs": "https://lsst.ncsa.illinois.edu/sim-data/sims_featureScheduler_runs1.7/baseline/baseline_nexp2_v1.7_10yrs.db",
    "baseline_2snaps_v1.5_10yrs": "https://lsst.ncsa.illinois.edu/sim-data/sims_featureScheduler_runs1.5/baseline/baseline_2snaps_v1.5_10yrs.db",
    "baseline_v2.0_10yrs": "https://lsst.ncsa.illinois.edu/sim-data/sims_featureScheduler_runs2.0/baseline/baseline_v2.0_10yrs.db",
}

Now, actually download the files if (and only if) necessary:

In [9]:
from os import path

# Import the submodule explicitly: a bare "import urllib" does not
# guarantee that urllib.request has been loaded.
import urllib.request

# Start from any file names supplied by the user in an earlier cell;
# otherwise begin with an empty mapping. This only needs checking once.
if "opsim_run_fnames" not in locals():
    opsim_run_fnames = {}

for opsim_run in opsim_runs:
    # Keep a user-supplied file name when there is one; otherwise default
    # to a file in data_dir named after the run.
    opsim_run_fname = opsim_run_fnames.setdefault(
        opsim_run, path.join(data_dir, opsim_run + ".db")
    )

    # Test (and, if needed, download to) the file that will actually be
    # used, so an existing user-supplied database is never downloaded again.
    if not path.isfile(opsim_run_fname):
        opsim_run_url = opsim_run_urls[opsim_run]
        print(f"Downloading {opsim_run_url} to {opsim_run_fname}")
        urllib.request.urlretrieve(opsim_run_url, opsim_run_fname)
        print("Download complete")

Now we have a dictionary with the database file names for the runs we want:

In [10]:
opsim_run_fnames

{'baseline_nexp2_v1.7_10yrs': '/data/des91.b/data/neilsen/LSST/devel/rubin_sim_notebooks/maf/tutorial/bulk_metric_data/baseline_nexp2_v1.7_10yrs.db',
 'baseline_2snaps_v1.5_10yrs': '/data/des91.b/data/neilsen/LSST/devel/rubin_sim_notebooks/maf/tutorial/bulk_metric_data/baseline_2snaps_v1.5_10yrs.db',
 'baseline_v2.0_10yrs': '/data/des91.b/data/neilsen/LSST/devel/rubin_sim_notebooks/maf/tutorial/bulk_metric_data/baseline_v2.0_10yrs.db'}

In [11]:
def make_open_shutter_batch(run_name):
    """Build a batch holding a single open shutter fraction bundle.

    Parameters
    ----------
    run_name : str
        Run name passed to the bundle as ``runName``.

    Returns
    -------
    dict
        The result of ``maf.metricBundles.makeBundlesDictFromList`` applied
        to a list containing the one ``maf.MetricBundle`` created here.
    """
    unislicer = maf.UniSlicer()
    shutter_metric = maf.OpenShutterFractionMetric(
        slewTimeCol="slewTime",
        expTimeCol="visitExposureTime",
        visitTimeCol="visitTime",
    )

    # Empty constraint and empty plotDict; the only summary metric is
    # the identity metric.
    shutter_bundle = maf.MetricBundle(
        shutter_metric,
        unislicer,
        "",
        summaryMetrics=[maf.IdentityMetric()],
        runName=run_name,
        plotDict={},
    )

    # The keys do not matter here, so hand MAF a plain list and let it
    # generate unique keys while converting the list to a dict.
    return maf.metricBundles.makeBundlesDictFromList([shutter_bundle])

In [12]:
def make_airmass_batch(run_name):
    """Build a batch holding a single bundle that counts visits by airmass.

    The bundle pairs ``maf.CountMetric`` on the ``airmass`` column with a
    ``maf.OneDSlicer`` on the same column (binMin=1.0, binMax=2.5,
    binsize=0.05).

    Parameters
    ----------
    run_name : str
        Run name passed to the bundle as ``runName``.

    Returns
    -------
    dict
        The result of ``maf.metricBundles.makeBundlesDictFromList`` applied
        to a list containing the one ``maf.MetricBundle`` created here.
    """
    airmass_slicer = maf.OneDSlicer(
        sliceColName="airmass",
        binMin=1.0,
        binMax=2.5,
        binsize=0.05,
    )
    visit_count = maf.CountMetric(col="airmass")

    airmass_bundle = maf.MetricBundle(
        visit_count,
        airmass_slicer,
        "",
        # extendedSummary() produces a list of metrics
        # with mean, median, RMS, etc.
        summaryMetrics=maf.extendedSummary(),
        runName=run_name,
        plotDict={},
    )

    return maf.metricBundles.makeBundlesDictFromList([airmass_bundle])

In [13]:
def multi_batch(run_name="opsim", bands=("g", "i"), footprint_area=18000):
    """Combine the sample open shutter and airmass batches into one batch.

    Parameters
    ----------
    run_name : str
        Run name handed to each of the batch-building functions.
    bands : tuple of str
        Accepted but not used by this function.
    footprint_area : int
        Accepted but not used by this function.

    Returns
    -------
    dict
        The merged contents of the dictionaries returned by
        ``make_open_shutter_batch`` and ``make_airmass_batch``, in that order.
    """
    # Open shutter fraction first, then the distribution of visits in airmass.
    batch_builders = (make_open_shutter_batch, make_airmass_batch)

    combined = {}
    for build_batch in batch_builders:
        combined.update(build_batch(run_name))

    return combined

The batch created by the above function is a dictionary with keys built from the run, metric, metric metadata, and slicer name; and whose values are the instances of `MetricBundle` themselves:

In [14]:
multi_batch()

{'opsim_OpenShutterFraction_UNIS': <rubin_sim.maf.metricBundles.metricBundle.MetricBundle at 0x7f385da218e0>,
 'opsim_Count_airmass_ONED': <rubin_sim.maf.metricBundles.metricBundle.MetricBundle at 0x7f385d5030d0>}

An assortment of pre-defined "batches" can be found in the `rubin_sim/maf/batches` subdirectory of the `rubin_sim` package.

In [15]:
# Show every attribute of maf whose name ends in "batch", ignoring case.
print([name for name in dir(maf) if name.lower().endswith("batch")])

['agnBatch', 'altazBatch', 'astrometryBatch', 'characterizationInnerBatch', 'characterizationOuterBatch', 'ddfBatch', 'descWFDBatch', 'discoveryBatch', 'fOBatch', 'filterchangeBatch', 'glanceBatch', 'hourglassBatch', 'metadataBatch', 'movingObjectsBatch', 'openshutterBatch', 'quickDiscoveryBatch', 'rapidRevisitBatch', 'scienceRadarBatch', 'slewBatch', 'srdBatch', 'tdcBatch', 'timeBatch', 'visitdepthBatch']


Help on these can be obtained in the usual way: `help(maf.glanceBatch)`

They can be called like other functions in `MAF`:

```
this_batch = maf.glanceBatch(run_name=run_name)
```

For this notebook, however, we will just use the sample created above.

### Creating and running batches

Now we can create a high-level driver that creates batches, builds a metric bundle group from them, and computes the metrics and makes plots:

In [16]:
from os import path


def compute_metrics(opsim_runs, opsim_run_fnames, data_dir, batch_name=""):
    """Run and plot the ``multi_batch`` metrics for each of a set of runs.

    Parameters
    ----------
    opsim_runs : iterable of str
        Names of the runs to process.
    opsim_run_fnames : mapping
        Database file name for each run, keyed by run name.
    data_dir : str
        Base directory for output; each run writes to
        ``data_dir/run_name/batch_name``.
    batch_name : str
        Final component of each output directory. The default (empty
        string) adds no extra directory level.

    Returns
    -------
    dict
        The batch (dict of metric bundles) computed for each run,
        keyed by run name.
    """
    batches = {}

    for run_name in opsim_runs:
        opsim_db = maf.OpsimDatabase(opsim_run_fnames[run_name])

        # Follow the opsim team practice and make separate
        # out_dir for each run, and put a results database
        # there.
        out_dir = path.join(data_dir, run_name, batch_name)
        results_db = maf.ResultsDb(outDir=out_dir)

        # Close the results database even if running or plotting fails,
        # so a failed run does not leave it open.
        try:
            this_batch = multi_batch(run_name)
            bundle_group = maf.MetricBundleGroup(
                this_batch, dbObj=opsim_db, outDir=out_dir, resultsDb=results_db
            )
            bundle_group.runAll()
            bundle_group.plotAll()
        finally:
            results_db.close()

        batches[run_name] = this_batch

    return batches

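Finally, we want to call a driver to compute the metrics and make the plots (this example takes about 5 minutes). The next cells first define a more general driver, `compute_batches`, and its helper functions; the call itself follows those definitions: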

In [17]:
isinstance("foo", str)

True

In [18]:
from inspect import getmembers


def _normalize_batches(in_batches):
    if isinstance(in_batches, str) or callable(in_batches):
        in_batches = [in_batches]

    maf_members = dict(getmembers(maf))

    batches = []
    for batch in in_batches:
        if isinstance(batch, str):
            batch = batch if batch.endswith("Batch") else batch + "Batch"
            batch = maf_members[batch]
        batches.append(batch)
    return batches

In [19]:
_normalize_batches("glance")[0].__name__

'glanceBatch'

In [20]:
from collections.abc import Mapping
from collections import OrderedDict
import os.path


def _normalize_run_fnames(in_run_fnames):
    if isinstance(in_run_fnames, Mapping):
        return in_run_fnames

    if isinstance(in_run_fnames, str):
        in_run_fnames = [in_run_fnames]

    run_fnames = OrderedDict()
    for maybe_run_fname in in_run_fnames:
        if maybe_run_fname.endswith(".db"):
            run_fname = maybe_run_fname
            run_name = os.path.splitext(os.path.split(run_fname)[-1])[0]
        else:
            # Assume we were given a run name
            # rather than the actual file name
            run_name = maybe_run_fname
            run_fname = maybe_run_fname + ".db"
        run_fnames[run_name] = run_fname

    return run_fnames

In [21]:
def compute_batches(
    opsim_run_fnames,
    batches,
    out_dir_base,
    batch_name=None,
    results_db=None,
    run=True,
    plot=True,
    save=True,
):
    """Compute (or read) and plot the metrics of some batches for some runs.

    Parameters
    ----------
    opsim_run_fnames : mapping, str, or iterable of str
        The runs to process, in any form accepted by
        ``_normalize_run_fnames``.
    batches : str, callable, or iterable of str and/or callable
        The batches to compute, in any form accepted by
        ``_normalize_batches``. Each batch is called with the run name and
        must return a dict of metric bundles.
    out_dir_base : str
        Base output directory. When no ``results_db`` is supplied, each run
        writes to ``out_dir_base/run_name[/batch_name]``; otherwise
        ``out_dir_base`` itself is used.
    batch_name : str, optional
        Subdirectory name for the output. If omitted and exactly one batch
        is given, it is derived from that batch's ``__name__`` with a
        trailing ``_batch`` or ``Batch`` removed.
    results_db : optional
        Results database to use for every run. If omitted, one is created
        per run and closed when that run is finished; a supplied database
        is never closed here.
    run : bool
        If True, compute the metrics with ``runAll``; otherwise read them
        with ``readAll``.
    plot : bool
        If True, make the plots.
    save : bool
        If True, metric data are kept in memory (``clearMemory=False``) and
        ``maf.writeConfigs`` is called for each run.

    Returns
    -------
    OrderedDict
        The dict of metric bundles for each run, keyed by run name.
    """
    batches = _normalize_batches(batches)
    opsim_run_fnames = _normalize_run_fnames(opsim_run_fnames)

    supplied_results_db = results_db

    if batch_name is None and len(batches) == 1:
        # Not every callable has a __name__ (functools.partial objects do
        # not); in that case fall back to having no batch subdirectory.
        batch_name = getattr(batches[0], "__name__", None)
        if batch_name is not None:
            for trim_end in ("_batch", "Batch"):
                if batch_name.endswith(trim_end):
                    batch_name = batch_name[: -len(trim_end)]

    # When the data are not being kept, plots must be made while running,
    # because clearMemory discards the data afterwards.
    plot_in_runAll = run and plot and not save

    bundle_dicts = OrderedDict()
    for run_name, run_fname in opsim_run_fnames.items():
        if supplied_results_db is None:
            if batch_name is not None:
                run_batch_out_dir = os.path.join(out_dir_base, run_name, batch_name)
            else:
                run_batch_out_dir = os.path.join(out_dir_base, run_name)
            results_db = maf.ResultsDb(outDir=run_batch_out_dir)
        else:
            run_batch_out_dir = out_dir_base

        # Release a results database created here even if this run fails.
        try:
            bundle_dicts[run_name] = {}

            for batch in batches:
                bundle_dicts[run_name].update(batch(run_name))

            opsim_db = maf.OpsimDatabase(run_fname)

            bundle_group = maf.MetricBundleGroup(
                bundle_dicts[run_name],
                dbObj=opsim_db,
                outDir=run_batch_out_dir,
                resultsDb=results_db,
            )

            if run:
                bundle_group.runAll(clearMemory=not save, plotNow=plot_in_runAll)
            else:
                bundle_group.readAll()

            if plot and not plot_in_runAll:
                bundle_group.plotAll()

            if save:
                maf.writeConfigs(opsim_db, run_batch_out_dir)
        finally:
            if supplied_results_db is None:
                results_db.close()

    return bundle_dicts

In [23]:
%%time
batches = compute_batches(opsim_run_fnames, multi_batch, data_dir)

Querying database SummaryAllProps with no constraint for columns ['visitTime', 'visitExposureTime', 'airmass', 'slewTime'].
Found 2045493 visits
Running:  ['baseline_nexp2_v1_7_10yrs_OpenShutterFraction_UNIS']
Completed metric generation.
Running:  ['baseline_nexp2_v1_7_10yrs_Count_airmass_ONED']
Completed metric generation.
Running reduce methods.
Running summary statistics.
Completed.
Plotting figures with "" constraint now.
Plotting complete.
Querying database SummaryAllProps with no constraint for columns ['visitTime', 'visitExposureTime', 'airmass', 'slewTime'].
Found 2056927 visits
Running:  ['baseline_2snaps_v1_5_10yrs_OpenShutterFraction_UNIS']
Completed metric generation.
Running:  ['baseline_2snaps_v1_5_10yrs_Count_airmass_ONED']
Completed metric generation.
Running reduce methods.
Running summary statistics.
Completed.
Plotting figures with "" constraint now.
Plotting complete.
Querying database observations with no constraint for columns ['visitTime', 'visitExposureTime', '

In [24]:
1

1