In [None]:
# This cell does not get executed when run with Times Square
import os
import datetime

# Report parameters, each read from an environment variable with a fallback.
visit_origin = os.environ.get("SCHEDVIEW_VISIT_ORIGIN", "lsstcam")

# day_obs is an integer of the form YYYYMMDD; the fallback is yesterday
# according to the local clock.
_yesterday = datetime.date.today() - datetime.timedelta(days=1)
day_obs = int(os.environ.get("SCHEDVIEW_DAY_OBS", _yesterday.strftime("%Y%m%d")))

# sim_date falls back to the calendar date encoded in day_obs.
_year, _month_and_day = divmod(day_obs, 10000)
_month, _day = divmod(_month_and_day, 100)
_day_obs_date = datetime.date(_year, _month, _day)
sim_date = datetime.date.fromisoformat(os.environ.get("SCHEDVIEW_SIM_DATE", _day_obs_date.isoformat()))

In [None]:
# Validate the inputs
import re

# Later cells expect sim_date to be an ISO 8601 date string.
# The conversion is guarded so that re-running this cell (when sim_date is
# already a string) does not fail with an AttributeError.
if isinstance(sim_date, str):
    # sim_date is interpolated into a DataFrame.query expression further down,
    # so refuse anything that does not look like a plain date.
    if re.fullmatch(r"\d{4}-\d{2}-\d{2}", sim_date) is None:
        raise ValueError(f"sim_date must be an ISO 8601 date (YYYY-MM-DD), got {sim_date!r}")
else:
    sim_date = sim_date.isoformat()

In [None]:
from IPython.display import display, HTML, Markdown
import datetime
import math
import sys
import os
import yaml
from urllib.parse import urlparse
import warnings
import itertools
import healpy as hp
import pandas as pd
import numpy as np
import astropy
import bokeh
import bokeh.io
import boto3
import colorcet
from erfa import ErfaWarning
from astropy.time import Time

In [None]:
from sklearn.neighbors import KernelDensity

In [None]:
# When the shared rubin_sim data directory is present on this host,
# point RUBIN_SIM_DATA_DIR at it.
usdf_sim_data_dir = "/sdf/data/rubin/shared/rubin_sim_data"
if os.path.exists(usdf_sim_data_dir):
    os.environ["RUBIN_SIM_DATA_DIR"] = usdf_sim_data_dir

In [None]:
# Where to import the scheduler-related packages from. One of:
#   'env'    -- use whatever is in the kernel python environment
#   'shared' -- version-numbered package directories under the shared scheduler area
#   'devel'  -- package directories under a user's devel area
sched_source = 'shared'

# For each source: a directory that must exist for the source to be used, and
# the paths to add to sys.path in insertion order. Each path is inserted at
# position 0, so the last one listed ends up first on sys.path.
sched_package_paths = {
    'shared': (
        '/sdf/data/rubin/shared/scheduler/packages',
        (
            "/sdf/data/rubin/shared/scheduler/packages/rubin_scheduler-3.8.0",
            "/sdf/data/rubin/shared/scheduler/packages/rubin_sim-2.2.4",
            "/sdf/data/rubin/shared/scheduler/packages/schedview-0.17.0",
        ),
    ),
    'devel': (
        '/sdf/data/rubin/user/neilsen/devel',
        (
            "/sdf/data/rubin/user/neilsen/devel/uranography",
            "/sdf/data/rubin/user/neilsen/devel/rubin_scheduler",
            "/sdf/data/rubin/user/neilsen/devel/rubin_sim",
            "/sdf/data/rubin/user/neilsen/devel/schedview",
        ),
    ),
}

# Any other value of sched_source leaves sys.path untouched.
if sched_source in sched_package_paths:
    required_dir, package_paths = sched_package_paths[sched_source]
    if os.path.exists(required_dir):
        for package_path in package_paths:
            sys.path.insert(0, package_path)



In [None]:
import rubin_scheduler
import rubin_scheduler.utils
import rubin_sim.sim_archive
from rubin_scheduler.scheduler.model_observatory import ModelObservatory
from rubin_sim import maf
from lsst.resources import ResourcePath

In [None]:
import schedview.compute
import schedview.compute.visits
import schedview.collect
import schedview.collect.rewards
import schedview.collect.visits
import schedview.plot
import schedview.plot.rewards
from schedview import DayObs

In [None]:
# Report title, rendered as a top-level markdown heading.
report_title = f"# Comparison of completed visits from dayobs {day_obs} and the nominal simulations completed on {sim_date}"
display(Markdown(report_title))

This report compares completed visits (as queried from the consdb) for a night to the pre-night simulated visits for that night.

It is similar to the report that displays multiple pre-night simulations for one night, but includes the completed visits as well.

In [None]:
# Degraded IERS accuracy is never going to be important for these figures,
# so do not fail when it is encountered...
astropy.utils.iers.conf.iers_degraded_accuracy = "ignore"

# ...and suppress the accompanying warning as well.
iers_polar_motion_message = (
    "Tried to get polar motions for times after IERS data is valid. "
    "Defaulting to polar motion from the 50-yr mean for those. "
    "This may affect precision at the arcsec level. "
    "Please check your astropy.utils.iers.conf.iers_auto_url "
    "and point it to a newer version if necessary."
)
warnings.filterwarnings(
    action="ignore",
    message=iers_polar_motion_message,
    category=astropy.utils.exceptions.AstropyWarning,
)

In [None]:
# Simulations reach far enough into the future that the erfa module calls
# the year "dubious". Silence those warnings.
warnings.filterwarnings(
    action="ignore",
    message=r".*dubious year.*",
    category=ErfaWarning,
)

In [None]:
# Don't complain about working with daytime MJDs either.
warnings.filterwarnings(
    "ignore",
    module="rubin_scheduler.skybrightness_pre.sky_model_pre",
    category=UserWarning,
    message="Requested MJD between sunrise and sunset, returning closest maps",
)

In [None]:
warnings.filterwarnings(
    "ignore",
    module=r"schedview.collect.opsim.*",
    category=UserWarning,
    message=r"Column .* not found in .*, skipping.",
)

In [None]:
warnings.filterwarnings(
    "ignore",
    module=r"rubin_sim.maf.stackers.*",
    category=UserWarning,
    message=r".*column day_obs_mjd already present in sim_data, may be overwritten.*",
)

In [None]:
# Quiet unimportant chatter from healpy.
# logging is imported here because none of the visible earlier cells import
# it; without the import this cell raises a NameError on a fresh kernel.
import logging

healpy_logger = logging.getLogger("healpy")
healpy_logger.setLevel(logging.WARNING)

In [None]:
# Configure bokeh so that figures passed to bokeh.io.show are rendered in the notebook output.
bokeh.io.output_notebook()

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Location of the archive of pre-night simulations.
archive_uri = "s3://rubin:rubin-scheduler-prenight/opsim/"

# For an S3 archive, set the environment variables naming the endpoint
# and disabling bucket validation.
archive_scheme = urlparse(archive_uri).scheme
if archive_scheme.upper() == 'S3':
    os.environ.update(
        {
            "LSST_DISABLE_BUCKET_VALIDATION": "1",
            "S3_ENDPOINT_URL": "https://s3dfrgw.slac.stanford.edu/",
        }
    )

In [None]:
# Replace the day_obs parameter with a DayObs instance, and keep its MJD handy.
day_obs = DayObs.from_date(day_obs)
day_obs_mjd = day_obs.mjd

observatory = ModelObservatory(init_load_length=1)
timezone = "Chile/Continental"

# LATISS visits belong to the auxiliary telescope; every other origin is
# treated as the Simonyi telescope.
if visit_origin.lower() == "latiss":
    telescope = "auxtel"
else:
    telescope = "simonyi"

## Astronomical events during the night

In [None]:
# Convert the day_obs MJD into a calendar date, then compute the night's events for it.
day_obs_datetime = Time(day_obs_mjd, format='mjd').datetime
day_obs_date = day_obs_datetime.date()
night_events = schedview.compute.astro.night_events(day_obs_date)
night_events

## Sun and moon positions in the middle of the night

In [None]:
# Create a model observatory and set its time to the middle of the night.
model_observatory = ModelObservatory(init_load_length=1)
night_middle_mjd = night_events.loc['night_middle', 'MJD']
model_observatory.mjd = night_middle_mjd

In [None]:
# Sun and moon positions at the middle of the night, as returned by the almanac.
sun_moon_positions = model_observatory.almanac.get_sun_moon_positions(night_events.loc['night_middle', 'MJD'])

# Name the index 'r' and turn it into a column so that wide_to_long can use it as the id.
body_positions_wide = pd.DataFrame(sun_moon_positions).rename_axis(index='r').reset_index()

angle_columns = ['RA', 'dec', 'alt', 'az']
all_columns = [*angle_columns, 'phase']

# Split the sun_* and moon_* columns on the '_' separator, then transpose and
# keep only the columns of interest.
body_positions_long = pd.wide_to_long(
    body_positions_wide,
    stubnames=('sun', 'moon'),
    suffix=r'.*',
    sep='_',
    i='r',
    j='',
)
body_positions = body_positions_long.droplevel('r').T[all_columns]

# Apply a radians-to-degrees conversion to the angle columns.
body_positions[angle_columns] = np.degrees(body_positions[angle_columns])
body_positions

All angles are in degrees.

In [None]:
# Read the visits for the night from the requested origin, applying the NIGHT_STACKERS.
completed_visits = schedview.collect.visits.read_visits(
    day_obs,
    visit_origin,
    stackers=schedview.collect.visits.NIGHT_STACKERS,
)

# Flag used by later cells to handle a night with no completed visits.
no_visits = len(completed_visits) < 1

In [None]:
# Tell the reader explicitly when the night has no completed visits.
if no_visits:
    display(HTML("<b>No completed visits found on this night.</b>"))

In [None]:
# Reference time for the night: the start of the earliest completed visit,
# or the DayObs sun_n12_setting time when there were no visits.
if no_visits:
    start_time = DayObs.from_date(day_obs).sun_n12_setting
else:
    start_time = Time(completed_visits.observationStartMJD.min(), format='mjd')

In [None]:
# Read the archived simulations, then keep only those whose sim_date matches the requested one.
archived_sim_visits = schedview.collect.read_multiple_opsims(
    archive_uri,
    sim_date,
    day_obs_mjd,
    telescope=telescope,
)
simulated_visits = archived_sim_visits.query(f'sim_date == "{sim_date}"')

In [None]:
# Build the combined `visits` table used by the rest of the report.
if no_visits:
    # Nothing was completed, so the report covers only the simulated visits.
    visits = simulated_visits
else:
    # Look up the versions and scheduler configuration in effect at start_time.
    ts_config_ocs_version = schedview.collect.get_version_at_time("ts_config_ocs", start_time)
    # NOTE(review): sal_indexes is not referenced again in the visible notebook — confirm it is needed.
    sal_indexes = schedview.collect.SAL_INDEX_GUESSES[visit_origin]
    # Top-level await: this relies on the notebook kernel permitting await outside a function.
    opsim_config_script = await schedview.collect.get_scheduler_config(ts_config_ocs_version, telescope.lower(), start_time)
    # tz_localize raises if the column is already timezone-aware, so this cell
    # cannot be re-run without first re-reading completed_visits.
    completed_visits['start_date'] = pd.to_datetime(completed_visits['start_date'], format='ISO8601').dt.tz_localize('UTC')
    # Add simulation-style metadata columns to the completed visits,
    # presumably so they line up with the columns of simulated_visits — TODO confirm.
    completed_visits['filter'] = completed_visits['band']
    completed_visits['sim_date'] = None
    # The completed visits are given sim_index 0 and the label 'Completed'.
    completed_visits['sim_index'] = 0
    completed_visits['label'] = 'Completed'
    completed_visits['opsim_config_branch'] = ts_config_ocs_version
    completed_visits['opsim_config_repository'] = None
    completed_visits['opsim_config_script'] = opsim_config_script
    completed_visits['scheduler_version'] = schedview.collect.get_version_at_time('rubin_scheduler', start_time)
    # NOTE(review): assigning an empty dict to a column may not store {} in each row
    # (pandas may treat a dict as index-aligned values) — confirm the result is what is intended.
    completed_visits['sim_runner_kwargs'] = {}
    # One ['completed'] entry per row; every row references the same list object.
    completed_visits.loc[:, 'tags'] = len(completed_visits) * [['completed']]

    visits = pd.concat([completed_visits, simulated_visits])
    

In [None]:
# Wrap the combined visits table in a bokeh data source.
# NOTE(review): visits_ds is not referenced by any later cell visible here — confirm it is still needed.
visits_ds = bokeh.models.ColumnDataSource(visits)

In [None]:
# Generate the color, marker and hatch indicators for the distinct simulation labels.
sim_labels = visits['label'].unique()
sim_indicators = schedview.plot.generate_sim_indicators(sim_labels)
sim_color_mapper, sim_color_dict, sim_marker_mapper, sim_hatch_dict = sim_indicators

## Visit parameter vs. time

The scalar visit parameter to plot can be chosen with the dropdown in the upper left.

By default, all simulations for the night are shown as well. This is often too busy, so the dropdown on the upper right can be used to specify individual simulations instead.

In [None]:
default_column = "nest_healpix"

# Create the instance of bokeh.plotting.figure ourselves so that we can set
# the frame width and height.
plot = bokeh.plotting.figure(
    x_axis_label="Time (UTC)",
    y_axis_label=default_column,
    frame_width=1024,
    frame_height=512,
)
fig = schedview.plot.visits.plot_visit_param_vs_time(
    visits,
    default_column,
    plot=plot,
    show_column_selector=True,
    show_sim_selector=True,
    size=10,
    marker_transform=sim_marker_mapper,
)
bokeh.io.show(fig)

## Often repeated fields

An often repeated field is a field repeated at least four times in at least one simulation, where a "field" is a unique combination of field coordinates and filter.

In [None]:
def _format_clock_time(t):
    """Format a time as HH:MM:SS."""
    return t.strftime("%H:%M:%S")


often_repeated_fields, often_repeated_field_stats = schedview.compute.often_repeated_fields(visits)

# Show only the clock time for the first and last visit of each field.
often_repeated_field_stats.style.format(
    {
        'first_time': _format_clock_time,
        'last_time': _format_clock_time,
    }
)

At present, field coordinates must be exactly matched to be recognized as the "same" field.
A more robust approach would be to find clusters of nearby pointings (maybe with kmeans or a similar algorithm), and group by the identified clusters.

## Distribution comparisons

Overplotting distributions using kernel density estimates (similar to histograms, but continuous estimates of the underlying PDF).

KDEs are shown here instead of histograms because they can be easier to interpret when overplotting multiple distributions, if those distributions are actually different.

In [None]:
# KDE of fieldRA for each simulation, evaluated at integer points from 0 to 359.
fig = schedview.plot.overplot_kernel_density_estimates(
    visits,
    column='fieldRA',
    x_points=np.arange(0, 360),
    bandwidth=1,
    colors=sim_color_dict,
    hatches=sim_hatch_dict,
)
bokeh.io.show(fig)

In [None]:
# KDE of fieldDec for each simulation, evaluated at integer points from -90 to 29.
fig = schedview.plot.overplot_kernel_density_estimates(
    visits,
    column='fieldDec',
    x_points=np.arange(-90, 30),
    bandwidth=1,
    colors=sim_color_dict,
    hatches=sim_hatch_dict,
)
bokeh.io.show(fig)

In [None]:
# KDE of airmass for each simulation; visits whose airmass is NaN are left out.
has_airmass = ~np.isnan(visits.airmass)
fig = schedview.plot.overplot_kernel_density_estimates(
    visits[has_airmass],
    column='airmass',
    x_points=np.arange(1.0, 2.5, 0.005),
    bandwidth=0.001,
    colors=sim_color_dict,
    hatches=sim_hatch_dict,
)
bokeh.io.show(fig)

## Common visits

`sim_index` columns in the tables that follow refer to simulations with the following labels:

In [None]:
# First label found for each sim_index, shown with a wide column limit so long labels are not cut off.
labels_by_sim_index = visits.groupby('sim_index')['label'].first().to_frame()
with pd.option_context('display.max_colwidth', 512):
    display(labels_by_sim_index)

In [None]:
# Count visits per simulation for each (fieldHpid, band) combination.
# nside=2**18 is about 1 arcsec resolution.
visit_counts = schedview.compute.multisim.count_visits_by_sim(
    visits,
    visit_spec_columns=("fieldHpid", "band"),
    nside=2**18,
)

Coordinate/filter/exposure time combinations repeated more than four times in any simulation:

In [None]:
# Rows whose largest count across the simulations exceeds four.
repeated_more_than_four = visit_counts.max(axis=1) > 4
visit_counts.loc[repeated_more_than_four, :]

Coordinate/filter/exposure time combinations all simulations have in common, with statistics on how often they occur:

In [None]:
# Summary statistics across simulations for each combination (one row per combination).
per_combination_stats = visit_counts.T.describe().T

# A combination is common to all simulations when its minimum count is above zero.
present_in_every_sim = per_combination_stats['min'] > 0

(
    per_combination_stats
    .loc[present_in_every_sim, ['min', '25%', '50%', 'mean', '75%', 'max']]
    .sort_values('min', ascending=False)
)

Matrix of the fraction of coordinate/filter/exposure time combinations present in one simulation that are also present in another.

For example, column 0, row 1 has the fraction of such combinations present in completed visits that are also present in simulation 1.

In [None]:
# Display the table of visit counts per simulation.
visit_counts

In [None]:
# Print the fraction in common between the first two columns of visit_counts.
# Indexing the second column raises IndexError when there is only one.
try:
    first_sim, second_sim = visit_counts.columns[0], visit_counts.columns[1]
    common_fraction = schedview.compute.multisim.fraction_common(visit_counts, first_sim, second_sim)
    print(common_fraction.item())
except IndexError:
    print("Too few simulations.")

In [None]:
# Fraction-in-common matrix with match_count=False.
# NOTE(review): per the accompanying text this is the variant that ignores how many
# times a combination repeats — confirm against the schedview documentation.
schedview.compute.multisim.make_fraction_common_matrix(visit_counts, match_count=False)

Matrix of the fraction of coordinate/filter/exposure time combinations present in one simulation that are also present in another, where repeats in both are considered additional matches and differences in the number of repeats of a given combination are counted as occurrences in one but not the other.

In [None]:
# Fraction-in-common matrix using the function's default match_count setting.
# NOTE(review): per the accompanying text this variant takes repeat counts into account — confirm.
schedview.compute.multisim.make_fraction_common_matrix(visit_counts)    

## Timing offsets

The following table shows the statistics for differences in timing (in seconds) in corresponding visits between the completed visits (sim_index=0) and each other simulation.

In matching visits to find which ones in different simulations correspond to each other, each visit is counted only once.
When there are different total numbers of visits to the same field, the required number of visits of the simulation with more are dropped before matches are made.
When the total combinations to be checked is small, visits to be dropped are selected to be optimal to make the remainder of the visits match.
When there are too many total combinations to check in reasonable time, visits are dropped from the beginning or end.

In [None]:
# Number of distinct sim_index values in the combined visits table.
visits.sim_index.nunique()

In [None]:
# Timing offsets need at least two distinct sim_index values;
# the reference passed to the computation is sim_index 0.
n_sims = visits.sim_index.nunique()
if n_sims <= 1:
    print("Too few simulations")
else:
    matched_visit_dt_stats = schedview.compute.compute_matched_visit_delta_statistics(
        visits,
        sim_identifier_reference_value=0,
        visit_spec_columns=("fieldHpid", "band"),
        nside=2**18,
    )
    print(matched_visit_dt_stats)