In [None]:
# This cell is only for setting parameter defaults
# Both values are date strings in YYYY-MM-DD form (the validation cell below asserts this).
# day_obs selects the night whose events and visits are shown.
day_obs = "2024-09-04"
# sim_date is passed as `latest` when reading the simulation archive metadata.
sim_date = "2024-09-04"

# Prenight Briefing for {{ params.day_obs }}

This is presently just a playground for building a pre-night briefing.

In [None]:
# Validate the inputs: each parameter must be exactly a YYYY-MM-DD string.
# re.fullmatch is used instead of re.match with a trailing "$", because "$"
# also matches just before a trailing newline and would let "2024-09-04\n" through.
import re
assert re.fullmatch(r'\d\d\d\d-\d\d-\d\d', day_obs) is not None, (
    f"day_obs must be formatted as YYYY-MM-DD, got {day_obs!r}"
)
assert re.fullmatch(r'\d\d\d\d-\d\d-\d\d', sim_date) is not None, (
    f"sim_date must be formatted as YYYY-MM-DD, got {sim_date!r}"
)

In [None]:
from IPython.display import display, HTML
import datetime
import math
import sys
import os
import yaml
from urllib.parse import urlparse
import warnings
import itertools
import pandas as pd
import numpy as np
import astropy
import bokeh
import bokeh.io
import boto3
import colorcet
from erfa import ErfaWarning
from astropy.time import Time

In [None]:
from sklearn.neighbors import KernelDensity

In [None]:
# Set RUBIN_SIM_DATA_DIR before the rubin_scheduler / rubin_sim imports below.
# NOTE(review): this is a user-specific absolute path; presumably it must be
# readable wherever this notebook is executed - confirm.
os.environ['RUBIN_SIM_DATA_DIR'] = '/sdf/data/rubin/user/neilsen/data/rubin_sim_data'

In [None]:
# Put a pip target directory at the front of the module search path, so that
# packages found there take precedence over same-named packages later on sys.path.
sys.path.insert(0, '/sdf/data/rubin/user/neilsen/devel/pip_targets/lib/python3.11/site-packages')

In [None]:
# When enabled, development checkouts shadow any installed versions of these packages.
devel_versions = True
if devel_versions:
    # Prepend all checkouts in one step; they are listed in final search
    # order (first entry is searched first).
    sys.path[:0] = [
        '/sdf/data/rubin/user/neilsen/devel/schedview',
        '/sdf/data/rubin/user/neilsen/devel/rubin_sim',
        '/sdf/data/rubin/user/neilsen/devel/rubin_scheduler',
        '/sdf/data/rubin/user/neilsen/devel/times_square_sources/2024-03-25/uranography',
    ]

In [None]:
import rubin_scheduler
import rubin_scheduler.utils
import rubin_scheduler.sim_archive
from rubin_scheduler.scheduler.model_observatory import ModelObservatory
from rubin_sim import maf
from lsst.resources import ResourcePath

In [None]:
import schedview.compute
import schedview.compute.visits
import schedview.collect
import schedview.collect.rewards
import schedview.plot
import schedview.plot.rewards

In [None]:
# Degraded IERS accuracy is never going to be important for these figures.

# If IERS degraded accuracy encountered, don't fail, just keep going.
astropy.utils.iers.conf.iers_degraded_accuracy = "ignore"

# Don't even complain.
# (warnings.filterwarnings treats `message` as a regular expression matched
# against the start of the warning text.)
warnings.filterwarnings(
    "ignore",
    category=astropy.utils.exceptions.AstropyWarning,
    message="Tried to get polar motions for times after IERS data is valid. Defaulting to polar motion from the 50-yr mean for those. This may affect precision at the arcsec level. Please check your astropy.utils.iers.conf.iers_auto_url and point it to a newer version if necessary.",
)

In [None]:
# In simulations, we go far enough into the future that the erfa module finds it "dubious".
# Keep the complaints quiet.
# Only ErfaWarning messages containing "dubious year" are suppressed.
warnings.filterwarnings(
    "ignore",
    category=ErfaWarning,
    message=r".*dubious year.*",
)

In [None]:
# Don't complain about working with daytime MJDs either.
# Only UserWarnings with this message text that are attributed to the named
# module are suppressed.
warnings.filterwarnings(
    "ignore",
    module="rubin_scheduler.skybrightness_pre.sky_model_pre",
    category=UserWarning,
    message="Requested MJD between sunrise and sunset, returning closest maps",
)

In [None]:
# Quiet unimportant chatter from healpy.
# `logging` is imported here because no earlier cell imports it; without
# this the cell raises NameError on a fresh kernel.
import logging

healpy_logger = logging.getLogger("healpy")
healpy_logger.setLevel(logging.WARNING)

In [None]:
# Configure bokeh so that bokeh.io.show() renders figures in the notebook output.
bokeh.io.output_notebook()

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Location of the archive of pre-night simulations.
archive_uri = "s3://rubin:rubin-scheduler-prenight/opsim/"

# An S3 archive (scheme compared case-insensitively) needs the endpoint
# configured through the environment before it is accessed.
if urlparse(archive_uri).scheme.lower() == 's3':
    os.environ.update(
        {
            "LSST_DISABLE_BUCKET_VALIDATION": "1",
            "S3_ENDPOINT_URL": "https://s3dfrgw.slac.stanford.edu/",
        }
    )

In [None]:
# Integer MJD of day_obs; used below both to look up the night's events and
# to select visits belonging to that night.
day_obs_mjd = int(Time(day_obs).mjd)
# NOTE(review): `observatory` and `timezone` are not referenced in the visible
# remainder of this notebook - confirm whether they are still needed.
observatory = ModelObservatory(init_load_length=1)
timezone = "Chile/Continental"

## Astronomical events during the night

In [None]:
# Turn the integer day_obs MJD back into a calendar date and look up the
# events of that night; the resulting table is displayed as the cell output.
day_obs_datetime = Time(day_obs_mjd, format='mjd').datetime
day_obs_date = day_obs_datetime.date()
night_events = schedview.compute.astro.night_events(day_obs_date)
night_events

## Sun and moon positions in the middle of the night

In [None]:
# A model observatory with its time set to the middle of the night; its
# almanac is used in the next cell.
# NOTE(review): a ModelObservatory with the same arguments is already created
# earlier as `observatory` - confirm whether two instances are needed.
model_observatory = ModelObservatory(init_load_length=1)
model_observatory.mjd = night_events.loc['night_middle', 'MJD']

In [None]:
# Sun and moon positions at the middle of the night, reshaped into a table
# with one row per body ('sun', 'moon') and one column per quantity.
# Presumably the almanac returns keys of the form '<body>_<quantity>'
# (e.g. 'sun_RA'), which is what the stubnames/sep below rely on - TODO confirm.
body_positions_wide = pd.DataFrame(model_observatory.almanac.get_sun_moon_positions(night_events.loc['night_middle', 'MJD']))
# wide_to_long needs an id column, so expose the row index as column 'r'.
body_positions_wide.index.name = 'r'
body_positions_wide.reset_index(inplace=True)

angle_columns = ['RA', 'dec', 'alt', 'az']
all_columns = angle_columns + ['phase']
body_positions = (
    pd.wide_to_long(body_positions_wide, stubnames=('sun', 'moon'), suffix=r'.*', sep='_', i='r', j='')
    .droplevel('r')
    .T[all_columns]
)
# np.degrees converts the angle columns from radians to degrees; 'phase' is left as is.
body_positions[angle_columns] = np.degrees(body_positions[angle_columns])
body_positions

All angles are in degrees.

## Simulated visits

In [None]:
# Metadata for archived simulations, requested with latest=sim_date and num_nights=1.
# The loop below iterates it as a mapping from simulation URI to metadata dict.
sims_metadata = rubin_scheduler.sim_archive.read_archived_sim_metadata(archive_uri, latest=sim_date, num_nights=1)

In [None]:
# Columns requested from each simulation's visit table; passed as `dbcols`
# to schedview.collect.read_opsim when the visits are loaded below.
visit_columns = [
    "observationId",
    "fieldRA",
    "fieldDec",
    "observationStartMJD",
    "flush_by_mjd",
    "visitExposureTime",
    "filter",
    "rotSkyPos",
    "rotSkyPos_desired",
    "numExposures",
    "airmass",
    "seeingFwhm500",
    "seeingFwhmEff",
    "seeingFwhmGeom",
    "skyBrightness",
    "night",
    "slewTime",
    "visitTime",
    "slewDistance",
    "fiveSigmaDepth",
    "altitude",
    "azimuth",
    "paraAngle",
    "cloud",
    "moonAlt",
    "sunAlt",
    "note",
    "block_id",
    "observationStartLST",
    "rotTelPos",
    "rotTelPos_backup",
    "moonAz",
    "sunAz",
    "sunRA",
    "sunDec",
    "moonRA",
    "moonDec",
    "moonDistance",
    "solarElong",
    "moonPhase",
    "cummTelAz",
    "scripted_id",
]

In [None]:
# Load the visits on day_obs from every archived simulation that covers that
# night, tagging each visit with the simulation it came from.
visits_list = []
for sim_uri, sim_metadata in sims_metadata.items():
    first_day_obs_mjd = astropy.time.Time(sim_metadata['simulated_dates']['first']).mjd
    last_day_obs_mjd = astropy.time.Time(sim_metadata['simulated_dates']['last']).mjd

    includes_day_obs = first_day_obs_mjd <= day_obs_mjd <= last_day_obs_mjd

    # Skip simulations whose simulated date range does not contain day_obs.
    if not includes_day_obs:
        continue

    sim_rp = ResourcePath(sim_uri).join(sim_metadata['files']['observations']['name'])

    these_visits = schedview.collect.read_opsim(
        sim_rp,
        constraint=f"FLOOR(observationStartMJD-0.5)={day_obs_mjd}",
        stackers=[
            maf.stackers.TeffStacker(),
            maf.stackers.ObservationStartDatetime64Stacker(),
            maf.stackers.DayObsStacker(),
            maf.stackers.DayObsMJDStacker(),
            maf.stackers.DayObsISOStacker(),
            maf.stackers.OverheadStacker()
        ],
        dbcols=visit_columns
    )
    these_visits = schedview.compute.visits.add_coords_tuple(these_visits)
    # The last two path elements of the URI (before the trailing "/") hold
    # the simulation date and the index of the simulation on that date.
    these_visits['sim_date'] = sim_uri.split("/")[-3]
    these_visits['sim_index'] = int(sim_uri.split("/")[-2])

    # Repeat the per-simulation metadata on every visit row. A list is used
    # so that non-scalar metadata values are stored once per row.
    for key in ['label', 'opsim_config_branch', 'opsim_config_repository', 'opsim_config_script', 'scheduler_version', 'sim_runner_kwargs', 'tags']:
        these_visits[key] = [sim_metadata[key]] * len(these_visits)

    visits_list.append(these_visits)

# pd.concat raises an uninformative "No objects to concatenate" on an empty
# list, so fail with a message that says what was being looked for.
if not visits_list:
    raise ValueError(
        f"No simulations found in {archive_uri} (latest={sim_date}) that include day_obs {day_obs}"
    )

visits = pd.concat(visits_list)
visits_ds = bokeh.models.ColumnDataSource(visits)

In [None]:
# One colour per simulation label, taken from the start of the glasbey palette.
sim_labels = visits['label'].unique()
num_sims = len(sim_labels)
sim_color_mapper = bokeh.models.CategoricalColorMapper(
    name="simulation",
    factors=sim_labels,
    palette=colorcet.palette["glasbey"][:num_sims],
)
# The same label -> colour assignment as a plain dict, for glyphs that take
# a colour value rather than a mapper.
sim_color_dict = {
    factor: color
    for factor, color in zip(sim_color_mapper.factors, sim_color_mapper.palette)
}

In [None]:
# Markers whose names contain "_" share an outer shape with a plain marker
# and differ only in inner markings, which makes them harder to tell apart.
# A stable sort on that flag keeps the enumeration order within each group
# while moving those markers to the end.
all_markers = sorted(bokeh.core.enums.MarkerType, key=lambda marker: "_" in marker)

# dot is hard to see
all_markers.remove('dot')
sim_marker_mapper = bokeh.models.CategoricalMarkerMapper(
    name="simulation",
    factors=sim_labels,
    markers=all_markers[:num_sims],
)

In [None]:
# One hatch pattern per simulation label, skipping the first entry of bokeh's
# HatchPattern enumeration (presumably the blank pattern - TODO confirm).
sim_hatch_dict = dict(zip(sim_labels, tuple(bokeh.core.enums.HatchPattern)[1:len(sim_labels)+1]))

## Altitude and airmass

In [None]:
# Scatter plot of visit altitude against start time, with colour and marker
# both keyed on the simulation label.
fig = bokeh.plotting.figure(
    title="Altitude",
    x_axis_label="Time (UTC)",
    y_axis_label="Altitude",
    frame_width=1024,
    frame_height=633,
)

fig.scatter(x='start_date',
            y='altitude',
            legend_group='label',
            fill_alpha=0.2,
            color={"field": "label", "transform": sim_color_mapper},
            marker={"field": "label", "transform": sim_marker_mapper},
            source=visits_ds)

# Add a second y axis on the right. It is tied to the same range as the
# altitude axis and given the same number of ticks; only its tick formatter
# differs (supplied by schedview), so that it is labelled as airmass.
fig.extra_y_ranges = {"airmass": fig.y_range}
fig.add_layout(bokeh.models.LinearAxis(), "right")
fig.yaxis[1].ticker.desired_num_ticks = fig.yaxis[0].ticker.desired_num_ticks
fig.yaxis[1].formatter = schedview.plot.nightly.make_airmass_tick_formatter()
fig.yaxis[1].minor_tick_line_alpha = 0
fig.yaxis[1].axis_label = "Airmass"

# Show the x axis as times of day (hours and minutes).
fig.xaxis[0].ticker = bokeh.models.DatetimeTicker()
fig.xaxis[0].formatter = bokeh.models.DatetimeTickFormatter(hours="%H:%M")

# Move the legend out of the plot area, to below the figure.
fig.add_layout(fig.legend[0], 'below')

bokeh.io.show(fig)

## Often repeated fields

An often repeated field is a field repeated at least four times in at least one simulation:

In [None]:
# For each (coordinates, filter, simulation) combination: the number of
# visits, the first and last visit time, and the simulation label.
field_repeats = visits.groupby(['fieldRA', 'fieldDec', 'filter', 'sim_index']).agg({'start_date': ['count', 'min', 'max'], 'label': 'first'})
# Flatten the two-level column names produced by agg into simple names.
column_map = {
    ('start_date', 'count'): 'count',
    ('start_date', 'min'): 'first_time',
    ('start_date', 'max'): 'last_time',
    ('label', 'first'): 'label'}
field_repeats.columns = pd.Index([column_map[c] for c in field_repeats.columns])

# Get the index in ra/dec/filter, then use that as an index so we can show instances
# in simulations that have fewer than four visits of a field that is often
# visited in another simulation.
often_repeated_fields = field_repeats.query('count >= 4').droplevel('sim_index', 'index').index.unique()

often_repeated_field_stats = field_repeats.reset_index('sim_index').loc[often_repeated_fields, :].set_index('sim_index', append=True)
# Display the times as time of day only.
often_repeated_field_stats.style.format({
    'first_time': lambda t: t.strftime("%H:%M:%S"),
    'last_time': lambda t: t.strftime("%H:%M:%S")})

At present, field coordinates must be exactly matched to be recognized as the "same" field.
A more robust approach would be to find clusters of nearby pointings (maybe with kmeans or a similar algorithm), and group by the identified clusters.

## Distribution comparisons

In [None]:
def plot_pd(visits, column, x_points, bandwidth, kernel='epanechnikov', fig=None):
    """Plot a kernel density estimate of one visit column for each simulation.

    One filled area is drawn per entry of the notebook-level ``sim_labels``,
    using the colours and hatch patterns in ``sim_color_dict`` and
    ``sim_hatch_dict``.

    Parameters
    ----------
    visits : `pandas.DataFrame`
        Visits, with a ``label`` column identifying the simulation and a
        column named by ``column``.
    column : `str`
        Name of the column whose distribution is plotted.
    x_points : `numpy.ndarray`
        One-dimensional array of values at which the density is evaluated.
    bandwidth : `float`
        Bandwidth passed to `sklearn.neighbors.KernelDensity`.
    kernel : `str`
        Kernel name passed to `sklearn.neighbors.KernelDensity`.
    fig : `bokeh.plotting.figure` or `None`
        Figure to draw on; a new one titled with ``column`` is created
        if `None`.

    Returns
    -------
    fig : `bokeh.plotting.figure`
        The figure with the densities added and its legend placed below.
    """
    if fig is None:
        fig = bokeh.plotting.figure(width=1024, height=633, title=column)

    # The evaluation grid is the same for every simulation, so build the
    # column-vector form that score_samples expects only once.
    x_grid = np.asarray(x_points)[:, np.newaxis]

    for sim_label in sim_labels:
        # Select with a boolean mask rather than set_index(...).loc[label]:
        # the mask always yields a Series, whereas label lookup collapses to
        # a scalar (which has no .values) when a simulation has one visit.
        this_sim_data = visits.loc[visits['label'] == sim_label, column]
        kde = KernelDensity(kernel=kernel, bandwidth=bandwidth).fit(this_sim_data.values[:, np.newaxis])
        # score_samples returns log densities.
        prob_density = np.exp(kde.score_samples(x_grid))

        fig.varea(x_points, prob_density, 0, fill_alpha=0.1, color=sim_color_dict[sim_label], hatch_pattern=sim_hatch_dict[sim_label], legend_label=sim_label)

    # Move the legend out of the plot area, to below the figure.
    fig.add_layout(fig.legend[0], 'below')
    return fig

In [None]:
# Distribution of fieldRA, evaluated at integer values from 0 to 359.
fig = plot_pd(visits, column='fieldRA', x_points=np.arange(0, 360), bandwidth=1)
bokeh.io.show(fig)

In [None]:
# Distribution of fieldDec, evaluated at integer values from -90 to 29.
fig = plot_pd(visits, column='fieldDec', x_points=np.arange(-90, 30), bandwidth=1)
bokeh.io.show(fig)

In [None]:
# Distribution of airmass, evaluated from 1.0 up to (but excluding) 2.5 in steps of 0.005.
# NOTE(review): the bandwidth (0.001) is smaller than the spacing of x_points
# (0.005) - confirm this is intended.
fig = plot_pd(visits, column='airmass', bandwidth=0.001, x_points=np.arange(1.0, 2.5, 0.005))
bokeh.io.show(fig)

## Common visits

In [None]:
# Table of visit counts: one row per coordinate/filter/exposure time
# combination, one column per simulation, and 0 where a simulation never
# visits that combination.
visit_counts = (
    visits
    .groupby(['sim_index', 'fieldRA', 'fieldDec', 'filter', 'visitExposureTime'])['start_date']
    .count()
    .rename('count')
    .reset_index()
    .pivot(index=['fieldRA', 'fieldDec', 'filter', 'visitExposureTime'],
           columns=['sim_index'],
           values='count')
    .fillna(0)
    .astype(int)
)

Coordinate/filter/exposure time combinations repeated more than four times in any simulation:

In [None]:
# Keep the rows whose largest count over all simulations exceeds four.
visit_counts.loc[visit_counts.max(axis='columns')>4, :]

Coordinate/filter/exposure time combinations all simulations have in common, and the count from the simulation where it is repeated the fewest times:

In [None]:
# The smallest count over all simulations for each combination; it is
# non-zero only when every simulation visits that combination.
common_visits = visit_counts.min(axis='columns')
common_visits = common_visits[common_visits>0]
print(f"There are {common_visits.sum()} visits in common across all simulations:")
common_visits

In [None]:
def fraction_common(visit_counts, sim1, sim2, match_count=True):
    """Compute the fraction of the visits in one simulation also found in another.

    Parameters
    ----------
    visit_counts : `pandas.DataFrame`
        Visit counts with one row per coordinate/filter/exposure time
        combination and one column per simulation. It is not modified.
    sim1 : hashable
        Column of the simulation in which matches are looked for.
    sim2 : hashable
        Column of the simulation whose visits form the denominator.
    match_count : `bool`
        If `True`, repeats count: a combination contributes the smaller of
        its two counts to the numerator and its ``sim2`` count to the
        denominator. If `False`, each combination counts at most once in
        each simulation.

    Returns
    -------
    fraction : `float`
        Number of ``sim2`` visits matched in ``sim1`` divided by the number
        of ``sim2`` visits, or NaN if ``sim2`` has no visits.
    """
    # Only count fields for which there is at least one visit in sim2.
    has_sim2_visits = visit_counts[sim2] > 0
    sim1_counts = visit_counts.loc[has_sim2_visits, sim1]
    sim2_counts = visit_counts.loc[has_sim2_visits, sim2]

    if not match_count:
        # Count any number of repetitions of a field only once. Working on
        # new Series (rather than assigning into a filtered frame) leaves
        # the caller's data untouched and avoids SettingWithCopyWarning.
        sim1_counts = sim1_counts.clip(upper=1)
        sim2_counts = sim2_counts.clip(upper=1)

    num_common_visits = np.minimum(sim1_counts, sim2_counts).sum()
    num_visits2 = sim2_counts.sum()

    # With no visits in sim2 the fraction is undefined; return NaN explicitly
    # rather than relying on a 0/0 division.
    if num_visits2 == 0:
        return np.nan

    return num_common_visits / num_visits2


In [None]:
def make_fraction_common_matrix(visit_counts, match_count=True, sim_indexes=None):
    """Tabulate `fraction_common` for every ordered pair of simulations.

    Parameters
    ----------
    visit_counts : `pandas.DataFrame`
        Visit counts with one column per simulation.
    match_count : `bool`
        Passed through to `fraction_common`.
    sim_indexes : sequence or `None`
        Simulations to include; all columns of ``visit_counts`` if `None`.

    Returns
    -------
    common_matrix : `pandas.DataFrame`
        Square table whose (row, column) element is
        ``fraction_common(visit_counts, row, column)``.
    """
    sims = visit_counts.columns.values if sim_indexes is None else sim_indexes

    common_matrix = pd.DataFrame(np.nan, index=sims, columns=sims)
    # Visit every (row, column) pair, rows varying slowest.
    for row, column in itertools.product(sims, repeat=2):
        common_matrix.loc[row, column] = fraction_common(
            visit_counts, row, column, match_count=match_count
        )
    return common_matrix

Matrix of fraction of coordinate/filter/exposure time combinations present in one simulation that are also present in another.

For example, column 1, row 2 has the fraction of such combinations present in simulation 1 that are also present in simulation 2.

In [None]:
# match_count=False: repeats of a combination within a simulation are counted once.
make_fraction_common_matrix(visit_counts, match_count=False)

Matrix of fraction of coordinate/filter/exposure time combinations present in one simulation that are also present in another, where repeats in both are considered additional matches and differences in number of repeats of a given combination are counted as occurrences in one but not the other.

In [None]:
# Default match_count=True: repeated visits of a combination are counted individually.
make_fraction_common_matrix(visit_counts)

## Timing offsets

In [None]:
def find_best_matches(start_times, sim_indexes=(1, 2), max_match_dist=np.inf):
    """Pair visit start times from two simulations so their offsets are small.

    Every visit of the simulation with fewer visits (the "shorter" one) is
    paired with a distinct visit of the other (the "longer" one), keeping
    both in their original order. The pairing with the smallest worst-case
    absolute offset is returned.

    Parameters
    ----------
    start_times : `pandas.Series`
        Visit start times (datetime-like values), indexed by simulation
        index.
    sim_indexes : sequence of two hashables
        The two simulations to match.
    max_match_dist : `float`
        Pairs whose absolute offset, in seconds, is not below this value
        are left out of the result.

    Returns
    -------
    best_match : `pandas.DataFrame`
        One row per matched pair, with one column of start times for each
        of the two simulations (named by simulation index) and a ``delta``
        column holding the shorter simulation's time minus the longer
        simulation's time, in seconds. Empty if either simulation has no
        entries in ``start_times``.
    """
    no_matches = pd.DataFrame({sim_indexes[0]: [], sim_indexes[1]: [], 'delta': []})

    for sim_index in sim_indexes:
        if sim_index not in start_times.index:
            return no_matches

    # Selecting with a list guarantees a Series even for a single visit.
    first_times = start_times.loc[[sim_indexes[0]]]
    second_times = start_times.loc[[sim_indexes[1]]]
    if len(first_times) >= len(second_times):
        sim_map = {'longer': sim_indexes[0], 'shorter': sim_indexes[1]}
        longer, shorter = first_times, second_times
    else:
        sim_map = {'longer': sim_indexes[1], 'shorter': sim_indexes[0]}
        longer, shorter = second_times, first_times

    num_longer = len(longer)
    num_shorter = len(shorter)

    if math.comb(num_longer, num_shorter) > 1000:
        # There are too many combinations to do a complete search in reasonable time
        # so assume the matches are sequential. A run of num_shorter
        # consecutive visits can start at any of (num_longer - num_shorter + 1)
        # positions; the "+ 1" includes the run ending on the last visit.
        num_offsets = num_longer - num_shorter + 1
        combo_iterator = (np.arange(num_shorter) + offset for offset in range(num_offsets))
    else:
        combo_iterator = itertools.combinations(range(num_longer), num_shorter)

    best_match = None
    best_diff = np.inf
    most_matches = 0
    for matched_ids in combo_iterator:
        candidate = pd.DataFrame(
            {'longer': longer.values[np.array(matched_ids)], 'shorter': shorter.values}
        )
        candidate['delta'] = (candidate['shorter'] - candidate['longer']).dt.total_seconds()
        abs_delta = candidate['delta'].abs()
        good_matches = candidate[abs_delta < max_match_dist]
        num_matches = len(good_matches)
        # Judge a pairing by its worst absolute offset: using the signed
        # maximum would favour pairings with large negative offsets.
        max_diff = abs_delta.max()
        if (num_matches >= most_matches) and (max_diff < best_diff):
            best_match = good_matches
            best_diff = max_diff
            most_matches = num_matches

    # No pairing qualified (e.g. the offsets could not be computed).
    if best_match is None:
        return no_matches

    return best_match.rename(columns=sim_map)

In [None]:
def compute_best_match_delta_stats(these_visits, sim_indexes):
    """Summarize the start time offsets between matched visits of two simulations.

    Parameters
    ----------
    these_visits : `pandas.DataFrame`
        Visits with ``sim_index`` and ``start_date`` columns.
    sim_indexes : sequence of two hashables
        The two simulations to compare, passed to `find_best_matches`.

    Returns
    -------
    stats : `pandas.Series`
        The `pandas.Series.describe` summary of the ``delta`` column of the
        best matches.
    """
    start_times = these_visits.set_index('sim_index').start_date
    best_matches = find_best_matches(start_times, sim_indexes)
    deltas = best_matches.loc[:, 'delta']
    return deltas.describe()

In [None]:
# Compare the visit start times of every other simulation against those of
# the reference simulation, one coordinate/filter/exposure time combination
# at a time.
reference_index = 1
delta_stats_list = []
for comparison_index in visits['sim_index'].unique():
    if comparison_index == reference_index:
        continue
    sim_indexes = [reference_index, comparison_index]
    these_delta_stats = (
        visits
        .groupby(['fieldRA', 'fieldDec', 'filter', 'visitExposureTime'])
        .apply(compute_best_match_delta_stats, sim_indexes=sim_indexes)
        # Drop combinations that had no matched visits.
        .query('count > 0')
        # assign returns a new frame, so nothing is written into the
        # filtered result of query.
        .assign(sim_index=comparison_index)
    )
    delta_stats_list.append(these_delta_stats)

# pd.concat fails on an empty list, which happens when the reference is the
# only simulation; show an empty table in that case.
if delta_stats_list:
    delta_stats = pd.concat(delta_stats_list).set_index('sim_index', append=True).sort_index()
else:
    delta_stats = pd.DataFrame()
delta_stats