In [None]:
# Range of day_obs for data query
import rubin_nights.dayobs_utils as rn_dayobs

# Number of days the query window extends back from `day_obs`.
n_days = 30
# Last day_obs of the window: today's day_obs, passed through
# day_obs_str_to_int so it can be used as an integer.
day_obs = rn_dayobs.day_obs_str_to_int(rn_dayobs.today_day_obs())

# On-sky Efficiency {params.day_obs_min} to {params.day_obs_max}

In [None]:
# Some configuration for password/RSP tokenfiles

import getpass
import os
import sys

# Login name of whoever is running the notebook.
username = getpass.getuser()
# EXTERNAL_INSTANCE_URL tells us where the notebook runs; it is read as an
# empty string when the variable is not set (i.e. not on an RSP).
current_location = os.getenv("EXTERNAL_INSTANCE_URL", "")

# At USDF, point RUBIN_SIM_DATA_DIR at the shared rubin_sim data directory.
if "usdf" in current_location:
    os.environ["RUBIN_SIM_DATA_DIR"] = "/sdf/data/rubin/shared/rubin_sim_data"

# TOKEN CONFIGURATION
# On an RSP (summit/base/USDF) the defaults are used: no explicit tokenfile
# or site.
tokenfile, site = None, None
if current_location == "":
    # Outside of an RSP: use USDF with your own USDF-RSP token.
    # See https://rsp.lsst.io/guides/auth/creating-user-tokens.html
    # ACCESS_TOKEN_FILE / DATA_SITE let you substitute your own locations.
    tokenfile = os.getenv("ACCESS_TOKEN_FILE", "")
    site = os.getenv("DATA_SITE", "")
    if not tokenfile:
        # No tokenfile configured: fall back to ~/.lsst/usdf_rsp at usdf.
        tokenfile = os.path.join(os.path.expanduser("~"), ".lsst/usdf_rsp")
        site = "usdf"
        site = "usdf"

In [None]:
# Imports
import warnings
import copy
import logging

import numpy as np
import pandas as pd
import sqlite3
import healpy as hp
import matplotlib.pyplot as plt
import colorcet as cc
import skyproj
from IPython.display import display, HTML

from astropy.time import Time, TimeDelta
from astropy.coordinates import SkyCoord
import astropy.units as u

from rubin_scheduler.scheduler.utils import SchemaConverter
from rubin_scheduler.site_models import Almanac
from rubin_scheduler.scheduler.features import Conditions
from rubin_scheduler.utils import (
    ddf_locations,
    angular_separation,
    approx_ra_dec2_alt_az,
    Site,
    SURVEY_START_MJD,
)

import rubin_sim.maf as maf
from rubin_sim.data import get_baseline

from rubin_nights import connections
import rubin_nights.dayobs_utils as rn_dayobs
import rubin_nights.plot_utils as rn_plots
import rubin_nights.augment_visits as augment_visits
import rubin_nights.rubin_scheduler_addons as rn_sch
import rubin_nights.rubin_sim_addons as rn_sim
import rubin_nights.observatory_status as observatory_status
import rubin_nights.scriptqueue as scriptqueue
import rubin_nights.scriptqueue_formatting as scriptqueue_formatting
import rubin_nights.targets_and_visits as targets_and_visits

import importlib

from lsst_survey_sim import lsst_support, simulate_lsst, plot

# Show INFO-level messages from the rubin_nights logger.
logging.getLogger("rubin_nights").setLevel(logging.INFO)
# Convenience alias for the band colors defined on rn_plots.PlotStyles.
band_colors = rn_plots.PlotStyles.band_colors

# %load_ext memory_profiler

In [None]:
# Set up connections to data. `endpoints` is indexed by client name; this
# notebook uses the "consdb", "efd", "exposure_log" and "narrative_log" entries.
# `tokenfile` is None when running on an RSP (see the token configuration cell).
endpoints = connections.get_clients(tokenfile)

In [None]:
# The query window ends at `day_obs` and starts `n_days` earlier.
day_obs_max = day_obs
# Step back n_days from the time of day_obs_max, then convert that time back
# to a day_obs and on to its integer form.
day_obs_min = rn_dayobs.day_obs_str_to_int(
    rn_dayobs.time_to_day_obs(
        rn_dayobs.day_obs_to_time(day_obs_max) - TimeDelta(n_days, format="jd")
    )
)
print(f"Querying for lsstcam visits {day_obs_min} to {day_obs_max}")

In [None]:
# Fetch the consdb visits for the day_obs window (or reload the cached copy).
# Later steps in this cell add flags for bad visits and filter changes, and
# compute slew times to separate expected overheads from excess.

# All programs are queried, so slew times and idle time cover the whole night,
# but only these programs are counted as "science".
programs = ["BLOCK-365", "BLOCK-407", "BLOCK-408"]

# Set to False to skip the consdb retrieval and reuse the cached v_now.h5
# (much quicker when re-running this cell to tweak the steps below).
refresh_visits = True
if not refresh_visits:
    visits = pd.read_hdf("v_now.h5")
else:
    # NOTE: skip_imgtypes is not referenced by the query; the where-clause
    # below spells out the same three image types literally.
    skip_imgtypes = ["bias", "flat", "dark"]
    query = (
        "select *, q.* from cdb_lsstcam.visit1 left join cdb_lsstcam.visit1_quicklook as q on visit1.visit_id = q.visit_id "
        f"where visit1.day_obs >= {day_obs_min} and visit1.day_obs <= {day_obs_max} and img_type != 'bias' and img_type != 'flat' and img_type != 'dark'"
    )
    visits = augment_visits.augment_visits(endpoints["consdb"].query(query), "lsstcam")
    # Replace the index with a fresh 0..N-1 one and discard the old values.
    visits = visits.reset_index().drop(columns="index")
    # Cache the result; warnings raised while writing are captured, not shown.
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        visits.to_hdf("v_now.h5", key="visits")

n_science_visits = len(visits.query("science_program in @programs"))
print(
    "all visits:",
    len(visits),
    "science visits:",
    n_science_visits,
)


if len(visits) > 0:

    # Add the derived columns up front, all initialised to 0.0; the steps
    # below overwrite each of them.
    cols = ["overhead", "fault_idle", "program_change", "filter_change", "bad_flag"]
    new_df = pd.DataFrame(0.0, index=visits.index, columns=cols)
    visits = visits.merge(new_df, left_index=True, right_index=True)

    # Flag science_program changes: compare each visit's program with the
    # previous visit's; the +1 moves the flag onto the later visit of the pair.
    program_values = visits.science_program.values
    program_change = np.where(program_values[:-1] != program_values[1:])[0] + 1
    pmask = np.zeros(len(visits))
    # The first visit has no predecessor and is always flagged.
    pmask[0] = 1
    pmask[program_change] = 1
    visits["program_change"] = pmask

    # Flag filter changes: the band differs from the previous visit's band
    # while both visits share the same day_obs (so a new band on the first
    # visit of a day_obs is not flagged).
    band_values = visits.band.values
    night_values = visits.day_obs.values
    band_differs = band_values[:-1] != band_values[1:]
    same_night = night_values[:-1] == night_values[1:]
    # +1 moves the flag onto the later visit of each pair.
    filter_change = np.where(band_differs & same_night)[0] + 1
    fmask = np.zeros(len(visits))
    fmask[filter_change] = 1
    visits["filter_change"] = fmask

    # Calculate model slew times and from them the expected overhead per visit.
    max_scatter = 5.5
    settle = 1.5
    wait_before_slew = 1.6
    visits, slewing = rn_sch.add_model_slew_times(
        visits,
        endpoints["efd"],
        model_settle=wait_before_slew + settle,
        dome_crawl=False,
        slew_while_changing_filter=False,
    )
    # Expected overhead = model slew time (NaN counted as 0) plus max_scatter,
    # but never more than the gap that was actually observed.
    modeled_slew = visits.slew_model.values
    modeled_slew_or_zero = np.where(np.isnan(modeled_slew), 0, modeled_slew)
    valid_overhead = np.min(
        [modeled_slew_or_zero + max_scatter, visits.visit_gap.values],
        axis=0,
    )
    visits["overhead"] = valid_overhead

    # Need to remove faults for the first visit of the night or where there was a different program we didn't fetch
    # (could skip *some* of this by fetching all visits, but might still have some missing due to flats?)
    # skipped_visits holds row 0 plus every row whose visit_id is not exactly
    # the previous row's visit_id + 1, i.e. rows preceded by a break in the
    # visit_id sequence.
    skipped_visits = np.concatenate(
        [
            np.array([0]),
            np.where(visits.visit_id[:-1].values + 1 != visits.visit_id[1:].values)[0]
            + 1,
        ]
    )

    # Fault/idle time is whatever part of the visit gap exceeds the expected overhead.
    fault = visits.visit_gap - valid_overhead
    # skipped_visits are positions; they are used here (and in .loc below) as
    # index labels, which assumes the index is the default 0..N-1 one.
    fault[skipped_visits] = np.nan
    visits["fault_idle"] = fault

    # The gap before a skipped visit is not meaningful, so blank model_gap too.
    visits.loc[skipped_visits, "model_gap"] = np.nan

    # Reset bad_flag to integer zeros, then mark every visit that appears in
    # the lsst-dm excluded visit list.
    visits["bad_flag"] = np.zeros(len(visits), int)
    bad_visit_ids = augment_visits.fetch_excluded_visits("lsstcam")
    idx = visits.query("visit_id in @bad_visit_ids").index
    visits.loc[idx, "bad_flag"] = 1

    # Also pull the exposure log, which is used below for more bad visits.
    # NOTE(review): the end bound is day_obs_to_time(day_obs_max); if that is
    # the *start* of day_obs_max, entries from the final night are not
    # returned -- confirm.
    exp_log_start = rn_dayobs.day_obs_to_time(day_obs_min)
    exp_log_end = rn_dayobs.day_obs_to_time(day_obs_max)
    ee = endpoints["exposure_log"].query_log(exp_log_start, exp_log_end)

    def make_visit_id(x):
        """Build the integer visit id for one exposure-log row.

        The id is the digits of ``day_obs`` followed by ``seq_num``
        zero-padded to five digits.

        An integer is returned (not the digit string) because the ids are
        matched against ``visits.visit_id``, which is numeric; string ids
        never compare equal to it, so no visit would ever be flagged.

        Parameters
        ----------
        x : row-like
            Object with integer ``day_obs`` and ``seq_num`` attributes.

        Returns
        -------
        int
            The combined visit id.
        """
        return int(f"{x.day_obs:d}{x.seq_num:05d}")

    # Exposure-log entries flagged 'junk' also mark their visits as bad.
    junk_entries = ee.query("exposure_flag == 'junk'")
    exp_log_bad_visit_ids = junk_entries.apply(make_visit_id, axis=1).values
    idx = visits.query("visit_id in @exp_log_bad_visit_ids").index
    visits.loc[idx, "bad_flag"] = 1

    # Science subset: visits belonging to the programs listed in `programs`.
    sci = visits.query("science_program in @programs")

else:
    # No visits came back for the window: none of the derived columns above
    # were added, and the following cells expect them.
    print("Found no visits")
    print("The remainder of this notebook requires visits.")

## thoughts -- need to mark time associated with bad visits as fault somehow ..

In [None]:
# Summarize time spent on science visits, then (further below) plot the
# expected slew time against the actual visit gap.
# `dayobs` can be narrowed to a subset of days when needed.

print("For values attributable to SCIENCE visits only:")

dayobs = visits.day_obs.unique()

q = visits.query("science_program in @programs and day_obs in @dayobs")

# Column sums are in seconds; dividing by 3600 gives hours.
shutter_sum = q.shut_time.sum()
overhead_sum = q.overhead.sum()
fault_idle_sum = q.fault_idle.sum()
total_time = (shutter_sum + overhead_sum + fault_idle_sum) / 3600
total_onsky = q.exp_time.sum() / 3600
total_req = (shutter_sum + overhead_sum) / 3600
total_fault_idle = fault_idle_sum / 3600

summary_rows = {
    "time for visits": total_time,
    "onsky exptime": total_onsky,
    "onsky+overhead": total_req,
    "fault+idle_sci": total_fault_idle,
    "nvis": len(q),
    # 30 is presumably a nominal per-visit time in seconds -- confirm.
    "estimate time onsky": len(q) * 30 / 3600,
}
dd = pd.DataFrame(
    list(summary_rows.values()),
    index=list(summary_rows.keys()),
    columns=["all " + "_".join(programs)],
)
display(dd.T)

# Column selection for inspecting individual visits by hand.
cols = [
    "visit_id", "img_type", "observation_reason", "obs_start_mjd", "band",
    "s_ra", "s_dec", "sky_rotation", "altitude", "azimuth",
    "physical_rotator_angle", "clouds", "fwhm_eff", "filter_change",
    "slew_distance", "slew_model", "visit_gap", "model_gap",
]  # 'overhead' and 'fault_idle' can be added as well
# Example: display(sci.query("model_gap > 2 and model_gap < 10")[cols])

print(
    f"Min/median/mean predicted overheads: {q.overhead.min():.2f} {q.overhead.median():.2f} {q.overhead.mean():.2f}"
)
print(
    f"Min/median/mean actual visit gaps: {q.visit_gap.min():.2f} {q.visit_gap.median():.2f} {q.visit_gap.mean():.2f}"
)

fig, axes = plt.subplots(1, 3, figsize=(20, 5))

# Panels 1 and 2 show the same data (model slew time against actual visit
# gap); panel 1 is limited to 0-30 on both axes, panel 2 is auto-scaled.
# The diagonal is the one-to-one line and the pink band covers visit gaps
# between the model value and the model value + max_scatter.
x = np.arange(0, 500, 1)
for ax, zoom_in in zip(axes[:2], (True, False)):
    ax.plot(q.visit_gap, q.slew_model, "k.")
    ax.plot(x, x, alpha=0.3)
    ax.fill_betweenx(x1=x + max_scatter, x2=x, y=x, color="pink", alpha=0.2)
    if zoom_in:
        ax.set_xlim(0, 30)
        ax.set_ylim(0, 30)
    ax.grid(alpha=0.4)
    ax.set_xlabel("Visit gap (seconds)")
    ax.set_ylabel("Slew model (seconds)")

# Panel 3: model_gap as a function of slew distance.
ax = axes[2]
ax.plot(q.slew_distance, q.model_gap, "k.")
ax.set_ylim(-10, 10)
ax.grid(alpha=0.3)
ax.set_xlabel("Slew distance (deg)")
_ = ax.set_ylabel("visit_gap - slew_model (seconds)")

In [None]:
# Summarize effect of current slew vs. "ideal" slew (40% + 3s settle)
print("Slew effects only (no fault or idle)")
# All nights by default; replace with an explicit list (e.g. [20251108]) to
# restrict the comparison to particular nights.
dayobs = visits.day_obs.unique()
q = visits.query("science_program in @programs and day_obs in @dayobs")

# "Open shutter fraction" without faults: total exposure time divided by
# dark time plus either the estimated overhead or the ideal-model slew time.
exposure_total = q.exp_time.sum()
dark_total = q.dark_time.sum()
slew_eff = exposure_total / (dark_total + q.overhead.sum())
ideal_eff = exposure_total / (dark_total + q.slew_model_ideal.sum())
print(f"Slew efficiency factor: {slew_eff: 0.2f}")
print(f"Ideal model efficiency equivalent: {ideal_eff: 0.2f}")
print(f"Ratio: slew / ideal {slew_eff / ideal_eff :0.2f}")

In [None]:
# Get narrative time lost logs (entries with time_lost of at least 0.00001)
# NOTE(review): the end bound is day_obs_to_time(visits.day_obs.max()); if
# that is the *start* of that day_obs, entries from the final night are not
# returned -- confirm.
time_lost_logs = endpoints["narrative_log"].query_log(
    rn_dayobs.day_obs_to_time(visits.day_obs.min()),
    rn_dayobs.day_obs_to_time(visits.day_obs.max()),
    {"min_time_lost": "0.00001"},
)
# Drop entries whose component mentions AuxTel. The explicit .copy() makes the
# filtered frame independent of the query result, so adding the day_obs
# column to it further down does not raise SettingWithCopyWarning.
time_lost_logs = time_lost_logs.query(
    "not component.str.contains('AuxTel')"
).copy()


def time_to_day_obs(x):
    """Return the integer day_obs (YYYYMMDD) of a narrative-log row.

    The observing day rolls over 12 hours after midnight, so the calendar
    date is taken after shifting ``date_begin`` back by 12 hours. Using the
    unshifted calendar date would assign every entry logged after 00:00 to
    the following day_obs, and it would then be matched against the wrong
    night of visits.

    NOTE(review): assumes ``date_begin`` is a UTC (or TAI) ISO timestamp --
    confirm against the narrative log schema.

    Parameters
    ----------
    x : row-like
        Object with a ``date_begin`` attribute holding an ISO-format
        timestamp string (e.g. ``"2025-11-09T03:15:00"``).

    Returns
    -------
    int
        The day_obs as YYYYMMDD.
    """
    shifted = pd.Timestamp(x.date_begin) - pd.Timedelta(hours=12)
    return int(shifted.strftime("%Y%m%d"))


def _nightly_time_lost(logs, selection, column):
    """Sum ``time_lost`` per day_obs for the rows of ``logs`` matching ``selection``.

    Returns a frame indexed by day_obs with the single column ``column``.
    """
    selected = logs.query(selection)
    per_night = selected.groupby("day_obs").agg({"time_lost": "sum"})
    return per_night.rename({"time_lost": column}, axis=1)


# Tag every log entry with its day_obs, then total the time lost per night
# separately for faults and for weather.
time_lost_logs["day_obs"] = time_lost_logs.apply(time_to_day_obs, axis=1)
log_fault = _nightly_time_lost(
    time_lost_logs, "time_lost_type == 'fault'", "log_fault"
)
log_weather = _nightly_time_lost(
    time_lost_logs, "time_lost_type == 'weather'", "log_weather"
)
# Outer merge keeps nights that have only one of the two kinds of loss.
log_lost = log_fault.merge(log_weather, how="outer", on="day_obs")

In [None]:
# Estimate time lost from visit gaps and overheads ..
# Estimate some versions of effective system availability (per day)

# One entry per day_obs; each value is a list in the order of `row_labels`
# below. The frame is built column-per-night and then transposed.
dd = {}
for ddayobs in visits.day_obs.unique():
    q = visits.query("day_obs == @ddayobs")
    nvis = len(q)

    # Fault/idle hours over all visits (expect more idle during other
    # surveys), and the same restricted to visit gaps longer than 5 minutes.
    all_fault_idle = round(q.fault_idle.sum() / 60 / 60, 2)
    gap_fault_idle = round(q.query("visit_gap > 5*60").fault_idle.sum() / 60 / 60, 2)

    # Night length in hours, between the sun_alt=-12 sunset and sunrise.
    # (Dome open/close times are not considered.)
    sunset, sunrise = rn_dayobs.day_obs_sunset_sunrise(int(ddayobs), sun_alt=-12)
    night_hours = (sunrise.mjd - sunset.mjd) * 24

    # Hours from sunset to the first science visit, and from the last science
    # visit to sunrise.
    qq = q.query("science_program in @programs")
    if len(qq) > 0:
        twi_to_start = (qq.obs_start_mjd.min() - sunset.mjd) * 24
        end_to_twi = (sunrise.mjd - qq.obs_end_mjd.max()) * 24
    else:
        # No science visits this night: the whole night minus the long-gap
        # fault/idle time is counted as time before the start.
        twi_to_start = night_hours - gap_fault_idle
        end_to_twi = 0

    # Time actually spanned by the science (FBS) visits compared with the
    # time predicted for the same visits (dark time + ideal slew model).
    fbs = qq
    time_in_fbs = (fbs.obs_end_mjd.max() - fbs.obs_start_mjd.min()) * 24
    time_predict_in_fbs = (fbs.dark_time.sum() + fbs.slew_model_ideal.sum()) / 60 / 60

    dd[ddayobs] = [
        ddayobs,
        night_hours,
        nvis,
        twi_to_start,
        end_to_twi,
        time_in_fbs,
        time_predict_in_fbs,
        all_fault_idle,
        gap_fault_idle,
    ]

row_labels = [
    "day_obs",
    "night_hours",
    "n_visits",
    "twi_to_start",
    "end_to_twi",
    "time_in_fbs",
    "time_predict_in_fbs",
    "total_fault_idle",
    "total_fault_idle_gap",
]
dd = pd.DataFrame(dd, index=row_labels).T
# Attach the logged fault/weather time lost and index the table by day_obs.
dd = dd.merge(log_lost, how="outer", on="day_obs")
dd["day_obs"] = dd["day_obs"].astype(int)
dd = dd.set_index("day_obs")

# Time lost at the start/end of the night: capped at 1.8 hours (the min is
# used because sometimes FBS is not run at all).
# NOTE(review): the uncapped term is night_hours - twi_to_start - end_to_twi,
# not twi_to_start + end_to_twi -- confirm this is the intended quantity.
night_end_cap = np.ones(len(dd)) * 1.8
missing_night_ends = np.min(
    [
        dd.night_hours.values - dd.twi_to_start.values - dd.end_to_twi.values,
        night_end_cap,
    ],
    axis=0,
)

# Fraction of the night which could have been available for science.
usable_hours = dd.night_hours - missing_night_ends
ratio_night_used = usable_hours / dd.night_hours

# Fraction of the night available/run once long-gap fault/idle time is also
# removed; negative values are floored at 0.
ratio = ((usable_hours - dd.total_fault_idle_gap) / dd.night_hours).round(2)
ratio = np.where(ratio <= 0, 0, ratio)
dd["ratio_all_times"] = ratio * slew_eff / ideal_eff
dd["ratio_fbs_times"] = (
    dd.time_predict_in_fbs / dd.time_in_fbs * ratio_night_used
).round(2)
dd["ratio_sim_ref"] = ((dd.night_hours - 0.37) / dd.night_hours).round(2)

# Append a "total" row: sums for the plain columns, means for the ratios.
ratio_cols = [c for c in dd if "ratio" in c]
non_ratio_cols = [c for c in dd if "ratio" not in c]
dd.loc["total", non_ratio_cols] = dd[non_ratio_cols].sum(axis=0)
dd.loc["total", ratio_cols] = dd[ratio_cols].mean(axis=0)
display(dd.round(2))

# These summaries are statistics over individual nights. `dd` already has a
# "total" row appended above, so leave it out here; otherwise the printed sum
# counts every night twice and the means are skewed by the aggregate row.
per_night = dd.loc[dd.index != "total"]

print(
    f"fault+idle (hours) - total: {per_night.total_fault_idle_gap.sum():.2f} mean: {np.nanmean(per_night.total_fault_idle_gap):.2f}"
)

# make an average ratio, with a fudge factor (1.8 hours) for time lost at ends of night
ave_ratio = np.nanmean(
    (per_night.night_hours - per_night.total_fault_idle_gap - 1.8)
    / per_night.night_hours
)
# Same ratio with only 0.37 hours removed from each night, for comparison.
ave_sim_ratio = np.mean((per_night.night_hours - 0.37) / (per_night.night_hours))
print(f"some kind of average ratio {ave_ratio:.2f} compare to {ave_sim_ratio:.2f}")
print(f"Slew performance ratio {slew_eff / ideal_eff:.2f}")
print(
    f"System availability estimate all: {np.nanmean(per_night.ratio_all_times):.2f}"
)
print(
    f"System availability estimate fbs: {np.nanmean(per_night.ratio_fbs_times):.2f}"
)

In [None]:
# Reference numbers for the comparison simulation named in the message below.
print("Using baseline_v5.1.0_10yrs as the comparison")

# NOTE(review): presumably (WFD visits) / (all visits) in that simulation --
# confirm where these two counts come from.
fraction_wfd = 1692518.00 / 2075536.00
# median_nvis / 825 * 0.9 / fraction_wfd, with median_nvis = 762.
fO_comparison = 762.00 / 825 * 0.9 / fraction_wfd

print(f"fraction WFD in this sim: {fraction_wfd:.2f}")
print(f"median_nvis / 825 * 0.9 / fraction_wfd = {fO_comparison:.2f}")

In [None]:
# Plot of the above system availability * fO_comparison info per night
plt.figure(figsize=(12, 6))
# Drop the last row of dd (the appended "total" row) so only nights remain.
q = dd.iloc[:-1]
x = np.arange(0, len(q))
for ratio_column, ratio_label in (
    ("ratio_fbs_times", "efficiency fbs visits"),
    ("ratio_all_times", "efficiency all visits"),
):
    y = q[ratio_column] * fO_comparison
    plt.plot(x, y, marker=".", label=ratio_label)
# Single mean over both scaled series together, ignoring NaN.
yy = np.nanmean(
    np.concatenate(
        [
            q["ratio_all_times"].values * fO_comparison,
            q["ratio_fbs_times"].values * fO_comparison,
        ]
    )
)
plt.axhline(yy, color="gray", linestyle=":", label="mean")
_ = plt.xticks(x, q.index, rotation=90)
_ = plt.ylabel("SA*fO")
_ = plt.legend(loc=(1.01, 0.5))
plt.grid(alpha=0.2)

# Distribution of the per-night ratios.
# NOTE(review): the x-axis is labelled "SA * fO", but the values plotted here
# are NOT multiplied by fO_comparison (unlike the figure above) -- confirm
# which of the two is intended.
plt.figure(figsize=(8, 6))
for ratio_column, ratio_label in (
    ("ratio_fbs_times", "efficiency fbs visits"),
    ("ratio_all_times", "efficiency all visits"),
):
    y = q[ratio_column]
    _ = plt.hist(y, bins=30, alpha=0.5, label=ratio_label)
plt.xlabel("SA * fO")
plt.ylabel("Nnights")
_ = plt.legend()