# Prompt Processing Timings for {{ params.date }}

In [None]:
# Notebook parameters.
# dates for demo: 2023-08-29, 2023-08-17, 2023-08-16, 2023-08-15
# NOTE(review): the demo dates above predate the current default — confirm they are still useful examples.
date = "2025-04-19"  # ISO day string; converted to dayobs and used as the start of the EFD query window
instrument = "LSSTCam"  # selects sal_index / n_detector and appears in the butler collection names
survey = "BLOCK-365"  # matched against exposure.science_program and the nextVisit "survey" column

In [None]:
# Integer form of `date` with the dashes removed (YYYYMMDD), used in the butler day_obs constraints.
dayobs = int(date.replace("-", ""))

In [None]:
# Per-instrument settings as (sal_index, n_detector).
# sal_index is compared against the "salIndex" column of the nextVisit events;
# n_detector is presumably the detector count of the camera (not used in the
# visible cells).
_INSTRUMENT_CONFIG = {
    "LATISS": (2, 1),
    "LSSTCam": (1, 189),
    "LSSTComCamSim": (3, 9),
}

try:
    sal_index, n_detector = _INSTRUMENT_CONFIG[instrument]
except KeyError:
    # Fail here with a clear message: `logger` is not defined yet at this point
    # of the notebook, and continuing would leave sal_index / n_detector unset
    # for the cells that follow.
    raise ValueError(f"Unknown instrument {instrument}") from None

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from IPython.display import Markdown
import boto3
from astropy.time import Time, TimeDelta
from lsst_efd_client import EfdClient

In [None]:
import logging

# Notebook-wide logger named "analysis". The DEBUG level means the info
# messages emitted by the helper functions pass this logger's own level check.
logger = logging.getLogger("analysis")
logger.setLevel(logging.DEBUG)

In [None]:
from lsst.daf.butler import Butler
# Butler for the /repo/embargo repository, opened with writeable=False; used by the queries in this notebook.
butler = Butler("/repo/embargo", writeable=False)

In [None]:
def get_file_timestamps(butler, datasetType, where="", collections=...):
    """
    Get the last-modified timestamps of the dataset files in a bucket-based butler repo at USDF

    Every dataset matching the query is resolved to its URI through the
    butler, and the ``LastModified`` value of that object is read with an S3
    ``head_object`` request.

    Parameters
    ----------
    butler
        Butler used to query the datasets and to resolve their URIs.
    datasetType
        Dataset type passed to ``registry.queryDatasets``.
    where : `str`, optional
        Query constraint passed to ``registry.queryDatasets``.
    collections : optional
        Collections passed to ``registry.queryDatasets``; defaults to ``...``.

    Returns
    -------
    timestamps : `list` [`tuple`]
        One ``(id, detector, last_modified)`` tuple per dataset. ``id`` is the
        ``visit`` data ID value when the first dataset has a ``visit`` key,
        otherwise the ``exposure`` value.
    """
    client = boto3.client(
        "s3", endpoint_url="https://sdfembs3.sdf.slac.stanford.edu"
    )
    matching_refs = butler.registry.queryDatasets(
        datasetType=datasetType,
        collections=collections,
        where=where,
    )

    id_key = None  # decided from the first dataset, then reused for all of them
    records = []
    for dataset_ref in matching_refs:
        if id_key is None:
            id_key = "visit" if "visit" in dataset_ref.dataId else "exposure"
        uri = butler.getURI(dataset_ref)
        # The bucket name is whatever follows the last "@" in the URI netloc.
        bucket = uri.netloc.split("@")[-1]
        head = client.head_object(Bucket=bucket, Key=uri.relativeToPathRoot)
        records.append(
            (
                dataset_ref.dataId[id_key],
                dataset_ref.dataId["detector"],
                head["LastModified"],
            )
        )

    return records

In [None]:
def get_exposure_end(butler, exp_id):
    """
    Return exposure end time according to butler

    Parameters
    ----------
    butler
        Butler whose registry is queried for the ``exposure`` dimension record.
    exp_id
        Exposure ID to look up. The instrument comes from the notebook-level
        ``instrument`` variable.

    Returns
    -------
    end
        The ``timespan.end.utc`` value of the first matching exposure record.

    Raises
    ------
    IndexError
        If the registry has no exposure record for ``exp_id``.
    """
    # Materialize the result once instead of calling count() and then
    # iterating the query result a second time.
    records = list(
        butler.registry.queryDimensionRecords(
            "exposure",
            where="instrument=instrument_name and exposure=exp_id",
            bind={"instrument_name": instrument, "exp_id": exp_id},
        )
    )
    if not records:
        # Same exception type the bare [0] lookup raised, but with the
        # exposure ID in the message.
        raise IndexError(f"No records for exp {exp_id}")
    return records[0].timespan.end.utc

In [None]:
async def get_groups_from_next_visit_events(date):
    """Return the group IDs of nextVisit events on a day of observation

    The EFD is queried over a one-day window starting at 12:00 UTC on
    ``date``. Events are kept only when ``coordinateSystem`` is 2 and their
    ``salIndex`` and ``survey`` match the notebook-level ``sal_index`` and
    ``survey``. Groups that also appear in the nextVisitCanceled topic are
    dropped.

    Parameters
    ----------
    date : `str`
        Day of observation as an ISO date string, e.g. ``"2025-04-19"``.

    Returns
    -------
    good_events : [`str`] or `None`
        The groupIds of the events that were not canceled, or `None` when the
        window contains no nextVisit events at all.
    """
    client = EfdClient("usdf_efd")

    start = Time(date, scale="utc", format="isot") + TimeDelta(
        12 * 60 * 60, format="sec"
    )
    end = start + TimeDelta(1, format="jd")

    topic = "lsst.sal.ScriptQueue.logevent_nextVisit"
    df = await client.select_time_series(topic, ["*"], start.utc, end.utc)
    if df.empty:
        return None

    # Only select on-sky imaging survey data
    df = df.loc[
        (df["coordinateSystem"] == 2)
        & (df["salIndex"] == sal_index)
        & (df["survey"] == survey)
    ]
    good_events = df["groupId"].tolist()

    # Query the cancellations only once we know there were nextVisit events.
    df_canceled = await client.select_time_series(
        topic + "Canceled", ["*"], start.utc, end.utc
    )
    if df_canceled.empty:
        logger.info("None canceled")
        return good_events

    # Set membership keeps the filter linear and drops every occurrence of a
    # canceled group, not just the first one.
    canceled = set(df_canceled["groupId"])
    kept = []
    for group in good_events:
        if group in canceled:
            logger.info("%s was canceled", group)
        else:
            kept.append(group)

    return kept

In [None]:
# Top-level await, as supported in a notebook cell. `groups` is either None
# (no nextVisit events found) or a list of groupId strings.
groups = await get_groups_from_next_visit_events(date)

In [None]:
# An expression inside an if/else branch is not auto-rendered by the notebook;
# only the final top-level expression of a cell is. Build the text first and
# end the cell with the Markdown object so the summary is actually shown.
if groups:
    summary = "There were %i uncanceled nextVisit events on %s" % (len(groups), dayobs)
else:
    summary = "No records were found"
Markdown(summary)

# Raw

In [None]:
# butler repo sanity check: count the exposure records for this survey,
# instrument and day_obs
exposure_records = butler.registry.queryDimensionRecords(
    "exposure",
    where=f"exposure.science_program IN ('{survey}') and instrument='{instrument}' and day_obs={dayobs}",
)
count = exposure_records.count()
Markdown("%i exposure in butler registry records" % count)

In [None]:
# Last-modified times of the raw files for the same survey / instrument /
# day_obs selection used in the registry sanity check.
raw_selection = f"exposure.science_program IN ('{survey}') and instrument='{instrument}' and day_obs={dayobs}"
timestamps = get_file_timestamps(
    butler,
    "raw",
    where=raw_selection,
    collections=[f"{instrument}/raw/all"],
)

In [None]:
# get_file_timestamps appends one entry per dataset, so the length is the raw dataset count.
Markdown("%i raw datasets in butler" % (len(timestamps),))

In [None]:
# `timestamps` holds one (exposure, detector, time_written) entry per file, so
# the same exposure appears once per detector. Look each exposure's end time
# up once and reuse it instead of querying the registry for every file.
exposure_ends = {}
seconds = []
for exp_id, _detector, time_written in timestamps:
    if exp_id not in exposure_ends:
        exposure_ends[exp_id] = get_exposure_end(butler, exp_id)
    delay = Time(time_written) - exposure_ends[exp_id]
    seconds.append(delay.to_value("sec"))

In [None]:
from statistics import median, mean

# statistics.mean / median raise StatisticsError on an empty sequence, so
# report the empty case explicitly instead of aborting the notebook.
if seconds:
    print(
        f"Raw arrivial: mean: {mean(seconds):.2f} seconds, median: {median(seconds):.2f} seconds"
    )
else:
    print("No raw arrival timings to summarize")

In [None]:
# Delays at or above this many seconds are left out of the histogram.
threshold = 2 * 60
seconds_subset = [elapsed for elapsed in seconds if elapsed < threshold]
count_skip = len(seconds) - len(seconds_subset)
print(f"{count_skip} took longer than {threshold} seconds; excluding them below")

In [None]:
# Histogram of the raw arrival delays that fall under the threshold.
n, bins, patches = plt.hist(seconds_subset, bins=100)
plt.title(f"exposure end to raw files at USDF {dayobs}; excluding {count_skip}")
plt.xlabel("seconds")
plt.ylabel("counts")
plt.show()

In [None]:
# Arrival delay of every raw file against the time the file was written.
# The y-axis is clipped at the threshold, so slower arrivals fall off the plot.
written_times = [written for _, _, written in timestamps]
plt.plot(written_times, seconds, ".")
plt.xlabel("UTC time")
plt.ylabel("seconds")
plt.title(f"exposure end to raw arrival at USDF {date}")
plt.ylim([0, threshold])

# One labelled tick per hour on the time axis.
time_axis = plt.gca().xaxis
time_axis.set_major_formatter(mdates.DateFormatter("%m-%d %H:%M"))
time_axis.set_major_locator(mdates.HourLocator(interval=1))
plt.xticks(rotation=90)
plt.show()

# Prompt Processing data products

In [None]:
# Dataset type whose file timestamps are measured in the cells that follow.
# The trailing comment names another dataset type, presumably an alternative to try.
dataset_type = "initial_pvi"  # "apdb_marker"

In [None]:
# Prompt-processing output collections for this date; the trailing "*" is
# presumably expanded as a wildcard by the butler query.
prompt_collections = [f"{instrument}/prompt/output-{date}/*"]
timestamps = get_file_timestamps(
    butler, dataset_type, collections=prompt_collections
)

In [None]:
# `timestamps` holds one (id, detector, time_written) entry per file, so the
# same ID appears once per detector. Look each end time up once and reuse it
# instead of querying the registry for every file.
# NOTE(review): get_file_timestamps returns the visit ID when the dataset has a
# "visit" key, and it is passed here as an exposure ID — this assumes the two
# IDs coincide; confirm.
exposure_ends = {}
seconds = []
for exp_id, _detector, time_written in timestamps:
    if exp_id not in exposure_ends:
        exposure_ends[exp_id] = get_exposure_end(butler, exp_id)
    delay = Time(time_written) - exposure_ends[exp_id]
    seconds.append(delay.to_value("sec"))

In [None]:
# Histogram of the delay between exposure end and the prompt product files.
n, bins, patches = plt.hist(seconds, bins=100)
plt.title(f"exposure end to prompt products {date}")
plt.xlabel("seconds")
plt.ylabel("counts")
plt.show()

In [None]:
# Delay of every prompt product file against the time the file was written.
written_times = [written for _, _, written in timestamps]
plt.plot(written_times, seconds, ".")
plt.xlabel("UTC time")
plt.ylabel("seconds")
plt.title(f"exposure end to prompt products {date}")

# One labelled tick per hour on the time axis.
time_axis = plt.gca().xaxis
time_axis.set_major_formatter(mdates.DateFormatter("%m-%d %H:%M"))
time_axis.set_major_locator(mdates.HourLocator(interval=1))
plt.xticks(rotation=90)
plt.show()