# Prompt Processing Timings for {{ params.date }}

In [None]:
# Day of observation to analyze, written as an ISO "YYYY-MM-DD" string.
# dates for demo: 2023-08-29, 2023-08-17, 2023-08-16, 2023-08-15
# NOTE(review): the title cell templates `params.date`; presumably this hard-coded
# value is replaced when the notebook is run with parameters -- confirm.
date = "2023-08-29"

In [None]:
# Integer YYYYMMDD form of `date`: drop the dashes, then convert to int.
dayobs = int("".join(date.split("-")))

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from IPython.display import Markdown
import boto3
from astropy.time import Time, TimeDelta
from lsst_efd_client import EfdClient

In [None]:
import logging

# Notebook-wide logger named "analysis", set to pass records of every level.
# NOTE(review): no handler is attached in this cell, so whether INFO/DEBUG records
# are actually shown depends on logging configuration done elsewhere -- confirm.
logging.getLogger("analysis").setLevel(logging.DEBUG)
logger = logging.getLogger("analysis")

In [None]:
from lsst.daf.butler import Butler
# Shared Butler handle on the "/repo/embargo" repository. It is opened with
# writeable=False; the cells in this notebook only query it.
butler = Butler("/repo/embargo", writeable=False)

In [None]:
def get_file_timestamps(butler, datasetType, where="", collections=...):
    """
    Look up the S3 last-modified time of every file of one dataset type.

    Each dataset returned by the registry query is resolved to its URI with
    ``butler.getURI``, and the ``LastModified`` value of that object is read
    with an S3 ``head_object`` request against the USDF endpoint.

    Parameters
    ----------
    butler
        Butler providing ``registry.queryDatasets`` and ``getURI``.
    datasetType
        Dataset type handed to ``queryDatasets``.
    where : `str`, optional
        Query constraint handed to ``queryDatasets``; empty by default.
    collections : optional
        Collections handed to ``queryDatasets``; ``...`` by default.

    Returns
    -------
    timestamps : `dict`
        ``LastModified`` values keyed by the "visit" value of each data ID
        when the first dataset's data ID contains "visit", otherwise keyed
        by its "exposure" value.
    """

    usdf_endpoint = "https://s3dfrgw.slac.stanford.edu"
    s3 = boto3.client("s3", endpoint_url=usdf_endpoint)
    dataset_refs = butler.registry.queryDatasets(
        datasetType=datasetType,
        collections=collections,
        where=where,
    )

    modified_by_id = {}
    id_dimension = None
    for dataset_ref in dataset_refs:
        data_id = dataset_ref.dataId
        # The key dimension is decided once, from the first dataset seen.
        if id_dimension is None:
            id_dimension = "visit" if "visit" in data_id else "exposure"
        uri = butler.getURI(dataset_ref)
        head = s3client_head = s3.head_object(
            Bucket=uri.netloc,
            Key=uri.relativeToPathRoot,
        )
        modified_by_id[data_id[id_dimension]] = head["LastModified"]
    return modified_by_id

In [None]:
def get_exposure_end(butler, exp_id):
    """
    Return the end time of one LATISS exposure as recorded by the butler.

    The registry is asked for "visit" dimension records constrained to the
    "raw" datasets in "LATISS/raw/all" and to the given exposure; the UTC end
    of the first record's timespan is returned.

    Parameters
    ----------
    butler
        Butler providing ``registry.queryDimensionRecords``.
    exp_id
        Exposure ID bound to ``exp_id`` in the query expression.

    Returns
    -------
    end
        ``timespan.end.utc`` of the first matching record.

    Raises
    ------
    IndexError
        If the query yields no records.
    """
    query_options = dict(
        datasets="raw",
        collections="LATISS/raw/all",
        where="instrument='LATISS' and exposure=exp_id",
        bind={"exp_id": exp_id},
    )
    matching_records = list(
        butler.registry.queryDimensionRecords("visit", **query_options)
    )
    first_record = matching_records[0]
    return first_record.timespan.end.utc

In [None]:
async def get_groups_from_next_visit_events(date):
    """Return the group IDs of nextVisit events on a day of observation

    Both the nextVisit topic and its "Canceled" counterpart are queried from
    the "usdf_efd" EFD over a one-day window that opens 12 hours after the
    start of ``date`` (UTC).

    Parameters
    ----------
    date : `str`
        Day of observation, parsed by astropy as an "isot" UTC time.

    Returns
    -------
    good_events : [`str`] or `None`
        The groupIds of the events that were not canceled. `None` when the
        nextVisit query returns no rows at all; the list may be empty when
        no row passes the survey selection.
    """
    efd = EfdClient("usdf_efd")

    window_start = Time(date, scale="utc", format="isot") + TimeDelta(12*60*60, format="sec")
    window_end = window_start + TimeDelta(1, format="jd")

    next_visit_topic = "lsst.sal.ScriptQueue.logevent_nextVisit"
    events = await efd.select_time_series(
        next_visit_topic, ["*"], window_start.utc, window_end.utc
    )
    cancellations = await efd.select_time_series(
        f"{next_visit_topic}Canceled", ["*"], window_start.utc, window_end.utc
    )

    if events.empty:
        return None

    # Only select on-sky AuxTel imaging survey
    matches_coordinates = events["coordinateSystem"] == 2
    matches_sal_index = events["salIndex"] == 2
    matches_survey = events["survey"] == "AUXTEL_PHOTO_IMAGING"
    selected = events.loc[matches_coordinates & matches_sal_index & matches_survey]

    remaining = selected["groupId"].tolist()
    if cancellations.empty:
        logger.info("None canceled")
        return remaining

    # Each cancellation removes one matching entry from the selected groups.
    for canceled_group in cancellations["groupId"]:
        if canceled_group not in remaining:
            continue
        logger.info("%s was canceled", canceled_group)
        remaining.remove(canceled_group)

    return remaining

In [None]:
# Group IDs of the non-canceled nextVisit events for `date`; the helper returns
# None when its nextVisit query is empty. The coroutine is awaited at cell top
# level, which relies on the notebook kernel supporting top-level `await`.
groups = await get_groups_from_next_visit_events(date)

In [None]:
# Summarize how many nextVisit events survived cancellation.
# The Markdown object must be the cell's trailing top-level expression to be
# rendered; building it inside an if/else body discards it and shows nothing.
Markdown(
    "There were %i uncanceled nextVisit events on %s" % (len(groups), dayobs)
    if groups
    else "No records were found"
)

# Raw

In [None]:
# File last-modified times for the AUXTEL_PHOTO_IMAGING LATISS raws of `dayobs`.
timestamps = get_file_timestamps(
    butler,
    "raw",
    where=f"exposure.science_program IN ('AUXTEL_PHOTO_IMAGING') and instrument='LATISS' and exposure.day_obs={dayobs}",
    collections=["LATISS/raw/all"],
)

In [None]:
# Trailing expression of the cell, so the notebook renders the count.
Markdown("%i raws found in butler" % len(timestamps))

In [None]:
# Seconds from the end of each exposure to the last-modified time of its raw file.
seconds = [
    (Time(written) - get_exposure_end(butler, exp_id)).to_value('sec')
    for exp_id, written in timestamps.items()
]

In [None]:
# Ten-bin histogram of the exposure-end-to-raw-file delays.
n, bins, patches = plt.hist(seconds, bins=10)
plt.title(f"exposure end to raw files at USDF {dayobs}")
plt.xlabel('seconds')
plt.ylabel('counts')
plt.show()

# Prompt Processing data products

In [None]:
# Rebinds `timestamps` to the last-modified times of the "apdb_marker" files in
# the day's prompt-proto-service output collections.
timestamps = get_file_timestamps(
    butler,
    "apdb_marker",
    collections=[f"LATISS/prompt/output-{date}/ApPipe/prompt-proto-service-*"],
)

In [None]:
# Seconds from the end of each exposure to the last-modified time of its
# "apdb_marker" file; the `timestamps` keys are passed on as exposure IDs.
seconds = [
    (Time(written) - get_exposure_end(butler, exp_id)).to_value('sec')
    for exp_id, written in timestamps.items()
]

In [None]:
# Ten-bin histogram of the exposure-end-to-prompt-product delays.
n, bins, patches = plt.hist(seconds, bins=10)
plt.title(f"exposure end to prompt products {date}")
plt.xlabel('seconds')
plt.ylabel('counts')
plt.show()

In [None]:
import matplotlib.dates as mdates

# Delay of each prompt product plotted against the time its file was written,
# with one major tick per hour labelled as month-day and time of day.
plt.plot(timestamps.values(), seconds, '.')
time_axis = plt.gca().xaxis
time_axis.set_major_locator(mdates.HourLocator(interval=1))
time_axis.set_major_formatter(mdates.DateFormatter('%m-%d %H:%M:%S'))
plt.title(f"exposure end to prompt products {date}")
plt.xlabel('UTC time')
plt.ylabel('seconds')
plt.show()