# Prompt Processing Bean Counting for {{ params.date }}

In [None]:
# Observing day to analyse, as an ISO "YYYY-MM-DD" string.
# NOTE(review): presumably overridden by the notebook's ``params.date`` template parameter - confirm.
date = '2023-12-22'

In [None]:
# Integer form of the date with the dashes removed (e.g. 20231222).
dayobs = int(date.replace("-", ""))

In [None]:
from astropy.time import Time, TimeDelta
import pandas
from lsst_efd_client import EfdClient

In [None]:
import logging
# Notebook-wide logger; its level is set to DEBUG so no message logged
# through it is dropped by the logger's own level filter.
logger = logging.getLogger("analysis")
logger.setLevel(logging.DEBUG)

In [None]:
async def get_df_from_next_visit_events(date):
    """Fetch the AuxTel imaging-survey nextVisit events for one day.

    Queries the ``usdf_efd`` EFD instance for
    ``lsst.sal.ScriptQueue.logevent_nextVisit`` messages in the 24-hour
    window that starts at 12:00 UTC on ``date``. Events are kept when
    ``coordinateSystem == 2``, ``salIndex == 2`` and
    ``survey == "AUXTEL_PHOTO_IMAGING"``. Groups that also appear in the
    ``logevent_nextVisitCanceled`` topic are removed.

    Parameters
    ----------
    date : `str`
        Day to query, as an ISO date string such as ``"2023-12-22"``.

    Returns
    -------
    df : `pandas.DataFrame` or `None`
        The selected events indexed by ``groupId``, or `None` if the
        nextVisit topic has no events at all in the window.
    """
    client = EfdClient("usdf_efd")

    # Window: 12:00 UTC on ``date`` until 12:00 UTC the following day.
    start = Time(date, scale="utc", format="isot") + TimeDelta(12*60*60, format="sec")
    end = start + TimeDelta(1, format="jd")

    topic = "lsst.sal.ScriptQueue.logevent_nextVisit"
    df = await client.select_time_series(topic, ["*"], start.utc, end.utc)

    # Return before issuing the second query: with no nextVisit events
    # there is nothing the cancellation records could be matched against.
    if df.empty:
        logger.info(f"No events on {date}")
        return None

    all_canceled = await client.select_time_series(topic+"Canceled", ["*"], start.utc, end.utc)

    # Only select on-sky AuxTel imaging survey
    df = df.loc[
        (df["coordinateSystem"] == 2)
        & (df["salIndex"] == 2)
        & (df["survey"] == "AUXTEL_PHOTO_IMAGING")
    ].set_index("groupId")
    logger.info(f"There were {len(df)} AUXTEL_PHOTO_IMAGING nextVisit events on {date}")

    # Ignore the explicitly canceled groups
    if not all_canceled.empty:
        canceled = df.index.intersection(all_canceled.set_index("groupId").index).tolist()
        if canceled:
            logger.info(f"{len(canceled)} events were canceled {canceled}")
            df = df.drop(canceled)

    return df

In [None]:
# NOTE: the helper returns None when the EFD has no nextVisit events in the
# queried window; the cells below use ``df_efd.index`` and assume a DataFrame.
df_efd = await get_df_from_next_visit_events(date)

In [None]:
from lsst.daf.butler import Butler
# Butler for the /repo/embargo repository, opened with writeable=False.
butler = Butler("/repo/embargo", writeable=False)

In [None]:
# Exposure dimension records of the LATISS AUXTEL_PHOTO_IMAGING raws taken
# on ``dayobs``; the day is passed in through the ``day_obs`` bind value.
results = butler.registry.queryDimensionRecords(
    "exposure",
    datasets="raw",
    collections="LATISS/raw/all",
    where="exposure.science_program IN ('AUXTEL_PHOTO_IMAGING') "
          "and instrument='LATISS' and exposure.day_obs=day_obs  ",
    bind={"day_obs": dayobs},
)
logger.info(f"Found {results.count()} raws in {butler} for {dayobs}")

# Exposure id -> dimension record.
embargo_records = {record.id: record for record in results}

# One row per exposure record, indexed by the record's ``group_name``.
df_butler = pandas.DataFrame.from_records(
    [record.toDict() for record in embargo_records.values()]
).set_index("group_name")

In [None]:
# Groups that appear in the EFD selection but not in the Butler table.
groups_no_raw = set(df_efd.index).difference(df_butler.index)
if groups_no_raw:
    logger.info(f"{len(groups_no_raw)} group had records in EFD but no raws in the embargo butler: {groups_no_raw}")

In [None]:
# Remove the EFD groups that have no matching row in the Butler table.
df_efd = df_efd.drop(groups_no_raw)

In [None]:
# Sanity check on the row counts of the two tables before they are merged.
if len(df_efd) != len(df_butler):
    logger.warning("Counts do not match; need attention")

In [None]:
# Join the EFD events with the Butler exposure records on the group name.
# ``how="outer"`` keeps rows that exist on one side only (a Butler-only row
# ends up with a missing ``groupId``), and ``validate="one_to_one"`` makes
# pandas raise if a merge key is repeated on either side.
df_md = (
    df_efd.reset_index()
    .merge(
        df_butler.reset_index(),
        how="outer",
        left_on="groupId",
        right_on="group_name",
        suffixes=("_efd", "_butler"),
        validate="one_to_one",
    )
    .set_index("groupId")
)

logger.info(f"Total: {len(df_md)} groups in the table")

In [None]:
# Columns that are expected to hold a single value for the whole day.
boring_cols = [
    "instrument_efd",
    "instrument_butler",
    "science_program",
    "observation_reason",
    "observation_type",
    "cameraAngle",
    "has_simulated",
    "dome",
    "coordinateSystem",
    "rotationSystem",
    "private_identity",
    "private_origin",
    "private_revCode",
    "salIndex",
    "totalCheckpoints",
    "nimages",
    "day_obs",
    "survey",
    "exposure_time",
]

# Drop each column that really is constant (logging its value) and warn
# about the ones that are not. ``nunique()`` does not count missing values.
for col in boring_cols:
    if df_md[col].nunique() != 1:
        logger.warning(f"Column {col} has {df_md[col].unique()}")
        continue
    logger.info(f"column {col} has only {df_md[col].unique()}")
    df_md.drop(columns=[col], inplace=True)

In [None]:
# Log the distinct values of the columns listed here (these are kept in the table).
for col in ("physical_filter", ):
    logger.info(f"Column {col} has {df_md[col].unique()}")

In [None]:
# Display the columns that remain in the merged table.
df_md.columns

In [None]:
# Per-group difference between the ``position0``/``position1`` columns and
# the ``tracking_ra``/``tracking_dec`` columns.
# NOTE(review): presumably position0/1 come from the EFD nextVisit event and
# tracking_ra/dec from the Butler exposure record, both in degrees - confirm.
# This is a plain subtraction; there is no RA wrap-around handling.
df_md["offset0"] = df_md["position0"] - df_md["tracking_ra"]
df_md["offset1"] = df_md["position1"] - df_md["tracking_dec"]

In [None]:
# Display the positions and computed offsets side by side for each group.
df_md[["seq_num", "position0", "tracking_ra", "offset0", "position1", "tracking_dec", "offset1", "physical_filter", "id"]]

In [None]:
# Summary statistics of the two offset columns.
df_md[["offset0", "offset1"]].describe()