Pipeline Errors for night of {{ params.day_obs }}
=====

In [None]:
# Notebook parameters.
# NOTE(review): the title cell references `params.day_obs`, so these look like
# defaults that a parameterization layer overrides at render time — confirm.
day_obs = "2025-04-23"  # observing night; interpolated into the butler collection name
instrument = "LSSTCam"  # used in the collection name, data IDs, query filters and pipeline path
survey = "BLOCK-365"  # compared against science_program in the registry queries

In [None]:
# Show what EUPS reports for lsst_distrib in this environment.
# NOTE(review): presumably kept to record the stack version used for the report — confirm.
!eups list lsst_distrib 

In [None]:
# Print the IMAGE_DESCRIPTION environment variable.
# NOTE(review): presumably identifies the container image the notebook ran in — confirm.
!echo $IMAGE_DESCRIPTION

In [None]:
# Repository location handed to dafButler.Butler when the butler is opened.
butler_alias = "/repo/embargo"

In [None]:
import html
import itertools
from dataclasses import dataclass

import tabulate

import lsst.daf.butler as dafButler
from lsst.daf.butler import DimensionNameError
from lsst.pipe.base import Pipeline

@dataclass
class error_summary:
    """Log records of interest collected for one (visit, detector) pair.

    Instances are created by ``make_error_summaries`` and consumed when the
    error table is assembled.
    """

    # Identifier taken from the data ID; make_error_summaries uses the same
    # value as both the "exposure" and the "visit" key when fetching logs.
    visit: int
    # Detector identifier taken from the data ID.
    detector: int
    # Log records retained by make_error_summaries, in the order they were read.
    error_messages: list

# Open the butler with the "<instrument>/prompt/output-<day_obs>" collection
# as the default, so later queries and gets are scoped to that night's outputs.
b = dafButler.Butler(butler_alias, collections=f"{instrument}/prompt/output-{day_obs:s}")

In [None]:
def _visit_detector_pairs(dataset_type, dimension):
    """Return the set of (``dimension`` id, detector id) pairs that have ``dataset_type``.

    ``dimension`` is the data ID key to read ("exposure" or "visit"); the same
    name prefixes the ``science_program`` filter so that only datasets from
    ``survey`` taken with ``instrument`` are returned.
    """
    refs = b.registry.queryDatasets(
        dataset_type,
        where=f"{dimension}.science_program='{survey}' AND instrument='{instrument}'",
    )
    return {(ref.dataId[dimension], ref.dataId['detector']) for ref in refs}


# Every (exposure, detector) for which ISR wrote a log, i.e. processing was attempted.
log_visit_detector = _visit_detector_pairs("isr_log", "exposure")
print("Number of ISR records in butler: {:d}".format(len(log_visit_detector)))

# The subset for which ISR produced an output image.
isr_visit_detector = _visit_detector_pairs("post_isr_image", "exposure")
print("Number of successful ISR results: {:d}".format(len(isr_visit_detector)))

# The presence of this dataset type is what this notebook counts as a
# successful processCcd run.
last_pvi_type = "initial_photometry_match_detector"
pvi_visit_detector = _visit_detector_pairs(last_pvi_type, "visit")
print("Number of successful processCcd results: {:d}".format(len(pvi_visit_detector)))

# Attempted (ISR log exists) but without the final processCcd product.
missing_pvis = log_visit_detector - pvi_visit_detector
missing_visits = [visit for visit, _detector in missing_pvis]
print("Number of unsuccessful processCcd attempts: {:d}".format(len(missing_pvis)))

# An apdb_marker dataset is what this notebook counts as a successful DIA run.
dia_visit_detector = _visit_detector_pairs("apdb_marker", "visit")
print("Number of successful DIA attempts: {:d}".format(len(dia_visit_detector)))

In [None]:
def make_error_summaries(log_dataset_types_exposure, log_dataset_types_visit, data_ids):
    """Collect the noteworthy log records for each (visit, detector) pair.

    Parameters
    ----------
    log_dataset_types_exposure : iterable of str
        Log dataset types fetched with an ``exposure`` data ID. Records with
        ``levelno > 30`` are kept.
    log_dataset_types_visit : iterable of str
        Log dataset types fetched with a ``visit`` data ID. Records with
        ``levelno > 30`` or whose message contains "SIGTERM" are kept.
    data_ids : iterable of (int, int)
        ``(visit, detector)`` pairs; the first element is used as both the
        exposure and the visit identifier.

    Returns
    -------
    list of error_summary
        One summary per input pair, in iteration order. ``error_messages``
        holds the exposure-level records first, then the visit-level ones.

    Raises
    ------
    Exception
        Anything ``b.get`` raises other than ``DimensionNameError`` on a
        visit-level lookup is propagated unchanged.
    """
    error_summaries = []
    for visit, detector in data_ids:
        visit_errors = []

        for ds_type in log_dataset_types_exposure:
            log_messages = b.get(
                ds_type,
                dataId={"instrument": instrument, "exposure": visit, "detector": detector},
            )
            # 30 is WARNING on the stdlib logging scale, so this keeps records above WARNING.
            visit_errors.extend(msg for msg in log_messages if msg.levelno > 30)

        for ds_type in log_dataset_types_visit:
            try:
                log_messages = b.get(
                    ds_type,
                    dataId={"instrument": instrument, "visit": visit, "detector": detector},
                )
            except DimensionNameError:
                # Visit records can be missing due to corrupted headers.
                continue
            # No `finally` here: any other failure must surface as itself rather
            # than be masked by (or mixed with) a previous iteration's records.
            visit_errors.extend(
                msg for msg in log_messages if msg.levelno > 30 or "SIGTERM" in msg.message
            )

        error_summaries.append(error_summary(visit=visit, detector=detector, error_messages=visit_errors))
    return error_summaries

In [None]:
def make_url_from_visit(visit):
    """Build the RubinTV image URL for a visit of the current ``instrument``.

    Parameters
    ----------
    visit : int or str
        Identifier whose first eight characters are read as YYYYMMDD and whose
        remaining characters are read as an integer sequence counter.

    Returns
    -------
    str
        URL of the monitor image (LATISS) or the focal-plane mosaic
        (LSSTComCam, LSSTCam).

    Raises
    ------
    ValueError
        If ``instrument`` is not one of the supported instruments, or if the
        counter part of ``visit`` is not an integer.
    """
    visit_str = str(visit)
    day_string = f"{visit_str[0:4]}-{visit_str[4:6]}-{visit_str[6:8]}"
    counter = int(visit_str[8:])
    base = "https://usdf-rsp.slac.stanford.edu/rubintv/summit-usdf"

    if instrument == "LATISS":
        # Example: https://usdf-rsp.slac.stanford.edu/rubintv/summit-usdf/auxtel/event?key=auxtel/2024-08-12/monitor/000351/auxtel_monitor_2024-08-12_000351.png
        return (
            f"{base}/auxtel/event?key=auxtel/{day_string}/monitor/{counter:06d}"
            f"/auxtel_monitor_{day_string}_{counter:06d}.png"
        )

    # Both of these cameras share the focal-plane-mosaic URL layout.
    # Example: https://usdf-rsp.slac.stanford.edu/rubintv/summit-usdf/comcam/event?key=comcam/2024-11-23/focal_plane_mosaic/000336/comcam_focal_plane_mosaic_2024-11-23_000336.jpg
    short_names = {"LSSTComCam": "comcam", "LSSTCam": "lsstcam"}
    if instrument not in short_names:
        # Fail with a clear message instead of an unbound local variable.
        raise ValueError(f"No RubinTV URL pattern is known for instrument {instrument!r}")
    short_name = short_names[instrument]
    return (
        f"{base}/{short_name}/event?key={short_name}/{day_string}/focal_plane_mosaic/{counter:06d}"
        f"/{short_name}_focal_plane_mosaic_{day_string}_{counter:06d}.jpg"
    )

In [None]:
# Load the instrument's ApPipe pipeline definition.
# NOTE(review): the "#prompt" fragment presumably selects a labeled subset of the pipeline — confirm.
pipeline = Pipeline.from_uri(f"$AP_PIPE_DIR/pipelines/{instrument}/ApPipe.yaml#prompt")
# NOTE(review): "dummy" looks like a placeholder so the graph can be built
# without a real APDB configuration — confirm.
pipeline.addConfigOverride("associateApdb", "apdb_config_url", "dummy")

# Resolve the pipeline against the registry; the resulting task list drives
# the task-by-task error scan in a later cell.
pipeline_graph = pipeline.to_graph(registry=b.registry)

In [None]:
# Substrings of already-known failure messages. A table row whose message
# contains any of these is left unhighlighted (all other rows are shown in
# red), and each substring is tallied separately in the summary cell below.
recurrent_errors = {
    "Exception BadAstrometryFit: Poor quality astrometric fit",
    "Exception NonfinitePsfShapeError: Failed to determine PSF",
    "Exception NormalizedCalibrationFluxError",
    "Exception MeasureApCorrError: Unable to measure aperture correction",
    "Exception ObjectSizeNoGoodSourcesError",
    "MatcherFailure: No matches found",
    "MatcherFailure: Not enough catalog objects",
    "MatcherFailure: Not enough refcat objects",
    "MatcherFailure: No matches to use for photocal",
    "NoPsfStarsToStarsMatchError",
    "PsfexTooFewGoodStarsError",
    "RuntimeError: Cannot compute PSF matching kernel: too few sources selected",
    "RuntimeError: No good PSF candidates to pass to PSFEx",
    "PsfexNoGoodStarsError",
    "RuntimeError: No objects passed our cuts for consideration as psf stars",
    "SIGTERM",
    "ValueError: cannot convert float NaN to integer",
}

In [None]:
# Walk the pipeline task by task. For each task, the (visit, detector) pairs
# that have a log for the previous task but none for this one are treated as
# having failed in the previous task, and get one table row each.
# "isr" is skipped because it is the starting point of the walk.
last_records = log_visit_detector
table_contents = []
last_task = "isr"
# The trailing "end" pseudo-task compares the final task against the DIA markers.
for task in itertools.chain(pipeline_graph.tasks, ["end"]):
    if task in ("isr", "getRegionTimeFromVisit"):
        continue
    if task != "end":
        records = {
            (ref.dataId['visit'], ref.dataId['detector'])
            for ref in b.registry.queryDatasets(
                task + "_log",
                where=f"visit.science_program='{survey}' AND instrument='{instrument}'",
            )
        }
        error_summaries = make_error_summaries(["isr_log"], [last_task + "_log"], last_records - records)
        print(f"  {len(records):d} {task} records")
    else:
        error_summaries = make_error_summaries(["isr_log"], [last_task + "_log"], last_records - dia_visit_detector)

    for e in error_summaries:
        if e.error_messages:
            # Show only the last retained record, capped at 1000 characters.
            raw_msg = e.error_messages[-1].message[:1000]
        # Ignore those that did not run rewarpTemplate; likely single frame only.
        elif task == "rewarpTemplate":
            continue
        elif task == "end":
            raw_msg = "?"
        else:
            raw_msg = ""
        # Match known errors against the raw text, before any markup is added.
        listed = any(err in raw_msg for err in recurrent_errors)
        # The table is rendered as unescaped HTML, so neutralize any markup
        # characters in the log text (e.g. "<...>" object reprs).
        msg = html.escape(raw_msg)
        if not listed:
            msg = f'<span style="color: red;">{msg}</span>'
        link = f'<a href="{make_url_from_visit(e.visit)}" target="_blank">img</a>'
        table_contents.append((e.visit, e.detector, link, last_task, msg))

    last_task = task
    last_records = records

In [None]:
# Total number of table rows. Pairs with no retained log message that merely
# lack a rewarpTemplate log were skipped when the table was built.
print(f"{len(table_contents):d} errors, not including those falling back from ApPipe to SingleFrame")

In [None]:
def count_error(errMsg):
    """Return how many rows of ``table_contents`` contain ``errMsg`` in their last column."""
    matches = 0
    for row in table_contents:
        if errMsg in row[-1]:
            matches += 1
    return matches

In [None]:
# Tally how often each known error appears in the table. The set is sorted so
# the listing order is the same on every run (plain set iteration order for
# strings varies between kernel sessions).
for err in sorted(recurrent_errors):
    count = count_error(err)
    if count:
        print("-", count, err)

In [None]:
# Render the collected rows, ordered by visit and then detector, as an HTML
# table. The 'unsafehtml' format leaves the link and span markup in the cells intact.
table = tabulate.tabulate(
    sorted(table_contents),
    headers=("Visit", "Det", "Img", "Last Task", "Error Message"),
    tablefmt='unsafehtml',
)
table