In [None]:
# Notebook parameters.
# day_obs_min / day_obs_max: the range of dayobs values to search, given as
# "YYYY-MM-DD" strings. The commented-out alternatives show that the string
# "Today" has also been used here - presumably resolved to the current dayobs
# by code outside this section; TODO confirm.
#day_obs_min = "Today"
day_obs_min = "2024-11-24"
#day_obs_max = "Today"
day_obs_max = "2024-11-24"
# NOTE(review): time_order and show_salIndex are not consumed in the visible
# part of the notebook; they look like display options (sort order of the
# final table, and which ScriptQueue salIndex to show) - confirm against the
# cells that use them.
time_order = 'newest first'
show_salIndex = 'all'

# EFD Scripts + Logs 

In [None]:
import os
import warnings
import numpy as np
import pandas as pd
from pandas import option_context
from IPython.display import display, Markdown, HTML
from astropy.time import Time, TimeDelta
import datetime
import astropy.units as u
import yaml
import html

# To generate a tiny gap in time (one millisecond)
EPS_TIME = np.timedelta64(1, 'ms')
# UTC datetime corresponding to a unix_tai timestamp of 0. Script timestamps
# are converted from unix_tai further down, and are compared against this
# value to detect timestamps that were never set.
TIMESTAMP_ZERO = Time(0, format='unix_tai').utc.datetime

In [None]:
from enum import Enum
# lsst-ts-xml is in conda
# See https://github.com/lsst-ts/ts_xml/blob/develop/python/lsst/ts/xml/enums 
from lsst.ts.xml.sal_enums import State as CSCState
from lsst.ts.xml.enums.ScriptQueue import ScriptProcessState, SalIndex
from lsst.ts.xml.enums.Script import ScriptState
from lsst.ts.xml.enums.Watcher import AlarmSeverity

# Run as 'apply' per row (axis=1)
def apply_enum(x: pd.Series, column: str, enumvals: type[Enum]) -> str:
    """Translate the raw enum value held in one column of a row to its name.

    Intended to be passed to ``DataFrame.apply(..., axis=1)``.

    Parameters
    ----------
    x : `pd.Series`
        A single dataframe row.
    column : `str`
        Name of the column holding the raw enum value.
    enumvals : `type` [`Enum`]
        The enum class (not a member of it) used to look up the value.

    Returns
    -------
    name : `str`
        Name of the member of ``enumvals`` whose value is ``x[column]``.

    Raises
    ------
    ValueError
        If ``x[column]`` is not a valid value of ``enumvals``.
    """
    return enumvals(x[column]).name

In [None]:
import os
import requests
from lsst_efd_client import EfdClient

# lsst.summit.utils is treated as an optional dependency: instead of failing
# when it cannot be imported, record its availability in has_summit_utils.
try:
    from lsst.summit.utils import ConsDbClient
    has_summit_utils = True
except ImportError:
    # No lsst.summit.utils
    has_summit_utils = False

try:
    from lsst.rsp import get_access_token
except ImportError:
    def get_access_token(token_file : str | None = None) -> str:
        token = os.environ.get("ACCESS_TOKEN")
        if token == None:
            if token_file is not None:
                with open("token_file", "r") as f:
                    token = f.read()
            else:
                warnings.warn("No RSP token available.")
        return token


def get_clients() -> dict:
    """Return site-specific client connections.

    The site is identified from the ``EXTERNAL_INSTANCE_URL`` environment
    variable. If that variable is unset or empty, the ``HOSTNAME``
    environment variable is checked instead. If neither identifies a site,
    the USDF is assumed.

    Returns
    -------
    endpoints : `dict`
        Dictionary with the keys:

        ``api_base``
            Base URL of the site (`str`), or `None` for the Tucson
            teststand.
        ``auth``
            ``("user", token)`` tuple for the log services.
        ``efd``
            `EfdClient` for the site's EFD.
        ``obsenv``
            `EfdClient` for the ``lsst.obsenv`` database at the same site.
        ``narrative_log``, ``exposure_log``, ``nightreport``
            Paths of the respective services, relative to ``api_base``.
        ``string``
            Human-readable summary of the base url and EFD host. It does
            not include the token, so it is safe to print.

    Note
    ----
    The authentication token required to access the log services
    is an RSP token, and is RSP site-specific.
    For users outside the RSP, a token can be created as described in
    https://nb.lsst.io/environment/tokens.html
    """
    # Set up authentication
    token = get_access_token()
    auth = ("user", token)
    # This authentication is for nightlog, exposurelog, nightreport currently
    # But I think it's the same underlying info for EfdClient i.e.
    # https://github.com/lsst/schedview/blob/e11fbd51ee5e22d11fef9a52f66dfcc082181cb6/schedview/app/scheduler_dashboard/influxdb_client.py
    # For lots more information on rubin tokens see DMTN-234.
    # For information on scopes, see DMTN-235.

    # let's do this like lsst.summit.utils.getSite but simpler
    location = os.getenv("EXTERNAL_INSTANCE_URL", "")
    # Checked in order; the first marker found in the URL wins.
    url_markers = (
        ("tucson-teststand", "tucson"),
        ("summit-lsp", "summit"),
        ("base-lsp", "base"),
        ("usdf-rsp", "usdf"),
    )
    site = next((name for marker, name in url_markers if marker in location), None)

    # If location not set, next step is to check hostname
    if site is None and location == "":
        hostname = os.getenv("HOSTNAME", "")
        interactive_nodes = ("sdfrome", "sdfiana")
        if hostname.startswith(interactive_nodes):
            site = "usdf"
        elif hostname == "htcondor.ls.lsst.org":
            site = "base"
        elif hostname == "htcondor.cp.lsst.org":
            site = "summit"

    # If none of the above, use usdf again.
    if site is None:
        site = "usdf"

    # (api_base, EFD instance name) per site.
    site_config = {
        "summit": ("https://summit-lsp.lsst.codes", "summit_efd"),
        "tucson": (None, "tucson_teststand_efd"),
        "base": ("https://base-lsp.slac.lsst.codes", "base_efd"),
        "usdf": ("https://usdf-rsp.slac.stanford.edu", "usdf_efd"),
    }
    api_base, efd_name = site_config[site]
    # For tokens, need to distinguish between dev and prod
    if site == "usdf" and "dev" in location:
        api_base = "https://usdf-rsp-dev.slac.stanford.edu"

    efd_client = EfdClient(efd_name)
    obsenv_client = EfdClient(efd_name, db_name="lsst.obsenv")

    endpoints = {
        'api_base': api_base,
        'auth': auth,
        'efd': efd_client,
        'obsenv': obsenv_client,
        'narrative_log': "/narrativelog/messages",
        'exposure_log': "/exposurelog/messages",
        'nightreport': "/nightreport/reports",
    }

    # If some verbose output is desired
    # We'll put this here to make it easier to avoid printing the auth token
    endpoints_string = f"base url: {endpoints['api_base']} "
    endpoints_string += f"efd host: {endpoints['efd'].influx_client.host}"
    endpoints['string'] = endpoints_string

    return endpoints

def query_logging_services(endpoint: str, auth: tuple, params: dict) -> pd.DataFrame:
    """Query the narrative log or exposure log REST services.

    The request is attempted at most twice, because the first request to
    the logging endpoints very often fails.

    Parameters
    ----------
    endpoint : `str`
        The URL to send the query to.
        Usually like `https://usdf-rsp.slac.stanford.edu/narrativelog/messages`
    auth : `tuple`
        The username and password for authentication.
        The username can be any string, the password should be an RSP token.
        See e.g. https://nb.lsst.io/environment/tokens.html
    params : `dict`
        Dictionary of parameters for the REST API query.
        See docs for each service for more details

    Returns
    -------
    messages : `pd.DataFrame`
        The returned log messages in a dataframe. The dataframe is empty
        if both attempts return a status other than 200.
    """
    max_attempts = 2
    for _ in range(max_attempts):
        response = requests.get(endpoint, auth=auth, params=params)
        if response.status_code == 200:
            return pd.DataFrame(response.json())

    # Both attempts failed: report the last response and return no messages.
    err_string = f"{endpoint} " + " unavailable."
    print(err_string)
    print(response)
    print(response.status_code)
    return pd.DataFrame([])

In [None]:
# Query any EFD topic for the timespan day_obs_min to day_obs_max, when you don't already know the fields
# endpoints = get_clients()
# efd_client = endpoints['efd']
# topic = 'lsst.sal.ScriptQueue.logevent_summaryState'
# fields = await efd_client.get_fields(topic)
# fields = [f for f in fields if 'private' not in f and f != 'name' and f!= "duration"]
# #dd = await efd_client.select_time_series(topic, fields, tstart, tend)
# #or top 5 .. 
# dd = await efd_client.select_top_n(topic, fields, 5)
# dd

In [None]:
async def get_script_stream(t_start: Time, t_end: Time, efd_client: EfdClient) -> pd.DataFrame:
    """Get script description and configuration from lsst.sal.Script.logevent_description
    and lsst.sal.Script.command_configure topics.

    Parameters
    ----------
    t_start : `astropy.Time`
        The time to start searching for script events.
    t_end : `astropy.Time`
        The time at which to end searching for script events.
    efd_client : `EfdClient`
        EfdClient to query the efd.

    Returns
    -------
    script_stream : `pd.DataFrame`
        DataFrame containing script description and configuration, merged
        on ``script_salIndex``. Empty if either topic has no records in
        the time period.

    Note
    ----
    Note that these do not explicitly carry the scriptqueue salindex information.
    The "salIndex" in these topics is the script_salIndex.
    """
    # Script will find information about how scripts are configured.
    # The description topic gives a more succinct human name to the scripts
    topic = 'lsst.sal.Script.logevent_description'
    fields = ['classname', 'description', 'salIndex']
    scriptdescription = await efd_client.select_time_series(topic, fields, t_start, t_end)
    scriptdescription = scriptdescription.rename(columns={'salIndex': 'script_salIndex'})

    # This gets us more information about the script parameters, how they were configured.
    # All non-private fields of the topic are requested.
    topic = 'lsst.sal.Script.command_configure'
    fields = await efd_client.get_fields(topic)
    fields = [f for f in fields if 'private' not in f]
    # note blockId is only filled for JSON BLOCK activities
    scriptconfig = await efd_client.select_time_series(topic, fields, t_start, t_end)
    scriptconfig = scriptconfig.rename(columns={'salIndex': 'script_salIndex'})

    # Merge these together on script_salIndex which is unique over tinterval
    # Found that (command_configure - script description) index time is mostly << 1 second for each script and < 1 second over a night
    if len(scriptconfig) == 0 or len(scriptdescription) == 0:
        print(f"Length of scriptdescription ({len(scriptdescription)}) and scriptconfig ({len(scriptconfig)}) in time period {t_start.utc.iso} to {t_end.utc.iso}")
        script_stream = pd.DataFrame([])
    else:
        script_stream = pd.merge(scriptdescription, scriptconfig, on='script_salIndex', suffixes=['_d', '_r'])
    return script_stream


async def get_script_state(t_start: Time, t_end: Time, queueIndex: int | None, efd_client: EfdClient) -> pd.DataFrame:
    """Summarize per-script status from lsst.sal.ScriptQueue.logevent_script.

    Parameters
    ----------
    t_start : `astropy.Time`
        The time to start searching for script events.
    t_end : `astropy.Time`
        The time at which to end searching for script events.
    queueIndex : `int` or `None`
        The salIndex of the ScriptQueue to query. An integer restricts the
        query to that single queue; `None` queries all queues.
    efd_client : `EfdClient`
        EfdClient to query the efd.

    Returns
    -------
    script_status : `pd.DataFrame`
        One row per ``script_salIndex`` (the index of the dataframe), with
        the consolidated states and timing information of that script.
        Empty if there were no script events in the time period.

    Note
    ----
    The scriptqueue is explicit here, in the salIndex. From here, these can be tied to
    the running of individual scripts, within a single restart of the scriptqueue only.
    """
    # Each row of logevent_script is the status of a script at one stage.
    topic = 'lsst.sal.ScriptQueue.logevent_script'
    fields = [
        'blockId', 'path', 'processState', 'scriptState', 'salIndex', 'scriptSalIndex',
        'timestampProcessStart', 'timestampConfigureStart', 'timestampConfigureEnd',
        'timestampRunStart', 'timestampProcessEnd',
    ]
    events = await efd_client.select_time_series(topic, fields, t_start, t_end, index=queueIndex)
    if len(events) == 0:
        print(f"Found 0 script events in {t_start.utc.iso} to {t_end.utc.iso}.")
        return pd.DataFrame([])

    events = events.rename(columns={'scriptSalIndex': 'script_salIndex'})
    # Duplicate scriptState into its own column, so that it can be reduced to
    # both its maximum (== final state, given the enum) and its unique values
    # without creating multi-index columns.
    events['finalScriptState'] = events['scriptState']
    how_to_consolidate = {
        'path': 'first',
        'salIndex': 'max',
        'finalScriptState': 'max',
        'scriptState': 'unique',
        'processState': 'unique',
        'timestampProcessStart': 'min',
        'timestampConfigureStart': 'min',
        'timestampConfigureEnd': 'max',
        'timestampRunStart': 'max',
        'timestampProcessEnd': 'max',
    }
    script_status = events.groupby('script_salIndex').agg(how_to_consolidate)

    # The timestamps really are unix_tai; convert to UTC datetimes for readability.
    timestamp_columns = [name for name in script_status.columns if name.startswith('timestamp')]
    for name in timestamp_columns:
        script_status[name] = Time(script_status[name], format='unix_tai').utc.datetime

    # Show the final state by its ScriptState name.
    # (The 'best time' index is applied after the merge with script_stream.)
    script_status['finalScriptState'] = script_status.apply(
        apply_enum, args=['finalScriptState', ScriptState], axis=1
    )
    return script_status

In [None]:
def _merge_script_stream_and_state(script_stream: pd.DataFrame, script_state: pd.DataFrame) -> pd.DataFrame:
    """Join script descriptions/configurations with per-script status; empty if either input is empty."""
    if len(script_stream) == 0 or len(script_state) == 0:
        # The empty frames have no 'script_salIndex' column to merge on.
        return pd.DataFrame([])
    return pd.merge(script_stream, script_state, left_on='script_salIndex', right_index=True, suffixes=['', '_s'])


async def get_script_status(t_start: Time, t_end: Time, efd_client: EfdClient) -> pd.DataFrame:
    """Given a start and end time, appropriately query each ScriptQueue to find
    script descriptions, configurations and status.

    Parameters
    ----------
    t_start : `astropy.Time`
        The time to start searching for script events.
    t_end : `astropy.Time`
        The time at which to end searching for script events.
    efd_client : `EfdClient`
        EfdClient to query the efd.

    Returns
    -------
    script_status : `pd.DataFrame`
        DataFrame containing script description, configuration, timing information and states,
        sorted by its (UTC) time index. If no scripts were found, an empty dataframe
        with an empty UTC DatetimeIndex is returned.


    Note
    ----
    The index of the returned dataframe is chosen from the timestamps recorded for the script.
    In order to best place the script message inline with other events such as acquired images,
    the time used is the `timestampRunStart` if available, `timestampConfigureEnd` next, and
    then falls back to `timestampConfigureStart` or `timestampProcessStart` if those are also not
    available.
    """

    # The script_salIndex is ONLY unique during the time that a particular queue remains not OFFLINE
    # However, each queue can go offline independently, so the time intervals that are required for each queue
    # can be different, and requires inefficient querying of the lsst.sal.Script topics (which don't include
    # the queue identification explicitly). Furthermore, the downtime is infrequent, so probably we'd
    # most of the time prefer to do the efficient thing and query everything all at once.

    # So first - see if that's possible.
    topic = 'lsst.sal.ScriptQueue.logevent_summaryState'
    fields = ['salIndex', 'summaryState']
    # Were there breaks in any queue?
    all_states = await efd_client.select_time_series(topic, fields, t_start, t_end)
    if len(all_states) == 0:
        offline_events = 0
    else:
        offline_state = CSCState.OFFLINE.value
        offline_events = len(all_states.query('summaryState == @offline_state'))

    if offline_events == 0:
        print(f"No OFFLINE events during time interval {t_start} to {t_end} for any queue.")
        # So then go ahead and just do a single big query.
        script_stream = await get_script_stream(t_start, t_end, efd_client)
        script_state = await get_script_state(t_start, t_end, None, efd_client)
        script_status = _merge_script_stream_and_state(script_stream, script_state)

    else:
        # The ScriptQueues can be started and stopped independently, so run needs to run per-scriptqueue, per-uptime
        status_chunks = []
        for queue in SalIndex:
            topic = 'lsst.sal.ScriptQueue.logevent_summaryState'
            fields = ['salIndex', 'summaryState']
            # Were there breaks in this particular queue?
            queue_states = await efd_client.select_time_series(topic, fields, t_start, t_end, index=queue)
            if len(queue_states) == 0:
                tstops = []
                tintervals = [[t_start, t_end]]
            else:
                queue_states['state'] = queue_states.apply(apply_enum, args=['summaryState', CSCState], axis=1)
                queue_states['state_time'] = Time(queue_states.index.values)

                tstops = queue_states.query('state == "OFFLINE"').state_time.values
                if len(tstops) == 0:
                    tintervals = [[t_start, t_end]]
                else:
                    # Split [t_start, t_end] at each OFFLINE event; every new
                    # interval starts 0.1 seconds after the previous stop.
                    ts = tstops[0]
                    ts_next = ts + TimeDelta(0.1 * u.second)
                    ts_next = Time(ts_next)
                    tintervals = [[t_start, ts]]
                    for ts in tstops[1:]:
                        tintervals.append([ts_next, ts])
                        ts_next = ts + TimeDelta(0.1 * u.second)
                    tintervals.append([ts_next, t_end])
            if len(tstops) == 0:
                print(f"For {queue.name}, found 0 ScriptQueue OFFLINE events in the time period  {t_start} to {t_end}.")
            else:
                print(f"For {queue.name}, found {len(tstops)} ScriptQueue restarts in the time period {t_start} to {t_end}, so will query in {len(tstops)+1} chunks")
                print(f"OFFLINE event at @ {[t.utc.iso for t in tstops]}")

            # Do the script queue queries for each time interval in this queue
            for tinterval in tintervals:
                script_stream_t = await get_script_stream(tinterval[0], tinterval[1], efd_client)
                script_state_t = await get_script_state(tinterval[0], tinterval[1], queue, efd_client)
                # Merge with script_stream so we get better descriptions and configuration information
                merged = _merge_script_stream_and_state(script_stream_t, script_state_t)
                if len(merged) > 0:
                    status_chunks.append(merged)
                print(f"Found {len(merged)} script-status messages during {[e.iso for e in tinterval]} for {queue.name}")
        # Convert to a single dataframe (pd.concat raises on an empty list)
        if len(status_chunks) > 0:
            script_status = pd.concat(status_chunks)
        else:
            script_status = pd.DataFrame([])

    print(f"Found {len(script_status)} script status messages")

    if len(script_status) == 0:
        # Nothing to index: return the empty frame, with the same kind of
        # index as a non-empty result.
        script_status.index = pd.DatetimeIndex([], tz="UTC")
        return script_status

    # script_status columns:
    # ['classname', 'description', 'script_salIndex', 'ScriptID', 'blockId',
    # 'config', 'executionId', 'logLevel', 'pauseCheckpoint',
    # 'stopCheckpoint', 'path', 'salIndex', 'finalScriptState', 'scriptState',
    # 'processState', 'timestampProcessStart', 'timestampConfigureStart',
    # 'timestampConfigureEnd', 'timestampRunStart', 'timestampProcessEnd']
    # columns used in final merged dataframe:
    # ['time', 'name', 'description', 'config', 'script_salIndex', 'salIndex',
    # 'finalStatus', 'timestampProcessStart', 'timestampConfigureEnd', 'timestampRunStart', 'timestampProcessEnd']

    def _find_best_script_time(x):
        # Try run start first; a timestamp equal to TIMESTAMP_ZERO was never set.
        best_time = x.timestampRunStart
        if best_time == TIMESTAMP_ZERO:
            best_time = x.timestampConfigureEnd
        if best_time == TIMESTAMP_ZERO:
            best_time = x.timestampConfigureStart
        if best_time == TIMESTAMP_ZERO:
            best_time = x.timestampProcessStart
        return best_time
    # Create an index that will slot this into the proper place for runtime / image acquisition, etc
    script_status.index = script_status.apply(_find_best_script_time, axis=1)
    script_status.index = script_status.index.tz_localize("UTC")
    script_status.sort_index(inplace=True)
    return script_status

async def get_tracebacks(t_start: Time, t_end: Time, efd_client: EfdClient) -> pd.DataFrame:
    """Find tracebacks in lsst.sal.Script.logevent_logMessage.

    Parameters
    ----------
    t_start : `astropy.Time`
        The time to start searching for script events.
    t_end : `astropy.Time`
        The time at which to end searching for script events.
    efd_client : `EfdClient`
        EfdClient to query the efd.

    Returns
    -------
    tracebacks : `pd.DataFrame`
        DataFrame containing tracebacks, with columns renamed to line up
        with the script status dataframes (the log message is in
        ``classname``, the traceback in ``description``). If there are no
        tracebacks in the time period, an empty dataframe with the same
        columns is returned.
    """
    # Add tracebacks for failed scripts -- these should just slot in right after FAILED scripts, and link with script_salIndex
    topic = "lsst.sal.Script.logevent_logMessage"
    fields = ["message", "traceback", "salIndex"]
    traceback_messages = await efd_client.select_time_series(topic, fields, t_start, t_end)
    if len(traceback_messages) > 0:
        traceback_messages = traceback_messages.rename(columns={'salIndex': 'script_salIndex'})
        # Only keep the lines where the traceback wasn't empty.
        traceback_messages = traceback_messages.query('traceback != ""').copy()
    if len(traceback_messages) == 0:
        # No log messages at all, or none of them carried a traceback.
        # The column operations below do not work on an empty frame.
        return pd.DataFrame([], columns=['classname', 'description', 'script_salIndex', 'salIndex',
                                         'config', 'finalScriptState', 'timestampProcessStart'])

    script_index = traceback_messages['script_salIndex']
    # Add salIndex of queue where the script was run: this is taken to be
    # the leading digit of the script salIndex.
    traceback_messages['salIndex'] = script_index.map(lambda idx: int(str(idx)[0]))
    traceback_messages['config'] = script_index.map(lambda idx: f"Traceback for {idx}")
    # Going to rename some of these columns here, just because scheduler configs and script queue already match nicely
    traceback_messages = traceback_messages.rename(columns={'traceback': 'description', 'message': 'classname'})
    traceback_messages['finalScriptState'] = 'Traceback'
    traceback_messages['timestampProcessStart'] = traceback_messages.index.copy().tz_localize(None).astype('datetime64[ns]')
    return traceback_messages


# Scheduler dependency information
async def get_scheduler_configs(t_start: Time, t_end: Time, efd_client: EfdClient, obsenv_client: EfdClient) -> pd.DataFrame:
    """Collect obsenv versions, scheduler dependencies and scheduler configurations.

    Parameters
    ----------
    t_start : `astropy.Time`
        The time to start searching for configuration records.
    t_end : `astropy.Time`
        The time at which to end searching for configuration records.
    efd_client : `EfdClient`
        EfdClient to query the efd (Scheduler topics).
    obsenv_client : `EfdClient`
        EfdClient to query the obsenv (different database).

    Returns
    -------
    sched_config : `pd.DataFrame`
        Time-sorted dataframe with columns ``classname``, ``description``,
        ``config``, ``salIndex``, ``script_salIndex``,
        ``timestampProcessStart`` and ``finalScriptState``. It holds the
        records from ``t_start`` onwards, plus the last record of each
        kind (obsenv, scheduler dependencies, scheduler configuration)
        from before ``t_start``, where one was found.

    Note
    ----
    If no obsenv or scheduler dependency record exists in the requested
    time period, the search start is stepped back one day at a time (up to
    90 steps) until a record is found.
    """
    # First find the obsenv to find the version of ts_config_ocs
    topic = 'lsst.obsenv.summary'
    fields = ['summit_extras', 'summit_utils',  'ts_standardscripts', 'ts_externalscripts', 'ts_config_ocs']
    # Query longer time period for obsenv, so we can be sure to know how scheduler enables
    # t_start_local is already at the point where we have a scheduler enable event
    # But sometimes the obsenv has been updated quite a long time before that even (with auxtel at least)
    t_start_local = t_start
    step_back = TimeDelta(1, format='jd')
    max_steps = 90
    obsenv = await obsenv_client.select_time_series(topic, fields, t_start_local - step_back, t_end)
    i = 0
    while len(obsenv) == 0 and i < max_steps:
        t_start_local = t_start_local - step_back
        obsenv = await obsenv_client.select_time_series(topic, fields, t_start_local - step_back, t_end)
        i += 1
    if len(obsenv) == 0:
        # The last query started (i + 1) steps before t_start.
        warnings.warn(f"Could not find obsenv values within previous {(i + 1) * step_back.jd} days")
        # This shouldn't happen, but could before obsenv was implemented.
        # We need something to fill in for work below.
        bad_obsenv0 = [(t_start - step_back * 3).utc.datetime] + ['unknown' for f in fields]
        bad_obsenv1 = [t_start.utc.datetime] + ['unknown' for f in fields]
        obsenv = pd.DataFrame([bad_obsenv0, bad_obsenv1], columns=['time'] + fields)
        obsenv.set_index('time', inplace=True)
        obsenv.index = obsenv.index.tz_localize("UTC")
    elif i > 1:
        # If we looped backwards additional days to find previous obsenv, just use the last
        # (most recent) value.
        # NOTE(review): assumes the query result is in ascending time order - confirm.
        obsenv = obsenv.iloc[-1:].copy()

    # Label each obsenv record after the first as a "Check" (identical to
    # the previous record) or an "Update".
    check = np.all((obsenv[fields][1:].values == obsenv[fields][:-1].values), axis=1)
    classname = np.where(check, "Obsenv Check", "Obsenv Update")
    obsenv['classname'] = np.concatenate([np.array(['Obsenv']), classname])
    obsenv['description'] = ("ts_config_ocs: " + obsenv['ts_config_ocs'])
    obsenv['config'] = ("ts_standardscripts: " + obsenv['ts_standardscripts'] +
                        "; ts_externalscripts: " + obsenv['ts_externalscripts'] +
                        "; summit_utils: " + obsenv['summit_utils'] +
                        "; summit_extras: " + obsenv['summit_extras'])
    # The obsenv is shared across all scriptqueues. The salIndex has to apply to all.
    obsenv['salIndex'] = 0
    obsenv['script_salIndex'] = -1

    # Scheduler dependency information - updated independently of obsenv.
    topic = 'lsst.sal.Scheduler.logevent_dependenciesVersions'
    fields = await efd_client.get_fields(topic)
    fields = [f for f in fields if "private" not in f]
    # Sometimes the scheduler hasn't been set up, if it's a limited timespan.
    t_start_local = t_start
    deps = await efd_client.select_time_series(topic, fields, t_start_local, t_end)
    i = 0
    while len(deps) == 0 and i < max_steps:
        t_start_local = t_start_local - step_back
        deps = await efd_client.select_time_series(topic, fields, t_start_local, t_end)
        i += 1
    if len(deps) == 0:
        # The last query started i steps before t_start.
        warnings.warn(f"Could not find scheduler config within previous {i * step_back.jd} days")
    else:
        if i > 1:
            # Looped backwards to find a previous record: keep the most recent one.
            deps = deps.iloc[-1:].copy()
        # Reconfigure output to fit into script_status fields
        deps['classname'] = "Scheduler dependencies"
        deps['description'] = deps['scheduler'] + ' ' + deps['seeingModel']
        models = [c for c in deps.columns if 'observatory' in c or 'Model' in c]

        def build_dep_string(x, models):
            return ", ".join(f"{m}: {x[m]}" for m in models)
        deps['config'] = deps.apply(build_dep_string, args=[models], axis=1)
        deps['script_salIndex'] = -1

    # The configurationApplied should happen with every scheduler update
    topic = 'lsst.sal.Scheduler.logevent_configurationApplied'
    fields = await efd_client.get_fields(topic)
    fields = [f for f in fields if "private" not in f]
    con = await efd_client.select_time_series(topic, fields, t_start_local, t_end)
    if len(con) > 0:
        con['classname'] = "Scheduler configuration"
        # Build description from schemaVersion (just in case) and ts_config_ocs
        ts_config_ocs_in_place = []
        for config_time in con.index:
            prev_obsenv = obsenv.loc[obsenv.index < config_time]
            if len(prev_obsenv) == 0:
                ts_config_ocs_in_place.append('Unknown')
            else:
                ts_config_ocs_in_place.append(prev_obsenv.iloc[-1]['ts_config_ocs'])
        con['ts_config_ocs'] = ts_config_ocs_in_place
        con['description'] = 'ts_config_ocs ' + con['ts_config_ocs'] + ' ' + con['schemaVersion']
        con.rename({'configurations': 'config'}, axis=1, inplace=True)
        con['script_salIndex'] = -1

    # Combine results. Empty query results have neither the columns nor the
    # time index needed below, so leave them out (obsenv is never empty here).
    dd = pd.concat([df for df in (deps, con, obsenv) if len(df) > 0])
    # Trim back results to t_start, keeping last previous update information
    # Trim obsenv back to range for other values
    # But keep last entry so we have easy record
    tt = pd.to_datetime(t_start.utc.datetime).tz_localize("UTC")
    # Keep last scheduler configuration update
    old_dd_sched = dd.query('index < @tt and classname == "Scheduler configuration"')[-1:]
    old_dd_deps = dd.query('index < @tt and classname == "Scheduler dependencies"')[-1:]
    old_dd_obsenv = dd.query('index < @tt and classname.str.contains("Obsenv")')[-1:]
    dd = dd.query('index >= @tt')
    sched_config = pd.concat([old_dd_sched, old_dd_obsenv, old_dd_deps, dd])

    # Reformat
    cols = ['classname', 'description', 'config', 'salIndex', 'script_salIndex']
    drop_cols = [c for c in sched_config.columns if c not in cols]
    sched_config.drop(drop_cols, axis=1, inplace=True)
    sched_config.sort_index(inplace=True)
    sched_config['timestampProcessStart'] = sched_config.index.copy().tz_localize(None).astype('datetime64[ns]')
    sched_config['finalScriptState'] = "Configuration"
    print(f"Found {len(sched_config)} scheduler configuration records")
    return sched_config

In [None]:
async def get_error_codes(t_start: Time, t_end: Time, efd_client: EfdClient) -> pd.DataFrame:
    """Get all messages from logevent_errorCode topics.

    Parameters
    ----------
    t_start : `astropy.Time`
        The time to start searching for error codes.
    t_end : `astropy.Time`
        The time at which to end searching for error codes.
    efd_client : `EfdClient`
        EfdClient to query the efd.

    Returns
    -------
    errs : `pd.DataFrame`
        Time-sorted error messages from every topic with 'errorCode' in
        its name, with columns named to match the narrative log columns.
        An empty dataframe with the same columns if there were none.
    """
    all_topics = await efd_client.get_topics()
    error_topics = [name for name in all_topics if 'errorCode' in name]

    # Query each error topic in turn, remembering where each record came from.
    found = []
    for topic in error_topics:
        records = await efd_client.select_time_series(topic, ['errorCode', 'errorReport'], t_start, t_end)
        if len(records) == 0:
            continue
        records['topic'] = topic
        found.append(records)

    if found:
        errs = pd.concat(found).sort_index()
        # Reduce the topic name to the name of the CSC.
        errs['component'] = errs['topic'].map(
            lambda name: name.replace("lsst.sal", "").replace("logevent_errorCode", "").replace(".", "") + "CSC error"
        )
        # Rename some columns to match narrative log columns
        errs = errs.rename(columns={'errorCode': 'error_code', 'errorReport': 'message_text', 'topic': 'origin'})
        # Add a salindex so we can color-code based on this as a "source"
        errs['salIndex'] = 4
        errs['finalStatus'] = "ERR"
        errs['timestampProcessStart'] = errs.index.values.copy()
    else:
        # Make an empty dataframe.
        errs = pd.DataFrame([], columns=['component', 'error_code', 'message_text', 'origin', 'salIndex', 'finalStatus', 'timestampProcessStart'])

    print(f"Found {len(errs)} error messages")
    return errs

async def get_watcher_alarms(t_start: Time, t_end: Time, efd_client: EfdClient) -> pd.DataFrame:
    """Get and consolidate watcher alarms from lsst.sal.Watcher.logevent_alarm topic.

    Parameters
    ----------
    t_start : `astropy.Time`
        The time to start searching for alarms.
    t_end : `astropy.Time`
        The time at which to end searching for alarms.
    efd_client : `EfdClient`
        EfdClient to query the efd.

    Returns
    -------
    watcher_messages : `pd.DataFrame`
        Consolidated alarms, indexed by (UTC) ``timestampSeverityOldest``
        and sorted by time, with columns renamed to match the error code
        messages. If there were no alarms in the time period, an empty
        dataframe is returned.
    """
    topic = 'lsst.sal.Watcher.logevent_alarm'
    fields = await efd_client.get_fields(topic)
    fields = [f for f in fields if ('private' not in f) and (f != 'name') and (f != 'duration')]
    watcher_messages = await efd_client.select_time_series(topic, fields, t_start, t_end)
    if len(watcher_messages) == 0:
        # The consolidation below needs the topic's columns, which an empty
        # query result does not have.
        print(f"Found {len(watcher_messages)} watcher messages")
        return pd.DataFrame([], columns=['component', 'error_code', 'message_text', 'origin', 'salIndex', 'finalStatus'])
    # Convert severity to readable string.
    watcher_messages['severity'] = watcher_messages.apply(apply_enum, args=('severity', AlarmSeverity), axis=1)
    # Convert times for readability.
    for col in [c for c in watcher_messages.columns if 'timestamp' in c]:
        watcher_messages[col] = Time(watcher_messages[col], format='unix_tai').utc.datetime
    # Join on reason to consolidate messages, then join on timestampAcknowledged and timestampSeverityOldest
    watcher_messages = watcher_messages.groupby(['reason', 'timestampAcknowledged']).first()
    watcher_messages.reset_index(drop=False, inplace=True)
    # Join watcher messages based on timestampSeverityOldest too, maybe
    watcher_messages = watcher_messages.groupby('timestampSeverityOldest').first()
    watcher_messages.reset_index(drop=False, inplace=True)
    watcher_messages.index = watcher_messages['timestampSeverityOldest'].copy()
    watcher_messages.index.names = [None]
    watcher_messages.index = watcher_messages.index.tz_localize("UTC")
    # Rename some columns for merge with errors
    watcher_messages.rename({'reason': 'message_text', 'escalateTo': 'component',  'acknowledgedBy': 'origin', 'severity': 'error_code'}, axis=1, inplace=True)
    watcher_messages['salIndex'] = 4
    # NOTE(review): this overwrites the severity name that was just renamed
    # to 'error_code', so the severity is not part of the output - confirm
    # that this is intended.
    watcher_messages['error_code'] = 0
    watcher_messages['finalStatus'] = "ALARM"
    print(f"Found {len(watcher_messages)} watcher messages")
    return watcher_messages.sort_index()


def get_narrative_log(t_start: Time, t_end: Time, narrative_log_endpoint: str, auth: dict) -> pd.DataFrame:
    """Fetch narrative log entries and reshape them to sit next to the error data.

    Parameters
    ----------
    t_start : `astropy.time.Time`
        Lower bound passed as ``min_date_begin``.
    t_end : `astropy.time.Time`
        Upper bound passed as ``max_date_begin``.
    narrative_log_endpoint : `str`
        Endpoint handed to ``query_logging_services``.
    auth : `dict`
        Authentication information handed to ``query_logging_services``.

    Returns
    -------
    messages : `pd.DataFrame`
        The query result. When it is non-empty it is indexed by the
        UTC-localized ``date_added`` and gains ``component``, ``salIndex``,
        ``error_code``, ``finalStatus`` and ``timestamp*`` columns.
    """
    log_limit = 50000
    params = dict(
        is_human="either",
        is_valid="true",
        has_date_begin=True,
        min_date_begin=t_start.to_datetime(),
        max_date_begin=t_end.to_datetime(),
        order_by="date_begin",
        limit=log_limit,
    )
    messages = query_logging_services(narrative_log_endpoint, auth=auth, params=params)

    def utc_datetime_from(column):
        # Build a row-wise converter: the value in `column` is read as an ISO string on the
        # TAI scale and returned as a UTC datetime.
        return lambda row: Time(row[column], format='isot', scale='tai').utc.datetime

    if len(messages) > 0:
        # Collapse \r\n and doubled newlines, and drop trailing newlines, for nicer printing.
        messages['message_text'] = messages.apply(
            lambda row: row.message_text.replace("\r\n", "\n").replace("\n\n", "\n").rstrip("\n"),
            axis=1,
        )
        # Time index comes from date_added, as this makes the entries align best at present.
        messages['time'] = messages.apply(utc_datetime_from("date_added"), axis=1)
        messages = messages.set_index('time')
        messages.index = messages.index.tz_localize("UTC")
        # "Log" alone when no components are given, otherwise "Log" plus the joined components.
        messages['component'] = messages.apply(
            lambda row: "Log" if row.components is None else "Log " + " ".join(row.components),
            axis=1,
        )
        # Column names matching the error data.
        messages = messages.rename(columns={'time_lost_type': 'error_code', 'user_id': 'origin'})
        # salIndex 0 acts as the "source" tag used for color-coding.
        messages['salIndex'] = 0
        messages['error_code'] = 0
        messages['finalStatus'] = "Log"
        timestamp_sources = (
            ('timestampProcessStart', 'date_begin'),
            ('timestampRunStart', 'date_added'),
            ('timestampProcessEnd', 'date_end'),
        )
        for target, source in timestamp_sources:
            messages[target] = messages.apply(utc_datetime_from(source), axis=1)

    n_found = len(messages)
    print(f"Found {n_found} messages in the narrative log")
    if n_found == log_limit:
        print(f"Whoops, likely lost some log messages due to limit of {log_limit}.")
    return messages

In [None]:
async def get_narrative_and_errs(t_start: Time, t_end: Time, efd_client: EfdClient, 
                                 narrative_log_endpoint: str | None, auth: dict | None, 
                                 include_watcher : bool = True) -> pd.DataFrame:
    """Get narrative log entries, errorCodes and (optionally) watcher alarms.

    Parameters
    ----------
    t_start : `astropy.time.Time`
        Start of the time window.
    t_end : `astropy.time.Time`
        End of the time window.
    efd_client : `EfdClient`
        Client used for the errorCode and watcher queries.
    narrative_log_endpoint : `str` or `None`
        Narrative log endpoint; if None the narrative log is not queried.
    auth : `dict` or `None`
        Authentication information passed on to the narrative log query.
    include_watcher : `bool`
        If True, include the output of ``get_watcher_alarms``.

    Returns
    -------
    narrative_and_errs : `pd.DataFrame`
        Row-wise concatenation of error codes, watcher alarms and narrative
        log messages, sorted by index.
    """
    if narrative_log_endpoint is not None:
        messages = get_narrative_log(t_start, t_end, narrative_log_endpoint, auth)
    else:
        messages = pd.DataFrame([])

    errs = await get_error_codes(t_start, t_end, efd_client)
    if include_watcher:
        watcher = await get_watcher_alarms(t_start, t_end, efd_client)
    else:
        # Watcher alarms are skipped entirely; an empty frame keeps the concat below uniform.
        watcher = pd.DataFrame([])
        print(f"Kept {len(watcher)} watcher messages")

    # Merge narrative log messages and error messages
    return pd.concat([errs, watcher, messages]).sort_index()

In [None]:
async def _get_camera_images(t_start: Time, t_end: Time, efd_client: EfdClient,
                             topic: str, sal_index: int, label: str) -> pd.DataFrame:
    """Query one camera's endOfImageTelemetry topic and tag the rows for the combined table."""
    fields = ['imageName', 'imageIndex', 'exposureTime', 'darkTime', 'measuredShutterOpenTime', 
              'additionalValues', 'timestampAcquisitionStart', 'timestampDateEnd', 'timestampDateObs']
    images = await efd_client.select_time_series(topic, fields, t_start, t_end)
    # With zero images in this time period there is nothing to convert or tag.
    if len(images) == 0:
        return images
    for col in [c for c in images.columns if c.startswith("timestamp")]:
        # NOTE(review): both cameras are treated as publishing TAI timestamps; the original
        # code questioned whether that holds for ATCamera - confirm.
        images[col] = Time(images[col], format='unix_tai').utc.datetime
    images['salIndex'] = sal_index
    images['script_salIndex'] = 0
    images['finalStatus'] = "Image Acquired"
    images['config'] = images.apply(
        lambda x: f"exp {x.exposureTime} // dark {x.darkTime} // open {x.measuredShutterOpenTime} ",
        axis=1,
    )
    images.index = images['timestampAcquisitionStart'].copy()
    images.index = images.index.tz_localize("UTC")
    print(f"Found {len(images)} image times for {label}")
    return images


async def get_exposure_info(t_start: Time, t_end: Time, efd_client: EfdClient, 
                            exposure_log_endpoint: str | None, auth: dict | None) -> pd.DataFrame:
    """Get exposure information from the CCCamera and ATCamera
    logevent_endOfImageTelemetry topics and join it with exposure log information.

    Parameters
    ----------
    t_start : `astropy.time.Time`
        Start of the time window.
    t_end : `astropy.time.Time`
        End of the time window.
    efd_client : `EfdClient`
        Client used to query the EFD.
    exposure_log_endpoint : `str` or `None`
        Exposure log endpoint; if None the exposure log is not queried.
    auth : `dict` or `None`
        Authentication information passed on to the exposure log query.

    Returns
    -------
    image_acquisition : `pd.DataFrame`
        Image events (salIndex 5 for CCCamera, 6 for ATCamera) and, when
        available, exposure log entries (salIndex 0) placed 1 ms after the
        acquisition start of the image they refer to. Log entries whose
        ``obs_id`` matches no image in the window get a NaT time.
    """
    # Find exposure information - Simonyi Tel
    image_acquisition_cc = await _get_camera_images(
        t_start, t_end, efd_client, 'lsst.sal.CCCamera.logevent_endOfImageTelemetry', 5, "Simonyi")
    # Find exposure information - Aux Tel
    image_acquisition_at = await _get_camera_images(
        t_start, t_end, efd_client, 'lsst.sal.ATCamera.logevent_endOfImageTelemetry', 6, "AuxTel")

    image_acquisition = pd.concat([image_acquisition_cc, image_acquisition_at])

    # Now get exposure log information if exposure_log_endpoint defined.
    if exposure_log_endpoint is not None:
        log_limit = 50000
        # A cheap conversion to dayobs int
        min_dayobs_int = int(t_start.iso[0:10].replace('-', ''))
        max_dayobs_int = int(t_end.iso[0:10].replace('-', ''))
        params = {"is_human" : "either",
                  "is_valid" : "true",
                  "min_day_obs" : min_dayobs_int,
                  "max_day_obs" : max_dayobs_int,
                  "limit": log_limit, 
                 }

        exp_logs = query_logging_services(exposure_log_endpoint, auth=auth, params=params)
        print(f"Found {len(exp_logs)} messages in the exposure log")

        # Modify exposure log and match with exposures to add time tag.
        if len(exp_logs) > 0:
            if len(image_acquisition) > 0 and 'imageName' in image_acquisition.columns:
                # Look the acquisition start up by image name. Keying the lookup on the
                # log's own obs_id keeps the result aligned with exp_logs row by row.
                start_by_image = (
                    image_acquisition
                    .drop_duplicates('imageName')
                    .set_index('imageName')['timestampAcquisitionStart']
                )
                # Set the time for the exposure log just slightly after the image start time
                exp_log_image_time = exp_logs['obs_id'].map(start_by_image) + EPS_TIME
            else:
                # No images in the window: the log entries cannot be tied to an image time.
                exp_log_image_time = pd.Series(pd.NaT, index=exp_logs.index, dtype='datetime64[ns]')
            exp_logs['img_time'] = exp_log_image_time
            exp_logs.set_index('img_time', inplace=True)
            exp_logs.index = exp_logs.index.tz_localize("UTC")
            exp_logs['salIndex'] = 0
            exp_logs['script_salIndex'] = 0
            # Rename some columns in the exposure log so that we can consolidate them here
            exp_logs.rename({'obs_id': 'imageName', 'user_id': 'config', 'message_text': 'additionalValues', 'exposure_flag': 'finalStatus'}, axis=1, inplace=True)
            image_acquisition = pd.concat([image_acquisition, exp_logs]).sort_index()
            print("Joined exposure and exposure log")
    return image_acquisition

In [None]:
async def get_consolidated_messages(t_start: Time, t_end: Time, include_watcher: bool = False) -> tuple[pd.DataFrame, list]:
    """Get consolidated messages from EFD ScriptQueue, errorCodes, CCCamera, exposure and narrative logs.

    Parameters
    ----------
    t_start : `astropy.Time`
        Time of the start of the messages.
    t_end : `astropy.Time`
        Time of the end of the messages.
    include_watcher : `bool`
        Include messages from Watcher.logevent_alarms?

    Returns
    -------
    efd_and_messages : `pd.DataFrame`
        All sources concatenated and time-sorted, with an integer index and
        the former time index available as the ``time`` column.
    cols : `list` [`str`]
        The common column names the sources are renamed to, in display order.
    """
    endpoints = get_clients()
    print(endpoints['string'])
    
    # Now rename columns so we can put these all into the same dataframe
    # goal columns : 
    cols = ['time', 'name', 'description', 'config', 'script_salIndex', 'salIndex', 'finalStatus', 'timestampProcessStart', 'timestampConfigureEnd', 'timestampRunStart', 'timestampProcessEnd'] 
    
    # columns from scripts
    script_status = await get_script_status(t_start, t_end, endpoints['efd'])
    tracebacks = await get_tracebacks(t_start, t_end, endpoints['efd'])
    scheduler_configs = await get_scheduler_configs(t_start, t_end, endpoints['efd'], endpoints['obsenv'])
    script_status = pd.concat([scheduler_configs, script_status, tracebacks])
    script_status.rename({'classname': 'name', 'finalScriptState': 'finalStatus'}, axis=1, inplace=True)
    
    # columns from narrative and errors
    narrative_and_errs = await get_narrative_and_errs(t_start, t_end, endpoints['efd'], endpoints['api_base'] + endpoints['narrative_log'], endpoints['auth'], include_watcher=include_watcher)
    narrative_and_errs.rename({'component': 'name', 'origin': 'config', 'message_text': 'description', 'error_code': 'script_salIndex'}, axis=1, inplace=True)
    # The watcher timestamp columns are only present when watcher alarms were included.
    if 'timestampSeverityOldest' in narrative_and_errs.columns:
        narrative_and_errs.rename({'timestampSeverityOldest': 'timestampProcessStart', 'timestampAcknowledged': 'timestampConfigureEnd', 'timestampMaxSeverity': 'timestampRunStart'}, axis=1, inplace=True)
    
    # columns from images_and_logs
    image_and_logs = await get_exposure_info(t_start, t_end, endpoints['efd'], endpoints['api_base'] + endpoints['exposure_log'], endpoints['auth'])
    image_and_logs.rename({'imageName': 'name', 'additionalValues' : 'description', 
                           'timestampAcquisitionStart': 'timestampProcessStart', 'timestampDateObs': 'timestampRunStart', 'timestampDateEnd': 'timestampProcessEnd'}, axis=1, inplace=True) 

    efd_and_messages = pd.concat([script_status, narrative_and_errs, image_and_logs]).sort_index()
    # Wrap description, which may have long messages without spaces in the errors
    efd_and_messages['description'] = efd_and_messages['description'].str.wrap(100)

    # Add some big labels which could be used to indicate foldups
    # The blocks can be complicated - a single BLOCK can actually trigger multiple AddBlock commands (?)
    # So go back and check command_addBlock directly.
    topic = 'lsst.sal.Scheduler.command_addBlock'
    block_names = await endpoints['efd'].select_time_series(topic, ['id'], t_start, t_end, index=None)
    # Find the FBS setup and starts
    fbs_resume_times = efd_and_messages.query('name == "MTSchedulerResume"')
    scheduler_configs = efd_and_messages.query('name == "Scheduler configuration"')
    def find_fbs_yaml(row, scheduler_configs):
        """Return the last comma-separated item of the latest config preceding this resume."""
        earlier_configs = scheduler_configs.query('index < @row.name')
        if len(earlier_configs) == 0:
            # The scheduler was resumed with a configuration loaded before this time window.
            return "Unknown FBS configuration"
        best_config = earlier_configs.iloc[-1].config
        if not isinstance(best_config, str):
            return "Unknown FBS configuration"
        return best_config.split(',')[-1]
    sched_yamls = fbs_resume_times.apply(find_fbs_yaml, args=[scheduler_configs], axis=1)
    sched_yamls = pd.DataFrame(sched_yamls, columns=['id'])
    if len(block_names) > 0 and len(sched_yamls) > 0:
        foldups = pd.concat([block_names, sched_yamls])
    elif len(block_names) == 0:
        foldups = sched_yamls
    else:
        foldups = block_names

    if len(foldups) > 0:
        # If we actually have some addBlock or resumeScheduler events, add those. 
        # Note that we could have images and events -- running from scripts. 
        # .. but I don't know how to track these.
        foldups = foldups.sort_index()
        foldups.rename({'id': 'name'}, axis=1, inplace=True)
        foldups['salIndex'] = 0
        foldups['script_salIndex'] = -1
        foldups['finalStatus'] = 'Job Change'
        foldups['config'] = ''
        foldups['description'] = 'New BLOCK or FBS configuration'
        foldups['timestampProcessStart'] = foldups.index.copy()
        # Each foldup ends where the next one starts; the last one ends at the final message time.
        foldups['timestampProcessEnd'] = np.concatenate([foldups.index[1:].copy(), np.array([efd_and_messages.index[-1]])])
        efd_and_messages = pd.concat([efd_and_messages, foldups]).sort_index()


    # use an integer index, which makes it easier to pull up values plus avoids occasional failures of time uniqueness
    efd_and_messages.reset_index(drop=False, inplace=True)
    efd_and_messages.rename({'index': 'time'}, axis=1, inplace=True)

    print(f"Total combined messages {len(efd_and_messages)}")

    
    return efd_and_messages, cols

In [None]:
# Define name and colours from salIndex    
def get_name_and_color_from_salindex(sal_index, unknown_color='#f9f9f9'):
    """Look up the display label and background color for a salIndex.

    Parameters
    ----------
    sal_index : hashable
        The salIndex value to look up.
    unknown_color : `str`
        Color returned for a salIndex that is not in the table.

    Returns
    -------
    label_and_color : `tuple` [`str`, `str`]
        ``(label, color)``; ``("??", unknown_color)`` for an unknown salIndex.
    """
    # Colors from https://medialab.github.io/iwanthue/
    labels_and_colors = dict([
        (0, ('Narrative log', '#cf7ddc')),
        (1, ('MTQueue', '#b4c546')),
        (2, ('ATQueue', '#bab980')),
        (3, ('OCSQueue', '#b2baad')),
        (4, ('EFD error', '#9cb5d5')),
        (5, ('Simonyi Exposure', '#b6ecf5')),
        (6, ('Auxtel Exposure', '#d8f1f5')),
    ])
    try:
        return labels_and_colors[sal_index]
    except KeyError:
        return ("??", unknown_color)
    
# Add a custom formatter to handle YAML-like strings with dynamic background colors
def format_config_as_yaml_with_colors(row):
    """Format the ``config`` value of one row for HTML display.

    Parameters
    ----------
    row : mapping-like
        Must provide ``config``, ``salIndex`` and ``script_salIndex`` by key.

    Returns
    -------
    formatted : `str` or original value
        * Strings starting with ``Traceback`` are wrapped in a colored ``<pre>``.
        * Non-empty strings from the script queues (salIndex 1, 2 or 3, with
          ``script_salIndex`` > 0) are re-dumped as block-style YAML inside a
          colored ``<pre>``; if YAML parsing fails the raw text is used.
        * Anything else, including non-string values, is returned unchanged.

        Text placed inside ``<pre>`` is HTML-escaped so that content such as
        ``<module>`` is shown rather than interpreted as a tag.
    """
    config_value = row['config']
    sal_index = row['salIndex']
    script_salindex = row['script_salIndex']

    # Background color follows the salIndex of the row.
    background_color = get_name_and_color_from_salindex(sal_index)[1]

    # Missing configs (NaN/None) have no string methods; hand them back untouched.
    if not isinstance(config_value, str):
        return config_value

    if config_value.startswith('Traceback'):
        return f"<pre style='background: {background_color}'>{html.escape(config_value, quote=False)}</pre>"

    might_be_yaml = (script_salindex > 0) and (sal_index in [1, 2, 3]) and len(config_value) > 0
    if not might_be_yaml:
        return config_value  # Return as-is if this is not a script queue configuration

    try:
        # Parse the YAML-like string, then format back to YAML with proper indentation
        block_text = yaml.dump(yaml.safe_load(config_value), default_flow_style=False)
    except yaml.YAMLError:
        # If parsing fails, fall back to the plain text in the same styled <pre> block
        block_text = config_value
    return (
        f"<pre style='background: {background_color}; padding: 10px; border: 1px solid #ddd; margin: 0;'>"
        f"{html.escape(block_text, quote=False)}</pre>"
    )

def format_tracebacks(row):
    """HTML-escape the ``description`` of one row.

    Non-string descriptions (e.g. NaN or None for rows that have none) are
    returned unchanged, because ``html.escape`` only accepts strings.
    """
    description = row.description
    if not isinstance(description, str):
        return description
    return html.escape(description)

    
def pretty_print_messages(efd_and_messages: pd.DataFrame, cols: list, time_order: str,
                         show_salIndex: list[int] = [0, 1, 2, 3, 4, 5]) -> None:
    """Display the consolidated messages as a color-coded HTML table.

    Parameters
    ----------
    efd_and_messages : `pd.DataFrame`
        Consolidated messages; needs ``salIndex``, ``script_salIndex``,
        ``config``, ``description`` and every column named in ``cols``.
        The caller's frame is not modified.
    cols : `list` [`str`]
        Columns to show, in order.
    time_order : `str`
        ``"newest first"`` reverses the row order; any other value keeps it.
    show_salIndex : `list` [`int`]
        Only rows whose ``salIndex`` is in this list are shown.
    """
    selected = efd_and_messages[efd_and_messages.salIndex.isin(show_salIndex)]
    if len(selected) == 0:
        # A row-wise apply on an empty frame does not yield a column, so stop here.
        print("No messages to display for the requested salIndex values.")
        return

    def highlight_salindex(s):
        return [f'background-color: {get_name_and_color_from_salindex(s.salIndex)[1]}'] * len(s)

    # Legend: one colored label per salIndex present in the selection.
    msg = ["Color coding by "]
    for i in np.sort(selected.salIndex.unique()):
        what, color = get_name_and_color_from_salindex(i)
        msg.append(f" <font style='background-color: {color};'>{what}</font> ")
    display(HTML(" ".join(msg)))

    if time_order == "newest first":
        selected = selected[::-1]
    # Copy before assigning columns so the writes go to our own frame, not a slice.
    selected = selected.copy()
    # Apply yaml-like formatting conditionally
    selected['config'] = selected.apply(format_config_as_yaml_with_colors, axis=1)
    selected['description'] = selected.apply(format_tracebacks, axis=1)
    # Adjust the display call to include the formatted column
    styled_table = (
        selected[cols]
        .style.apply(highlight_salindex, axis=1)  # Preserve color formatting for other columns
        .set_table_styles([dict(selector='th', props=[('text-align', 'left')])])
        .set_properties(**{'text-align': 'left'})
    )

    # Render with HTML
    display(HTML(styled_table.format().to_html()))
    return

In [None]:
# Set a range of times to search, based on dayobs
if day_obs_min.lower() == "today":
    # Shift the 12hour offset following the definition of day_obs in https://sitcomtn-032.lsst.io/    
    # Drop the hours, minutes, seconds to get the ISO formatted day_obs
    day_obs_min = Time(np.floor(Time.now().mjd - 0.5), format='mjd', scale='utc').iso[0:10]

if day_obs_min.lower() == "yesterday":
    # Shift the 12hour offset following the definition of day_obs in https://sitcomtn-032.lsst.io/
    # Drop the hours, minutes, seconds to get the ISO fromatted day_obs
    day_obs_min = (Time(np.floor(Time.now().mjd - 0.5), format='mjd', scale='utc') - TimeDelta(1, format='jd')).iso[0:10]

# Set a range of times to search, based on dayobs
if day_obs_max.lower() == "today":
    # Shift the 12hour offset following the definition of day_obs in https://sitcomtn-032.lsst.io/    
    # Drop the hours, minutes, seconds to get the ISO formatted day_obs
    day_obs_max = Time(np.floor(Time.now().mjd - 0.5), format='mjd', scale='utc').iso[0:10]

if day_obs_max.lower() == "yesterday":
    # Shift the 12hour offset following the definition of day_obs in https://sitcomtn-032.lsst.io/
    # Drop the hours, minutes, seconds to get the ISO fromatted day_obs
    day_obs_max = (Time(np.floor(Time.now().mjd - 0.5), format='mjd', scale='utc') - TimeDelta(1, format='jd')).iso[0:10]

try:
    t_start = Time(f"{day_obs_min}T12:00:00", format='isot', scale='utc')
except ValueError:
    print(f"Is day_obs_min the right format? {day_obs_min} should be YYYY-MM-DD")
    t_start = None
try:
    t_end = Time(f"{day_obs_max}T12:00:00", format='isot', scale='utc') + TimeDelta(1, format='jd')
except ValueError:
    print(f"Is day_obs_max the right format? {day_obs_max} should be YYYY-MM-DD")
    t_start = None

if t_start is None or t_end is None:
    print("Did not get valid inputs for time period.")
    

print(f"Querying for messages from {t_start.iso} to {t_end.iso}")
print(f"Notebook executed at {Time.now().utc.iso}")
efd_and_messages, cols = await get_consolidated_messages(t_start, t_end)

# Could add these to parameters
save_log = False
make_link = False

if save_log:
    log_filename = f"log_{day_obs_min}_{day_obs_max}.h5"
    # We will always get a performance warning here, because the dataframe includes string objects
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        efd_and_messages[cols].to_hdf(log_filename, key='messages')
        print(f"Wrote to {log_filename}")
if make_link:
    import base64
    html_table = efd_and_messages[cols].to_xml(index=False)
    b64 = base64.b64encode(html_table.encode())
    payload = b64.decode()
    log_xml =  f"log_{day_obs_min}_{day_obs_max}.xml"
    html_link = f'<a download="{log_xml}" href="data:text/csv;base64,{payload}" target="_blank">Download XML table of log messages</a>'
    display(HTML(html_link))
    print(" read download with pandas.read_xml, convert times using .astype('datetime64[ns]')")

In [None]:
# 'all' (in any letter case) selects every salIndex present in the combined messages.
if isinstance(show_salIndex, str):
    if show_salIndex.lower() == 'all':
        show_salIndex = efd_and_messages.salIndex.unique()
# Whatever we hold now (a string, a list or an array) is walked element by element
# and reduced to a list of ints; elements int() rejects with ValueError are dropped.
# (Simple because there are no negative int salIndexes to parse.)
showsal = []
for candidate in show_salIndex:
    try:
        as_int = int(candidate)
    except ValueError:
        continue
    showsal.append(as_int)
show_salIndex = showsal

pretty_print_messages(efd_and_messages, cols, time_order, show_salIndex=show_salIndex)

In [None]:
import pandas as pd
import asyncio
from astropy.time import Time, TimeDelta

# --- Define your time range ---
# (Adjust day_obs_min and day_obs_max as needed.)
day_obs_min = "2024-11-06"
day_obs_max = "2024-11-07"
#day_obs_max = "2024-11-18"

try:
    t_start = Time(f"{day_obs_min}T12:00:00", format='isot', scale='utc')
except ValueError:
    raise ValueError(f"day_obs_min should be in YYYY-MM-DD format: got {day_obs_min}")
try:
    t_end = Time(f"{day_obs_max}T12:00:00", format='isot', scale='utc') + TimeDelta(1, format='jd')
except ValueError:
    raise ValueError(f"day_obs_max should be in YYYY-MM-DD format: got {day_obs_max}")

print(f"Querying for messages from {t_start.iso} to {t_end.iso}")

# --- Get the consolidated messages ---
# (This call is asynchronous. Adjust if you are using an async-enabled notebook.)
efd_and_messages, cols = await get_consolidated_messages(t_start, t_end)

# --- Map salIndex values to group names ---
salindex_mapping = {
    5: 'Simonyi Exposure',
    6: 'Auxtel Exposure',
    0: 'Narrative log',
    1: 'MTQueue',
    2: 'ATQueue',
    3: 'OCSQueue',
    4: 'EFD error'
}

# --- Create a dictionary of DataFrames, one per group ---
groups = {}
for sal_index, group_name in salindex_mapping.items():
    groups[group_name] = efd_and_messages[efd_and_messages['salIndex'] == sal_index]

# --- (Optional) Print summary counts for each group ---
print("Message counts by group:")
for group_name, df in groups.items():
    print(f"  {group_name}: {len(df)} messages")

In [None]:
# Give each group its own name so it can be used directly in later cells.
simonyi_df, auxtel_df, narrative_df, mtqueue_df, atqueue_df, ocsqueue_df, efd_error_df = (
    groups[group_label]
    for group_label in (
        'Simonyi Exposure', 'Auxtel Exposure', 'Narrative log',
        'MTQueue', 'ATQueue', 'OCSQueue', 'EFD error',
    )
)

# Show the first few rows of every group, each under its label.
labelled_frames = (
    ("Simonyi Exposure", simonyi_df),
    ("Auxtel Exposure", auxtel_df),
    ("Narrative log", narrative_df),
    ("MTQueue", mtqueue_df),
    ("ATQueue", atqueue_df),
    ("OCSQueue", ocsqueue_df),
    ("EFD error", efd_error_df),
)
for group_label, group_frame in labelled_frames:
    print(group_label + ":")
    display(group_frame.head())

In [None]:
import pandas as pd
import asyncio
from astropy.time import Time, TimeDelta

# --- Define tu rango de tiempo ---
day_obs_min = "2024-11-24"
day_obs_max = "2024-11-24"

try:
    t_start = Time(f"{day_obs_min}T12:00:00", format='isot', scale='utc')
except ValueError:
    raise ValueError(f"day_obs_min should be in YYYY-MM-DD format: got {day_obs_min}")
try:
    t_end = Time(f"{day_obs_max}T12:00:00", format='isot', scale='utc') + TimeDelta(1, format='jd')
except ValueError:
    raise ValueError(f"day_obs_max should be in YYYY-MM-DD format: got {day_obs_max}")

print(f"Querying for messages from {t_start.iso} to {t_end.iso}")

# --- Obtener los mensajes consolidados ---
# (Esta llamada es asíncrona; ajústala según tu entorno)
efd_and_messages, cols = await get_consolidated_messages(t_start, t_end)

# --- Mapeo de salIndex a nombres de grupo ---
salindex_mapping = {
    5: 'Simonyi Exposure',
    6: 'Auxtel Exposure',
    0: 'Narrative log',
    1: 'MTQueue',
    2: 'ATQueue',
    3: 'OCSQueue',
    4: 'EFD error'
}

# --- Crear un diccionario de DataFrames, uno por grupo ---
groups = {}
for sal_index, group_name in salindex_mapping.items():
    groups[group_name] = efd_and_messages[efd_and_messages['salIndex'] == sal_index]

# --- (Opcional) Mostrar conteos por grupo ---
print("Message counts by group:")
for group_name, df in groups.items():
    print(f"  {group_name}: {len(df)} messages")

# AstroChat

In [None]:
# Take the MTQueue rows from the most recent `groups` (rebuilt by the latest query cell).
# `mtqueue_df` was assigned before that re-query and would still hold the earlier time range.
df_custom = groups['MTQueue'][['config', 'name']]

In [None]:
from lsst.summit.extras.astrochat import set_api_key, AstroChat
%matplotlib inline

# Derive the integer day_obs (YYYYMMDD) from the day_obs_min used for the query, so the
# value handed to AstroChat cannot drift from the data that was actually fetched.
day_obs = int(day_obs_min.replace('-', ''))
set_api_key()
chat = AstroChat(data=df_custom, day_obs=day_obs, verbosity='ALL', export=True)

In [None]:
# Ask the AstroChat instance for its demos (see lsst.summit.extras for what list_demos reports).
chat.list_demos()

In [None]:
# Send a free-text prompt to AstroChat; `chat` was constructed with df_custom as its data.
chat.run('make a summary of the data')