In [None]:
# Parameters. Set defaults here.
# Times Square replaces this cell with the user's parameters.
# record_limit: maximum number of log records to request.  It is kept as a
# string here and converted with int() in a later cell before being sent as
# the `limit` query parameter.
record_limit = '99'

<a class="anchor" id="imports"></a>
## Imports and General Setup

In [None]:
# Only use packages available in the Rubin Science Platform
import requests
from collections import defaultdict
import pandas as pd
from pprint import pp
from urllib.parse import urlencode
from IPython.display import FileLink, display_markdown
from matplotlib import pyplot as plt
import os

In [None]:
# Connection settings shared by the cells below.

# The notebook parameter arrives as a string; the query needs an integer.
limit = int(record_limit)

# `timeout` is the (connect, read) pair handed to requests.get() further down.
response_timeout = 3.05  # seconds, how long to wait for connection
read_timeout = 20  # seconds
timeout = (float(response_timeout), float(read_timeout))

# Known hosts.  Only `summit` is used in this cell (as the fallback server).
summit = 'https://summit-lsp.lsst.codes'
usdf = 'https://usdf-rsp-dev.slac.stanford.edu'
tucson = 'https://tucson-teststand.lsst.codes'
# Use the EXTERNAL_INSTANCE_URL environment variable when it is set;
# otherwise fall back to the summit host.
server = os.environ.get('EXTERNAL_INSTANCE_URL', summit)
log = 'exposurelog'
# Base URL of the log service; endpoint paths are appended to it later.
service = f'{server}/{log}'
# Bare expression: shown as the cell output so the reader sees the URL in use.
service

<a class="anchor" id="setup_source"></a>
## Setup Source

In [None]:
# Notebook-wide state that the "Get Records" cell fills in.
recs = None  # retrieved log records; stays None until a fetch succeeds
ok = True    # set to False when retrieving the records fails

# Fields that are subtracted from the record fields before facets are built.
ignore_fields = {
    'tags',
    'urls',
    'message_text',
    'id',
    'date_added',
    'obs_id',
    'day_obs',
    'seq_num',
    'parent_id',
    'user_id',
    'date_invalidated',
    'date_begin',
    'date_end',
    'time_lost',  # float
    # 'systems', 'subsystems', 'cscs' are deliberately NOT listed:
    # values are lists, special handling
}

In [None]:
# logrep_utils.py

############################################
# Python Standard Library
from urllib.parse import urlencode
import itertools
from datetime import datetime
import warnings
from collections import defaultdict
############################################
# External Packages
import requests


# Upper bounds applied to whatever timeouts a caller asks for.
MAX_CONNECT_TIMEOUT = 3.1    # seconds
MAX_READ_TIMEOUT = 90 * 60   # seconds


class ApiAdapter:
    """Hold the server URL and request timeouts for a service adapter.

    Attributes set by the constructor:
      server    -- the ``server_url`` argument, stored unchanged
      c_timeout -- connect timeout in seconds, capped at MAX_CONNECT_TIMEOUT
      r_timeout -- read timeout in seconds, capped at MAX_READ_TIMEOUT
      timeout   -- the pair ``(c_timeout, r_timeout)``
    """

    def __init__(self, *,
                 server_url='https://tucson-teststand.lsst.codes',
                 connect_timeout=3.05,  # seconds
                 read_timeout=10 * 60,  # seconds
                 ):
        """Store the server URL and clamp both timeouts to their maximums.

        All arguments are keyword-only.  The timeouts are converted with
        float() before being compared against the module-level caps.
        """
        self.server = server_url

        connect_secs = min(MAX_CONNECT_TIMEOUT, float(connect_timeout))
        read_secs = min(MAX_READ_TIMEOUT, float(read_timeout))

        self.c_timeout = connect_secs  # seconds
        self.r_timeout = read_secs     # seconds
        self.timeout = (connect_secs, read_secs)


class ExposurelogAdapter(ApiAdapter):
    """Adapter for the ``exposurelog`` service of a server.

    Requests go to ``{self.server}/{self.service}/...`` using
    ``self.timeout``.  A failed retrieval is reported through
    ``warnings.warn`` and produces an empty result instead of raising.
    """
    service = 'exposurelog'

    def get_instruments(self):
        """Return a flat list of the instrument names known to the service.

        The endpoint answers with a mapping whose values are lists of
        names; those lists are concatenated.  Returns ``[]`` (after a
        warning) when the request fails or answers with an HTTP error.
        """
        url = f'{self.server}/{self.service}/instruments'
        try:
            response = requests.get(url, timeout=self.timeout)
            # Without this an HTTP error body would be parsed as data and
            # its string values flattened into single characters below.
            response.raise_for_status()
            instruments = response.json()
        except Exception as err:
            warnings.warn(f'No instruments retrieved: {err}')
            instruments = dict(dummy=[])
        # Flatten the lists
        return list(itertools.chain.from_iterable(instruments.values()))

    def get_exposures(self, instrument, registry=1):
        """Return the exposure records of one instrument.

        Parameters:
          instrument -- instrument name, sent as the ``instrument`` query
                        parameter
          registry   -- sent as the ``registry`` query parameter

        Returns the decoded JSON response, or ``[]`` (after a warning)
        when the request fails or answers with an HTTP error.
        """
        # The key must be spelled "registry"; the former "registery" meant
        # the argument was never sent under the intended name.
        qparams = dict(instrument=instrument, registry=registry)
        url = f'{self.server}/{self.service}/exposures?{urlencode(qparams)}'
        try:
            response = requests.get(url, timeout=self.timeout)
            response.raise_for_status()
            recs = response.json()
        except Exception as err:
            warnings.warn(f'No exposures retrieved: {err}')
            recs = []
        return recs

    def get_observation_gaps(self, instruments=None,
                             min_day_obs=None,  # YYYYMMDD
                             max_day_obs=None,  # YYYYMMDD
                             ):
        """Total the idle time between consecutive exposures, per day.

        Parameters:
          instruments -- list of instrument names; when empty or None the
                         list from ``get_instruments()`` is used
          min_day_obs, max_day_obs -- accepted for interface compatibility.
                         NOTE(review): they are not applied to the query or
                         to the results yet.

        Returns a ``defaultdict(dict)`` such that
        ``result[instrument][day_obs]`` is the sum, in minutes, of the
        differences between each exposure's ``timespan_begin`` and the
        previous exposure's ``timespan_end`` on that day.  A day with a
        single exposure maps to 0.  Instruments without exposures get no
        entry.

        Raises AssertionError when ``instruments`` is not a list.
        """
        if not instruments:
            instruments = self.get_instruments()
        assert isinstance(instruments,list), \
            f'"instruments" must be a list.  Got {instruments!r}'

        # inst_day_rollup[instrument] => dict[day] => exposureGapInMinutes
        inst_day_rollup = defaultdict(dict)  # Instrument/Day rollup

        for instrum in instruments:
            # itertools.groupby only merges ADJACENT records with equal keys,
            # and the gap arithmetic assumes chronological order, so order
            # the records by day and then by start time first.
            recs = sorted(self.get_exposures(instrum),
                          key=lambda r: (r['day_obs'], r['timespan_begin']))
            for day, dayrecs in itertools.groupby(recs,
                                                  key=lambda r: r['day_obs']):
                gap_minutes = 0
                end = None  # end of the previous exposure on this day
                for rec in dayrecs:
                    begin = rec['timespan_begin']
                    if end:
                        # span in minutes
                        gap_minutes += (datetime.fromisoformat(begin)
                                        - datetime.fromisoformat(end)
                                        ).total_seconds() / 60.0
                    end = rec['timespan_end']
                # Store each day's total once, instead of re-summing every
                # earlier day on each pass of the day loop.
                inst_day_rollup[instrum][day] = gap_minutes

        return inst_day_rollup



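# Example use when this code is imported as the module `logrep_utils`.
# get_observation_gaps() takes a LIST of instrument names and returns one value.
# gaps = logrep_utils.ExposurelogAdapter(server_url='https://usdf-rsp-dev.slac.stanford.edu').get_observation_gaps(['LSSTComCam'])

# gaps = logrep_utils.ExposurelogAdapter(server_url='https://tucson-teststand.lsst.codes').get_observation_gaps(['LSSTComCam'])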


In [None]:
# Ask the configured server for the per-instrument, per-day exposure gap totals.
gaps = ExposurelogAdapter(
    server_url=server,
).get_observation_gaps()

# Convert to a plain dict for display as the cell output.
dict(gaps)

<a class="anchor" id="get_records"></a>
## Get Records

In [None]:
# Endpoint: messages

# Examples of query parameters the endpoint has been called with:
#   is_human=either&is_valid=either&offset=0&limit=50
#   site_ids=tucson&message_text=wubba&min_level=0&max_level=999&user_ids=spothier&user_agents=LOVE
#   tags=love&exclude_tags=ignore_message
qstr = urlencode(dict(is_human='either',is_valid='either', limit=limit))
url = f'{service}/messages?{qstr}'

# Reset everything this cell produces so that a re-run never reports state
# left over from an earlier run, and so that later cells always find
# `flds` and `facets` defined even when the request fails.
ok = True
recs = None
flds = set()
facets = dict()

try:
    print(f'Attempt to get logs from {url=}')
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    recs = response.json()
    # An empty result is not an error: there are simply no fields or facets.
    if recs:
        flds = set(recs[0].keys())
    facflds = flds - ignore_fields
    # facets(field) = set(value-1, value-2, ...)
    # List-valued fields are skipped; every other value is kept as a string.
    facets = {fld: set([str(r[fld])
                for r in recs if not isinstance(r[fld], list)]) 
                    for fld in facflds}
except Exception as err:
    # Best effort: report the problem and let the notebook keep running.
    ok = False
    print(f'ERROR getting {log} from {url=}: {err=}')
numf = len(flds) if ok else 0
numr = len(recs) if ok else 0
print(f'Retrieved {numr} records, each with {numf=} fields.')

<a class="anchor" id="table"></a>
## Tables of (mostly raw) results

### Field names provided in records from log.

In [None]:
# `flds` is a set, whose iteration order is arbitrary; sort it so the table
# lists the field names in the same order on every run.
pd.DataFrame(sorted(flds), columns=['Field Name'])

### Facets from log records.
A *facet* is the set of all values found for a field in the retrieved records. Facets are only calculated for some fields.

In [None]:
# One row per field, one column per distinct value.  Each facet is a set of
# strings, so sort it to give every value a reproducible column position.
display(pd.DataFrame.from_dict(
    {fld: sorted(values) for fld, values in facets.items()},
    orient='index'))
# Also show the raw mapping of field name -> set of values.
facets

### Table of selected log record fields.
Table can be retrieved as CSV file for local use.

In [None]:
# Columns to show in the summary table (and to write to the CSV file).
cols = ['obs_id', 'user_id', 'user_agent','is_human','is_valid','exposure_flag']
# Passing `columns=` selects these fields while building the frame.  Unlike
# indexing with [cols] afterwards, it still yields an (empty) table with the
# expected columns when no records were retrieved.
df = pd.DataFrame(recs, columns=cols)

# Allow download of CSV version of DataFrame
csvfile = 'tl.csv'
df.to_csv(csvfile)
myfile = FileLink(csvfile)
print('Table available as CSV file: ')
display(myfile)
df

In [None]:
# Every field of every retrieved record, one row per record.
df = pd.DataFrame(data=recs)
df

In [None]:
# A wider selection of fields than the CSV table, including the message text.
cols = ['obs_id', 'site_id', 'instrument', 'message_text', 'tags','user_id', 'user_agent','is_human','is_valid','exposure_flag']
# `cols` used to be built and then ignored (columns=None), which made this
# cell a duplicate of the full table; apply the selection.
df = pd.DataFrame(recs, columns=cols)
df

<a class="anchor" id="plot"></a>
## Plots from log

In [None]:
# One heading and one bar chart per instrument: total exposure-gap minutes
# for each observing day.
for instrument, day_gaps in gaps.items():
    # The heading interpolates the loop variable, so it has to be emitted
    # inside the loop; before the loop `instrument` is not defined on a
    # fresh kernel.
    display_markdown(f'### Date vs ExposureGap (minutes) for {instrument=!s}', raw=True)
    if not day_gaps:
        # Nothing to plot, and zip(*...) of an empty mapping cannot be
        # unpacked into x and y.
        continue
    x,y = zip(*day_gaps.items())
    df = pd.DataFrame(dict(day=x,minutes=y))
    df.plot.bar(x='day', y='minutes', title=f'{instrument=!s}')
    # Render the figure now so it appears directly under its heading.
    plt.show()

<a class="anchor" id="raw_analysis"></a>
## Raw Content Analysis

### Example of one record

In [None]:
# Show the last retrieved record, first just its message text and then in full.
# `recs` is None when retrieval failed and may be empty when nothing matched;
# indexing it in either case would raise, so say so instead.
if recs:
    rec = recs[-1]

    msg = rec["message_text"]
    md = f'Message text from log:\n> {msg}'
    display_markdown(md, raw=True)

    md = f'One full record (the last one retrieved):\n> {rec}'
    display_markdown(md, raw=True)

    display(rec)
else:
    display_markdown('No records were retrieved, so there is no example record to show.',
                     raw=True)

<a class="anchor" id="elicitation"></a>
## Stakeholder Elicitation

In [None]:
#EXTERNAL_INSTANCE_URL
# Substrings (compared upper-case) that mark a variable as holding a secret.
SENSITIVE_NAME_MARKERS = ('TOKEN', 'SECRET', 'PASSWORD', 'PASSWD',
                          'CREDENTIAL', 'KEY')


def redacted_environment(environ):
    """Return a dict copy of `environ` with secret-looking values masked.

    A value is replaced by '<redacted>' when its variable name contains any
    of SENSITIVE_NAME_MARKERS, ignoring case.  Key order is preserved and
    `environ` itself is not modified.
    """
    return {
        name: ('<redacted>'
               if any(marker in name.upper()
                      for marker in SENSITIVE_NAME_MARKERS)
               else value)
        for name, value in environ.items()
    }


# The rendered notebook output is visible to others, so never print raw
# tokens or passwords taken from the environment.
ed = redacted_environment(os.environ)
with pd.option_context('display.max_rows', None,):
    print(pd.DataFrame(ed.values(), index=ed.keys()))