Skip to content

Commit

Permalink
Add python log handler to store LogRecord as they come in
Browse files Browse the repository at this point in the history
These records can then be serialized in butler datastore.
  • Loading branch information
timj committed Jul 7, 2021
1 parent 6e62486 commit b838dc6
Show file tree
Hide file tree
Showing 2 changed files with 241 additions and 1 deletion.
176 changes: 175 additions & 1 deletion python/lsst/daf/butler/core/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,185 @@

from __future__ import annotations

__all__ = ("VERBOSE",)
__all__ = ("VERBOSE", "ButlerLogRecords", "ButlerLogRecordHandler")

import logging
import datetime
from typing import List, Union, Optional, ClassVar, Iterable, Iterator

from logging import LogRecord, StreamHandler
from pydantic import BaseModel, ValidationError

VERBOSE = (logging.INFO + logging.DEBUG) // 2
"""Verbose log level"""

_LONG_LOG_FORMAT = "%(levelname)s %(asctime)s %(name)s %(filename)s:%(lineno)s - %(message)s"
"""Default format for log records."""

logging.addLevelName(VERBOSE, "VERBOSE")


class ButlerLogRecord(BaseModel):
"""A model representing a `logging.LogRecord`.
A `~logging.LogRecord` always uses the current time in its record
when recreated and that makes it impossible to use it as a
serialization format. Instead have a local representation of a
`~logging.LogRecord` that matches Butler needs.
"""

_log_format: ClassVar[str] = _LONG_LOG_FORMAT

name: str
asctime: datetime.datetime
message: str
levelno: int
levelname: str
filename: str
lineno: int
funcName: Optional[str]
process: int
processName: str

class Config:
allow_mutation = False

@classmethod
def from_record(cls, record: LogRecord) -> ButlerLogRecord:
"""Create a new instance from a `~logging.LogRecord`.
Parameters
----------
record : `logging.LogRecord`
The record from which to extract the relevant information.
"""
# The properties that are one-to-one mapping.
simple = ("name", "levelno", "levelname", "filename", "lineno", "funcName", "process", "processName")

record_dict = {k: getattr(record, k) for k in simple}

record_dict["message"] = record.getMessage()

# Always use UTC because in distributed systems we can't be sure
# what timezone localtime is and it's easier to compare logs if
# every system is using the same time.
record_dict["asctime"] = datetime.datetime.fromtimestamp(record.created,
tz=datetime.timezone.utc)

return cls(**record_dict)

def format(self, log_format: Optional[str] = None) -> str:
"""Format this record.
Parameters
----------
log_format : `str`, optional
The format string to use. This string follows the standard
%-style use for formatting log messages. If `None` the class
default will be used.
Returns
-------
text : `str`
The formatted log message.
"""
if log_format is None:
log_format = self._log_format

as_dict = self.dict()

as_dict["asctime"] = as_dict["asctime"].isoformat()
formatted = log_format % as_dict
return formatted

def __str__(self) -> str:
return self.format()


# The class below can convert LogRecord to ButlerLogRecord if needed.
Record = Union[LogRecord, ButlerLogRecord]


# Do not inherit from MutableSequence since mypy insists on the values
# being Any even though we wish to constrain them to Record.
class ButlerLogRecords(BaseModel):
"""Class representing a collection of `ButlerLogRecord`.
"""

__root__: List[ButlerLogRecord]
_log_format: Optional[str] = None

@property
def log_format(self) -> str:
if self._log_format is None:
return _LONG_LOG_FORMAT
return self._log_format

@log_format.setter
def log_format(self, format: str) -> None:
self._log_format = format

def __len__(self) -> int:
return len(self.__root__)

# The signature does not match the one in BaseModel but that is okay
# if __root__ is being used.
# See https://pydantic-docs.helpmanual.io/usage/models/#custom-root-types
def __iter__(self) -> Iterator[ButlerLogRecord]: # type: ignore
return iter(self.__root__)

def __setitem__(self, index: int, value: Record) -> None:
self.__root__[index] = self._validate_record(value)

def __getitem__(self, index: int) -> ButlerLogRecord:
return self.__root__[index]

def __reversed__(self) -> Iterator[ButlerLogRecord]:
return self.__root__.__reversed__()

def __delitem__(self, index: int) -> None:
del self.__root__[index]

def __str__(self) -> str:
# Ensure that every record uses the same format string.
return "\n".join(record.format(self.log_format) for record in self.__root__)

def _validate_record(self, record: Record) -> ButlerLogRecord:
if isinstance(record, ButlerLogRecord):
pass
elif isinstance(record, LogRecord):
record = ButlerLogRecord.from_record(record)
else:
raise ValidationError(f"Can only not append item of type {type(record)}")
return record

def insert(self, index: int, value: Record) -> None:
self.__root__.insert(index, self._validate_record(value))

def append(self, value: Record) -> None:
value = self._validate_record(value)
self.__root__.append(value)

def clear(self) -> None:
self.__root__.clear()

def extend(self, records: Iterable[Record]) -> None:
self.__root__.extend(self._validate_record(record) for record in records)

def pop(self, index: int = -1) -> ButlerLogRecord:
return self.__root__.pop(index)

def reverse(self) -> None:
self.__root__.reverse()


class ButlerLogRecordHandler(StreamHandler):
"""Python log handler that accumulates records.
"""

def __init__(self) -> None:
super().__init__()
self.records = ButlerLogRecords(__root__=[])

def emit(self, record: LogRecord) -> None:
self.records.append(record)
66 changes: 66 additions & 0 deletions tests/test_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import unittest
import logging

from lsst.daf.butler import ButlerLogRecordHandler, ButlerLogRecords, VERBOSE


class LoggingTestCase(unittest.TestCase):
"""Test we can capture log messages."""

def testRecordCapture(self):
handler = ButlerLogRecordHandler()

log = logging.getLogger(self.id())
log.setLevel(VERBOSE)
log.addHandler(handler)

test_messages = (
(logging.INFO, "This is a log message", True),
(logging.WARNING, "This is a warning message", True),
(logging.DEBUG, "This debug message should not be stored", False),
(VERBOSE, "A verbose message should appear", True),
)

for level, message, _ in test_messages:
log.log(level, message)

expected = [info for info in test_messages if info[2]]

self.assertEqual(len(handler.records), len(expected))

for given, record in zip(expected, handler.records):
self.assertEqual(given[0], record.levelno)
self.assertEqual(given[1], record.message)

# Check that we can serialize the records
json = handler.records.json()

records = ButlerLogRecords.parse_raw(json)
for original_record, new_record in zip(handler.records, records):
self.assertEqual(new_record, original_record)
self.assertEqual(str(records), str(handler.records))


if __name__ == "__main__":
unittest.main()

0 comments on commit b838dc6

Please sign in to comment.