Skip to content

Commit

Permalink
Move json log file reader to ButlerLogRecords
Browse files Browse the repository at this point in the history
This makes the reader available to test code and to other users, so that
reading JSON log files is no longer limited to the butler formatter.
  • Loading branch information
timj committed Jul 21, 2021
1 parent fbc9386 commit 0d3b2fd
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 19 deletions.
52 changes: 51 additions & 1 deletion python/lsst/daf/butler/core/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import datetime
import traceback
from collections import defaultdict
from typing import List, Union, Optional, ClassVar, Iterable, Iterator, Dict
from typing import List, Union, Optional, ClassVar, Iterable, Iterator, Dict, IO

from logging import LogRecord, StreamHandler, Formatter
from pydantic import BaseModel, ValidationError
Expand Down Expand Up @@ -230,6 +230,56 @@ def from_records(cls, records: Iterable[ButlerLogRecord]) -> ButlerLogRecords:
"""
return cls(__root__=list(records))

@classmethod
def from_file(cls, filename: str) -> ButlerLogRecords:
    """Load log records from a JSON file on disk.

    Parameters
    ----------
    filename : `str`
        Name of the file holding the JSON records.

    Returns
    -------
    records : `ButlerLogRecords`
        The records obtained by passing the opened file to `from_stream`.

    Notes
    -----
    The file is opened in text mode and all parsing is delegated to
    `from_stream`, so both supported layouts are accepted: one JSON
    record per line, or a direct serialization of the Pydantic model.
    """
    with open(filename, "r") as stream:
        records = cls.from_stream(stream)
    return records

@classmethod
def from_stream(cls, stream: IO) -> ButlerLogRecords:
    """Read records from I/O stream.

    Parameters
    ----------
    stream : `typing.IO`
        Stream from which to read JSON records. May yield either `str`
        or `bytes` lines.

    Returns
    -------
    records : `ButlerLogRecords`
        The records read from the stream. An empty (or blank-only)
        stream results in a collection built from zero records.

    Raises
    ------
    RuntimeError
        Raised if the first non-blank line starts with neither ``[``
        nor ``{`` and so matches neither supported format.

    Notes
    -----
    Works with one-record-per-line format JSON files and a direct
    serialization of the Pydantic model.
    """
    # Skip leading blank lines. readline() returns an empty value only at
    # end of stream; a blank line still carries its newline character.
    first_line = stream.readline()
    while first_line and not first_line.strip():
        first_line = stream.readline()

    if not first_line:
        # Nothing but (optional) blank lines: no records to parse.
        return cls.from_records([])

    # Allow byte or str streams since pydantic supports either.
    # Slice rather than index: indexing bytes yields an int, which would
    # never compare equal to "[" or "{".
    lead = first_line.lstrip()[:1]
    if isinstance(lead, bytes):
        lead = lead.decode("ascii", errors="replace")

    if lead == "[":
        # This is a ButlerLogRecords model serialization, which may span
        # multiple lines, so hand the whole content to the parser.
        return cls.parse_raw(first_line + stream.read())

    # A stream of records with one record per line.
    if lead != "{":
        raise RuntimeError(f"Unrecognized JSON log format. First lines is '{first_line}'")

    records = [ButlerLogRecord.parse_raw(first_line)]
    for line in stream:
        # Lines keep their trailing newline, so a plain truthiness test
        # would not filter out blank lines; strip before testing.
        if line.strip():
            records.append(ButlerLogRecord.parse_raw(line))

    return cls.from_records(records)

@property
def log_format(self) -> str:
if self._log_format is None:
Expand Down
19 changes: 2 additions & 17 deletions python/lsst/daf/butler/formatters/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,22 +66,7 @@ def _readFile(self, path: str, pytype: Optional[Type[Any]] = None) -> Any:
elif not issubclass(pytype, ButlerLogRecords):
raise RuntimeError(f"Python type {pytype} does not seem to be a ButlerLogRecords type")

with open(path, "r") as fd:
first = fd.readline()
if first.startswith("["):
# This is a ButlerLogRecords serialization.
all = first + fd.read()
return pytype.parse_raw(all)

# A stream of records with one record per line.
if not first.startswith("{"):
raise RuntimeError(f"Unrecognized JSON log format. First lines is '{first}'")
records = [ButlerLogRecord.parse_raw(first)]
for line in fd:
if line: # Filter out blank lines.
records.append(ButlerLogRecord.parse_raw(line))

return pytype.from_records(records)
return pytype.from_file(path)

def _fromBytes(self, serializedDataset: bytes, pytype: Optional[Type[Any]] = None) -> Any:
"""Read the bytes object as a python object.
Expand All @@ -99,7 +84,7 @@ def _fromBytes(self, serializedDataset: bytes, pytype: Optional[Type[Any]] = Non
The requested data as a Python object or None if the string could
not be read.
"""
# Duplicates some of the logic from readFile above.
# Duplicates some of the logic from ButlerLogRecords.from_file
if pytype is None:
pytype = ButlerLogRecords
elif not issubclass(pytype, ButlerLogRecords):
Expand Down
3 changes: 2 additions & 1 deletion tests/test_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ def testJsonLogStream(self):

# Rewind the stream and pull messages out of it.
stream.seek(0)
records = [ButlerLogRecord.parse_raw(line) for line in stream]
records = ButlerLogRecords.from_stream(stream)
self.assertIsInstance(records[0], ButlerLogRecord)
self.assertEqual(records[0].message, "A message")
self.assertEqual(records[1].levelname, "WARNING")

Expand Down

0 comments on commit 0d3b2fd

Please sign in to comment.