diff --git a/doc/source/available_checks.rst b/doc/source/available_checks.rst index 8094cdba..584ed87a 100644 --- a/doc/source/available_checks.rst +++ b/doc/source/available_checks.rst @@ -216,6 +216,57 @@ Requirements - `requests`_ +.. _check-last-log-activity: + +LastLogActivity +*************** + +.. program:: check-last-log-activity + +Parses a log file and uses the most recent time contained in the file to determine activity. +For this purpose, the log file lines are iterated from the back until a line matching a configurable regular expression is found. +This expression is used to extract the contained timestamp in that log line, which is then compared to the current time with an allowed delta. +The check only looks at the first line from the back that contains a timestamp. +Further lines are ignored. +A typical use case for this check would be a web server access log file. + +This check supports all date formats that are supported by the `dateutil parser `_. + +Options +======= + +.. option:: log_file + + path to the log file that should be analyzed + +.. option:: pattern + + A regular expression used to determine whether a line of the log file contains a timestamp to look at. + The expression must contain exactly one matching group. + For instance, ``^\[(.*)\] .*$`` might be used to find dates in square brackets at line beginnings. + +.. option:: minutes + + The number of minutes to allow log file timestamps to be in the past for detecting activity. + If a timestamp is older than `` - `` no activity is detected. + default: 10 + +.. option:: encoding + + The encoding with which to parse the log file. default: ascii + +.. option:: timezone + + The timezone to assume in case a timestamp extracted from the log file has not associated timezone information. + Timezones are expressed using the names from the Olson timezone database (e.g. ``Europe/Berlin``). + default: ``UTC`` + +Requirements +============ + +* `dateutil`_ +* `pytz`_ + .. _check-load: Load diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index 12720d8f..f5bae2b1 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -15,6 +15,7 @@ New activity checks ------------------- * :ref:`check-jsonpath`: Similar to the existing :ref`check-xpath`, the new checks requests a JSON URL and evaluates it against a `JSONPath`_ expression to determine activity (:issue:`81`). +* :ref:`check-last-log-activity`: Check log files for most recent contained timestamps (:issue:`98`, :issue:`99`). Fixed bugs ========== diff --git a/doc/source/conf.py b/doc/source/conf.py index c86ce6f6..c53d6c2e 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -68,6 +68,7 @@ .. _portalocker: https://portalocker.readthedocs.io .. _jsonpath-ng: https://github.com/h2non/jsonpath-ng .. _JSONPath: https://goessner.net/articles/JsonPath/ +.. _pytz: https://pythonhosted.org/pytz/ .. |project| replace:: {project} .. |project_bold| replace:: **{project}** diff --git a/src/autosuspend/checks/activity.py b/src/autosuspend/checks/activity.py index 1fe947eb..9358b27e 100644 --- a/src/autosuspend/checks/activity.py +++ b/src/autosuspend/checks/activity.py @@ -780,3 +780,103 @@ def check(self) -> Optional[str]: return None except (json.JSONDecodeError, requests.exceptions.RequestException) as error: raise TemporaryCheckError(error) from error + + +class LastLogActivity(Activity): + @classmethod + def create(cls, name: str, config: configparser.SectionProxy) -> "LastLogActivity": + import pytz + + try: + return cls( + name, + Path(config["log_file"]), + re.compile(config["pattern"]), + timedelta(minutes=config.getint("minutes", fallback=10)), + config.get("encoding", "ascii"), + pytz.timezone(config.get("timezone", "UTC")), # type: ignore + ) + except KeyError as error: + raise ConfigurationError( + "Missing config key {}".format(error), + ) from error + except re.error as error: + raise ConfigurationError( + "Regular expression is invalid: {}".format(error), + ) from error + except ValueError as error: + raise ConfigurationError( + "Unable to parse configuration: {}".format(error), + ) from error + + def __init__( + self, + name: str, + log_file: Path, + pattern: Pattern, + delta: timedelta, + encoding: str, + default_timezone: timezone, + ) -> None: + if delta.total_seconds() < 0: + raise ValueError("Given delta must be positive") + if pattern.groups != 1: + raise ValueError("Given pattern must have exactly one capture group") + super().__init__(name=name) + self.log_file = log_file + self.pattern = pattern + self.delta = delta + self.encoding = encoding + self.default_timezone = default_timezone + + def _safe_parse_date(self, match: str, now: datetime) -> datetime: + from dateutil.parser import parse + from dateutil.utils import default_tzinfo + + try: + match_date = default_tzinfo(parse(match), self.default_timezone) + if match_date > now: + raise TemporaryCheckError( + "Detected date {} is in the future".format(match_date) + ) + return match_date + except ValueError as error: + raise TemporaryCheckError( + "Detected date {} cannot be parsed as a date".format(match) + ) from error + except OverflowError as error: + raise TemporaryCheckError( + "Detected date {} is out of the valid range".format(match) + ) from error + + def _file_lines_reversed(self) -> Iterable[str]: + try: + # Probably not the most effective solution for large log files. Might need + # optimizations later on. + return reversed( + self.log_file.read_text(encoding=self.encoding).splitlines() + ) + except IOError as error: + raise TemporaryCheckError( + "Cannot access log file {}".format(self.log_file) + ) from error + + def check(self) -> Optional[str]: + lines = self._file_lines_reversed() + + now = datetime.now(tz=timezone.utc) + for line in lines: + match = self.pattern.match(line) + if not match: + continue + + match_date = self._safe_parse_date(match.group(1), now) + + # Only check the first line (reverse order) that has a match, not all + if (now - match_date) < self.delta: + return "Log activity in {} at {}".format(self.log_file, match_date) + else: + return None + + # No line matched at all + return None diff --git a/tests/test_checks_activity.py b/tests/test_checks_activity.py index 213c1141..9e16328a 100644 --- a/tests/test_checks_activity.py +++ b/tests/test_checks_activity.py @@ -1,5 +1,6 @@ from collections import namedtuple import configparser +from datetime import timedelta, timezone from getpass import getuser import json from pathlib import Path @@ -17,6 +18,7 @@ import pytest from pytest_httpserver import HTTPServer from pytest_mock import MockFixture +import pytz import requests from autosuspend.checks import ( @@ -32,6 +34,7 @@ JsonPath, Kodi, KodiIdleTime, + LastLogActivity, Load, LogindSessionsIdle, Mpd, @@ -1541,3 +1544,274 @@ def test_create_invalid_path(self) -> None: ) with pytest.raises(ConfigurationError): JsonPath.create("name", parser["section"]) + + +class TestLastLogActivity(CheckTest): + def create_instance(self, name: str) -> LastLogActivity: + return LastLogActivity( + name=name, + log_file=Path("some_file"), + pattern=re.compile(f"^(.*)$"), + delta=timedelta(minutes=10), + encoding="ascii", + default_timezone=timezone.utc, + ) + + def test_is_active(self, tmpdir: Path) -> None: + file_path = tmpdir / "test.log" + file_path.write_text("2020-02-02 12:12:23", encoding="ascii") + + with freeze_time("2020-02-02 12:15:00"): + assert ( + LastLogActivity( + "test", + file_path, + re.compile(r"^(.*)$"), + timedelta(minutes=10), + "ascii", + timezone.utc, + ).check() + is not None + ) + + def test_is_not_active(self, tmpdir: Path) -> None: + file_path = tmpdir / "test.log" + file_path.write_text("2020-02-02 12:12:23", encoding="ascii") + + with freeze_time("2020-02-02 12:35:00"): + assert ( + LastLogActivity( + "test", + file_path, + re.compile(r"^(.*)$"), + timedelta(minutes=10), + "ascii", + timezone.utc, + ).check() + is None + ) + + def test_uses_last_line(self, tmpdir: Path) -> None: + file_path = tmpdir / "test.log" + # last line is too old and must be used + file_path.write_text( + "\n".join(["2020-02-02 12:12:23", "1900-01-01"]), encoding="ascii" + ) + + with freeze_time("2020-02-02 12:15:00"): + assert ( + LastLogActivity( + "test", + file_path, + re.compile(r"^(.*)$"), + timedelta(minutes=10), + "ascii", + timezone.utc, + ).check() + is None + ) + + def test_ignores_lines_that_do_not_match(self, tmpdir: Path) -> None: + file_path = tmpdir / "test.log" + file_path.write_text("ignored", encoding="ascii") + + assert ( + LastLogActivity( + "test", + file_path, + re.compile(r"^foo(.*)$"), + timedelta(minutes=10), + "ascii", + timezone.utc, + ).check() + is None + ) + + def test_uses_pattern(self, tmpdir: Path) -> None: + file_path = tmpdir / "test.log" + file_path.write_text("foo2020-02-02 12:12:23bar", encoding="ascii") + + with freeze_time("2020-02-02 12:15:00"): + assert ( + LastLogActivity( + "test", + file_path, + re.compile(r"^foo(.*)bar$"), + timedelta(minutes=10), + "ascii", + timezone.utc, + ).check() + is not None + ) + + def test_uses_given_timezone(self, tmpdir: Path) -> None: + file_path = tmpdir / "test.log" + # would match if timezone wasn't used + file_path.write_text("2020-02-02 12:12:00", encoding="ascii") + + with freeze_time("2020-02-02 12:15:00"): + assert ( + LastLogActivity( + "test", + file_path, + re.compile(r"^(.*)$"), + timedelta(minutes=10), + "ascii", + timezone(offset=timedelta(hours=10)), + ).check() + is None + ) + + def test_prefers_parsed_timezone(self, tmpdir: Path) -> None: + file_path = tmpdir / "test.log" + # would not match if provided timezone wasn't used + file_path.write_text("2020-02-02T12:12:01-01:00", encoding="ascii") + + with freeze_time("2020-02-02 13:15:00"): + assert ( + LastLogActivity( + "test", + file_path, + re.compile(r"^(.*)$"), + timedelta(minutes=10), + "ascii", + timezone.utc, + ).check() + is not None + ) + + def test_fails_if_dates_cannot_be_parsed(self, tmpdir: Path) -> None: + file_path = tmpdir / "test.log" + # would match if timezone wasn't used + file_path.write_text("202000xxx", encoding="ascii") + + with pytest.raises(TemporaryCheckError): + LastLogActivity( + "test", + file_path, + re.compile(r"^(.*)$"), + timedelta(minutes=10), + "ascii", + timezone.utc, + ).check() + + def test_fails_if_dates_are_in_the_future(self, tmpdir: Path) -> None: + file_path = tmpdir / "test.log" + # would match if timezone wasn't used + file_path.write_text("2022-01-01", encoding="ascii") + + with freeze_time("2020-02-02 12:15:00"): + with pytest.raises(TemporaryCheckError): + LastLogActivity( + "test", + file_path, + re.compile(r"^(.*)$"), + timedelta(minutes=10), + "ascii", + timezone.utc, + ).check() + + def test_fails_if_file_cannot_be_read(self, tmpdir: Path) -> None: + file_path = tmpdir / "test.log" + + with pytest.raises(TemporaryCheckError): + LastLogActivity( + "test", + file_path, + re.compile(r"^(.*)$"), + timedelta(minutes=10), + "ascii", + timezone.utc, + ).check() + + def test_create(self) -> None: + parser = configparser.ConfigParser() + parser.read_string( + """ + [section] + name = somename + log_file = /some/file + pattern = ^foo(.*)bar$ + minutes = 42 + encoding = utf-8 + timezone = Europe/Berlin + """ + ) + + created = LastLogActivity.create("thename", parser["section"]) + + assert created.log_file == Path("/some/file") + assert created.pattern == re.compile(r"^foo(.*)bar$") + assert created.delta == timedelta(minutes=42) + assert created.encoding == "utf-8" + assert created.default_timezone == pytz.timezone("Europe/Berlin") + + def test_create_handles_pattern_errors(self) -> None: + parser = configparser.ConfigParser() + parser.read_string( + """ + [section] + name = somename + log_file = /some/file + pattern = ^^(foo(.*)bar$ + """ + ) + + with pytest.raises(ConfigurationError): + LastLogActivity.create("thename", parser["section"]) + + def test_create_handles_delta_errors(self) -> None: + parser = configparser.ConfigParser() + parser.read_string( + """ + [section] + name = somename + log_file = /some/file + pattern = (.*) + minutes = test + """ + ) + + with pytest.raises(ConfigurationError): + LastLogActivity.create("thename", parser["section"]) + + def test_create_handles_negative_deltas(self) -> None: + parser = configparser.ConfigParser() + parser.read_string( + """ + [section] + name = somename + log_file = /some/file + pattern = (.*) + minutes = -42 + """ + ) + + with pytest.raises(ConfigurationError): + LastLogActivity.create("thename", parser["section"]) + + def test_create_handles_missing_pattern_groups(self) -> None: + parser = configparser.ConfigParser() + parser.read_string( + """ + [section] + name = somename + log_file = /some/file + pattern = .* + """ + ) + + with pytest.raises(ConfigurationError): + LastLogActivity.create("thename", parser["section"]) + + def test_create_handles_missing_keys(self) -> None: + parser = configparser.ConfigParser() + parser.read_string( + """ + [section] + name = somename + """ + ) + + with pytest.raises(ConfigurationError): + LastLogActivity.create("thename", parser["section"])