From a894ad16ed3dc0c6eeabcb5444c88a42086c5a82 Mon Sep 17 00:00:00 2001 From: trick77 Date: Sun, 17 May 2026 11:14:10 +0200 Subject: [PATCH] fix(logging): stable leading-key order on every JSON line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit structlog inserts user-supplied kwargs into event_dict before TimeStamper / EventRenamer / the metadata processor run, which means `logger.info("event", k=v)` produces JSON where `k` lands at position 0, ahead of `timestamp`. Lines from the uvicorn bridge — which carry no user kwargs — put `timestamp` first. The mixed shape is what produced the out-of-order `riptide_collector_starting` line where `version` led: {"version": "0.1.0", "teams": 1, ..., "timestamp": "...", "msg": "..."} All 22 structured-log call sites in the codebase use the same kwargs convention so every one of them has the same problem. Adding a final reorder processor at the formatter level catches every line — uvicorn bridge, access middleware, every router — without per-call-site edits. Also drop the redundant `version=__version__` kwarg in lifespan: the metadata processor already supplies it, the kwarg was a duplicate that landed at position 0 in the dict. Tests: - test_stable_leading_key_order_with_user_kwargs — unit-level: the chain reorders even when the caller passes kwargs that would otherwise lead. - test_every_emitted_line_starts_with_timestamp — end-to-end: parse the actual JSON output of a real webhook request and assert every line starts with `timestamp`. --- src/riptide_collector/logging_config.py | 21 ++++++++++ src/riptide_collector/main.py | 4 +- tests/test_log_schema.py | 53 +++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 1 deletion(-) diff --git a/src/riptide_collector/logging_config.py b/src/riptide_collector/logging_config.py index d0609f1..7c34d32 100644 --- a/src/riptide_collector/logging_config.py +++ b/src/riptide_collector/logging_config.py @@ -47,6 +47,26 @@ def _strip_reserved(_logger: Any, _name: str, event_dict: dict[str, Any]) -> dic return event_dict +# Stable leading-key order on every emitted line. structlog inserts user +# kwargs into event_dict before TimeStamper / EventRenamer / metadata +# processors run, which means `logger.info("event", k=v)` produces JSON +# where `k` appears before `timestamp`. A naive `tail -f | jq` then sees +# inconsistent column ordering across lines. Forcing the leading order +# here (and falling through to insertion order for everything else) gives +# a uniform shape across the uvicorn bridge and every structlog call site +# without touching the call sites themselves. +_LEADING_KEYS = ("timestamp", "log_level", "service", "version", "env", "msg") + + +def _stable_field_order(_logger: Any, _name: str, event_dict: dict[str, Any]) -> dict[str, Any]: + del _logger, _name + reordered: dict[str, Any] = {k: event_dict[k] for k in _LEADING_KEYS if k in event_dict} + for k, v in event_dict.items(): + if k not in reordered: + reordered[k] = v + return reordered + + def configure_logging(level: str = "INFO", env: str = "dev") -> None: log_level = getattr(logging, level.upper(), logging.INFO) @@ -60,6 +80,7 @@ def configure_logging(level: str = "INFO", env: str = "dev") -> None: structlog.processors.EventRenamer("msg"), _rename_level, _strip_reserved, + _stable_field_order, ] # Bridge: route stdlib logs (uvicorn, sqlalchemy, alembic) through the diff --git a/src/riptide_collector/main.py b/src/riptide_collector/main.py index 26bad79..02ac780 100644 --- a/src/riptide_collector/main.py +++ b/src/riptide_collector/main.py @@ -63,7 +63,9 @@ async def lifespan(_app: FastAPI): # pyright: ignore[reportUnusedFunction] logger.info(f"riptide version: {__version__}") logger.info( "riptide_collector_starting", - version=__version__, + # version is supplied by the service-metadata processor; passing + # it as a kwarg here just produced a duplicate that landed at + # position 0 in the JSON dict before the reorder processor. teams=len(config.get().teams_by_name), keys=len(team_keys.team_names()), production_stage=config.get().environments.production_stage, diff --git a/tests/test_log_schema.py b/tests/test_log_schema.py index 122259a..8b9da19 100644 --- a/tests/test_log_schema.py +++ b/tests/test_log_schema.py @@ -259,6 +259,7 @@ def _run_chain(self, env: str, **kwargs: Any) -> dict[str, Any]: from riptide_collector.logging_config import ( _make_service_metadata_processor, _rename_level, + _stable_field_order, _strip_reserved, ) @@ -267,6 +268,7 @@ def _run_chain(self, env: str, **kwargs: Any) -> dict[str, Any]: structlog.processors.EventRenamer("msg"), _rename_level, _strip_reserved, + _stable_field_order, ] for proc in chain: processed = proc(cast(Any, None), "info", processed) @@ -303,3 +305,54 @@ def test_splunk_reserved_kwargs_namespaced(self) -> None: assert out["splunk_sourcetype"] == "z" for forbidden in ("source", "host", "index", "sourcetype"): assert forbidden not in out + + def test_stable_leading_key_order_with_user_kwargs(self) -> None: + # The regression that triggered this rule: structlog inserts user + # kwargs into event_dict *before* the timestamp/service/version/env + # processors run, producing JSON where 'version' or 'teams' appears + # ahead of 'timestamp'. The reorder processor must restore a stable + # leading order regardless of what kwargs the caller passes. + out = self._run_chain( + "prod", + teams=3, + keys=2, + production_stage="prod", + timestamp="2026-05-17T09:04:17Z", + ) + head = list(out.keys())[:5] + assert head == ["timestamp", "log_level", "service", "version", "env"] + # Caller-supplied kwargs still appear, just after the leading block. + assert {"teams", "keys", "production_stage"}.issubset(out.keys()) + + +class TestRenderedFieldOrder: + """End-to-end: every JSON line emitted by the running app must start + with `timestamp` regardless of what call site produced it. + + Catches the regression a tail/grep user would notice immediately — a + structlog call that passes kwargs gets the kwargs as leading keys + unless the reorder processor is wired into the chain. + """ + + async def test_every_emitted_line_starts_with_timestamp( + self, + client: AsyncClient, + log_buffer: io.StringIO, + ) -> None: + # Hit a webhook that runs through every call-site shape: uvicorn + # bridge (already covered at startup), the access-log middleware, + # and a router that emits webhook_processed with kwargs. + headers = {"Authorization": f"Bearer {CHECKOUT_NOERGLER}"} + r = await client.post( + "/webhooks/noergler", + json=_load("noergler_pr_completed_merged.json"), + headers=headers, + ) + assert r.status_code == 202 + + events = _parse_json_lines(log_buffer.getvalue()) + assert events, "no JSON log lines captured — fixture or pipeline broken" + for line in events: + assert next(iter(line)) == "timestamp", ( + f"line does not start with 'timestamp': {list(line)[:3]} — {line}" + )