From 14e1455239359e1cbd03b22763e97fe9d8c58a19 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 21 May 2026 21:47:23 -0400 Subject: [PATCH] fix(invoke5): warn on unknown v5 fields without breaking parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``GenerationMetadata5`` carries ``extra="allow"`` intentionally: InvokeAI's v5 metadata schema is actively extended between releases, and silently dropping a new upstream field is preferable to refusing to parse the image. The trade-off is reduced visibility into schema drift — the previous behavior was completely silent, so new fields slipped through unnoticed. Adds a ``model_validator(mode="after")`` hook on ``GenerationMetadata5`` that walks ``self.__pydantic_extra__`` (Pydantic's storage for ``extra="allow"``'d fields) and logs each new field name once per process via a module-level ``_warned_extra_fields`` set. Subsequent images carrying the same field are silent — the goal is "did anything new show up?", not per-image instrumentation, so log volume stays proportional to the number of distinct schema additions seen. Behavior preserved end-to-end: parsing still succeeds for any v5 payload regardless of unknown fields; the warning is purely observability. Four new tests cover: * Unknown fields trigger a warning (and known ones don't). * Same-named fields seen twice only warn once. * New fields seen after a known one fire a fresh warning that doesn't re-mention the already-known ones. * Multiple unknown fields in one payload coalesce into one warning with all names listed. Per-test fixture clears ``_warned_extra_fields`` so test order doesn't influence what counts as first-seen. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../invoke/invoke5metadata.py | 38 +++++++++ tests/backend/test_invoke_metadata.py | 81 +++++++++++++++++++ 2 files changed, 119 insertions(+) diff --git a/photomap/backend/metadata_modules/invoke/invoke5metadata.py b/photomap/backend/metadata_modules/invoke/invoke5metadata.py index 67d92e0b..4949f1c0 100644 --- a/photomap/backend/metadata_modules/invoke/invoke5metadata.py +++ b/photomap/backend/metadata_modules/invoke/invoke5metadata.py @@ -1,3 +1,4 @@ +import logging from typing import Any, Literal from pydantic import BaseModel, ConfigDict, Field, model_serializer, model_validator @@ -15,6 +16,19 @@ tag_reference_images, ) +logger = logging.getLogger(__name__) + +# Field names already reported by ``GenerationMetadata5._warn_unknown_fields`` +# in this process. InvokeAI's v5 schema is actively extended between +# releases — that's why the model carries ``extra="allow"`` (see +# ``ConfigDict`` below) rather than ``"forbid"``: silently dropping a new +# upstream field is preferable to refusing to parse the image. The trade-off +# is reduced visibility into schema drift, which this set + the +# ``model_validator`` hook below address: each new unknown field is logged +# once per process so an operator notices "huh, InvokeAI added X — should I +# capture it?" without log spam on every parsed image. +_warned_extra_fields: set[str] = set() + class ControlLayer(BaseModel): id: str @@ -241,6 +255,30 @@ def fixup_controlnets(cls, data: dict[str, Any]) -> dict[str, Any]: del data["controlnets"] return data + @model_validator(mode="after") + def _warn_unknown_fields(self) -> "GenerationMetadata5": + """Log v5 fields that weren't declared on this schema. + + ``extra="allow"`` keeps parsing forward-compatible across InvokeAI + releases that add new metadata fields, but otherwise leaves us + blind to schema drift. This hook surfaces each unknown field name + once per process so the next person updating the schema notices + what's slipping through. Subsequent images carrying the same + field are silent — the goal is "did anything new show up?", not + per-image instrumentation. + """ + extra = self.__pydantic_extra__ or {} + new_fields = set(extra.keys()) - _warned_extra_fields + if new_fields: + _warned_extra_fields.update(new_fields) + logger.warning( + "InvokeAI v5 metadata contains field(s) not declared in " + "GenerationMetadata5: %s. Parsing succeeded via extra='allow'; " + "add to the schema if useful in the drawer or recall payload.", + ", ".join(sorted(new_fields)), + ) + return self + @model_serializer(mode="wrap") def serialize_model(self, serializer, info): """Exclude None values when serializing.""" diff --git a/tests/backend/test_invoke_metadata.py b/tests/backend/test_invoke_metadata.py index b4312882..e1fac437 100644 --- a/tests/backend/test_invoke_metadata.py +++ b/tests/backend/test_invoke_metadata.py @@ -412,6 +412,87 @@ def test_raster_images_skip_disabled_layers(self, v5_canvas_metadata): assert view.raster_images == ["raster1.png", "raster2.png"] +# --------------------------------------------------------------------------- +# GenerationMetadata5 — unknown-field warning +# --------------------------------------------------------------------------- + + +class TestV5UnknownFieldWarning: + """``extra="allow"`` keeps parsing forward-compatible, but the + ``_warn_unknown_fields`` model_validator surfaces drift so it doesn't + go un-noticed. Each new field name is logged once per process; repeats + are silent.""" + + @pytest.fixture(autouse=True) + def _reset_warned(self): + # ``_warned_extra_fields`` is module-level; clear it around each test + # so order doesn't influence what counts as "first-time-seen". + from photomap.backend.metadata_modules.invoke import invoke5metadata + + invoke5metadata._warned_extra_fields.clear() + yield + invoke5metadata._warned_extra_fields.clear() + + def _v5_with(self, **extra) -> dict: + return { + "metadata_version": 5, + "app_version": "5.6.0", + "model": {"name": "test"}, + "positive_prompt": "p", + "seed": 1, + **extra, + } + + def test_logs_unknown_field_on_first_encounter(self, caplog): + import logging + + with caplog.at_level(logging.WARNING): + GenerationMetadataAdapter().parse( + self._v5_with(brand_new_field=42, another_one="x") + ) + # Both unknown fields appear in a single warning, comma-separated and sorted. + record = next(r for r in caplog.records if r.levelno == logging.WARNING) + assert "another_one" in record.getMessage() + assert "brand_new_field" in record.getMessage() + + def test_does_not_warn_for_known_fields(self, caplog): + import logging + + with caplog.at_level(logging.WARNING): + GenerationMetadataAdapter().parse(self._v5_with()) + # No warnings — every field we set is in the schema. + assert not any(r.levelno == logging.WARNING for r in caplog.records) + + def test_repeats_are_silent(self, caplog): + import logging + + adapter = GenerationMetadataAdapter() + with caplog.at_level(logging.WARNING): + adapter.parse(self._v5_with(brand_new_field=42)) + # Drop the first-encounter warning so the second parse can be + # asserted silent on its own. + caplog.clear() + adapter.parse(self._v5_with(brand_new_field=42)) + assert not any(r.levelno == logging.WARNING for r in caplog.records) + + def test_new_field_after_seen_one_does_warn(self, caplog): + import logging + + adapter = GenerationMetadataAdapter() + with caplog.at_level(logging.WARNING): + adapter.parse(self._v5_with(first_field=1)) + caplog.clear() + adapter.parse(self._v5_with(first_field=1, second_field=2)) + # Only ``second_field`` is mentioned in the second warning; + # ``first_field`` was already in ``_warned_extra_fields`` from the + # call above. + warnings = [r for r in caplog.records if r.levelno == logging.WARNING] + assert len(warnings) == 1 + msg = warnings[0].getMessage() + assert "second_field" in msg + assert "first_field" not in msg + + # --------------------------------------------------------------------------- # format_invoke_metadata — end-to-end HTML rendering # ---------------------------------------------------------------------------