From e9b96f6a08bd05367922a76ff500c1a77e1035d3 Mon Sep 17 00:00:00 2001
From: Layne Penney
Date: Mon, 27 Apr 2026 15:38:08 -0500
Subject: [PATCH] test: add producer v1.1 validator specs

---
NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Line breaks were rebuilt so that every hunk matches its @@ line counts.
Indentation depth (2-space TypeScript, 4-space Python) and the generic type
arguments `Parameters<typeof finalizeExtraction>` and
`loadJson<Record<string, unknown>>` are reconstructed - confirm them against
the repository. If context lines fail to match on whitespace alone, apply
with `git apply --ignore-whitespace`.

 packages/ts/tests/test_finalize.ts |  38 ++++++++++
 packages/ts/tests/test_validate.ts | 118 +++++++++++++++++++++++++++++
 tests/python/test_finalize.py      |  32 ++++++++
 tests/python/test_validate.py      | 115 ++++++++++++++++++++++++++++
 4 files changed, 303 insertions(+)

diff --git a/packages/ts/tests/test_finalize.ts b/packages/ts/tests/test_finalize.ts
index 96ad751..2f2d289 100644
--- a/packages/ts/tests/test_finalize.ts
+++ b/packages/ts/tests/test_finalize.ts
@@ -37,6 +37,29 @@ describe("finalizeExtraction", () => {
     expect(result.extraction[field as keyof typeof result.extraction]).toBe(expected);
   });
 
+  test("round-trips a structured producer", () => {
+    const producer = {
+      version: "1",
+      model: "anthropic://claude-sonnet-4-6",
+      deployment: "bedrock",
+      configuration: {
+        reasoning_effort: "high",
+        temperature: 0.2,
+        provider_flag: true,
+      },
+      operator: "synapt-dev",
+      signature: "eyJhbGciOiJIUzI1NiJ9.payload.signature",
+    };
+
+    const result = finalizeExtraction(
+      llmOutput(),
+      { produced_by: producer } as unknown as Parameters<typeof finalizeExtraction>[1],
+    );
+
+    expect(result.extraction.produced_by).toEqual(producer);
+    expect(result.validation.valid).toBe(true);
+  });
+
   test("injects extensions and extension versions", () => {
     const result = finalizeExtraction(llmOutput(), {
       produced_by: "test://model",
@@ -216,6 +239,21 @@ describe("finalizeExtraction", () => {
     expect(result.validation.errors.some((error) => error.path === "embeddings[0].dimensions")).toBe(true);
   });
 
+  test("reports invalid structured producer in validation result", () => {
+    const result = finalizeExtraction(
+      llmOutput(),
+      {
+        produced_by: {
+          version: "1",
+          model: "claude-sonnet-4-6",
+        },
+      } as unknown as Parameters<typeof finalizeExtraction>[1],
+    );
+
+    expect(result.validation.valid).toBe(false);
+    expect(result.validation.errors.some((error) => error.path === "produced_by.model")).toBe(true);
+  });
+
   test("passes end-to-end finalization", () => {
     const result = finalizeExtraction(
       llmOutput({
diff --git a/packages/ts/tests/test_validate.ts b/packages/ts/tests/test_validate.ts
index c4c1a7d..0683b31 100644
--- a/packages/ts/tests/test_validate.ts
+++ b/packages/ts/tests/test_validate.ts
@@ -326,6 +326,108 @@ describe("validateExtraction", () => {
     expect(result.valid).toBe(expectedValid);
   });
 
+  test("accepts v1.0 string produced_by for backwards compatibility", () => {
+    const result = validateExtraction(minimalExtraction({
+      produced_by: "anthropic://claude-sonnet-4-6",
+    }));
+    expect(result.valid).toBe(true);
+  });
+
+  test("accepts minimal v1.1 structured producer", () => {
+    const result = validateExtraction(minimalExtraction({
+      produced_by: {
+        version: "1",
+        model: "anthropic://claude-sonnet-4-6",
+      },
+    }));
+    expect(result.valid).toBe(true);
+  });
+
+  test("accepts full v1.1 structured producer", () => {
+    const result = validateExtraction(minimalExtraction({
+      produced_by: {
+        version: "1",
+        model: "anthropic://claude-sonnet-4-6",
+        model_version: "claude-sonnet-4-6-20250514",
+        deployment: "bedrock",
+        configuration: {
+          reasoning_effort: "high",
+          system_prompt_hash: "abc123",
+          temperature: 0.2,
+          top_p: 0.95,
+          max_tokens: 2048,
+          vendor_flag: true,
+        },
+        operator: "synapt-dev",
+        signature: "eyJhbGciOiJIUzI1NiJ9.payload.signature",
+      },
+    }));
+    expect(result.valid).toBe(true);
+  });
+
+  test.each([
+    [
+      "missing version",
+      { model: "anthropic://claude-sonnet-4-6" },
+      "produced_by.version",
+    ],
+    [
+      "missing model",
+      { version: "1" },
+      "produced_by.model",
+    ],
+    [
+      "unknown root field",
+      { version: "1", model: "anthropic://claude-sonnet-4-6", extra_field: "boom" },
+      "produced_by.extra_field",
+    ],
+    [
+      "malformed model uri",
+      { version: "1", model: "claude-sonnet-4-6" },
+      "produced_by.model",
+    ],
+    [
+      "non-string signature",
+      { version: "1", model: "anthropic://claude-sonnet-4-6", signature: { alg: "HS256" } },
+      "produced_by.signature",
+    ],
+  ])("rejects structured producer with %s", (_name, producedBy, errorPath) => {
+    const result = validateExtraction(minimalExtraction({ produced_by: producedBy }));
+    expect(result.valid).toBe(false);
+    expect(result.errors.some((error) => error.path === errorPath)).toBe(true);
+  });
+
+  test("accepts open configuration object with arbitrary extra fields", () => {
+    const result = validateExtraction(minimalExtraction({
+      produced_by: {
+        version: "1",
+        model: "anthropic://claude-sonnet-4-6",
+        configuration: {
+          provider_sampling_mode: "adaptive",
+          vendor_flag: true,
+        },
+      },
+    }));
+    expect(result.valid).toBe(true);
+  });
+
+  test("accepts known configuration fields", () => {
+    const result = validateExtraction(minimalExtraction({
+      produced_by: {
+        version: "1",
+        model: "anthropic://claude-sonnet-4-6",
+        configuration: {
+          reasoning_effort: "medium",
+          system_prompt_hash: "f00dbabe",
+          temperature: 0.1,
+          top_p: 0.95,
+          max_tokens: 2048,
+        },
+      },
+    }));
+    expect(result.valid).toBe(true);
+  });
+
   test.each([
     ["entity name empty", minimalExtraction({ entities: [{ name: "", type: "person" }] }), "entities[0].name"],
     ["entity type empty", minimalExtraction({ entities: [{ name: "Mom", type: "" }] }), "entities[0].type"],
@@ -529,6 +631,7 @@ describe("JSON Schema dereference", () => {
     expect(schema).toContain("embedding/v1.json");
     expect(schema).toContain("assertion-signals/v1.json");
     expect(schema).toContain("temporal-ref/v1.json");
+    expect(schema).toContain("producer/v1.json");
   });
 
   test("extraction schema carries the expected required fields", () => {
@@ -553,6 +656,7 @@ describe("JSON Schema dereference", () => {
       resolve(SCHEMAS_DIR, "embedding", "v1.json"),
       resolve(SCHEMAS_DIR, "source-ref", "v1.json"),
       resolve(SCHEMAS_DIR, "temporal-ref", "v1.json"),
+      resolve(SCHEMAS_DIR, "producer", "v1.json"),
       resolve(SCHEMAS_DIR, "extract", "v1.json"),
     ];
 
@@ -567,6 +671,12 @@ describe("JSON Schema dereference", () => {
     const cases = [
       minimalExtraction(),
       minimalExtraction({ version: "2" }),
+      minimalExtraction({
+        produced_by: {
+          version: "1",
+          model: "anthropic://claude-sonnet-4-6",
+        },
+      }),
       minimalExtraction({
         entities: [{ id: "e1", name: "Mom", type: "person" }],
         goals: [{ text: "Recovery", status: "open", entity_refs: ["e1"] }],
@@ -588,6 +698,7 @@ describe("JSON Schema dereference", () => {
       resolve(SCHEMAS_DIR, "embedding", "v1.json"),
       resolve(SCHEMAS_DIR, "source-ref", "v1.json"),
       resolve(SCHEMAS_DIR, "temporal-ref", "v1.json"),
+      resolve(SCHEMAS_DIR, "producer", "v1.json"),
       resolve(SCHEMAS_DIR, "extract", "v1.json"),
     ]) {
       const schema = loadJson<Record<string, unknown>>(file);
@@ -619,4 +730,11 @@ describe("JSON Schema dereference", () => {
     const validatorValid = validateExtraction(doc).valid;
     expect(schemaValid).toBe(validatorValid);
   });
+
+  test("producer schema exists with canonical id", () => {
+    const producerSchema = loadJson<Record<string, unknown>>(
+      resolve(SCHEMAS_DIR, "producer", "v1.json"),
+    );
+    expect(producerSchema.$id).toBe("https://synapt.dev/schemas/producer/v1.json");
+  });
 });
diff --git a/tests/python/test_finalize.py b/tests/python/test_finalize.py
index 0dfbf3d..b15fe5e 100644
--- a/tests/python/test_finalize.py
+++ b/tests/python/test_finalize.py
@@ -39,6 +39,25 @@ def test_injects_produced_by(self):
         )
         assert result.extraction["produced_by"] == "openai://gpt-4o-mini"
 
+    def test_injects_structured_produced_by(self):
+        producer = {
+            "version": "1",
+            "model": "anthropic://claude-sonnet-4-6",
+            "deployment": "bedrock",
+            "configuration": {
+                "reasoning_effort": "high",
+                "temperature": 0.2,
+                "provider_flag": True,
+            },
+            "operator": "synapt-dev",
+            "signature": "eyJhbGciOiJIUzI1NiJ9.payload.signature",
+        }
+        result = finalize_extraction(
+            _llm_output(),
+            FinalizeContext(produced_by=producer),  # type: ignore[arg-type]
+        )
+        assert result.extraction["produced_by"] == producer
+
     def test_injects_user_id(self):
         result = finalize_extraction(
             _llm_output(),
@@ -406,6 +425,19 @@ def test_malformed_embedding_reported_in_validation(self):
         assert any("model" in e.path for e in result.validation.errors)
         assert any("dimensions" in e.path for e in result.validation.errors)
 
+    def test_invalid_structured_producer_reported_in_validation(self):
+        result = finalize_extraction(
+            _llm_output(),
+            FinalizeContext(
+                produced_by={
+                    "version": "1",
+                    "model": "claude-sonnet-4-6",
+                },  # type: ignore[arg-type]
+            ),
+        )
+        assert not result.validation.valid
+        assert any("produced_by.model" in e.path for e in result.validation.errors)
+
 
 class TestEndToEnd:
 
diff --git a/tests/python/test_validate.py b/tests/python/test_validate.py
index ae3ee18..6c4a08e 100644
--- a/tests/python/test_validate.py
+++ b/tests/python/test_validate.py
@@ -309,6 +309,114 @@ def test_produced_by_empty_string(self):
         assert not result.valid
 
 
+class TestProducedByProducerObject:
+
+    def test_produced_by_string_backwards_compat_valid(self):
+        doc = _minimal_extraction(produced_by="anthropic://claude-sonnet-4-6")
+        result = validate_extraction(doc)
+        assert result.valid
+
+    def test_produced_by_structured_minimal_valid(self):
+        doc = _minimal_extraction(produced_by={
+            "version": "1",
+            "model": "anthropic://claude-sonnet-4-6",
+        })
+        result = validate_extraction(doc)
+        assert result.valid
+
+    def test_produced_by_structured_full_valid(self):
+        doc = _minimal_extraction(produced_by={
+            "version": "1",
+            "model": "anthropic://claude-sonnet-4-6",
+            "model_version": "claude-sonnet-4-6-20250514",
+            "deployment": "bedrock",
+            "configuration": {
+                "reasoning_effort": "high",
+                "system_prompt_hash": "abc123",
+                "temperature": 0.2,
+                "top_p": 0.95,
+                "max_tokens": 2048,
+                "vendor_flag": True,
+            },
+            "operator": "synapt-dev",
+            "signature": "eyJhbGciOiJIUzI1NiJ9.payload.signature",
+        })
+        result = validate_extraction(doc)
+        assert result.valid
+
+    def test_produced_by_structured_missing_version_fails(self):
+        doc = _minimal_extraction(produced_by={
+            "model": "anthropic://claude-sonnet-4-6",
+        })
+        result = validate_extraction(doc)
+        assert not result.valid
+        assert any(e.path == "produced_by.version" for e in result.errors)
+
+    def test_produced_by_structured_missing_model_fails(self):
+        doc = _minimal_extraction(produced_by={
+            "version": "1",
+        })
+        result = validate_extraction(doc)
+        assert not result.valid
+        assert any(e.path == "produced_by.model" for e in result.errors)
+
+    def test_produced_by_structured_unknown_root_field_fails(self):
+        doc = _minimal_extraction(produced_by={
+            "version": "1",
+            "model": "anthropic://claude-sonnet-4-6",
+            "extra_field": "boom",
+        })
+        result = validate_extraction(doc)
+        assert not result.valid
+        assert any(e.path == "produced_by.extra_field" for e in result.errors)
+
+    def test_produced_by_structured_open_configuration_passes(self):
+        doc = _minimal_extraction(produced_by={
+            "version": "1",
+            "model": "anthropic://claude-sonnet-4-6",
+            "configuration": {
+                "provider_sampling_mode": "adaptive",
+                "vendor_flag": True,
+            },
+        })
+        result = validate_extraction(doc)
+        assert result.valid
+
+    def test_produced_by_structured_known_configuration_fields_pass(self):
+        doc = _minimal_extraction(produced_by={
+            "version": "1",
+            "model": "anthropic://claude-sonnet-4-6",
+            "configuration": {
+                "reasoning_effort": "medium",
+                "system_prompt_hash": "f00dbabe",
+                "temperature": 0.1,
+                "top_p": 0.95,
+                "max_tokens": 2048,
+            },
+        })
+        result = validate_extraction(doc)
+        assert result.valid
+
+    def test_produced_by_structured_malformed_model_fails(self):
+        doc = _minimal_extraction(produced_by={
+            "version": "1",
+            "model": "claude-sonnet-4-6",
+        })
+        result = validate_extraction(doc)
+        assert not result.valid
+        assert any(e.path == "produced_by.model" for e in result.errors)
+
+    def test_produced_by_structured_non_string_signature_fails(self):
+        doc = _minimal_extraction(produced_by={
+            "version": "1",
+            "model": "anthropic://claude-sonnet-4-6",
+            "signature": {"alg": "HS256"},
+        })
+        result = validate_extraction(doc)
+        assert not result.valid
+        assert any(e.path == "produced_by.signature" for e in result.errors)
+
+
 class TestNonEmptyStrings:
 
     def test_entity_name_empty(self):
@@ -967,6 +1075,13 @@ def test_extraction_schema_references_sub_schemas(self):
         assert "embedding/v1.json" in schema_str
         assert "assertion-signals/v1.json" in schema_str
         assert "temporal-ref/v1.json" in schema_str
+        assert "producer/v1.json" in schema_str
+
+    def test_producer_schema_file_exists(self):
+        producer_path = Path(__file__).resolve().parents[2] / "schemas" / "producer" / "v1.json"
+        assert producer_path.exists()
+        schema = json.loads(producer_path.read_text())
+        assert schema["$id"] == "https://synapt.dev/schemas/producer/v1.json"
 
     def test_extraction_schema_required_fields(self):
         schema_path = Path(__file__).resolve().parents[2] / "schemas" / "extract" / "v1.json"