Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@ name: Linting
# Lint gate runs on every PR and push to main.
#
# - ruff format + ruff check are HARD gates (block the PR).
# - mypy is ADVISORY for now (continue-on-error): the package carries ~28
# pre-existing type errors that predate CI enforcement. Tracked for burn-down
# in makegov/tango-python; flip `continue-on-error` off once that's clear.
# - mypy is a HARD gate: the package type-checks cleanly under strict mypy.
# (The earlier ~28-error burn-down is complete.)
# - The SDK filter/shape conformance check needs the canonical manifest from the
# private makegov/tango repo, which requires a TANGO_API_REPO_ACCESS_TOKEN
# secret the public CI does not have. The conformance job SKIPS cleanly when
Expand Down Expand Up @@ -42,8 +41,7 @@ jobs:
- name: Lint with ruff
run: uv run ruff check tango/

- name: Type check with mypy (advisory)
continue-on-error: true
- name: Type check with mypy
run: uv run mypy tango/

conformance:
Expand Down
16 changes: 16 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2026-06-02): `actions/checkout` v4→v6, `astral-sh/setup-uv` v4→v8.1.0
(pinned exact — no floating `v8` major tag is published yet), and
`codecov/codecov-action` v3→v5 (with the renamed `files:` input).
- `mypy` is now a **hard gate** in `lint.yml` (no longer advisory). The
`tango/` package type-checks cleanly under strict mypy.

### Changed
- Completed the strict-`mypy` burn-down across `tango/shapes/` (parser,
generator, factory, schema). Almost all changes are pure type-annotation
corrections. Two go beyond annotations, though neither alters runtime
behavior for existing callers:
- `FieldSchema.nested_model` is now typed `type | str | None` (it always
accepted string model names from the explicit schemas; the annotation was
wrong). `ModelFactory.validate_data` and `ShapeParser._validate_field_spec`
likewise accept `type | str` for the model argument.
- Removed two dead `elif field_spec.is_wildcard:` branches (in
`TypeGenerator.generate_type` and `ModelFactory.create_instance`) and the
now-orphaned `_parse_nested_wildcard` helper. These were unreachable —
wildcard field specs are fully handled by the top-of-loop branch that
`continue`s before reaching them — so removal is behavior-preserving.

## [1.1.1] - 2026-05-29

Expand Down
88 changes: 12 additions & 76 deletions tango/shapes/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from collections.abc import Callable
from datetime import date, datetime
from decimal import Decimal
from typing import Any
from typing import Any, cast

from tango.exceptions import ModelInstantiationError
from tango.shapes.generator import TypeGenerator
Expand Down Expand Up @@ -542,38 +542,6 @@ def create_instance(
# Value is not a dict - might be a primitive or None
result[result_field_name] = value

elif field_spec.is_wildcard:
# Wildcard on nested field - use full model type
# This is handled at the top level, but we need to handle it here too
# for nested wildcards like recipient(*)
if field_schema.nested_model:
if field_schema.is_list:
if isinstance(value, list):
nested_instances = []
for item in value:
if isinstance(item, dict):
# Parse all fields from the nested model
nested_instance = self._parse_nested_wildcard(
item, field_schema.nested_model
)
nested_instances.append(nested_instance)
else:
nested_instances.append(item)
result[result_field_name] = nested_instances
else:
result[result_field_name] = value
else:
if isinstance(value, dict):
nested_instance = self._parse_nested_wildcard(
value, field_schema.nested_model
)
result[result_field_name] = nested_instance
else:
result[result_field_name] = value
else:
# Not a nested model, just use the value
result[result_field_name] = value

else:
# Simple field - parse using appropriate parser
parsed_value = self._parse_field(
Expand Down Expand Up @@ -661,7 +629,7 @@ def _resolve_nested_model(self, nested_model: type | str) -> type:
raise ModelInstantiationError(
f"Could not resolve nested model '{nested_model}'"
)
return model_class
return cast(type, model_class)
except ImportError as err:
raise ModelInstantiationError(
f"Could not import models module to resolve '{nested_model}'"
Expand Down Expand Up @@ -704,41 +672,6 @@ def _create_nested_instance(
# Recursively create nested instance
return self.create_instance(data, nested_shape, resolved_model, nested_type)

def _parse_nested_wildcard(
    self, data: dict[str, Any], nested_model: type | str
) -> dict[str, Any]:
    """Parse every field of a nested object selected with a wildcard

    Args:
        data: Nested object data, keyed by field name
        nested_model: Model class or string name for the nested object

    Returns:
        New dictionary with the same keys as ``data``. Values whose key is
        present in the model schema are run through ``_parse_field``; all
        other values are copied across unchanged.
    """
    # A string model name is turned into a class before any schema lookup.
    model = self._resolve_nested_model(nested_model)

    # Register the model on demand so its schema can be fetched below.
    registry = self.schema_registry
    if not registry.is_registered(model):
        registry.register(model)
    schema = registry.get_schema(model)

    parsed: dict[str, Any] = {}
    for key, raw in data.items():
        if key not in schema:
            # Field not in schema, include as-is
            parsed[key] = raw
            continue
        spec = schema[key]
        parsed[key] = self._parse_field(key, raw, spec.type, spec)

    return parsed

def _parse_field(self, field_name: str, value: Any, field_type: type, field_schema: Any) -> Any:
"""Parse a single field value using appropriate parser

Expand Down Expand Up @@ -778,7 +711,7 @@ def _parse_field(self, field_name: str, value: Any, field_type: type, field_sche
return value

def validate_data(
self, data: dict[str, Any], shape_spec: ShapeSpec, base_model: type
self, data: dict[str, Any], shape_spec: ShapeSpec, base_model: type | str
) -> list[str]:
"""Validate that data matches the shape specification

Expand All @@ -803,11 +736,15 @@ def validate_data(
errors: list[str] = []

if not isinstance(data, dict):
errors.append(f"Expected dictionary data, got {type(data).__name__}")
errors.append( # type: ignore[unreachable]
f"Expected dictionary data, got {type(data).__name__}"
)
return errors

# Ensure model is registered
if not self.schema_registry.is_registered(base_model):
# Ensure model is registered. String model names are expected to be
# pre-registered (explicit schemas); only concrete classes can be
# auto-registered via introspection.
if isinstance(base_model, type) and not self.schema_registry.is_registered(base_model):
self.schema_registry.register(base_model)

# Get model schema
Expand All @@ -826,9 +763,8 @@ def validate_data(

# Check if field exists in schema
if field_spec.name not in model_schema:
errors.append(
f"Field '{field_spec.name}' does not exist in {base_model.__name__} schema"
)
model_name = base_model.__name__ if isinstance(base_model, type) else base_model
errors.append(f"Field '{field_spec.name}' does not exist in {model_name} schema")
continue

field_schema = model_schema[field_spec.name]
Expand Down
39 changes: 12 additions & 27 deletions tango/shapes/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import logging
import threading
from collections import OrderedDict
from typing import Any, get_args, get_origin, get_type_hints
from typing import Any, cast, get_args, get_origin, get_type_hints

from tango.exceptions import TypeGenerationError
from tango.shapes.models import ShapeSpec
Expand Down Expand Up @@ -250,7 +250,10 @@ def generate_type(

field_schema = model_schema[field_spec.name]

# Determine field type
# Determine field type. The value is a heterogeneous mix of type
# objects, parameterized generics (list[...]), and union objects,
# so it is intentionally typed as Any.
field_type: Any
if field_spec.nested_fields:
# Generate nested type
if not field_schema.nested_model:
Expand All @@ -275,25 +278,7 @@ def generate_type(

# Handle optional types
if field_schema.is_optional:
field_type = field_type | None # type: ignore

annotations[field_name] = field_type

elif field_spec.is_wildcard:
# Wildcard on nested field - use full model type
if field_schema.nested_model:
# Resolve nested model if it's a string
field_type = self._resolve_nested_model(field_schema.nested_model)
else:
field_type = field_schema.type

# Handle list types
if field_schema.is_list:
field_type = list[field_type] # type: ignore

# Handle optional types
if field_schema.is_optional:
field_type = field_type | None # type: ignore
field_type = field_type | None

annotations[field_name] = field_type

Expand All @@ -303,11 +288,11 @@ def generate_type(

# Handle list types
if field_schema.is_list:
field_type = list[field_type] # type: ignore
field_type = list[field_type]

# Handle optional types
if field_schema.is_optional:
field_type = field_type | None # type: ignore
field_type = field_type | None

annotations[field_name] = field_type

Expand All @@ -329,7 +314,7 @@ def generate_type(
field_type = field_schema.type
# Handle optional types
if field_schema.is_optional:
field_type = field_type | None # type: ignore
field_type = field_type | None
annotations[auto_field] = field_type

# Create TypedDict dynamically
Expand Down Expand Up @@ -414,7 +399,7 @@ def _resolve_nested_model(self, nested_model: type | str) -> type:
model_class = getattr(models, nested_model, None)
if model_class is None:
raise TypeGenerationError(f"Could not resolve nested model '{nested_model}'")
return model_class
return cast(type, model_class)
except ImportError as err:
raise TypeGenerationError(
f"Could not import models module to resolve '{nested_model}'"
Expand Down Expand Up @@ -555,7 +540,7 @@ def _format_type_annotation(self, type_annotation: Any) -> str:

# Handle basic types
if hasattr(type_annotation, "__name__"):
type_name = type_annotation.__name__
type_name = str(type_annotation.__name__)
else:
type_name = str(type_annotation)

Expand All @@ -576,7 +561,7 @@ def _format_type_annotation(self, type_annotation: Any) -> str:
if args:
formatted_args = [self._format_type_annotation(arg) for arg in args]
return f"{origin.__name__}[{', '.join(formatted_args)}]"
return origin.__name__
return str(origin.__name__)

return type_name

Expand Down
33 changes: 17 additions & 16 deletions tango/shapes/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def _suggest_field_correction(invalid_field: str, valid_fields: list[str]) -> st

# Check for common prefix
best_match = None
best_score = 0
best_score = 0.0

for field in valid_fields:
# Count common prefix length
Expand Down Expand Up @@ -167,6 +167,13 @@ def __init__(self, cache_enabled: bool = True, schema_registry: SchemaRegistry |
self._schema_registry = schema_registry
self._schema_registry_initialized = schema_registry is not None

def _ensure_registry(self) -> SchemaRegistry:
    """Return the schema registry, building and storing one on first use.

    Returns:
        The registry already held by this object when there is one;
        otherwise a newly created ``SchemaRegistry`` that is also saved on
        the object so later calls reuse it.
    """
    registry = self._schema_registry
    if registry is not None:
        return registry

    # No registry yet: create it and record that initialization has happened.
    registry = SchemaRegistry()
    self._schema_registry = registry
    self._schema_registry_initialized = True
    return registry

def parse(self, shape: str) -> ShapeSpec:
"""Parse a shape string into a ShapeSpec

Expand Down Expand Up @@ -544,25 +551,22 @@ def validate(self, shape_spec: ShapeSpec, model_class: type) -> None:
>>> spec = parser.parse("invalid_field")
>>> parser.validate(spec, Contract) # Raises ShapeValidationError
"""
# Lazy initialize schema registry
if not self._schema_registry_initialized:
self._schema_registry = SchemaRegistry()
self._schema_registry_initialized = True
registry = self._ensure_registry()

# Ensure model is registered
if not self._schema_registry.is_registered(model_class):
self._schema_registry.register(model_class)
if not registry.is_registered(model_class):
registry.register(model_class)

# Validate each field
for field_spec in shape_spec.fields:
self._validate_field_spec(field_spec, model_class)

def _validate_field_spec(self, field_spec: FieldSpec, model_class: type) -> None:
def _validate_field_spec(self, field_spec: FieldSpec, model_class: type | str) -> None:
"""Validate a single field specification against a model

Args:
field_spec: Field specification to validate
model_class: Model class to validate against
model_class: Model class (or registered model name) to validate against

Raises:
ShapeValidationError: If field is invalid
Expand All @@ -571,20 +575,17 @@ def _validate_field_spec(self, field_spec: FieldSpec, model_class: type) -> None
if field_spec.is_wildcard:
return

# Lazy initialize schema registry if needed
if not self._schema_registry_initialized:
self._schema_registry = SchemaRegistry()
self._schema_registry_initialized = True
registry = self._ensure_registry()

# Validate field exists in model
try:
field_schema = self._schema_registry.validate_field(model_class, field_spec.name)
field_schema = registry.validate_field(model_class, field_spec.name)
except ShapeValidationError as e:
# Enhance error message with suggestions
model_name = (
model_class.__name__ if hasattr(model_class, "__name__") else str(model_class)
)
model_schema = self._schema_registry.get_schema(model_class)
model_schema = registry.get_schema(model_class)
valid_fields = list(model_schema.keys())

error_msg = f"Field '{field_spec.name}' does not exist in {model_name}."
Expand Down Expand Up @@ -630,7 +631,7 @@ def _validate_field_spec(self, field_spec: FieldSpec, model_class: type) -> None
error_msg += "\n\nNested selections are only valid for object fields like 'recipient', 'agency', 'location', etc."

# Find some nested fields as examples
model_schema = self._schema_registry.get_schema(model_class)
model_schema = registry.get_schema(model_class)
nested_examples = [
name for name, schema in model_schema.items() if schema.nested_model
]
Expand Down
5 changes: 3 additions & 2 deletions tango/shapes/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
list indicators independently of the dataclass definitions.
"""

import builtins
from dataclasses import dataclass
from typing import Any, get_args, get_origin, get_type_hints

Expand All @@ -33,10 +34,10 @@ class FieldSchema:
"""

name: str
type: type
type: builtins.type
is_optional: bool
is_list: bool
nested_model: type | None = None
nested_model: builtins.type | str | None = None

def __repr__(self) -> str:
"""String representation for debugging"""
Expand Down
Loading