### A journey into Python type hints and Pydantic

* How gradual type hints are valuable even though they aren't enforced at runtime
* Using `pydantic` so that type hints are respected both in development and at runtime

In [1]:
raw_input = """
{
    "temperature": {
        "long_name": "teeeeemperature",
        "arity": 12,
        "unit": "K",
        "timesteps": [
            {
                "time": "2024-11-28T06:00:00",
                "value": 277
            },
            {
                "time": "2024-11-28T12:00:00",
                "value": 297.3213666577565674
            }
        ]
    }
}
"""

In [2]:
import json

parameters = json.loads(raw_input)
parameters  # parameters: Any

{'temperature': {'long_name': 'teeeeemperature',
  'arity': 12,
  'unit': 'K',
  'timesteps': [{'time': '2024-11-28T06:00:00', 'value': 277},
   {'time': '2024-11-28T12:00:00', 'value': 297.3213666577566}]}}

In [3]:
parameters['temperature']['long_name']  # : Any

'teeeeemperature'

In [4]:
parameters['temperature']['timesteps'][0]['time']  # : Any

'2024-11-28T06:00:00'

In [5]:
parameters['temperature']['timesteps'][1]['value']  # : Any

297.3213666577566

In [6]:
try:
    parameters['temperature']['timesteps'][1]['notRealFieldName']  # : Any
except KeyError as e:
    print(repr(e))

KeyError('notRealFieldName')


In [7]:
from typing import Any
from decimal import Decimal

parameters: dict[str, Any] = json.loads(raw_input, parse_float=Decimal)
parameters  # : dict[str, Any]

{'temperature': {'long_name': 'teeeeemperature',
  'arity': 12,
  'unit': 'K',
  'timesteps': [{'time': '2024-11-28T06:00:00', 'value': 277},
   {'time': '2024-11-28T12:00:00', 'value': Decimal('297.3213666577565674')}]}}

In [8]:
try:
    print(parameters[0])  # "Literal[0]" is not assignable to "str"
except KeyError as e:
    print(repr(e))

KeyError(0)


In [9]:
try:
    print(parameters.temperature)  # Attribute "temperature" is unknown
except AttributeError as e:
    print(repr(e))

AttributeError("'dict' object has no attribute 'temperature'")


In [10]:
temperature = parameters['temperature']  # temperature: Any
timestep_one_value = temperature['timesteps'][0]['value']  # timestep_one_value: Any
timestep_two_value = temperature['timesteps'][1]['value']  # timestep_two_value: Any
print(timestep_one_value, timestep_two_value)

277 297.3213666577565674


In [11]:
from typing import TypedDict, Union, Literal

class Timestep(TypedDict):
    time: str # but we wish it was a datetime
    value: Union[int, Decimal]

class Parameter(TypedDict):
    long_name: str
    arity: int
    unit: Literal['K', 'mm/s', 'm']
    timesteps: list[Timestep]

parameters: dict[str, Parameter] = json.loads(raw_input, parse_float=Decimal)
parameters  # parameters: dict[str, Parameter]

{'temperature': {'long_name': 'teeeeemperature',
  'arity': 12,
  'unit': 'K',
  'timesteps': [{'time': '2024-11-28T06:00:00', 'value': 277},
   {'time': '2024-11-28T12:00:00', 'value': Decimal('297.3213666577565674')}]}}

In [12]:
temp = parameters['temperature']  # temp: Parameter
timestep_one = temp['timesteps'][0]  # timestep_one: Timestep
timestep_one

{'time': '2024-11-28T06:00:00', 'value': 277}

In [13]:
try:
    arity = timestep_one['arity']  # "arity" is not a defined key in "Timestep"
except KeyError as e:
    print(repr(e))

KeyError('arity')


In [14]:
try:
    print("isinstance(..., Parameter)", isinstance(timestep_one, Parameter))  # TypedDict class not allowed for instance or class checks
    print(isinstance(timestep_one, TypedDict))  # TypedDict cannot be used for instance or class checks
except TypeError:
    print("isinstance(..., TypedDict) raises an exception")
print("isinstance(..., dict)", isinstance(timestep_one, dict))

isinstance(..., TypedDict) raises an exception
isinstance(..., dict) True


In [15]:
from datetime import datetime

class Timestep(TypedDict):
    time: str # but we wish it was a datetime
    value: Union[int, Decimal]

    @property # error: TypedDict classes can contain only type annotations
    def time_as_datetime(self):
        return datetime.strptime(self.time, '%Y-%m-%dT%H:%M:%S')

In [16]:
from dataclasses import dataclass

@dataclass
class Timestep:
    time: str  # but we wish it was a datetime
    value: Union[int, Decimal]

    @property
    def time_as_datetime(self):
        return datetime.strptime(self.time, '%Y-%m-%dT%H:%M:%S')

@dataclass
class Parameter:
    long_name: str
    arity: int
    unit: Literal['K', 'mm/s', 'm']
    timesteps: list[Timestep]

parameters: dict[str, Parameter] = {
    key: Parameter(**value)
    for key, value in json.loads(raw_input, parse_float=Decimal).items()
}
temperature = parameters['temperature'] # temperature: Parameter
temperature

Parameter(long_name='teeeeemperature', arity=12, unit='K', timesteps=[{'time': '2024-11-28T06:00:00', 'value': 277}, {'time': '2024-11-28T12:00:00', 'value': Decimal('297.3213666577565674')}])

In [17]:
timesteps = temperature.timesteps  # timesteps: list[Timestep]
timestep_one = timesteps[0]  # timestep_one: Timestep
print(timestep_one)

{'time': '2024-11-28T06:00:00', 'value': 277}


In [18]:
try:
    print(timestep_one.time_as_datetime)  # timestep_one.time_as_datetime: datetime
except AttributeError:
    print("it's a dict, not a Timestep...")

it's a dict, not a Timestep...


In [19]:
bad_raw_input = """
{
    "temperature": {
        "long_name": 32,
        "arity": true,
        "unit": "kg",
        "timesteps": ["first", "second"]
    }
}
"""

bad_parameters: dict[str, Parameter] = {
    key: Parameter(**value)
    for key, value in json.loads(bad_raw_input, parse_float=Decimal).items()
}
bad_temperature = bad_parameters['temperature']  # bad_parameter: Parameter
bad_timesteps = bad_temperature.timesteps  # bad_timesteps: list[Timestep]
bad_timestep_one = timesteps[0]  # bad_timestep_one: Timestep
print(bad_timestep_one)
print("this is complete nonsense but the type-checker has no idea")

{'time': '2024-11-28T06:00:00', 'value': 277}
this is complete nonsense but the type-checker has no idea


In [20]:
Parameter.__annotations__
# Parameter.__dataclass_fields__ is similar but includes whatever field(...) specifies

{'long_name': str,
 'arity': int,
 'unit': typing.Literal['K', 'mm/s', 'm'],
 'timesteps': list[__main__.Timestep]}

In [21]:
def validate_dataclass_instance(instance: Any) -> None:
    """
    FOR DEMONSTRATION PURPOSES ONLY
    PLEASE DO NOT ACTUALLY DO ANYTHING LIKE THIS
    """
    for key, field in instance.__dataclass_fields__.items():
        value = getattr(instance, key)
        if not isinstance(value, field.type):
            raise ValueError(key, value)
        # ... handle generic types like list[T] ...
        # ... detect collections and recurse into them ...
        # ... allow either value if the type is a Union ...
        # ... a truly obscene repetoire of edge cases to detect and handle

try:
    validate_dataclass_instance(bad_temperature)
except ValueError as e:
    print(repr(e))

ValueError('long_name', 32)


In [22]:
import pydantic

class Timestep(pydantic.BaseModel):
    time: str  # but we wish it was a datetime
    value: Union[int, Decimal]

    @property
    def time_as_datetime(self):
        return datetime.strptime(self.time, '%Y-%m-%dT%H:%M:%S')

class Parameter(pydantic.BaseModel):
    long_name: str
    arity: int
    unit: Literal['K', 'mm/s', 'm']
    timesteps: list[Timestep]

# we need an adapter when the root is a dict or list
# https://docs.pydantic.dev/2.10/concepts/type_adapter/
parameters_adapter = pydantic.TypeAdapter(dict[str, Parameter])

parameters = parameters_adapter.validate_json(raw_input)
temperature = parameters['temperature']  # temperature: Parameter
temperature

Parameter(long_name='teeeeemperature', arity=12, unit='K', timesteps=[Timestep(time='2024-11-28T06:00:00', value=277), Timestep(time='2024-11-28T12:00:00', value=Decimal('297.3213666577566'))])

In [23]:
temperature.timesteps[0].time_as_datetime  # : datetime

datetime.datetime(2024, 11, 28, 6, 0)

In [24]:
try:
    parameters = parameters_adapter.validate_json(bad_raw_input)
except pydantic.ValidationError as e:
    print(repr(e))

4 validation errors for dict[str,Parameter]
temperature.long_name
  Input should be a valid string [type=string_type, input_value=32, input_type=int]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type
temperature.unit
  Input should be 'K', 'mm/s' or 'm' [type=literal_error, input_value='kg', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/literal_error
temperature.timesteps.0
  Input should be an object [type=model_type, input_value='first', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
temperature.timesteps.1
  Input should be an object [type=model_type, input_value='second', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type


In [25]:
from enum import Enum
from typing import Annotated

class Timestep(pydantic.BaseModel):
    time: datetime  # strs will be converted to datetime if possible
    value: Decimal  # ints will be converted to Decimal

class UnitEnum(str, Enum):
    DEGREES_KELVIN = 'K'
    MILLIMETRES_PER_SECOND = 'mm/s'
    METRES = 'm'

def timesteps_in_order(timesteps: list[Timestep]) -> list[Timestep]:
    """Validates that the Timesteps are in ascending temporal order"""
    times = [step.time for step in timesteps]
    if sorted(times) != times:
        raise ValueError(f'Step times {times} are not in ascending order')
    return timesteps
    # or return sorted(timesteps, key=lambda ts: ts.time)

class Parameter(pydantic.BaseModel):
    long_name: Annotated[str, pydantic.StringConstraints(max_length=25)]  # validates string length
    arity: pydantic.PositiveInt  # validates arity > 0
    unit: UnitEnum  # validates unit is in the Enum, and converts it to an Enum value
    timesteps: Annotated[list[Timestep], pydantic.AfterValidator(timesteps_in_order)]

parameters_adapter = pydantic.TypeAdapter(dict[str, Parameter])
parameters = parameters_adapter.validate_json(raw_input)
temperature = parameters['temperature']  # temperature: Parameter
temperature

Parameter(long_name='teeeeemperature', arity=12, unit=<UnitEnum.DEGREES_KELVIN: 'K'>, timesteps=[Timestep(time=datetime.datetime(2024, 11, 28, 6, 0), value=Decimal('277')), Timestep(time=datetime.datetime(2024, 11, 28, 12, 0), value=Decimal('297.3213666577566'))])

In [26]:
try:
    direct_temperature = Parameter(long_name='temperature', arity='parity', unit=UnitEnum.DEGREES_KELVIN, timesteps=[])  # "Literal['parity']" is not assignable to "int"
except pydantic.ValidationError as e:
    print(e)
else:
    print(direct_temperature)

1 validation error for Parameter
arity
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='parity', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/int_parsing


In [27]:
try:
    direct_temperature = Parameter(long_name="temperature_but_just_way_too_long_for_anyone", arity=3, unit=UnitEnum.DEGREES_KELVIN, timesteps=[])
except pydantic.ValidationError as e:
    print(e)

1 validation error for Parameter
long_name
  String should have at most 25 characters [type=string_too_long, input_value='temperature_but_just_way_too_long_for_anyone', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/string_too_long


In [28]:
from datetime import timedelta

timesteps = [
    Timestep(time=datetime.now(), value=Decimal(3)),
    Timestep(time=datetime.now() - timedelta(minutes=5), value=Decimal(4))
]
try:
    direct_temperature = Parameter(long_name="temperature", arity=3, unit=UnitEnum.DEGREES_KELVIN, timesteps=timesteps)
except pydantic.ValidationError as e:
    print(e)

1 validation error for Parameter
timesteps
  Value error, Step times [datetime.datetime(2025, 1, 29, 20, 25, 12, 630854), datetime.datetime(2025, 1, 29, 20, 20, 12, 630854)] are not in ascending order [type=value_error, input_value=[Timestep(time=datetime.d...4), value=Decimal('4'))], input_type=list]
    For further information visit https://errors.pydantic.dev/2.10/v/value_error


In [29]:
print(temperature.model_dump_json(indent=4))

{
    "long_name": "teeeeemperature",
    "arity": 12,
    "unit": "K",
    "timesteps": [
        {
            "time": "2024-11-28T06:00:00",
            "value": "277"
        },
        {
            "time": "2024-11-28T12:00:00",
            "value": "297.3213666577566"
        }
    ]
}


In [30]:
temperature = parameters['temperature']
temperature.long_name = 3  # "Literal[3]" is not assignable to "str"
print("we can trash the pydantic Model instance at runtime:", temperature.long_name == 3)

we can trash the pydantic Model instance at runtime: True


In [31]:
class Parameter(pydantic.BaseModel):
    # specifying a model_config 
    # https://docs.pydantic.dev/2.10/api/config/
    model_config = pydantic.ConfigDict(frozen=True)

    long_name: Annotated[str, pydantic.StringConstraints(max_length=25)]  # validates string length
    arity: pydantic.PositiveInt  # validates arity > 0
    unit: UnitEnum  # validates unit is in the Enum, and converts it to an Enum value
    timesteps: Annotated[list[Timestep], pydantic.AfterValidator(timesteps_in_order)]

parameters_adapter = pydantic.TypeAdapter(dict[str, Parameter])
parameters = parameters_adapter.validate_json(raw_input)
temperature = parameters['temperature']
try:
    temperature.long_name = 3
except pydantic.ValidationError as e:
    print(repr(e))

1 validation error for Parameter
long_name
  Instance is frozen [type=frozen_instance, input_value=3, input_type=int]
    For further information visit https://errors.pydantic.dev/2.10/v/frozen_instance


### Other cool things to do with Pydantic

- Using an [`alias_generator`](https://docs.pydantic.dev/latest/api/config/#pydantic.config.ConfigDict.alias_generator) so that our JSON fields are consistently `camelCase` but our Python model fields are `snake_case`
- Implementing HTTP APIs with [`FastAPI`](https://fastapi.tiangolo.com/), in which `pydantic` fuels request deserialisation and validation, response (and error) serialisation, JSON schema generation, all from type hints
- `pydantic` supports [Generic models](https://docs.pydantic.dev/2.10/concepts/models/#generic-models), but as with all generics, think very hard before opening this can of worms

Read the documentation, find out what's possible!

### Bonus section: sometimes NamedTuple is all you need

In [32]:
# Let us not forget the humble but formidable NamedTuple:
from typing import NamedTuple

class Timestep(NamedTuple):
    time: datetime
    value: Decimal

timestep = Timestep(time=datetime.now(), value=Decimal('297.3213666577565674'))

print(timestep)
print(timestep.time)

Timestep(time=datetime.datetime(2025, 1, 29, 20, 25, 12, 702006), value=Decimal('297.3213666577565674'))
2025-01-29 20:25:12.702006


In [33]:
try:
    timestep.time = datetime.now()
except AttributeError as e:
    print(repr(e))

AttributeError("can't set attribute")
