## Chapter 5. Data Class Builders

- collections.namedtuple: the simplest way since v2.6
- typing.NamedTuple: An alternative that requires type hints on the fields since v3.5
- @dataclasses.dataclass: A class decorator that allows more customization than previous alternatives since v3.7

In [1]:
# Example 5-1. class/coordinates.py

class Coordinate:
    def __init__(self, lat, lon):
        self.lat = lat
        self.lon = lon

In [2]:
moscow = Coordinate(55.76, 37.62)
print(moscow)  # Bad representation!
location = Coordinate(55.76, 37.62)
print(location == moscow)  # False!
print((location.lat, location.lon) == (moscow.lat, moscow.lon))

<__main__.Coordinate object at 0x000002B4D1301330>
False
True


In [3]:
from collections import namedtuple

Coordinate = namedtuple('Coordinate', 'lat lon')
print(issubclass(Coordinate, tuple))
moscow = Coordinate(55.756, 37.617)
print(moscow)  # Useful __repr__
print(moscow == Coordinate(lat=55.756, lon=37.617))  # Meaningful __eq__

True
Coordinate(lat=55.756, lon=37.617)
True


In [4]:
import typing

# newer typing.NamedTuple adds a type annotation to each field
Coordinate = typing.NamedTuple('Coordinate', [('lat', float), ('lon', float)])
# Coordinate = typing.NamedTuple('Coordinate', lat=float, lon=float)

print(issubclass(Coordinate, tuple))
print(typing.get_type_hints(Coordinate))
moscow = Coordinate(55.756, 37.617)
print(moscow)
print(moscow == Coordinate(lat=55.756, lon=37.617))

True
{'lat': <class 'float'>, 'lon': <class 'float'>}
Coordinate(lat=55.756, lon=37.617)
True


In [5]:
# Example 5-2. typing_namedtuple/coordinates.py

from typing import NamedTuple

class Coordinate(NamedTuple):  # not class inheritance -> it is metaclass
    lat: float
    lon: float

    def __str__(self):
        ns = 'N' if self.lat >= 0 else 'S'
        we = 'E' if self.lon >= 0 else 'W'
        return f'{abs(self.lat):.1f}°{ns}, {abs(self.lon):.1f}°{we}'


In [8]:
#print(issubclass(Coordinate, typing.NamedTuple))  # TypeError
print(issubclass(Coordinate, tuple))

moscow = Coordinate(55.756, 37.617)
print(moscow)  # __str__
print(moscow == Coordinate(lat=55.756, lon=37.617))
print()

moscow2 = eval(repr(moscow))
print(moscow2 is moscow)
print(moscow2 == moscow)
print(moscow2)

moscow  # __repr__

True
55.8°N, 37.6°E
True

False
True
55.8°N, 37.6°E


Coordinate(lat=55.756, lon=37.617)

In [9]:
# Example 5-3. dataclass/coordinates.py

from dataclasses import dataclass

@dataclass(frozen=True)
class Coordinate:
    lat: float
    lon: float
    
    def __str__(self):
        ns = 'N' if self.lat >= 0 else 'S'
        we = 'E' if self.lon >= 0 else 'W'
        return f'{abs(self.lat):.1f}°{ns}, {abs(self.lon):.1f}°{we}'

In [10]:
moscow = Coordinate(55.756, 37.617)
print(moscow)  # __str__
print(moscow == Coordinate(lat=55.756, lon=37.617))
moscow  # __repr__

55.8°N, 37.6°E
True


Coordinate(lat=55.756, lon=37.617)

In [11]:
import dataclasses

moscow_dict = dataclasses.asdict(moscow)
moscow_dict['lat']

55.756

### Classic Named Tuples

In [12]:
# Example 5-4. Defining and using a named tuple type

from collections import namedtuple
City = namedtuple('City', 'name country population coordinates')
tokyo = City('Tokyo', 'JP', 36.933, (35.689722, 139.691667))
tokyo  # call __repr__

City(name='Tokyo', country='JP', population=36.933, coordinates=(35.689722, 139.691667))

In [13]:
print(tokyo.population)
print(tokyo.coordinates)
print(tokyo[1])

36.933
(35.689722, 139.691667)
JP


In [14]:
# Example 5-5. Named tuple attributes and methods (continued from the previous example)\

print(City._fields)
Coordinate = namedtuple('Coordinate', 'lat lon')

delhi_data = ('Delhi NCR', 'IN', 21.935, Coordinate(28.613889, 77.208889))
delhi = City._make(delhi_data)
print(delhi._asdict())  # Make a dict

import json
json.dumps(delhi._asdict())

('name', 'country', 'population', 'coordinates')
{'name': 'Delhi NCR', 'country': 'IN', 'population': 21.935, 'coordinates': Coordinate(lat=28.613889, lon=77.208889)}


'{"name": "Delhi NCR", "country": "IN", "population": 21.935, "coordinates": [28.613889, 77.208889]}'

In [13]:
# Example 5-6. Named tuple attributes and methods, continued from Example 5-5

Coordinate = namedtuple('Coordinate', 'lat lon reference', defaults=['WGS84'])
print(Coordinate(0, 0))
print(Coordinate._field_defaults)

Coordinate(lat=0, lon=0, reference='WGS84')
{'reference': 'WGS84'}


In [15]:
# Example 1-1. A deck as a sequence of cards

import collections

Card = collections.namedtuple('Card', ['rank', 'suit'])

class FrenchDeck:
    ranks = [str(n) for n in range(2, 11)] + list('JQKA')
    suits = 'spades diamonds clubs hearts'.split()

    def __init__(self):
        self._cards = [Card(rank, suit) for suit in self.suits
                                        for rank in self.ranks]

    def __len__(self):
        return len(self._cards)

    def __getitem__(self, position):
        return self._cards[position]

In [16]:
# Example 5-7. frenchdeck.doctest: Adding a class attribute and a method to Card,
# the namedtuple from “A Pythonic Card Deck” on page 5

Card.suit_values = dict(spades=3, hearts=2, diamonds=1, clubs=0)  # attach a class attribute with values for each suit

def spades_high(card):
    rank_value = FrenchDeck.ranks.index(card.rank)
    suit_value = card.suit_values[card.suit]
    return rank_value * len(card.suit_values) + suit_value

In [17]:
Card.overall_rank = spades_high
lowest_card = Card('2', 'clubs')
highest_card = Card('A', 'spades')
print(lowest_card.overall_rank())
print(highest_card.overall_rank())

0
51


### Typed Named Tuples

In [18]:
# Example 5-8. typing_namedtuple/coordinates2.py

from typing import NamedTuple

class Coordinate(NamedTuple):
    lat: float
    lon: float
    reference: str = 'WGS84'


In [19]:
# Example 5-9. Python does not enforce type hints at runtime

import typing

class Coordinate(typing.NamedTuple):
    lat: float
    lon: float

trash = Coordinate('Ni!', None)
print(trash)

Coordinate(lat='Ni!', lon=None)


In [49]:
# Example 5-10. meaning/demo_plain.py: a plain class with type hints

class DemoPlainClass:
    a: int  # not an attribute, just added to annotation, should be initialized in __init__ or __new__
    b: float = 1.1  # save annotation and an attribute
    c = 'spam'  # just a plain old class attribute

    def __init__(self, value):
        self.a = value

print(DemoPlainClass.__annotations__)
# print(DemoPlainClass.a)  # error
print(DemoPlainClass.b)
print(DemoPlainClass.c)

print()
#DemoPlainClass.b = 1.2
#DemoPlainClass.c = 'x'
obj = DemoPlainClass(10)
print(obj.a)  # Error
print(obj.b)  # retrieve the class attribute, not an instance attribute
print(obj.c)

{'a': <class 'int'>, 'b': <class 'float'>}
1.1
spam

10
1.2
x


In [29]:
# Example 5-11. meaning/demo_nt.py: a class built with typing.NamedTuple

import typing

class DemoNTClass(typing.NamedTuple):
    a: int          # an annotation + an instance attribute
    b: float = 1.1  # same
    c = 'spam'      # a plain old class attribute

print(DemoNTClass.__annotations__)
print(DemoNTClass.a)
print(DemoNTClass.b)
print(DemoNTClass.c)

{'a': <class 'int'>, 'b': <class 'float'>}
_tuplegetter(0, 'Alias for field number 0')
_tuplegetter(1, 'Alias for field number 1')
spam


In [50]:
print(DemoNTClass.__doc__)

# DemoNTClass.b = 1.2
# DemoNTClass.c = 'x'
nt = DemoNTClass(8)
print(nt.a)
print(nt.b)
print(nt.c)

DemoNTClass(a, b)
8
1.2
x


# Class and instance variable annotations 

https://peps.python.org/pep-0526/#class-and-instance-variable-annotations

Type annotations can also be used to annotate class and instance variables in class bodies and methods. In particular, the value-less notation a: int allows one to annotate instance variables that should be initialized in __init__ or __new__. The proposed syntax is as follows:

```
class BasicStarship:
    captain: str = 'Picard'               # instance variable with default
    damage: int                           # instance variable without default
    stats: ClassVar[Dict[str, int]] = {}  # class variable
```

Here ClassVar is a special class defined by the typing module that indicates to the static type checker that this variable should not be set on instances.

Note that a ClassVar parameter cannot include any type variables, regardless of the level of nesting: ClassVar[T] and ClassVar[List[Set[T]]] are both invalid if T is a type variable.

In [36]:
# Example 5-12. meaning/demo_dc.py: a class decorated with @dataclass

from dataclasses import dataclass

@dataclass
class DemoDataClass:
    a: int          # annotation and an instance attribute
    b: float = 1.1  # class attribute, becomes an instance attirubte (default value)
    c = 'spam'      # a plain class attribute -> not bound to instance

In [48]:
print(DemoDataClass.__annotations__)
print(DemoDataClass.__doc__)

# print(DemoDataClass.a)  # error
print(DemoDataClass.b)
print(DemoDataClass.c)

dc = DemoDataClass(9)
print(dc.__annotations__)
print(dc.__doc__)
print(dc.a, dc.b, dc.c)

dc.a = 10
dc.b = 'oops'

dc.c = 'whatever'
dc.z = 'secret stash'
print(dc.a, dc.b, dc.c, dc.z)

print(DemoDataClass.b)
print(DemoDataClass.c)
DemoDataClass.b = 'x'

print(DemoDataClass.b)
print(dc.b)
dd = DemoDataClass(1)
print(dd.a, dd.b, dd.c)  # Restore the original class values!
print(DemoDataClass.b)

{'a': <class 'int'>, 'b': <class 'float'>}
DemoDataClass(a: int, b: float = 1.1)
x
spam
{'a': <class 'int'>, 'b': <class 'float'>}
DemoDataClass(a: int, b: float = 1.1)
9 1.1 spam
10 oops whatever secret stash
x
spam
x
oops
1 1.1 spam
x


In [41]:
# Example 5-13. dataclass/club_wrong.py: this class raises ValueError

@dataclass
class ClubMember:
    name: str
    guests: list = []  # Mutable default values!


ValueError: mutable default <class 'list'> for field guests is not allowed: use default_factory

In [42]:
# Example 5-14. dataclass/club.py: this ClubMember definition works

from dataclasses import dataclass, field

@dataclass
class ClubMember:
    name: str
    guests: list = field(default_factory=list)

In [5]:
# Example 5-15. dataclass/club_generic.py: this ClubMember definition is more precise

from dataclasses import dataclass, field

@dataclass
class ClubMember:
    name: str
    guests: list[str] = field(default_factory=list)

In [44]:
# Example 5-16. dataclass/hackerclub.py: doctests for HackerClubMember

# ``HackerClubMember`` objects accept an optional ``handle`` argument::
anna = HackerClubMember('Anna Ravenscroft', handle='AnnaRaven')
print(anna)

# If ``handle`` is omitted, it's set to the first part of the member's name::
leo = HackerClubMember('Leo Rochael')
print(leo)

# Members must have a unique handle. The following ``leo2`` will not be created,
# because its ``handle`` would be 'Leo', which was taken by ``leo``::
# leo2 = HackerClubMember('Leo DaVinci')
# Traceback (most recent call last):
# ...
# ValueError: handle 'Leo' already exists.

# To fix, ``leo2`` must be created with an explicit ``handle``::
leo2 = HackerClubMember('Leo DaVinci', handle='Neo')
print(leo2)

HackerClubMember(name='Anna Ravenscroft', guests=[], handle='AnnaRaven')
HackerClubMember(name='Leo Rochael', guests=[], handle='Leo')
HackerClubMember(name='Leo DaVinci', guests=[], handle='Neo')


In [43]:
# Example 5-17. dataclass/hackerclub.py: code for HackerClubMember

from dataclasses import dataclass

@dataclass
class HackerClubMember(ClubMember):
    all_handles = set()  # all_handlers: ClassVar[set[str]] = set()
    handle: str = ''
    
    def __post_init__(self):
        cls = self.__class__
        if self.handle == '':
            self.handle = self.name.split()[0]
        if self.handle in cls.all_handles:
            msg = f'handle {self.handle!r} already exists.'
            raise ValueError(msg)
        cls.all_handles.add(self.handle)

In [1]:
# Example 5-18. Example from the dataclasses module documentation

from dataclasses import dataclass
from dataclasses import InitVar

@dataclass
class C:
    i: int
    j: int = None
    
    # database: InitVar[DatabaseType] = None

    def __post_init__(self, database):  # init-only variable, pass to __init__, but not in fields.
        if self.j is None and database is not None:
            self.j = database.lookup('j')

### @dataclass Example: Dublin Core Resource Record

In [2]:
# Example 5-19. dataclass/resource.py: code for Resource, a class based on Dublin Core terms
from dataclasses import dataclass, field
from typing import Optional
from enum import Enum, auto
from datetime import date
import datetime

class ResourceType(Enum):
    BOOK = auto()
    EBOOK = auto()
    VIDEO = auto()

    def __repr__(self):  # To enable __repr__ in the inherited class from this
        # return f'ResourceType({self.value})'
        return f'ResourceType({self.__class__.__name__}.{self.name})'


@dataclass
class Resource:
    """Media resource description."""

    identifier: str  # only required field
    title: str = '<untitled>'
    creators: list[str] = field(default_factory=list)
    date: Optional[date] = None
    type: ResourceType = ResourceType.BOOK
    description: str = ''
    language: str = ''
    subjects: list[str] = field(default_factory=list)

In [3]:
description = 'Improving the design of existing code'
book = Resource('978-0-13-475759-9', 'Refactoring, 2nd Edition',
    ['Martin Fowler', 'Kent Beck'], date(2018, 11, 19),
    ResourceType.BOOK, description, 'EN',
    ['computer programming', 'OOP'])

book # doctest: +NORMALIZE_WHITESPACE
book2 = eval(repr(book))
print(book2)
print(book2.type)

Resource(identifier='978-0-13-475759-9', title='Refactoring, 2nd Edition', creators=['Martin Fowler', 'Kent Beck'], date=datetime.date(2018, 11, 19), type=ResourceType(ResourceType.BOOK), description='Improving the design of existing code', language='EN', subjects=['computer programming', 'OOP'])
ResourceType.BOOK


In [6]:
from typing import Optional

@dataclass
class Resource:
    """Media resource description."""

    identifier: str  # only required field
    title: str = '<untitled>'
    creators: list[str] = field(default_factory=list)
    date: Optional[date] = None
    type: ResourceType = ResourceType.BOOK
    description: str = ''
    language: str = ''
    subjects: list[str] = field(default_factory=list)

    def __repr__(self):
        cls = self.__class__
        cls_name = cls.__name__
        indent = ' ' * 4
        res = [f'{cls_name}(']
        for f in fields(cls):  # each field f in the class
            value = getattr(self, f.name)  # get the named attribute from the "instance"
            res.append(f'{indent}{f.name} = {value!r},')
        res.append(')')
        return '\n'.join(res)


In [7]:
description = 'Improving the design of existing code'
book = Resource('978-0-13-475759-9', 'Refactoring, 2nd Edition',
    ['Martin Fowler', 'Kent Beck'], date(2018, 11, 19),
    ResourceType.BOOK, description, 'EN',
    ['computer programming', 'OOP'])

book # doctest: +NORMALIZE_WHITESPACE

TypeError: field() takes 0 positional arguments but 1 was given

### Pattern Matching Class Instances

In [25]:
# Example 5-22. City class and a few instances
import typing

class City(typing.NamedTuple):
    continent: str
    name: str
    country: str


cities = [
    City('Asia', 'Tokyo', 'JP'),
    City('Asia', 'Delhi', 'IN'),
    City('North America', 'Mexico City', 'MX'),
    City('North America', 'New York', 'US'),
    City('South America', 'São Paulo', 'BR'),
]

def match_asian_cities():
    results = []
    for city in cities:
        match city:
            case City(continent='Asia'):  # Keyword Class Patterns
            # case City(continent='Asia', country=cc):  # Add contry attribute
                results.append(city)
    return results

def match_asian_cities_pos():
    results = []
    for city in cities:
        match city:
            case City('Asia'):  # Positional Class Patterns
            # case City('Asia', _, country):
                results.append(city)
    return results

City.__match_args__

('continent', 'name', 'country')