# Data Classes

Data classes (introduced to the standard library in Python 3.7 and available as a third-party backport for Python 3.6) let us cut out a lot of boilerplate code for classes whose primary purpose is to store data.

Think of them as super-charged `namedtuple`s.

In [1]:
from dataclasses import dataclass
# A minimal data class: @dataclass reads the annotated class attributes below
# and generates __init__, __repr__ and __eq__ from them.
@dataclass
class Card:
    rank: str  # required field (no default given)
    suit: str  # required field (no default given)

In [2]:
c = Card(rank='J', suit='diamonds')
c

Card(rank='J', suit='diamonds')

In [3]:
c.rank

'J'

In [4]:
c.rank = 'Q'

In [5]:
Card('Q', 'diamonds') == c

True

In [7]:
# NOTE(review): there is no @dataclass decorator on this cell, so as written
# this is a plain class that only records annotations. Presumably shown for
# contrast with the decorated version; confirm the omission is intended.
class Card:
    rank: str
    suit: str

How about some default values?

In [8]:
# Fields may carry defaults; the generated __init__ then treats them as
# optional arguments, so Card() with no arguments is valid.
@dataclass
class Card:
    rank: str = '2'       # default rank
    suit: str = 'Spades'  # default suit

In [9]:
Card()

Card(rank='2', suit='Spades')

In [10]:
Card.__annotations__

{'rank': str, 'suit': str}

In [11]:
Card.__dict__

mappingproxy({'__module__': '__main__',
              '__annotations__': {'rank': str, 'suit': str},
              'rank': '2',
              'suit': 'Spades',
              '__dict__': <attribute '__dict__' of 'Card' objects>,
              '__weakref__': <attribute '__weakref__' of 'Card' objects>,
              '__doc__': "Card(rank: str = '2', suit: str = 'Spades')",
              '__dataclass_params__': _DataclassParams(init=True,repr=True,eq=True,order=False,unsafe_hash=False,frozen=False),
              '__dataclass_fields__': {'rank': Field(name='rank',type=<class 'str'>,default='2',default_factory=<dataclasses._MISSING_TYPE object at 0x7f2d60b57ee0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD),
               'suit': Field(name='suit',type=<class 'str'>,default='Spades',default_factory=<dataclasses._MISSING_TYPE object at 0x7f2d60b57ee0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD)},
      

What if we don't want to do static typing?

In [12]:
from typing import Any

# A class attribute only becomes a dataclass field when it is annotated;
# typing.Any provides that annotation without committing to a concrete type.
@dataclass
class Card:
    rank: Any = '2'
    suit: Any = 'Spades'

In [13]:
Card()

Card(rank='2', suit='Spades')

In [14]:
Card(rank='the greatest rank, ever'.split(), suit=3.14159)

Card(rank=['the', 'greatest', 'rank,', 'ever'], suit=3.14159)

In [15]:
Card.__annotations__

{'rank': typing.Any, 'suit': typing.Any}

## More complex examples

In [16]:
# The thirteen ranks and four suits used to build a deck, as lists of strings.
ranks = ['2', '3', '4', '5', '6', '7', '8', '9', '10', 'J', 'Q', 'K', 'A']
suits = ['spades', 'clubs', 'hearts', 'diamonds']


def make_deck():
    """Return a new list with one Card per (rank, suit) pair.

    Cards are grouped by rank: every suit of the first rank comes first,
    then every suit of the next rank, and so on.
    """
    deck = []
    for rank in ranks:
        for suit in suits:
            deck.append(Card(rank, suit))
    return deck

In [17]:
from typing import List

# Card is redefined without defaults, so both fields are required again.
@dataclass
class Card:
    rank: str
    suit: str
        
# A data class can use another data class in a field's type; Deck has a
# single field annotated as a list of Card objects.
@dataclass
class Deck:
    cards: List[Card]  # no default, so it must be passed to Deck(...)


In [18]:
Deck(cards=make_deck())

Deck(cards=[Card(rank='2', suit='spades'), Card(rank='2', suit='clubs'), Card(rank='2', suit='hearts'), Card(rank='2', suit='diamonds'), Card(rank='3', suit='spades'), Card(rank='3', suit='clubs'), Card(rank='3', suit='hearts'), Card(rank='3', suit='diamonds'), Card(rank='4', suit='spades'), Card(rank='4', suit='clubs'), Card(rank='4', suit='hearts'), Card(rank='4', suit='diamonds'), Card(rank='5', suit='spades'), Card(rank='5', suit='clubs'), Card(rank='5', suit='hearts'), Card(rank='5', suit='diamonds'), Card(rank='6', suit='spades'), Card(rank='6', suit='clubs'), Card(rank='6', suit='hearts'), Card(rank='6', suit='diamonds'), Card(rank='7', suit='spades'), Card(rank='7', suit='clubs'), Card(rank='7', suit='hearts'), Card(rank='7', suit='diamonds'), Card(rank='8', suit='spades'), Card(rank='8', suit='clubs'), Card(rank='8', suit='hearts'), Card(rank='8', suit='diamonds'), Card(rank='9', suit='spades'), Card(rank='9', suit='clubs'), Card(rank='9', suit='hearts'), Card(rank='9', suit='

## This is tempting, but don't do it!

```python
from typing import List

@dataclass
class Card:
    rank: str
    suit: str
        
@dataclass
class Deck:
    cards: List[Card] = make_deck()  # this fails if we try it
```


In [19]:
# Defining the class inside try/except lets the notebook show the error
# message instead of aborting the cell.
try:
    @dataclass
    class Deck:
        # A list default would be shared by every Deck instance, so
        # @dataclass rejects it at class-definition time with ValueError.
        cards: List[Card] = make_deck()  # this fails if we try it
except ValueError as err:
    # Catch only the error @dataclass raises for a mutable default, so an
    # unrelated failure (e.g. a NameError) is not reported as this one.
    print('Got exception', err)

Got exception mutable default <class 'list'> for field cards is not allowed: use default_factory


## Why not use mutable defaults?

In [None]:
def append_list(value, lst=[]):
    """Append ``value`` to ``lst`` and return the list.

    Deliberately written with a mutable default argument to demonstrate
    the pitfall: the default ``[]`` is created once, when the function is
    defined, and that same list is reused by every call that omits ``lst``.
    """
    lst.append(value)  # mutates the caller's list, or the shared default
    return lst

In [None]:
append_list(5, [1,2])

In [None]:
append_list(10)

In [None]:
append_list(5)

In [None]:
def append_list(value, lst=None):
    """Append ``value`` to ``lst`` and return the list.

    When ``lst`` is omitted (or ``None``), a brand-new list is created for
    this call, so separate calls never share state. A list passed in by the
    caller is modified in place and returned.
    """
    target = [] if lst is None else lst
    target.append(value)
    return target

## Back to data classes

In [20]:
from dataclasses import field

# default_factory takes a zero-argument callable (note: make_deck, not
# make_deck()); it is called whenever a Deck is created without explicit
# cards, instead of evaluating one list when the class is defined.
@dataclass
class Deck:
    cards: List[Card] = field(default_factory=make_deck)


In [21]:
Deck.__dict__

mappingproxy({'__module__': '__main__',
              '__annotations__': {'cards': typing.List[__main__.Card]},
              '__dict__': <attribute '__dict__' of 'Deck' objects>,
              '__weakref__': <attribute '__weakref__' of 'Deck' objects>,
              '__doc__': 'Deck(cards: List[__main__.Card] = <factory>)',
              '__dataclass_params__': _DataclassParams(init=True,repr=True,eq=True,order=False,unsafe_hash=False,frozen=False),
              '__dataclass_fields__': {'cards': Field(name='cards',type=typing.List[__main__.Card],default=<dataclasses._MISSING_TYPE object at 0x7f2d60b57ee0>,default_factory=<function make_deck at 0x7f2d60af60d0>,init=True,repr=True,hash=None,compare=True,metadata=mappingproxy({}),_field_type=_FIELD)},
              '__init__': <function __main__.__create_fn__.<locals>.__init__(self, cards: List[__main__.Card] = <factory>) -> None>,
              '__repr__': <function __main__.__create_fn__.<locals>.__repr__(self)>,
              '__eq__

In [22]:
Deck()

Deck(cards=[Card(rank='2', suit='spades'), Card(rank='2', suit='clubs'), Card(rank='2', suit='hearts'), Card(rank='2', suit='diamonds'), Card(rank='3', suit='spades'), Card(rank='3', suit='clubs'), Card(rank='3', suit='hearts'), Card(rank='3', suit='diamonds'), Card(rank='4', suit='spades'), Card(rank='4', suit='clubs'), Card(rank='4', suit='hearts'), Card(rank='4', suit='diamonds'), Card(rank='5', suit='spades'), Card(rank='5', suit='clubs'), Card(rank='5', suit='hearts'), Card(rank='5', suit='diamonds'), Card(rank='6', suit='spades'), Card(rank='6', suit='clubs'), Card(rank='6', suit='hearts'), Card(rank='6', suit='diamonds'), Card(rank='7', suit='spades'), Card(rank='7', suit='clubs'), Card(rank='7', suit='hearts'), Card(rank='7', suit='diamonds'), Card(rank='8', suit='spades'), Card(rank='8', suit='clubs'), Card(rank='8', suit='hearts'), Card(rank='8', suit='diamonds'), Card(rank='9', suit='spades'), Card(rank='9', suit='clubs'), Card(rank='9', suit='hearts'), Card(rank='9', suit='

## More `dataclass`  and `field` options

`@dataclass(args)` 
- `init=True`: create an `__init__` method?
- `repr=True`: create a `__repr__` method?
- `eq=True`: create an `__eq__` method?
- `order=False`: allow comparisons (`__gt__`, `__ge__`, etc.)
- `unsafe_hash=False`: create a `__hash__` method? (unsafe b/c it can change)
- `frozen=False`: are the declared properties immutable?

`field(args)`
- `default=<MISSING>`: default value
- `default_factory=<MISSING>`: default factory
- `init=True`: use in `__init__` for dataclass?
- `repr=True`: use in `__repr__` for dataclass?
- `hash=None`: use in `__hash__` for dataclass? (`None`, the default, means "follow `compare`")
- `compare=True`: use in `__eq__` and `__ne__` for dataclass?
- `metadata=None`: arbitrary metadata to attach to field


In [23]:
@dataclass
class MyClass:
    # No default or default_factory is given, so `a` is still a required
    # argument; the field() call only attaches extra metadata.
    a: Any = field(metadata={'my': 'metadata'})

    # The annotations on foo are recorded but not checked at runtime: the
    # body only prints its argument and implicitly returns None, not a list.
    def foo(self, a: int) -> List[int]:
        print('call foo', a)

In [24]:
obj = MyClass(5)

In [25]:
obj.a

5

In [26]:
obj

MyClass(a=5)

In [27]:
obj.foo('this is no int')

call foo this is no int


In [28]:
MyClass.foo.__annotations__

{'a': int, 'return': typing.List[int]}

In [29]:
obj.__dict__

{'a': 5}

Metadata is awkward to access, however:

In [30]:
MyClass.__dataclass_fields__['a'].metadata

mappingproxy({'my': 'metadata'})

# Aside on hashability

In [31]:
# unsafe_hash=True makes @dataclass generate a __hash__ from the fields even
# though instances stay mutable, so an object's hash changes when `a` changes.
@dataclass(unsafe_hash=True)
class MyHashable:
    a: int

In [32]:
foo = MyHashable(a=5)

In [33]:
dct = {foo: 10}

In [34]:
bar = MyHashable(a=5)

In [35]:
bar == foo

True

In [36]:
bar in dct

True

In [37]:
foo.a = 10

In [38]:
bar in dct

False

In [39]:
foo in dct

False

In [40]:
list(dct.keys())

[MyHashable(a=10)]

In [41]:
foo.a = 5

In [42]:
foo in dct

True

In [43]:
dct

{MyHashable(a=5): 10}

# Other uses of type annotations / hinting

- https://pydantic-docs.helpmanual.io for schema creation
- http://mypy-lang.org for static type checking 