# 01_05: Data classes

In [1]:
import math
import collections
import dataclasses
import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as pp

<table>
<tr><th>name</th><th>lastname</th><th>birthday</th></tr>
<tr><td>Michele</td><td>Vallisneri</td><td>July 15</td></tr>
<tr><td>Albert</td><td>Einstein</td><td>March 14</td></tr>
<tr><td>John</td><td>Lennon</td><td>October 9</td></tr>
<tr><td>Jocelyn</td><td>Bell Burnell</td><td>July 15</td></tr>
</table>

In [2]:
# The table above as a list of records: one dict per person, keyed by column name.
peopledict = [
    dict(name="Michele", lastname="Vallisneri", birthday="July 15"),
    dict(name="Albert", lastname="Einstein", birthday="March 14"),
    dict(name="John", lastname="Lennon", birthday="October 9"),
    dict(name="Jocelyn", lastname="Bell Burnell", birthday="July 15"),
]

In [3]:
# keep only the records whose birthday string is exactly "July 15"
[record for record in peopledict if record["birthday"] == "July 15"]

[{'name': 'Michele', 'lastname': 'Vallisneri', 'birthday': 'July 15'},
 {'name': 'Jocelyn', 'lastname': 'Bell Burnell', 'birthday': 'July 15'}]

In [4]:
# A lightweight record type: a tuple whose three positions can also be read by name.
Person = collections.namedtuple("Person", "name lastname birthday")

In [5]:
# fields can be passed by keyword; the repr echoes them back by name
Person(
    name='Michele',
    lastname='Vallisneri',
    birthday='July 15',
)

Person(name='Michele', lastname='Vallisneri', birthday='July 15')

In [6]:
# Positional arguments are matched to the Person fields in declaration order.
peopletuples = [
    Person(*fields)
    for fields in (
        ("Michele", "Vallisneri", "July 15"),
        ("Albert", "Einstein", "March 14"),
        ("John", "Lennon", "October 9"),
        ("Jocelyn", "Bell Burnell", "July 15"),
    )
]

In [7]:
# with namedtuples, fields are read as attributes rather than looked up by key
[entry for entry in peopletuples if entry.lastname == "Lennon"]

[Person(name='John', lastname='Lennon', birthday='October 9')]

In [8]:
# unpack a dict record into keyword arguments to build the equivalent namedtuple
Person(**peopledict[3])

Person(name='Jocelyn', lastname='Bell Burnell', birthday='July 15')

In [9]:
# the reverse conversion: a namedtuple back to a dict keyed by field name
peopletuples[3]._asdict()

{'name': 'Jocelyn', 'lastname': 'Bell Burnell', 'birthday': 'July 15'}

In [10]:
@dataclasses.dataclass
class Persondata:
    """A person record; `birthday` may be omitted, in which case it is "unknown"."""

    name: str
    lastname: str
    # explicit field() spelling of a plain default value
    birthday: str = dataclasses.field(default="unknown")

In [11]:
# Build one Persondata per table row, naming every field explicitly.
peopledata = [
    Persondata(name="Michele", lastname="Vallisneri", birthday="July 15"),
    Persondata(name="Albert", lastname="Einstein", birthday="March 14"),
    Persondata(name="John", lastname="Lennon", birthday="October 9"),
    Persondata(name="Jocelyn", lastname="Bell Burnell", birthday="July 15"),
]

In [12]:
# everyone whose birthday is something other than "July 15"
[entry for entry in peopledata if entry.birthday != "July 15"]

[Persondata(name='Albert', lastname='Einstein', birthday='March 14'),
 Persondata(name='John', lastname='Lennon', birthday='October 9')]

In [13]:
@dataclasses.dataclass
class Persondata:
    """Person record extended with a helper method and a custom printout."""

    name: str
    lastname: str
    birthday: str = "unknown"

    # inside a method, "self" is the instance the method was called on
    def fullname(self):
        """Return the first and last name separated by a single space."""
        return " ".join((self.name, self.lastname))

    # str() and print() use __str__ when a class defines it
    def __str__(self):
        """Return the record as "LASTNAME, NAME, born BIRTHDAY"."""
        return ", ".join((self.lastname, self.name, "born " + self.birthday))

In [14]:
# a sample instance used to try out the methods defined on Persondata
michele = Persondata(name='Michele', lastname='Vallisneri', birthday='July 15')

In [15]:
# call the method on the instance; the instance itself is passed as "self"
michele.fullname()

'Michele Vallisneri'

In [16]:
# print() uses the __str__ method defined on the class
print(michele)

Vallisneri, Michele, born July 15


In [17]:
@dataclasses.dataclass(frozen=True)
class Persondata_frozen:
    """Read-only variant: assigning to a field of an instance raises FrozenInstanceError."""

    name: str
    lastname: str
    birthday: str = dataclasses.field(default="unknown")


@dataclasses.dataclass(order=True)
class Persondata_ordered:
    """Sortable variant: instances compare like the tuple (name, lastname, birthday)."""

    name: str
    lastname: str
    birthday: str = dataclasses.field(default="unknown")


@dataclasses.dataclass
class Persondata_customorder:
    """Person record that sorts by last name, then first name, then birthday.

    Only "less than" is defined here; sorted() and list.sort() rely on "<" alone.
    """

    name: str
    lastname: str
    birthday: str = "unknown"

    # custom "less than" comparison
    def __lt__(self, other):
        """Return True if self sorts before other by (lastname, name, birthday).

        Returns NotImplemented when `other` is not a Persondata_customorder, so
        that Python raises its usual TypeError for unsupported comparisons
        instead of an AttributeError on a missing field.
        """
        if not isinstance(other, Persondata_customorder):
            return NotImplemented

        return (self.lastname, self.name, self.birthday) < (other.lastname, other.name, other.birthday)


@dataclasses.dataclass
class Persondata_computed:
    """Person record with a derived `fullname` field that is filled in after init."""

    name: str
    lastname: str
    birthday: str = "unknown"
    # not a constructor argument; assigned in __post_init__
    fullname: str = dataclasses.field(init=False)

    def __post_init__(self):
        """Build `fullname` from the two name fields once they have been set."""
        first, last = self.name, self.lastname
        self.fullname = first + " " + last

In [18]:
import pydantic

In [19]:
@pydantic.dataclasses.dataclass
class Persondata_pydantic:
    """Person record whose fields are validated by pydantic when it is built.

    NOTE(review): the default birthday "unknown" would not pass validate_date
    if it were passed explicitly; presumably pydantic does not run validators
    on defaults -- confirm this is the intended behavior.
    """

    name: str
    lastname: str
    birthday: str = "unknown"

    @pydantic.field_validator("birthday")
    @classmethod
    def validate_date(cls, value): # a class method, so first argument is the class
        """Check that `value` reads as "MONTHNAME DAYNUMBER" and return it unchanged.

        Raises:
            ValueError: if `value` is not a month name followed by a day that
                exists in that month.
        """
        # Parse against an explicit leap year. Without a year strptime assumes
        # 1900, which rejects "February 29", and Python 3.13+ warns about
        # parsing a day of month with no year.
        try:
            datetime.datetime.strptime(f"{value};2000", "%B %d;%Y")
        except ValueError as err:
            # report the caller's own string and format, not the padded ones
            raise ValueError(f"time data {value!r} does not match format '%B %d'") from err

        return value

In [20]:
# lastname is an int here, so validation fails; catch and print the error so
# that the cells below still run under "Restart & Run All"
try:
    Persondata_pydantic("Michele", 15, "July 15")
except pydantic.ValidationError as err:
    print(f"{type(err).__name__}: {err}")

ValidationError: 1 validation error for Persondata_pydantic
1
  Input should be a valid string [type=string_type, input_value=15, input_type=int]
    For further information visit https://errors.pydantic.dev/2.9/v/string_type

In [21]:
# "7/15" is not "MONTHNAME DAYNUMBER", so the birthday validator rejects it;
# catch and print the error so the notebook can still run to completion
try:
    Persondata_pydantic('Michele', "Vallisneri", "7/15")
except pydantic.ValidationError as err:
    print(f"{type(err).__name__}: {err}")

ValidationError: 1 validation error for Persondata_pydantic
2
  Value error, time data '7/15' does not match format '%B %d' [type=value_error, input_value='7/15', input_type=str]
    For further information visit https://errors.pydantic.dev/2.9/v/value_error