In [5]:
COLORS = {
    'header': '\033[95m',
    'blue': '\033[94m',
    'cyan': '\033[96m',
    'green': '\033[92m',
    'warning': '\033[93m',
    'fail': '\033[91m',
    'endc': '\033[0m',
    'bold': '\033[1m',
    'underline': '\033[4m'
}

def color_text(text: str, color: str) -> str:
    return f"{COLORS.get(color, COLORS['endc'])}{text}{COLORS['endc']}"


### `Dataclasses`:

In [6]:
import json
from typing import List, Any, Type
from dataclasses import dataclass, asdict, is_dataclass


@dataclass
class AddressDataclass:
    street: str
    city: str
    postal_code: str

@dataclass
class UserDataclass:
    id: int
    name: str
    email: str
    age: int
    is_active: bool
    address: AddressDataclass
    tags: List[str]

def encode_dataclass(obj: Any) -> bytes:
    if not is_dataclass(obj):
        raise ValueError("encode_dataclass: Object must be a dataclass instance")
    return json.dumps(asdict(obj), ensure_ascii=False).encode()

def decode_dataclass(data: bytes, cls: Type) -> Any:
    return cls( **json.loads(data) )


### `Pydantic v2`:

In [7]:
from typing import List
from pydantic import BaseModel


class AddressPydantic(BaseModel):
    street: str
    city: str
    postal_code: str

class UserPydantic(BaseModel):
    id: int
    name: str
    email: str
    age: int
    is_active: bool
    address: AddressPydantic
    tags: List[str]

def encode_pydantic(obj: BaseModel) -> str:
    return obj.model_dump_json(exclude_defaults=True).encode()

def decode_pydantic(data: bytes, cls: Type[BaseModel]) -> BaseModel:
    return cls.model_validate_json(data.decode())



### `Msgspec`:


In [8]:
from typing import List
from msgspec import Struct, json


class AddressMsgspec(Struct, kw_only=True, omit_defaults=True):
    street: str
    city: str
    postal_code: str

class UserMsgspec(Struct, kw_only=True, omit_defaults=True):
    id: int
    name: str
    email: str
    age: int
    is_active: bool
    address: AddressMsgspec
    tags: List[str]


def encode_msgspec(obj: Struct) -> bytes:
    return json.encode(obj)

def decode(data: bytes, cls: Type[Struct]) -> Struct:
    return json.decode(data, type=cls)



In [None]:
import random
from typing import Dict, Any


class GeneratorAddress:

    STREETS = [
        "Main St", 
        "High St", 
        "Broadway", 
        "Elm St", 
        "Maple Ave",
        "Main Avenue",
        "Oak Street",
        "Pine Lane",
        "Longwood Drive",
    ]
    CITIES = [
        "London", 
        "Porto Alegre", 
        "Moscow", 
        "Bologna", 
        "Buenos Aires", 
        "Rio de Janeiro",
        "New York",
        "São Paulo",
        "Tokyo",
        "Berlin",
        "Rome",
        "Madrid",
        "Paris",
        "Barcelona",
        "Lisbon",
        "Vienna",
        "Prague",
        "Budapest",
        "Warsaw",
        "Dublin",
    ]

    @staticmethod
    def generate_random_address() -> Dict[str, Any]:
        return {
            'street': random.choice(GeneratorAddress.STREETS),
            'city': random.choice(GeneratorAddress.CITIES),
            'postal_code': f"{random.randint(10000, 99999)}"
        }

class GeneratorUser:
    
    TAGS = [
        "user", 
        "admin", 
        "developer", 
        "tester",
    ]
    NAMES = [
        "Alice", 
        "Bob", 
        "Charlie", 
        "David", 
        "Eve", 
        "Frank",
        "Anne",
        "George",
        "Hannah",
        "James",
        "Claire",
        "Roger",
    ]
    EMAIL_DOMAINS = [
        "yahoo.com", 
        "gmail.com",  
        "email.com", 
        "mail.com",
    ]

    @staticmethod
    def generate_random_user(user_id: int) -> Dict[str, Any]:
        return {
            'id': user_id,
            'name': random.choice(GeneratorUser.NAMES),
            'email': f"{random.choice(GeneratorUser.NAMES).lower()}{user_id}@{random.choice(GeneratorUser.EMAIL_DOMAINS)}",
            'age': random.randint(18, 70),
            'is_active': random.choice([True, False]),
            'address': GeneratorAddress.generate_random_address(),
            'tags': random.sample(GeneratorUser.TAGS, k=random.randint(1, len(GeneratorUser.TAGS))),
        }

class Generator:

    @staticmethod
    def generate_user(user_id: int) -> Dict[str, Any]:
        return GeneratorUser.generate_random_user(user_id)



In [None]:
import time
import timeit
import functools
from typing import Callable, Any, TypeVar, Tuple


T = TypeVar("T")


def timed(func: Callable[..., T]) -> Callable[..., Tuple[T, float]]:
    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Tuple[T, float]:
        start = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        return result, elapsed
    return wrapper


def benchmark_fn(fn: Callable, globals_dict: dict, number: int = 1000) -> float:
    timer = timeit.Timer(stmt=f"{fn.__name__}()", globals=globals_dict)
    total = timer.timeit(number=number)
    return total / number


@timed
def generate_data(sample_size: int):
    return [Generator.generate_user(i) for i in range(sample_size)]

data, took = generate_data(100)
print("Time taken:", took, "seconds")


Time taken: 0.0034370419962215237 seconds


In [64]:
user = Generator.generate_user(2)
add = user.pop('address')
user_dataclass = UserDataclass(**user, address=AddressDataclass(**add))
user_dataclass

UserDataclass(id=2, name='James', email='george2@email.com', age=36, is_active=False, address=AddressDataclass(street='Broadway', city='Dublin', postal_code='76856'), tags=['admin', 'developer', 'user'])

In [60]:
import json
import random
import timeit
from typing import List, Any, Type
from dataclasses import dataclass, asdict, is_dataclass

from pydantic import BaseModel
import msgspec

# --- Models / dataclasses / msgspec structs ---

@dataclass
class AddressDataclass:
    street: str
    city: str
    postal_code: str

@dataclass
class UserDataclass:
    id: int
    name: str
    email: str
    age: int
    is_active: bool
    address: AddressDataclass
    tags: List[str]

class AddressPydantic(BaseModel):
    street: str
    city: str
    postal_code: str

class UserPydantic(BaseModel):
    id: int
    name: str
    email: str
    age: int
    is_active: bool
    address: AddressPydantic
    tags: List[str]

class AddressMsgspec(msgspec.Struct, kw_only=True, omit_defaults=True):
    street: str
    city: str
    postal_code: str

class UserMsgspec(msgspec.Struct, kw_only=True, omit_defaults=True):
    id: int
    name: str
    email: str
    age: int
    is_active: bool
    address: AddressMsgspec
    tags: List[str]

# --- Encode / decode helpers ---

def encode_dataclass(obj: Any) -> bytes:
    assert is_dataclass(obj)
    return json.dumps(asdict(obj), ensure_ascii=False).encode()

def decode_dataclass(data: bytes) -> UserDataclass:
    d = json.loads(data.decode())
    # Assumes nested fields dicts match exactly dataclass signature
    addr = AddressDataclass(**d['address'])
    return UserDataclass(
        id=d['id'],
        name=d['name'],
        email=d['email'],
        age=d['age'],
        is_active=d['is_active'],
        address=addr,
        tags=d['tags'],
    )

def encode_pydantic(obj: BaseModel) -> bytes:
    return obj.model_dump_json(exclude_defaults=True).encode()

def decode_pydantic(data: bytes) -> UserPydantic:
    return UserPydantic.model_validate_json(data.decode())

def encode_msgspec(obj: msgspec.Struct) -> bytes:
    return msgspec.json.encode(obj)

def decode_msgspec(data: bytes) -> UserMsgspec:
    return msgspec.json.decode(data, type=UserMsgspec)

# --- Generator of random data ---

def random_user_data(user_id: int) -> dict:
    # similar to your Generator.generate_user
    return {
        'id': user_id,
        'name': f"User{user_id}",
        'email': f"user{user_id}@example.com",
        'age': random.randint(18, 70),
        'is_active': random.choice([True, False]),
        'address': {
            'street': random.choice(["Main St", "High St", "Elm St"]),
            'city': random.choice(["London", "Paris", "Berlin"]),
            'postal_code': str(random.randint(10000,99999))
        },
        'tags': random.sample(["user", "admin", "tester", "dev"], k=random.randint(1,3))
    }

# Pre-generate a sample object (for encode) and sample serialized data (for decode)
sample_data = random_user_data(1)
user_dc = UserDataclass(
    **{**sample_data, 'address': AddressDataclass(**sample_data['address'])}
)
user_pd = UserPydantic(**sample_data)
user_ms = UserMsgspec(**sample_data)

serialized_dc = encode_dataclass(user_dc)
serialized_pd = encode_pydantic(user_pd)
serialized_ms = encode_msgspec(user_ms)

# --- Benchmarking functions ---

def bench_dataclass_instantiation():
    UserDataclass(
        **{**sample_data, 'address': AddressDataclass(**sample_data['address'])}
    )

def bench_pydantic_instantiation():
    UserPydantic(**sample_data)

def bench_msgspec_instantiation():
    UserMsgspec(**sample_data)

def bench_dataclass_encode():
    encode_dataclass(user_dc)

def bench_dataclass_decode():
    decode_dataclass(serialized_dc)

def bench_pydaclass_decode():
    decode_dataclass(serialized_dc)

def bench_pydantic_encode():
    encode_pydantic(user_pd)

def bench_pydantic_decode():
    decode_pydantic(serialized_pd)

def bench_msgspec_encode():
    encode_msgspec(user_ms)

def bench_msgspec_decode():
    decode_msgspec(serialized_ms)

# --- Running benchmarks with timeit --- 

def run_bench(fn, number=100000):
    t = timeit.timeit(fn, number=number)
    print(f"{fn.__name__:30s} -> {t/number*1e6:.1f} µs per run")

if __name__ == "__main__":
    print("=== Instantiation ===")
    run_bench(bench_dataclass_instantiation)
    run_bench(bench_pydantic_instantiation)
    run_bench(bench_msgspec_instantiation)

    print("\n=== Encode ===")
    run_bench(bench_dataclass_encode)
    run_bench(bench_pydantic_encode)
    run_bench(bench_msgspec_encode)

    print("\n=== Decode ===")
    run_bench(bench_dataclass_decode)
    run_bench(bench_pydantic_decode)
    run_bench(bench_msgspec_decode)


=== Instantiation ===
bench_dataclass_instantiation  -> 3.7 µs per run
bench_pydantic_instantiation   -> 2.6 µs per run
bench_msgspec_instantiation    -> 0.8 µs per run

=== Encode ===
bench_dataclass_encode         -> 46.4 µs per run
bench_pydantic_encode          -> 3.1 µs per run
bench_msgspec_encode           -> 1.3 µs per run

=== Decode ===
bench_dataclass_decode         -> 7.7 µs per run
bench_pydantic_decode          -> 3.6 µs per run
bench_msgspec_decode           -> 1.6 µs per run
