In [1]:
from benchmark_dataclass import instantiate_dataclass, encode_dataclass, decode_dataclass
from benchmark_pydantic import instantiate_pydantic, encode_pydantic, decode_pydantic
from benchmark_msgspec import instantiate_msgspec, encode_msgspec, decode_msgspec



## 1. Generate Data:

In [45]:
import statistics
from time import perf_counter
from data_generator import generate_users_batch


batch_size = 1_000
users_data = generate_users_batch(batch_size=batch_size)


# 2. Microbenchmark:

* Running the benchmark for each library with the generated data with a batch size of 1.

In [47]:
def micro_benchmark(fn, obj, iterations=50_000):
    times = []
    for _ in range(iterations):
        start = perf_counter()
        fn(obj)
        end = perf_counter()
        times.append(end - start)

    avg_us = statistics.mean(times) * 1e6
    return avg_us

def batch_benchmark(fn, batch):
    start = perf_counter()
    for item in batch:
        fn(item)
    end = perf_counter()
    total = end - start
    avg_us = (total / len(batch)) * 1e6
    return avg_us, total


In [52]:
batch_size = 1
single_user = generate_users_batch(batch_size=batch_size)[0]

# Instantiate objects for encoding/decoding
dc_obj = instantiate_dataclass(single_user)
py_obj = instantiate_pydantic(single_user)
ms_obj = instantiate_msgspec(single_user)

# Encode once
dc_encoded = encode_dataclass(dc_obj)
py_encoded = encode_pydantic(py_obj)
ms_encoded = encode_msgspec(ms_obj)

print("=== Microbenchmark: single-object repeated ===\n")
print(f"Dataclass: instantiate = {micro_benchmark(instantiate_dataclass, single_user):.2f} µs")
print(f"Pydantic:  instantiate = {micro_benchmark(instantiate_pydantic, single_user):.2f} µs")
print(f"Msgspec:   instantiate = {micro_benchmark(instantiate_msgspec, single_user):.2f} µs\n")

print(f"Dataclass: encode = {micro_benchmark(lambda _: encode_dataclass(dc_obj), None):.2f} µs")
print(f"Pydantic:  encode = {micro_benchmark(lambda _: encode_pydantic(py_obj), None):.2f} µs")
print(f"Msgspec:   encode = {micro_benchmark(lambda _: encode_msgspec(ms_obj), None):.2f} µs\n")

print(f"Dataclass: decode = {micro_benchmark(lambda _: decode_dataclass(dc_encoded), None):.2f} µs")
print(f"Pydantic:  decode = {micro_benchmark(lambda _: decode_pydantic(py_encoded), None):.2f} µs")
print(f"Msgspec:   decode = {micro_benchmark(lambda _: decode_msgspec(ms_encoded), None):.2f} µs")



=== Microbenchmark: single-object repeated ===

Dataclass: instantiate = 0.39 µs
Pydantic:  instantiate = 1.17 µs
Msgspec:   instantiate = 0.17 µs

Dataclass: encode = 2.75 µs
Pydantic:  encode = 0.75 µs
Msgspec:   encode = 0.14 µs

Dataclass: decode = 1.44 µs
Pydantic:  decode = 0.86 µs
Msgspec:   decode = 0.21 µs


In [55]:
batch_size = 1_000_000
users = generate_users_batch(batch_size)

print("\n=== Batch Benchmark: many objects once ===")

# Instantiation benchmark
print(f"Dataclass instantiate avg = {batch_benchmark(instantiate_dataclass, users)[0]:.2f} µs")
print(f"Pydantic instantiate avg = {batch_benchmark(instantiate_pydantic, users)[0]:.2f} µs")
print(f"Msgspec  instantiate avg = {batch_benchmark(instantiate_msgspec, users)[0]:.2f} µs")

dc_objs  = [instantiate_dataclass(u) for u in users]
py_objs  = [instantiate_pydantic(u) for u in users]
ms_objs  = [instantiate_msgspec(u) for u in users]

print("\n--- Encoding ---")
print(f"Dataclass encode = {batch_benchmark(encode_dataclass, dc_objs)[0]:.2f} µs")
print(f"Pydantic encode = {batch_benchmark(encode_pydantic, py_objs)[0]:.2f} µs")
print(f"Msgspec encode = {batch_benchmark(encode_msgspec, ms_objs)[0]:.2f} µs")

dc_bin  = [encode_dataclass(o) for o in dc_objs]
py_bin  = [encode_pydantic(o) for o in py_objs]
ms_bin  = [encode_msgspec(o) for o in ms_objs]

print("\n--- Decoding ---")
print(f"Dataclass decode = {batch_benchmark(decode_dataclass, dc_bin)[0]:.2f} µs")
print(f"Pydantic decode = {batch_benchmark(decode_pydantic, py_bin)[0]:.2f} µs")
print(f"Msgspec decode = {batch_benchmark(decode_msgspec, ms_bin)[0]:.2f} µs")


=== Batch Benchmark: many objects once ===
Dataclass instantiate avg = 0.22 µs
Pydantic instantiate avg = 0.72 µs
Msgspec  instantiate avg = 0.15 µs

--- Encoding ---
Dataclass encode = 2.84 µs
Pydantic encode = 0.80 µs
Msgspec encode = 0.12 µs

--- Decoding ---
Dataclass decode = 1.42 µs
Pydantic decode = 0.93 µs
Msgspec decode = 0.23 µs
