Click [here]() to access the associated Medium article.

# Setup


In [55]:
%pip install -q ujson msgpack

DEPRECATION: textract 1.6.5 has a non-standard dependency specifier extract-msg<=0.29.*. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of textract or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063
Note: you may need to restart the kernel to use updated packages.


In [28]:
import json
import pickle
import timeit

import msgpack
import ujson


# Sample record that the serialization examples in this notebook operate on
data = dict(
    name="Alice",
    age=30,
    is_student=False,
    courses=["Math", "Science"],
)

# Pickle

## Basic Usage


In [1]:
# Dump the object into a pickle file (opened in binary write mode)
with open("data.pkl", "wb") as pkl_out:
    pickle.dump(data, pkl_out)

# Load the object back from that same file (binary read mode).
# Only unpickle files you trust: loading a pickle can execute arbitrary code.
with open("data.pkl", "rb") as pkl_in:
    loaded_data = pickle.load(pkl_in)

print(loaded_data)

{'name': 'Alice', 'age': 30, 'is_student': False, 'courses': ['Math', 'Science']}


## Pickle Protocols

In [3]:
# Write the pickle using an explicitly chosen protocol version (2)
with open("data_protocol2.pkl", "wb") as pkl_out:
    pickle.dump(data, pkl_out, protocol=2)

# Read it back; no protocol argument is passed when loading
with open("data_protocol2.pkl", "rb") as pkl_in:
    loaded_data_protocol2 = pickle.load(pkl_in)

print(loaded_data_protocol2)

{'name': 'Alice', 'age': 30, 'is_student': False, 'courses': ['Math', 'Science']}


## Performance Considerations

In [11]:
def serialize_with_protocol(protocol):
    """Pickle the module-level ``data`` object to bytes with the given protocol.

    Args:
        protocol: Pickle protocol number forwarded to ``pickle.dumps``.

    Returns:
        The pickled representation of ``data`` as returned by ``pickle.dumps``.
    """
    pickled = pickle.dumps(data, protocol=protocol)
    return pickled


# Time one million in-memory serializations with protocol 2
time_protocol2 = timeit.timeit(lambda: serialize_with_protocol(2), number=1_000_000)
print(f"Protocol 2: {time_protocol2:.5f} seconds")

# Same measurement with protocol 5
time_protocol5 = timeit.timeit(lambda: serialize_with_protocol(5), number=1_000_000)
print(f"Protocol 5: {time_protocol5:.5f} seconds")

Protocol 2: 0.45007 seconds
Protocol 5: 0.37886 seconds


# JSON

## Basic Usage

In [13]:
# Encode the object as JSON text, indented by four spaces
json_string = json.dumps(data, indent=4)

# Decode that JSON text back into Python objects
loaded_data = json.loads(json_string)

# Show the JSON text first, then the decoded object
print(json_string)
print(loaded_data)

{
    "name": "Alice",
    "age": 30,
    "is_student": false,
    "courses": [
        "Math",
        "Science"
    ]
}
{'name': 'Alice', 'age': 30, 'is_student': False, 'courses': ['Math', 'Science']}


## Working with JSON Files

In [14]:
# Serialize the object to a file.
# The encoding is passed explicitly so the file is written and read as UTF-8
# instead of whatever the platform's locale default happens to be.
with open("data.json", "w", encoding="utf-8") as file:
    json.dump(data, file, indent=4)

# Deserialize the object from the file, using the same explicit encoding
with open("data.json", "r", encoding="utf-8") as file:
    loaded_data = json.load(file)

print(loaded_data)

{'name': 'Alice', 'age': 30, 'is_student': False, 'courses': ['Math', 'Science']}


## Custom Serialization

In [15]:
class Student:
    """Plain record of a student, used to demonstrate custom serialization.

    The constructor stores its four arguments unchanged as the attributes
    ``name``, ``age``, ``is_student`` and ``courses``.
    """

    def __init__(self, name, age, is_student, courses):
        # Paired assignments keep the attribute order name, age, is_student, courses
        self.name, self.age = name, age
        self.is_student, self.courses = is_student, courses


def student_to_dict(student):
    """Convert a student-like object into a plain, serializable dictionary.

    Intended to be passed as the ``default=`` hook of a serializer. Such hooks
    are expected to raise ``TypeError`` for objects they cannot convert, so an
    object lacking any of the student attributes is reported with ``TypeError``
    rather than leaking an ``AttributeError``.

    Args:
        student: Object exposing ``name``, ``age``, ``is_student`` and
            ``courses`` attributes.

    Returns:
        A dict with the keys ``name``, ``age``, ``is_student`` and ``courses``.

    Raises:
        TypeError: If ``student`` is missing one of the required attributes.
    """
    try:
        return {
            "name": student.name,
            "age": student.age,
            "is_student": student.is_student,
            "courses": student.courses,
        }
    except AttributeError as exc:
        raise TypeError(
            f"Object of type {type(student).__name__} is not serializable"
        ) from exc


student = Student(
    name="Alice",
    age=30,
    is_student=False,
    courses=["Math", "Science"],
)

# Encode the custom object; json falls back to student_to_dict to convert it
json_string = json.dumps(student, default=student_to_dict, indent=4)
print(json_string)


# Deserialize the custom object
def dict_to_student(d):
    """Object hook that rebuilds a ``Student`` from its dictionary form.

    A decoder invokes its object hook for every decoded mapping, not only the
    outermost one, so a dictionary that is missing any of the student fields
    is handed back unchanged instead of raising ``KeyError``.

    Args:
        d: Decoded dictionary.

    Returns:
        A ``Student`` when ``d`` contains ``name``, ``age``, ``is_student`` and
        ``courses``; otherwise ``d`` itself.
    """
    required_fields = ("name", "age", "is_student", "courses")
    if not all(field in d for field in required_fields):
        # Not a student record (e.g. a nested or unrelated object): pass through
        return d
    return Student(d["name"], d["age"], d["is_student"], d["courses"])


# Decode the JSON text, letting the object hook turn the mapping into a Student
loaded_student = json.loads(json_string, object_hook=dict_to_student)
print(vars(loaded_student))

{
    "name": "Alice",
    "age": 30,
    "is_student": false,
    "courses": [
        "Math",
        "Science"
    ]
}
{'name': 'Alice', 'age': 30, 'is_student': False, 'courses': ['Math', 'Science']}


## Performance Considerations

In [21]:
# Time 100,000 encodes with the standard-library json module
time_json = timeit.timeit(
    lambda: json.dumps(data),
    number=100_000,
)
print(f"Standard JSON:\t{time_json:.5f} seconds")

# Time the same number of encodes with ujson
time_ujson = timeit.timeit(
    lambda: ujson.dumps(data),
    number=100_000,
)
print(f"UltraJSON:\t{time_ujson:.5f} seconds")

Standard JSON:	0.17757 seconds
UltraJSON:	0.04344 seconds


# MessagePack

## Basic Usage

In [29]:
# Pack the object into MessagePack bytes
binary_data = msgpack.packb(data)

# Unpack those bytes back into Python objects
loaded_data = msgpack.unpackb(binary_data)

# Show the packed bytes first, then the unpacked object
print(binary_data)
print(loaded_data)

b'\x84\xa4name\xa5Alice\xa3age\x1e\xaais_student\xc2\xa7courses\x92\xa4Math\xa7Science'
{'name': 'Alice', 'age': 30, 'is_student': False, 'courses': ['Math', 'Science']}


## Working with MessagePack Files

In [30]:
# Pack the object straight into a file opened in binary write mode
with open("data.msgpack", "wb") as packed_out:
    msgpack.pack(data, packed_out)

# Unpack the object from that file (binary read mode)
with open("data.msgpack", "rb") as packed_in:
    loaded_data = msgpack.unpack(packed_in)

print(loaded_data)

{'name': 'Alice', 'age': 30, 'is_student': False, 'courses': ['Math', 'Science']}


## Custom Serialization

In [41]:
class Student:
    """Plain record of a student, used to demonstrate custom serialization.

    The constructor stores its four arguments unchanged as the attributes
    ``name``, ``age``, ``is_student`` and ``courses``.
    """

    def __init__(self, name, age, is_student, courses):
        # Paired assignments keep the attribute order name, age, is_student, courses
        self.name, self.age = name, age
        self.is_student, self.courses = is_student, courses


def student_to_dict(student):
    """Convert a student-like object into a plain, serializable dictionary.

    Intended to be passed as the ``default=`` hook of a serializer. Such hooks
    are expected to raise ``TypeError`` for objects they cannot convert, so an
    object lacking any of the student attributes is reported with ``TypeError``
    rather than leaking an ``AttributeError``.

    Args:
        student: Object exposing ``name``, ``age``, ``is_student`` and
            ``courses`` attributes.

    Returns:
        A dict with the keys ``name``, ``age``, ``is_student`` and ``courses``.

    Raises:
        TypeError: If ``student`` is missing one of the required attributes.
    """
    try:
        return {
            "name": student.name,
            "age": student.age,
            "is_student": student.is_student,
            "courses": student.courses,
        }
    except AttributeError as exc:
        raise TypeError(
            f"Object of type {type(student).__name__} is not serializable"
        ) from exc


def dict_to_student(d):
    """Object hook that rebuilds a ``Student`` from its dictionary form.

    An unpacker invokes its object hook for every decoded mapping, not only the
    outermost one, so a dictionary that is missing any of the student fields
    is handed back unchanged instead of raising ``KeyError``.

    Args:
        d: Decoded dictionary.

    Returns:
        A ``Student`` when ``d`` contains ``name``, ``age``, ``is_student`` and
        ``courses``; otherwise ``d`` itself.
    """
    required_fields = ("name", "age", "is_student", "courses")
    if not all(field in d for field in required_fields):
        # Not a student record (e.g. a nested or unrelated map): pass through
        return d
    return Student(d["name"], d["age"], d["is_student"], d["courses"])


student = Student(
    name="Alice",
    age=30,
    is_student=False,
    courses=["Math", "Science"],
)

# Pack the custom object; msgpack falls back to student_to_dict to convert it
binary_data = msgpack.packb(student, default=student_to_dict)
print(binary_data)

# Unpack the bytes, letting the object hook turn the map into a Student
loaded_data = msgpack.unpackb(binary_data, object_hook=dict_to_student)
print(vars(loaded_data))

b'\x84\xa4name\xa5Alice\xa3age\x1e\xaais_student\xc2\xa7courses\x92\xa4Math\xa7Science'
{'name': 'Alice', 'age': 30, 'is_student': False, 'courses': ['Math', 'Science']}


## Performance Considerations

In [54]:
# Time 10,000 encodes with the standard-library json module
time_json = timeit.timeit(
    lambda: json.dumps(data),
    number=10_000,
)
print(f"{'JSON:':13}{time_json:.5f} seconds")

# Time the same number of packs with msgpack
time_msgpack = timeit.timeit(
    lambda: msgpack.packb(data),
    number=10_000,
)
print(f"{'MessagePack:':13}{time_msgpack:.5f} seconds")

JSON:        0.02905 seconds
MessagePack: 0.00810 seconds
