In [None]:
# serialisation formats: pickle, HDF5, h5, hickle, ...

In [None]:
# dumps

In [15]:
from itsdangerous.serializer import Serializer
# Build a serializer around a signing key; `s` is reused by the loads cell.
# NOTE(review): "secret-key" is a hard-coded demo value - a real key should
# not be stored in the notebook.
s = Serializer("secret-key")
# Last expression of the cell, so its value is displayed: the recorded output
# is the list's text form, a ".", and a signature-like suffix.
s.dumps([1, 2, 3, 4])

'[1, 2, 3, 4].r7R9RhGgDPvvWl3iNzLuIIfELmo'

In [None]:
# loads

In [17]:
# Pass the string produced by the dumps cell back through the same serializer
# `s` (defined in that cell); the recorded output is the original list.
s.loads('[1, 2, 3, 4].r7R9RhGgDPvvWl3iNzLuIIfELmo')

[1, 2, 3, 4]

In [None]:
# 1. pickle format (Python-only)

In [1]:
import pickle
 
# Sample object for the file-based round trip
test_dict = {"Hello": "World!"}

# Serialization: pickle to bytes, then write those bytes to disk
with open("test.pickle", "wb") as outfile:
    outfile.write(pickle.dumps(test_dict))
print("Written object", test_dict)

# Deserialization: read the bytes back and rebuild the object
# (only unpickle files you trust; this one was written just above)
with open("test.pickle", "rb") as infile:
    test_dict_reconstructed = pickle.loads(infile.read())
print("Reconstructed object", test_dict_reconstructed)

# Report success only when the round trip preserved equality
if test_dict_reconstructed == test_dict:
    print("Reconstruction success")

Written object {'Hello': 'World!'}
Reconstructed object {'Hello': 'World!'}
Reconstruction success


In [2]:
import pickle
 
class NewClass:
    """Minimal container used to demonstrate pickling a custom instance."""

    def __init__(self, data):
        # Echo the value when the object is constructed, then keep it
        print(data)
        self.data = data


# Build an instance; the constructor prints the value once
new_class = NewClass(data=1)

# Round trip through an in-memory bytes object. In the recorded output the
# value is printed only once, i.e. loading did not print it again.
pickled_data = pickle.dumps(new_class)
reconstructed = pickle.loads(pickled_data)

# Confirm the attribute survived the round trip
print("Data from reconstructed object:", reconstructed.data)


1
Data from reconstructed object: 1


In [None]:
# pickle and tensorflow

In [3]:
import pickle
 
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, AveragePooling2D, Dropout, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
 
# Load MNIST digits as the train/test splits returned by mnist.load_data()
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Reshape data to (n_samples, height, width, n_channel): append a channel
# axis (axis=3) so each sample matches the model's input_shape=(28, 28, 1),
# and cast to float32.
# NOTE(review): no rescaling of the pixel values is applied here.
X_train = np.expand_dims(X_train, axis=3).astype("float32")
X_test = np.expand_dims(X_test, axis=3).astype("float32")

# One-hot encode the output labels
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

# LeNet5 model: three tanh Conv2D layers with average pooling after the first
# two, then Flatten, a tanh Dense(84) and a 10-way softmax output.
# NOTE: `model` is rebound by the later Keras .h5 cell.
model = Sequential([
    Conv2D(6, (5,5), input_shape=(28,28,1), padding="same", activation="tanh"),
    AveragePooling2D((2,2), strides=2),
    Conv2D(16, (5,5), activation="tanh"),
    AveragePooling2D((2,2), strides=2),
    Conv2D(120, (5,5), activation="tanh"),
    Flatten(),
    Dense(84, activation="tanh"),
    Dense(10, activation="softmax")
])

# Train the model. Early stopping monitors val_loss with patience=4 and
# restore_best_weights=True; epochs=100 is only an upper bound (the recorded
# run shows 17 epochs).
# NOTE(review): validation_data is the test split, which is also used for the
# evaluate calls below, so the reported score is not from held-out data.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
earlystopping = EarlyStopping(monitor="val_loss", patience=4, restore_best_weights=True)
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=32, callbacks=[earlystopping])

# Evaluate the model on the test split; presumably prints [loss, accuracy]
# given the compile() arguments above.
print(model.evaluate(X_test, y_test, verbose=0))

# Pickle to serialize and deserialize the trained model in memory.
# NOTE: `reconstructed` rebinds the name used by the earlier NewClass cell.
pickled_model = pickle.dumps(model)
reconstructed = pickle.loads(pickled_model)

# Evaluate again with the unpickled model; in the recorded output both
# evaluate calls print identical numbers.
print(reconstructed.evaluate(X_test, y_test, verbose=0))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
[0.039929646998643875, 0.9886999726295471]




[0.039929646998643875, 0.9886999726295471]


In [None]:
# 2. HDF5 format, name.hdf5

In [4]:
pip install h5py

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [8]:
import h5py

In [7]:
with h5py.File("test.hdf5", mode="w") as file:
    # create_group adds "group_1" to the file and hands back the new group
    group1 = file.create_group("group_1")
    # add "dataset1" (10 elements) inside that group
    group1.create_dataset("dataset1", shape=(10,))
    # the dataset can also be reached by its path from the file root
    dataset = file["group_1/dataset1"]

In [None]:
# 3. HDF5 TensorFlow format, name.h5

In [9]:
from tensorflow import keras
 
# Build a tiny model: a 10-feature input feeding a single Dense unit
model = keras.models.Sequential(
    [
        keras.layers.Input(shape=(10,)),
        keras.layers.Dense(1),
    ]
)

model.compile(loss="mse", optimizer="adam")

# The ".h5" suffix in the file name requests the HDF5 save format
model.save("my_model.h5")

In [10]:
import json
 
# Print every root-level attribute of the saved model file. Attributes whose
# name ends in "_config" hold JSON text, so they are re-indented for reading.
with h5py.File("my_model.h5", "r") as infile:
    for key, raw_value in infile.attrs.items():
        if key.endswith("_config"):
            formatted = json.dumps(json.loads(raw_value), indent=4)
        else:
            formatted = raw_value
        print(f"{key}: {formatted}")

backend: tensorflow
keras_version: 2.9.0
model_config: {
    "class_name": "Sequential",
    "config": {
        "name": "sequential_1",
        "layers": [
            {
                "class_name": "InputLayer",
                "config": {
                    "batch_input_shape": [
                        null,
                        10
                    ],
                    "dtype": "float32",
                    "sparse": false,
                    "ragged": false,
                    "name": "input_1"
                }
            },
            {
                "class_name": "Dense",
                "config": {
                    "name": "dense_2",
                    "trainable": true,
                    "dtype": "float32",
                    "units": 1,
                    "activation": "linear",
                    "use_bias": true,
                    "kernel_initializer": {
                        "class_name": "GlorotUniform",
                        "config": {
 

In [None]:
# 4. hickle

In [11]:
pip install hickle

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting hickle
  Downloading hickle-5.0.2-py3-none-any.whl (107 kB)
[K     |████████████████████████████████| 107 kB 5.3 MB/s 
Installing collected packages: hickle
Successfully installed hickle-5.0.2


In [12]:
import os
import hickle as hkl
import numpy as np

# Create a numpy array of data
array_obj = np.ones(32768, dtype='float32')

# Dump to file
hkl.dump(array_obj, 'test.hkl', mode='w')

# Dump data, with compression
hkl.dump(array_obj, 'test_gzip.hkl', mode='w', compression='gzip')

# Compare filesizes
print('uncompressed: %i bytes' % os.path.getsize('test.hkl'))
print('compressed:   %i bytes' % os.path.getsize('test_gzip.hkl'))

# Load data
array_hkl = hkl.load('test_gzip.hkl')

# Check that the loaded array holds the same data as the original.
# np.all((a, b)) only tests that every element of both arrays is truthy, so
# it would pass for two different non-zero arrays and fail for two equal
# arrays containing zeros. np.array_equal compares shape and values.
assert array_hkl.dtype == array_obj.dtype
assert np.array_equal(array_hkl, array_obj)

uncompressed: 139284 bytes
compressed:   11988 bytes


In [None]:
# 5. cPickle

In [14]:
# Prefer cPickle when it is importable and fall back to pickle otherwise.
# Catch only ImportError: a bare except would also swallow unrelated
# failures such as KeyboardInterrupt or errors raised inside the module.
try:
    import cPickle as pickle
except ImportError:
    import pickle
import sys

class SimpleObject(object):
    """Holds a name together with the same items joined in reverse order."""

    def __init__(self, name):
        self.name = name
        # Materialise the items first, then join them back-to-front
        self.name_backwards = ''.join(reversed(list(name)))

In [None]:
# 6. HDF5 pandas format, name.h5

In [18]:
import pandas as pd
# Round-trip a small mixed-dtype frame through an HDF5 store.
df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z'])
# Pass `key` by keyword: newer pandas releases deprecate positional arguments
# after the path in to_hdf.
df.to_hdf('./store.h5', key='data')
# Name the key on read as well, so the call does not depend on the file
# holding exactly one object.
reread = pd.read_hdf('./store.h5', key='data')