In [None]:
from expelliarmus import Wizard
import pathlib
import h5py
import numpy as np
import timeit
import requests
import pickle
import os

## Download original data in evt3 raw format

In [None]:
fname = "driving_sample"
# Maps every benchmarked encoding name to the file extension used on disk.
extension_map = {
    'dat': 'dat',
    'evt2': 'raw',
    'evt3': 'raw',
    'hdf5': 'hdf5',
    'hdf5_lzf': 'hdf5',
    'hdf5_gzip': 'hdf5',
    'numpy': 'npy',
}


def get_fpath(encoding):
    """Return the file name for `encoding`, e.g. 'driving_sample_evt3.raw'.

    Raises KeyError if `encoding` is not a key of `extension_map`.
    """
    return f"{fname}_{encoding}.{extension_map[encoding]}"


if not os.path.exists(get_fpath('evt3')):
    # Download the reference recording once; later runs reuse the local copy.
    print("Downloading EVT3 file... ", end="")
    # NOTE(review): the URL is expected to serve the driving sample in EVT3 format — confirm.
    r = requests.get(
        "https://dataset.prophesee.ai/index.php/s/nVcLLdWAnNzrmII/download",
        allow_redirects=True,
        timeout=60,
    )
    # Fail loudly on HTTP errors: otherwise an error page would be saved as the
    # .raw file and the existence check above would skip every future download.
    r.raise_for_status()
    with open(get_fpath('evt3'), 'wb') as evt3_file:
        evt3_file.write(r.content)
    print("done!")

# Decode the EVT3 file; `data` is the source array for every file generated below.
wizard = Wizard(encoding="evt3")
data = wizard.read(get_fpath('evt3'))

## Generate all comparison files

In [None]:
# dat and evt2: re-encode the events read from the EVT3 file.
# evt3 is listed too (used by the benchmark cell) but already exists on disk,
# hence the [:2] slice.
raw_encodings = ["dat", "evt2", "evt3"]
for encoding in raw_encodings[:2]:
    if not os.path.exists(get_fpath(encoding)):
        print(f"Generating file for {encoding} encoding.")
        wizard = Wizard(encoding="evt3")
        wizard.set_encoding(encoding)
        wizard.save(fpath=get_fpath(encoding), arr=data)

# variants of hdf5: same "events" dataset, differing only in the compression filter.
hdf5_encodings = ["hdf5", "hdf5_lzf", "hdf5_gzip"]
# Compression argument passed to h5py for each variant (None = uncompressed).
hdf5_compression = {"hdf5": None, "hdf5_lzf": "lzf", "hdf5_gzip": "gzip"}
for encoding in hdf5_encodings:
    fpath = pathlib.Path(get_fpath(encoding))
    if not os.path.exists(fpath):
        print(f"Generating file for {encoding} encoding.")
        # The context manager closes the file even if create_dataset raises.
        with h5py.File(fpath, "w") as fp:
            fp.create_dataset(
                name="events",
                shape=data.shape,
                dtype=data.dtype,
                data=data,
                compression=hdf5_compression[encoding],
            )

# numpy
fpath = get_fpath('numpy')
if not os.path.exists(fpath):
    print("Generating file for numpy encoding.")
    np.save(fpath, data, allow_pickle=True)

## Run benchmarks

In [None]:
# Number of timed reads per file; the reported time is their arithmetic mean.
REPEAT = 10


def get_fsize_MB(fpath):
    """Return the size of `fpath` (a pathlib.Path) in units of 1024*1024 bytes, rounded to an int."""
    return round(fpath.stat().st_size / (1024 * 1024))


def _mean_read_time(read_fn):
    """Call `read_fn` REPEAT times (one call per measurement) and return the mean duration in seconds."""
    return sum(timeit.repeat(read_fn, number=1, repeat=REPEAT)) / REPEAT


# dat, evt2, evt3 (read through expelliarmus)
raw_times = []
raw_sizes = []
for encoding in raw_encodings:
    fpath = get_fpath(encoding)
    wizard = Wizard(encoding)
    wizard.set_file(fpath)
    # The lambda is invoked inside this iteration, so it sees the current wizard/fpath.
    raw_times.append(_mean_read_time(lambda: wizard.read(fpath)))
    raw_sizes.append(get_fsize_MB(pathlib.Path(fpath)))

# hdf5 variants (read through h5py)
hdf5_times = []
hdf5_sizes = []
for encoding in hdf5_encodings:
    fpath = get_fpath(encoding)
    # Open read-only and context-managed so the handle is released even if timing fails.
    with h5py.File(fpath, "r") as fp:
        # Only the dataset read is timed; opening the file is outside the measurement.
        hdf5_times.append(_mean_read_time(lambda: fp["events"][:]))
    hdf5_sizes.append(get_fsize_MB(pathlib.Path(fpath)))

# numpy
fpath = get_fpath('numpy')
numpy_time = _mean_read_time(lambda: np.load(fpath))
numpy_size = get_fsize_MB(pathlib.Path(fpath))

## Plot results

In [None]:
import pandas as pd

# One row per benchmarked file, in the order: expelliarmus encodings,
# h5py variants, then the single numpy file.
df = pd.DataFrame(
    {
        'Encoding': [*raw_encodings, *hdf5_encodings, "numpy"],
        'Framework': [
            *("expelliarmus" for _ in raw_encodings),
            *("h5py" for _ in hdf5_encodings),
            "numpy",
        ],
        'Read time [s]': [*raw_times, *hdf5_times, numpy_time],
        'File size [MB]': [*raw_sizes, *hdf5_sizes, numpy_size],
    }
)

In [None]:
import plotly.express as px

# Event count expressed in whole millions (truncated) for the figure title.
million_events = int(len(data)/1e6)
title = f"Reading the same {million_events} million events from different files."

# Read time against file size: colour identifies the framework, marker symbol the encoding.
fig = px.scatter(
    df,
    x='Read time [s]',
    y='File size [MB]',
    color='Framework',
    symbol='Encoding',
    title=title,
)
fig.update_traces(marker_size=13)
fig.write_image('file_read_benchmark.png')

In [None]:
# alternative plot: one row per encoding, marker size taken from the file size column

fig = px.scatter(
    df,
    x='Read time [s]',
    y='Encoding',
    size='File size [MB]',
    color='Framework',
)
fig.show()
