In [2]:
# %pip install expelliarmus --quiet
# %pip install aedat --quiet
# %pip install loris --quiet
# %pip install brotli --quiet
# %pip install h5py --quiet
# %pip install numpy --quiet

from expelliarmus import Wizard
import aedat
import pathlib
import h5py
import numpy as np
import timeit
import requests
import pickle
import os
import loris
import brotli
from pathlib import Path

In [3]:
# Recording to benchmark; swap the two assignments to switch datasets.
# fname = "driving_sample"
fname = "construction"  # use this one if you want to include aedat benchmarks

# Directory that holds every downloaded and generated benchmark file.
folder = Path("data/file-benchmark")
folder.mkdir(parents=True, exist_ok=True)

# Maps an encoding name to the suffix appended to `fname` for that encoding.
extension_map = dict(
    aedat=".aedat4",
    dat=".dat",
    evt2="_evt2.raw",
    evt3="_evt3.raw",
    hdf5=".hdf5",
    hdf5_lzf="_lzf.hdf5",
    hdf5_gzip="_gzip.hdf5",
    numpy=".npy",
    eventstream=".es",
    brotli=".bin.br",
)


def get_fpath(encoding):
    """Return the benchmark file path for `encoding` as a string.

    The path is built from the current values of `folder`, `fname` and
    `extension_map`; an encoding missing from `extension_map` raises KeyError.
    """
    suffix = extension_map[encoding]
    return f"{folder}/{fname}{suffix}"

In [4]:
def _download(url, destination, label, timeout=60):
    """Download `url` to the path `destination` unless that path already exists.

    The response is streamed into `destination + ".part"` and only renamed to
    `destination` once the transfer has finished, so an interrupted or failed
    download is never mistaken for a complete file on the next run.

    Args:
        url: address to fetch (redirects are followed).
        destination: target file path as a string.
        label: short name used in the progress message.
        timeout: seconds passed to `requests.get` so a stalled server cannot
            block the cell forever.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    if os.path.exists(destination):
        return
    print(f"Downloading {label} file... ", end="")
    partial = destination + ".part"
    with requests.get(
        url, allow_redirects=True, stream=True, timeout=timeout
    ) as response:
        # Fail loudly instead of saving an HTML error page as the dataset.
        response.raise_for_status()
        with open(partial, "wb") as out_file:
            for chunk in response.iter_content(chunk_size=1 << 20):
                out_file.write(chunk)
    os.replace(partial, destination)
    print("done!")


# Fetch the selected recording (if missing) and load it into `data`.
if fname == "driving_sample":
    _download(
        "https://dataset.prophesee.ai/index.php/s/nVcLLdWAnNzrmII/download",
        get_fpath("evt3"),
        "EVT3",
    )

    wizard = Wizard(encoding="evt3")
    data = wizard.read(get_fpath("evt3"))

elif fname == "construction":
    _download(
        "https://cloudstor.aarnet.edu.au/plus/s/ORQ2oOz9NfwiHLZ/download?path=%2F&files=construction.aedat4",
        get_fpath("aedat"),
        "aedat4",
    )
    _download(
        "https://cloudstor.aarnet.edu.au/plus/s/ORQ2oOz9NfwiHLZ/download?path=%2F&files=construction.es",
        get_fpath("eventstream"),
        "eventstream",
    )

    # Gather the event packets of the aedat4 file into one structured array.
    decoder = aedat.Decoder(get_fpath("aedat"))
    events = np.concatenate(
        [packet["events"] for packet in decoder if "events" in packet]
    )
    data = events.astype(
        np.dtype([("t", "<i8"), ("x", "<i2"), ("y", "<i2"), ("p", "u1")], align=True)
    )

else:
    # Without this, `data` would stay undefined and a later cell would fail
    # with a much less helpful NameError.
    raise ValueError(f"Unknown fname {fname!r}; expected 'driving_sample' or 'construction'.")

KeyboardInterrupt: 

In [5]:
## Generate all comparison files

# Encodings written through expelliarmus (dat, evt2, evt3).
raw_encodings = ["dat", "evt2", "evt3"]
for encoding in raw_encodings:
    target = get_fpath(encoding)
    if os.path.exists(target):
        # Already generated (or downloaded) on a previous run.
        continue
    print(f"Generating file for {encoding} encoding.")
    wizard = Wizard(encoding=encoding)
    wizard.save(fpath=target, arr=data)

# HDF5 variants: uncompressed, LZF-compressed and gzip-compressed.
hdf5_encodings = ["hdf5", "hdf5_lzf", "hdf5_gzip"]
# Extra `create_dataset` keyword arguments for each variant.
_hdf5_extra_kwargs = {
    "hdf5": {},
    "hdf5_lzf": {"compression": "lzf"},
    "hdf5_gzip": {"compression": "gzip"},
}
for encoding in hdf5_encodings:
    fpath = get_fpath(encoding)
    if os.path.exists(fpath):
        continue
    with h5py.File(fpath, "w") as fp:
        print(f"Generating file for {encoding} encoding.")
        # All variants store the same array under the dataset name "events".
        fp.create_dataset(
            name="events",
            shape=data.shape,
            dtype=data.dtype,
            data=data,
            **_hdf5_extra_kwargs[encoding],
        )

# NumPy .npy dump of the same array, written only when the file is missing.
fpath = get_fpath("numpy")
numpy_file_missing = not os.path.exists(fpath)
if numpy_file_missing:
    print("Generating file for numpy encoding.")
    np.save(fpath, data, allow_pickle=True)

# brotli: compress the binary payload of the already generated DAT file.
if not os.path.exists(get_fpath("brotli")):
    print("Generating file for brotli encoding.")
    with open(get_fpath("dat"), "rb") as in_file:
        buff = in_file.read()

    # Skip everything up to and including the third newline byte (0x0A),
    # i.e. the leading text lines of the DAT file.
    payload_start = 0
    for _ in range(3):
        payload_start = buff.index(b"\n", payload_start) + 1
    # Two more bytes are dropped before the payload, exactly as before.
    # NOTE(review): presumably the DAT event-type/event-size bytes - confirm.
    payload = buff[payload_start + 2:]

    # Compress fully in memory before touching the output path, so a failure
    # cannot leave behind an empty .br file (or a stray .tmp file) that the
    # existence check above would treat as finished on the next run.
    compressed = brotli.compress(payload, quality=5)
    with open(get_fpath("brotli"), "wb") as out_file:
        out_file.write(compressed)

In [None]:
## Run benchmarks

REPEAT = 10  # number of timed reads averaged for every file
arr = None  # kept for backward compatibility; the readers below discard their result


def get_fsize_MB(fpath):
    """Return the size of `fpath` (a pathlib.Path) in MiB, rounded to an integer."""
    return round(fpath.stat().st_size / (1024 * 1024))


def _mean_read_time(read_fn):
    """Return the mean wall time in seconds of `read_fn` over REPEAT single calls."""
    return sum(timeit.repeat(read_fn, number=1, repeat=REPEAT)) / REPEAT


# NOTE: every lambda below is executed immediately by `_mean_read_time`, so
# capturing the loop/global variables `fpath`, `wizard` and `fp` is safe.

# evt2, evt3, dat
print("Benchmarking expelliarmus.")
raw_times = []
raw_sizes = []
for encoding in raw_encodings:
    fpath = get_fpath(encoding)
    wizard = Wizard(encoding)
    wizard.set_file(fpath)
    raw_times.append(_mean_read_time(lambda: wizard.read(fpath)))
    raw_sizes.append(get_fsize_MB(pathlib.Path(fpath)))

# hdf5 variants
print("Benchmarking HDF5.")
hdf5_times = []
hdf5_sizes = []
for encoding in hdf5_encodings:
    fpath = get_fpath(encoding)
    # Open read-only and let the context manager close the handle even when a
    # read raises; only the dataset read itself is timed, not the open.
    with h5py.File(fpath, "r") as fp:
        hdf5_times.append(_mean_read_time(lambda: fp["events"][:]))
    hdf5_sizes.append(get_fsize_MB(pathlib.Path(fpath)))

# numpy
print("Benchmarking NumPy.")
fpath = get_fpath("numpy")
numpy_time = _mean_read_time(lambda: np.load(fpath))
numpy_size = get_fsize_MB(pathlib.Path(fpath))

# aedat4
print("Benchmarking AEDAT.")
fpath = get_fpath("aedat")
aedat_time = _mean_read_time(
    lambda: [
        packet["events"] for packet in aedat.Decoder(fpath) if "events" in packet
    ]
)
aedat_size = get_fsize_MB(pathlib.Path(fpath))

# eventstream
print("Benchmarking eventstream.")
fpath = get_fpath("eventstream")
es_time = _mean_read_time(lambda: loris.read_file(fpath))
es_size = get_fsize_MB(pathlib.Path(fpath))

Benchmarking expelliarmus.
Benchmarking HDF5.
Benchmarking NumPy.


In [7]:
# brotli: file to read and the structured dtype the decoder fills in.
print("Benchmarking brotli.")
fpath = get_fpath("brotli")
dtype = np.dtype(
    {
        "names": ["t", "x", "y", "p"],
        "formats": ["<i8", "<i2", "<i2", "u1"],
    }
)


def brotli_read():
    """Read the brotli-compressed file at `fpath` into a structured array.

    The decompressed payload is viewed as 64-bit words. Each word is unpacked
    from its least significant bit upwards as: 32 bits of timestamp, 14 bits
    of x, 14 bits of y, and the remaining top 4 bits as polarity.

    Uses the module-level `fpath` and `dtype`.

    Returns:
        Structured NumPy array of `dtype` with fields "t", "x", "y" and "p".
    """
    # Reading to a numpy buffer the data.
    with open(fpath, "rb") as fp:
        np_buff = np.frombuffer(brotli.decompress(fp.read()), dtype=np.uint64)

    # Creating the structured NumPy array.
    arr = np.empty(len(np_buff), dtype=dtype)

    # Decoding the buffer. The shift of each field is the total width of the
    # fields stored below it (0, 32, 32 + 14, 32 + 14 + 14). The previous
    # shifts of 18 and 4 read y and p out of the timestamp bits.
    arr["t"] = np_buff & np.uint64(0xFFFFFFFF)  # 32 bits
    arr["x"] = (np_buff >> np.uint64(32)) & np.uint64(0x3FFF)  # 14 bits
    arr["y"] = (np_buff >> np.uint64(32 + 14)) & np.uint64(0x3FFF)  # 14 bits
    arr["p"] = np_buff >> np.uint64(32 + 14 + 14)  # top 4 bits
    return arr

# Average the REPEAT single-call timings and record the compressed file size.
brotli_runs = timeit.repeat(brotli_read, number=1, repeat=REPEAT)
brotli_time = sum(brotli_runs) / REPEAT
brotli_size = get_fsize_MB(pathlib.Path(fpath))

Benchmarking brotli.
29.49, 447


In [None]:
## Aggregate results

import pandas as pd

# Row order: expelliarmus encodings, HDF5 variants, then numpy, aedat4 and
# eventstream. The brotli measurement is not part of this table.
encoding_column = [*raw_encodings, *hdf5_encodings, "numpy", "aedat4", "eventstream"]

framework_column = []
framework_column.extend(["expelliarmus"] * len(raw_encodings))
framework_column.extend(["h5py"] * len(hdf5_encodings))
framework_column.extend(["numpy", "aedat", "loris"])

read_time_column = [*raw_times, *hdf5_times, numpy_time, aedat_time, es_time]
file_size_column = [*raw_sizes, *hdf5_sizes, numpy_size, aedat_size, es_size]

df = pd.DataFrame(
    {
        "Encoding": encoding_column,
        "Framework": framework_column,
        "Read time [s]": read_time_column,
        "File size [MB]": file_size_column,
    }
)

In [None]:
## Plot results

import plotly.express as px
from IPython.display import Image

# Event count of the loaded recording, truncated to whole millions for the title.
n_million_events = int(len(data) / 1e6)
title = f"Reading the same {n_million_events} million events from different files."

# One marker per file: read time against size, coloured by framework and
# shaped by encoding.
scatter_options = dict(
    x="Read time [s]",
    y="File size [MB]",
    color="Framework",
    symbol="Encoding",
    title=title,
    template="plotly_dark",
)
fig = px.scatter(df, **scatter_options)
fig.update_traces(marker_size=13)
fig.write_image("file_read_benchmark.png")