In [None]:
import lmdb
import rasterio
import safetensors
import numpy as np
from pathlib import Path
from safetensors.numpy import deserialize, load_file, load

def read_single_band_raster(path):
    """Open the raster at `path` with rasterio and return the result of `read(1)`.

    The dataset is opened in a `with` block, so it is released before the
    data is handed back to the caller.
    """
    with rasterio.open(path) as dataset:
        first_band = dataset.read(1)
    return first_band

# Root directory of the BigEarthNet TIFF inputs, relative to the current working directory
p = Path("tiffs/BigEarthNet")
# Map every file matching `*.tif*` below the root (searched recursively) to its first band
source_data = {file: read_single_band_raster(file) for file in p.glob("**/*.tif*")}
# Fail fast: without any inputs the comparison loop further down would pass vacuously
assert source_data, f"No TIFF files found under: {p}"

# The LMDB directory read here has to be created beforehand by running the encoder, e.g.:
# ./result/bin/encoder --bigearthnet-s1-root tiffs/BigEarthNet/S1/ --bigearthnet-s2-root tiffs/BigEarthNet/S2/ artifacts/
#
# The environment is used as a context manager so that it is closed as soon as all
# entries have been read. Leaving it open means that re-running this cell would open
# the same LMDB environment a second time within one process, which py-lmdb warns against.
with lmdb.open("artifacts", readonly=True) as env:
    with env.begin(write=False) as txn:
        cur = txn.cursor()
        # Keys are decoded as UTF-8; each value is deserialized with safetensors' `load`
        decoded_lmdb_data = {k.decode("utf-8"): load(v) for (k, v) in cur}

# Each LMDB value is itself a safetensors dictionary whose inner keys are derived from the band suffix.
# Flatten those inner dictionaries into one list holding every decoded array.
decoded_values = [
    band_array
    for tensors_by_band in decoded_lmdb_data.values()
    for band_array in tensors_by_band.values()
]

# From the Python side, the only thing this integration test verifies is that the data survives unchanged.
# Wrong keys, ordering, or anything else are out of scope here and belong in a separate unit test.
for source_key, source_value in source_data.items():
    assert any(
        np.array_equal(source_value, candidate) for candidate in decoded_values
    ), f"Couldn't find data in the LMDB database that matches the data from: {source_key}"

In [52]:
import os

# Path of the encoder executable, supplied through the ENCODER_EXEC_PATH environment variable
executable = os.getenv("ENCODER_EXEC_PATH")
# Stop here when the variable is missing or empty
assert executable, "Executable ENCODER_EXEC_PATH wasn't set"

AssertionError: Executable ENCODER_EXEC_PATH wasn't set

In [53]:
# Show the input root `p` as a plain string (cell output only; nothing is assigned)
str(p)

'tiffs/BigEarthNet'