# LMDB Usage Example

In [None]:
from bigearthnet_encoder.encoder import write_S2_lmdb_with_lbls
from bigearthnet_common.example_data import get_s2_example_folder_path

# Generate a tiny LMDB archive from the example folder.
s2_path = get_s2_example_folder_path()
write_S2_lmdb_with_lbls(s2_path)
# `iterdir()` yields entries in arbitrary, filesystem-dependent order; sort the
# names so that the same example patch is selected on every run.
example_patch = sorted(entry.name for entry in s2_path.iterdir())[0]

In [None]:
from pathlib import Path
from bigearthnet_patch_interface.s2_interface import BigEarthNet_S2_Patch

# Location of the LMDB archive that is read below
# (presumably the default output of `write_S2_lmdb_with_lbls` -- confirm).
p = Path("S2_lmdb.db")
# Fail early, and say which path is missing, if the archive was not created.
assert p.exists(), f"LMDB archive not found at {p.resolve()}"

In [None]:
import lmdb

import numpy as np

# `readahead=True` is a good choice if the dataset fits in RAM;
# otherwise it may be faster to set `readahead=False`.
# The archive is opened with `readonly=True`, so locking is not needed;
# `lock=False` avoids the overhead that locking would presumably add.
# Possible optimization when reading many patches: a single `getmulti(keys)`
# call instead of a separate transaction per key (untested idea).
#
# Using the environment as a context manager closes it once the patch is read.
with lmdb.open(str(p), readonly=True, readahead=True, lock=False) as env:
    with env.begin() as txn:
        byteflow = txn.get(example_patch.encode("utf-8"))
        # `txn.get` returns None for an unknown key; fail with a clear message
        # instead of an opaque error while deserializing.
        if byteflow is None:
            raise KeyError(f"Patch {example_patch!r} not found in LMDB archive {p}")
        s2_patch = BigEarthNet_S2_Patch.loads(byteflow)

bands_10m = s2_patch.get_stacked_10m_bands()
bands_20m = s2_patch.get_stacked_20m_bands()

# interpolate to 10m dimension

A common next step is to interpolate the bands to a common size.
There are quite a few ways to interpolate the bands; two popular approaches are:
- [skimage.transform.resize](https://scikit-image.org/docs/dev/api/skimage.transform.html#resize)
- [torch.nn.functional.interpolate](https://pytorch.org/docs/stable/generated/torch.nn.functional.interpolate.html)

A couple of quick notes about torch:
- requires data to be 'batched'
- we use `mode="bicubic"`
    - further analysis could maybe show that a faster method works just as well
- interpolation uses edge value padding for out-of-boundary values

And a couple of notes about skimage:
- requires channel dimension to be _last_ (Height x Width x Channel)
- order=3 should be equal to `bi-cubic` according to [warp](https://scikit-image.org/docs/dev/api/skimage.transform.html#skimage.transform.warp) documentation

And in general:
- Data needs to be converted to `float32`

One issue is that the output values of the two approaches are _not_ identical.
However, the results look very similar and I cannot see any visual differences.

In [None]:
from torch import Tensor
from torch.nn.functional import interpolate
import skimage

In [None]:
%%timeit
# 3 == bicubic
bands_20m_sa = np.float32(np.moveaxis(bands_20m, 0, -1))
bands_20m_interp = skimage.transform.resize(
    bands_20m_sa, (120, 120, 6), order=3, mode="edge"
)
bands_20m_interp = np.moveaxis(bands_20m_interp, -1, 0)

In [None]:
%%timeit
# applies edge-padding to the input shape
# single "batch"
bands_10m_torch = Tensor(np.float32(bands_10m)).unsqueeze(dim=0)
bands_20m_torch = Tensor(np.float32(bands_20m)).unsqueeze(dim=0)

bands_20m_interp = interpolate(
    bands_20m_torch,
    bands_10m.shape[-2:],
    mode="bicubic",
)
bands_20m_interp.shape

As we can see here, the PyTorch implementation is _a lot_ faster than the skimage implementation.
On my local machine the PyTorch implementation is 10x faster!

In [None]:
# figure.align = center
import matplotlib.pyplot as plt

# Convert both band stacks to float32 tensors with a leading batch axis of size 1.
bands_10m_torch = Tensor(bands_10m.astype(np.float32)).unsqueeze(0)
bands_20m_torch = Tensor(bands_20m.astype(np.float32)).unsqueeze(0)

# Upsample the 20m bands to the spatial size of the 10m bands.
bands_20m_interp = interpolate(
    bands_20m_torch,
    size=bands_10m.shape[-2:],
    mode="bicubic",
)

# Show the first band of the single batch element.
plt.imshow(bands_20m_interp[0, 0], cmap="gray")
plt.title("Torch interpolate (bicubic)")
plt.axis("off");

In [None]:
# figure.align = center
# skimage expects the channel axis last, so move it from the front to the back
bands_20m_sa = np.float32(np.moveaxis(bands_20m, 0, -1))
# Target shape: spatial size of the 10m bands, channel count unchanged.
# Derived from the data instead of being hard-coded.
target_shape = (*bands_10m.shape[-2:], bands_20m_sa.shape[-1])
# order=3 == bicubic
bands_20m_interp = skimage.transform.resize(
    bands_20m_sa, target_shape, order=3, mode="edge"
)
# move the channel axis back to the front
bands_20m_interp = np.moveaxis(bands_20m_interp, -1, 0)

# show the first interpolated band
plt.imshow(bands_20m_interp[0], cmap="gray")
plt.title("Skimage resize (bicubic)")
plt.axis("off");