### Create dataset

In [1]:
from collections.abc import Iterable
from pathlib import Path
from typing import NamedTuple, TypeVar

import h5py
import numpy as np
import tqdm

from mynd.database import H5Database, create_file_database, load_file_database
from mynd.camera import ImageBundle, ImageBundleLoader

from mynd.tasks.image_export import CreateDatabaseTask
from mynd.tasks.image_export import generate_image_bundle_loaders

# Validation
from mynd.tasks.image_export import (
    ImageBundleTemplate,
    create_image_bundle_template,
    check_image_bundle_fits_template,
)

# Memory allocation and loading
from mynd.tasks.image_export import (
    ImageBundleBuffers,
    load_bundle_buffers,
    allocate_datasets,
)

from mynd.utils.log import logger
from mynd.utils.result import Ok, Err, Result


T: TypeVar = TypeVar("T")


def generate_chunks(items: Iterable[T], chunk_size: int) -> Iterable[list[T]]:
    """Generate chunks of the items with the maximum size of chunk size."""
    for index in range(0, len(items), chunk_size):
        yield items[index : index + chunk_size]


def insert_images_into_database(
    group: h5py.Group,
    bundle_loaders: Iterable[ImageBundleLoader],
    chunk_size: int,
) -> Result[None, str]:
    """Builds a frame database based on the given configuration."""

    bundle_count: int = len(bundle_loaders)

    if bundle_count == 0:
        return Err("no bundle loaders provided for database insert")

    # Create template to validate the shape, format, and dtype
    template: ImageBundleTemplate = create_image_bundle_template(bundle_loaders[0]())

    # TODO: Return datasets from here
    allocate_datasets(group, template, bundle_count)

    running_index: int = 0
    for loaders in tqdm.tqdm(
        generate_chunks(bundle_loaders, chunk_size), desc="Loading bundles..."
    ):

        chunk_count: int = len(loaders)

        # TODO: Load images to buffer
        load_buffer_result: Result[ImageBundleBuffers, str] = load_bundle_buffers(
            template, loaders
        )

        if load_buffer_result.is_err():
            return load_buffer_result

        buffers: ImageBundleBuffers = load_buffer_result.ok()

        # TODO: Load buffer to dataset
        group.get("labels")[
            running_index : running_index + chunk_count
        ] = buffers.labels
        group.get("intensities")[
            running_index : running_index + chunk_count
        ] = buffers.intensities
        group.get("ranges")[
            running_index : running_index + chunk_count
        ] = buffers.ranges
        group.get("normals")[
            running_index : running_index + chunk_count
        ] = buffers.normals

        running_index += len(loaders)

    raise NotImplementedError("insert_images_into_database is not implemented")

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


  @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32)


In [2]:
def main() -> None:
    """Main function."""

    LOADER_COUNT: int = -1

    SSD_01: Path = Path("/data/kingston_snv_01")
    SSD_02: Path = Path("/data/kingston_snv_02")

    paths: dict[str, Path] = {
        "images": SSD_02
        / Path("acfr_images_preprocessed/r23685bc_20100605_021022_images"),
        "ranges": SSD_01 / Path("acfr_stereo_ranges/r23685bc_20100605_021022_ranges"),
        "normals": SSD_01
        / Path("acfr_stereo_normals/r23685bc_20100605_021022_normals"),
        "output": SSD_01
        / Path("acfr_camera_databases/r23685bc_20100605_021022_test.h5"),
    }

    # VISITS: r23685bc_20100605_021022, r23685bc_20120530_233021, r23685bc_20140616_225022

    # Each group defines a collection of images and geometry (range and normals) captured by one sensor
    file_patterns: dict[str, str] = {
        "stereo_left": "*_LC16*",
        # "stereo_right": "*_RM16*",
    }

    config: CreateDatabaseTask.Config = CreateDatabaseTask.Config(
        output_path=paths.get("output"),
        group_patterns=file_patterns,
    )

    # --------------------------------------------------------------------------------

    bundle_loaders: list[ImageBundleLoader] = generate_image_bundle_loaders(
        image_directory=paths.get("images"),
        range_directory=paths.get("ranges"),
        normal_directory=paths.get("normals"),
        pattern=file_patterns.get("stereo_left"),
    )

    # --------------------------------------------------------------------------------

    # Create / load file database
    if not config.output_path.exists():
        result: Result[h5py.File, str] = create_file_database(config.output_path)
    else:
        result: Result[H5Database, str] = load_file_database(config.output_path)

    if result.is_err():
        logger.error(result.err())
        return

    database: H5Database = result.ok()
    group: H5Database.Group = database.create_group(
        "/r23685bc_20100605_021022/images"
    ).unwrap()

    # TODO: Insert image bundle into database
    insertion_result: Result[None, str] = insert_images_into_database(
        group=group,
        bundle_loaders=bundle_loaders[:LOADER_COUNT],
        chunk_size=100,
    )

    if insertion_result.is_err():
        logger.error(insertion_result.err())

    database.visit(logger.info)


# ---------- Run main function ----------
main()

UnwrapError: Called `Result.unwrap()` on an `Err` value: 'group is already in database'