15 changes: 7 additions & 8 deletions docs/conf.py
@@ -4,16 +4,15 @@
# https://www.sphinx-doc.org/en/master/usage/configuration.html

import os
import re
import sys
from datetime import date
from linkml_arrays import __version__

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = 'linkml-arrays'
project = "linkml-arrays"
copyright = f"{date.today().year}, Ryan Ly <rly@lbl.gov>"
author = 'Ryan Ly <rly@lbl.gov>'
author = "Ryan Ly <rly@lbl.gov>"
release = __version__

# -- General configuration ---------------------------------------------------
@@ -25,7 +24,7 @@
"sphinx_rtd_theme",
"sphinx_click",
"sphinx_autodoc_typehints",
"myst_parser"
"myst_parser",
]

# generate autosummary pages
@@ -46,13 +45,13 @@
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

templates_path = ['_templates']
templates_path = ["_templates"]

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = 'sphinx_rtd_theme'
html_static_path = ['_static']
html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"]

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
1,145 changes: 718 additions & 427 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -9,7 +9,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = "^3.9"
#setuptools = "^65.5.0"
#tox = "^3.25.1"
tox = "^3.25.1"
#click = "^8.1.3"
#importlib-metadata = "^4.8.0"
linkml-runtime = "^1.6.0"
1 change: 1 addition & 0 deletions src/linkml_arrays/__init__.py
@@ -1,4 +1,5 @@
"""linkml-arrays package."""

import importlib_metadata

try:
7 changes: 5 additions & 2 deletions src/linkml_arrays/cli.py
@@ -1,7 +1,9 @@
"""Command line interface for linkml-arrays."""
import click

import logging

import click

from linkml_arrays import __version__
from linkml_arrays.main import demo

@@ -11,6 +13,7 @@

logger = logging.getLogger(__name__)


@click.group()
@click.option("-v", "--verbose", count=True)
@click.option("-q", "--quiet")
@@ -30,11 +33,11 @@ def main(verbose: int, quiet: bool):
if quiet:
logger.setLevel(level=logging.ERROR)


@main.command()
def run():
"""Run the linkml-arrays's demo command."""
demo()



if __name__ == "__main__":
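Example (not part of the diff): with the import and spacing cleanups in place, the CLI behaves as before. A quick way to exercise the `main` group and its `run` subcommand is click's test runner:

from click.testing import CliRunner

from linkml_arrays.cli import main

# Invoke the "run" subcommand with one -v (verbose) flag, as a shell user would.
result = CliRunner().invoke(main, ["-v", "run"])
print(result.exit_code)  # 0 on success
print(result.output)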
14 changes: 11 additions & 3 deletions src/linkml_arrays/dumpers/__init__.py
@@ -1,5 +1,13 @@
"""Dumper classes for linkml-arrays."""
from .yaml_numpy_dumper import YamlNumpyDumper
from .yaml_hdf5_dumper import YamlHdf5Dumper

from .hdf5_dumper import Hdf5Dumper
from .zarr_directory_store_dumper import ZarrDirectoryStoreDumper
from .yaml_hdf5_dumper import YamlHdf5Dumper
from .yaml_numpy_dumper import YamlNumpyDumper
from .zarr_directory_store_dumper import ZarrDirectoryStoreDumper

__all__ = [
"Hdf5Dumper",
"YamlHdf5Dumper",
"YamlNumpyDumper",
"ZarrDirectoryStoreDumper",
]
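Example (not part of the diff): with the sorted imports and the explicit `__all__`, all four dumpers are importable directly from the package root:

from linkml_arrays.dumpers import (
    Hdf5Dumper,
    YamlHdf5Dumper,
    YamlNumpyDumper,
    ZarrDirectoryStoreDumper,
)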
35 changes: 21 additions & 14 deletions src/linkml_arrays/dumpers/hdf5_dumper.py
@@ -1,18 +1,22 @@
"""Class for dumping a LinkML model to an HDF5 file."""

from typing import Union

import h5py
from pydantic import BaseModel

from linkml_runtime import SchemaView
from linkml_runtime.dumpers.dumper_root import Dumper
from linkml_runtime.utils.yamlutils import YAMLRoot
from linkml_runtime import SchemaView
from pydantic import BaseModel


def iterate_element(
element: Union[YAMLRoot, BaseModel],
schemaview: SchemaView,
group: h5py.Group = None
def _iterate_element(
element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, group: h5py.Group = None
):
"""Recursively iterate through the elements of a LinkML model and save them.

Writes Pydantic BaseModel objects as groups, slots that implement "linkml:elements"
as datasets, and other slots as attributes.
"""
# get the type of the element
element_type = type(element).__name__

@@ -25,23 +29,26 @@ def iterate_element(
if isinstance(v, BaseModel):
# create a subgroup and recurse
subgroup = group.create_group(k)
iterate_element(v, schemaview, subgroup)
_iterate_element(v, schemaview, subgroup)
else:
# create an attribute on the group
group.attrs[k] = v


class Hdf5Dumper(Dumper):
"""Dumper class for LinkML models to HDF5 files."""

# TODO is this the right method to override? it does not dump a string
def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs):
"""Dump the element to an HDF5 file.

def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs) -> str:
""" Return element formatted as a YAML string with paths to HDF5 files containing the arrays as datasets"""
Raises:
ValueError: If the class requires an identifier and it is not provided.
"""
id_slot = schemaview.get_identifier_slot(element.__class__.__name__)
if id_slot is None:
raise ValueError("The class requires an identifier.")
id_value = getattr(element, id_slot.name)
output_file_path = f"{id_value}.h5"
with h5py.File(output_file_path, "w") as f:
iterate_element(element, schemaview, f)



_iterate_element(element, schemaview, f)
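Example (not part of the diff): a minimal sketch of calling Hdf5Dumper.dumps. The schema text and Container class are hypothetical stand-ins, just enough for the dumper to resolve an identifier slot, and it is assumed that SchemaView accepts inline YAML schema text.

from linkml_runtime import SchemaView
from pydantic import BaseModel

from linkml_arrays.dumpers import Hdf5Dumper

# Hypothetical minimal schema: one class with an identifier slot.
SCHEMA = """
id: https://example.org/demo
name: demo
prefixes:
  linkml: https://w3id.org/linkml/
imports:
  - linkml:types
default_range: string
classes:
  Container:
    attributes:
      name:
        identifier: true
      description: {}
"""

class Container(BaseModel):
    name: str
    description: str

schemaview = SchemaView(SCHEMA)
# Writes "c1.h5", named after the identifier value; nested BaseModels would
# become HDF5 groups, and remaining scalar slots become attributes.
Hdf5Dumper().dumps(Container(name="c1", description="demo"), schemaview)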
34 changes: 24 additions & 10 deletions src/linkml_arrays/dumpers/yaml_hdf5_dumper.py
@@ -1,15 +1,28 @@
"""Class for dumping a LinkML model to a YAML file with paths to HDF5 files."""

from typing import Union

import h5py
from pydantic import BaseModel
import yaml

from linkml_runtime import SchemaView
from linkml_runtime.dumpers.dumper_root import Dumper
from linkml_runtime.utils.yamlutils import YAMLRoot
from linkml_runtime import SchemaView
from pydantic import BaseModel


def _iterate_element(
element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, parent_identifier=None
):
"""Recursively iterate through the elements of a LinkML model and save them.

Returns a dictionary with the same structure as the input element, except that slots
that implement "linkml:elements" (arrays) are written to HDF5 files and replaced in the
dictionary by the paths to those files. Each array is written to an HDF5 dataset at path
"/data" in a new HDF5 file.

def iterate_element(element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, parent_identifier = None):
Raises:
ValueError: If the class requires an identifier and it is not provided.
"""
# get the type of the element
element_type = type(element).__name__

@@ -33,24 +46,25 @@ def iterate_element(element: Union[YAMLRoot, BaseModel], schemaview: SchemaView,
output_file_path = f"{parent_identifier}.{found_class.name}.{found_slot.name}.h5"
else:
output_file_path = f"{found_class.name}.{found_slot.name}.h5"
with h5py.File(output_file_path, "w") as f: # TODO do not assume that there is only one by this name
with h5py.File(
output_file_path, "w"
) as f: # TODO do not assume that there is only one by this name
f.create_dataset("data", data=v)
ret_dict[k] = f"file:./{output_file_path}" # TODO make this nicer
else:
if isinstance(v, BaseModel):
v2 = iterate_element(v, schemaview, id_value)
v2 = _iterate_element(v, schemaview, id_value)
ret_dict[k] = v2
else:
ret_dict[k] = v
return ret_dict


class YamlHdf5Dumper(Dumper):
"""Class for dumping a LinkML model to a YAML file with paths to HDF5 files."""

def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs) -> str:
""" Return element formatted as a YAML string with paths to HDF5 files containing the arrays as datasets"""
input = iterate_element(element, schemaview)
"""Return element formatted as a YAML string."""
input = _iterate_element(element, schemaview)

return yaml.dump(input)
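Example (not part of the diff): a hypothetical sketch of the sidecar-file behavior. The values attribute implements "linkml:elements", so it should be written to Container.values.h5 (dataset "/data") and replaced in the YAML output by that file's path; the schema and class are invented for illustration.

from typing import List

from linkml_runtime import SchemaView
from pydantic import BaseModel

from linkml_arrays.dumpers import YamlHdf5Dumper

SCHEMA = """
id: https://example.org/demo
name: demo
prefixes:
  linkml: https://w3id.org/linkml/
imports:
  - linkml:types
default_range: string
classes:
  Container:
    attributes:
      name:
        identifier: true
      values:
        implements:
          - linkml:elements
        range: float
        multivalued: true
"""

class Container(BaseModel):
    name: str
    values: List[float]

schemaview = SchemaView(SCHEMA)
yaml_str = YamlHdf5Dumper().dumps(Container(name="c1", values=[1.0, 2.0]), schemaview)
print(yaml_str)  # expected: name: c1, values: file:./Container.values.h5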


30 changes: 21 additions & 9 deletions src/linkml_arrays/dumpers/yaml_numpy_dumper.py
@@ -1,15 +1,28 @@
"""Class for dumpling a LinkML model to a YAML file with paths to NumPy files."""

from typing import Union

import numpy as np
from pydantic import BaseModel
import yaml

from linkml_runtime import SchemaView
from linkml_runtime.dumpers.dumper_root import Dumper
from linkml_runtime.utils.yamlutils import YAMLRoot
from linkml_runtime import SchemaView
from pydantic import BaseModel


def _iterate_element(
element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, parent_identifier=None
):
"""Recursively iterate through the elements of a LinkML model and save them.

Returns a dictionary with the same structure as the input element, except that slots
that implement "linkml:elements" (arrays) are written to NumPy files and replaced in the
dictionary by the paths to those files. Each array is written to a new NumPy file.

def iterate_element(element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, parent_identifier = None):
Raises:
ValueError: If the class requires an identifier and it is not provided.
"""
# get the type of the element
element_type = type(element).__name__

@@ -37,19 +50,18 @@ def iterate_element(element: Union[YAMLRoot, BaseModel], schemaview: SchemaView,
ret_dict[k] = f"file:./{output_file_path}" # TODO make this nicer
else:
if isinstance(v, BaseModel):
v2 = iterate_element(v, schemaview, id_value)
v2 = _iterate_element(v, schemaview, id_value)
ret_dict[k] = v2
else:
ret_dict[k] = v
return ret_dict


class YamlNumpyDumper(Dumper):
"""Dumper class for LinkML models to YAML files with paths to NumPy files."""

def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs) -> str:
""" Return element formatted as a YAML string with paths to numpy files containing the ndarrays"""
input = iterate_element(element, schemaview)
"""Return element formatted as a YAML string."""
input = _iterate_element(element, schemaview)

return yaml.dump(input)
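Example (not part of the diff): the NumPy variant mirrors the sketch above, swapping HDF5 sidecar files for NumPy files. This reuses the hypothetical Container and schemaview from the YamlHdf5Dumper example; the ".npy" suffix is assumed by analogy with the HDF5 dumper's naming, since the line that builds the file name is folded out of this diff.

from linkml_arrays.dumpers import YamlNumpyDumper

yaml_str = YamlNumpyDumper().dumps(Container(name="c1", values=[1.0, 2.0]), schemaview)
print(yaml_str)  # expected: values: file:./Container.values.npy (assumed name)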


35 changes: 21 additions & 14 deletions src/linkml_arrays/dumpers/zarr_directory_store_dumper.py
@@ -1,18 +1,22 @@
"""Class for dumping a LinkML model to a Zarr directory store."""

from typing import Union

import zarr
from pydantic import BaseModel

from linkml_runtime import SchemaView
from linkml_runtime.dumpers.dumper_root import Dumper
from linkml_runtime.utils.yamlutils import YAMLRoot
from linkml_runtime import SchemaView
from pydantic import BaseModel


def iterate_element(
element: Union[YAMLRoot, BaseModel],
schemaview: SchemaView,
group: zarr.hierarchy.Group = None
def _iterate_element(
element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, group: zarr.hierarchy.Group = None
):
"""Recursively iterate through the elements of a LinkML model and save them.

Writes Pydantic BaseModel objects as groups, slots that implement "linkml:elements"
as datasets, and other slots as attributes.
"""
# get the type of the element
element_type = type(element).__name__

@@ -25,24 +29,27 @@ def iterate_element(
if isinstance(v, BaseModel):
# create a subgroup and recurse
subgroup = group.create_group(k)
iterate_element(v, schemaview, subgroup)
_iterate_element(v, schemaview, subgroup)
else:
# create an attribute on the group
group.attrs[k] = v


class ZarrDirectoryStoreDumper(Dumper):
"""Dumper class for LinkML models to Zarr directory stores."""

# TODO is this the right method to override? it does not dump a string
def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs):
"""Dump the element to a Zarr directory store.

def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs) -> str:
""" Return element formatted as a YAML string with paths to HDF5 files containing the arrays as datasets"""
Raises:
ValueError: If the class requires an identifier and it is not provided.
"""
id_slot = schemaview.get_identifier_slot(element.__class__.__name__)
if id_slot is None:
raise ValueError("The class requires an identifier.")
id_value = getattr(element, id_slot.name)
output_file_path = f"{id_value}.zarr"
store = zarr.DirectoryStore(output_file_path)
root = zarr.group(store=store, overwrite=True)
iterate_element(element, schemaview, root)



_iterate_element(element, schemaview, root)
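Example (not part of the diff): the Zarr dumper follows the HDF5 pattern but writes a <identifier>.zarr directory store. This reuses the hypothetical Container and schemaview from the Hdf5Dumper sketch.

import zarr

from linkml_arrays.dumpers import ZarrDirectoryStoreDumper

ZarrDirectoryStoreDumper().dumps(Container(name="c1", description="demo"), schemaview)

# Re-open the store read-only; scalar slots should appear as root attributes.
root = zarr.open("c1.zarr", mode="r")
print(dict(root.attrs))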
14 changes: 11 additions & 3 deletions src/linkml_arrays/loaders/__init__.py
@@ -1,5 +1,13 @@
"""Dumper classes for linkml-arrays."""
from .yaml_numpy_loader import YamlNumpyLoader
from .yaml_hdf5_loader import YamlHdf5Loader

from .hdf5_loader import Hdf5Loader
from .zarr_directory_store_loader import ZarrDirectoryStoreLoader
from .yaml_hdf5_loader import YamlHdf5Loader
from .yaml_numpy_loader import YamlNumpyLoader
from .zarr_directory_store_loader import ZarrDirectoryStoreLoader

__all__ = [
"Hdf5Loader",
"YamlHdf5Loader",
"YamlNumpyLoader",
"ZarrDirectoryStoreLoader",
]
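Example (not part of the diff): the loaders package gets the same explicit `__all__`, so round-trip code can pair each dumper with its loader from the package roots. Loader call signatures are not shown in this diff, so only construction is sketched.

from linkml_arrays.dumpers import YamlNumpyDumper
from linkml_arrays.loaders import YamlNumpyLoader

dumper = YamlNumpyDumper()  # writes YAML plus NumPy sidecar files
loader = YamlNumpyLoader()  # the matching reader for that layout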