15 changes: 7 additions & 8 deletions docs/conf.py
@@ -4,16 +4,15 @@
# https://www.sphinx-doc.org/en/master/usage/configuration.html

import os
import re
import sys
from datetime import date
from linkml_arrays import __version__

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = 'linkml-arrays'
project = "linkml-arrays"
copyright = f"{date.today().year}, Ryan Ly <rly@lbl.gov>"
author = 'Ryan Ly <rly@lbl.gov>'
author = "Ryan Ly <rly@lbl.gov>"
release = __version__

# -- General configuration ---------------------------------------------------
@@ -25,7 +24,7 @@
"sphinx_rtd_theme",
"sphinx_click",
"sphinx_autodoc_typehints",
"myst_parser"
"myst_parser",
]

# generate autosummary pages
@@ -46,13 +45,13 @@
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

templates_path = ['_templates']
templates_path = ["_templates"]

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = 'sphinx_rtd_theme'
html_static_path = ['_static']
html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"]

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
1,145 changes: 718 additions & 427 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -9,7 +9,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = "^3.9"
#setuptools = "^65.5.0"
#tox = "^3.25.1"
tox = "^3.25.1"
#click = "^8.1.3"
#importlib-metadata = "^4.8.0"
linkml-runtime = "^1.6.0"
1 change: 1 addition & 0 deletions src/linkml_arrays/__init__.py
@@ -1,4 +1,5 @@
"""linkml-arrays package."""

import importlib_metadata

try:
7 changes: 5 additions & 2 deletions src/linkml_arrays/cli.py
@@ -1,7 +1,9 @@
"""Command line interface for linkml-arrays."""
import click

import logging

import click

from linkml_arrays import __version__
from linkml_arrays.main import demo

@@ -11,6 +13,7 @@

logger = logging.getLogger(__name__)


@click.group()
@click.option("-v", "--verbose", count=True)
@click.option("-q", "--quiet")
@@ -30,11 +33,11 @@ def main(verbose: int, quiet: bool):
if quiet:
logger.setLevel(level=logging.ERROR)


@main.command()
def run():
"""Run the linkml-arrays's demo command."""
demo()



if __name__ == "__main__":
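Example (not part of the diff): with the import and spacing cleanups in place, the CLI behaves as before. A quick way to exercise the `main` group and its `run` subcommand is click's test runner:

from click.testing import CliRunner

from linkml_arrays.cli import main

# Invoke the "run" subcommand with one -v (verbose) flag, as a shell user would.
result = CliRunner().invoke(main, ["-v", "run"])
print(result.exit_code)  # 0 on success
print(result.output)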
14 changes: 11 additions & 3 deletions src/linkml_arrays/dumpers/__init__.py
@@ -1,5 +1,13 @@
"""Dumper classes for linkml-arrays."""
from .yaml_numpy_dumper import YamlNumpyDumper
from .yaml_hdf5_dumper import YamlHdf5Dumper

from .hdf5_dumper import Hdf5Dumper
from .zarr_directory_store_dumper import ZarrDirectoryStoreDumper
from .yaml_hdf5_dumper import YamlHdf5Dumper
from .yaml_numpy_dumper import YamlNumpyDumper
from .zarr_directory_store_dumper import ZarrDirectoryStoreDumper

__all__ = [
"Hdf5Dumper",
"YamlHdf5Dumper",
"YamlNumpyDumper",
"ZarrDirectoryStoreDumper",
]
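Example (not part of the diff): with the sorted imports and the explicit `__all__`, all four dumpers are importable directly from the package root:

from linkml_arrays.dumpers import (
    Hdf5Dumper,
    YamlHdf5Dumper,
    YamlNumpyDumper,
    ZarrDirectoryStoreDumper,
)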
35 changes: 21 additions & 14 deletions src/linkml_arrays/dumpers/hdf5_dumper.py
@@ -1,18 +1,22 @@
"""Class for dumping a LinkML model to an HDF5 file."""

from typing import Union

import h5py
from pydantic import BaseModel

from linkml_runtime import SchemaView
from linkml_runtime.dumpers.dumper_root import Dumper
from linkml_runtime.utils.yamlutils import YAMLRoot
from linkml_runtime import SchemaView
from pydantic import BaseModel


def iterate_element(
element: Union[YAMLRoot, BaseModel],
schemaview: SchemaView,
group: h5py.Group = None
def _iterate_element(
element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, group: h5py.Group = None
):
"""Recursively iterate through the elements of a LinkML model and save them.

Writes Pydantic BaseModel objects as groups, slots that implement "linkml:elements"
as datasets, and other slots as attributes.
"""
# get the type of the element
element_type = type(element).__name__

@@ -25,23 +29,26 @@ def iterate_element(
if isinstance(v, BaseModel):
# create a subgroup and recurse
subgroup = group.create_group(k)
iterate_element(v, schemaview, subgroup)
_iterate_element(v, schemaview, subgroup)
else:
# create an attribute on the group
group.attrs[k] = v


class Hdf5Dumper(Dumper):
"""Dumper class for LinkML models to HDF5 files."""

# TODO is this the right method to override? it does not dump a string
def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs):
"""Dump the element to an HDF5 file.

def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs) -> str:
""" Return element formatted as a YAML string with paths to HDF5 files containing the arrays as datasets"""
Raises:
ValueError: If the class requires an identifier and it is not provided.
"""
id_slot = schemaview.get_identifier_slot(element.__class__.__name__)
if id_slot is None:
raise ValueError("The class requires an identifier.")
id_value = getattr(element, id_slot.name)
output_file_path = f"{id_value}.h5"
with h5py.File(output_file_path, "w") as f:
iterate_element(element, schemaview, f)



_iterate_element(element, schemaview, f)
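Example (not part of the diff): a minimal sketch of calling Hdf5Dumper.dumps. The schema text and Container class are hypothetical stand-ins, just enough for the dumper to resolve an identifier slot, and it is assumed that SchemaView accepts inline YAML schema text.

from linkml_runtime import SchemaView
from pydantic import BaseModel

from linkml_arrays.dumpers import Hdf5Dumper

# Hypothetical minimal schema: one class with an identifier slot.
SCHEMA = """
id: https://example.org/demo
name: demo
prefixes:
  linkml: https://w3id.org/linkml/
imports:
  - linkml:types
default_range: string
classes:
  Container:
    attributes:
      name:
        identifier: true
      description: {}
"""

class Container(BaseModel):
    name: str
    description: str

schemaview = SchemaView(SCHEMA)
# Writes "c1.h5", named after the identifier value; nested BaseModels would
# become HDF5 groups, and remaining scalar slots become attributes.
Hdf5Dumper().dumps(Container(name="c1", description="demo"), schemaview)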
34 changes: 24 additions & 10 deletions src/linkml_arrays/dumpers/yaml_hdf5_dumper.py
@@ -1,15 +1,28 @@
"""Class for dumping a LinkML model to a YAML file with paths to HDF5 files."""

from typing import Union

import h5py
from pydantic import BaseModel
import yaml

from linkml_runtime import SchemaView
from linkml_runtime.dumpers.dumper_root import Dumper
from linkml_runtime.utils.yamlutils import YAMLRoot
from linkml_runtime import SchemaView
from pydantic import BaseModel


def _iterate_element(
element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, parent_identifier=None
):
"""Recursively iterate through the elements of a LinkML model and save them.

Returns a dictionary with the same structure as the input element, except that slots
that implement "linkml:elements" (arrays) are written to HDF5 files and replaced in the
dictionary by the paths to those files. Each array is written to an HDF5 dataset at path
"/data" in a new HDF5 file.

def iterate_element(element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, parent_identifier = None):
Raises:
ValueError: If the class requires an identifier and it is not provided.
"""
# get the type of the element
element_type = type(element).__name__

@@ -33,24 +46,25 @@ def iterate_element(element: Union[YAMLRoot, BaseModel], schemaview: SchemaView,
output_file_path = f"{parent_identifier}.{found_class.name}.{found_slot.name}.h5"
else:
output_file_path = f"{found_class.name}.{found_slot.name}.h5"
with h5py.File(output_file_path, "w") as f: # TODO do not assume that there is only one by this name
with h5py.File(
output_file_path, "w"
) as f: # TODO do not assume that there is only one by this name
f.create_dataset("data", data=v)
ret_dict[k] = f"file:./{output_file_path}" # TODO make this nicer
else:
if isinstance(v, BaseModel):
v2 = iterate_element(v, schemaview, id_value)
v2 = _iterate_element(v, schemaview, id_value)
ret_dict[k] = v2
else:
ret_dict[k] = v
return ret_dict


class YamlHdf5Dumper(Dumper):
"""Class for dumping a LinkML model to a YAML file with paths to HDF5 files."""

def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs) -> str:
""" Return element formatted as a YAML string with paths to HDF5 files containing the arrays as datasets"""
input = iterate_element(element, schemaview)
"""Return element formatted as a YAML string."""
input = _iterate_element(element, schemaview)

return yaml.dump(input)
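Example (not part of the diff): a hypothetical sketch of the sidecar-file behavior. The values attribute implements "linkml:elements", so it should be written to Container.values.h5 (dataset "/data") and replaced in the YAML output by that file's path; the schema and class are invented for illustration.

from typing import List

from linkml_runtime import SchemaView
from pydantic import BaseModel

from linkml_arrays.dumpers import YamlHdf5Dumper

SCHEMA = """
id: https://example.org/demo
name: demo
prefixes:
  linkml: https://w3id.org/linkml/
imports:
  - linkml:types
default_range: string
classes:
  Container:
    attributes:
      name:
        identifier: true
      values:
        implements:
          - linkml:elements
        range: float
        multivalued: true
"""

class Container(BaseModel):
    name: str
    values: List[float]

schemaview = SchemaView(SCHEMA)
yaml_str = YamlHdf5Dumper().dumps(Container(name="c1", values=[1.0, 2.0]), schemaview)
print(yaml_str)  # expected: name: c1, values: file:./Container.values.h5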


30 changes: 21 additions & 9 deletions src/linkml_arrays/dumpers/yaml_numpy_dumper.py
@@ -1,15 +1,28 @@
"""Class for dumpling a LinkML model to a YAML file with paths to NumPy files."""

from typing import Union

import numpy as np
from pydantic import BaseModel
import yaml

from linkml_runtime import SchemaView
from linkml_runtime.dumpers.dumper_root import Dumper
from linkml_runtime.utils.yamlutils import YAMLRoot
from linkml_runtime import SchemaView
from pydantic import BaseModel


def _iterate_element(
element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, parent_identifier=None
):
"""Recursively iterate through the elements of a LinkML model and save them.

Returns a dictionary with the same structure as the input element, except that slots
that implement "linkml:elements" (arrays) are written to NumPy files and replaced in the
dictionary by the paths to those files. Each array is written to a new NumPy file.

def iterate_element(element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, parent_identifier = None):
Raises:
ValueError: If the class requires an identifier and it is not provided.
"""
# get the type of the element
element_type = type(element).__name__

@@ -37,19 +50,18 @@ def iterate_element(element: Union[YAMLRoot, BaseModel], schemaview: SchemaView,
ret_dict[k] = f"file:./{output_file_path}" # TODO make this nicer
else:
if isinstance(v, BaseModel):
v2 = iterate_element(v, schemaview, id_value)
v2 = _iterate_element(v, schemaview, id_value)
ret_dict[k] = v2
else:
ret_dict[k] = v
return ret_dict


class YamlNumpyDumper(Dumper):
"""Dumper class for LinkML models to YAML files with paths to NumPy files."""

def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs) -> str:
""" Return element formatted as a YAML string with paths to numpy files containing the ndarrays"""
input = iterate_element(element, schemaview)
"""Return element formatted as a YAML string."""
input = _iterate_element(element, schemaview)

return yaml.dump(input)
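Example (not part of the diff): the NumPy variant mirrors the sketch above, swapping HDF5 sidecar files for NumPy files. This reuses the hypothetical Container and schemaview from the YamlHdf5Dumper example; the ".npy" suffix is assumed by analogy with the HDF5 dumper's naming, since the line that builds the file name is folded out of this diff.

from linkml_arrays.dumpers import YamlNumpyDumper

yaml_str = YamlNumpyDumper().dumps(Container(name="c1", values=[1.0, 2.0]), schemaview)
print(yaml_str)  # expected: values: file:./Container.values.npy (assumed name)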


35 changes: 21 additions & 14 deletions src/linkml_arrays/dumpers/zarr_directory_store_dumper.py
@@ -1,18 +1,22 @@
"""Class for dumping a LinkML model to a Zarr directory store."""

from typing import Union

import zarr
from pydantic import BaseModel

from linkml_runtime import SchemaView
from linkml_runtime.dumpers.dumper_root import Dumper
from linkml_runtime.utils.yamlutils import YAMLRoot
from linkml_runtime import SchemaView
from pydantic import BaseModel


def iterate_element(
element: Union[YAMLRoot, BaseModel],
schemaview: SchemaView,
group: zarr.hierarchy.Group = None
def _iterate_element(
element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, group: zarr.hierarchy.Group = None
):
"""Recursively iterate through the elements of a LinkML model and save them.

Writes Pydantic BaseModel objects as groups, slots that implement "linkml:elements"
as datasets, and other slots as attributes.
"""
# get the type of the element
element_type = type(element).__name__

@@ -25,24 +29,27 @@ def iterate_element(
if isinstance(v, BaseModel):
# create a subgroup and recurse
subgroup = group.create_group(k)
iterate_element(v, schemaview, subgroup)
_iterate_element(v, schemaview, subgroup)
else:
# create an attribute on the group
group.attrs[k] = v


class ZarrDirectoryStoreDumper(Dumper):
"""Dumper class for LinkML models to Zarr directory stores."""

# TODO is this the right method to override? it does not dump a string
def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs):
"""Dump the element to a Zarr directory store.

def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs) -> str:
""" Return element formatted as a YAML string with paths to HDF5 files containing the arrays as datasets"""
Raises:
ValueError: If the class requires an identifier and it is not provided.
"""
id_slot = schemaview.get_identifier_slot(element.__class__.__name__)
if id_slot is None:
raise ValueError("The class requires an identifier.")
id_value = getattr(element, id_slot.name)
output_file_path = f"{id_value}.zarr"
store = zarr.DirectoryStore(output_file_path)
root = zarr.group(store=store, overwrite=True)
iterate_element(element, schemaview, root)



_iterate_element(element, schemaview, root)
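Example (not part of the diff): the Zarr dumper follows the HDF5 pattern but writes a <identifier>.zarr directory store. This reuses the hypothetical Container and schemaview from the Hdf5Dumper sketch.

import zarr

from linkml_arrays.dumpers import ZarrDirectoryStoreDumper

ZarrDirectoryStoreDumper().dumps(Container(name="c1", description="demo"), schemaview)

# Re-open the store read-only; scalar slots should appear as root attributes.
root = zarr.open("c1.zarr", mode="r")
print(dict(root.attrs))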
14 changes: 11 additions & 3 deletions src/linkml_arrays/loaders/__init__.py
@@ -1,5 +1,13 @@
"""Dumper classes for linkml-arrays."""
from .yaml_numpy_loader import YamlNumpyLoader
from .yaml_hdf5_loader import YamlHdf5Loader

from .hdf5_loader import Hdf5Loader
from .zarr_directory_store_loader import ZarrDirectoryStoreLoader
from .yaml_hdf5_loader import YamlHdf5Loader
from .yaml_numpy_loader import YamlNumpyLoader
from .zarr_directory_store_loader import ZarrDirectoryStoreLoader

__all__ = [
"Hdf5Loader",
"YamlHdf5Loader",
"YamlNumpyLoader",
"ZarrDirectoryStoreLoader",
]
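Example (not part of the diff): the loaders package gets the same explicit `__all__`, so round-trip code can pair each dumper with its loader from the package roots. Loader call signatures are not shown in this diff, so only construction is sketched.

from linkml_arrays.dumpers import YamlNumpyDumper
from linkml_arrays.loaders import YamlNumpyLoader

dumper = YamlNumpyDumper()  # writes YAML plus NumPy sidecar files
loader = YamlNumpyLoader()  # the matching reader for that layout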