Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(converters): add option to include descendants #1630

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 23 additions & 6 deletions linkml/utils/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,14 @@
help="Infer missing slot values",
)
@click.option("--context", "-c", multiple=True, help="path to JSON-LD context file")
@click.option(
"--include-range-class-descendants/--no-range-class-descendants",
default=False,
show_default=False,
help="""
When handling range constraints, include all descendants of the range class instead of just the range class
""",
)
@click.version_option(__version__, "-V", "--version")
@click.argument("input")
def cli(
Expand All @@ -83,6 +91,7 @@ def cli(
validate=None,
infer=None,
index_slot=None,
include_range_class_descendants=False,
) -> None:
"""
Converts instance data to and from different LinkML Runtime serialization formats.
Expand Down Expand Up @@ -141,17 +150,25 @@ def cli(
raise Exception("--index-slot is required for CSV input")
inargs["index_slot"] = index_slot
inargs["schema"] = schema
obj = loader.load(source=input, target_class=py_target_class, **inargs)
if infer:
infer_config = inference_utils.Config(use_expressions=True, use_string_serialization=True)
infer_all_slot_values(obj, schemaview=sv, config=infer_config)
try:
data_as_dict = loader.load_as_dict(source=input, **inargs)
except NotImplementedError:
print("load_as_dict has failed! Loading as object")
obj = loader.load(source=input, target_class=py_target_class, **inargs)
data_as_dict = as_simple_dict(obj)
if validate:
if schema is None:
raise Exception(
"--schema must be passed in order to validate. Suppress with --no-validate"
)
# TODO: use validator framework
validation.validate_object(obj, schema)
validator = JsonSchemaDataValidator(schema, include_range_class_descendants=include_range_class_descendants)
validator.iter_validate_dict(data_as_dict, target_class_name=py_target_class.class_name)

obj = loader.load(source=input, target_class=py_target_class, **inargs)

if infer:
infer_config = inference_utils.Config(use_expressions=True, use_string_serialization=True)
infer_all_slot_values(obj, schemaview=sv, config=infer_config)

output_format = _get_format(output, output_format, default="json")
if output_format == "json-ld":
Expand Down
2 changes: 2 additions & 0 deletions linkml/utils/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def validate_object(
schema: Union[str, TextIO, SchemaDefinition],
target_class: Type[YAMLRoot] = None,
closed: bool = True,
include_range_class_descendants=False,
):
"""
validates instance data against a schema
Expand All @@ -41,6 +42,7 @@ def validate_object(
mergeimports=True,
top_class=target_class.class_name,
not_closed=not_closed,
include_range_class_descendants=include_range_class_descendants,
).serialize(not_closed=not_closed)
jsonschema_obj = json.loads(jsonschemastr)
return jsonschema.validate(
Expand Down
15 changes: 15 additions & 0 deletions tests/test_utils/input/Person-01.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
id: P:004
name: eventful life
has_events:
- employed_at: ROR:1
started_at_time: "2019-01-01"
is_current: true
- started_at_time: "2023-01-01"
in_location: GEO:1234
diagnosis:
id: CODE:P1789
name: hypertension
procedure:
id: CODE:P1846
name: valve repair

7 changes: 7 additions & 0 deletions tests/test_utils/input/animals.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
animals:
- animal_family: Dog
max_age: "17 years"
breed: Golden Retriever
- animal_family: Ant
max_age: "7 years"
venom: true
36 changes: 36 additions & 0 deletions tests/test_utils/input/schema7.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
id: https://example.org/descendants
name: Test_descendants
imports:
- linkml:types
prefixes:
linkml: https://w3id.org/linkml/
classes:
Animal:
slots:
- animal_family
attributes:
max_age:
range: string
Dog:
is_a: Animal
slots:
- animal_family
- breed
Ant:
is_a: Animal
slots:
- animal_family
- venom
Container:
attributes:
animals:
multivalued: true
range: Animal
slots:
animal_family:
range: string
designates_type: true
breed:
range: string
venom:
range: boolean
37 changes: 37 additions & 0 deletions tests/test_utils/test_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
YAML_OUT = env.expected_path("data_example.out.yaml")
RDF_OUT = env.expected_path("data_example.out.ttl")

SCHEMA_DESCENDANTS = env.input_path("schema7.yaml")
DATA_DESCENDANTS = env.input_path("animals.yaml")


class TestCommandLineInterface(unittest.TestCase):
def setUp(self) -> None:
Expand Down Expand Up @@ -49,6 +52,40 @@ def test_infer_and_convert(self):
self.assertEqual(p2["age_category"], "adult")
self.assertEqual(p2["full_name"], "first2 last2")

def test_convert_including_descendants(self):
    """
    Run the converter with --include-range-class-descendants and --validate.

    The CLI is invoked on the animals data with the descendants schema,
    target class ``Container`` and ``json-ld`` output. The file at
    JSON_OUT is then read back and the subclass-specific fields of each
    animal (``breed`` for the Dog, ``venom`` for the Ant) are checked.
    """
    cli_args = [
        "--include-range-class-descendants",
        "--validate",
        "-C",
        "Container",
        "-s",
        SCHEMA_DESCENDANTS,
        "-t",
        "json-ld",
        "-o",
        JSON_OUT,
        DATA_DESCENDANTS,
    ]
    result = self.runner.invoke(cli, cli_args)
    print(result.stdout)

    # On a non-zero exit, re-raise the exception captured by the runner so
    # the test fails with the CLI's own error.
    if result.exit_code:
        print(result.exception)
        raise result.exception

    with open(JSON_OUT) as file:
        p1 = json.load(file)
    print(p1)

    animals = p1["animals"]
    self.assertEqual(animals[0]["animal_family"], "Dog")
    self.assertEqual(animals[0]["breed"], "Golden Retriever")
    self.assertEqual(animals[1]["animal_family"], "Ant")
    self.assertTrue(animals[1]["venom"])

def test_version(self):
runner = CliRunner(mix_stderr=False)
result = runner.invoke(cli, ["--version"])
Expand Down
Loading