From fca34435d7c6ab94def60a02190ba6e42a03b4da Mon Sep 17 00:00:00 2001 From: Andrew Davison Date: Fri, 14 Nov 2025 15:46:31 +0100 Subject: [PATCH] Run regression tests with both "v4" and "latest"; add some tests of the `validate()` method. --- pipeline/src/base.py | 2 +- pipeline/src/properties.py | 4 +- pipeline/tests/test_regressions.py | 124 ++++++++++++++++++----------- pipeline/tests/test_validation.py | 27 +++++++ 4 files changed, 107 insertions(+), 50 deletions(-) create mode 100644 pipeline/tests/test_validation.py diff --git a/pipeline/src/base.py b/pipeline/src/base.py index d952b2ea..ada9101b 100644 --- a/pipeline/src/base.py +++ b/pipeline/src/base.py @@ -161,7 +161,7 @@ def validate(self, ignore=None): Returns a dict containing information about any validation failures. """ - return self._validate(ignore=ignore) + return dict(self._validate(ignore=ignore)) def _validate(self, ignore=None, seen=None): # this is implemented as an internal method so that the diff --git a/pipeline/src/properties.py b/pipeline/src/properties.py index 7cb7dcd5..3b61de20 100644 --- a/pipeline/src/properties.py +++ b/pipeline/src/properties.py @@ -131,7 +131,7 @@ def validate(self, value, ignore=None, seen=None): if isinstance(item, Link): item_type = f"value contains a link to {item.allowed_types}" else: - item_type = f"value contains {type(item)}" + item_type = f"value contains {type(item).__name__}" failures["type"].append( f"{self.name}: Expected {', '.join(t.__name__ for t in self.types)}, " + item_type ) @@ -166,7 +166,7 @@ def validate(self, value, ignore=None, seen=None): if isinstance(value, Link): value_type = f"value contains a link to {value.allowed_types}" else: - value_type = f"value contains {type(value)}" + value_type = f"value contains {type(value).__name__}" failures["type"].append( f"{self.name}: Expected {', '.join(t.__name__ for t in self.types)}, " + value_type ) diff --git a/pipeline/tests/test_regressions.py b/pipeline/tests/test_regressions.py index 3b24dcbe..aa65d254 100644 --- a/pipeline/tests/test_regressions.py +++ b/pipeline/tests/test_regressions.py @@ -1,39 +1,45 @@ from datetime import date import json import os + +import pytest + from openminds import Collection, IRI -from openminds.v4 import core as omcore +import openminds.latest +import openminds.v4 from utils import build_fake_node -def test_issue_0002(): +@pytest.mark.parametrize("om", [openminds.latest, openminds.v4]) +def test_issue_0002(om): # https://github.com/openMetadataInitiative/openMINDS_Python/issues/2 # @type should not be given as a list but as a string - node = build_fake_node(omcore.Person) + node = build_fake_node(om.core.Person) data = node.to_jsonld() assert data["@type"] == "https://openminds.om-i.org/types/Person" -def test_issue_0003(): +@pytest.mark.parametrize("om", [openminds.latest, openminds.v4]) +def test_issue_0003(om): # https://github.com/openMetadataInitiative/openMINDS_Python/issues/3 # validate() does not complain about direct entries that should be lists # we address this by always wrapping a single item in a list in such cases - some_file = omcore.File( + some_file = om.core.File( iri=IRI("http://example.com/some_file.txt"), name="some_file.txt", ) - node1 = omcore.FileArchive( + node1 = om.core.FileArchive( iri=IRI("http://example.com/archive.zip"), - format=omcore.ContentType(name="application/zip"), + format=om.core.ContentType(name="application/zip"), source_data=[some_file], # multiple=True, min_items=1 ) - node2 = omcore.FileArchive( + node2 = om.core.FileArchive( iri=IRI("http://example.com/archive.zip"), - format=omcore.ContentType(name="application/zip"), + format=om.core.ContentType(name="application/zip"), source_data=some_file, # multiple=True, min_items=1 ) # on export, a single item should be wrapped in a list, where the property expects an array @@ -59,14 +65,15 @@ def test_issue_0003(): ) -def test_issue0005(): +@pytest.mark.parametrize("om", [openminds.latest, openminds.v4]) +def test_issue0005(om): # https://github.com/openMetadataInitiative/openMINDS_Python/issues/5 # validate() does not complain about list/tuple entries that should be a direct single entry - uni1 = omcore.Organization(full_name="University of This Place") - person = omcore.Person( + uni1 = om.core.Organization(full_name="University of This Place") + person = om.core.Person( given_name="A", family_name="Professor", - affiliations=[omcore.Affiliation(member_of=uni1, end_date=(2023, 9, 30))], + affiliations=[om.core.Affiliation(member_of=uni1, end_date=(2023, 9, 30))], ) failures = person.validate() assert len(failures) == 1 @@ -76,16 +83,17 @@ def test_issue0005(): assert len(failures) == 0 -def test_issue0007(): +@pytest.mark.parametrize("om", [openminds.latest, openminds.v4]) +def test_issue0007(om): # https://github.com/openMetadataInitiative/openMINDS_Python/issues/7 # Instances of embedded types with value type "array" are not correctly resolved for saving and causing an error. - person = omcore.Person(given_name="A", family_name="Professor", id="_:001") - uni1 = omcore.Organization(full_name="University of This Place", id="_:002") - uni2 = omcore.Organization(full_name="University of That Place", id="_:003") + person = om.core.Person(given_name="A", family_name="Professor", id="_:001") + uni1 = om.core.Organization(full_name="University of This Place", id="_:002") + uni2 = om.core.Organization(full_name="University of That Place", id="_:003") person.affiliations = [ - omcore.Affiliation(member_of=uni1), - omcore.Affiliation(member_of=uni2), + om.core.Affiliation(member_of=uni1), + om.core.Affiliation(member_of=uni2), ] actual = person.to_jsonld(include_empty_properties=False, embed_linked_nodes=False, with_context=True) @@ -149,17 +157,18 @@ def test_issue0007(): assert saved_data == expected_saved_data -def test_issue0008(): +@pytest.mark.parametrize("om", [openminds.latest, openminds.v4]) +def test_issue0008(om): # https://github.com/openMetadataInitiative/openMINDS_Python/issues/8 # The instance of linked types in instances of embedded types are integrated as embedded not linked # (example: person -> affiliations (embedded) -> organization (linked)) - uni1 = omcore.Organization(full_name="University of This Place", id="_:001") - person = omcore.Person( + uni1 = om.core.Organization(full_name="University of This Place", id="_:001") + person = om.core.Person( id="_:002", given_name="A", family_name="Professor", - affiliations=[omcore.Affiliation(member_of=uni1, end_date=date(2023, 9, 30))], + affiliations=[om.core.Affiliation(member_of=uni1, end_date=date(2023, 9, 30))], ) actual = person.to_jsonld(include_empty_properties=False, embed_linked_nodes=False, with_context=True) expected = { @@ -179,14 +188,15 @@ def test_issue0008(): assert actual == expected -def test_issue0026(): +@pytest.mark.parametrize("om", [openminds.latest, openminds.v4]) +def test_issue0026(om): # https://github.com/openMetadataInitiative/openMINDS_Python/issues/26 # When reading a JSON-LD file, the attributes of LinkedMetadata nodes # inside EmbeddedMetadata nodes are not set properly - uni1 = omcore.Organization(full_name="University of This Place", id="_:uthisp") - person = omcore.Person( - given_name="A", family_name="Professor", affiliations=[omcore.Affiliation(member_of=uni1)], id="_:ap" + uni1 = om.core.Organization(full_name="University of This Place", id="_:uthisp") + person = om.core.Person( + given_name="A", family_name="Professor", affiliations=[om.core.Affiliation(member_of=uni1)], id="_:ap" ) c = Collection(person) @@ -196,57 +206,59 @@ def test_issue0026(): output_paths = c.save("issue0026.jsonld", individual_files=False, include_empty_properties=False) new_collection = Collection() - new_collection.load(*output_paths) + new_collection.load(*output_paths, version=om.__name__.split(".")[1]) os.remove("issue0026.jsonld") - person_again = [item for item in new_collection if isinstance(item, omcore.Person)][0] + person_again = [item for item in new_collection if isinstance(item, om.core.Person)][0] assert len(person_again.affiliations) == 1 assert person_again.affiliations[0].member_of.full_name == "University of This Place" -def test_issue0023(): +@pytest.mark.parametrize("om", [openminds.latest, openminds.v4]) +def test_issue0023(om): # https://github.com/openMetadataInitiative/openMINDS_Python/issues/23 # If a user adds an instance/node to a collection, and then later adds linked types to the instance, # currently that is not added to the collection - uni1 = omcore.Organization(full_name="University of This Place", id="_:uthisp") - person = omcore.Person( - given_name="A", family_name="Professor", affiliations=[omcore.Affiliation(member_of=uni1)], id="_:ap" + uni1 = om.core.Organization(full_name="University of This Place", id="_:uthisp") + person = om.core.Person( + given_name="A", family_name="Professor", affiliations=[om.core.Affiliation(member_of=uni1)], id="_:ap" ) - dv = omcore.DatasetVersion(full_name="The name of the dataset version", custodians=[person], id="_:dv") + dv = om.core.DatasetVersion(full_name="The name of the dataset version", custodians=[person], id="_:dv") c = Collection(dv) # even though we add uni2 and the repository after creating the collection, # they should be included when we save the collection. - uni2 = omcore.Organization(full_name="University of That Place", id="_:uthatp") - person.affiliations.append(omcore.Affiliation(member_of=uni2)) - dv.repository = omcore.FileRepository(iri="http://example.com", id="_:fr") + uni2 = om.core.Organization(full_name="University of That Place", id="_:uthatp") + person.affiliations.append(om.core.Affiliation(member_of=uni2)) + dv.repository = om.core.FileRepository(iri="http://example.com", id="_:fr") output_paths = c.save("issue0023.jsonld", individual_files=False, include_empty_properties=False) new_collection = Collection() - new_collection.load(*output_paths) + new_collection.load(*output_paths, version=om.__name__.split(".")[1]) os.remove("issue0023.jsonld") - dv_again = [item for item in new_collection if isinstance(item, omcore.DatasetVersion)][0] - assert isinstance(dv_again.repository, omcore.FileRepository) + dv_again = [item for item in new_collection if isinstance(item, om.core.DatasetVersion)][0] + assert isinstance(dv_again.repository, om.core.FileRepository) assert dv_again.repository.iri.value == "http://example.com" assert len(dv_again.custodians[0].affiliations) == 2 assert dv_again.custodians[0].affiliations[0].member_of.full_name == "University of This Place" assert dv_again.custodians[0].affiliations[1].member_of.full_name == "University of That Place" -def test_issue0056(): +@pytest.mark.parametrize("om", [openminds.latest, openminds.v4]) +def test_issue0056(om): # https://github.com/openMetadataInitiative/openMINDS_Python/issues/56 # Since we are permissive on object creation, serialization to JSON-LD should work # even if the object gives validation failures. # However, under some circumstances, to_jsonld() produces a data structure # that cannot be saved as a JSON string. - dataset = omcore.Dataset( + dataset = om.core.Dataset( digital_identifier=[ - omcore.DOI(identifier="abc"), - omcore.DOI(identifier="def") + om.core.DOI(identifier="abc"), + om.core.DOI(identifier="def") ] ) failures = dataset.validate(ignore=["required"]) @@ -256,17 +268,35 @@ def test_issue0056(): json.dumps(data) # this should not raise an Exception -def test_issue0073(): +@pytest.mark.parametrize("om", [openminds.v4]) +def test_issue0073a(om): # https://github.com/openMetadataInitiative/openMINDS_Python/issues/73 # Infinite recursion in validate() - ds1 = omcore.DatasetVersion( + ds1 = om.core.DatasetVersion( short_name="ds1", is_alternative_version_of=None ) - ds2 = omcore.DatasetVersion( + ds2 = om.core.DatasetVersion( short_name="ds2", is_alternative_version_of=ds1 ) ds1.is_alternative_version_of = ds2 failures = ds1.validate() + + +@pytest.mark.parametrize("om", [openminds.latest]) +def test_issue0073b(om): + # https://github.com/openMetadataInitiative/openMINDS_Python/issues/73 + # Infinite recursion in validate() + ds1 = om.core.DatasetVersion( + short_name="ds1", + is_variant_of=None + ) + ds2 = om.core.DatasetVersion( + short_name="ds2", + is_variant_of=ds1 + ) + ds1.is_variant_of = ds2 + + failures = ds1.validate() diff --git a/pipeline/tests/test_validation.py b/pipeline/tests/test_validation.py new file mode 100644 index 00000000..21cfb58c --- /dev/null +++ b/pipeline/tests/test_validation.py @@ -0,0 +1,27 @@ +import openminds.v4 as om + + +def test_invalid_type(): + # invalid: type + mouse = om.controlled_terms.Species.mus_musculus + dsv = om.core.DatasetVersion(accessibility=mouse) + assert dsv.validate(ignore=["required"]) == { + "type": ["accessibility: Expected ProductAccessibility, value contains Species"] + } + + # valid + dsv = om.core.DatasetVersion(study_targets=[mouse]) + assert dsv.validate(ignore=["required"]) == {} + + # invalid: doubly-nested list + dsv = om.core.DatasetVersion(study_targets=[[mouse]]) + assert "value contains list" in dsv.validate(ignore=["required"])["type"][0] + + +def test_required(): + p = om.core.Person() + assert p.validate() == { + "required": [ + "given_name is required, but was not provided", + ] + }