diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index cf277cc2..cce93a71 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -11,10 +11,10 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.0.2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v3.1.2 + uses: actions/setup-python@v4.3.0 with: python-version: 3.9 diff --git a/sssom/cli.py b/sssom/cli.py index 8155b49d..8d640914 100644 --- a/sssom/cli.py +++ b/sssom/cli.py @@ -25,10 +25,9 @@ from sssom.constants import ( DEFAULT_VALIDATION_TYPES, - MAPPING_SET_SLOTS, - MAPPING_SLOTS, PREFIX_MAP_MODES, SchemaValidationType, + SSSOMSchemaView, ) from sssom.context import get_default_metadata @@ -628,7 +627,7 @@ def decorator(f): @main.command() @input_argument @output_option -@dynamically_generate_sssom_options(MAPPING_SLOTS) +@dynamically_generate_sssom_options(SSSOMSchemaView().mapping_slots) def filter(input: str, output: TextIO, **kwargs): """Filter a dataframe by dynamically generating queries based on user input. @@ -659,7 +658,7 @@ def filter(input: str, output: TextIO, **kwargs): type=bool, help="Multivalued slots should be replaced or not. [default: False]", ) -@dynamically_generate_sssom_options(MAPPING_SET_SLOTS) +@dynamically_generate_sssom_options(SSSOMSchemaView().mapping_set_slots) def annotate(input: str, output: TextIO, replace_multivalued: bool, **kwargs): """Annotate metadata of a mapping set. 
diff --git a/sssom/constants.py b/sssom/constants.py index 0cec65a2..bd851b1d 100644 --- a/sssom/constants.py +++ b/sssom/constants.py @@ -2,6 +2,7 @@ import pathlib from enum import Enum +from typing import List import pkg_resources from linkml_runtime.utils.schema_as_dict import schema_as_dict @@ -11,15 +12,6 @@ HERE = pathlib.Path(__file__).parent.resolve() -SCHEMA_YAML = pkg_resources.resource_filename( - "sssom_schema", "schema/sssom_schema.yaml" -) -SCHEMA_VIEW = SchemaView(SCHEMA_YAML) -# SCHEMA_VIEW = package_schemaview("sssom_schema") -SCHEMA_DICT = schema_as_dict(SCHEMA_VIEW.schema) -MAPPING_SLOTS = SCHEMA_DICT["classes"]["mapping"]["slots"] -MAPPING_SET_SLOTS = SCHEMA_DICT["classes"]["mapping set"]["slots"] - OWL_EQUIV_CLASS = "http://www.w3.org/2002/07/owl#equivalentClass" RDFS_SUBCLASS_OF = "http://www.w3.org/2000/01/rdf-schema#subClassOf" @@ -42,16 +34,6 @@ PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY, PREFIX_MAP_MODE_MERGED, ] -ENTITY_REFERENCE = "EntityReference" - -MULTIVALUED_SLOTS = [ - c for c in SCHEMA_VIEW.all_slots() if SCHEMA_VIEW.get_slot(c).multivalued -] -ENTITY_REFERENCE_SLOTS = [ - c - for c in SCHEMA_VIEW.all_slots() - if SCHEMA_VIEW.get_slot(c).range == ENTITY_REFERENCE -] # Slot Constants MIRROR_FROM = "mirror_from" @@ -177,3 +159,55 @@ class SchemaValidationType(str, Enum): SchemaValidationType.JsonSchema, SchemaValidationType.PrefixMapCompleteness, ] + + +class SSSOMSchemaView: + """ + Lazy SSSOM schema accessor; the parsed view is cached on the class. 
+ + Reason for this: https://github.com/mapping-commons/sssom-py/issues/322 + Implemented via PR: https://github.com/mapping-commons/sssom-py/pull/323 + """ + + entity_reference = "EntityReference" + yaml = pkg_resources.resource_filename("sssom_schema", "schema/sssom_schema.yaml") + _view = None + _dict = None + + @property + def view(self) -> SchemaView: + """Return the SchemaView, parsed once and shared by all instances.""" + if self._view is None: + SSSOMSchemaView._view = SchemaView(self.yaml) + return self._view + + @property + def dict(self) -> dict: + """Return the schema as a dict, built once and shared by all instances.""" + if self._dict is None: + SSSOMSchemaView._dict = schema_as_dict(self.view.schema) + return self._dict + + @property + def mapping_slots(self) -> List[str]: + """Return list of mapping slots.""" + return self.dict["classes"]["mapping"]["slots"] + + @property + def mapping_set_slots(self) -> List[str]: + """Return list of mapping set slots.""" + return self.dict["classes"]["mapping set"]["slots"] + + @property + def entity_reference_slots(self) -> List[str]: + """Return list of entity reference slots.""" + return [ + c + for c in self.view.all_slots() + if self.view.get_slot(c).range == self.entity_reference + ] + + @property + def multivalued_slots(self) -> List[str]: + """Return list of multivalued slots.""" + return [c for c in self.view.all_slots() if self.view.get_slot(c).multivalued] diff --git a/sssom/context.py b/sssom/context.py index 609aec40..98381304 100644 --- a/sssom/context.py +++ b/sssom/context.py @@ -7,7 +7,7 @@ from linkml.generators.jsonldcontextgen import ContextGenerator -from sssom.constants import SCHEMA_YAML +from sssom.constants import SSSOMSchemaView from .external_context import sssom_external_context from .typehints import Metadata, MetadataType, PrefixMap @@ -29,7 +29,7 @@ def get_jsonld_context(): :return: JSON-LD context """ - sssom_context = ContextGenerator(SCHEMA_YAML).serialize() + sssom_context = ContextGenerator(SSSOMSchemaView().yaml).serialize() return json.loads(sssom_context, 
strict=False) diff --git a/sssom/parsers.py b/sssom/parsers.py index 3bb7f95b..4ba5fcfd 100644 --- a/sssom/parsers.py +++ b/sssom/parsers.py @@ -30,8 +30,6 @@ MAPPING_JUSTIFICATION, MAPPING_JUSTIFICATION_UNSPECIFIED, MAPPING_SET_ID, - MAPPING_SET_SLOTS, - MAPPING_SLOTS, OBJECT_ID, OBJECT_LABEL, OBJECT_SOURCE, @@ -43,6 +41,7 @@ SUBJECT_LABEL, SUBJECT_SOURCE, SUBJECT_SOURCE_ID, + SSSOMSchemaView, ) from .context import ( @@ -318,11 +317,11 @@ def _get_mdict_ms_and_bad_attrs( k = str(k) v = _address_multivalued_slot(k, v) # if hasattr(Mapping, k): - if k in MAPPING_SLOTS: + if k in SSSOMSchemaView().mapping_slots: mdict[k] = v ok = True # if hasattr(MappingSet, k): - if k in MAPPING_SET_SLOTS: + if k in SSSOMSchemaView().mapping_set_slots: ms[k] = v ok = True if not ok: diff --git a/sssom/util.py b/sssom/util.py index 31b916d2..7d0fedfd 100644 --- a/sssom/util.py +++ b/sssom/util.py @@ -40,12 +40,9 @@ from .constants import ( COMMENT, CONFIDENCE, - ENTITY_REFERENCE_SLOTS, MAPPING_JUSTIFICATION, MAPPING_SET_ID, - MAPPING_SET_SLOTS, MAPPING_SET_SOURCE, - MULTIVALUED_SLOTS, OBJECT_CATEGORY, OBJECT_ID, OBJECT_LABEL, @@ -59,8 +56,6 @@ PREDICATE_MODIFIER_NOT, PREFIX_MAP_MODES, RDFS_SUBCLASS_OF, - SCHEMA_DICT, - SCHEMA_YAML, SEMAPV, SKOS_BROAD_MATCH, SKOS_CLOSE_MATCH, @@ -72,6 +67,7 @@ SUBJECT_ID, SUBJECT_LABEL, SUBJECT_SOURCE, + SSSOMSchemaView, ) from .context import ( SSSOM_BUILT_IN_PREFIXES, @@ -832,7 +828,7 @@ def inject_metadata_into_df(msdf: MappingSetDataFrame) -> MappingSetDataFrame: :return: MappingSetDataFrame with metadata as columns """ # TODO Check if 'k' is a valid 'slot' for 'mapping' [sssom.yaml] - with open(SCHEMA_YAML) as file: + with open(SSSOMSchemaView().yaml) as file: schema = yaml.safe_load(file) slots = schema["classes"]["mapping"]["slots"] if msdf.metadata is not None and msdf.df is not None: @@ -957,8 +953,8 @@ def to_mapping_set_dataframe(doc: MappingSetDocument) -> MappingSetDataFrame: data = [] slots_with_double_as_range = [ s - for s in 
SCHEMA_DICT["slots"].keys() - if SCHEMA_DICT["slots"][s]["range"] == "double" + for s in SSSOMSchemaView().dict["slots"].keys() + if SSSOMSchemaView().dict["slots"][s]["range"] == "double" ] if doc.mapping_set.mappings is not None: for mapping in doc.mapping_set.mappings: @@ -989,21 +985,22 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) -> :return: Dictionary """ map_dict = {} + schema_dict = SSSOMSchemaView().dict slots_with_double_as_range = [ s - for s in SCHEMA_DICT["slots"].keys() - if SCHEMA_DICT["slots"][s]["range"] == "double" + for s in schema_dict["slots"].keys() + if schema_dict["slots"][s]["range"] == "double" ] for property in map_obj: if map_obj[property] is not None: if isinstance(map_obj[property], list): # IF object is an enum if ( - SCHEMA_DICT["slots"][property]["range"] - in SCHEMA_DICT["enums"].keys() + schema_dict["slots"][property]["range"] + in schema_dict["enums"].keys() ): # IF object is a multivalued enum - if SCHEMA_DICT["slots"][property]["multivalued"]: + if schema_dict["slots"][property]["multivalued"]: map_dict[property] = "|".join( enum_value.code.text for enum_value in map_obj[property] ) @@ -1019,8 +1016,8 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) -> else: # IF object is an enum if ( - SCHEMA_DICT["slots"][property]["range"] - in SCHEMA_DICT["enums"].keys() + schema_dict["slots"][property]["range"] + in schema_dict["enums"].keys() ): map_dict[property] = map_obj[property].code.text else: @@ -1099,8 +1096,9 @@ def curie_from_uri(uri: str, prefix_map: Mapping[str, str]) -> str: def get_prefixes_used_in_table(df: pd.DataFrame) -> List[str]: """Get a list of prefixes used in CURIEs in key feature columns in a dataframe.""" prefixes = SSSOM_BUILT_IN_PREFIXES + schemaview_object = SSSOMSchemaView() if not df.empty: - for col in ENTITY_REFERENCE_SLOTS: + for col in schemaview_object.entity_reference_slots: if col in df.columns: for v in df[col].values: pref = 
get_prefix_from_curie(str(v)) @@ -1266,7 +1264,7 @@ def is_multivalued_slot(slot: str) -> bool: # view = SchemaView('schema/sssom.yaml') # return view.get_slot(slot).multivalued - return slot in MULTIVALUED_SLOTS + return slot in SSSOMSchemaView().multivalued_slots def reconcile_prefix_and_data( @@ -1329,7 +1327,7 @@ def reconcile_prefix_and_data( # Data editing if len(data_switch_dict) > 0: # Read schema file - slots = SCHEMA_DICT["slots"] + slots = SSSOMSchemaView().dict["slots"] entity_reference_columns = [ k for k, v in slots.items() if v["range"] == "EntityReference" ] @@ -1359,7 +1357,7 @@ def sort_df_rows_columns( """ if by_columns and len(df.columns) > 0: column_sequence = [ - col for col in SCHEMA_DICT["slots"].keys() if col in df.columns + col for col in SSSOMSchemaView().dict["slots"].keys() if col in df.columns ] df = df.reindex(column_sequence, axis=1) if by_rows and len(df) > 0: @@ -1380,7 +1378,9 @@ def get_all_prefixes(msdf: MappingSetDataFrame) -> list: metadata_keys = list(msdf.metadata.keys()) df_columns_list = msdf.df.columns.to_list() # type: ignore all_keys = metadata_keys + df_columns_list - ent_ref_slots = [s for s in all_keys if s in ENTITY_REFERENCE_SLOTS] + ent_ref_slots = [ + s for s in all_keys if s in SSSOMSchemaView().entity_reference_slots + ] for slot in ent_ref_slots: if slot in metadata_keys: @@ -1433,11 +1433,11 @@ def augment_metadata( :return: MSDF with updated metadata. """ are_params_slots(meta) - + multivalued_slots = SSSOMSchemaView().multivalued_slots if msdf.metadata: for k, v in meta.items(): # If slot is multivalued, add to list. 
- if k in MULTIVALUED_SLOTS and not replace_multivalued: + if k in multivalued_slots and not replace_multivalued: tmp_value: list = [] if isinstance(msdf.metadata[k], str): tmp_value = [msdf.metadata[k]] @@ -1450,7 +1450,7 @@ def augment_metadata( ) tmp_value.extend(v) msdf.metadata[k] = list(set(tmp_value)) - elif k in MULTIVALUED_SLOTS and replace_multivalued: + elif k in multivalued_slots and replace_multivalued: msdf.metadata[k] = list(v) else: msdf.metadata[k] = v[0] @@ -1466,13 +1466,14 @@ def are_params_slots(params: dict) -> bool: :return: True/False """ empty_params = {k: v for k, v in params.items() if v is None or v == ""} + mapping_set_slots = SSSOMSchemaView().mapping_set_slots if len(empty_params) > 0: logging.info(f"Parameters: {empty_params.keys()} has(ve) no value.") - legit_params = all(p in MAPPING_SET_SLOTS for p in params.keys()) + legit_params = all(p in mapping_set_slots for p in params.keys()) if not legit_params: - invalids = [p for p in params if p not in MAPPING_SET_SLOTS] + invalids = [p for p in params if p not in mapping_set_slots] raise ValueError( - f"The params are invalid: {invalids}. Should be any of the following: {MAPPING_SET_SLOTS}" + f"The params are invalid: {invalids}. 
Should be any of the following: {mapping_set_slots}" ) return True diff --git a/sssom/validators.py b/sssom/validators.py index 3e1cf90b..87e78653 100644 --- a/sssom/validators.py +++ b/sssom/validators.py @@ -8,12 +8,11 @@ from linkml.validators.sparqlvalidator import SparqlDataValidator # noqa: F401 from sssom_schema import MappingSet +from sssom.constants import SchemaValidationType, SSSOMSchemaView from sssom.context import add_built_in_prefixes_to_prefix_map from sssom.parsers import to_mapping_set_document from sssom.util import MappingSetDataFrame, get_all_prefixes -from .constants import SCHEMA_YAML, SchemaValidationType - def validate( msdf: MappingSetDataFrame, validation_types: List[SchemaValidationType] @@ -37,7 +36,8 @@ def validate_json_schema(msdf: MappingSetDataFrame) -> None: :param msdf: MappingSetDataFrame to eb validated. """ - validator = JsonSchemaDataValidator(SCHEMA_YAML) + schema_view_object = SSSOMSchemaView() + validator = JsonSchemaDataValidator(schema_view_object.yaml) mapping_set = to_mapping_set_document(msdf).mapping_set validator.validate_object(mapping_set, MappingSet) diff --git a/sssom/writers.py b/sssom/writers.py index ec0fa14e..c3b2452c 100644 --- a/sssom/writers.py +++ b/sssom/writers.py @@ -16,9 +16,9 @@ # from .sssom_datamodel import slots from sssom_schema import slots +from sssom.constants import SSSOMSchemaView from sssom.validators import check_all_prefixes_in_curie_map -from .constants import SCHEMA_YAML from .parsers import to_mapping_set_document from .util import ( PREFIX_MAP_KEY, @@ -284,7 +284,7 @@ def to_rdf_graph(msdf: MappingSetDataFrame) -> Graph: # os.remove("sssom.ttl") # remove the intermediate file. 
graph = rdflib_dumper.as_rdf_graph( element=doc.mapping_set, - schemaview=SchemaView(SCHEMA_YAML), + schemaview=SchemaView(SSSOMSchemaView().yaml), prefix_map=msdf.prefix_map, ) return graph diff --git a/tests/test_resources.py b/tests/test_resources.py index b1151fe9..b83814b9 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -5,7 +5,7 @@ import os import unittest -from sssom.constants import SCHEMA_YAML +from sssom.constants import SSSOMSchemaView class TestResources(unittest.TestCase): @@ -13,4 +13,4 @@ class TestResources(unittest.TestCase): def test_exists(self): """Test the schema YAML file is available to the package.""" - self.assertTrue(os.path.exists(SCHEMA_YAML)) + self.assertTrue(os.path.exists(SSSOMSchemaView().yaml)) diff --git a/tests/test_schemaview.py b/tests/test_schemaview.py new file mode 100644 index 00000000..7ab348f0 --- /dev/null +++ b/tests/test_schemaview.py @@ -0,0 +1,16 @@ +"""Tests for the SSSOMSchemaView class.""" +import unittest + +from sssom.constants import SSSOMSchemaView + + +class TestSort(unittest.TestCase): + """A test case for SSSOMSchemaView.""" + + def setUp(self) -> None: + """Create the SSSOMSchemaView instance used by the tests.""" + self.sv = SSSOMSchemaView() + + def test_slots(self) -> None: + """Test that the schema lists 39 mapping slots.""" + self.assertEqual(len(self.sv.mapping_slots), 39) diff --git a/tests/test_sort.py b/tests/test_sort.py index 556341ba..cc840fef 100644 --- a/tests/test_sort.py +++ b/tests/test_sort.py @@ -2,7 +2,7 @@ import unittest -from sssom.constants import SCHEMA_DICT +from sssom.constants import SSSOMSchemaView from sssom.parsers import parse_sssom_table from sssom.util import sort_df_rows_columns from tests.constants import data_dir @@ -19,6 +19,8 @@ def test_sort(self): """Test sorting of columns.""" new_df = sort_df_rows_columns(self.msdf.df) column_sequence = [ - col for col in SCHEMA_DICT["slots"].keys() if col in new_df.columns + col + for col in 
SSSOMSchemaView().dict["slots"].keys() + if col in new_df.columns ] self.assertListEqual(column_sequence, list(new_df.columns)) diff --git a/tox.ini b/tox.ini index 1c9489ca..e8518b84 100644 --- a/tox.ini +++ b/tox.ini @@ -52,7 +52,7 @@ description = Run the flake8 code quality checker. [testenv:mypy] deps = mypy skip_install = true -commands = mypy --install-types --non-interactive --ignore-missing-imports sssom/ setup.py +commands = mypy --install-types --non-interactive --ignore-missing-imports --implicit-optional sssom/ setup.py description = Run the mypy tool to check static typing on the project. [testenv:manifest]