diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index cf277cc2..cce93a71 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -11,10 +11,10 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.0.2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v3.1.2 + uses: actions/setup-python@v4.3.0 with: python-version: 3.9 diff --git a/sssom/cli.py b/sssom/cli.py index 8155b49d..33819221 100644 --- a/sssom/cli.py +++ b/sssom/cli.py @@ -25,10 +25,9 @@ from sssom.constants import ( DEFAULT_VALIDATION_TYPES, - MAPPING_SET_SLOTS, - MAPPING_SLOTS, PREFIX_MAP_MODES, SchemaValidationType, + SSSOMSchemaView, ) from sssom.context import get_default_metadata @@ -62,6 +61,12 @@ ) from .writers import write_table +SSSOM_SV_OBJECT = ( + SSSOMSchemaView.instance + if hasattr(SSSOMSchemaView, "instance") + else SSSOMSchemaView() +) + # Click input options common across commands input_argument = click.argument("input", required=True, type=click.Path()) @@ -628,7 +633,7 @@ def decorator(f): @main.command() @input_argument @output_option -@dynamically_generate_sssom_options(MAPPING_SLOTS) +@dynamically_generate_sssom_options(SSSOM_SV_OBJECT.mapping_slots) def filter(input: str, output: TextIO, **kwargs): """Filter a dataframe by dynamically generating queries based on user input. @@ -659,7 +664,7 @@ def filter(input: str, output: TextIO, **kwargs): type=bool, help="Multivalued slots should be replaced or not. [default: False]", ) -@dynamically_generate_sssom_options(MAPPING_SET_SLOTS) +@dynamically_generate_sssom_options(SSSOM_SV_OBJECT.mapping_set_slots) def annotate(input: str, output: TextIO, replace_multivalued: bool, **kwargs): """Annotate metadata of a mapping set. 
diff --git a/sssom/constants.py b/sssom/constants.py index 0cec65a2..e44870ca 100644 --- a/sssom/constants.py +++ b/sssom/constants.py @@ -2,6 +2,7 @@ import pathlib from enum import Enum +from typing import List import pkg_resources from linkml_runtime.utils.schema_as_dict import schema_as_dict @@ -14,11 +15,8 @@ SCHEMA_YAML = pkg_resources.resource_filename( "sssom_schema", "schema/sssom_schema.yaml" ) -SCHEMA_VIEW = SchemaView(SCHEMA_YAML) + # SCHEMA_VIEW = package_schemaview("sssom_schema") -SCHEMA_DICT = schema_as_dict(SCHEMA_VIEW.schema) -MAPPING_SLOTS = SCHEMA_DICT["classes"]["mapping"]["slots"] -MAPPING_SET_SLOTS = SCHEMA_DICT["classes"]["mapping set"]["slots"] OWL_EQUIV_CLASS = "http://www.w3.org/2002/07/owl#equivalentClass" RDFS_SUBCLASS_OF = "http://www.w3.org/2000/01/rdf-schema#subClassOf" @@ -44,15 +42,6 @@ ] ENTITY_REFERENCE = "EntityReference" -MULTIVALUED_SLOTS = [ - c for c in SCHEMA_VIEW.all_slots() if SCHEMA_VIEW.get_slot(c).multivalued -] -ENTITY_REFERENCE_SLOTS = [ - c - for c in SCHEMA_VIEW.all_slots() - if SCHEMA_VIEW.get_slot(c).range == ENTITY_REFERENCE -] - # Slot Constants MIRROR_FROM = "mirror_from" REGISTRY_CONFIDENCE = "registry_confidence" @@ -177,3 +166,59 @@ class SchemaValidationType(str, Enum): SchemaValidationType.JsonSchema, SchemaValidationType.PrefixMapCompleteness, ] + + +class SSSOMSchemaView(object): + """ + SchemaView class from linkml which is instantiated when necessary. 
+ + Reason for this: https://github.com/mapping-commons/sssom-py/issues/322 + Implemented via PR: https://github.com/mapping-commons/sssom-py/pull/323 + """ + + _view = None + _dict = None + + def __new__(cls): + """Create an instance of the SSSOM schema view if non-existent.""" + if not hasattr(cls, "instance"): + cls.instance = super(SSSOMSchemaView, cls).__new__(cls) + return cls.instance + + @property + def view(self) -> SchemaView: + """Return SchemaView object.""" + if self._view is None: + self._view = SchemaView(SCHEMA_YAML) + return self._view + + @property + def dict(self) -> dict: + """Return SchemaView as a dictionary.""" + if self._dict is None: + self._dict = schema_as_dict(self.view.schema) + return self._dict + + @property + def mapping_slots(self) -> List[str]: + """Return list of mapping slots.""" + return self.view.get_class("mapping").slots + + @property + def mapping_set_slots(self) -> List[str]: + """Return list of mapping set slots.""" + return self.view.get_class("mapping set").slots + + @property + def multivalued_slots(self) -> List[str]: + """Return list of multivalued slots.""" + return [c for c in self.view.all_slots() if self.view.get_slot(c).multivalued] + + @property + def entity_reference_slots(self) -> List[str]: + """Return list of entity reference slots.""" + return [ + c + for c in self.view.all_slots() + if self.view.get_slot(c).range == ENTITY_REFERENCE + ] diff --git a/sssom/parsers.py b/sssom/parsers.py index 3bb7f95b..d40925ee 100644 --- a/sssom/parsers.py +++ b/sssom/parsers.py @@ -30,8 +30,6 @@ MAPPING_JUSTIFICATION, MAPPING_JUSTIFICATION_UNSPECIFIED, MAPPING_SET_ID, - MAPPING_SET_SLOTS, - MAPPING_SLOTS, OBJECT_ID, OBJECT_LABEL, OBJECT_SOURCE, @@ -43,6 +41,7 @@ SUBJECT_LABEL, SUBJECT_SOURCE, SUBJECT_SOURCE_ID, + SSSOMSchemaView, ) from .context import ( @@ -310,7 +309,9 @@ def _get_mdict_ms_and_bad_attrs( ) -> Tuple[dict, MappingSet, Counter]: mdict = {} - + sssom_schema_object = ( + SSSOMSchemaView.instance if 
SSSOMSchemaView.instance else SSSOMSchemaView() + ) for k, v in row.items(): if v and v == v: ok = False @@ -318,11 +319,11 @@ def _get_mdict_ms_and_bad_attrs( k = str(k) v = _address_multivalued_slot(k, v) # if hasattr(Mapping, k): - if k in MAPPING_SLOTS: + if k in sssom_schema_object.mapping_slots: mdict[k] = v ok = True # if hasattr(MappingSet, k): - if k in MAPPING_SET_SLOTS: + if k in sssom_schema_object.mapping_set_slots: ms[k] = v ok = True if not ok: diff --git a/sssom/util.py b/sssom/util.py index 31b916d2..a059ae58 100644 --- a/sssom/util.py +++ b/sssom/util.py @@ -40,12 +40,9 @@ from .constants import ( COMMENT, CONFIDENCE, - ENTITY_REFERENCE_SLOTS, MAPPING_JUSTIFICATION, MAPPING_SET_ID, - MAPPING_SET_SLOTS, MAPPING_SET_SOURCE, - MULTIVALUED_SLOTS, OBJECT_CATEGORY, OBJECT_ID, OBJECT_LABEL, @@ -59,7 +56,6 @@ PREDICATE_MODIFIER_NOT, PREFIX_MAP_MODES, RDFS_SUBCLASS_OF, - SCHEMA_DICT, SCHEMA_YAML, SEMAPV, SKOS_BROAD_MATCH, @@ -72,6 +68,7 @@ SUBJECT_ID, SUBJECT_LABEL, SUBJECT_SOURCE, + SSSOMSchemaView, ) from .context import ( SSSOM_BUILT_IN_PREFIXES, @@ -102,6 +99,12 @@ #: The 3 columns whose combination would be used as primary keys while merging/grouping KEY_FEATURES = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID] +SSSOM_SV_OBJECT = ( + SSSOMSchemaView.instance + if hasattr(SSSOMSchemaView, "instance") + else SSSOMSchemaView() +) + @dataclass class MappingSetDataFrame: @@ -957,8 +960,8 @@ def to_mapping_set_dataframe(doc: MappingSetDocument) -> MappingSetDataFrame: data = [] slots_with_double_as_range = [ s - for s in SCHEMA_DICT["slots"].keys() - if SCHEMA_DICT["slots"][s]["range"] == "double" + for s in SSSOM_SV_OBJECT.dict["slots"].keys() + if SSSOM_SV_OBJECT.dict["slots"][s]["range"] == "double" ] if doc.mapping_set.mappings is not None: for mapping in doc.mapping_set.mappings: @@ -991,19 +994,19 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) -> map_dict = {} slots_with_double_as_range = [ s - for s in 
SCHEMA_DICT["slots"].keys() - if SCHEMA_DICT["slots"][s]["range"] == "double" + for s in SSSOM_SV_OBJECT.dict["slots"].keys() + if SSSOM_SV_OBJECT.dict["slots"][s]["range"] == "double" ] for property in map_obj: if map_obj[property] is not None: if isinstance(map_obj[property], list): # IF object is an enum if ( - SCHEMA_DICT["slots"][property]["range"] - in SCHEMA_DICT["enums"].keys() + SSSOM_SV_OBJECT.dict["slots"][property]["range"] + in SSSOM_SV_OBJECT.dict["enums"].keys() ): # IF object is a multivalued enum - if SCHEMA_DICT["slots"][property]["multivalued"]: + if SSSOM_SV_OBJECT.dict["slots"][property]["multivalued"]: map_dict[property] = "|".join( enum_value.code.text for enum_value in map_obj[property] ) @@ -1019,8 +1022,8 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) -> else: # IF object is an enum if ( - SCHEMA_DICT["slots"][property]["range"] - in SCHEMA_DICT["enums"].keys() + SSSOM_SV_OBJECT.dict["slots"][property]["range"] + in SSSOM_SV_OBJECT.dict["enums"].keys() ): map_dict[property] = map_obj[property].code.text else: @@ -1100,7 +1103,7 @@ def get_prefixes_used_in_table(df: pd.DataFrame) -> List[str]: """Get a list of prefixes used in CURIEs in key feature columns in a dataframe.""" prefixes = SSSOM_BUILT_IN_PREFIXES if not df.empty: - for col in ENTITY_REFERENCE_SLOTS: + for col in SSSOM_SV_OBJECT.entity_reference_slots: if col in df.columns: for v in df[col].values: pref = get_prefix_from_curie(str(v)) @@ -1265,8 +1268,7 @@ def is_multivalued_slot(slot: str) -> bool: # Ideally: # view = SchemaView('schema/sssom.yaml') # return view.get_slot(slot).multivalued - - return slot in MULTIVALUED_SLOTS + return slot in SSSOM_SV_OBJECT.multivalued_slots def reconcile_prefix_and_data( @@ -1329,7 +1331,7 @@ def reconcile_prefix_and_data( # Data editing if len(data_switch_dict) > 0: # Read schema file - slots = SCHEMA_DICT["slots"] + slots = SSSOM_SV_OBJECT.dict["slots"] entity_reference_columns = [ k for k, v in 
slots.items() if v["range"] == "EntityReference" ] @@ -1359,7 +1361,7 @@ def sort_df_rows_columns( """ if by_columns and len(df.columns) > 0: column_sequence = [ - col for col in SCHEMA_DICT["slots"].keys() if col in df.columns + col for col in SSSOM_SV_OBJECT.dict["slots"].keys() if col in df.columns ] df = df.reindex(column_sequence, axis=1) if by_rows and len(df) > 0: @@ -1380,7 +1382,9 @@ def get_all_prefixes(msdf: MappingSetDataFrame) -> list: metadata_keys = list(msdf.metadata.keys()) df_columns_list = msdf.df.columns.to_list() # type: ignore all_keys = metadata_keys + df_columns_list - ent_ref_slots = [s for s in all_keys if s in ENTITY_REFERENCE_SLOTS] + ent_ref_slots = [ + s for s in all_keys if s in SSSOM_SV_OBJECT.entity_reference_slots + ] for slot in ent_ref_slots: if slot in metadata_keys: @@ -1437,7 +1441,7 @@ def augment_metadata( if msdf.metadata: for k, v in meta.items(): # If slot is multivalued, add to list. - if k in MULTIVALUED_SLOTS and not replace_multivalued: + if k in SSSOM_SV_OBJECT.multivalued_slots and not replace_multivalued: tmp_value: list = [] if isinstance(msdf.metadata[k], str): tmp_value = [msdf.metadata[k]] @@ -1450,7 +1454,7 @@ def augment_metadata( ) tmp_value.extend(v) msdf.metadata[k] = list(set(tmp_value)) - elif k in MULTIVALUED_SLOTS and replace_multivalued: + elif k in SSSOM_SV_OBJECT.multivalued_slots and replace_multivalued: msdf.metadata[k] = list(v) else: msdf.metadata[k] = v[0] @@ -1469,10 +1473,10 @@ def are_params_slots(params: dict) -> bool: if len(empty_params) > 0: logging.info(f"Parameters: {empty_params.keys()} has(ve) no value.") - legit_params = all(p in MAPPING_SET_SLOTS for p in params.keys()) + legit_params = all(p in SSSOM_SV_OBJECT.mapping_set_slots for p in params.keys()) if not legit_params: - invalids = [p for p in params if p not in MAPPING_SET_SLOTS] + invalids = [p for p in params if p not in SSSOM_SV_OBJECT.mapping_set_slots] raise ValueError( - f"The params are invalid: {invalids}. 
Should be any of the following: {MAPPING_SET_SLOTS}" + f"The params are invalid: {invalids}. Should be any of the following: {SSSOM_SV_OBJECT.mapping_set_slots}" ) return True diff --git a/tests/test_sort.py b/tests/test_sort.py index 556341ba..0836d9f1 100644 --- a/tests/test_sort.py +++ b/tests/test_sort.py @@ -2,11 +2,13 @@ import unittest -from sssom.constants import SCHEMA_DICT +from sssom.constants import SSSOMSchemaView from sssom.parsers import parse_sssom_table from sssom.util import sort_df_rows_columns from tests.constants import data_dir +SCHEMA_DICT = SSSOMSchemaView.instance.dict + class TestSort(unittest.TestCase): """A test case for sorting msdf columns.""" diff --git a/tox.ini b/tox.ini index 1c9489ca..665adf36 100644 --- a/tox.ini +++ b/tox.ini @@ -15,7 +15,7 @@ envlist = commands = python -m pytest deps = - linkml + linkml==1.3.13 extras = test description = Run unit tests with pytest. This is a special environment that does not get a name, and @@ -52,7 +52,7 @@ description = Run the flake8 code quality checker. [testenv:mypy] deps = mypy skip_install = true -commands = mypy --install-types --non-interactive --ignore-missing-imports sssom/ setup.py +commands = mypy --install-types --non-interactive --ignore-missing-imports --implicit-optional sssom/ setup.py description = Run the mypy tool to check static typing on the project. [testenv:manifest]