From e4288f4fad9641186406e111b7ab19658ebaeade Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Wed, 9 Nov 2022 10:48:05 -0600 Subject: [PATCH 01/16] Update pypi workflow versions --- .github/workflows/pypi-publish.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index cf277cc2..cce93a71 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -11,10 +11,10 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.0.2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v3.1.2 + uses: actions/setup-python@v4.3.0 with: python-version: 3.9 From 2ce9be8d215e669b27e2cbfb1a90488dd360e14d Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Wed, 9 Nov 2022 10:49:01 -0600 Subject: [PATCH 02/16] ignore mypy errors for now --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 1c9489ca..e8518b84 100644 --- a/tox.ini +++ b/tox.ini @@ -52,7 +52,7 @@ description = Run the flake8 code quality checker. [testenv:mypy] deps = mypy skip_install = true -commands = mypy --install-types --non-interactive --ignore-missing-imports sssom/ setup.py +commands = mypy --install-types --non-interactive --ignore-missing-imports --implicit-optional sssom/ setup.py description = Run the mypy tool to check static typing on the project. [testenv:manifest] From 28bb5814ae25adef5d3b926d210e1477519f5a8c Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Wed, 9 Nov 2022 13:55:36 -0600 Subject: [PATCH 03/16] SchemaView called whenever necessary. 
--- sssom/cli.py | 7 +++--- sssom/constants.py | 58 ++++++++++++++++++++++++++++++++++++---------- sssom/parsers.py | 9 ++++--- sssom/util.py | 46 ++++++++++++++++++++++-------------- tests/test_sort.py | 4 +++- 5 files changed, 84 insertions(+), 40 deletions(-) diff --git a/sssom/cli.py b/sssom/cli.py index 8155b49d..8d640914 100644 --- a/sssom/cli.py +++ b/sssom/cli.py @@ -25,10 +25,9 @@ from sssom.constants import ( DEFAULT_VALIDATION_TYPES, - MAPPING_SET_SLOTS, - MAPPING_SLOTS, PREFIX_MAP_MODES, SchemaValidationType, + SSSOMSchemaView, ) from sssom.context import get_default_metadata @@ -628,7 +627,7 @@ def decorator(f): @main.command() @input_argument @output_option -@dynamically_generate_sssom_options(MAPPING_SLOTS) +@dynamically_generate_sssom_options(SSSOMSchemaView().mapping_slots) def filter(input: str, output: TextIO, **kwargs): """Filter a dataframe by dynamically generating queries based on user input. @@ -659,7 +658,7 @@ def filter(input: str, output: TextIO, **kwargs): type=bool, help="Multivalued slots should be replaced or not. [default: False]", ) -@dynamically_generate_sssom_options(MAPPING_SET_SLOTS) +@dynamically_generate_sssom_options(SSSOMSchemaView().mapping_set_slots) def annotate(input: str, output: TextIO, replace_multivalued: bool, **kwargs): """Annotate metadata of a mapping set. 
diff --git a/sssom/constants.py b/sssom/constants.py index 0cec65a2..a800830c 100644 --- a/sssom/constants.py +++ b/sssom/constants.py @@ -2,6 +2,7 @@ import pathlib from enum import Enum +from typing import List import pkg_resources from linkml_runtime.utils.schema_as_dict import schema_as_dict @@ -14,11 +15,8 @@ SCHEMA_YAML = pkg_resources.resource_filename( "sssom_schema", "schema/sssom_schema.yaml" ) -SCHEMA_VIEW = SchemaView(SCHEMA_YAML) + # SCHEMA_VIEW = package_schemaview("sssom_schema") -SCHEMA_DICT = schema_as_dict(SCHEMA_VIEW.schema) -MAPPING_SLOTS = SCHEMA_DICT["classes"]["mapping"]["slots"] -MAPPING_SET_SLOTS = SCHEMA_DICT["classes"]["mapping set"]["slots"] OWL_EQUIV_CLASS = "http://www.w3.org/2002/07/owl#equivalentClass" RDFS_SUBCLASS_OF = "http://www.w3.org/2000/01/rdf-schema#subClassOf" @@ -44,14 +42,14 @@ ] ENTITY_REFERENCE = "EntityReference" -MULTIVALUED_SLOTS = [ - c for c in SCHEMA_VIEW.all_slots() if SCHEMA_VIEW.get_slot(c).multivalued -] -ENTITY_REFERENCE_SLOTS = [ - c - for c in SCHEMA_VIEW.all_slots() - if SCHEMA_VIEW.get_slot(c).range == ENTITY_REFERENCE -] +# MULTIVALUED_SLOTS = [ +# c for c in SCHEMA_VIEW.all_slots() if SCHEMA_VIEW.get_slot(c).multivalued +# ] +# ENTITY_REFERENCE_SLOTS = [ +# c +# for c in SCHEMA_VIEW.all_slots() +# if SCHEMA_VIEW.get_slot(c).range == ENTITY_REFERENCE +# ] # Slot Constants MIRROR_FROM = "mirror_from" @@ -177,3 +175,39 @@ class SchemaValidationType(str, Enum): SchemaValidationType.JsonSchema, SchemaValidationType.PrefixMapCompleteness, ] + + +class SSSOMSchemaView: + """ + SchemaView class from linkml which is instantiated when necessary. 
+ + Reason for this: https://github.com/mapping-commons/sssom-py/issues/322 + Implemented via PR: https://github.com/mapping-commons/sssom-py/pull/323 + """ + + _view = None + _dict = None + + @property + def view(self) -> SchemaView: + """Return SchemaView object.""" + if self._view is None: + self._view = SchemaView(SCHEMA_YAML) + return self._view + + @property + def dict(self) -> dict: + """Return SchemaView as a dictionary.""" + if self._dict is None: + self._dict = schema_as_dict(self.view.schema) + return self._dict + + @property + def mapping_slots(self) -> List[str]: + """Return list of mapping slots.""" + return self.dict["classes"]["mapping"]["slots"] + + @property + def mapping_set_slots(self) -> List[str]: + """Return list of mapping set slots.""" + return self.dict["classes"]["mapping set"]["slots"] diff --git a/sssom/parsers.py b/sssom/parsers.py index 3bb7f95b..41ca3a12 100644 --- a/sssom/parsers.py +++ b/sssom/parsers.py @@ -30,8 +30,6 @@ MAPPING_JUSTIFICATION, MAPPING_JUSTIFICATION_UNSPECIFIED, MAPPING_SET_ID, - MAPPING_SET_SLOTS, - MAPPING_SLOTS, OBJECT_ID, OBJECT_LABEL, OBJECT_SOURCE, @@ -43,6 +41,7 @@ SUBJECT_LABEL, SUBJECT_SOURCE, SUBJECT_SOURCE_ID, + SSSOMSchemaView, ) from .context import ( @@ -310,7 +309,7 @@ def _get_mdict_ms_and_bad_attrs( ) -> Tuple[dict, MappingSet, Counter]: mdict = {} - + sssom_schema_object = SSSOMSchemaView() for k, v in row.items(): if v and v == v: ok = False @@ -318,11 +317,11 @@ def _get_mdict_ms_and_bad_attrs( k = str(k) v = _address_multivalued_slot(k, v) # if hasattr(Mapping, k): - if k in MAPPING_SLOTS: + if k in sssom_schema_object.mapping_slots: mdict[k] = v ok = True # if hasattr(MappingSet, k): - if k in MAPPING_SET_SLOTS: + if k in sssom_schema_object.mapping_set_slots: ms[k] = v ok = True if not ok: diff --git a/sssom/util.py b/sssom/util.py index 31b916d2..5323e6ac 100644 --- a/sssom/util.py +++ b/sssom/util.py @@ -40,12 +40,10 @@ from .constants import ( COMMENT, CONFIDENCE, - ENTITY_REFERENCE_SLOTS, 
+ ENTITY_REFERENCE, MAPPING_JUSTIFICATION, MAPPING_SET_ID, - MAPPING_SET_SLOTS, MAPPING_SET_SOURCE, - MULTIVALUED_SLOTS, OBJECT_CATEGORY, OBJECT_ID, OBJECT_LABEL, @@ -59,7 +57,6 @@ PREDICATE_MODIFIER_NOT, PREFIX_MAP_MODES, RDFS_SUBCLASS_OF, - SCHEMA_DICT, SCHEMA_YAML, SEMAPV, SKOS_BROAD_MATCH, @@ -72,6 +69,7 @@ SUBJECT_ID, SUBJECT_LABEL, SUBJECT_SOURCE, + SSSOMSchemaView, ) from .context import ( SSSOM_BUILT_IN_PREFIXES, @@ -102,6 +100,18 @@ #: The 3 columns whose combination would be used as primary keys while merging/grouping KEY_FEATURES = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID] +SSSOM_SV_OBJECT = SSSOMSchemaView() +MULTIVALUED_SLOTS = [ + c + for c in SSSOM_SV_OBJECT.view.all_slots() + if SSSOM_SV_OBJECT.view.get_slot(c).multivalued +] +ENTITY_REFERENCE_SLOTS = [ + c + for c in SSSOM_SV_OBJECT.view.all_slots() + if SSSOM_SV_OBJECT.view.get_slot(c).range == ENTITY_REFERENCE +] + @dataclass class MappingSetDataFrame: @@ -957,8 +967,8 @@ def to_mapping_set_dataframe(doc: MappingSetDocument) -> MappingSetDataFrame: data = [] slots_with_double_as_range = [ s - for s in SCHEMA_DICT["slots"].keys() - if SCHEMA_DICT["slots"][s]["range"] == "double" + for s in SSSOM_SV_OBJECT.dict["slots"].keys() + if SSSOM_SV_OBJECT.dict["slots"][s]["range"] == "double" ] if doc.mapping_set.mappings is not None: for mapping in doc.mapping_set.mappings: @@ -991,19 +1001,19 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) -> map_dict = {} slots_with_double_as_range = [ s - for s in SCHEMA_DICT["slots"].keys() - if SCHEMA_DICT["slots"][s]["range"] == "double" + for s in SSSOM_SV_OBJECT.dict["slots"].keys() + if SSSOM_SV_OBJECT.dict["slots"][s]["range"] == "double" ] for property in map_obj: if map_obj[property] is not None: if isinstance(map_obj[property], list): # IF object is an enum if ( - SCHEMA_DICT["slots"][property]["range"] - in SCHEMA_DICT["enums"].keys() + SSSOM_SV_OBJECT.dict["slots"][property]["range"] + in SSSOM_SV_OBJECT.dict["enums"].keys() ): 
# IF object is a multivalued enum - if SCHEMA_DICT["slots"][property]["multivalued"]: + if SSSOM_SV_OBJECT.dict["slots"][property]["multivalued"]: map_dict[property] = "|".join( enum_value.code.text for enum_value in map_obj[property] ) @@ -1019,8 +1029,8 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) -> else: # IF object is an enum if ( - SCHEMA_DICT["slots"][property]["range"] - in SCHEMA_DICT["enums"].keys() + SSSOM_SV_OBJECT.dict["slots"][property]["range"] + in SSSOM_SV_OBJECT.dict["enums"].keys() ): map_dict[property] = map_obj[property].code.text else: @@ -1329,7 +1339,7 @@ def reconcile_prefix_and_data( # Data editing if len(data_switch_dict) > 0: # Read schema file - slots = SCHEMA_DICT["slots"] + slots = SSSOM_SV_OBJECT.dict["slots"] entity_reference_columns = [ k for k, v in slots.items() if v["range"] == "EntityReference" ] @@ -1359,7 +1369,7 @@ def sort_df_rows_columns( """ if by_columns and len(df.columns) > 0: column_sequence = [ - col for col in SCHEMA_DICT["slots"].keys() if col in df.columns + col for col in SSSOM_SV_OBJECT.dict["slots"].keys() if col in df.columns ] df = df.reindex(column_sequence, axis=1) if by_rows and len(df) > 0: @@ -1469,10 +1479,10 @@ def are_params_slots(params: dict) -> bool: if len(empty_params) > 0: logging.info(f"Parameters: {empty_params.keys()} has(ve) no value.") - legit_params = all(p in MAPPING_SET_SLOTS for p in params.keys()) + legit_params = all(p in SSSOM_SV_OBJECT.mapping_set_slots for p in params.keys()) if not legit_params: - invalids = [p for p in params if p not in MAPPING_SET_SLOTS] + invalids = [p for p in params if p not in SSSOM_SV_OBJECT.mapping_set_slots] raise ValueError( - f"The params are invalid: {invalids}. Should be any of the following: {MAPPING_SET_SLOTS}" + f"The params are invalid: {invalids}. 
Should be any of the following: {SSSOM_SV_OBJECT.mapping_set_slots}" ) return True diff --git a/tests/test_sort.py b/tests/test_sort.py index 556341ba..1bcddf27 100644 --- a/tests/test_sort.py +++ b/tests/test_sort.py @@ -2,11 +2,13 @@ import unittest -from sssom.constants import SCHEMA_DICT +from sssom.constants import SSSOMSchemaView from sssom.parsers import parse_sssom_table from sssom.util import sort_df_rows_columns from tests.constants import data_dir +SCHEMA_DICT = SSSOMSchemaView().dict + class TestSort(unittest.TestCase): """A test case for sorting msdf columns.""" From 7dfc69fc080351f54ce2e0b767964e32ba2a7466 Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Wed, 9 Nov 2022 13:57:58 -0600 Subject: [PATCH 04/16] cleanup --- sssom/constants.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sssom/constants.py b/sssom/constants.py index a800830c..f6720fa4 100644 --- a/sssom/constants.py +++ b/sssom/constants.py @@ -42,15 +42,6 @@ ] ENTITY_REFERENCE = "EntityReference" -# MULTIVALUED_SLOTS = [ -# c for c in SCHEMA_VIEW.all_slots() if SCHEMA_VIEW.get_slot(c).multivalued -# ] -# ENTITY_REFERENCE_SLOTS = [ -# c -# for c in SCHEMA_VIEW.all_slots() -# if SCHEMA_VIEW.get_slot(c).range == ENTITY_REFERENCE -# ] - # Slot Constants MIRROR_FROM = "mirror_from" REGISTRY_CONFIDENCE = "registry_confidence" From 372b2f298e247f248645bfcd1c3539cf5c81ab00 Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Thu, 10 Nov 2022 16:26:11 -0600 Subject: [PATCH 05/16] Made SSSOMSchemaView a Singular class --- sssom/cli.py | 10 ++++-- sssom/constants.py | 77 ++++++++++++++++++++++++++++++++-------------- sssom/parsers.py | 4 ++- sssom/util.py | 29 ++++++++--------- tests/test_sort.py | 2 +- 5 files changed, 78 insertions(+), 44 deletions(-) diff --git a/sssom/cli.py b/sssom/cli.py index 8d640914..33819221 100644 --- a/sssom/cli.py +++ b/sssom/cli.py @@ -61,6 +61,12 @@ ) from .writers import write_table +SSSOM_SV_OBJECT = ( + SSSOMSchemaView.instance + if 
hasattr(SSSOMSchemaView, "instance") + else SSSOMSchemaView() +) + # Click input options common across commands input_argument = click.argument("input", required=True, type=click.Path()) @@ -627,7 +633,7 @@ def decorator(f): @main.command() @input_argument @output_option -@dynamically_generate_sssom_options(SSSOMSchemaView().mapping_slots) +@dynamically_generate_sssom_options(SSSOM_SV_OBJECT.mapping_slots) def filter(input: str, output: TextIO, **kwargs): """Filter a dataframe by dynamically generating queries based on user input. @@ -658,7 +664,7 @@ def filter(input: str, output: TextIO, **kwargs): type=bool, help="Multivalued slots should be replaced or not. [default: False]", ) -@dynamically_generate_sssom_options(SSSOMSchemaView().mapping_set_slots) +@dynamically_generate_sssom_options(SSSOM_SV_OBJECT.mapping_set_slots) def annotate(input: str, output: TextIO, replace_multivalued: bool, **kwargs): """Annotate metadata of a mapping set. diff --git a/sssom/constants.py b/sssom/constants.py index f6720fa4..d24fa014 100644 --- a/sssom/constants.py +++ b/sssom/constants.py @@ -168,7 +168,17 @@ class SchemaValidationType(str, Enum): ] -class SSSOMSchemaView: +class SSSOMSchemaViewSingleton(object): + """Singleton class that holds the SSSOM schema view.""" + + def __new__(cls): + """Create new instance of the SSSOM schema view.""" + if not hasattr(cls, "instance"): + cls.instance = super(SSSOMSchemaViewSingleton, cls).__new__(cls) + return cls.instance + + +class SSSOMSchemaView(SSSOMSchemaViewSingleton): """ SchemaView class from linkml which is instantiated when necessary. 
@@ -176,29 +186,50 @@ class SSSOMSchemaView: Implemented via PR: https://github.com/mapping-commons/sssom-py/pull/323 """ - _view = None - _dict = None - - @property - def view(self) -> SchemaView: - """Return SchemaView object.""" - if self._view is None: - self._view = SchemaView(SCHEMA_YAML) - return self._view - - @property - def dict(self) -> dict: - """Return SchemaView as a dictionary.""" - if self._dict is None: - self._dict = schema_as_dict(self.view.schema) - return self._dict + def __init__(self): + """Initialize class attributes""" + self.view = SchemaView(SCHEMA_YAML) + self.dict = schema_as_dict(self.view.schema) + self.mapping_slots = self.dict["classes"]["mapping"]["slots"] + self.mapping_set_slots = self.dict["classes"]["mapping set"]["slots"] @property - def mapping_slots(self) -> List[str]: - """Return list of mapping slots.""" - return self.dict["classes"]["mapping"]["slots"] + def multivalued_slots(self) -> List[str]: + """Return list of multivalued slots.""" + return [c for c in self.view.all_slots() if self.view.get_slot(c).multivalued] @property - def mapping_set_slots(self) -> List[str]: - """Return list of mapping set slots.""" - return self.dict["classes"]["mapping set"]["slots"] + def entity_reference_slots(self) -> List[str]: + """Return list of entity reference slots.""" + return [ + c + for c in self.view.all_slots() + if self.view.get_slot(c).range == ENTITY_REFERENCE + ] + + # _view = None + # _dict = None + + # @property + # def view(self) -> SchemaView: + # """Return SchemaView object.""" + # if self._view is None: + # self._view = SchemaView(SCHEMA_YAML) + # return self._view + + # @property + # def dict(self) -> dict: + # """Return SchemaView as a dictionary.""" + # if self._dict is None: + # self._dict = schema_as_dict(self.view.schema) + # return self._dict + + # @property + # def mapping_slots(self) -> List[str]: + # """Return list of mapping slots.""" + # return self.dict["classes"]["mapping"]["slots"] + + # @property + # 
def mapping_set_slots(self) -> List[str]: + # """Return list of mapping set slots.""" + # return self.dict["classes"]["mapping set"]["slots"] diff --git a/sssom/parsers.py b/sssom/parsers.py index 41ca3a12..d40925ee 100644 --- a/sssom/parsers.py +++ b/sssom/parsers.py @@ -309,7 +309,9 @@ def _get_mdict_ms_and_bad_attrs( ) -> Tuple[dict, MappingSet, Counter]: mdict = {} - sssom_schema_object = SSSOMSchemaView() + sssom_schema_object = ( + SSSOMSchemaView.instance if SSSOMSchemaView.instance else SSSOMSchemaView() + ) for k, v in row.items(): if v and v == v: ok = False diff --git a/sssom/util.py b/sssom/util.py index 5323e6ac..b683f290 100644 --- a/sssom/util.py +++ b/sssom/util.py @@ -100,17 +100,11 @@ #: The 3 columns whose combination would be used as primary keys while merging/grouping KEY_FEATURES = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID] -SSSOM_SV_OBJECT = SSSOMSchemaView() -MULTIVALUED_SLOTS = [ - c - for c in SSSOM_SV_OBJECT.view.all_slots() - if SSSOM_SV_OBJECT.view.get_slot(c).multivalued -] -ENTITY_REFERENCE_SLOTS = [ - c - for c in SSSOM_SV_OBJECT.view.all_slots() - if SSSOM_SV_OBJECT.view.get_slot(c).range == ENTITY_REFERENCE -] +SSSOM_SV_OBJECT = ( + SSSOMSchemaView.instance + if hasattr(SSSOMSchemaView, "instance") + else SSSOMSchemaView() +) @dataclass @@ -1110,7 +1104,7 @@ def get_prefixes_used_in_table(df: pd.DataFrame) -> List[str]: """Get a list of prefixes used in CURIEs in key feature columns in a dataframe.""" prefixes = SSSOM_BUILT_IN_PREFIXES if not df.empty: - for col in ENTITY_REFERENCE_SLOTS: + for col in SSSOM_SV_OBJECT.entity_reference_slots: if col in df.columns: for v in df[col].values: pref = get_prefix_from_curie(str(v)) @@ -1275,8 +1269,7 @@ def is_multivalued_slot(slot: str) -> bool: # Ideally: # view = SchemaView('schema/sssom.yaml') # return view.get_slot(slot).multivalued - - return slot in MULTIVALUED_SLOTS + return slot in SSSOM_SV_OBJECT.multivalued_slots def reconcile_prefix_and_data( @@ -1390,7 +1383,9 @@ def 
get_all_prefixes(msdf: MappingSetDataFrame) -> list: metadata_keys = list(msdf.metadata.keys()) df_columns_list = msdf.df.columns.to_list() # type: ignore all_keys = metadata_keys + df_columns_list - ent_ref_slots = [s for s in all_keys if s in ENTITY_REFERENCE_SLOTS] + ent_ref_slots = [ + s for s in all_keys if s in SSSOM_SV_OBJECT.entity_reference_slots + ] for slot in ent_ref_slots: if slot in metadata_keys: @@ -1447,7 +1442,7 @@ def augment_metadata( if msdf.metadata: for k, v in meta.items(): # If slot is multivalued, add to list. - if k in MULTIVALUED_SLOTS and not replace_multivalued: + if k in SSSOM_SV_OBJECT.multivalued_slots and not replace_multivalued: tmp_value: list = [] if isinstance(msdf.metadata[k], str): tmp_value = [msdf.metadata[k]] @@ -1460,7 +1455,7 @@ def augment_metadata( ) tmp_value.extend(v) msdf.metadata[k] = list(set(tmp_value)) - elif k in MULTIVALUED_SLOTS and replace_multivalued: + elif k in SSSOM_SV_OBJECT.multivalued_slots and replace_multivalued: msdf.metadata[k] = list(v) else: msdf.metadata[k] = v[0] diff --git a/tests/test_sort.py b/tests/test_sort.py index 1bcddf27..0836d9f1 100644 --- a/tests/test_sort.py +++ b/tests/test_sort.py @@ -7,7 +7,7 @@ from sssom.util import sort_df_rows_columns from tests.constants import data_dir -SCHEMA_DICT = SSSOMSchemaView().dict +SCHEMA_DICT = SSSOMSchemaView.instance.dict class TestSort(unittest.TestCase): From fd83304d48cb4db9854f9e23d21debbb20dcf21f Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Thu, 10 Nov 2022 16:27:44 -0600 Subject: [PATCH 06/16] flake8 friendly --- sssom/util.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sssom/util.py b/sssom/util.py index b683f290..a059ae58 100644 --- a/sssom/util.py +++ b/sssom/util.py @@ -40,7 +40,6 @@ from .constants import ( COMMENT, CONFIDENCE, - ENTITY_REFERENCE, MAPPING_JUSTIFICATION, MAPPING_SET_ID, MAPPING_SET_SOURCE, From 5b1fcd71380069fe05eef8092e2d3da3f8a340e6 Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Thu, 10 Nov 2022 
16:28:09 -0600 Subject: [PATCH 07/16] typo --- sssom/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sssom/constants.py b/sssom/constants.py index d24fa014..f4200f8e 100644 --- a/sssom/constants.py +++ b/sssom/constants.py @@ -187,7 +187,7 @@ class SSSOMSchemaView(SSSOMSchemaViewSingleton): """ def __init__(self): - """Initialize class attributes""" + """Initialize class attributes.""" self.view = SchemaView(SCHEMA_YAML) self.dict = schema_as_dict(self.view.schema) self.mapping_slots = self.dict["classes"]["mapping"]["slots"] From 004933fbde1effb94ab9c0d40c4100319aad5cb1 Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Thu, 10 Nov 2022 16:32:55 -0600 Subject: [PATCH 08/16] cleanup --- sssom/constants.py | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/sssom/constants.py b/sssom/constants.py index f4200f8e..2fe64d1d 100644 --- a/sssom/constants.py +++ b/sssom/constants.py @@ -172,7 +172,7 @@ class SSSOMSchemaViewSingleton(object): """Singleton class that holds the SSSOM schema view.""" def __new__(cls): - """Create new instance of the SSSOM schema view.""" + """Create a instance of the SSSOM schema view if non-existent.""" if not hasattr(cls, "instance"): cls.instance = super(SSSOMSchemaViewSingleton, cls).__new__(cls) return cls.instance @@ -206,30 +206,3 @@ def entity_reference_slots(self) -> List[str]: for c in self.view.all_slots() if self.view.get_slot(c).range == ENTITY_REFERENCE ] - - # _view = None - # _dict = None - - # @property - # def view(self) -> SchemaView: - # """Return SchemaView object.""" - # if self._view is None: - # self._view = SchemaView(SCHEMA_YAML) - # return self._view - - # @property - # def dict(self) -> dict: - # """Return SchemaView as a dictionary.""" - # if self._dict is None: - # self._dict = schema_as_dict(self.view.schema) - # return self._dict - - # @property - # def mapping_slots(self) -> List[str]: - # """Return list of mapping slots.""" - # return 
self.dict["classes"]["mapping"]["slots"] - - # @property - # def mapping_set_slots(self) -> List[str]: - # """Return list of mapping set slots.""" - # return self.dict["classes"]["mapping set"]["slots"] From d726125cdc9891aa220b50c4c02b75d41a107c7b Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Fri, 11 Nov 2022 08:43:28 -0600 Subject: [PATCH 09/16] reduced layers and back to lazy instantiation --- sssom/constants.py | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/sssom/constants.py b/sssom/constants.py index 2fe64d1d..c32434bb 100644 --- a/sssom/constants.py +++ b/sssom/constants.py @@ -167,31 +167,46 @@ class SchemaValidationType(str, Enum): SchemaValidationType.PrefixMapCompleteness, ] +class SSSOMSchemaView(object): + """ + SchemaView class from linkml which is instantiated when necessary. + + Reason for this: https://github.com/mapping-commons/sssom-py/issues/322 + Implemented via PR: https://github.com/mapping-commons/sssom-py/pull/323 + """ -class SSSOMSchemaViewSingleton(object): - """Singleton class that holds the SSSOM schema view.""" + _view = None + _dict = None def __new__(cls): """Create a instance of the SSSOM schema view if non-existent.""" if not hasattr(cls, "instance"): - cls.instance = super(SSSOMSchemaViewSingleton, cls).__new__(cls) + cls.instance = super(SSSOMSchemaView, cls).__new__(cls) return cls.instance + @property + def view(self) -> SchemaView: + """Return SchemaView object.""" + if self._view is None: + self._view = SchemaView(SCHEMA_YAML) + return self._view -class SSSOMSchemaView(SSSOMSchemaViewSingleton): - """ - SchemaView class from linkml which is instantiated when necessary. 
+ @property + def dict(self) -> dict: + """Return SchemaView as a dictionary.""" + if self._dict is None: + self._dict = schema_as_dict(self.view.schema) + return self._dict - Reason for this: https://github.com/mapping-commons/sssom-py/issues/322 - Implemented via PR: https://github.com/mapping-commons/sssom-py/pull/323 - """ + @property + def mapping_slots(self) -> List[str]: + """Return list of mapping slots.""" + return self.dict["classes"]["mapping"]["slots"] - def __init__(self): - """Initialize class attributes.""" - self.view = SchemaView(SCHEMA_YAML) - self.dict = schema_as_dict(self.view.schema) - self.mapping_slots = self.dict["classes"]["mapping"]["slots"] - self.mapping_set_slots = self.dict["classes"]["mapping set"]["slots"] + @property + def mapping_set_slots(self) -> List[str]: + """Return list of mapping set slots.""" + return self.dict["classes"]["mapping set"]["slots"] @property def multivalued_slots(self) -> List[str]: From a0acd02bd297b6816eb69184c3346c03825b33ee Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Fri, 11 Nov 2022 08:44:50 -0600 Subject: [PATCH 10/16] formatted --- sssom/constants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sssom/constants.py b/sssom/constants.py index c32434bb..3e210027 100644 --- a/sssom/constants.py +++ b/sssom/constants.py @@ -167,6 +167,7 @@ class SchemaValidationType(str, Enum): SchemaValidationType.PrefixMapCompleteness, ] + class SSSOMSchemaView(object): """ SchemaView class from linkml which is instantiated when necessary. 
From 2c83ce3a7bf73f3bbe7a9610507677dafba63382 Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Sat, 12 Nov 2022 15:20:20 -0600 Subject: [PATCH 11/16] better way of accessing slots from a class --- sssom/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sssom/constants.py b/sssom/constants.py index 3e210027..e44870ca 100644 --- a/sssom/constants.py +++ b/sssom/constants.py @@ -202,12 +202,12 @@ def dict(self) -> dict: @property def mapping_slots(self) -> List[str]: """Return list of mapping slots.""" - return self.dict["classes"]["mapping"]["slots"] + return self.view.get_class("mapping").slots @property def mapping_set_slots(self) -> List[str]: """Return list of mapping set slots.""" - return self.dict["classes"]["mapping set"]["slots"] + return self.view.get_class("mapping set").slots @property def multivalued_slots(self) -> List[str]: From 4aaba70da33619e69556fc32f3e988bfe17df28e Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Tue, 22 Nov 2022 12:30:05 -0600 Subject: [PATCH 12/16] moved SSSOM_SCHEMA_OBJECT centrally --- sssom/cli.py | 12 +++--------- sssom/constants.py | 7 +++++++ sssom/parsers.py | 6 ++---- sssom/util.py | 48 +++++++++++++++++++++------------------------- 4 files changed, 34 insertions(+), 39 deletions(-) diff --git a/sssom/cli.py b/sssom/cli.py index 33819221..0a1da7bb 100644 --- a/sssom/cli.py +++ b/sssom/cli.py @@ -26,8 +26,8 @@ from sssom.constants import ( DEFAULT_VALIDATION_TYPES, PREFIX_MAP_MODES, + SSSOM_SCHEMA_OBJECT, SchemaValidationType, - SSSOMSchemaView, ) from sssom.context import get_default_metadata @@ -61,12 +61,6 @@ ) from .writers import write_table -SSSOM_SV_OBJECT = ( - SSSOMSchemaView.instance - if hasattr(SSSOMSchemaView, "instance") - else SSSOMSchemaView() -) - # Click input options common across commands input_argument = click.argument("input", required=True, type=click.Path()) @@ -633,7 +627,7 @@ def decorator(f): @main.command() @input_argument @output_option 
-@dynamically_generate_sssom_options(SSSOM_SV_OBJECT.mapping_slots) +@dynamically_generate_sssom_options(SSSOM_SCHEMA_OBJECT.mapping_slots) def filter(input: str, output: TextIO, **kwargs): """Filter a dataframe by dynamically generating queries based on user input. @@ -664,7 +658,7 @@ def filter(input: str, output: TextIO, **kwargs): type=bool, help="Multivalued slots should be replaced or not. [default: False]", ) -@dynamically_generate_sssom_options(SSSOM_SV_OBJECT.mapping_set_slots) +@dynamically_generate_sssom_options(SSSOM_SCHEMA_OBJECT.mapping_set_slots) def annotate(input: str, output: TextIO, replace_multivalued: bool, **kwargs): """Annotate metadata of a mapping set. diff --git a/sssom/constants.py b/sssom/constants.py index e44870ca..f0faf72d 100644 --- a/sssom/constants.py +++ b/sssom/constants.py @@ -222,3 +222,10 @@ def entity_reference_slots(self) -> List[str]: for c in self.view.all_slots() if self.view.get_slot(c).range == ENTITY_REFERENCE ] + + +SSSOM_SCHEMA_OBJECT = ( + SSSOMSchemaView.instance # type: ignore + if hasattr(SSSOMSchemaView, "instance") + else SSSOMSchemaView() +) diff --git a/sssom/parsers.py b/sssom/parsers.py index d40925ee..2dc9f72b 100644 --- a/sssom/parsers.py +++ b/sssom/parsers.py @@ -37,11 +37,11 @@ OWL_EQUIV_CLASS, PREDICATE_ID, RDFS_SUBCLASS_OF, + SSSOM_SCHEMA_OBJECT, SUBJECT_ID, SUBJECT_LABEL, SUBJECT_SOURCE, SUBJECT_SOURCE_ID, - SSSOMSchemaView, ) from .context import ( @@ -309,9 +309,7 @@ def _get_mdict_ms_and_bad_attrs( ) -> Tuple[dict, MappingSet, Counter]: mdict = {} - sssom_schema_object = ( - SSSOMSchemaView.instance if SSSOMSchemaView.instance else SSSOMSchemaView() - ) + sssom_schema_object = SSSOM_SCHEMA_OBJECT for k, v in row.items(): if v and v == v: ok = False diff --git a/sssom/util.py b/sssom/util.py index a059ae58..52e8b8f1 100644 --- a/sssom/util.py +++ b/sssom/util.py @@ -63,12 +63,12 @@ SKOS_EXACT_MATCH, SKOS_NARROW_MATCH, SKOS_RELATED_MATCH, + SSSOM_SCHEMA_OBJECT, SSSOM_SUPERCLASS_OF, 
SUBJECT_CATEGORY, SUBJECT_ID, SUBJECT_LABEL, SUBJECT_SOURCE, - SSSOMSchemaView, ) from .context import ( SSSOM_BUILT_IN_PREFIXES, @@ -99,12 +99,6 @@ #: The 3 columns whose combination would be used as primary keys while merging/grouping KEY_FEATURES = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID] -SSSOM_SV_OBJECT = ( - SSSOMSchemaView.instance - if hasattr(SSSOMSchemaView, "instance") - else SSSOMSchemaView() -) - @dataclass class MappingSetDataFrame: @@ -960,8 +954,8 @@ def to_mapping_set_dataframe(doc: MappingSetDocument) -> MappingSetDataFrame: data = [] slots_with_double_as_range = [ s - for s in SSSOM_SV_OBJECT.dict["slots"].keys() - if SSSOM_SV_OBJECT.dict["slots"][s]["range"] == "double" + for s in SSSOM_SCHEMA_OBJECT.dict["slots"].keys() + if SSSOM_SCHEMA_OBJECT.dict["slots"][s]["range"] == "double" ] if doc.mapping_set.mappings is not None: for mapping in doc.mapping_set.mappings: @@ -994,19 +988,19 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) -> map_dict = {} slots_with_double_as_range = [ s - for s in SSSOM_SV_OBJECT.dict["slots"].keys() - if SSSOM_SV_OBJECT.dict["slots"][s]["range"] == "double" + for s in SSSOM_SCHEMA_OBJECT.dict["slots"].keys() + if SSSOM_SCHEMA_OBJECT.dict["slots"][s]["range"] == "double" ] for property in map_obj: if map_obj[property] is not None: if isinstance(map_obj[property], list): # IF object is an enum if ( - SSSOM_SV_OBJECT.dict["slots"][property]["range"] - in SSSOM_SV_OBJECT.dict["enums"].keys() + SSSOM_SCHEMA_OBJECT.dict["slots"][property]["range"] + in SSSOM_SCHEMA_OBJECT.dict["enums"].keys() ): # IF object is a multivalued enum - if SSSOM_SV_OBJECT.dict["slots"][property]["multivalued"]: + if SSSOM_SCHEMA_OBJECT.dict["slots"][property]["multivalued"]: map_dict[property] = "|".join( enum_value.code.text for enum_value in map_obj[property] ) @@ -1022,8 +1016,8 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) -> else: # IF object is an enum if ( - 
SSSOM_SV_OBJECT.dict["slots"][property]["range"] - in SSSOM_SV_OBJECT.dict["enums"].keys() + SSSOM_SCHEMA_OBJECT.dict["slots"][property]["range"] + in SSSOM_SCHEMA_OBJECT.dict["enums"].keys() ): map_dict[property] = map_obj[property].code.text else: @@ -1103,7 +1097,7 @@ def get_prefixes_used_in_table(df: pd.DataFrame) -> List[str]: """Get a list of prefixes used in CURIEs in key feature columns in a dataframe.""" prefixes = SSSOM_BUILT_IN_PREFIXES if not df.empty: - for col in SSSOM_SV_OBJECT.entity_reference_slots: + for col in SSSOM_SCHEMA_OBJECT.entity_reference_slots: if col in df.columns: for v in df[col].values: pref = get_prefix_from_curie(str(v)) @@ -1268,7 +1262,7 @@ def is_multivalued_slot(slot: str) -> bool: # Ideally: # view = SchemaView('schema/sssom.yaml') # return view.get_slot(slot).multivalued - return slot in SSSOM_SV_OBJECT.multivalued_slots + return slot in SSSOM_SCHEMA_OBJECT.multivalued_slots def reconcile_prefix_and_data( @@ -1331,7 +1325,7 @@ def reconcile_prefix_and_data( # Data editing if len(data_switch_dict) > 0: # Read schema file - slots = SSSOM_SV_OBJECT.dict["slots"] + slots = SSSOM_SCHEMA_OBJECT.dict["slots"] entity_reference_columns = [ k for k, v in slots.items() if v["range"] == "EntityReference" ] @@ -1361,7 +1355,7 @@ def sort_df_rows_columns( """ if by_columns and len(df.columns) > 0: column_sequence = [ - col for col in SSSOM_SV_OBJECT.dict["slots"].keys() if col in df.columns + col for col in SSSOM_SCHEMA_OBJECT.dict["slots"].keys() if col in df.columns ] df = df.reindex(column_sequence, axis=1) if by_rows and len(df) > 0: @@ -1383,7 +1377,7 @@ def get_all_prefixes(msdf: MappingSetDataFrame) -> list: df_columns_list = msdf.df.columns.to_list() # type: ignore all_keys = metadata_keys + df_columns_list ent_ref_slots = [ - s for s in all_keys if s in SSSOM_SV_OBJECT.entity_reference_slots + s for s in all_keys if s in SSSOM_SCHEMA_OBJECT.entity_reference_slots ] for slot in ent_ref_slots: @@ -1441,7 +1435,7 @@ def 
augment_metadata( if msdf.metadata: for k, v in meta.items(): # If slot is multivalued, add to list. - if k in SSSOM_SV_OBJECT.multivalued_slots and not replace_multivalued: + if k in SSSOM_SCHEMA_OBJECT.multivalued_slots and not replace_multivalued: tmp_value: list = [] if isinstance(msdf.metadata[k], str): tmp_value = [msdf.metadata[k]] @@ -1454,7 +1448,7 @@ def augment_metadata( ) tmp_value.extend(v) msdf.metadata[k] = list(set(tmp_value)) - elif k in SSSOM_SV_OBJECT.multivalued_slots and replace_multivalued: + elif k in SSSOM_SCHEMA_OBJECT.multivalued_slots and replace_multivalued: msdf.metadata[k] = list(v) else: msdf.metadata[k] = v[0] @@ -1473,10 +1467,12 @@ def are_params_slots(params: dict) -> bool: if len(empty_params) > 0: logging.info(f"Parameters: {empty_params.keys()} has(ve) no value.") - legit_params = all(p in SSSOM_SV_OBJECT.mapping_set_slots for p in params.keys()) + legit_params = all( + p in SSSOM_SCHEMA_OBJECT.mapping_set_slots for p in params.keys() + ) if not legit_params: - invalids = [p for p in params if p not in SSSOM_SV_OBJECT.mapping_set_slots] + invalids = [p for p in params if p not in SSSOM_SCHEMA_OBJECT.mapping_set_slots] raise ValueError( - f"The params are invalid: {invalids}. Should be any of the following: {SSSOM_SV_OBJECT.mapping_set_slots}" + f"The params are invalid: {invalids}. 
Should be any of the following: {SSSOM_SCHEMA_OBJECT.mapping_set_slots}" ) return True From 0262577361a250dc6e5e681741d01481c59bf9bc Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Tue, 22 Nov 2022 12:35:38 -0600 Subject: [PATCH 13/16] rolled back previous commit --- sssom/cli.py | 12 +++++++++--- sssom/constants.py | 7 ------- sssom/parsers.py | 6 ++++-- sssom/util.py | 48 +++++++++++++++++++++++++--------------------- 4 files changed, 39 insertions(+), 34 deletions(-) diff --git a/sssom/cli.py b/sssom/cli.py index 0a1da7bb..33819221 100644 --- a/sssom/cli.py +++ b/sssom/cli.py @@ -26,8 +26,8 @@ from sssom.constants import ( DEFAULT_VALIDATION_TYPES, PREFIX_MAP_MODES, - SSSOM_SCHEMA_OBJECT, SchemaValidationType, + SSSOMSchemaView, ) from sssom.context import get_default_metadata @@ -61,6 +61,12 @@ ) from .writers import write_table +SSSOM_SV_OBJECT = ( + SSSOMSchemaView.instance + if hasattr(SSSOMSchemaView, "instance") + else SSSOMSchemaView() +) + # Click input options common across commands input_argument = click.argument("input", required=True, type=click.Path()) @@ -627,7 +633,7 @@ def decorator(f): @main.command() @input_argument @output_option -@dynamically_generate_sssom_options(SSSOM_SCHEMA_OBJECT.mapping_slots) +@dynamically_generate_sssom_options(SSSOM_SV_OBJECT.mapping_slots) def filter(input: str, output: TextIO, **kwargs): """Filter a dataframe by dynamically generating queries based on user input. @@ -658,7 +664,7 @@ def filter(input: str, output: TextIO, **kwargs): type=bool, help="Multivalued slots should be replaced or not. [default: False]", ) -@dynamically_generate_sssom_options(SSSOM_SCHEMA_OBJECT.mapping_set_slots) +@dynamically_generate_sssom_options(SSSOM_SV_OBJECT.mapping_set_slots) def annotate(input: str, output: TextIO, replace_multivalued: bool, **kwargs): """Annotate metadata of a mapping set. 
diff --git a/sssom/constants.py b/sssom/constants.py index f0faf72d..e44870ca 100644 --- a/sssom/constants.py +++ b/sssom/constants.py @@ -222,10 +222,3 @@ def entity_reference_slots(self) -> List[str]: for c in self.view.all_slots() if self.view.get_slot(c).range == ENTITY_REFERENCE ] - - -SSSOM_SCHEMA_OBJECT = ( - SSSOMSchemaView.instance # type: ignore - if hasattr(SSSOMSchemaView, "instance") - else SSSOMSchemaView() -) diff --git a/sssom/parsers.py b/sssom/parsers.py index 2dc9f72b..d40925ee 100644 --- a/sssom/parsers.py +++ b/sssom/parsers.py @@ -37,11 +37,11 @@ OWL_EQUIV_CLASS, PREDICATE_ID, RDFS_SUBCLASS_OF, - SSSOM_SCHEMA_OBJECT, SUBJECT_ID, SUBJECT_LABEL, SUBJECT_SOURCE, SUBJECT_SOURCE_ID, + SSSOMSchemaView, ) from .context import ( @@ -309,7 +309,9 @@ def _get_mdict_ms_and_bad_attrs( ) -> Tuple[dict, MappingSet, Counter]: mdict = {} - sssom_schema_object = SSSOM_SCHEMA_OBJECT + sssom_schema_object = ( + SSSOMSchemaView.instance if SSSOMSchemaView.instance else SSSOMSchemaView() + ) for k, v in row.items(): if v and v == v: ok = False diff --git a/sssom/util.py b/sssom/util.py index 52e8b8f1..a059ae58 100644 --- a/sssom/util.py +++ b/sssom/util.py @@ -63,12 +63,12 @@ SKOS_EXACT_MATCH, SKOS_NARROW_MATCH, SKOS_RELATED_MATCH, - SSSOM_SCHEMA_OBJECT, SSSOM_SUPERCLASS_OF, SUBJECT_CATEGORY, SUBJECT_ID, SUBJECT_LABEL, SUBJECT_SOURCE, + SSSOMSchemaView, ) from .context import ( SSSOM_BUILT_IN_PREFIXES, @@ -99,6 +99,12 @@ #: The 3 columns whose combination would be used as primary keys while merging/grouping KEY_FEATURES = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID] +SSSOM_SV_OBJECT = ( + SSSOMSchemaView.instance + if hasattr(SSSOMSchemaView, "instance") + else SSSOMSchemaView() +) + @dataclass class MappingSetDataFrame: @@ -954,8 +960,8 @@ def to_mapping_set_dataframe(doc: MappingSetDocument) -> MappingSetDataFrame: data = [] slots_with_double_as_range = [ s - for s in SSSOM_SCHEMA_OBJECT.dict["slots"].keys() - if SSSOM_SCHEMA_OBJECT.dict["slots"][s]["range"] == 
"double" + for s in SSSOM_SV_OBJECT.dict["slots"].keys() + if SSSOM_SV_OBJECT.dict["slots"][s]["range"] == "double" ] if doc.mapping_set.mappings is not None: for mapping in doc.mapping_set.mappings: @@ -988,19 +994,19 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) -> map_dict = {} slots_with_double_as_range = [ s - for s in SSSOM_SCHEMA_OBJECT.dict["slots"].keys() - if SSSOM_SCHEMA_OBJECT.dict["slots"][s]["range"] == "double" + for s in SSSOM_SV_OBJECT.dict["slots"].keys() + if SSSOM_SV_OBJECT.dict["slots"][s]["range"] == "double" ] for property in map_obj: if map_obj[property] is not None: if isinstance(map_obj[property], list): # IF object is an enum if ( - SSSOM_SCHEMA_OBJECT.dict["slots"][property]["range"] - in SSSOM_SCHEMA_OBJECT.dict["enums"].keys() + SSSOM_SV_OBJECT.dict["slots"][property]["range"] + in SSSOM_SV_OBJECT.dict["enums"].keys() ): # IF object is a multivalued enum - if SSSOM_SCHEMA_OBJECT.dict["slots"][property]["multivalued"]: + if SSSOM_SV_OBJECT.dict["slots"][property]["multivalued"]: map_dict[property] = "|".join( enum_value.code.text for enum_value in map_obj[property] ) @@ -1016,8 +1022,8 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) -> else: # IF object is an enum if ( - SSSOM_SCHEMA_OBJECT.dict["slots"][property]["range"] - in SSSOM_SCHEMA_OBJECT.dict["enums"].keys() + SSSOM_SV_OBJECT.dict["slots"][property]["range"] + in SSSOM_SV_OBJECT.dict["enums"].keys() ): map_dict[property] = map_obj[property].code.text else: @@ -1097,7 +1103,7 @@ def get_prefixes_used_in_table(df: pd.DataFrame) -> List[str]: """Get a list of prefixes used in CURIEs in key feature columns in a dataframe.""" prefixes = SSSOM_BUILT_IN_PREFIXES if not df.empty: - for col in SSSOM_SCHEMA_OBJECT.entity_reference_slots: + for col in SSSOM_SV_OBJECT.entity_reference_slots: if col in df.columns: for v in df[col].values: pref = get_prefix_from_curie(str(v)) @@ -1262,7 +1268,7 @@ def 
is_multivalued_slot(slot: str) -> bool: # Ideally: # view = SchemaView('schema/sssom.yaml') # return view.get_slot(slot).multivalued - return slot in SSSOM_SCHEMA_OBJECT.multivalued_slots + return slot in SSSOM_SV_OBJECT.multivalued_slots def reconcile_prefix_and_data( @@ -1325,7 +1331,7 @@ def reconcile_prefix_and_data( # Data editing if len(data_switch_dict) > 0: # Read schema file - slots = SSSOM_SCHEMA_OBJECT.dict["slots"] + slots = SSSOM_SV_OBJECT.dict["slots"] entity_reference_columns = [ k for k, v in slots.items() if v["range"] == "EntityReference" ] @@ -1355,7 +1361,7 @@ def sort_df_rows_columns( """ if by_columns and len(df.columns) > 0: column_sequence = [ - col for col in SSSOM_SCHEMA_OBJECT.dict["slots"].keys() if col in df.columns + col for col in SSSOM_SV_OBJECT.dict["slots"].keys() if col in df.columns ] df = df.reindex(column_sequence, axis=1) if by_rows and len(df) > 0: @@ -1377,7 +1383,7 @@ def get_all_prefixes(msdf: MappingSetDataFrame) -> list: df_columns_list = msdf.df.columns.to_list() # type: ignore all_keys = metadata_keys + df_columns_list ent_ref_slots = [ - s for s in all_keys if s in SSSOM_SCHEMA_OBJECT.entity_reference_slots + s for s in all_keys if s in SSSOM_SV_OBJECT.entity_reference_slots ] for slot in ent_ref_slots: @@ -1435,7 +1441,7 @@ def augment_metadata( if msdf.metadata: for k, v in meta.items(): # If slot is multivalued, add to list. 
- if k in SSSOM_SCHEMA_OBJECT.multivalued_slots and not replace_multivalued: + if k in SSSOM_SV_OBJECT.multivalued_slots and not replace_multivalued: tmp_value: list = [] if isinstance(msdf.metadata[k], str): tmp_value = [msdf.metadata[k]] @@ -1448,7 +1454,7 @@ def augment_metadata( ) tmp_value.extend(v) msdf.metadata[k] = list(set(tmp_value)) - elif k in SSSOM_SCHEMA_OBJECT.multivalued_slots and replace_multivalued: + elif k in SSSOM_SV_OBJECT.multivalued_slots and replace_multivalued: msdf.metadata[k] = list(v) else: msdf.metadata[k] = v[0] @@ -1467,12 +1473,10 @@ def are_params_slots(params: dict) -> bool: if len(empty_params) > 0: logging.info(f"Parameters: {empty_params.keys()} has(ve) no value.") - legit_params = all( - p in SSSOM_SCHEMA_OBJECT.mapping_set_slots for p in params.keys() - ) + legit_params = all(p in SSSOM_SV_OBJECT.mapping_set_slots for p in params.keys()) if not legit_params: - invalids = [p for p in params if p not in SSSOM_SCHEMA_OBJECT.mapping_set_slots] + invalids = [p for p in params if p not in SSSOM_SV_OBJECT.mapping_set_slots] raise ValueError( - f"The params are invalid: {invalids}. Should be any of the following: {SSSOM_SCHEMA_OBJECT.mapping_set_slots}" + f"The params are invalid: {invalids}. Should be any of the following: {SSSOM_SV_OBJECT.mapping_set_slots}" ) return True From 88ae3279536fddfb99b994bb7a734ca026bd5874 Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Tue, 22 Nov 2022 13:08:37 -0600 Subject: [PATCH 14/16] added space --- sssom/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sssom/cli.py b/sssom/cli.py index 33819221..360ffefb 100644 --- a/sssom/cli.py +++ b/sssom/cli.py @@ -74,7 +74,7 @@ "-I", "--input-format", help=f'The string denoting the input format, e.g. 
{",".join(SSSOM_READ_FORMATS)}', -) +) output_option = click.option( "-o", "--output", From 0a24ebe270b11f12d91902e5c90b94df400f6985 Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Tue, 22 Nov 2022 13:08:50 -0600 Subject: [PATCH 15/16] removed space --- sssom/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sssom/cli.py b/sssom/cli.py index 360ffefb..33819221 100644 --- a/sssom/cli.py +++ b/sssom/cli.py @@ -74,7 +74,7 @@ "-I", "--input-format", help=f'The string denoting the input format, e.g. {",".join(SSSOM_READ_FORMATS)}', -) +) output_option = click.option( "-o", "--output", From 6507e453ea31cc3b5caea028501e49c36f7ee7b4 Mon Sep 17 00:00:00 2001 From: Harshad Hegde Date: Tue, 22 Nov 2022 13:45:34 -0600 Subject: [PATCH 16/16] anchor linkml 1.3.13 since 1.3.14 throws error --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index e8518b84..665adf36 100644 --- a/tox.ini +++ b/tox.ini @@ -15,7 +15,7 @@ envlist = commands = python -m pytest deps = - linkml + linkml==1.3.13 extras = test description = Run unit tests with pytest. This is a special environment that does not get a name, and