Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving SchemaView() calls from global space to parts where necessary. #324

Merged
merged 16 commits into from
Nov 23, 2022
Merged
4 changes: 2 additions & 2 deletions .github/workflows/pypi-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3.0.2
- uses: actions/checkout@v3

- name: Set up Python
uses: actions/setup-python@v3.1.2
uses: actions/setup-python@v4.3.0
with:
python-version: 3.9

Expand Down
13 changes: 9 additions & 4 deletions sssom/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,9 @@

from sssom.constants import (
DEFAULT_VALIDATION_TYPES,
MAPPING_SET_SLOTS,
MAPPING_SLOTS,
PREFIX_MAP_MODES,
SchemaValidationType,
SSSOMSchemaView,
)
from sssom.context import get_default_metadata

Expand Down Expand Up @@ -62,6 +61,12 @@
)
from .writers import write_table

SSSOM_SV_OBJECT = (
SSSOMSchemaView.instance
if hasattr(SSSOMSchemaView, "instance")
else SSSOMSchemaView()
)

# Click input options common across commands
input_argument = click.argument("input", required=True, type=click.Path())

Expand Down Expand Up @@ -628,7 +633,7 @@ def decorator(f):
@main.command()
@input_argument
@output_option
@dynamically_generate_sssom_options(MAPPING_SLOTS)
@dynamically_generate_sssom_options(SSSOM_SV_OBJECT.mapping_slots)
def filter(input: str, output: TextIO, **kwargs):
"""Filter a dataframe by dynamically generating queries based on user input.

Expand Down Expand Up @@ -659,7 +664,7 @@ def filter(input: str, output: TextIO, **kwargs):
type=bool,
help="Multivalued slots should be replaced or not. [default: False]",
)
@dynamically_generate_sssom_options(MAPPING_SET_SLOTS)
@dynamically_generate_sssom_options(SSSOM_SV_OBJECT.mapping_set_slots)
def annotate(input: str, output: TextIO, replace_multivalued: bool, **kwargs):
"""Annotate metadata of a mapping set.

Expand Down
55 changes: 42 additions & 13 deletions sssom/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pathlib
from enum import Enum
from typing import List

import pkg_resources
from linkml_runtime.utils.schema_as_dict import schema_as_dict
Expand All @@ -14,11 +15,8 @@
SCHEMA_YAML = pkg_resources.resource_filename(
"sssom_schema", "schema/sssom_schema.yaml"
)
SCHEMA_VIEW = SchemaView(SCHEMA_YAML)

# SCHEMA_VIEW = package_schemaview("sssom_schema")
SCHEMA_DICT = schema_as_dict(SCHEMA_VIEW.schema)
MAPPING_SLOTS = SCHEMA_DICT["classes"]["mapping"]["slots"]
MAPPING_SET_SLOTS = SCHEMA_DICT["classes"]["mapping set"]["slots"]

OWL_EQUIV_CLASS = "http://www.w3.org/2002/07/owl#equivalentClass"
RDFS_SUBCLASS_OF = "http://www.w3.org/2000/01/rdf-schema#subClassOf"
Expand All @@ -44,15 +42,6 @@
]
ENTITY_REFERENCE = "EntityReference"

MULTIVALUED_SLOTS = [
c for c in SCHEMA_VIEW.all_slots() if SCHEMA_VIEW.get_slot(c).multivalued
]
ENTITY_REFERENCE_SLOTS = [
c
for c in SCHEMA_VIEW.all_slots()
if SCHEMA_VIEW.get_slot(c).range == ENTITY_REFERENCE
]

# Slot Constants
MIRROR_FROM = "mirror_from"
REGISTRY_CONFIDENCE = "registry_confidence"
Expand Down Expand Up @@ -177,3 +166,43 @@ class SchemaValidationType(str, Enum):
SchemaValidationType.JsonSchema,
SchemaValidationType.PrefixMapCompleteness,
]


class SSSOMSchemaViewSingleton(object):
    """Singleton class that holds the SSSOM schema view.

    The single object is stored on the class itself as the ``instance``
    attribute, so callers can test for it with ``hasattr(cls, "instance")``.
    Each class in the hierarchy gets its own ``instance``.
    """

    def __new__(cls):
        """Create an instance of the SSSOM schema view if non-existent.

        :return: The one shared object of ``cls``.
        """
        # Look only at attributes defined directly on ``cls``. ``hasattr``
        # would also find an ``instance`` inherited from a parent class and
        # hand a subclass an object of the wrong type.
        if "instance" not in cls.__dict__:
            cls.instance = super().__new__(cls)
        return cls.instance


class SSSOMSchemaView(SSSOMSchemaViewSingleton):
    """
    SchemaView class from linkml which is instantiated when necessary.

    Reason for this: https://github.com/mapping-commons/sssom-py/issues/322
    Implemented via PR: https://github.com/mapping-commons/sssom-py/pull/323

    Attributes set on first initialization:

    * ``view`` -- ``SchemaView`` built from ``SCHEMA_YAML``.
    * ``dict`` -- result of ``schema_as_dict`` applied to ``view.schema``.
    * ``mapping_slots`` -- slots listed for the ``mapping`` class.
    * ``mapping_set_slots`` -- slots listed for the ``mapping set`` class.
    """

    def __init__(self):
        """Initialize class attributes.

        Python calls ``__init__`` every time ``SSSOMSchemaView()`` is
        evaluated, including when ``__new__`` returns the already existing
        object. The guard below makes repeated calls a no-op, so the schema
        is loaded only once.
        """
        if getattr(self, "_initialized", False):
            return
        self.view = SchemaView(SCHEMA_YAML)
        self.dict = schema_as_dict(self.view.schema)
        classes = self.dict["classes"]
        self.mapping_slots = classes["mapping"]["slots"]
        self.mapping_set_slots = classes["mapping set"]["slots"]
        # Set last: a failed load leaves the flag unset and can be retried.
        self._initialized = True

    @property
    def multivalued_slots(self) -> List[str]:
        """Return list of multivalued slots."""
        view = self.view
        return [c for c in view.all_slots() if view.get_slot(c).multivalued]

    @property
    def entity_reference_slots(self) -> List[str]:
        """Return list of entity reference slots."""
        view = self.view
        return [
            c
            for c in view.all_slots()
            if view.get_slot(c).range == ENTITY_REFERENCE
        ]
11 changes: 6 additions & 5 deletions sssom/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@
MAPPING_JUSTIFICATION,
MAPPING_JUSTIFICATION_UNSPECIFIED,
MAPPING_SET_ID,
MAPPING_SET_SLOTS,
MAPPING_SLOTS,
OBJECT_ID,
OBJECT_LABEL,
OBJECT_SOURCE,
Expand All @@ -43,6 +41,7 @@
SUBJECT_LABEL,
SUBJECT_SOURCE,
SUBJECT_SOURCE_ID,
SSSOMSchemaView,
)

from .context import (
Expand Down Expand Up @@ -310,19 +309,21 @@ def _get_mdict_ms_and_bad_attrs(
) -> Tuple[dict, MappingSet, Counter]:

mdict = {}

sssom_schema_object = (
SSSOMSchemaView.instance if SSSOMSchemaView.instance else SSSOMSchemaView()
)
for k, v in row.items():
if v and v == v:
ok = False
if k:
k = str(k)
v = _address_multivalued_slot(k, v)
# if hasattr(Mapping, k):
if k in MAPPING_SLOTS:
if k in sssom_schema_object.mapping_slots:
mdict[k] = v
ok = True
# if hasattr(MappingSet, k):
if k in MAPPING_SET_SLOTS:
if k in sssom_schema_object.mapping_set_slots:
ms[k] = v
ok = True
if not ok:
Expand Down
52 changes: 28 additions & 24 deletions sssom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,9 @@
from .constants import (
COMMENT,
CONFIDENCE,
ENTITY_REFERENCE_SLOTS,
MAPPING_JUSTIFICATION,
MAPPING_SET_ID,
MAPPING_SET_SLOTS,
MAPPING_SET_SOURCE,
MULTIVALUED_SLOTS,
OBJECT_CATEGORY,
OBJECT_ID,
OBJECT_LABEL,
Expand All @@ -59,7 +56,6 @@
PREDICATE_MODIFIER_NOT,
PREFIX_MAP_MODES,
RDFS_SUBCLASS_OF,
SCHEMA_DICT,
SCHEMA_YAML,
SEMAPV,
SKOS_BROAD_MATCH,
Expand All @@ -72,6 +68,7 @@
SUBJECT_ID,
SUBJECT_LABEL,
SUBJECT_SOURCE,
SSSOMSchemaView,
)
from .context import (
SSSOM_BUILT_IN_PREFIXES,
Expand Down Expand Up @@ -102,6 +99,12 @@
#: The 3 columns whose combination would be used as primary keys while merging/grouping
KEY_FEATURES = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID]

SSSOM_SV_OBJECT = (
SSSOMSchemaView.instance
if hasattr(SSSOMSchemaView, "instance")
else SSSOMSchemaView()
)


@dataclass
class MappingSetDataFrame:
Expand Down Expand Up @@ -957,8 +960,8 @@ def to_mapping_set_dataframe(doc: MappingSetDocument) -> MappingSetDataFrame:
data = []
slots_with_double_as_range = [
s
for s in SCHEMA_DICT["slots"].keys()
if SCHEMA_DICT["slots"][s]["range"] == "double"
for s in SSSOM_SV_OBJECT.dict["slots"].keys()
if SSSOM_SV_OBJECT.dict["slots"][s]["range"] == "double"
]
if doc.mapping_set.mappings is not None:
for mapping in doc.mapping_set.mappings:
Expand Down Expand Up @@ -991,19 +994,19 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) ->
map_dict = {}
slots_with_double_as_range = [
s
for s in SCHEMA_DICT["slots"].keys()
if SCHEMA_DICT["slots"][s]["range"] == "double"
for s in SSSOM_SV_OBJECT.dict["slots"].keys()
if SSSOM_SV_OBJECT.dict["slots"][s]["range"] == "double"
]
for property in map_obj:
if map_obj[property] is not None:
if isinstance(map_obj[property], list):
# IF object is an enum
if (
SCHEMA_DICT["slots"][property]["range"]
in SCHEMA_DICT["enums"].keys()
SSSOM_SV_OBJECT.dict["slots"][property]["range"]
in SSSOM_SV_OBJECT.dict["enums"].keys()
):
# IF object is a multivalued enum
if SCHEMA_DICT["slots"][property]["multivalued"]:
if SSSOM_SV_OBJECT.dict["slots"][property]["multivalued"]:
map_dict[property] = "|".join(
enum_value.code.text for enum_value in map_obj[property]
)
Expand All @@ -1019,8 +1022,8 @@ def get_dict_from_mapping(map_obj: Union[Any, Dict[Any, Any], SSSOM_Mapping]) ->
else:
# IF object is an enum
if (
SCHEMA_DICT["slots"][property]["range"]
in SCHEMA_DICT["enums"].keys()
SSSOM_SV_OBJECT.dict["slots"][property]["range"]
in SSSOM_SV_OBJECT.dict["enums"].keys()
):
map_dict[property] = map_obj[property].code.text
else:
Expand Down Expand Up @@ -1100,7 +1103,7 @@ def get_prefixes_used_in_table(df: pd.DataFrame) -> List[str]:
"""Get a list of prefixes used in CURIEs in key feature columns in a dataframe."""
prefixes = SSSOM_BUILT_IN_PREFIXES
if not df.empty:
for col in ENTITY_REFERENCE_SLOTS:
for col in SSSOM_SV_OBJECT.entity_reference_slots:
if col in df.columns:
for v in df[col].values:
pref = get_prefix_from_curie(str(v))
Expand Down Expand Up @@ -1265,8 +1268,7 @@ def is_multivalued_slot(slot: str) -> bool:
# Ideally:
# view = SchemaView('schema/sssom.yaml')
# return view.get_slot(slot).multivalued

return slot in MULTIVALUED_SLOTS
return slot in SSSOM_SV_OBJECT.multivalued_slots


def reconcile_prefix_and_data(
Expand Down Expand Up @@ -1329,7 +1331,7 @@ def reconcile_prefix_and_data(
# Data editing
if len(data_switch_dict) > 0:
# Read schema file
slots = SCHEMA_DICT["slots"]
slots = SSSOM_SV_OBJECT.dict["slots"]
entity_reference_columns = [
k for k, v in slots.items() if v["range"] == "EntityReference"
]
Expand Down Expand Up @@ -1359,7 +1361,7 @@ def sort_df_rows_columns(
"""
if by_columns and len(df.columns) > 0:
column_sequence = [
col for col in SCHEMA_DICT["slots"].keys() if col in df.columns
col for col in SSSOM_SV_OBJECT.dict["slots"].keys() if col in df.columns
]
df = df.reindex(column_sequence, axis=1)
if by_rows and len(df) > 0:
Expand All @@ -1380,7 +1382,9 @@ def get_all_prefixes(msdf: MappingSetDataFrame) -> list:
metadata_keys = list(msdf.metadata.keys())
df_columns_list = msdf.df.columns.to_list() # type: ignore
all_keys = metadata_keys + df_columns_list
ent_ref_slots = [s for s in all_keys if s in ENTITY_REFERENCE_SLOTS]
ent_ref_slots = [
s for s in all_keys if s in SSSOM_SV_OBJECT.entity_reference_slots
]

for slot in ent_ref_slots:
if slot in metadata_keys:
Expand Down Expand Up @@ -1437,7 +1441,7 @@ def augment_metadata(
if msdf.metadata:
for k, v in meta.items():
# If slot is multivalued, add to list.
if k in MULTIVALUED_SLOTS and not replace_multivalued:
if k in SSSOM_SV_OBJECT.multivalued_slots and not replace_multivalued:
tmp_value: list = []
if isinstance(msdf.metadata[k], str):
tmp_value = [msdf.metadata[k]]
Expand All @@ -1450,7 +1454,7 @@ def augment_metadata(
)
tmp_value.extend(v)
msdf.metadata[k] = list(set(tmp_value))
elif k in MULTIVALUED_SLOTS and replace_multivalued:
elif k in SSSOM_SV_OBJECT.multivalued_slots and replace_multivalued:
msdf.metadata[k] = list(v)
else:
msdf.metadata[k] = v[0]
Expand All @@ -1469,10 +1473,10 @@ def are_params_slots(params: dict) -> bool:
if len(empty_params) > 0:
logging.info(f"Parameters: {empty_params.keys()} has(ve) no value.")

legit_params = all(p in MAPPING_SET_SLOTS for p in params.keys())
legit_params = all(p in SSSOM_SV_OBJECT.mapping_set_slots for p in params.keys())
if not legit_params:
invalids = [p for p in params if p not in MAPPING_SET_SLOTS]
invalids = [p for p in params if p not in SSSOM_SV_OBJECT.mapping_set_slots]
raise ValueError(
f"The params are invalid: {invalids}. Should be any of the following: {MAPPING_SET_SLOTS}"
f"The params are invalid: {invalids}. Should be any of the following: {SSSOM_SV_OBJECT.mapping_set_slots}"
)
return True
4 changes: 3 additions & 1 deletion tests/test_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

import unittest

from sssom.constants import SCHEMA_DICT
from sssom.constants import SSSOMSchemaView
from sssom.parsers import parse_sssom_table
from sssom.util import sort_df_rows_columns
from tests.constants import data_dir

SCHEMA_DICT = SSSOMSchemaView.instance.dict


class TestSort(unittest.TestCase):
"""A test case for sorting msdf columns."""
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ description = Run the flake8 code quality checker.
[testenv:mypy]
deps = mypy
skip_install = true
commands = mypy --install-types --non-interactive --ignore-missing-imports sssom/ setup.py
commands = mypy --install-types --non-interactive --ignore-missing-imports --implicit-optional sssom/ setup.py
description = Run the mypy tool to check static typing on the project.

[testenv:manifest]
Expand Down