WIP: utility to normalize ordering of elements in a data object according to a schema #668
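The intent of this utility, in a rough usage sketch (not part of the diff below; the personinfo.yaml path is a hypothetical input, and the metamodel setup mirrors the test in this PR):

from linkml_runtime.utils.introspection import package_schemaview
from linkml_runtime.utils.schemaview import SchemaView
from linkml.utils.normalizer import DataNormalizer

# normalize a data object (here: a schema instance) so that its keys
# follow the slot ordering defined by the schema it conforms to
metamodel = package_schemaview('linkml_runtime.linkml_model.meta')
normalizer = DataNormalizer(schemaview=metamodel)
data = SchemaView('personinfo.yaml').schema  # hypothetical input file
normalized = normalizer.as_normalized_dict(data)  # keys now in schema-defined order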

Open
wants to merge 1 commit into base: main
108 changes: 108 additions & 0 deletions linkml/utils/normalizer.py
@@ -0,0 +1,108 @@
import logging
from dataclasses import dataclass
from functools import lru_cache
from typing import Any, Dict, List, Optional

from linkml_runtime import SchemaView
from linkml_runtime.linkml_model import SlotDefinitionName
from linkml_runtime.utils.formatutils import underscore
from linkml_runtime.utils.yamlutils import YAMLRoot

logger = logging.getLogger(__name__)

@lru_cache()
def class_slots_ordered(sv: SchemaView, class_name: str) -> List[SlotDefinitionName]:
    """
    Return the slot names reachable from a class in a stable, schema-determined order:
    slots inherited via is_a and mixins come before the slots declared on the class itself.

    :param sv: schema view over the schema of interest
    :param class_name: class whose slots are to be ordered
    :return: de-duplicated list of slot names, ancestors first
    """
    seed = [class_name]
    slot_names = []
    visited = []
    logger.debug(f'ANC: {class_name}')
    while len(seed) > 0:
        # prefer to process any mixin class in the seed list first
        ix = 0
        while ix < len(seed):
            if sv.get_class(seed[ix]).mixin:
                break
            ix += 1
        if ix == len(seed):
            # no mixin found: process the next class in order
            ix = 0
        cn = seed[ix]
        del seed[ix]
        logger.debug(f'  NEXT: {cn}')
        if cn in visited:
            continue
        c = sv.get_class(cn)
        this_slot_names = c.slots + list(c.attributes.keys())
        logger.debug(f'  THIS SNs: {this_slot_names}')
        # prepend, so that slots of ancestors end up before slots of descendants
        slot_names = this_slot_names + slot_names
        seed = c.mixins + seed
        if c.is_a:
            seed.append(c.is_a)
        visited.append(cn)
    # remove duplicates while preserving first-seen order
    slot_names = list(dict.fromkeys(slot_names))
    logger.debug(f'FINAL: {slot_names}')
    return slot_names




@dataclass
class DataNormalizer:
    """
    Rewrites data objects so that their keys follow the slot ordering defined by a schema.
    """
    schemaview: SchemaView
    _alias_map: Optional[Dict[str, str]] = None

    def as_normalized_obj(self, obj: Any):
        """
        Return a new instance of the same type as obj, constructed from the normalized dict.

        TODO: this method doesn't currently work, as ordering is re-imposed by __post_init__

        :param obj: instance of a YAMLRoot class to normalize
        :return: new instance of the same class
        """
        typ = type(obj)
        return typ(**self.as_normalized_dict(obj))

    def alias_map(self) -> Dict[str, str]:
        """
        Lazily build and cache an inverse of the schema's slot name mappings,
        keyed by underscored slot alias.
        """
        if self._alias_map is not None:
            return self._alias_map
        sv = self.schemaview
        snm = sv.slot_name_mappings()
        snm_inv = {}
        for k, v in snm.items():
            alias = v.alias if v.alias else v.name
            alias = underscore(alias)
            snm_inv[alias] = k
        self._alias_map = snm_inv
        return snm_inv

    def as_normalized_dict(self, obj: Any) -> Any:
        """
        Recursively convert obj into a dict whose keys follow schema-defined slot order.

        TODO: for this to work we need to be able to specify
        traversal order for class_induced_slots

        :param obj: object (YAMLRoot instance, list, dict, or atom) to normalize
        :return: normalized dict, list, or atomic value
        """
        sv = self.schemaview
        snm_inv = self.alias_map()
        typ = type(obj)
        if isinstance(obj, YAMLRoot):
            cn = typ.class_name
            cls = sv.get_class(cn)
            nu_dict = {}
            # insert keys in the order dictated by the schema
            for slot_name in class_slots_ordered(sv, cls.name):
                slot = sv.induced_slot(slot_name, cls.name)
                att_name = snm_inv[slot.alias]
                v = getattr(obj, att_name, None)
                if v is not None:
                    v2 = self.as_normalized_dict(v)
                    nu_dict[att_name] = v2
            return nu_dict
        elif isinstance(obj, list):
            return [self.as_normalized_dict(x) for x in obj]
        elif isinstance(obj, dict):
            return {k: self.as_normalized_dict(v) for k, v in obj.items()}
        else:
            return obj
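
To make the intended ordering concrete, here is a minimal, self-contained sketch (not part of this PR); the demo schema is invented for illustration, and the expected output is an assumption based on the traversal logic above (inherited slots come before a class's own slots):

from linkml_runtime import SchemaView

from linkml.utils.normalizer import class_slots_ordered

# invented demo schema, for illustration only
DEMO_SCHEMA = """
id: https://example.org/demo
name: demo
prefixes:
  linkml: https://w3id.org/linkml/
imports:
  - linkml:types
default_range: string
classes:
  NamedThing:
    slots:
      - id
      - name
  Person:
    is_a: NamedThing
    slots:
      - age
slots:
  id:
  name:
  age:
"""

sv = SchemaView(DEMO_SCHEMA)
# expected: inherited slots first, then Person's own slot -> ['id', 'name', 'age']
print(class_slots_ordered(sv, 'Person'))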

60 changes: 60 additions & 0 deletions tests/test_utils/test_normalizer.py
@@ -0,0 +1,60 @@
import unittest
from typing import Dict

import yaml
from linkml_runtime.dumpers import yaml_dumper
from linkml_runtime.utils.introspection import package_schemaview
from linkml_runtime.utils.schemaview import SchemaView

from linkml.utils.normalizer import DataNormalizer
from tests.test_generators.environment import env

SCHEMA = env.input_path('personinfo.yaml')
OUT_PATH = env.expected_path('personinfo.normalized.yaml')


class NormalizerTestCase(unittest.TestCase):
    """
    Tests data normalizer
    """

    def test_normalizer(self):
        metamodel = package_schemaview('linkml_runtime.linkml_model.meta')
        normalizer = DataNormalizer(schemaview=metamodel)
        data = SchemaView(SCHEMA).schema
        data2 = normalizer.as_normalized_dict(data)
        print(yaml.safe_dump(data2, sort_keys=False))
        address = data2['classes']['Address']
        elt_keys = address.keys()

        def before(d: Dict, k1, k2):
            """Assert that key k1 appears before key k2 in d."""
            keys = list(d)
            print(f'CHECK: {k1} < {k2} {keys.index(k1)} < {keys.index(k2)}')
            assert keys.index(k1) < keys.index(k2)

        before(elt_keys, 'attributes', 'all_of')
        # before(elt_keys, 'name', 'slots')
        before(elt_keys, 'name', 'id_prefixes')
        before(elt_keys, 'slots', 'slot_usage')
        data2_obj = normalizer.as_normalized_obj(data)
        with open(OUT_PATH, 'w') as stream:
            stream.write(yaml_dumper.dumps(data2_obj))


if __name__ == '__main__':
    unittest.main()