diff --git a/AGENTS.md b/AGENTS.md index d67458f..be836b5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -3,7 +3,7 @@ ## Project Structure & Module Organization - `src/metamodel` (`linkml_meta`): Autogenerated metamodel types; optional Serde/PyO3. - `src/schemaview` (`schemaview`/`linkml_schemaview`): Schema loading/resolution and views (SchemaView, ClassView, SlotView, EnumView). -- `src/runtime` (`linkml_runtime`): Core runtime (LinkMLValue parse/validate, diff/patch, turtle). Pure Rust lib. +- `src/runtime` (`linkml_runtime`): Core runtime (LinkMLInstance parse/validate, diff/patch, turtle). Pure Rust lib. - `src/tools` (`linkml_tools`): CLI binaries: `linkml-validate`, `linkml-convert`, `linkml-diff`, `linkml-patch`, `linkml-schema-validate`. - `src/python` (`linkml_runtime_python`): PyO3 bindings and Python package `linkml_runtime_rust._native` plus helpers. - `src/wasm` (`linkml_wasm`): WASM build target. @@ -25,12 +25,14 @@ - Python (bindings helpers): follow PEP 8; prefer type hints where feasible. ## Testing Guidelines +- When testing locally, always provide network access; never try to run the tests offline. - Add integration tests under `src/runtime/tests/` when changing CLI/runtime behavior. - Prefer `assert_cmd` for CLI and `predicates` for output checks. Keep fixtures in `src/runtime/tests/data/`. - Run `cargo test --workspace` locally; ensure tests don’t rely on network input. + - Prefer modifying existing tests over adding new ones for new code paths. Extend current scenarios with extra assertions/fixtures to avoid redundant tests proliferating. For example, if adding null-handling in diff/patch, enhance the existing diff tests rather than introducing separate "basic diff works" tests that become redundant. ## Commit & Pull Request Guidelines -- Commits: short, imperative summary (e.g., “Add __repr__ for LinkMLValue”); group related changes. +- Commits: short, imperative summary (e.g., “Add __repr__ for LinkMLInstance”); group related changes. 
- PRs: include clear description, rationale, and usage examples; link issues; update docs/tests; ensure CI passes: fmt, clippy (no warnings), and tests. ### Pre-commit checklist diff --git a/README.md b/README.md index 5228669..51f72b9 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,15 @@ # linkml-core -core linkml schema operations written in rust :) +core linkml schema operations written in rust ## Crates - linkml_meta (`src/metamodel`): Autogenerated LinkML metamodel types and helpers. Optional Serde/PyO3 features for serialization and Python interop. - schemaview (`src/schemaview`): Schema loading, CURIE/URI conversion, resolution (feature-gated), and view utilities: `SchemaView`, `ClassView`, `SlotView`, `EnumView`. -- linkml_runtime (`src/runtime`): Core runtime: `LinkMLValue` parsing (YAML/JSON), basic validation, diff/patch, and Turtle serialization. Pure Rust library. +- linkml_runtime (`src/runtime`): Core runtime: `LinkMLInstance` parsing (YAML/JSON), basic validation, diff/patch, and Turtle serialization. Pure Rust library. - linkml_tools (`src/tools`): CLI tools wrapping the runtime and schemaview: - `linkml-validate`, `linkml-convert`, `linkml-diff`, `linkml-patch`, `linkml-schema-validate`. -- linkml_runtime_python (`src/python`): PyO3 bindings and Python package (`linkml_runtime_rust._native`) exposing SchemaView and LinkMLValue to Python; includes small Python helpers. +- linkml_runtime_python (`src/python`): PyO3 bindings and Python package (`linkml_runtime_rust._native`) exposing SchemaView and LinkMLInstance to Python; includes small Python helpers. - linkml_wasm (`src/wasm`): WASM build target (experimental). 
## Regenerating the metamodel diff --git a/src/python/python/linkml_runtime_rust/__init__.py b/src/python/python/linkml_runtime_rust/__init__.py index b4c6d91..8630a7b 100644 --- a/src/python/python/linkml_runtime_rust/__init__.py +++ b/src/python/python/linkml_runtime_rust/__init__.py @@ -3,6 +3,5 @@ from ._native import * # noqa: F401,F403 from ._resolver import resolve_schemas from .schemaview import SchemaView -from .debug_utils import pretty_linkml_value +from .debug_utils import pretty_linkml_instance __all__ = [name for name in globals() if not name.startswith("_")] - diff --git a/src/python/python/linkml_runtime_rust/debug_utils.py b/src/python/python/linkml_runtime_rust/debug_utils.py index 00cd9af..e74fb4f 100644 --- a/src/python/python/linkml_runtime_rust/debug_utils.py +++ b/src/python/python/linkml_runtime_rust/debug_utils.py @@ -1,6 +1,6 @@ -"""Utilities for pretty-printing LinkMLValue trees. +"""Utilities for pretty-printing LinkMLInstance trees. -This module provides a pure Python helper to render a :class:`LinkMLValue` +This module provides a pure Python helper to render a :class:`LinkMLInstance` as a human readable tree. It is intended for debugging and diagnostic use. """ @@ -9,19 +9,19 @@ from typing import Any, List try: # pragma: no cover - runtime optional during type checking - from . import LinkMLValue + from . import LinkMLInstance except Exception: # pragma: no cover - fallback when extension missing - LinkMLValue = Any # type: ignore[misc] + LinkMLInstance = Any # type: ignore[misc] -__all__ = ["pretty_linkml_value"] +__all__ = ["pretty_linkml_instance"] -def pretty_linkml_value(value: "LinkMLValue", prefix: str = '', nofirstline: bool = False) -> str: +def pretty_linkml_instance(value: "LinkMLInstance", prefix: str = '', nofirstline: bool = False) -> str: """Return a tree-style string representation of ``value``. Parameters ---------- - value: LinkMLValue + value: LinkMLInstance The value to render. 
indent: int, optional Starting indentation (number of spaces). @@ -35,10 +35,10 @@ def pretty_linkml_value(value: "LinkMLValue", prefix: str = '', nofirstline: boo if rval.kind == "scalar": txt += f"{prefix} | {key}={rval.as_python()}\n" elif rval.kind == "list": - txt += f"{prefix} | {key}:\n{pretty_linkml_value(rval, prefix + ' | ')}" + txt += f"{prefix} | {key}:\n{pretty_linkml_instance(rval, prefix + ' | ')}" else: pfx = f"{prefix} | " + ' ' * len(key) - txt += f"{prefix} | {key}: {pretty_linkml_value(rval, pfx, nofirstline=True)}" + txt += f"{prefix} | {key}: {pretty_linkml_instance(rval, pfx, nofirstline=True)}" txt += f"{prefix} |\n" elif value.kind == "list": for idx in range(len(value)): @@ -46,7 +46,7 @@ def pretty_linkml_value(value: "LinkMLValue", prefix: str = '', nofirstline: boo if rval.kind == "scalar": txt = f"{prefix} - {rval.as_python()}\n" else: - txt = f"{prefix} - {pretty_linkml_value(rval, prefix + ' ', nofirstline=True)}" + txt = f"{prefix} - {pretty_linkml_instance(rval, prefix + ' ', nofirstline=True)}" elif value.kind == "scalar": txt = f"{prefix}{value.as_python()}\n" else: diff --git a/src/python/src/lib.rs b/src/python/src/lib.rs index cbfe6b5..55a550a 100644 --- a/src/python/src/lib.rs +++ b/src/python/src/lib.rs @@ -1,7 +1,7 @@ use linkml_meta::{ClassDefinition, EnumDefinition, SchemaDefinition, SlotDefinition}; use linkml_runtime::diff::{diff as diff_internal, patch as patch_internal, Delta}; use linkml_runtime::turtle::{turtle_to_string, TurtleOptions}; -use linkml_runtime::{load_json_str, load_yaml_str, LinkMLValue}; +use linkml_runtime::{load_json_str, load_yaml_str, LinkMLInstance}; use linkml_schemaview::identifier::Identifier; use linkml_schemaview::io; use linkml_schemaview::schemaview::SchemaView; @@ -445,18 +445,18 @@ pub fn runtime_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(py_diff, m)?)?; m.add_function(wrap_pyfunction!(py_patch, m)?)?; m.add_function(wrap_pyfunction!(py_to_turtle, 
m)?)?; - m.add_class::()?; + m.add_class::()?; Ok(()) } -#[pyclass(name = "LinkMLValue")] -pub struct PyLinkMLValue { - value: LinkMLValue, +#[pyclass(name = "LinkMLInstance")] +pub struct PyLinkMLInstance { + value: LinkMLInstance, sv: Py, } -impl PyLinkMLValue { - fn new(value: LinkMLValue, sv: Py) -> Self { +impl PyLinkMLInstance { + fn new(value: LinkMLInstance, sv: Py) -> Self { Self { value, sv } } } @@ -467,7 +467,7 @@ fn json_value_to_py(py: Python<'_>, v: &JsonValue) -> PyObject { json_mod.call_method1("loads", (s,)).unwrap().unbind() } -impl Clone for PyLinkMLValue { +impl Clone for PyLinkMLInstance { fn clone(&self) -> Self { Python::with_gil(|py| Self { value: self.value.clone(), @@ -477,12 +477,19 @@ impl Clone for PyLinkMLValue { } #[pymethods] -impl PyLinkMLValue { +impl PyLinkMLInstance { + /// Semantic equality per LinkML Instances spec. + /// Compares this value with another `LinkMLInstance`. + #[pyo3(signature = (other, treat_missing_as_null = false))] + fn equals(&self, other: &PyLinkMLInstance, treat_missing_as_null: bool) -> bool { + self.value.equals(&other.value, treat_missing_as_null) + } #[getter] fn slot_name(&self) -> Option { match &self.value { - LinkMLValue::Scalar { slot, .. } => Some(slot.name.clone()), - LinkMLValue::List { slot, .. } => Some(slot.name.clone()), + LinkMLInstance::Scalar { slot, .. } => Some(slot.name.clone()), + LinkMLInstance::List { slot, .. } => Some(slot.name.clone()), + LinkMLInstance::Null { slot, .. } => Some(slot.name.clone()), _ => None, } } @@ -490,18 +497,25 @@ impl PyLinkMLValue { #[getter] fn kind(&self) -> String { match &self.value { - LinkMLValue::Scalar { .. } => "scalar".to_string(), - LinkMLValue::List { .. } => "list".to_string(), - LinkMLValue::Mapping { .. } => "mapping".to_string(), - LinkMLValue::Object { .. } => "object".to_string(), + LinkMLInstance::Scalar { .. } => "scalar".to_string(), + LinkMLInstance::Null { .. } => "null".to_string(), + LinkMLInstance::List { .. 
} => "list".to_string(), + LinkMLInstance::Mapping { .. } => "mapping".to_string(), + LinkMLInstance::Object { .. } => "object".to_string(), } } + #[getter] + fn node_id(&self) -> u64 { + self.value.node_id() + } + #[getter] fn slot_definition(&self) -> Option { match &self.value { - LinkMLValue::Scalar { slot, .. } => Some(slot.definition().clone()), - LinkMLValue::List { slot, .. } => Some(slot.definition().clone()), + LinkMLInstance::Scalar { slot, .. } => Some(slot.definition().clone()), + LinkMLInstance::List { slot, .. } => Some(slot.definition().clone()), + LinkMLInstance::Null { slot, .. } => Some(slot.definition().clone()), _ => None, } } @@ -509,9 +523,10 @@ impl PyLinkMLValue { #[getter] fn class_definition(&self) -> Option { match &self.value { - LinkMLValue::Object { class, .. } => Some(class.def().clone()), - LinkMLValue::Scalar { class: Some(c), .. } => Some(c.def().clone()), - LinkMLValue::List { class: Some(c), .. } => Some(c.def().clone()), + LinkMLInstance::Object { class, .. } => Some(class.def().clone()), + LinkMLInstance::Scalar { class: Some(c), .. } => Some(c.def().clone()), + LinkMLInstance::List { class: Some(c), .. } => Some(c.def().clone()), + LinkMLInstance::Null { class: Some(c), .. } => Some(c.def().clone()), _ => None, } } @@ -519,19 +534,21 @@ impl PyLinkMLValue { #[getter] fn class_name(&self) -> Option { match &self.value { - LinkMLValue::Object { class, .. } => Some(class.def().name.clone()), - LinkMLValue::Scalar { class: Some(c), .. } => Some(c.def().name.clone()), - LinkMLValue::List { class: Some(c), .. } => Some(c.def().name.clone()), + LinkMLInstance::Object { class, .. } => Some(class.def().name.clone()), + LinkMLInstance::Scalar { class: Some(c), .. } => Some(c.def().name.clone()), + LinkMLInstance::List { class: Some(c), .. } => Some(c.def().name.clone()), + LinkMLInstance::Null { class: Some(c), .. 
} => Some(c.def().name.clone()), _ => None, } } fn __len__(&self) -> PyResult { Ok(match &self.value { - LinkMLValue::Scalar { .. } => 0, - LinkMLValue::List { values, .. } => values.len(), - LinkMLValue::Mapping { values, .. } => values.len(), - LinkMLValue::Object { values, .. } => values.len(), + LinkMLInstance::Scalar { .. } => 0, + LinkMLInstance::Null { .. } => 0, + LinkMLInstance::List { values, .. } => values.len(), + LinkMLInstance::Mapping { values, .. } => values.len(), + LinkMLInstance::Object { values, .. } => values.len(), }) } @@ -539,27 +556,27 @@ impl PyLinkMLValue { &self, py: Python<'py>, key: &Bound<'py, PyAny>, - ) -> PyResult { + ) -> PyResult { match &self.value { - LinkMLValue::List { values, .. } => { + LinkMLInstance::List { values, .. } => { let idx: usize = key.extract()?; values .get(idx) - .map(|v| PyLinkMLValue::new(v.clone(), self.sv.clone_ref(py))) + .map(|v| PyLinkMLInstance::new(v.clone(), self.sv.clone_ref(py))) .ok_or_else(|| PyException::new_err("index out of range")) } - LinkMLValue::Object { values, .. } => { + LinkMLInstance::Object { values, .. } => { let k: String = key.extract()?; values .get(&k) - .map(|v| PyLinkMLValue::new(v.clone(), self.sv.clone_ref(py))) + .map(|v| PyLinkMLInstance::new(v.clone(), self.sv.clone_ref(py))) .ok_or_else(|| PyException::new_err("key not found")) } - LinkMLValue::Mapping { values, .. } => { + LinkMLInstance::Mapping { values, .. } => { let k: String = key.extract()?; values .get(&k) - .map(|v| PyLinkMLValue::new(v.clone(), self.sv.clone_ref(py))) + .map(|v| PyLinkMLInstance::new(v.clone(), self.sv.clone_ref(py))) .ok_or_else(|| PyException::new_err("key not found")) } _ => Err(PyException::new_err("not indexable")), @@ -567,19 +584,19 @@ impl PyLinkMLValue { } /// Navigate by a path of strings (map keys or list indices). - /// Returns a new LinkMLValue if found, otherwise None. + /// Returns a new LinkMLInstance if found, otherwise None. 
#[pyo3(name = "navigate")] fn py_navigate<'py>( &self, py: Python<'py>, path: &Bound<'py, PyAny>, - ) -> PyResult> { + ) -> PyResult> { // Expect any iterable of strings let path_vec: Vec = path .extract() .map_err(|_| PyException::new_err("path must be a sequence of strings"))?; if let Some(found) = self.value.navigate_path(&path_vec) { - Ok(Some(PyLinkMLValue::new( + Ok(Some(PyLinkMLInstance::new( found.clone(), self.sv.clone_ref(py), ))) @@ -590,28 +607,28 @@ impl PyLinkMLValue { fn keys(&self) -> PyResult> { match &self.value { - LinkMLValue::Object { values, .. } => Ok(values.keys().cloned().collect()), - LinkMLValue::Mapping { values, .. } => Ok(values.keys().cloned().collect()), + LinkMLInstance::Object { values, .. } => Ok(values.keys().cloned().collect()), + LinkMLInstance::Mapping { values, .. } => Ok(values.keys().cloned().collect()), _ => Ok(Vec::new()), } } - fn values<'py>(&self, py: Python<'py>) -> PyResult> { + fn values<'py>(&self, py: Python<'py>) -> PyResult> { match &self.value { - LinkMLValue::Object { values, .. } => Ok(values + LinkMLInstance::Object { values, .. } => Ok(values .values() .cloned() - .map(|v| PyLinkMLValue::new(v, self.sv.clone_ref(py))) + .map(|v| PyLinkMLInstance::new(v, self.sv.clone_ref(py))) .collect()), - LinkMLValue::Mapping { values, .. } => Ok(values + LinkMLInstance::Mapping { values, .. } => Ok(values .values() .cloned() - .map(|v| PyLinkMLValue::new(v, self.sv.clone_ref(py))) + .map(|v| PyLinkMLInstance::new(v, self.sv.clone_ref(py))) .collect()), - LinkMLValue::List { values, .. } => Ok(values + LinkMLInstance::List { values, .. } => Ok(values .iter() .cloned() - .map(|v| PyLinkMLValue::new(v, self.sv.clone_ref(py))) + .map(|v| PyLinkMLInstance::new(v, self.sv.clone_ref(py))) .collect()), _ => Ok(Vec::new()), } @@ -642,27 +659,33 @@ impl PyLinkMLValue { fn __repr__(&self) -> PyResult { Ok(match &self.value { - LinkMLValue::Scalar { value, slot, .. 
} => { - format!("LinkMLValue.Scalar(slot='{}', value={})", slot.name, value) + LinkMLInstance::Scalar { value, slot, .. } => { + format!( + "LinkMLInstance.Scalar(slot='{}', value={})", + slot.name, value + ) + } + LinkMLInstance::Null { slot, .. } => { + format!("LinkMLInstance.Null(slot='{}')", slot.name) } - LinkMLValue::List { values, slot, .. } => { + LinkMLInstance::List { values, slot, .. } => { format!( - "LinkMLValue.List(slot='{}', len={})", + "LinkMLInstance.List(slot='{}', len={})", slot.name, values.len() ) } - LinkMLValue::Mapping { values, slot, .. } => { + LinkMLInstance::Mapping { values, slot, .. } => { format!( - "LinkMLValue.Mapping(slot='{}', keys={:?})", + "LinkMLInstance.Mapping(slot='{}', keys={:?})", slot.name, values.keys().collect::>() ) } - LinkMLValue::Object { values, class, .. } => { + LinkMLInstance::Object { values, class, .. } => { let keys: Vec<&String> = values.keys().collect(); format!( - "LinkMLValue.Object(class='{}', keys={:?})", + "LinkMLInstance.Object(class='{}', keys={:?})", class.def().name.clone(), keys ) @@ -681,7 +704,7 @@ fn load_yaml( source: &Bound<'_, PyAny>, sv: Py, class: Option>, -) -> PyResult { +) -> PyResult { let sv_ref = sv.bind(py).borrow(); let rust_sv = sv_ref.as_rust(); let conv = rust_sv.converter(); @@ -697,7 +720,7 @@ fn load_yaml( .ok_or_else(|| PyException::new_err("class not found, please provide a valid class"))?; let v = load_yaml_str(&text, rust_sv, cv.as_rust(), &conv) .map_err(|e| PyException::new_err(e.to_string()))?; - Ok(PyLinkMLValue::new(v, sv)) + Ok(PyLinkMLInstance::new(v, sv)) } #[pyfunction] @@ -706,7 +729,7 @@ fn load_json( source: &Bound<'_, PyAny>, sv: Py, class: Option>, -) -> PyResult { +) -> PyResult { let sv_ref = sv.bind(py).borrow(); let rust_sv = sv_ref.as_rust(); let conv = rust_sv.converter(); @@ -722,20 +745,20 @@ fn load_json( let (text, _) = py_filelike_or_string_to_string(source)?; let v = load_json_str(&text, rust_sv, cv.as_rust(), &conv) .map_err(|e| 
PyException::new_err(e.to_string()))?; - Ok(PyLinkMLValue::new(v, sv)) + Ok(PyLinkMLInstance::new(v, sv)) } -#[pyfunction(name = "diff", signature = (source, target, ignore_missing_target=None))] +#[pyfunction(name = "diff", signature = (source, target, treat_missing_as_null=None))] fn py_diff( py: Python<'_>, - source: &PyLinkMLValue, - target: &PyLinkMLValue, - ignore_missing_target: Option, + source: &PyLinkMLInstance, + target: &PyLinkMLInstance, + treat_missing_as_null: Option, ) -> PyResult { let deltas = diff_internal( &source.value, &target.value, - ignore_missing_target.unwrap_or(false), + treat_missing_as_null.unwrap_or(false), ); let vals: Vec = deltas .iter() @@ -744,23 +767,47 @@ fn py_diff( Ok(json_value_to_py(py, &JsonValue::Array(vals))) } -#[pyfunction(name = "patch")] +#[pyfunction(name = "patch", signature = (source, deltas, treat_missing_as_null = true, ignore_no_ops = true))] fn py_patch( py: Python<'_>, - source: &PyLinkMLValue, + source: &PyLinkMLInstance, deltas: &Bound<'_, PyAny>, -) -> PyResult { + treat_missing_as_null: bool, + ignore_no_ops: bool, +) -> PyResult { let json_mod = PyModule::import(py, "json")?; let deltas_str: String = json_mod.call_method1("dumps", (deltas,))?.extract()?; let deltas_vec: Vec = serde_json::from_str(&deltas_str).map_err(|e| PyException::new_err(e.to_string()))?; let sv_ref = source.sv.bind(py).borrow(); let rust_sv = sv_ref.as_rust(); - let new_value = patch_internal(&source.value, &deltas_vec, rust_sv); - Ok(PyLinkMLValue::new(new_value, source.sv.clone_ref(py))) + let (new_value, trace) = patch_internal( + &source.value, + &deltas_vec, + rust_sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops, + treat_missing_as_null, + }, + ) + .map_err(|e| PyException::new_err(e.to_string()))?; + let trace_json = serde_json::json!({ + "added": trace.added, + "deleted": trace.deleted, + "updated": trace.updated, + }); + let py_val = PyLinkMLInstance::new(new_value, source.sv.clone_ref(py)); + let dict = 
pyo3::types::PyDict::new(py); + dict.set_item("value", Py::new(py, py_val)?)?; + dict.set_item("trace", json_value_to_py(py, &trace_json))?; + Ok(dict.into_any().unbind()) } #[pyfunction(name = "to_turtle", signature = (value, skolem=None))] -fn py_to_turtle(py: Python<'_>, value: &PyLinkMLValue, skolem: Option) -> PyResult { +fn py_to_turtle( + py: Python<'_>, + value: &PyLinkMLInstance, + skolem: Option, +) -> PyResult { value.as_turtle(py, skolem) } diff --git a/src/python/tests/python_equals.rs b/src/python/tests/python_equals.rs new file mode 100644 index 0000000..eb51043 --- /dev/null +++ b/src/python/tests/python_equals.rs @@ -0,0 +1,75 @@ +use linkml_runtime_python::runtime_module; +use pyo3::prelude::*; +use pyo3::types::PyDict; +use std::path::PathBuf; + +fn data_path(name: &str) -> PathBuf { + let base = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let candidates = [ + base.join("../runtime/tests/data").join(name), + base.join("../schemaview/tests/data").join(name), + base.join("tests/data").join(name), + ]; + for c in candidates { + if c.exists() { + return c; + } + } + panic!("test data not found: {}", name); +} + +#[test] +fn python_equals_api() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + let module = PyModule::new(py, "linkml_runtime").unwrap(); + runtime_module(&module).unwrap(); + let sys = py.import("sys").unwrap(); + let modules = sys.getattr("modules").unwrap(); + let sys_modules = modules.downcast::().unwrap(); + sys_modules.set_item("linkml_runtime", module).unwrap(); + + let locals = PyDict::new(py); + locals + .set_item( + "schema_path", + data_path("personinfo.yaml").to_str().unwrap(), + ) + .unwrap(); + + pyo3::py_run!( + py, + *locals, + r#" +import linkml_runtime as lr +import json +sv = lr.make_schema_view(schema_path) +cls = sv.get_class_view('Container') + +doc1 = { + 'objects': [ + { + 'objecttype': 'personinfo:Person', + 'id': 'P:1', + 'name': 'Alice', + 'current_address': None + } + ] +} +doc2 = { + 
'objects': [ + { + 'objecttype': 'personinfo:Person', + 'id': 'P:1', + 'name': 'Alice' + } + ] +} + +v1 = lr.load_json(json.dumps(doc1), sv, cls) +v2 = lr.load_json(json.dumps(doc2), sv, cls) +assert v1['objects'][0].equals(v2['objects'][0], True) +"# + ); + }); +} diff --git a/src/runtime/src/diff.rs b/src/runtime/src/diff.rs index a64068e..e152e51 100644 --- a/src/runtime/src/diff.rs +++ b/src/runtime/src/diff.rs @@ -1,7 +1,7 @@ -use crate::{load_json_str, LinkMLValue}; +use crate::{LResult, LinkMLError, LinkMLInstance, NodeId}; use linkml_schemaview::schemaview::{SchemaView, SlotView}; use serde::{Deserialize, Serialize}; -use serde_json::{Map, Value as JsonValue}; +use serde_json::Value as JsonValue; const IGNORE_ANNOTATION: &str = "diff.linkml.io/ignore"; @@ -23,20 +23,21 @@ pub struct Delta { pub new: Option, } -impl LinkMLValue { +impl LinkMLInstance { pub fn to_json(&self) -> JsonValue { match self { - LinkMLValue::Scalar { value, .. } => value.clone(), - LinkMLValue::List { values, .. } => { + LinkMLInstance::Scalar { value, .. } => value.clone(), + LinkMLInstance::Null { .. } => JsonValue::Null, + LinkMLInstance::List { values, .. } => { JsonValue::Array(values.iter().map(|v| v.to_json()).collect()) } - LinkMLValue::Mapping { values, .. } => JsonValue::Object( + LinkMLInstance::Mapping { values, .. } => JsonValue::Object( values .iter() .map(|(k, v)| (k.clone(), v.to_json())) .collect(), ), - LinkMLValue::Object { values, .. } => JsonValue::Object( + LinkMLInstance::Object { values, .. } => JsonValue::Object( values .iter() .map(|(k, v)| (k.clone(), v.to_json())) @@ -46,13 +47,24 @@ impl LinkMLValue { } } -pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: bool) -> Vec { +/// Compute a semantic diff between two LinkMLInstance trees. +/// +/// Semantics of nulls and missing values: +/// - X -> null: update to null (old = X, new = null). +/// - null -> X: update from null (old = null, new = X). 
+/// - missing -> X: add (old = None, new = X). +/// - X -> missing: ignored by default; if `treat_missing_as_null` is true, update to null (old = X, new = null). +pub fn diff( + source: &LinkMLInstance, + target: &LinkMLInstance, + treat_missing_as_null: bool, +) -> Vec { fn inner( path: &mut Vec, slot: Option<&SlotView>, - s: &LinkMLValue, - t: &LinkMLValue, - ignore_missing: bool, + s: &LinkMLInstance, + t: &LinkMLInstance, + treat_missing_as_null: bool, out: &mut Vec, ) { if let Some(sl) = slot { @@ -62,17 +74,41 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b } match (s, t) { ( - LinkMLValue::Object { + LinkMLInstance::Object { values: sm, class: sc, .. }, - LinkMLValue::Object { + LinkMLInstance::Object { values: tm, class: tc, .. }, ) => { + // If objects have an identifier or key slot and it changed, treat as whole-object replacement + // This applies for single-valued and list-valued inlined objects. + let key_slot_name = sc + .key_or_identifier_slot() + .or_else(|| tc.key_or_identifier_slot()) + .map(|s| s.name.clone()); + if let Some(ks) = key_slot_name { + let sid = sm.get(&ks); + let tid = tm.get(&ks); + if let ( + Some(LinkMLInstance::Scalar { value: s_id, .. }), + Some(LinkMLInstance::Scalar { value: t_id, .. 
}), + ) = (sid, tid) + { + if s_id != t_id { + out.push(Delta { + path: path.clone(), + old: Some(s.to_json()), + new: Some(t.to_json()), + }); + return; + } + } + } for (k, sv) in sm { let slot_view = sc .slots() @@ -81,14 +117,17 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b .or_else(|| tc.slots().iter().find(|s| s.name == *k)); path.push(k.clone()); match tm.get(k) { - Some(tv) => inner(path, slot_view, sv, tv, ignore_missing, out), + Some(tv) => inner(path, slot_view, sv, tv, treat_missing_as_null, out), None => { - if !ignore_missing && !slot_view.is_some_and(slot_is_ignored) { - out.push(Delta { - path: path.clone(), - old: Some(sv.to_json()), - new: None, - }); + if !slot_view.is_some_and(slot_is_ignored) { + // Missing target slot: either ignore (default) or treat as update to null + if treat_missing_as_null { + out.push(Delta { + path: path.clone(), + old: Some(sv.to_json()), + new: Some(JsonValue::Null), + }); + } } } } @@ -113,12 +152,14 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b } } } - (LinkMLValue::List { values: sl, .. }, LinkMLValue::List { values: tl, .. }) => { + (LinkMLInstance::List { values: sl, .. }, LinkMLInstance::List { values: tl, .. }) => { let max_len = std::cmp::max(sl.len(), tl.len()); for i in 0..max_len { path.push(i.to_string()); match (sl.get(i), tl.get(i)) { - (Some(sv), Some(tv)) => inner(path, None, sv, tv, ignore_missing, out), + (Some(sv), Some(tv)) => { + inner(path, None, sv, tv, treat_missing_as_null, out) + } (Some(sv), None) => out.push(Delta { path: path.clone(), old: Some(sv.to_json()), @@ -134,13 +175,18 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b path.pop(); } } - (LinkMLValue::Mapping { values: sm, .. }, LinkMLValue::Mapping { values: tm, .. }) => { + ( + LinkMLInstance::Mapping { values: sm, .. }, + LinkMLInstance::Mapping { values: tm, .. 
}, + ) => { use std::collections::BTreeSet; let keys: BTreeSet<_> = sm.keys().chain(tm.keys()).cloned().collect(); for k in keys { path.push(k.clone()); match (sm.get(&k), tm.get(&k)) { - (Some(sv), Some(tv)) => inner(path, None, sv, tv, ignore_missing, out), + (Some(sv), Some(tv)) => { + inner(path, None, sv, tv, treat_missing_as_null, out) + } (Some(sv), None) => out.push(Delta { path: path.clone(), old: Some(sv.to_json()), @@ -156,14 +202,29 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b path.pop(); } } - _ => { - let sv = s.to_json(); - let tv = t.to_json(); - if sv != tv { + (LinkMLInstance::Null { .. }, LinkMLInstance::Null { .. }) => {} + (LinkMLInstance::Null { .. }, tv) => { + out.push(Delta { + path: path.clone(), + old: Some(JsonValue::Null), + new: Some(tv.to_json()), + }); + } + (sv, LinkMLInstance::Null { .. }) => { + out.push(Delta { + path: path.clone(), + old: Some(sv.to_json()), + new: Some(JsonValue::Null), + }); + } + (sv, tv) => { + let sj = sv.to_json(); + let tj = tv.to_json(); + if sj != tj { out.push(Delta { path: path.clone(), - old: Some(sv), - new: Some(tv), + old: Some(sj), + new: Some(tj), }); } } @@ -175,90 +236,303 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b None, source, target, - ignore_missing_target, + treat_missing_as_null, &mut out, ); out } -pub fn patch(source: &LinkMLValue, deltas: &[Delta], sv: &SchemaView) -> LinkMLValue { - let mut json = source.to_json(); +#[derive(Debug, Clone, Default)] +pub struct PatchTrace { + /// Node IDs of subtrees that were newly created by the patch. + /// + /// See [`crate::NodeId`] for semantics: these are internal, ephemeral IDs + /// that are useful for tooling and provenance, not object identifiers. + pub added: Vec, + /// Node IDs of subtrees that were removed by the patch. + pub deleted: Vec, + /// Node IDs of nodes that were directly updated (e.g., parent containers, scalars). 
+ pub updated: Vec, +} + +#[derive(Debug, Clone, Copy)] +pub struct PatchOptions { + pub ignore_no_ops: bool, + pub treat_missing_as_null: bool, +} + +impl Default for PatchOptions { + fn default() -> Self { + Self { + ignore_no_ops: true, + treat_missing_as_null: true, + } + } +} + +pub fn patch( + source: &LinkMLInstance, + deltas: &[Delta], + sv: &SchemaView, + opts: PatchOptions, +) -> LResult<(LinkMLInstance, PatchTrace)> { + let mut out = source.clone(); + let mut trace = PatchTrace::default(); for d in deltas { - apply_delta(&mut json, d); + apply_delta_linkml(&mut out, &d.path, &d.new, sv, &mut trace, opts)?; } - let json_str = serde_json::to_string(&json).unwrap(); - let conv = sv.converter(); - match source { - LinkMLValue::Object { class: ref c, .. } => load_json_str(&json_str, sv, c, &conv).unwrap(), - _ => panic!("patching non-map values is not supported here"), + Ok((out, trace)) +} + +fn collect_all_ids(value: &LinkMLInstance, ids: &mut Vec) { + ids.push(value.node_id()); + match value { + LinkMLInstance::Scalar { .. } => {} + LinkMLInstance::Null { .. } => {} + LinkMLInstance::List { values, .. } => { + for v in values { + collect_all_ids(v, ids); + } + } + LinkMLInstance::Mapping { values, .. } | LinkMLInstance::Object { values, .. } => { + for v in values.values() { + collect_all_ids(v, ids); + } + } } } -fn apply_delta(value: &mut JsonValue, delta: &Delta) { - apply_delta_inner(value, &delta.path, &delta.new); +fn mark_added_subtree(v: &LinkMLInstance, trace: &mut PatchTrace) { + collect_all_ids(v, &mut trace.added); +} + +fn mark_deleted_subtree(v: &LinkMLInstance, trace: &mut PatchTrace) { + collect_all_ids(v, &mut trace.deleted); } -fn apply_delta_inner(value: &mut JsonValue, path: &[String], newv: &Option) { +// Removed thin wrappers; call LinkMLInstance builders directly at call sites. 
+ +fn apply_delta_linkml( + current: &mut LinkMLInstance, + path: &[String], + newv: &Option, + sv: &SchemaView, + trace: &mut PatchTrace, + opts: PatchOptions, +) -> LResult<()> { if path.is_empty() { if let Some(v) = newv { - *value = v.clone(); + let (class_opt, slot_opt) = match current { + LinkMLInstance::Object { class, .. } => (Some(class.clone()), None), + LinkMLInstance::List { class, slot, .. } => (class.clone(), Some(slot.clone())), + LinkMLInstance::Mapping { class, slot, .. } => (class.clone(), Some(slot.clone())), + LinkMLInstance::Scalar { class, slot, .. } => (class.clone(), Some(slot.clone())), + LinkMLInstance::Null { class, slot, .. } => (class.clone(), Some(slot.clone())), + }; + let conv = sv.converter(); + if let Some(cls) = class_opt { + let new_node = + LinkMLInstance::from_json(v.clone(), cls, slot_opt, sv, &conv, false)?; + if opts.ignore_no_ops && current.equals(&new_node, opts.treat_missing_as_null) { + // No-op delta; skip to preserve node IDs + return Ok(()); + } + mark_deleted_subtree(current, trace); + mark_added_subtree(&new_node, trace); + *current = new_node; + } } - return; + return Ok(()); } - match value { - JsonValue::Object(map) => { + + match current { + LinkMLInstance::Object { values, class, .. } => { let key = &path[0]; if path.len() == 1 { match newv { Some(v) => { - map.insert(key.clone(), v.clone()); + let conv = sv.converter(); + let slot = class.slots().iter().find(|s| s.name == *key).cloned(); + let new_child = LinkMLInstance::from_json( + v.clone(), + class.clone(), + slot.clone(), + sv, + &conv, + false, + )?; + if let Some(old_child) = values.get_mut(key) { + if opts.ignore_no_ops + && old_child.equals(&new_child, opts.treat_missing_as_null) + { + // no-op; skip + return Ok(()); + } + match (&mut *old_child, &new_child) { + ( + LinkMLInstance::Scalar { value: ov, .. }, + LinkMLInstance::Scalar { value: nv, .. 
}, + ) if !v.is_object() && !v.is_array() => { + // In-place scalar update: keep node_id stable and mark child node + *ov = nv.clone(); + trace.updated.push(old_child.node_id()); + } + _ => { + let old_snapshot = std::mem::replace(old_child, new_child); + mark_deleted_subtree(&old_snapshot, trace); + mark_added_subtree(old_child, trace); + trace.updated.push(current.node_id()); + } + } + } else { + // adding a Null assignment may be a no-op when treating missing as null + if opts.ignore_no_ops + && opts.treat_missing_as_null + && matches!(new_child, LinkMLInstance::Null { .. }) + { + return Ok(()); + } + // mark before insert + mark_added_subtree(&new_child, trace); + values.insert(key.clone(), new_child); + trace.updated.push(current.node_id()); + } } None => { - map.remove(key); + if let Some(old_child) = values.get(key) { + if opts.ignore_no_ops + && opts.treat_missing_as_null + && matches!(old_child, LinkMLInstance::Null { .. }) + { + // deleting a Null assignment: no-op + return Ok(()); + } + } + if let Some(old_child) = values.remove(key) { + mark_deleted_subtree(&old_child, trace); + trace.updated.push(current.node_id()); + } } } - } else { - let entry = map - .entry(key.clone()) - .or_insert(JsonValue::Object(Map::new())); - apply_delta_inner(entry, &path[1..], newv); + } else if let Some(child) = values.get_mut(key) { + apply_delta_linkml(child, &path[1..], newv, sv, trace, opts)?; } } - JsonValue::Array(arr) => { - let idx: usize = path[0].parse().unwrap(); + LinkMLInstance::Mapping { values, slot, .. 
} => { + let key = &path[0]; if path.len() == 1 { match newv { Some(v) => { - if idx < arr.len() { - arr[idx] = v.clone(); - } else if idx == arr.len() { - arr.push(v.clone()); - } else { - while arr.len() < idx { - arr.push(JsonValue::Null); + let conv = sv.converter(); + let new_child = LinkMLInstance::build_mapping_entry_for_slot( + slot, + v.clone(), + sv, + &conv, + Vec::new(), + )?; + if let Some(old_child) = values.get(key) { + if opts.ignore_no_ops + && old_child.equals(&new_child, opts.treat_missing_as_null) + { + return Ok(()); } - arr.push(v.clone()); + mark_deleted_subtree(old_child, trace); } + // mark before insert + mark_added_subtree(&new_child, trace); + values.insert(key.clone(), new_child); + trace.updated.push(current.node_id()); } None => { - if idx < arr.len() { - arr.remove(idx); + if let Some(old_child) = values.remove(key) { + mark_deleted_subtree(&old_child, trace); + trace.updated.push(current.node_id()); } } } - } else { - if idx >= arr.len() { - arr.resize(idx + 1, JsonValue::Null); - } - apply_delta_inner(&mut arr[idx], &path[1..], newv); + } else if let Some(child) = values.get_mut(key) { + apply_delta_linkml(child, &path[1..], newv, sv, trace, opts)?; } } - _ => { - if path.is_empty() { - if let Some(v) = newv { - *value = v.clone(); + LinkMLInstance::List { + values, + slot, + class, + .. + } => { + let idx: usize = path[0].parse().map_err(|_| { + LinkMLError(format!("invalid list index '{}' in delta path", path[0])) + })?; + if path.len() == 1 { + match newv { + Some(v) => { + if idx < values.len() { + let conv = sv.converter(); + let new_child = LinkMLInstance::build_list_item_for_slot( + slot, + class.as_ref(), + v.clone(), + sv, + &conv, + Vec::new(), + )?; + if opts.ignore_no_ops + && values[idx].equals(&new_child, opts.treat_missing_as_null) + { + return Ok(()); + } + match (&mut values[idx], &new_child) { + ( + LinkMLInstance::Scalar { value: ov, .. }, + LinkMLInstance::Scalar { value: nv, .. 
}, + ) if !v.is_object() && !v.is_array() => { + *ov = nv.clone(); + trace.updated.push(values[idx].node_id()); + } + _ => { + let old = std::mem::replace(&mut values[idx], new_child); + mark_deleted_subtree(&old, trace); + mark_added_subtree(&values[idx], trace); + trace.updated.push(current.node_id()); + } + } + } else if idx == values.len() { + let conv = sv.converter(); + let new_child = LinkMLInstance::build_list_item_for_slot( + slot, + class.as_ref(), + v.clone(), + sv, + &conv, + Vec::new(), + )?; + // mark before push + mark_added_subtree(&new_child, trace); + values.push(new_child); + trace.updated.push(current.node_id()); + } else { + return Err(LinkMLError(format!( + "list index out of bounds in add: {} > {}", + idx, + values.len() + ))); + } + } + None => { + if idx < values.len() { + let old = values.remove(idx); + mark_deleted_subtree(&old, trace); + trace.updated.push(current.node_id()); + } + } } + } else if idx < values.len() { + apply_delta_linkml(&mut values[idx], &path[1..], newv, sv, trace, opts)?; } } + LinkMLInstance::Scalar { .. } => {} + LinkMLInstance::Null { .. 
} => {} } + Ok(()) } diff --git a/src/runtime/src/lib.rs b/src/runtime/src/lib.rs index a8cd2c9..2e91186 100644 --- a/src/runtime/src/lib.rs +++ b/src/runtime/src/lib.rs @@ -5,10 +5,11 @@ use serde_json::Value as JsonValue; use std::collections::HashMap; use std::fs; use std::path::Path; +use std::sync::atomic::{AtomicU64, Ordering}; pub mod diff; pub mod turtle; -pub use diff::{diff, patch, Delta}; +pub use diff::{diff, patch, Delta, PatchTrace}; #[derive(Debug)] pub struct LinkMLError(pub String); @@ -54,60 +55,265 @@ fn slot_matches_key(slot: &SlotView, key: &str) -> bool { } #[derive(Clone)] -pub enum LinkMLValue { +pub enum LinkMLInstance { Scalar { + node_id: NodeId, value: JsonValue, slot: SlotView, class: Option, sv: SchemaView, }, + Null { + node_id: NodeId, + slot: SlotView, + class: Option, + sv: SchemaView, + }, List { - values: Vec, + node_id: NodeId, + values: Vec, slot: SlotView, class: Option, sv: SchemaView, }, Mapping { - values: HashMap, + node_id: NodeId, + values: HashMap, slot: SlotView, class: Option, sv: SchemaView, }, Object { - values: HashMap, + node_id: NodeId, + values: HashMap, class: ClassView, sv: SchemaView, }, } -impl LinkMLValue { +/// Internal node identifier used for provenance and update tracking. +/// +/// Node IDs are assigned to every `LinkMLInstance` node when values are constructed or +/// transformed. They exist solely as technical identifiers to help with patching and +/// provenance (for example, `PatchTrace.added`/`deleted` collect `NodeId`s of affected +/// subtrees). They are not intended to identify domain objects — for that, use LinkML +/// identifier or key slots as defined in the schema. +/// +/// Important properties: +/// - Local and ephemeral: loading the same data twice will yield different `NodeId`s. +/// - Non-persistent: never serialize or expose as a model identifier. +/// - Useful for tracking modifications within a single in-memory value. 
+pub type NodeId = u64; + +static NEXT_NODE_ID: AtomicU64 = AtomicU64::new(1); + +fn new_node_id() -> NodeId { + NEXT_NODE_ID.fetch_add(1, Ordering::Relaxed) +} + +impl LinkMLInstance { + /// Returns the internal [`NodeId`] of this node. + /// + /// This ID is only for internal provenance/update tracking and is not a + /// semantic identifier of the represented object. + pub fn node_id(&self) -> NodeId { + match self { + LinkMLInstance::Scalar { node_id, .. } + | LinkMLInstance::List { node_id, .. } + | LinkMLInstance::Mapping { node_id, .. } + | LinkMLInstance::Object { node_id, .. } + | LinkMLInstance::Null { node_id, .. } => *node_id, + } + } /// Navigate the value by a path of strings, where each element is either /// a dictionary key (for maps) or a list index (for lists). - /// Returns `Some(&LinkMLValue)` if the full path can be resolved, otherwise `None`. - pub fn navigate_path(&self, path: I) -> Option<&LinkMLValue> + /// Returns `Some(&LinkMLInstance)` if the full path can be resolved, otherwise `None`. + pub fn navigate_path(&self, path: I) -> Option<&LinkMLInstance> where I: IntoIterator, S: AsRef, { - let mut current: &LinkMLValue = self; + let mut current: &LinkMLInstance = self; for seg in path { let key = seg.as_ref(); match current { - LinkMLValue::Object { values, .. } => { + LinkMLInstance::Object { values, .. } => { current = values.get(key)?; } - LinkMLValue::List { values, .. } => { + LinkMLInstance::List { values, .. } => { let idx: usize = key.parse().ok()?; current = values.get(idx)?; } - LinkMLValue::Mapping { values, .. } => { + LinkMLInstance::Mapping { values, .. } => { current = values.get(key)?; } - LinkMLValue::Scalar { .. } => return None, + LinkMLInstance::Scalar { .. } => return None, + LinkMLInstance::Null { .. } => return None, } } Some(current) } + + /// Compare two LinkMLInstance instances for semantic equality per the + /// LinkML Instances specification (Identity conditions). 
+ /// + /// Key points implemented: + /// - Null equals Null. + /// - Scalars: equal iff same underlying atomic value and compatible typed context + /// (same Enum range when present; otherwise same TypeDefinition range name when present). + /// - Lists: equal iff same length and pairwise equal in order. + /// - Mappings: equal iff same keys and values equal for each key (order-insensitive). + /// - Objects: equal iff same instantiated class (by identity) and slot assignments match; when + /// `treat_missing_as_null` is true, Null is treated as omitted (normalized), otherwise Null is + /// distinct from missing. + pub fn equals(&self, other: &LinkMLInstance, treat_missing_as_null: bool) -> bool { + use LinkMLInstance::*; + match (self, other) { + (Null { .. }, Null { .. }) => true, + ( + Scalar { + value: v1, + slot: s1, + .. + }, + Scalar { + value: v2, + slot: s2, + .. + }, + ) => { + // If either slot has an enum range, both must and enum names must match + let e1 = s1.get_range_enum(); + let e2 = s2.get_range_enum(); + if e1.is_some() || e2.is_some() { + match (e1, e2) { + (Some(ev1), Some(ev2)) => { + if ev1.schema_id() != ev2.schema_id() || ev1.name() != ev2.name() { + return false; + } + } + _ => return false, + } + } else { + // Compare type ranges if explicitly set on both + let t1 = s1.definition().range.as_ref(); + let t2 = s2.definition().range.as_ref(); + if let (Some(r1), Some(r2)) = (t1, t2) { + if r1 != r2 { + return false; + } + } + } + v1 == v2 + } + (List { values: a, .. }, List { values: b, .. }) => { + if a.len() != b.len() { + return false; + } + for (x, y) in a.iter().zip(b.iter()) { + if !x.equals(y, treat_missing_as_null) { + return false; + } + } + true + } + (Mapping { values: a, .. }, Mapping { values: b, .. 
}) => { + if a.len() != b.len() { + return false; + } + for (k, va) in a.iter() { + match b.get(k) { + Some(vb) => { + if !va.equals(vb, treat_missing_as_null) { + return false; + } + } + None => return false, + } + } + true + } + ( + Object { + values: a, + class: ca, + sv: sva, + .. + }, + Object { + values: b, + class: cb, + sv: svb, + .. + }, + ) => { + // Compare class identity via canonical URIs if possible + let ida = ca.canonical_uri(); + let idb = cb.canonical_uri(); + let class_equal = if let Some(conv) = sva.converter_for_schema(ca.schema_id()) { + // Use 'sva' for comparison; identifiers are global across schemas + sva.identifier_equals(&ida, &idb, conv).unwrap_or(false) + } else if let Some(conv) = svb.converter_for_schema(cb.schema_id()) { + svb.identifier_equals(&ida, &idb, conv).unwrap_or(false) + } else { + ca.name() == cb.name() + }; + if !class_equal { + return false; + } + + if treat_missing_as_null { + // Normalize conceptually by ignoring entries whose value is Null + let count_a = a.iter().filter(|(_, v)| !matches!(v, Null { .. })).count(); + let count_b = b.iter().filter(|(_, v)| !matches!(v, Null { .. })).count(); + if count_a != count_b { + return false; + } + for (k, va) in a.iter().filter(|(_, v)| !matches!(v, Null { .. })) { + match b.get(k) { + Some(vb) => { + if matches!(vb, Null { .. }) { + return false; + } + if !va.equals(vb, treat_missing_as_null) { + return false; + } + } + None => return false, + } + } + // Ensure b has no extra non-null keys not in a + for (k, _vb) in b.iter().filter(|(_, v)| !matches!(v, Null { .. })) { + match a.get(k) { + Some(va) => { + if matches!(va, Null { .. 
}) { + return false; + } + } + None => return false, + } + } + true + } else { + if a.len() != b.len() { + return false; + } + for (k, va) in a.iter() { + match b.get(k) { + Some(vb) => { + if !va.equals(vb, treat_missing_as_null) { + return false; + } + } + None => return false, + } + } + true + } + } + _ => false, + } + } fn find_scalar_slot_for_inlined_map( class: &ClassView, key_slot_name: &str, @@ -196,7 +402,8 @@ impl LinkMLValue { Self::from_json_internal(v, class.clone(), slot_ref, sv, conv, false, p)?, ); } - Ok(LinkMLValue::Object { + Ok(LinkMLInstance::Object { + node_id: new_node_id(), values, class: class.clone(), sv: sv.clone(), @@ -215,51 +422,41 @@ impl LinkMLValue { match (inside_list, value) { (false, JsonValue::Array(arr)) => { let mut values = Vec::new(); - let class_range: Option = sl.get_range_class(); - let slot_for_item = if class_range.is_some() { - None - } else { - Some(sl.clone()) - }; for (i, v) in arr.into_iter().enumerate() { let mut p = path.clone(); p.push(format!("{}[{}]", sl.name, i)); - let v_transformed = - if let (Some(cr), JsonValue::String(s)) = (class_range.as_ref(), &v) { - if let Some(id_slot) = cr.identifier_slot() { - let mut m = serde_json::Map::new(); - m.insert(id_slot.name.clone(), JsonValue::String(s.clone())); - JsonValue::Object(m) - } else { - v - } - } else { - v - }; - values.push(Self::from_json_internal( - v_transformed, - class_range.as_ref().unwrap_or(&class).clone(), - slot_for_item.clone(), + values.push(Self::build_list_item_for_slot( + &sl, + Some(&class), + v, sv, conv, - true, p, )?); } - Ok(LinkMLValue::List { + Ok(LinkMLInstance::List { + node_id: new_node_id(), values, slot: sl.clone(), class: Some(class.clone()), sv: sv.clone(), }) } + // Preserve explicit null as a Null value for list-valued slot + (false, JsonValue::Null) => Ok(LinkMLInstance::Null { + node_id: new_node_id(), + slot: sl.clone(), + class: Some(class.clone()), + sv: sv.clone(), + }), (false, other) => Err(LinkMLError(format!( 
"expected list for slot `{}`, found {:?} at {}", sl.name, other, path_to_string(&path) ))), - (true, other) => Ok(LinkMLValue::Scalar { + (true, other) => Ok(LinkMLInstance::Scalar { + node_id: new_node_id(), value: other, slot: sl.clone(), class: Some(class.clone()), @@ -278,101 +475,30 @@ impl LinkMLValue { ) -> LResult { match value { JsonValue::Object(map) => { - let range_cv = sl - .definition() - .range - .as_ref() - .and_then(|r| sv.get_class(&Identifier::new(r), conv).ok().flatten()) - .ok_or_else(|| { - LinkMLError(format!( - "mapping slot must have class range at {}", - path_to_string(&path) - )) - })?; let mut values = HashMap::new(); for (k, v) in map.into_iter() { - let base = sv - .get_class(&Identifier::new(range_cv.name()), conv) - .ok() - .flatten() - .unwrap_or_else(|| range_cv.clone()); - let child = match v { - JsonValue::Object(m) => { - // Select the most specific subclass using any type designator in the map - let selected = Self::select_class(&m, &base, sv, conv); - let mut child_values = HashMap::new(); - for (ck, cv) in m.into_iter() { - let slot_tmp = selected - .slots() - .iter() - .find(|s| slot_matches_key(s, &ck)) - .cloned(); - let mut p = path.clone(); - p.push(format!("{}:{}", k, ck)); - let key_name = slot_tmp - .as_ref() - .map(|s| s.name.clone()) - .unwrap_or_else(|| ck.clone()); - child_values.insert( - key_name, - Self::from_json_internal( - cv, - selected.clone(), - slot_tmp, - sv, - conv, - false, - p, - )?, - ); - } - LinkMLValue::Object { - values: child_values, - class: selected, - sv: sv.clone(), - } - } - other => { - // Scalar mapping value: attach it to a chosen scalar slot if any - let scalar_slot = Self::find_scalar_slot_for_inlined_map( - &base, - range_cv - .key_or_identifier_slot() - .map(|s| s.name.as_str()) - .unwrap_or(""), - ) - .ok_or_else(|| { - LinkMLError(format!( - "no scalar slot available for inlined mapping at {}", - path_to_string(&path) - )) - })?; - let mut child_values = HashMap::new(); - 
child_values.insert( - scalar_slot.name.clone(), - LinkMLValue::Scalar { - value: other, - slot: scalar_slot.clone(), - class: Some(base.clone()), - sv: sv.clone(), - }, - ); - LinkMLValue::Object { - values: child_values, - class: base.clone(), - sv: sv.clone(), - } - } - }; + let child = Self::build_mapping_entry_for_slot(sl, v, sv, conv, { + let mut p = path.clone(); + p.push(k.clone()); + p + })?; values.insert(k, child); } - Ok(LinkMLValue::Mapping { + Ok(LinkMLInstance::Mapping { + node_id: new_node_id(), values, slot: sl.clone(), class: class.clone(), sv: sv.clone(), }) } + // Preserve explicit null as a Null value for mapping-valued slot + JsonValue::Null => Ok(LinkMLInstance::Null { + node_id: new_node_id(), + slot: sl.clone(), + class: class.clone(), + sv: sv.clone(), + }), other => Err(LinkMLError(format!( "expected mapping for slot `{}`, found {:?} at {}", sl.name, @@ -397,27 +523,21 @@ impl LinkMLValue { class.name() )) })?; - let class_range: Option = sl.get_range_class(); - let slot_for_item = if class_range.is_some() { - None - } else { - Some(sl.clone()) - }; let mut values = Vec::new(); for (i, v) in arr.into_iter().enumerate() { let mut p = path.clone(); p.push(format!("[{}]", i)); - values.push(Self::from_json_internal( + values.push(Self::build_list_item_for_slot( + &sl, + Some(&class), v, - class_range.as_ref().unwrap_or(&class).clone(), - slot_for_item.clone(), sv, conv, - false, p, )?); } - Ok(LinkMLValue::List { + Ok(LinkMLInstance::List { + node_id: new_node_id(), values, slot: sl, class: Some(class), @@ -463,7 +583,8 @@ impl LinkMLValue { Self::from_json_internal(v, chosen.clone(), slot_tmp, sv, conv, false, p)?, ); } - Ok(LinkMLValue::Object { + Ok(LinkMLInstance::Object { + node_id: new_node_id(), values, class: chosen, sv: sv.clone(), @@ -485,12 +606,22 @@ impl LinkMLValue { classview_name )) })?; - Ok(LinkMLValue::Scalar { - value, - slot: sl, - class: Some(class.clone()), - sv: sv.clone(), - }) + if value.is_null() { + 
Ok(LinkMLInstance::Null { + node_id: new_node_id(), + slot: sl, + class: Some(class.clone()), + sv: sv.clone(), + }) + } else { + Ok(LinkMLInstance::Scalar { + node_id: new_node_id(), + value, + slot: sl, + class: Some(class.clone()), + sv: sv.clone(), + }) + } } fn from_json_internal( @@ -542,6 +673,135 @@ impl LinkMLValue { ) -> LResult { Self::from_json_internal(value, class, slot, sv, conv, inside_list, Vec::new()) } + + // Shared builders (used by loaders and patch logic) + pub(crate) fn build_list_item_for_slot( + list_slot: &SlotView, + list_class: Option<&ClassView>, + value: JsonValue, + sv: &SchemaView, + conv: &Converter, + path: Vec, + ) -> LResult { + let class_range: Option = list_slot.get_range_class(); + let slot_for_item = if class_range.is_some() { + None + } else { + Some(list_slot.clone()) + }; + let v_transformed = if let (Some(cr), JsonValue::String(s)) = (class_range.as_ref(), &value) + { + if let Some(id_slot) = cr.identifier_slot() { + let mut m = serde_json::Map::new(); + m.insert(id_slot.name.clone(), JsonValue::String(s.clone())); + JsonValue::Object(m) + } else { + value + } + } else { + value + }; + Self::from_json_internal( + v_transformed, + class_range + .as_ref() + .or(list_class) + .cloned() + .ok_or_else(|| LinkMLError("list item class context".to_string()))?, + slot_for_item, + sv, + conv, + true, + path, + ) + } + + pub(crate) fn build_mapping_entry_for_slot( + map_slot: &SlotView, + value: JsonValue, + sv: &SchemaView, + conv: &Converter, + path: Vec, + ) -> LResult { + let range_cv = map_slot + .definition() + .range + .as_ref() + .and_then(|r| sv.get_class(&Identifier::new(r), conv).ok().flatten()) + .ok_or_else(|| { + LinkMLError(format!( + "mapping slot must have class range at {}", + path_to_string(&path) + )) + })?; + match value { + JsonValue::Object(m) => { + let selected = Self::select_class(&m, &range_cv, sv, conv); + let mut child_values = HashMap::new(); + for (ck, cv) in m.into_iter() { + let slot_tmp = selected + 
.slots() + .iter() + .find(|s| slot_matches_key(s, &ck)) + .cloned(); + let mut p = path.clone(); + p.push(ck.clone()); + let key_name = slot_tmp + .as_ref() + .map(|s| s.name.clone()) + .unwrap_or_else(|| ck.clone()); + child_values.insert( + key_name, + Self::from_json_internal( + cv, + selected.clone(), + slot_tmp, + sv, + conv, + false, + p, + )?, + ); + } + Ok(LinkMLInstance::Object { + node_id: new_node_id(), + values: child_values, + class: selected, + sv: sv.clone(), + }) + } + other => { + let key_slot_name = range_cv + .key_or_identifier_slot() + .map(|s| s.name.as_str()) + .unwrap_or(""); + let scalar_slot = Self::find_scalar_slot_for_inlined_map(&range_cv, key_slot_name) + .ok_or_else(|| { + LinkMLError(format!( + "no scalar slot available for inlined mapping at {}", + path_to_string(&path) + )) + })?; + let mut child_values = HashMap::new(); + child_values.insert( + scalar_slot.name.clone(), + LinkMLInstance::Scalar { + node_id: new_node_id(), + value: other, + slot: scalar_slot.clone(), + class: Some(range_cv.clone()), + sv: sv.clone(), + }, + ); + Ok(LinkMLInstance::Object { + node_id: new_node_id(), + values: child_values, + class: range_cv, + sv: sv.clone(), + }) + } + } + } } pub fn load_yaml_file( @@ -549,7 +809,7 @@ pub fn load_yaml_file( sv: &SchemaView, class: &ClassView, conv: &Converter, -) -> std::result::Result> { +) -> std::result::Result> { let text = fs::read_to_string(path)?; load_yaml_str(&text, sv, class, conv) } @@ -559,10 +819,10 @@ pub fn load_yaml_str( sv: &SchemaView, class: &ClassView, conv: &Converter, -) -> std::result::Result> { +) -> std::result::Result> { let value: serde_yaml::Value = serde_yaml::from_str(data)?; let json = serde_json::to_value(value)?; - LinkMLValue::from_json(json, class.clone(), None, sv, conv, false) + LinkMLInstance::from_json(json, class.clone(), None, sv, conv, false) .map_err(|e| Box::new(e) as Box) } @@ -571,7 +831,7 @@ pub fn load_json_file( sv: &SchemaView, class: &ClassView, conv: &Converter, 
-) -> std::result::Result> { +) -> std::result::Result> { let text = fs::read_to_string(path)?; load_json_str(&text, sv, class, conv) } @@ -581,15 +841,15 @@ pub fn load_json_str( sv: &SchemaView, class: &ClassView, conv: &Converter, -) -> std::result::Result> { +) -> std::result::Result> { let value: JsonValue = serde_json::from_str(data)?; - LinkMLValue::from_json(value, class.clone(), None, sv, conv, false) + LinkMLInstance::from_json(value, class.clone(), None, sv, conv, false) .map_err(|e| Box::new(e) as Box) } -fn validate_inner(value: &LinkMLValue) -> std::result::Result<(), String> { +fn validate_inner(value: &LinkMLInstance) -> std::result::Result<(), String> { match value { - LinkMLValue::Scalar { + LinkMLInstance::Scalar { value: jv, slot, .. } => { if let Some(ev) = slot.get_range_enum() { @@ -618,19 +878,20 @@ fn validate_inner(value: &LinkMLValue) -> std::result::Result<(), String> { } Ok(()) } - LinkMLValue::List { values, .. } => { + LinkMLInstance::Null { .. } => Ok(()), + LinkMLInstance::List { values, .. } => { for v in values { validate_inner(v)?; } Ok(()) } - LinkMLValue::Mapping { values, .. } => { + LinkMLInstance::Mapping { values, .. } => { for v in values.values() { validate_inner(v)?; } Ok(()) } - LinkMLValue::Object { values, class, .. } => { + LinkMLInstance::Object { values, class, .. } => { for (k, v) in values { if class.slots().iter().all(|s| s.name != *k) { return Err(format!("unknown slot `{}` for class `{}`", k, class.name())); @@ -642,24 +903,25 @@ fn validate_inner(value: &LinkMLValue) -> std::result::Result<(), String> { } } -pub fn validate(value: &LinkMLValue) -> std::result::Result<(), String> { +pub fn validate(value: &LinkMLInstance) -> std::result::Result<(), String> { validate_inner(value) } -fn validate_collect(value: &LinkMLValue, errors: &mut Vec) { +fn validate_collect(value: &LinkMLInstance, errors: &mut Vec) { match value { - LinkMLValue::Scalar { .. } => {} - LinkMLValue::List { values, .. 
} => { + LinkMLInstance::Scalar { .. } => {} + LinkMLInstance::Null { .. } => {} + LinkMLInstance::List { values, .. } => { for v in values { validate_collect(v, errors); } } - LinkMLValue::Mapping { values, .. } => { + LinkMLInstance::Mapping { values, .. } => { for v in values.values() { validate_collect(v, errors); } } - LinkMLValue::Object { values, class, .. } => { + LinkMLInstance::Object { values, class, .. } => { for (k, v) in values { if class.slots().iter().all(|s| s.name != *k) { errors.push(format!("unknown slot `{}` for class `{}`", k, class.name())); @@ -670,7 +932,7 @@ fn validate_collect(value: &LinkMLValue, errors: &mut Vec) { } } -pub fn validate_errors(value: &LinkMLValue) -> Vec { +pub fn validate_errors(value: &LinkMLInstance) -> Vec { let mut errs = Vec::new(); validate_collect(value, &mut errs); errs diff --git a/src/runtime/src/turtle.rs b/src/runtime/src/turtle.rs index 073dcfc..ef99314 100644 --- a/src/runtime/src/turtle.rs +++ b/src/runtime/src/turtle.rs @@ -13,7 +13,7 @@ use oxttl::TurtleSerializer; use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use regex::Regex; -use crate::LinkMLValue; +use crate::LinkMLInstance; pub struct TurtleOptions { pub skolem: bool, @@ -99,7 +99,7 @@ fn literal_and_type(value: &JsonValue, slot: &SlotView) -> (String, Option, + map: &std::collections::HashMap, class: &ClassView, conv: &Converter, state: &mut State, @@ -107,7 +107,7 @@ fn identifier_node( index: Option, ) -> (Node, Option) { if let Some(id_slot) = class.identifier_slot() { - if let Some(LinkMLValue::Scalar { value, .. }) = map.get(&id_slot.name) { + if let Some(LinkMLInstance::Scalar { value, .. 
}) = map.get(&id_slot.name) { let lit = literal_value(value); if let Ok(iri) = Identifier::new(&lit).to_uri(conv) { return (Node::Named(iri.0), Some(id_slot.name.clone())); @@ -120,7 +120,7 @@ fn identifier_node( if let Some(p) = parent { let part_opt = class.key_or_identifier_slot().and_then(|ks| { map.get(&ks.name).and_then(|v| match v { - LinkMLValue::Scalar { value, .. } => { + LinkMLInstance::Scalar { value, .. } => { if let JsonValue::String(s) = value { Some(encode_path_part(s)) } else { @@ -146,7 +146,7 @@ fn identifier_node( #[allow(clippy::too_many_arguments)] fn serialize_map( subject: &Node, - map: &std::collections::HashMap, + map: &std::collections::HashMap, class: Option<&ClassView>, formatter: &mut WriterTurtleSerializer, _sv: &SchemaView, @@ -177,8 +177,10 @@ fn serialize_map( continue; } let skip = match v { - LinkMLValue::Scalar { slot, .. } => slot.definition().designates_type.unwrap_or(false), - LinkMLValue::List { slot, .. } => slot.definition().designates_type.unwrap_or(false), + LinkMLInstance::Scalar { slot, .. } => { + slot.definition().designates_type.unwrap_or(false) + } + LinkMLInstance::List { slot, .. } => slot.definition().designates_type.unwrap_or(false), _ => false, }; if skip { @@ -187,7 +189,7 @@ fn serialize_map( let pred_iri = format!("{}:{}", state.default_prefix, k); let predicate = NamedNode::new_unchecked(pred_iri.clone()); match v { - LinkMLValue::Scalar { value, slot, .. } => { + LinkMLInstance::Scalar { value, slot, .. } => { let inline_mode = slot.determine_slot_inline_mode(); if inline_mode == SlotInlineMode::Reference { let lit = literal_value(value); @@ -225,7 +227,10 @@ fn serialize_map( } } } - LinkMLValue::Object { values, class, .. } => { + LinkMLInstance::Null { .. } => { + // Null is treated as absent; emit nothing + } + LinkMLInstance::Object { values, class, .. 
} => { let class_ref = &class; let (obj, child_id) = identifier_node(values, class_ref, conv, state, Some(subject), None); @@ -246,10 +251,10 @@ fn serialize_map( child_id.as_deref(), )?; } - LinkMLValue::List { values, slot, .. } => { + LinkMLInstance::List { values, slot, .. } => { for (idx, item) in values.iter().enumerate() { match item { - LinkMLValue::Scalar { value, .. } => { + LinkMLInstance::Scalar { value, .. } => { let inline_mode = slot.determine_slot_inline_mode(); if inline_mode == SlotInlineMode::Reference { let lit = literal_value(value); @@ -288,7 +293,10 @@ fn serialize_map( } } } - LinkMLValue::Object { + LinkMLInstance::Null { .. } => { + // Skip null items + } + LinkMLInstance::Object { values: mv, class, .. } => { let class_ref = &class; @@ -317,15 +325,15 @@ fn serialize_map( child_id.as_deref(), )?; } - LinkMLValue::List { .. } => {} - LinkMLValue::Mapping { .. } => {} + LinkMLInstance::List { .. } => {} + LinkMLInstance::Mapping { .. } => {} } } } - LinkMLValue::Mapping { values, .. } => { + LinkMLInstance::Mapping { values, .. } => { for (idx, item) in values.values().enumerate() { match item { - LinkMLValue::Scalar { value: v, slot, .. } => { + LinkMLInstance::Scalar { value: v, slot, .. } => { let inline_mode = slot.determine_slot_inline_mode(); if inline_mode == SlotInlineMode::Reference { let lit = literal_value(v); @@ -364,7 +372,10 @@ fn serialize_map( } } } - LinkMLValue::Object { + LinkMLInstance::Null { .. } => { + // nothing + } + LinkMLInstance::Object { values: mv, class, .. } => { let class_ref = class; @@ -393,8 +404,8 @@ fn serialize_map( child_id.as_deref(), )?; } - LinkMLValue::List { .. } => {} - LinkMLValue::Mapping { .. } => {} + LinkMLInstance::List { .. } => {} + LinkMLInstance::Mapping { .. 
} => {} } } } @@ -404,7 +415,7 @@ fn serialize_map( } pub fn write_turtle( - value: &LinkMLValue, + value: &LinkMLInstance, sv: &SchemaView, schema: &SchemaDefinition, conv: &Converter, @@ -444,11 +455,11 @@ pub fn write_turtle( }; let mut formatter = TurtleSerializer::new().for_writer(Vec::new()); match value { - LinkMLValue::Object { values, class, .. } => { + LinkMLInstance::Object { values, class, .. } => { let cv = &class; let mut id_slot_name = None; let subj = if let Some(id_slot) = cv.identifier_slot() { - if let Some(LinkMLValue::Scalar { value, .. }) = values.get(&id_slot.name) { + if let Some(LinkMLInstance::Scalar { value, .. }) = values.get(&id_slot.name) { let lit = literal_value(value); let iri = Identifier::new(&lit) .to_uri(conv) @@ -473,7 +484,7 @@ pub fn write_turtle( id_slot_name.as_deref(), )?; } - LinkMLValue::Mapping { values, .. } => { + LinkMLInstance::Mapping { values, .. } => { for (idx, item) in values.values().enumerate() { let subj = if options.skolem { Node::Named(format!("{}root/{}", state.base, idx)) @@ -481,7 +492,7 @@ pub fn write_turtle( state.next_subject() }; match item { - LinkMLValue::Object { + LinkMLInstance::Object { values: mv, class, .. } => { let class = Some(class); @@ -496,7 +507,7 @@ pub fn write_turtle( None, )?; } - LinkMLValue::Scalar { value: v, slot, .. } => { + LinkMLInstance::Scalar { value: v, slot, .. } => { let (lit, dt_opt) = literal_and_type(v, slot); if let Some(dt) = dt_opt { let object = Term::Literal(Literal::new_typed_literal( @@ -523,12 +534,13 @@ pub fn write_turtle( formatter.serialize_triple(triple.as_ref())?; } } - LinkMLValue::List { .. } => {} - LinkMLValue::Mapping { .. } => {} + LinkMLInstance::Null { .. } => {} + LinkMLInstance::List { .. } => {} + LinkMLInstance::Mapping { .. } => {} } } } - LinkMLValue::List { values, .. } => { + LinkMLInstance::List { values, .. 
} => { for (idx, item) in values.iter().enumerate() { let subj = if options.skolem { Node::Named(format!("{}root/{}", state.base, idx)) @@ -536,7 +548,7 @@ pub fn write_turtle( state.next_subject() }; match item { - LinkMLValue::Object { + LinkMLInstance::Object { values: mv, class, .. } => { let class = Some(class); @@ -551,7 +563,7 @@ pub fn write_turtle( None, )?; } - LinkMLValue::Scalar { value, slot, .. } => { + LinkMLInstance::Scalar { value, slot, .. } => { let (lit, dt_opt) = literal_and_type(value, slot); if let Some(dt) = dt_opt { let object = Term::Literal(Literal::new_typed_literal( @@ -578,12 +590,16 @@ pub fn write_turtle( formatter.serialize_triple(triple.as_ref())?; } } - LinkMLValue::List { .. } => {} - LinkMLValue::Mapping { .. } => {} + LinkMLInstance::Null { .. } => { + // nothing + } + LinkMLInstance::List { .. } => {} + LinkMLInstance::Mapping { .. } => {} } } } - LinkMLValue::Scalar { .. } => {} + LinkMLInstance::Scalar { .. } => {} + LinkMLInstance::Null { .. } => {} } let out_buf = formatter.finish()?; let mut out = String::from_utf8(out_buf).unwrap_or_default(); @@ -606,7 +622,7 @@ pub fn write_turtle( } pub fn turtle_to_string( - value: &LinkMLValue, + value: &LinkMLInstance, sv: &SchemaView, schema: &SchemaDefinition, conv: &Converter, diff --git a/src/runtime/tests/alias.rs b/src/runtime/tests/alias.rs index 24730f3..2833ae1 100644 --- a/src/runtime/tests/alias.rs +++ b/src/runtime/tests/alias.rs @@ -33,20 +33,20 @@ fn parse_alias_fields() { println!("JSON: {:?}", v.to_json()); panic!("validation failed: {}", e); } - if let linkml_runtime::LinkMLValue::Object { values, .. } = &v { + if let linkml_runtime::LinkMLInstance::Object { values, .. } = &v { let desc = values.get("description").expect("desc"); - if let linkml_runtime::LinkMLValue::Object { values: item, .. } = desc { + if let linkml_runtime::LinkMLInstance::Object { values: item, .. 
} = desc { println!("json: {:?}", v.to_json()); let desc_v = item.get("alt_description_text"); assert!(desc_v.is_some(), "desc field missing"); - if let linkml_runtime::LinkMLValue::Scalar { slot, .. } = desc_v.unwrap() { + if let linkml_runtime::LinkMLInstance::Scalar { slot, .. } = desc_v.unwrap() { assert_eq!(slot.name, "alt_description_text"); } else { panic!("wrong type for description"); } let src_v = item.get("alt_description_source"); assert!(src_v.is_some(), "src field missing"); - if let linkml_runtime::LinkMLValue::Scalar { slot, .. } = src_v.unwrap() { + if let linkml_runtime::LinkMLInstance::Scalar { slot, .. } = src_v.unwrap() { assert_eq!(slot.name, "alt_description_source"); } else { panic!("wrong type for source"); diff --git a/src/runtime/tests/data/example_personinfo_data_nulls.yaml b/src/runtime/tests/data/example_personinfo_data_nulls.yaml new file mode 100644 index 0000000..0921942 --- /dev/null +++ b/src/runtime/tests/data/example_personinfo_data_nulls.yaml @@ -0,0 +1,11 @@ +objects: + - id: P:100 + objecttype: https://w3id.org/linkml/examples/personinfo/Person + name: Null Collections Person + # multivalued scalar list + aliases: null + # inlined-as-list of class instances + has_employment_history: null + # inlined-as-dict of class instances + has_familial_relationships: null + diff --git a/src/runtime/tests/diff.rs b/src/runtime/tests/diff.rs index 2df1971..b46adf4 100644 --- a/src/runtime/tests/diff.rs +++ b/src/runtime/tests/diff.rs @@ -1,4 +1,4 @@ -use linkml_runtime::{diff, load_yaml_file, patch}; +use linkml_runtime::{diff, load_json_str, load_yaml_file, patch}; use linkml_schemaview::identifier::{converter_from_schema, Identifier}; use linkml_schemaview::io::from_yaml; use linkml_schemaview::schemaview::SchemaView; @@ -57,7 +57,16 @@ fn diff_and_patch_person() { } } - let patched = patch(&src, &deltas, &sv); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + 
treat_missing_as_null: false, + }, + ) + .unwrap(); let patched_json = patched.to_json(); let target_json = tgt.to_json(); let src_json = src.to_json(); @@ -91,9 +100,18 @@ fn diff_ignore_missing_target() { ) .unwrap(); - let deltas = diff(&src, &tgt, true); + let deltas = diff(&src, &tgt, false); assert!(deltas.is_empty()); - let patched = patch(&src, &deltas, &sv); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); let patched_json = patched.to_json(); let src_json = src.to_json(); assert_eq!(patched_json, src_json); @@ -135,10 +153,118 @@ fn diff_and_patch_personinfo() { assert!(tgt.navigate_path(&d.path).is_some()); } } - let patched = patch(&src, &deltas, &sv); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); assert_eq!(patched.to_json(), tgt.to_json()); } +#[test] +fn diff_null_and_missing_semantics() { + use linkml_runtime::LinkMLInstance; + let schema = from_yaml(Path::new(&data_path("schema.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let class = sv + .get_class(&Identifier::new("Person"), &conv) + .unwrap() + .expect("class not found"); + + let src = load_yaml_file( + Path::new(&data_path("person_valid.yaml")), + &sv, + &class, + &conv, + ) + .unwrap(); + + // X -> null => update to null + if let LinkMLInstance::Object { .. 
} = src.clone() { + let mut tgt_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = tgt_json { + m.insert("age".to_string(), serde_json::Value::Null); + } + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); + let deltas = diff(&src, &tgt, false); + assert!(deltas + .iter() + .any(|d| d.path == vec!["age".to_string()] && d.new == Some(serde_json::Value::Null))); + } + + // null -> X => update from null + if let LinkMLInstance::Object { .. } = src.clone() { + let mut src_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = src_json { + m.insert("age".to_string(), serde_json::Value::Null); + } + let src_with_null = load_json_str( + &serde_json::to_string(&src_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); + let deltas = diff(&src_with_null, &src, false); + assert!(deltas.iter().any(|d| d.path == vec!["age".to_string()] + && d.old == Some(serde_json::Value::Null) + && d.new.is_some())); + } + + // missing -> X => add + if let LinkMLInstance::Object { .. } = src.clone() { + let mut src_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = src_json { + m.remove("age"); + } + let src_missing = load_json_str( + &serde_json::to_string(&src_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); + let deltas = diff(&src_missing, &src, false); + assert!(deltas + .iter() + .any(|d| d.path == vec!["age".to_string()] && d.old.is_none() && d.new.is_some())); + } + + // X -> missing: ignored by default; produce update-to-null when treat_missing_as_null=true + if let LinkMLInstance::Object { .. 
} = src.clone() { + let mut tgt_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = tgt_json { + m.remove("age"); + } + let tgt_missing = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); + let deltas = diff(&src, &tgt_missing, false); + assert!(deltas.iter().all(|d| d.path != vec!["age".to_string()])); + let deltas2 = diff(&src, &tgt_missing, true); + assert!(deltas2 + .iter() + .any(|d| d.path == vec!["age".to_string()] && d.new == Some(serde_json::Value::Null))) + } +} + #[test] fn personinfo_invalid_fails() { let schema = from_yaml(Path::new(&info_path("personinfo.yaml"))).unwrap(); diff --git a/src/runtime/tests/diff_identifier.rs b/src/runtime/tests/diff_identifier.rs new file mode 100644 index 0000000..56aa667 --- /dev/null +++ b/src/runtime/tests/diff_identifier.rs @@ -0,0 +1,282 @@ +use linkml_runtime::{diff, load_json_str, load_yaml_file, patch}; +use linkml_schemaview::identifier::{converter_from_schema, Identifier}; +use linkml_schemaview::io::from_yaml; +use linkml_schemaview::schemaview::SchemaView; +use serde_json::Value as JsonValue; +use std::path::{Path, PathBuf}; + +fn data_path(name: &str) -> PathBuf { + let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push("tests"); + p.push("data"); + p.push(name); + p +} + +#[test] +fn single_inlined_object_identifier_change_is_replacement() { + // Use personinfo schema; diagnosis is an inlined object with identifier (via NamedThing) + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class(&Identifier::new("Container"), &conv) + .unwrap() + .expect("class not found"); + + let src = load_yaml_file( + Path::new(&data_path("example_personinfo_data.yaml")), + &sv, + &container, + &conv, + ) + .unwrap(); + + // Modify diagnosis.id of the first medical history 
event for P:002 + let mut tgt_json = src.to_json(); + if let JsonValue::Object(ref mut root) = tgt_json { + if let Some(JsonValue::Array(objects)) = root.get_mut("objects") { + if let Some(JsonValue::Object(p2)) = objects.get_mut(2) { + if let Some(JsonValue::Array(mh)) = p2.get_mut("has_medical_history") { + if let Some(JsonValue::Object(ev0)) = mh.get_mut(0) { + if let Some(JsonValue::Object(diag)) = ev0.get_mut("diagnosis") { + diag.insert( + "id".to_string(), + JsonValue::String("CODE:D9999".to_string()), + ); + } + } + } + } + } + } + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &container, + &conv, + ) + .unwrap(); + + let deltas = diff(&src, &tgt, false); + // Expect a single replacement at the diagnosis object path + assert_eq!(deltas.len(), 1); + let d = &deltas[0]; + assert_eq!( + d.path, + vec![ + "objects".to_string(), + "2".to_string(), + "has_medical_history".to_string(), + "0".to_string(), + "diagnosis".to_string() + ] + ); + assert!(d.old.is_some() && d.new.is_some()); + + // Patch should yield target + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); + assert_eq!(patched.to_json(), tgt.to_json()); +} + +#[test] +fn single_inlined_object_non_identifier_change_is_field_delta() { + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class(&Identifier::new("Container"), &conv) + .unwrap() + .expect("class not found"); + + let src = load_yaml_file( + Path::new(&data_path("example_personinfo_data.yaml")), + &sv, + &container, + &conv, + ) + .unwrap(); + + // Modify diagnosis.name only + let mut tgt_json = src.to_json(); + if let JsonValue::Object(ref mut root) = tgt_json { + if let Some(JsonValue::Array(objects)) = 
root.get_mut("objects") { + if let Some(JsonValue::Object(p2)) = objects.get_mut(2) { + if let Some(JsonValue::Array(mh)) = p2.get_mut("has_medical_history") { + if let Some(JsonValue::Object(ev0)) = mh.get_mut(0) { + if let Some(JsonValue::Object(diag)) = ev0.get_mut("diagnosis") { + diag.insert( + "name".to_string(), + JsonValue::String("new name".to_string()), + ); + } + } + } + } + } + } + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &container, + &conv, + ) + .unwrap(); + + let deltas = diff(&src, &tgt, false); + assert!(deltas.iter().any(|d| d.path + == vec![ + "objects".to_string(), + "2".to_string(), + "has_medical_history".to_string(), + "0".to_string(), + "diagnosis".to_string(), + "name".to_string() + ])); + // Must not collapse to whole-object replacement here + assert!(!deltas.iter().any(|d| d.path + == vec![ + "objects".to_string(), + "2".to_string(), + "has_medical_history".to_string(), + "0".to_string(), + "diagnosis".to_string() + ])); + + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); + assert_eq!(patched.to_json(), tgt.to_json()); +} + +#[test] +fn list_inlined_object_identifier_change_is_replacement() { + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class(&Identifier::new("Container"), &conv) + .unwrap() + .expect("class not found"); + + let src = load_yaml_file( + Path::new(&data_path("example_personinfo_data.yaml")), + &sv, + &container, + &conv, + ) + .unwrap(); + + // Change the id of the third object (P:002) + let mut tgt_json = src.to_json(); + if let JsonValue::Object(ref mut root) = tgt_json { + if let Some(JsonValue::Array(objects)) = root.get_mut("objects") { + if let Some(JsonValue::Object(p2)) = 
objects.get_mut(2) { + p2.insert("id".to_string(), JsonValue::String("P:099".to_string())); + } + } + } + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &container, + &conv, + ) + .unwrap(); + + let deltas = diff(&src, &tgt, false); + // Expect a single replacement at the list item path + assert!(deltas + .iter() + .any(|d| d.path == vec!["objects".to_string(), "2".to_string()])); + assert!(!deltas + .iter() + .any(|d| d.path == vec!["objects".to_string(), "2".to_string(), "id".to_string()])); + + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); + assert_eq!(patched.to_json(), tgt.to_json()); +} + +#[test] +fn mapping_inlined_identifier_change_is_add_delete() { + // Use mapping schema with inlined_as_dict keyed by 'key' + let schema = from_yaml(Path::new(&data_path("mapping_schema.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let bag = sv + .get_class(&Identifier::new("Bag"), &conv) + .unwrap() + .expect("class not found"); + + let src = linkml_runtime::load_json_file( + Path::new(&data_path("mapping_data.json")), + &sv, + &bag, + &conv, + ) + .unwrap(); + + // Rename mapping key 'alpha' to 'alpha2' + let mut tgt_json = src.to_json(); + if let JsonValue::Object(ref mut root) = tgt_json { + if let Some(JsonValue::Object(things)) = root.get_mut("things") { + if let Some(alpha) = things.remove("alpha") { + things.insert("alpha2".to_string(), alpha); + } + } + } + let tgt = load_json_str(&serde_json::to_string(&tgt_json).unwrap(), &sv, &bag, &conv).unwrap(); + + let deltas = diff(&src, &tgt, false); + // Expect one delete and one add at mapping keys; no inner key-slot deltas + assert!(deltas + .iter() + .any(|d| d.path == vec!["things".to_string(), "alpha".to_string()] && d.new.is_none())); + assert!(deltas + .iter() + 
.any(|d| d.path == vec!["things".to_string(), "alpha2".to_string()] && d.old.is_none())); + assert!(!deltas + .iter() + .any(|d| d.path == vec!["things".to_string(), "alpha".to_string(), "key".to_string()])); + + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); + assert_eq!(patched.to_json(), tgt.to_json()); +} diff --git a/src/runtime/tests/enum.rs b/src/runtime/tests/enum.rs index 6f2ffd9..0d7d158 100644 --- a/src/runtime/tests/enum.rs +++ b/src/runtime/tests/enum.rs @@ -1,4 +1,4 @@ -use linkml_runtime::{load_yaml_file, validate, LinkMLValue}; +use linkml_runtime::{load_yaml_file, validate, LinkMLInstance}; use linkml_schemaview::identifier::{converter_from_schema, Identifier}; use linkml_schemaview::io::from_yaml; use linkml_schemaview::schemaview::SchemaView; @@ -33,10 +33,10 @@ fn enum_valid_value() { assert!(validate(&v).is_ok()); // Sanity check parsed value shape - if let LinkMLValue::Object { values, .. } = v { + if let LinkMLInstance::Object { values, .. } = v { let status = values.get("status").expect("status not found"); match status { - LinkMLValue::Scalar { value, .. } => { + LinkMLInstance::Scalar { value, .. 
} => { assert_eq!(value.as_str(), Some("active")); } _ => panic!("expected scalar for status"), diff --git a/src/runtime/tests/equality.rs b/src/runtime/tests/equality.rs new file mode 100644 index 0000000..7e1dfa3 --- /dev/null +++ b/src/runtime/tests/equality.rs @@ -0,0 +1,180 @@ +use linkml_runtime::{load_json_str, load_yaml_str, LinkMLInstance}; +use linkml_schemaview::identifier::converter_from_schema; +use linkml_schemaview::io::from_yaml; +use linkml_schemaview::schemaview::SchemaView; +use std::path::Path; + +fn data_path(name: &str) -> std::path::PathBuf { + let mut p = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push("tests"); + p.push("data"); + p.push(name); + p +} + +#[test] +fn object_equality_ignores_null_assignments() { + // Load personinfo schema and Container class + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class( + &linkml_schemaview::identifier::Identifier::new("Container"), + &conv, + ) + .unwrap() + .expect("class not found"); + + let doc_with_null = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" + current_address: null +"#; + let doc_without_slot = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" +"#; + let v1 = load_yaml_str(doc_with_null, &sv, &container, &conv).unwrap(); + let v2 = load_yaml_str(doc_without_slot, &sv, &container, &conv).unwrap(); + let p1 = v1.navigate_path(["objects", "0"]).unwrap(); + let p2 = v2.navigate_path(["objects", "0"]).unwrap(); + assert!( + p1.equals(p2, true), + "Person with null assignment should equal omission" + ); +} + +#[test] +fn list_identity_is_order_sensitive() { + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = 
converter_from_schema(&schema); + let container = sv + .get_class( + &linkml_schemaview::identifier::Identifier::new("Container"), + &conv, + ) + .unwrap() + .expect("class not found"); + + let doc_a = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" + has_employment_history: + - started_at_time: 2019-01-01 + is_current: true + - started_at_time: 2020-01-01 + is_current: false +"#; + let doc_b = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" + has_employment_history: + - started_at_time: 2020-01-01 + is_current: false + - started_at_time: 2019-01-01 + is_current: true +"#; + let v1 = load_yaml_str(doc_a, &sv, &container, &conv).unwrap(); + let v2 = load_yaml_str(doc_b, &sv, &container, &conv).unwrap(); + let p1 = v1.navigate_path(["objects", "0"]).unwrap(); + let p2 = v2.navigate_path(["objects", "0"]).unwrap(); + assert!(matches!(p1, LinkMLInstance::Object { .. })); + assert!(matches!(p2, LinkMLInstance::Object { .. })); + assert!(!p1.equals(p2, true), "List order must affect equality"); +} + +#[test] +fn mapping_equality_is_key_based_not_ordered() { + // Load mapping schema and Bag class + let schema = from_yaml(Path::new(&data_path("mapping_schema.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let bag = sv + .get_class( + &linkml_schemaview::identifier::Identifier::new("Bag"), + &conv, + ) + .unwrap() + .expect("class not found"); + + let doc1 = r#"{ + "things": { + "alpha": {"typeURI": "ThingA", "a_only": "foo", "common": "shared"}, + "beta": {"typeURI": "ThingB", "b_only": true, "common": "shared"} + } +}"#; + let doc2 = r#"{ + "things": { + "beta": {"typeURI": "ThingB", "b_only": true, "common": "shared"}, + "alpha": {"typeURI": "ThingA", "a_only": "foo", "common": "shared"} + } +}"#; + let v1 = load_json_str(doc1, &sv, &bag, &conv).unwrap(); + let v2 = load_json_str(doc2, &sv, &bag, &conv).unwrap(); + let m1 = 
v1.navigate_path(["things"]).unwrap(); + let m2 = v2.navigate_path(["things"]).unwrap(); + assert!(matches!(m1, LinkMLInstance::Mapping { .. })); + assert!(matches!(m2, LinkMLInstance::Mapping { .. })); + assert!( + m1.equals(m2, true), + "Mapping equality should ignore key order" + ); +} + +#[test] +fn enum_scalar_equality_respects_value_and_range() { + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class( + &linkml_schemaview::identifier::Identifier::new("Container"), + &conv, + ) + .unwrap() + .expect("class not found"); + + let doc1 = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" + gender: "cisgender man" +"#; + let doc2 = r#" +objects: + - objecttype: personinfo:Person + id: "P:2" + name: "Bob" + gender: "cisgender man" +"#; + let doc3 = r#" +objects: + - objecttype: personinfo:Person + id: "P:3" + name: "Carol" + gender: "cisgender woman" +"#; + let v1 = load_yaml_str(doc1, &sv, &container, &conv).unwrap(); + let v2 = load_yaml_str(doc2, &sv, &container, &conv).unwrap(); + let v3 = load_yaml_str(doc3, &sv, &container, &conv).unwrap(); + let g1 = v1.navigate_path(["objects", "0", "gender"]).unwrap(); + let g2 = v2.navigate_path(["objects", "0", "gender"]).unwrap(); + let g3 = v3.navigate_path(["objects", "0", "gender"]).unwrap(); + assert!(g1.equals(g2, true)); + assert!(!g1.equals(g3, true)); +} diff --git a/src/runtime/tests/inlined_mapping_subclass.rs b/src/runtime/tests/inlined_mapping_subclass.rs index e16803a..1d3752e 100644 --- a/src/runtime/tests/inlined_mapping_subclass.rs +++ b/src/runtime/tests/inlined_mapping_subclass.rs @@ -1,4 +1,4 @@ -use linkml_runtime::{load_json_file, validate, LinkMLValue}; +use linkml_runtime::{load_json_file, validate, LinkMLInstance}; use linkml_schemaview::identifier::{converter_from_schema, Identifier}; use 
linkml_schemaview::io::from_yaml; use linkml_schemaview::schemaview::SchemaView; @@ -34,16 +34,16 @@ fn inlined_mapping_selects_subclass_by_typeuri() { // Ensure inlined mapping children select subclasses based on typeURI match v { - LinkMLValue::Object { values, .. } => { + LinkMLInstance::Object { values, .. } => { let things = values.get("things").expect("things slot missing"); match things { - LinkMLValue::Mapping { values, .. } => { + LinkMLInstance::Mapping { values, .. } => { match values.get("alpha").expect("alpha missing") { - LinkMLValue::Object { class, .. } => assert_eq!(class.name(), "ThingA"), + LinkMLInstance::Object { class, .. } => assert_eq!(class.name(), "ThingA"), _ => panic!("alpha should be an object"), } match values.get("beta").expect("beta missing") { - LinkMLValue::Object { class, .. } => assert_eq!(class.name(), "ThingB"), + LinkMLInstance::Object { class, .. } => assert_eq!(class.name(), "ThingB"), _ => panic!("beta should be an object"), } } diff --git a/src/runtime/tests/navigate.rs b/src/runtime/tests/navigate.rs index b4e18aa..da4dc5f 100644 --- a/src/runtime/tests/navigate.rs +++ b/src/runtime/tests/navigate.rs @@ -31,7 +31,7 @@ fn navigate_basic() { .unwrap(); // Map root should have key 'objects' match &v { - linkml_runtime::LinkMLValue::Object { values, .. } => { + linkml_runtime::LinkMLInstance::Object { values, .. 
} => { assert!(values.contains_key("objects")); let inner = v.navigate_path([ "objects", diff --git a/src/runtime/tests/polymorphic.rs b/src/runtime/tests/polymorphic.rs index c0c5f43..5652365 100644 --- a/src/runtime/tests/polymorphic.rs +++ b/src/runtime/tests/polymorphic.rs @@ -1,4 +1,4 @@ -use linkml_runtime::LinkMLValue; +use linkml_runtime::LinkMLInstance; use linkml_runtime::{load_yaml_file, validate}; use linkml_schemaview::identifier::{converter_from_schema, Identifier}; use linkml_schemaview::io::from_yaml; @@ -105,20 +105,20 @@ fn array_polymorphism() { .expect("class not found"); let v = load_yaml_file(Path::new(&data_path("poly_array.yaml")), &sv, &class, &conv).unwrap(); assert!(validate(&v).is_ok()); - if let LinkMLValue::Object { values, .. } = v { + if let LinkMLInstance::Object { values, .. } = v { let objs = values.get("objs").expect("objs not found"); - if let LinkMLValue::List { values: arr, .. } = objs { + if let LinkMLInstance::List { values: arr, .. } = objs { assert_eq!(arr.len(), 3); match &arr[0] { - LinkMLValue::Object { class, .. } => assert_eq!(class.name(), "Child"), + LinkMLInstance::Object { class, .. } => assert_eq!(class.name(), "Child"), _ => panic!("expected map"), } match &arr[1] { - LinkMLValue::Object { class, .. } => assert_eq!(class.name(), "Child"), + LinkMLInstance::Object { class, .. } => assert_eq!(class.name(), "Child"), _ => panic!("expected map"), } match &arr[2] { - LinkMLValue::Object { class, .. } => assert_eq!(class.name(), "Parent"), + LinkMLInstance::Object { class, .. 
} => assert_eq!(class.name(), "Parent"), _ => panic!("expected map"), } } else { diff --git a/src/runtime/tests/trace.rs b/src/runtime/tests/trace.rs new file mode 100644 index 0000000..13ccbfc --- /dev/null +++ b/src/runtime/tests/trace.rs @@ -0,0 +1,228 @@ +use linkml_runtime::{diff, load_json_str, load_yaml_file}; +use linkml_schemaview::identifier::{converter_from_schema, Identifier}; +use linkml_schemaview::io::from_yaml; +use linkml_schemaview::schemaview::SchemaView; +use std::collections::HashSet; +use std::path::{Path, PathBuf}; + +fn data_path(name: &str) -> PathBuf { + let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push("tests"); + p.push("data"); + p.push(name); + p +} + +fn info_path(name: &str) -> PathBuf { + let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push("tests"); + p.push("data"); + p.push(name); + p +} + +fn collect_ids(v: &linkml_runtime::LinkMLInstance, out: &mut Vec) { + out.push(v.node_id()); + match v { + linkml_runtime::LinkMLInstance::Scalar { .. } => {} + linkml_runtime::LinkMLInstance::Null { .. } => {} + linkml_runtime::LinkMLInstance::List { values, .. } => { + for c in values { + collect_ids(c, out); + } + } + linkml_runtime::LinkMLInstance::Mapping { values, .. } + | linkml_runtime::LinkMLInstance::Object { values, .. 
} => { + for c in values.values() { + collect_ids(c, out); + } + } + } +} + +#[test] +fn node_ids_preserved_scalar_update() { + let schema = from_yaml(Path::new(&data_path("schema.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let class = sv + .get_class(&Identifier::new("Person"), &conv) + .unwrap() + .expect("class not found"); + let src = load_yaml_file( + Path::new(&data_path("person_valid.yaml")), + &sv, + &class, + &conv, + ) + .unwrap(); + let mut tgt_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = tgt_json { + m.insert("age".to_string(), serde_json::json!(99)); + } + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); + + let deltas = diff(&src, &tgt, false); + let (patched, trace) = linkml_runtime::patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); + + assert!(trace.added.is_empty()); + assert!(trace.deleted.is_empty()); + assert!(!trace.updated.is_empty()); + + let src_age = src.navigate_path(["age"]).unwrap(); + let pat_age = patched.navigate_path(["age"]).unwrap(); + assert_eq!(src_age.node_id(), pat_age.node_id()); + assert!(trace.updated.contains(&pat_age.node_id())); +} + +#[test] +fn patch_trace_add_in_list() { + let schema = from_yaml(Path::new(&info_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class(&Identifier::new("Container"), &conv) + .unwrap() + .expect("class not found"); + let base = load_yaml_file( + Path::new(&info_path("example_personinfo_data.yaml")), + &sv, + &container, + &conv, + ) + .unwrap(); + + // Add a new object to the 'objects' list + let mut base_json = base.to_json(); + if let serde_json::Value::Object(ref mut root) 
= base_json { + if let Some(serde_json::Value::Array(ref mut arr)) = root.get_mut("objects") { + let new_obj = serde_json::json!({ + "id": "P:999", + "name": "Added Person", + "objecttype": "https://w3id.org/linkml/examples/personinfo/Person" + }); + arr.push(new_obj); + } + } + let target = load_json_str( + &serde_json::to_string(&base_json).unwrap(), + &sv, + &container, + &conv, + ) + .unwrap(); + + let deltas = diff(&base, &target, false); + let mut pre = Vec::new(); + collect_ids(&base, &mut pre); + let (patched, trace) = linkml_runtime::patch( + &base, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); + let mut post = Vec::new(); + collect_ids(&patched, &mut post); + + let pre_set: HashSet = pre.into_iter().collect(); + let post_set: HashSet = post.into_iter().collect(); + let added: HashSet = post_set.difference(&pre_set).copied().collect(); + let trace_added: HashSet = trace.added.iter().copied().collect(); + assert_eq!(added, trace_added); + assert!(!added.is_empty()); +} + +#[test] +fn patch_missing_to_null_semantics() { + // Use simple schema + let schema = from_yaml(Path::new(&data_path("schema.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let class = sv + .get_class(&Identifier::new("Person"), &conv) + .unwrap() + .expect("class not found"); + + let src = load_yaml_file( + Path::new(&data_path("person_partial.yaml")), + &sv, + &class, + &conv, + ) + .unwrap(); + // Build delta: set age to explicit null + let deltas = vec![linkml_runtime::Delta { + path: vec!["age".to_string()], + old: None, + new: Some(serde_json::Value::Null), + }]; + + // treat_missing_as_null = true => no-op; no trace changes, no node id changes + let pre_id = src.node_id(); + let (patched_same, trace_same) = linkml_runtime::patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + 
ignore_no_ops: true, + treat_missing_as_null: true, + }, + ) + .unwrap(); + assert!( + trace_same.added.is_empty() + && trace_same.deleted.is_empty() + && trace_same.updated.is_empty() + ); + assert_eq!(pre_id, patched_same.node_id()); + // Equality under treat_missing_as_null=true must hold + assert!(src.equals(&patched_same, true)); + // And age remains absent (since explicit null is treated as omitted) + if let linkml_runtime::LinkMLInstance::Object { values, .. } = &patched_same { + assert!(!values.contains_key("age")); + } + + // treat_missing_as_null = false => apply explicit null + let (patched_null, trace_applied) = linkml_runtime::patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); + assert!(trace_applied.updated.contains(&patched_null.node_id())); + // age present as Null + if let linkml_runtime::LinkMLInstance::Object { values, .. } = &patched_null { + assert!(matches!( + values.get("age"), + Some(linkml_runtime::LinkMLInstance::Null { .. 
}) + )); + } else { + panic!("expected object root"); + } +} diff --git a/src/runtime/tests/validation.rs b/src/runtime/tests/validation.rs index b91163a..1cd9e1b 100644 --- a/src/runtime/tests/validation.rs +++ b/src/runtime/tests/validation.rs @@ -51,3 +51,52 @@ fn validate_personinfo_example2() { .unwrap(); assert!(validate(&v).is_ok()); } + +#[test] +fn validate_personinfo_null_collections() { + let schema = from_yaml(Path::new(&info_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class(&Identifier::new("Container"), &conv) + .unwrap() + .expect("class not found"); + let v = load_yaml_file( + Path::new(&info_path("example_personinfo_data_nulls.yaml")), + &sv, + &container, + &conv, + ) + .unwrap(); + assert!(validate(&v).is_ok()); + // Assert that nulls are preserved as LinkMLInstance::Null (not empty collections) + if let linkml_runtime::LinkMLInstance::Object { values, .. } = &v { + if let Some(linkml_runtime::LinkMLInstance::List { values: objs, .. }) = + values.get("objects") + { + if let Some(linkml_runtime::LinkMLInstance::Object { values: person, .. }) = + objs.first() + { + assert!(matches!( + person.get("aliases"), + Some(linkml_runtime::LinkMLInstance::Null { .. }) + )); + assert!(matches!( + person.get("has_employment_history"), + Some(linkml_runtime::LinkMLInstance::Null { .. }) + )); + assert!(matches!( + person.get("has_familial_relationships"), + Some(linkml_runtime::LinkMLInstance::Null { .. 
}) + )); + } else { + panic!("expected first object to be an Object"); + } + } else { + panic!("expected Container.objects to be a List"); + } + } else { + panic!("expected root to be an Object"); + } +} diff --git a/src/tools/src/bin/linkml_diff.rs b/src/tools/src/bin/linkml_diff.rs index 8aee75c..8c2d0b3 100644 --- a/src/tools/src/bin/linkml_diff.rs +++ b/src/tools/src/bin/linkml_diff.rs @@ -30,7 +30,7 @@ fn load_value( sv: &SchemaView, class: &ClassView, conv: &curies::Converter, -) -> Result> { +) -> Result> { if let Some(ext) = path.extension().and_then(|s| s.to_str()) { if ext == "json" { load_json_file(path, sv, class, conv) diff --git a/src/tools/src/bin/linkml_patch.rs b/src/tools/src/bin/linkml_patch.rs index cf1555a..50157a9 100644 --- a/src/tools/src/bin/linkml_patch.rs +++ b/src/tools/src/bin/linkml_patch.rs @@ -23,6 +23,12 @@ struct Args { /// Output patched file; defaults to stdout #[arg(short, long)] output: Option, + /// Treat missing assignments as equivalent to explicit null for equality + #[arg(long, default_value_t = true)] + treat_missing_as_null: bool, + /// Skip deltas that do not change the value (no-ops) + #[arg(long, default_value_t = true)] + ignore_noop: bool, } fn load_value( @@ -30,7 +36,7 @@ fn load_value( sv: &SchemaView, class: &ClassView, conv: &curies::Converter, -) -> Result> { +) -> Result> { if let Some(ext) = path.extension().and_then(|s| s.to_str()) { if ext == "json" { load_json_file(path, sv, class, conv) @@ -44,7 +50,7 @@ fn load_value( fn write_value( path: Option<&Path>, - value: &linkml_runtime::LinkMLValue, + value: &linkml_runtime::LinkMLInstance, ) -> Result<(), Box> { let json = value.to_json(); let mut writer: Box = if let Some(p) = path { @@ -92,7 +98,15 @@ fn main() -> Result<(), Box> { } else { serde_yaml::from_str(&delta_text)? 
}; - let patched = patch(&src, &deltas, &sv); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: args.ignore_noop, + treat_missing_as_null: args.treat_missing_as_null, + }, + )?; write_value(args.output.as_deref(), &patched)?; Ok(()) }