From d317729248525788911c1bd717ceb76ad71b5b4d Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Wed, 10 Sep 2025 22:57:05 +0200 Subject: [PATCH 01/13] feat(runtime): add NodeIds to LinkMLValue and return PatchTrace from patch(); expose node_id and trace in Python; adjust CLI and tests --- src/python/src/lib.rs | 22 ++++- src/runtime/src/diff.rs | 154 +++++++++++++++++++++++++++++- src/runtime/src/lib.rs | 34 ++++++- src/runtime/tests/diff.rs | 6 +- src/runtime/tests/trace.rs | 126 ++++++++++++++++++++++++ src/tools/src/bin/linkml_patch.rs | 2 +- 6 files changed, 331 insertions(+), 13 deletions(-) create mode 100644 src/runtime/tests/trace.rs diff --git a/src/python/src/lib.rs b/src/python/src/lib.rs index cbfe6b5..91eeb52 100644 --- a/src/python/src/lib.rs +++ b/src/python/src/lib.rs @@ -1,5 +1,5 @@ use linkml_meta::{ClassDefinition, EnumDefinition, SchemaDefinition, SlotDefinition}; -use linkml_runtime::diff::{diff as diff_internal, patch as patch_internal, Delta}; +use linkml_runtime::diff::{diff as diff_internal, patch as patch_internal, Delta, PatchTrace}; use linkml_runtime::turtle::{turtle_to_string, TurtleOptions}; use linkml_runtime::{load_json_str, load_yaml_str, LinkMLValue}; use linkml_schemaview::identifier::Identifier; @@ -497,6 +497,11 @@ impl PyLinkMLValue { } } + #[getter] + fn node_id(&self) -> u64 { + self.value.node_id() + } + #[getter] fn slot_definition(&self) -> Option { match &self.value { @@ -749,15 +754,24 @@ fn py_patch( py: Python<'_>, source: &PyLinkMLValue, deltas: &Bound<'_, PyAny>, -) -> PyResult { +) -> PyResult { let json_mod = PyModule::import(py, "json")?; let deltas_str: String = json_mod.call_method1("dumps", (deltas,))?.extract()?; let deltas_vec: Vec = serde_json::from_str(&deltas_str).map_err(|e| PyException::new_err(e.to_string()))?; let sv_ref = source.sv.bind(py).borrow(); let rust_sv = sv_ref.as_rust(); - let new_value = patch_internal(&source.value, &deltas_vec, rust_sv); - Ok(PyLinkMLValue::new(new_value, source.sv.clone_ref(py))) + let (new_value, trace) = patch_internal(&source.value, &deltas_vec, rust_sv); + let trace_json = serde_json::json!({ + "added": trace.added, + "deleted": trace.deleted, + "updated": trace.updated, + }); + let py_val = PyLinkMLValue::new(new_value, source.sv.clone_ref(py)); + let dict = pyo3::types::PyDict::new(py); + dict.set_item("value", Py::new(py, py_val)?)?; + dict.set_item("trace", json_value_to_py(py, &trace_json))?; + Ok(dict.into_any().unbind().into()) } #[pyfunction(name = "to_turtle", signature = (value, skolem=None))] diff --git a/src/runtime/src/diff.rs b/src/runtime/src/diff.rs index a64068e..45c4ef0 100644 --- a/src/runtime/src/diff.rs +++ b/src/runtime/src/diff.rs @@ -1,4 +1,4 @@ -use crate::{load_json_str, LinkMLValue}; +use crate::{load_json_str, LinkMLValue, NodeId}; use linkml_schemaview::schemaview::{SchemaView, SlotView}; use serde::{Deserialize, Serialize}; use serde_json::{Map, Value as JsonValue}; @@ -181,17 +181,163 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b out } -pub fn patch(source: &LinkMLValue, deltas: &[Delta], sv: &SchemaView) -> LinkMLValue { +#[derive(Debug, Clone, Default)] +pub struct PatchTrace { + pub added: Vec, + pub deleted: Vec, + pub updated: Vec, +} + +fn collect_ids(value: &LinkMLValue, out: &mut Vec) { + out.push(value.node_id()); + match value { + LinkMLValue::Scalar { .. } => {} + LinkMLValue::List { values, .. } => { + for v in values { + collect_ids(v, out); + } + } + LinkMLValue::Mapping { values, .. } | LinkMLValue::Object { values, .. } => { + for v in values.values() { + collect_ids(v, out); + } + } + } +} + +fn reconcile_ids(old: &LinkMLValue, new: &mut LinkMLValue) { + match (old, new) { + (LinkMLValue::Scalar { node_id: oid, .. }, LinkMLValue::Scalar { node_id: nid, .. }) => { + *nid = *oid; + } + ( + LinkMLValue::List { + node_id: oid, + values: ov, + .. + }, + LinkMLValue::List { + node_id: nid, + values: nv, + .. + }, + ) => { + *nid = *oid; + let n = std::cmp::min(ov.len(), nv.len()); + for i in 0..n { + reconcile_ids(&ov[i], &mut nv[i]); + } + } + ( + LinkMLValue::Mapping { + node_id: oid, + values: om, + .. + }, + LinkMLValue::Mapping { + node_id: nid, + values: nm, + .. + }, + ) => { + *nid = *oid; + for (k, ov) in om { + if let Some(nv) = nm.get_mut(k) { + reconcile_ids(ov, nv); + } + } + } + ( + LinkMLValue::Object { + node_id: oid, + values: om, + .. + }, + LinkMLValue::Object { + node_id: nid, + values: nm, + .. + }, + ) => { + *nid = *oid; + for (k, ov) in om { + if let Some(nv) = nm.get_mut(k) { + reconcile_ids(ov, nv); + } + } + } + _ => {} + } +} + +fn collect_updated(old: &LinkMLValue, new: &LinkMLValue, updated: &mut Vec) { + if old.node_id() == new.node_id() && old.to_json() != new.to_json() { + updated.push(new.node_id()); + } + match (old, new) { + (LinkMLValue::Scalar { .. }, LinkMLValue::Scalar { .. }) => {} + (LinkMLValue::List { values: ov, .. }, LinkMLValue::List { values: nv, .. }) => { + let n = std::cmp::min(ov.len(), nv.len()); + for i in 0..n { + collect_updated(&ov[i], &nv[i], updated); + } + } + ( + LinkMLValue::Mapping { values: om, .. }, + LinkMLValue::Mapping { values: nm, .. }, + ) + | ( + LinkMLValue::Object { values: om, .. }, + LinkMLValue::Object { values: nm, .. }, + ) => { + for (k, ov) in om { + if let Some(nv) = nm.get(k) { + collect_updated(ov, nv, updated); + } + } + } + _ => {} + } +} + +pub fn patch(source: &LinkMLValue, deltas: &[Delta], sv: &SchemaView) -> (LinkMLValue, PatchTrace) { + // Pre-snapshot + let mut pre_ids = Vec::new(); + collect_ids(source, &mut pre_ids); + let mut json = source.to_json(); for d in deltas { apply_delta(&mut json, d); } let json_str = serde_json::to_string(&json).unwrap(); let conv = sv.converter(); - match source { + let mut new_value = match source { LinkMLValue::Object { class: ref c, .. } => load_json_str(&json_str, sv, c, &conv).unwrap(), _ => panic!("patching non-map values is not supported here"), - } + }; + + reconcile_ids(source, &mut new_value); + + let mut post_ids = Vec::new(); + collect_ids(&new_value, &mut post_ids); + + use std::collections::HashSet; + let pre: HashSet = pre_ids.into_iter().collect(); + let post: HashSet = post_ids.into_iter().collect(); + let added: Vec = post.difference(&pre).copied().collect(); + let deleted: Vec = pre.difference(&post).copied().collect(); + + let mut updated = Vec::new(); + collect_updated(source, &new_value, &mut updated); + + ( + new_value, + PatchTrace { + added, + deleted, + updated, + }, + ) } fn apply_delta(value: &mut JsonValue, delta: &Delta) { diff --git a/src/runtime/src/lib.rs b/src/runtime/src/lib.rs index a8cd2c9..f0ef4d5 100644 --- a/src/runtime/src/lib.rs +++ b/src/runtime/src/lib.rs @@ -5,10 +5,11 @@ use serde_json::Value as JsonValue; use std::collections::HashMap; use std::fs; use std::path::Path; +use std::sync::atomic::{AtomicU64, Ordering}; pub mod diff; pub mod turtle; -pub use diff::{diff, patch, Delta}; +pub use diff::{diff, patch, Delta, PatchTrace}; #[derive(Debug)] pub struct LinkMLError(pub String); @@ -56,31 +57,52 @@ fn slot_matches_key(slot: &SlotView, key: &str) -> bool { #[derive(Clone)] pub enum LinkMLValue { Scalar { + node_id: NodeId, value: JsonValue, slot: SlotView, class: Option, sv: SchemaView, }, List { + node_id: NodeId, values: Vec, slot: SlotView, class: Option, sv: SchemaView, }, Mapping { + node_id: NodeId, values: HashMap, slot: SlotView, class: Option, sv: SchemaView, }, Object { + node_id: NodeId, values: HashMap, class: ClassView, sv: SchemaView, }, } +// Stable node identifiers assigned to every LinkMLValue node +pub type NodeId = u64; + +static NEXT_NODE_ID: AtomicU64 = AtomicU64::new(1); + +fn new_node_id() -> NodeId { + NEXT_NODE_ID.fetch_add(1, Ordering::Relaxed) +} + impl LinkMLValue { + pub fn node_id(&self) -> NodeId { + match self { + LinkMLValue::Scalar { node_id, .. } + | LinkMLValue::List { node_id, .. } + | LinkMLValue::Mapping { node_id, .. } + | LinkMLValue::Object { node_id, .. } => *node_id, + } + } /// Navigate the value by a path of strings, where each element is either /// a dictionary key (for maps) or a list index (for lists). /// Returns `Some(&LinkMLValue)` if the full path can be resolved, otherwise `None`. @@ -197,6 +219,7 @@ impl LinkMLValue { ); } Ok(LinkMLValue::Object { + node_id: new_node_id(), values, class: class.clone(), sv: sv.clone(), @@ -247,6 +270,7 @@ impl LinkMLValue { )?); } Ok(LinkMLValue::List { + node_id: new_node_id(), values, slot: sl.clone(), class: Some(class.clone()), @@ -260,6 +284,7 @@ impl LinkMLValue { path_to_string(&path) ))), (true, other) => Ok(LinkMLValue::Scalar { + node_id: new_node_id(), value: other, slot: sl.clone(), class: Some(class.clone()), @@ -327,6 +352,7 @@ impl LinkMLValue { ); } LinkMLValue::Object { + node_id: new_node_id(), values: child_values, class: selected, sv: sv.clone(), @@ -351,6 +377,7 @@ impl LinkMLValue { child_values.insert( scalar_slot.name.clone(), LinkMLValue::Scalar { + node_id: new_node_id(), value: other, slot: scalar_slot.clone(), class: Some(base.clone()), @@ -358,6 +385,7 @@ impl LinkMLValue { }, ); LinkMLValue::Object { + node_id: new_node_id(), values: child_values, class: base.clone(), sv: sv.clone(), @@ -367,6 +395,7 @@ impl LinkMLValue { values.insert(k, child); } Ok(LinkMLValue::Mapping { + node_id: new_node_id(), values, slot: sl.clone(), class: class.clone(), @@ -418,6 +447,7 @@ impl LinkMLValue { )?); } Ok(LinkMLValue::List { + node_id: new_node_id(), values, slot: sl, class: Some(class), @@ -464,6 +494,7 @@ impl LinkMLValue { ); } Ok(LinkMLValue::Object { + node_id: new_node_id(), values, class: chosen, sv: sv.clone(), @@ -486,6 +517,7 @@ impl LinkMLValue { )) })?; Ok(LinkMLValue::Scalar { + node_id: new_node_id(), value, slot: sl, class: Some(class.clone()), diff --git a/src/runtime/tests/diff.rs b/src/runtime/tests/diff.rs index 2df1971..4243105 100644 --- a/src/runtime/tests/diff.rs +++ b/src/runtime/tests/diff.rs @@ -57,7 +57,7 @@ fn diff_and_patch_person() { } } - let patched = patch(&src, &deltas, &sv); + let (patched, _trace) = patch(&src, &deltas, &sv); let patched_json = patched.to_json(); let target_json = tgt.to_json(); let src_json = src.to_json(); @@ -93,7 +93,7 @@ fn diff_ignore_missing_target() { let deltas = diff(&src, &tgt, true); assert!(deltas.is_empty()); - let patched = patch(&src, &deltas, &sv); + let (patched, _trace) = patch(&src, &deltas, &sv); let patched_json = patched.to_json(); let src_json = src.to_json(); assert_eq!(patched_json, src_json); @@ -135,7 +135,7 @@ fn diff_and_patch_personinfo() { assert!(tgt.navigate_path(&d.path).is_some()); } } - let patched = patch(&src, &deltas, &sv); + let (patched, _trace) = patch(&src, &deltas, &sv); assert_eq!(patched.to_json(), tgt.to_json()); } diff --git a/src/runtime/tests/trace.rs b/src/runtime/tests/trace.rs new file mode 100644 index 0000000..6504b22 --- /dev/null +++ b/src/runtime/tests/trace.rs @@ -0,0 +1,126 @@ +use linkml_runtime::{diff, load_json_str, load_yaml_file}; +use linkml_schemaview::identifier::{converter_from_schema, Identifier}; +use linkml_schemaview::io::from_yaml; +use linkml_schemaview::schemaview::SchemaView; +use std::collections::HashSet; +use std::path::{Path, PathBuf}; + +fn data_path(name: &str) -> PathBuf { + let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push("tests"); + p.push("data"); + p.push(name); + p +} + +fn info_path(name: &str) -> PathBuf { + let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push("tests"); + p.push("data"); + p.push(name); + p +} + +fn collect_ids(v: &linkml_runtime::LinkMLValue, out: &mut Vec) { + out.push(v.node_id()); + match v { + linkml_runtime::LinkMLValue::Scalar { .. } => {} + linkml_runtime::LinkMLValue::List { values, .. } => { + for c in values { + collect_ids(c, out); + } + } + linkml_runtime::LinkMLValue::Mapping { values, .. } + | linkml_runtime::LinkMLValue::Object { values, .. } => { + for c in values.values() { + collect_ids(c, out); + } + } + } +} + +#[test] +fn node_ids_preserved_scalar_update() { + let schema = from_yaml(Path::new(&data_path("schema.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let class = sv + .get_class(&Identifier::new("Person"), &conv) + .unwrap() + .expect("class not found"); + let src = load_yaml_file( + Path::new(&data_path("person_valid.yaml")), + &sv, + &class, + &conv, + ) + .unwrap(); + let mut tgt_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = tgt_json { + m.insert("age".to_string(), serde_json::json!(99)); + } + let tgt = load_json_str(&serde_json::to_string(&tgt_json).unwrap(), &sv, &class, &conv) + .unwrap(); + + let deltas = diff(&src, &tgt, false); + let (patched, trace) = linkml_runtime::patch(&src, &deltas, &sv); + + assert!(trace.added.is_empty()); + assert!(trace.deleted.is_empty()); + assert!(!trace.updated.is_empty()); + + let src_age = src.navigate_path(["age"]).unwrap(); + let pat_age = patched.navigate_path(["age"]).unwrap(); + assert_eq!(src_age.node_id(), pat_age.node_id()); + assert!(trace.updated.contains(&pat_age.node_id())); +} + +#[test] +fn patch_trace_add_in_list() { + let schema = from_yaml(Path::new(&info_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class(&Identifier::new("Container"), &conv) + .unwrap() + .expect("class not found"); + let base = load_yaml_file( + Path::new(&info_path("example_personinfo_data.yaml")), + &sv, + &container, + &conv, + ) + .unwrap(); + + // Add a new object to the 'objects' list + let mut base_json = base.to_json(); + if let serde_json::Value::Object(ref mut root) = base_json { + if let Some(serde_json::Value::Array(ref mut arr)) = root.get_mut("objects") { + let new_obj = serde_json::json!({ + "id": "P:999", + "name": "Added Person", + "objecttype": "https://w3id.org/linkml/examples/personinfo/Person" + }); + arr.push(new_obj); + } + } + let target = load_json_str(&serde_json::to_string(&base_json).unwrap(), &sv, &container, &conv) + .unwrap(); + + let deltas = diff(&base, &target, false); + let mut pre = Vec::new(); + collect_ids(&base, &mut pre); + let (patched, trace) = linkml_runtime::patch(&base, &deltas, &sv); + let mut post = Vec::new(); + collect_ids(&patched, &mut post); + + let pre_set: HashSet = pre.into_iter().collect(); + let post_set: HashSet = post.into_iter().collect(); + let added: HashSet = post_set.difference(&pre_set).copied().collect(); + let trace_added: HashSet = trace.added.iter().copied().collect(); + assert_eq!(added, trace_added); + assert!(!added.is_empty()); +} + diff --git a/src/tools/src/bin/linkml_patch.rs b/src/tools/src/bin/linkml_patch.rs index cf1555a..4546bd4 100644 --- a/src/tools/src/bin/linkml_patch.rs +++ b/src/tools/src/bin/linkml_patch.rs @@ -92,7 +92,7 @@ fn main() -> Result<(), Box> { } else { serde_yaml::from_str(&delta_text)? }; - let patched = patch(&src, &deltas, &sv); + let (patched, _trace) = patch(&src, &deltas, &sv); write_value(args.output.as_deref(), &patched)?; Ok(()) } From 3744161c778be93830ed10b6ae9a9e0ce0ff734e Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Wed, 10 Sep 2025 23:19:41 +0200 Subject: [PATCH 02/13] refactor(runtime): implement LinkMLValue-native patch with incremental PatchTrace; remove JSON roundtrip and ID reconciliation; preserve scalar node IDs on in-place updates --- src/runtime/src/diff.rs | 366 ++++++++++++++++++++++------------------ 1 file changed, 198 insertions(+), 168 deletions(-) diff --git a/src/runtime/src/diff.rs b/src/runtime/src/diff.rs index 45c4ef0..e030181 100644 --- a/src/runtime/src/diff.rs +++ b/src/runtime/src/diff.rs @@ -1,7 +1,8 @@ -use crate::{load_json_str, LinkMLValue, NodeId}; -use linkml_schemaview::schemaview::{SchemaView, SlotView}; +use crate::{LinkMLValue, NodeId}; +use linkml_schemaview::identifier::Identifier; +use linkml_schemaview::schemaview::{ClassView, SchemaView, SlotView}; use serde::{Deserialize, Serialize}; -use serde_json::{Map, Value as JsonValue}; +use serde_json::Value as JsonValue; const IGNORE_ANNOTATION: &str = "diff.linkml.io/ignore"; @@ -188,223 +189,252 @@ pub struct PatchTrace { pub updated: Vec, } -fn collect_ids(value: &LinkMLValue, out: &mut Vec) { - out.push(value.node_id()); +pub fn patch(source: &LinkMLValue, deltas: &[Delta], sv: &SchemaView) -> (LinkMLValue, PatchTrace) { + let mut out = source.clone(); + let mut trace = PatchTrace::default(); + for d in deltas { + apply_delta_linkml(&mut out, &d.path, &d.new, sv, &mut trace); + } + (out, trace) +} + +fn collect_all_ids(value: &LinkMLValue, ids: &mut Vec) { + ids.push(value.node_id()); match value { LinkMLValue::Scalar { .. } => {} LinkMLValue::List { values, .. } => { for v in values { - collect_ids(v, out); + collect_all_ids(v, ids); } } LinkMLValue::Mapping { values, .. } | LinkMLValue::Object { values, .. } => { for v in values.values() { - collect_ids(v, out); + collect_all_ids(v, ids); } } } } -fn reconcile_ids(old: &LinkMLValue, new: &mut LinkMLValue) { - match (old, new) { - (LinkMLValue::Scalar { node_id: oid, .. }, LinkMLValue::Scalar { node_id: nid, .. }) => { - *nid = *oid; - } - ( - LinkMLValue::List { - node_id: oid, - values: ov, - .. - }, - LinkMLValue::List { - node_id: nid, - values: nv, - .. - }, - ) => { - *nid = *oid; - let n = std::cmp::min(ov.len(), nv.len()); - for i in 0..n { - reconcile_ids(&ov[i], &mut nv[i]); - } - } - ( - LinkMLValue::Mapping { - node_id: oid, - values: om, - .. - }, - LinkMLValue::Mapping { - node_id: nid, - values: nm, - .. - }, - ) => { - *nid = *oid; - for (k, ov) in om { - if let Some(nv) = nm.get_mut(k) { - reconcile_ids(ov, nv); - } - } - } - ( - LinkMLValue::Object { - node_id: oid, - values: om, - .. - }, - LinkMLValue::Object { - node_id: nid, - values: nm, - .. - }, - ) => { - *nid = *oid; - for (k, ov) in om { - if let Some(nv) = nm.get_mut(k) { - reconcile_ids(ov, nv); - } - } - } - _ => {} - } +fn mark_added_subtree(v: &LinkMLValue, trace: &mut PatchTrace) { + collect_all_ids(v, &mut trace.added); } -fn collect_updated(old: &LinkMLValue, new: &LinkMLValue, updated: &mut Vec) { - if old.node_id() == new.node_id() && old.to_json() != new.to_json() { - updated.push(new.node_id()); - } - match (old, new) { - (LinkMLValue::Scalar { .. }, LinkMLValue::Scalar { .. }) => {} - (LinkMLValue::List { values: ov, .. }, LinkMLValue::List { values: nv, .. }) => { - let n = std::cmp::min(ov.len(), nv.len()); - for i in 0..n { - collect_updated(&ov[i], &nv[i], updated); - } - } - ( - LinkMLValue::Mapping { values: om, .. }, - LinkMLValue::Mapping { values: nm, .. }, - ) - | ( - LinkMLValue::Object { values: om, .. }, - LinkMLValue::Object { values: nm, .. }, - ) => { - for (k, ov) in om { - if let Some(nv) = nm.get(k) { - collect_updated(ov, nv, updated); - } - } - } - _ => {} - } +fn mark_deleted_subtree(v: &LinkMLValue, trace: &mut PatchTrace) { + collect_all_ids(v, &mut trace.deleted); } -pub fn patch(source: &LinkMLValue, deltas: &[Delta], sv: &SchemaView) -> (LinkMLValue, PatchTrace) { - // Pre-snapshot - let mut pre_ids = Vec::new(); - collect_ids(source, &mut pre_ids); - - let mut json = source.to_json(); - for d in deltas { - apply_delta(&mut json, d); - } - let json_str = serde_json::to_string(&json).unwrap(); +fn build_for_object_slot( + parent_class: &ClassView, + key: &str, + json: serde_json::Value, + sv: &SchemaView, +) -> LinkMLValue { let conv = sv.converter(); - let mut new_value = match source { - LinkMLValue::Object { class: ref c, .. } => load_json_str(&json_str, sv, c, &conv).unwrap(), - _ => panic!("patching non-map values is not supported here"), - }; - - reconcile_ids(source, &mut new_value); - - let mut post_ids = Vec::new(); - collect_ids(&new_value, &mut post_ids); - - use std::collections::HashSet; - let pre: HashSet = pre_ids.into_iter().collect(); - let post: HashSet = post_ids.into_iter().collect(); - let added: Vec = post.difference(&pre).copied().collect(); - let deleted: Vec = pre.difference(&post).copied().collect(); + let slot = parent_class + .slots() + .into_iter() + .find(|s| s.name == key) + .cloned(); + let class_for_value = parent_class.clone(); + LinkMLValue::from_json(json, class_for_value, slot, sv, &conv, false) + .expect("failed to parse object slot value") +} - let mut updated = Vec::new(); - collect_updated(source, &new_value, &mut updated); +fn build_for_list_item( + list_slot: &SlotView, + list_class: Option<&ClassView>, + json: serde_json::Value, + sv: &SchemaView, +) -> LinkMLValue { + let conv = sv.converter(); + let class_range: Option = list_slot.get_range_class(); + let slot_for_item = if class_range.is_some() { None } else { Some(list_slot.clone()) }; + let class_for_item = class_range.as_ref().or(list_class).cloned().expect("list item class context"); + LinkMLValue::from_json(json, class_for_item, slot_for_item, sv, &conv, true) + .expect("failed to parse list item") +} - ( - new_value, - PatchTrace { - added, - deleted, - updated, - }, - ) +fn find_scalar_slot_for_inlined_map(class: &ClassView, key_slot_name: &str) -> Option { + for s in class.slots() { + if s.name == key_slot_name { + continue; + } + let def = s.definition(); + if def.multivalued.unwrap_or(false) { + continue; + } + if !s.is_range_scalar() { + continue; + } + return Some(s.clone()); + } + None } -fn apply_delta(value: &mut JsonValue, delta: &Delta) { - apply_delta_inner(value, &delta.path, &delta.new); +fn build_for_mapping_value( + map_slot: &SlotView, + json: serde_json::Value, + sv: &SchemaView, +) -> LinkMLValue { + let conv = sv.converter(); + let range_cv = map_slot + .definition() + .range + .as_ref() + .and_then(|r| sv.get_class(&Identifier::new(r), &conv).ok().flatten()) + .expect("mapping slot must have class range"); + + match json { + serde_json::Value::Object(_) => { + LinkMLValue::from_json(json, range_cv.clone(), None, sv, &conv, false) + .expect("failed to parse mapping object value") + } + other => { + let key_slot_name = range_cv + .key_or_identifier_slot() + .map(|s| s.name.as_str()) + .unwrap_or(""); + let scalar_slot = find_scalar_slot_for_inlined_map(&range_cv, key_slot_name) + .expect("no scalar slot available for inlined mapping"); + let mut m = serde_json::Map::new(); + m.insert(scalar_slot.name.clone(), other); + LinkMLValue::from_json(serde_json::Value::Object(m), range_cv, None, sv, &conv, false) + .expect("failed to parse scalar mapping value") + } + } } -fn apply_delta_inner(value: &mut JsonValue, path: &[String], newv: &Option) { +fn apply_delta_linkml( + current: &mut LinkMLValue, + path: &[String], + newv: &Option, + sv: &SchemaView, + trace: &mut PatchTrace, +) { if path.is_empty() { if let Some(v) = newv { - *value = v.clone(); + let (class_opt, slot_opt) = match current { + LinkMLValue::Object { class, .. } => (Some(class.clone()), None), + LinkMLValue::List { class, slot, .. } => (class.clone(), Some(slot.clone())), + LinkMLValue::Mapping { class, slot, .. } => (class.clone(), Some(slot.clone())), + LinkMLValue::Scalar { class, slot, .. } => (class.clone(), Some(slot.clone())), + }; + let conv = sv.converter(); + if let Some(cls) = class_opt { + let new_node = LinkMLValue::from_json(v.clone(), cls, slot_opt, sv, &conv, false) + .expect("failed to parse replacement value"); + mark_deleted_subtree(current, trace); + mark_added_subtree(&new_node, trace); + *current = new_node; + } } return; } - match value { - JsonValue::Object(map) => { + + match current { + LinkMLValue::Object { values, class, .. } => { let key = &path[0]; if path.len() == 1 { match newv { Some(v) => { - map.insert(key.clone(), v.clone()); + if let Some(old_child) = values.get_mut(key) { + match old_child { + LinkMLValue::Scalar { value, .. } => { + if !v.is_object() && !v.is_array() { + *value = v.clone(); + trace.updated.push(old_child.node_id()); + return; + } + } + _ => {} + } + let new_child = build_for_object_slot(class, key, v.clone(), sv); + let old_snapshot = std::mem::replace(old_child, new_child); + mark_deleted_subtree(&old_snapshot, trace); + mark_added_subtree(values.get(key).unwrap(), trace); + trace.updated.push(current.node_id()); + } else { + let new_child = build_for_object_slot(class, key, v.clone(), sv); + values.insert(key.clone(), new_child); + mark_added_subtree(values.get(key).unwrap(), trace); + trace.updated.push(current.node_id()); + } } None => { - map.remove(key); + if let Some(old_child) = values.remove(key) { + mark_deleted_subtree(&old_child, trace); + trace.updated.push(current.node_id()); + } } } - } else { - let entry = map - .entry(key.clone()) - .or_insert(JsonValue::Object(Map::new())); - apply_delta_inner(entry, &path[1..], newv); + } else if let Some(child) = values.get_mut(key) { + apply_delta_linkml(child, &path[1..], newv, sv, trace); } } - JsonValue::Array(arr) => { - let idx: usize = path[0].parse().unwrap(); + LinkMLValue::Mapping { values, slot, .. } => { + let key = &path[0]; if path.len() == 1 { match newv { Some(v) => { - if idx < arr.len() { - arr[idx] = v.clone(); - } else if idx == arr.len() { - arr.push(v.clone()); - } else { - while arr.len() < idx { - arr.push(JsonValue::Null); - } - arr.push(v.clone()); + let new_child = build_for_mapping_value(slot, v.clone(), sv); + if let Some(old_child) = values.get(key) { + mark_deleted_subtree(old_child, trace); } + values.insert(key.clone(), new_child); + mark_added_subtree(values.get(key).unwrap(), trace); + trace.updated.push(current.node_id()); } None => { - if idx < arr.len() { - arr.remove(idx); + if let Some(old_child) = values.remove(key) { + mark_deleted_subtree(&old_child, trace); + trace.updated.push(current.node_id()); } } } - } else { - if idx >= arr.len() { - arr.resize(idx + 1, JsonValue::Null); - } - apply_delta_inner(&mut arr[idx], &path[1..], newv); + } else if let Some(child) = values.get_mut(key) { + apply_delta_linkml(child, &path[1..], newv, sv, trace); } } - _ => { - if path.is_empty() { - if let Some(v) = newv { - *value = v.clone(); + LinkMLValue::List { values, slot, class, .. } => { + let idx: usize = path[0].parse().expect("invalid list index in delta path"); + if path.len() == 1 { + match newv { + Some(v) => { + if idx < values.len() { + let is_scalar_target = matches!(values[idx], LinkMLValue::Scalar { .. }); + if is_scalar_target && !v.is_object() && !v.is_array() { + if let LinkMLValue::Scalar { value, .. } = &mut values[idx] { + *value = v.clone(); + trace.updated.push(values[idx].node_id()); + } + } else { + let new_child = build_for_list_item(slot, class.as_ref(), v.clone(), sv); + let old = std::mem::replace(&mut values[idx], new_child); + mark_deleted_subtree(&old, trace); + mark_added_subtree(&values[idx], trace); + trace.updated.push(current.node_id()); + } + } else if idx == values.len() { + let new_child = build_for_list_item(slot, class.as_ref(), v.clone(), sv); + values.push(new_child); + mark_added_subtree(values.last().unwrap(), trace); + trace.updated.push(current.node_id()); + } else { + panic!("list index out of bounds in add: {} > {}", idx, values.len()); + } + } + None => { + if idx < values.len() { + let old = values.remove(idx); + mark_deleted_subtree(&old, trace); + trace.updated.push(current.node_id()); + } + } } + } else if idx < values.len() { + apply_delta_linkml(&mut values[idx], &path[1..], newv, sv, trace); } } + LinkMLValue::Scalar { .. } => {} } } From 2c99faf10b36b21cf1354a91d6807053328c5926 Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Wed, 10 Sep 2025 23:33:54 +0200 Subject: [PATCH 03/13] Refactor LinkMLValue builders and reuse in patch; fix fmt/clippy; restore network-based CLI tests; remove dead code --- src/python/src/lib.rs | 4 +- src/runtime/src/diff.rs | 107 ++++++--------- src/runtime/src/lib.rs | 264 +++++++++++++++++++------------------ src/runtime/tests/trace.rs | 19 ++- 4 files changed, 190 insertions(+), 204 deletions(-) diff --git a/src/python/src/lib.rs b/src/python/src/lib.rs index 91eeb52..61edd0c 100644 --- a/src/python/src/lib.rs +++ b/src/python/src/lib.rs @@ -1,5 +1,5 @@ use linkml_meta::{ClassDefinition, EnumDefinition, SchemaDefinition, SlotDefinition}; -use linkml_runtime::diff::{diff as diff_internal, patch as patch_internal, Delta, PatchTrace}; +use linkml_runtime::diff::{diff as diff_internal, patch as patch_internal, Delta}; use linkml_runtime::turtle::{turtle_to_string, TurtleOptions}; use linkml_runtime::{load_json_str, load_yaml_str, LinkMLValue}; use linkml_schemaview::identifier::Identifier; @@ -771,7 +771,7 @@ fn py_patch( let dict = pyo3::types::PyDict::new(py); dict.set_item("value", Py::new(py, py_val)?)?; dict.set_item("trace", json_value_to_py(py, &trace_json))?; - Ok(dict.into_any().unbind().into()) + Ok(dict.into_any().unbind()) } #[pyfunction(name = "to_turtle", signature = (value, skolem=None))] diff --git a/src/runtime/src/diff.rs b/src/runtime/src/diff.rs index e030181..2e800bd 100644 --- a/src/runtime/src/diff.rs +++ b/src/runtime/src/diff.rs @@ -1,5 +1,4 @@ use crate::{LinkMLValue, NodeId}; -use linkml_schemaview::identifier::Identifier; use linkml_schemaview::schemaview::{ClassView, SchemaView, SlotView}; use serde::{Deserialize, Serialize}; use serde_json::Value as JsonValue; @@ -223,21 +222,26 @@ fn mark_deleted_subtree(v: &LinkMLValue, trace: &mut PatchTrace) { collect_all_ids(v, &mut trace.deleted); } +fn build_from_json_with_context( + json: serde_json::Value, + class_ctx: ClassView, + slot_ctx: Option, + sv: &SchemaView, + allow_inlined: bool, +) -> LinkMLValue { + let conv = sv.converter(); + LinkMLValue::from_json(json, class_ctx, slot_ctx, sv, &conv, allow_inlined) + .expect("failed to construct LinkMLValue from JSON") +} + fn build_for_object_slot( parent_class: &ClassView, key: &str, json: serde_json::Value, sv: &SchemaView, ) -> LinkMLValue { - let conv = sv.converter(); - let slot = parent_class - .slots() - .into_iter() - .find(|s| s.name == key) - .cloned(); - let class_for_value = parent_class.clone(); - LinkMLValue::from_json(json, class_for_value, slot, sv, &conv, false) - .expect("failed to parse object slot value") + let slot = parent_class.slots().iter().find(|s| s.name == key).cloned(); + build_from_json_with_context(json, parent_class.clone(), slot, sv, false) } fn build_for_list_item( @@ -247,29 +251,10 @@ fn build_for_list_item( sv: &SchemaView, ) -> LinkMLValue { let conv = sv.converter(); - let class_range: Option = list_slot.get_range_class(); - let slot_for_item = if class_range.is_some() { None } else { Some(list_slot.clone()) }; - let class_for_item = class_range.as_ref().or(list_class).cloned().expect("list item class context"); - LinkMLValue::from_json(json, class_for_item, slot_for_item, sv, &conv, true) + LinkMLValue::build_list_item_for_slot(list_slot, list_class, json, sv, &conv, Vec::new()) .expect("failed to parse list item") } -fn find_scalar_slot_for_inlined_map(class: &ClassView, key_slot_name: &str) -> Option { - for s in class.slots() { - if s.name == key_slot_name { - continue; - } - let def = s.definition(); - if def.multivalued.unwrap_or(false) { - continue; - } - if !s.is_range_scalar() { - continue; - } - return Some(s.clone()); - } - None -} fn build_for_mapping_value( map_slot: &SlotView, @@ -277,31 +262,8 @@ fn build_for_mapping_value( sv: &SchemaView, ) -> LinkMLValue { let conv = sv.converter(); - let range_cv = map_slot - .definition() - .range - .as_ref() - .and_then(|r| sv.get_class(&Identifier::new(r), &conv).ok().flatten()) - .expect("mapping slot must have class range"); - - match json { - serde_json::Value::Object(_) => { - LinkMLValue::from_json(json, range_cv.clone(), None, sv, &conv, false) - .expect("failed to parse mapping object value") - } - other => { - let key_slot_name = range_cv - .key_or_identifier_slot() - .map(|s| s.name.as_str()) - .unwrap_or(""); - let scalar_slot = find_scalar_slot_for_inlined_map(&range_cv, key_slot_name) - .expect("no scalar slot available for inlined mapping"); - let mut m = serde_json::Map::new(); - m.insert(scalar_slot.name.clone(), other); - LinkMLValue::from_json(serde_json::Value::Object(m), range_cv, None, sv, &conv, false) - .expect("failed to parse scalar mapping value") - } - } + LinkMLValue::build_mapping_entry_for_slot(map_slot, json, sv, &conv, Vec::new()) + .expect("failed to parse mapping value") } fn apply_delta_linkml( @@ -338,15 +300,12 @@ fn apply_delta_linkml( match newv { Some(v) => { if let Some(old_child) = values.get_mut(key) { - match old_child { - LinkMLValue::Scalar { value, .. } => { - if !v.is_object() && !v.is_array() { - *value = v.clone(); - trace.updated.push(old_child.node_id()); - return; - } + if let LinkMLValue::Scalar { value, .. } = old_child { + if !v.is_object() && !v.is_array() { + *value = v.clone(); + trace.updated.push(old_child.node_id()); + return; } - _ => {} } let new_child = build_for_object_slot(class, key, v.clone(), sv); let old_snapshot = std::mem::replace(old_child, new_child); @@ -395,32 +354,44 @@ fn apply_delta_linkml( apply_delta_linkml(child, &path[1..], newv, sv, trace); } } - LinkMLValue::List { values, slot, class, .. } => { + LinkMLValue::List { + values, + slot, + class, + .. + } => { let idx: usize = path[0].parse().expect("invalid list index in delta path"); if path.len() == 1 { match newv { Some(v) => { if idx < values.len() { - let is_scalar_target = matches!(values[idx], LinkMLValue::Scalar { .. }); + let is_scalar_target = + matches!(values[idx], LinkMLValue::Scalar { .. }); if is_scalar_target && !v.is_object() && !v.is_array() { if let LinkMLValue::Scalar { value, .. } = &mut values[idx] { *value = v.clone(); trace.updated.push(values[idx].node_id()); } } else { - let new_child = build_for_list_item(slot, class.as_ref(), v.clone(), sv); + let new_child = + build_for_list_item(slot, class.as_ref(), v.clone(), sv); let old = std::mem::replace(&mut values[idx], new_child); mark_deleted_subtree(&old, trace); mark_added_subtree(&values[idx], trace); trace.updated.push(current.node_id()); } } else if idx == values.len() { - let new_child = build_for_list_item(slot, class.as_ref(), v.clone(), sv); + let new_child = + build_for_list_item(slot, class.as_ref(), v.clone(), sv); values.push(new_child); mark_added_subtree(values.last().unwrap(), trace); trace.updated.push(current.node_id()); } else { - panic!("list index out of bounds in add: {} > {}", idx, values.len()); + panic!( + "list index out of bounds in add: {} > {}", + idx, + values.len() + ); } } None => { diff --git a/src/runtime/src/lib.rs b/src/runtime/src/lib.rs index f0ef4d5..202530e 100644 --- a/src/runtime/src/lib.rs +++ b/src/runtime/src/lib.rs @@ -238,36 +238,10 @@ impl LinkMLValue { match (inside_list, value) { (false, JsonValue::Array(arr)) => { let mut values = Vec::new(); - let class_range: Option = sl.get_range_class(); - let slot_for_item = if class_range.is_some() { - None - } else { - Some(sl.clone()) - }; for (i, v) in arr.into_iter().enumerate() { let mut p = path.clone(); p.push(format!("{}[{}]", sl.name, i)); - let v_transformed = - if let (Some(cr), JsonValue::String(s)) = (class_range.as_ref(), &v) { - if let Some(id_slot) = cr.identifier_slot() { - let mut m = serde_json::Map::new(); - m.insert(id_slot.name.clone(), JsonValue::String(s.clone())); - JsonValue::Object(m) - } else { - v - } - } else { - v - }; - values.push(Self::from_json_internal( - v_transformed, - class_range.as_ref().unwrap_or(&class).clone(), - slot_for_item.clone(), - sv, - conv, - true, - p, - )?); + values.push(Self::build_list_item_for_slot(&sl, Some(&class), v, sv, conv, p)?); } Ok(LinkMLValue::List { node_id: new_node_id(), @@ -303,95 +277,13 @@ impl LinkMLValue { ) -> LResult { match value { JsonValue::Object(map) => { - let range_cv = sl - .definition() - .range - .as_ref() - .and_then(|r| sv.get_class(&Identifier::new(r), conv).ok().flatten()) - .ok_or_else(|| { - LinkMLError(format!( - "mapping slot must have class range at {}", - path_to_string(&path) - )) - })?; let mut values = HashMap::new(); for (k, v) in map.into_iter() { - let base = sv - .get_class(&Identifier::new(range_cv.name()), conv) - .ok() - .flatten() - .unwrap_or_else(|| range_cv.clone()); - let child = match v { - JsonValue::Object(m) => { - // Select the most specific subclass using any type designator in the map - let selected = Self::select_class(&m, &base, sv, conv); - let mut child_values = HashMap::new(); - for (ck, cv) in m.into_iter() { - let slot_tmp = selected - .slots() - .iter() - .find(|s| slot_matches_key(s, &ck)) - .cloned(); - let mut p = path.clone(); - p.push(format!("{}:{}", k, ck)); - let key_name = slot_tmp - .as_ref() - .map(|s| s.name.clone()) - .unwrap_or_else(|| ck.clone()); - child_values.insert( - key_name, - Self::from_json_internal( - cv, - selected.clone(), - slot_tmp, - sv, - conv, - false, - p, - )?, - ); - } - LinkMLValue::Object { - node_id: new_node_id(), - values: child_values, - class: selected, - sv: sv.clone(), - } - } - other => { - // Scalar mapping value: attach it to a chosen scalar slot if any - let scalar_slot = Self::find_scalar_slot_for_inlined_map( - &base, - range_cv - .key_or_identifier_slot() - .map(|s| s.name.as_str()) - .unwrap_or(""), - ) - .ok_or_else(|| { - LinkMLError(format!( - "no scalar slot available for inlined mapping at {}", - path_to_string(&path) - )) - })?; - let mut child_values = HashMap::new(); - child_values.insert( - scalar_slot.name.clone(), - LinkMLValue::Scalar { - node_id: new_node_id(), - value: other, - slot: scalar_slot.clone(), - class: Some(base.clone()), - sv: sv.clone(), - }, - ); - LinkMLValue::Object { - node_id: new_node_id(), - values: child_values, - class: base.clone(), - sv: sv.clone(), - } - } - }; + let child = Self::build_mapping_entry_for_slot(sl, v, sv, conv, { + let mut p = path.clone(); + p.push(k.clone()); + p + })?; values.insert(k, child); } Ok(LinkMLValue::Mapping { @@ -426,25 +318,11 @@ impl LinkMLValue { class.name() )) })?; - let class_range: Option = sl.get_range_class(); - let slot_for_item = if class_range.is_some() { - None - } else { - Some(sl.clone()) - }; let mut values = Vec::new(); for (i, v) in arr.into_iter().enumerate() { let mut p = path.clone(); p.push(format!("[{}]", i)); - values.push(Self::from_json_internal( - v, - class_range.as_ref().unwrap_or(&class).clone(), - slot_for_item.clone(), - sv, - conv, - false, - p, - )?); + values.push(Self::build_list_item_for_slot(&sl, Some(&class), v, sv, conv, p)?); } Ok(LinkMLValue::List { node_id: new_node_id(), @@ -574,6 +452,134 @@ impl LinkMLValue { ) -> LResult { Self::from_json_internal(value, class, slot, sv, conv, inside_list, Vec::new()) } + + // Shared builders (used by loaders and patch logic) + pub(crate) fn build_list_item_for_slot( + list_slot: &SlotView, + list_class: Option<&ClassView>, + value: JsonValue, + sv: &SchemaView, + conv: &Converter, + path: Vec, + ) -> LResult { + let class_range: Option = list_slot.get_range_class(); + let slot_for_item = if class_range.is_some() { + None + } else { + Some(list_slot.clone()) + }; + let v_transformed = if let (Some(cr), JsonValue::String(s)) = (class_range.as_ref(), &value) { + if let Some(id_slot) = cr.identifier_slot() { + let mut m = serde_json::Map::new(); + m.insert(id_slot.name.clone(), JsonValue::String(s.clone())); + JsonValue::Object(m) + } else { + value + } + } else { + value + }; + Self::from_json_internal( + v_transformed, + class_range + .as_ref() + .or(list_class) + .cloned() + .ok_or_else(|| LinkMLError("list item class context".to_string()))?, + slot_for_item, + sv, + conv, + true, + path, + ) + } + + pub(crate) fn build_mapping_entry_for_slot( + map_slot: &SlotView, + value: JsonValue, + sv: &SchemaView, + conv: &Converter, + path: Vec, + ) -> LResult { + let range_cv = map_slot + .definition() + .range + .as_ref() + .and_then(|r| sv.get_class(&Identifier::new(r), conv).ok().flatten()) + .ok_or_else(|| { + LinkMLError(format!( + "mapping slot must have class range at {}", + path_to_string(&path) + )) + })?; + match value { + JsonValue::Object(m) => { + let selected = Self::select_class(&m, &range_cv, sv, conv); + let mut child_values = HashMap::new(); + for (ck, cv) in m.into_iter() { + let slot_tmp = selected + .slots() + .iter() + .find(|s| slot_matches_key(s, &ck)) + .cloned(); + let mut p = path.clone(); + p.push(ck.clone()); + let key_name = slot_tmp + .as_ref() + .map(|s| s.name.clone()) + .unwrap_or_else(|| ck.clone()); + child_values.insert( + key_name, + Self::from_json_internal( + cv, + selected.clone(), + slot_tmp, + sv, + conv, + false, + p, + )?, + ); + } + Ok(LinkMLValue::Object { + node_id: new_node_id(), + values: child_values, + class: selected, + sv: sv.clone(), + }) + } + other => { + let key_slot_name = range_cv + .key_or_identifier_slot() + .map(|s| s.name.as_str()) + .unwrap_or(""); + let scalar_slot = Self::find_scalar_slot_for_inlined_map(&range_cv, key_slot_name) + .ok_or_else(|| { + LinkMLError(format!( + "no scalar slot available for inlined mapping at {}", + path_to_string(&path) + )) + })?; + let mut child_values = HashMap::new(); + child_values.insert( + scalar_slot.name.clone(), + LinkMLValue::Scalar { + node_id: new_node_id(), + value: other, + slot: scalar_slot.clone(), + class: Some(range_cv.clone()), + sv: sv.clone(), + }, + ); + Ok(LinkMLValue::Object { + node_id: new_node_id(), + values: child_values, + class: range_cv, + sv: sv.clone(), + }) + } + } + } } pub fn load_yaml_file( diff --git a/src/runtime/tests/trace.rs b/src/runtime/tests/trace.rs index 6504b22..37db334 100644 --- a/src/runtime/tests/trace.rs +++ b/src/runtime/tests/trace.rs @@ -60,8 +60,13 @@ fn node_ids_preserved_scalar_update() { if let serde_json::Value::Object(ref mut m) = tgt_json { m.insert("age".to_string(), serde_json::json!(99)); } - let tgt = load_json_str(&serde_json::to_string(&tgt_json).unwrap(), &sv, &class, &conv) - .unwrap(); + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); let deltas = diff(&src, &tgt, false); let (patched, trace) = linkml_runtime::patch(&src, &deltas, &sv); @@ -106,8 +111,13 @@ fn patch_trace_add_in_list() { arr.push(new_obj); } } - let target = load_json_str(&serde_json::to_string(&base_json).unwrap(), &sv, &container, &conv) - .unwrap(); + let target = load_json_str( + &serde_json::to_string(&base_json).unwrap(), + &sv, + &container, + &conv, + ) + .unwrap(); let deltas = diff(&base, &target, false); let mut pre = Vec::new(); @@ -123,4 +133,3 @@ fn patch_trace_add_in_list() { assert_eq!(added, trace_added); assert!(!added.is_empty()); } - From 6d9f7534289f262c01b2cb084594e4da4f1ba886 Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Wed, 10 Sep 2025 23:43:36 +0200 Subject: [PATCH 04/13] fmt: apply rustfmt after refactor; clippy clean excluding linkml_meta --- src/runtime/src/diff.rs | 1 - src/runtime/src/lib.rs | 21 ++++++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/runtime/src/diff.rs b/src/runtime/src/diff.rs index 2e800bd..73ca81d 100644 --- a/src/runtime/src/diff.rs +++ b/src/runtime/src/diff.rs @@ -255,7 +255,6 @@ fn build_for_list_item( .expect("failed to parse list item") } - fn build_for_mapping_value( map_slot: &SlotView, json: serde_json::Value, diff --git a/src/runtime/src/lib.rs b/src/runtime/src/lib.rs index 202530e..3b9e956 100644 --- a/src/runtime/src/lib.rs +++ b/src/runtime/src/lib.rs @@ -241,7 +241,14 @@ impl LinkMLValue { for (i, v) in arr.into_iter().enumerate() { let mut p = path.clone(); p.push(format!("{}[{}]", sl.name, i)); - values.push(Self::build_list_item_for_slot(&sl, Some(&class), v, sv, conv, p)?); + values.push(Self::build_list_item_for_slot( + &sl, + Some(&class), + v, + sv, + conv, + p, + )?); } Ok(LinkMLValue::List { node_id: new_node_id(), @@ -322,7 +329,14 @@ impl LinkMLValue { for (i, v) in arr.into_iter().enumerate() { let mut p = path.clone(); p.push(format!("[{}]", i)); - values.push(Self::build_list_item_for_slot(&sl, Some(&class), v, sv, conv, p)?); + values.push(Self::build_list_item_for_slot( + &sl, + Some(&class), + v, + sv, + conv, + p, + )?); } Ok(LinkMLValue::List { node_id: new_node_id(), @@ -468,7 +482,8 @@ impl LinkMLValue { } else { Some(list_slot.clone()) }; - let v_transformed = if let (Some(cr), JsonValue::String(s)) = (class_range.as_ref(), &value) { + let v_transformed = if let (Some(cr), JsonValue::String(s)) = (class_range.as_ref(), &value) + { if let Some(id_slot) = cr.identifier_slot() { let mut m = serde_json::Map::new(); m.insert(id_slot.name.clone(), JsonValue::String(s.clone())); From be5dacc414581033f5c1512f42876a4a5a339e7c Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Wed, 10 Sep 2025 23:46:14 +0200 Subject: [PATCH 05/13] diff: inline LinkMLValue builder calls; remove thin wrappers and unused imports --- src/runtime/src/diff.rs | 103 +++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/src/runtime/src/diff.rs b/src/runtime/src/diff.rs index 73ca81d..cb0654f 100644 --- a/src/runtime/src/diff.rs +++ b/src/runtime/src/diff.rs @@ -1,5 +1,5 @@ use crate::{LinkMLValue, NodeId}; -use linkml_schemaview::schemaview::{ClassView, SchemaView, SlotView}; +use linkml_schemaview::schemaview::{SchemaView, SlotView}; use serde::{Deserialize, Serialize}; use serde_json::Value as JsonValue; @@ -222,48 +222,7 @@ fn mark_deleted_subtree(v: &LinkMLValue, trace: &mut PatchTrace) { collect_all_ids(v, &mut trace.deleted); } -fn build_from_json_with_context( - json: serde_json::Value, - class_ctx: ClassView, - slot_ctx: Option, - sv: &SchemaView, - allow_inlined: bool, -) -> LinkMLValue { - let conv = sv.converter(); - LinkMLValue::from_json(json, class_ctx, slot_ctx, sv, &conv, allow_inlined) - .expect("failed to construct LinkMLValue from JSON") -} - -fn build_for_object_slot( - parent_class: &ClassView, - key: &str, - json: serde_json::Value, - sv: &SchemaView, -) -> LinkMLValue { - let slot = parent_class.slots().iter().find(|s| s.name == key).cloned(); - build_from_json_with_context(json, parent_class.clone(), slot, sv, false) -} - -fn build_for_list_item( - list_slot: &SlotView, - list_class: Option<&ClassView>, - json: serde_json::Value, - sv: &SchemaView, -) -> LinkMLValue { - let conv = sv.converter(); - LinkMLValue::build_list_item_for_slot(list_slot, list_class, json, sv, &conv, Vec::new()) - .expect("failed to parse list item") -} - -fn build_for_mapping_value( - map_slot: &SlotView, - json: serde_json::Value, - sv: &SchemaView, -) -> LinkMLValue { - let conv = sv.converter(); - LinkMLValue::build_mapping_entry_for_slot(map_slot, json, sv, &conv, Vec::new()) - .expect("failed to parse mapping value") -} +// Removed thin wrappers; call LinkMLValue builders directly at call sites. fn apply_delta_linkml( current: &mut LinkMLValue, @@ -306,13 +265,33 @@ fn apply_delta_linkml( return; } } - let new_child = build_for_object_slot(class, key, v.clone(), sv); + let conv = sv.converter(); + let slot = class.slots().iter().find(|s| s.name == *key).cloned(); + let new_child = LinkMLValue::from_json( + v.clone(), + class.clone(), + slot, + sv, + &conv, + false, + ) + .expect("failed to parse object slot value"); let old_snapshot = std::mem::replace(old_child, new_child); mark_deleted_subtree(&old_snapshot, trace); mark_added_subtree(values.get(key).unwrap(), trace); trace.updated.push(current.node_id()); } else { - let new_child = build_for_object_slot(class, key, v.clone(), sv); + let conv = sv.converter(); + let slot = class.slots().iter().find(|s| s.name == *key).cloned(); + let new_child = LinkMLValue::from_json( + v.clone(), + class.clone(), + slot, + sv, + &conv, + false, + ) + .expect("failed to parse object slot value"); values.insert(key.clone(), new_child); mark_added_subtree(values.get(key).unwrap(), trace); trace.updated.push(current.node_id()); @@ -334,7 +313,15 @@ fn apply_delta_linkml( if path.len() == 1 { match newv { Some(v) => { - let new_child = build_for_mapping_value(slot, v.clone(), sv); + let conv = sv.converter(); + let new_child = LinkMLValue::build_mapping_entry_for_slot( + slot, + v.clone(), + sv, + &conv, + Vec::new(), + ) + .expect("failed to parse mapping value"); if let Some(old_child) = values.get(key) { mark_deleted_subtree(old_child, trace); } @@ -372,16 +359,32 @@ fn apply_delta_linkml( trace.updated.push(values[idx].node_id()); } } else { - let new_child = - build_for_list_item(slot, class.as_ref(), v.clone(), sv); + let conv = sv.converter(); + let new_child = LinkMLValue::build_list_item_for_slot( + slot, + class.as_ref(), + v.clone(), + sv, + &conv, + Vec::new(), + ) + .expect("failed to parse list item"); let old = std::mem::replace(&mut values[idx], new_child); mark_deleted_subtree(&old, trace); mark_added_subtree(&values[idx], trace); trace.updated.push(current.node_id()); } } else if idx == values.len() { - let new_child = - build_for_list_item(slot, class.as_ref(), v.clone(), sv); + let conv = sv.converter(); + let new_child = LinkMLValue::build_list_item_for_slot( + slot, + class.as_ref(), + v.clone(), + sv, + &conv, + Vec::new(), + ) + .expect("failed to parse list item"); values.push(new_child); mark_added_subtree(values.last().unwrap(), trace); trace.updated.push(current.node_id()); From 8746af6b3fa0b41ceef4f591c16d7ceb779ccc4e Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Wed, 10 Sep 2025 23:49:25 +0200 Subject: [PATCH 06/13] diff: make patch panic-safe; return LResult and propagate errors instead of expect/panic; update tests, CLI, and Python bindings --- src/python/src/lib.rs | 3 +- src/runtime/src/diff.rs | 62 ++++++++++++++++--------------- src/runtime/tests/diff.rs | 6 +-- src/runtime/tests/trace.rs | 4 +- src/tools/src/bin/linkml_patch.rs | 2 +- 5 files changed, 41 insertions(+), 36 deletions(-) diff --git a/src/python/src/lib.rs b/src/python/src/lib.rs index 61edd0c..05f586a 100644 --- a/src/python/src/lib.rs +++ b/src/python/src/lib.rs @@ -761,7 +761,8 @@ fn py_patch( serde_json::from_str(&deltas_str).map_err(|e| PyException::new_err(e.to_string()))?; let sv_ref = source.sv.bind(py).borrow(); let rust_sv = sv_ref.as_rust(); - let (new_value, trace) = patch_internal(&source.value, &deltas_vec, rust_sv); + let (new_value, trace) = patch_internal(&source.value, &deltas_vec, rust_sv) + .map_err(|e| PyException::new_err(e.to_string()))?; let trace_json = serde_json::json!({ "added": trace.added, "deleted": trace.deleted, diff --git a/src/runtime/src/diff.rs b/src/runtime/src/diff.rs index cb0654f..dd9b754 100644 --- a/src/runtime/src/diff.rs +++ b/src/runtime/src/diff.rs @@ -1,4 +1,4 @@ -use crate::{LinkMLValue, NodeId}; +use crate::{LResult, LinkMLError, LinkMLValue, NodeId}; use linkml_schemaview::schemaview::{SchemaView, SlotView}; use serde::{Deserialize, Serialize}; use serde_json::Value as JsonValue; @@ -188,13 +188,17 @@ pub struct PatchTrace { pub updated: Vec, } -pub fn patch(source: &LinkMLValue, deltas: &[Delta], sv: &SchemaView) -> (LinkMLValue, PatchTrace) { +pub fn patch( + source: &LinkMLValue, + deltas: &[Delta], + sv: &SchemaView, +) -> LResult<(LinkMLValue, PatchTrace)> { let mut out = source.clone(); let mut trace = PatchTrace::default(); for d in deltas { - apply_delta_linkml(&mut out, &d.path, &d.new, sv, &mut trace); + apply_delta_linkml(&mut out, &d.path, &d.new, sv, &mut trace)?; } - (out, trace) + Ok((out, trace)) } fn collect_all_ids(value: &LinkMLValue, ids: &mut Vec) { @@ -230,7 +234,7 @@ fn apply_delta_linkml( newv: &Option, sv: &SchemaView, trace: &mut PatchTrace, -) { +) -> LResult<()> { if path.is_empty() { if let Some(v) = newv { let (class_opt, slot_opt) = match current { @@ -241,14 +245,13 @@ fn apply_delta_linkml( }; let conv = sv.converter(); if let Some(cls) = class_opt { - let new_node = LinkMLValue::from_json(v.clone(), cls, slot_opt, sv, &conv, false) - .expect("failed to parse replacement value"); + let new_node = LinkMLValue::from_json(v.clone(), cls, slot_opt, sv, &conv, false)?; mark_deleted_subtree(current, trace); mark_added_subtree(&new_node, trace); *current = new_node; } } - return; + return Ok(()); } match current { @@ -262,7 +265,7 @@ fn apply_delta_linkml( if !v.is_object() && !v.is_array() { *value = v.clone(); trace.updated.push(old_child.node_id()); - return; + return Ok(()); } } let conv = sv.converter(); @@ -274,11 +277,10 @@ fn apply_delta_linkml( sv, &conv, false, - ) - .expect("failed to parse object slot value"); + )?; let old_snapshot = std::mem::replace(old_child, new_child); mark_deleted_subtree(&old_snapshot, trace); - mark_added_subtree(values.get(key).unwrap(), trace); + mark_added_subtree(old_child, trace); trace.updated.push(current.node_id()); } else { let conv = sv.converter(); @@ -290,10 +292,10 @@ fn apply_delta_linkml( sv, &conv, false, - ) - .expect("failed to parse object slot value"); + )?; + // mark before insert + mark_added_subtree(&new_child, trace); values.insert(key.clone(), new_child); - mark_added_subtree(values.get(key).unwrap(), trace); trace.updated.push(current.node_id()); } } @@ -305,7 +307,7 @@ fn apply_delta_linkml( } } } else if let Some(child) = values.get_mut(key) { - apply_delta_linkml(child, &path[1..], newv, sv, trace); + apply_delta_linkml(child, &path[1..], newv, sv, trace)?; } } LinkMLValue::Mapping { values, slot, .. } => { @@ -320,13 +322,13 @@ fn apply_delta_linkml( sv, &conv, Vec::new(), - ) - .expect("failed to parse mapping value"); + )?; if let Some(old_child) = values.get(key) { mark_deleted_subtree(old_child, trace); } + // mark before insert + mark_added_subtree(&new_child, trace); values.insert(key.clone(), new_child); - mark_added_subtree(values.get(key).unwrap(), trace); trace.updated.push(current.node_id()); } None => { @@ -337,7 +339,7 @@ fn apply_delta_linkml( } } } else if let Some(child) = values.get_mut(key) { - apply_delta_linkml(child, &path[1..], newv, sv, trace); + apply_delta_linkml(child, &path[1..], newv, sv, trace)?; } } LinkMLValue::List { @@ -346,7 +348,9 @@ fn apply_delta_linkml( class, .. } => { - let idx: usize = path[0].parse().expect("invalid list index in delta path"); + let idx: usize = path[0].parse().map_err(|_| { + LinkMLError(format!("invalid list index '{}' in delta path", path[0])) + })?; if path.len() == 1 { match newv { Some(v) => { @@ -367,8 +371,7 @@ fn apply_delta_linkml( sv, &conv, Vec::new(), - ) - .expect("failed to parse list item"); + )?; let old = std::mem::replace(&mut values[idx], new_child); mark_deleted_subtree(&old, trace); mark_added_subtree(&values[idx], trace); @@ -383,17 +386,17 @@ fn apply_delta_linkml( sv, &conv, Vec::new(), - ) - .expect("failed to parse list item"); + )?; + // mark before push + mark_added_subtree(&new_child, trace); values.push(new_child); - mark_added_subtree(values.last().unwrap(), trace); trace.updated.push(current.node_id()); } else { - panic!( + return Err(LinkMLError(format!( "list index out of bounds in add: {} > {}", idx, values.len() - ); + ))); } } None => { @@ -405,9 +408,10 @@ fn apply_delta_linkml( } } } else if idx < values.len() { - apply_delta_linkml(&mut values[idx], &path[1..], newv, sv, trace); + apply_delta_linkml(&mut values[idx], &path[1..], newv, sv, trace)?; } } LinkMLValue::Scalar { .. } => {} } + Ok(()) } diff --git a/src/runtime/tests/diff.rs b/src/runtime/tests/diff.rs index 4243105..4477080 100644 --- a/src/runtime/tests/diff.rs +++ b/src/runtime/tests/diff.rs @@ -57,7 +57,7 @@ fn diff_and_patch_person() { } } - let (patched, _trace) = patch(&src, &deltas, &sv); + let (patched, _trace) = patch(&src, &deltas, &sv).unwrap(); let patched_json = patched.to_json(); let target_json = tgt.to_json(); let src_json = src.to_json(); @@ -93,7 +93,7 @@ fn diff_ignore_missing_target() { let deltas = diff(&src, &tgt, true); assert!(deltas.is_empty()); - let (patched, _trace) = patch(&src, &deltas, &sv); + let (patched, _trace) = patch(&src, &deltas, &sv).unwrap(); let patched_json = patched.to_json(); let src_json = src.to_json(); assert_eq!(patched_json, src_json); @@ -135,7 +135,7 @@ fn diff_and_patch_personinfo() { assert!(tgt.navigate_path(&d.path).is_some()); } } - let (patched, _trace) = patch(&src, &deltas, &sv); + let (patched, _trace) = patch(&src, &deltas, &sv).unwrap(); assert_eq!(patched.to_json(), tgt.to_json()); } diff --git a/src/runtime/tests/trace.rs b/src/runtime/tests/trace.rs index 37db334..ac6b55b 100644 --- a/src/runtime/tests/trace.rs +++ b/src/runtime/tests/trace.rs @@ -69,7 +69,7 @@ fn node_ids_preserved_scalar_update() { .unwrap(); let deltas = diff(&src, &tgt, false); - let (patched, trace) = linkml_runtime::patch(&src, &deltas, &sv); + let (patched, trace) = linkml_runtime::patch(&src, &deltas, &sv).unwrap(); assert!(trace.added.is_empty()); assert!(trace.deleted.is_empty()); @@ -122,7 +122,7 @@ fn patch_trace_add_in_list() { let deltas = diff(&base, &target, false); let mut pre = Vec::new(); collect_ids(&base, &mut pre); - let (patched, trace) = linkml_runtime::patch(&base, &deltas, &sv); + let (patched, trace) = linkml_runtime::patch(&base, &deltas, &sv).unwrap(); let mut post = Vec::new(); collect_ids(&patched, &mut post); diff --git a/src/tools/src/bin/linkml_patch.rs b/src/tools/src/bin/linkml_patch.rs index 4546bd4..cfe8c60 100644 --- a/src/tools/src/bin/linkml_patch.rs +++ b/src/tools/src/bin/linkml_patch.rs @@ -92,7 +92,7 @@ fn main() -> Result<(), Box> { } else { serde_yaml::from_str(&delta_text)? }; - let (patched, _trace) = patch(&src, &deltas, &sv); + let (patched, _trace) = patch(&src, &deltas, &sv)?; write_value(args.output.as_deref(), &patched)?; Ok(()) } From 3d99bd425e763bb7dbb6e485323b9e1570267e6c Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Thu, 11 Sep 2025 00:00:08 +0200 Subject: [PATCH 07/13] tests: update for Null variant; use JSON roundtrip instead of constructing Null directly; handle Null in trace traversal --- src/runtime/src/diff.rs | 2 + src/runtime/tests/diff.rs | 156 +++++++++++++------------------------ src/runtime/tests/trace.rs | 1 + 3 files changed, 57 insertions(+), 102 deletions(-) diff --git a/src/runtime/src/diff.rs b/src/runtime/src/diff.rs index 2b7dcb4..853b960 100644 --- a/src/runtime/src/diff.rs +++ b/src/runtime/src/diff.rs @@ -273,6 +273,7 @@ fn apply_delta_linkml( LinkMLValue::List { class, slot, .. } => (class.clone(), Some(slot.clone())), LinkMLValue::Mapping { class, slot, .. } => (class.clone(), Some(slot.clone())), LinkMLValue::Scalar { class, slot, .. } => (class.clone(), Some(slot.clone())), + LinkMLValue::Null { class, slot, .. } => (class.clone(), Some(slot.clone())), }; let conv = sv.converter(); if let Some(cls) = class_opt { @@ -443,6 +444,7 @@ fn apply_delta_linkml( } } LinkMLValue::Scalar { .. } => {} + LinkMLValue::Null { .. } => {} } Ok(()) } diff --git a/src/runtime/tests/diff.rs b/src/runtime/tests/diff.rs index b2cb144..09a2bf2 100644 --- a/src/runtime/tests/diff.rs +++ b/src/runtime/tests/diff.rs @@ -1,4 +1,4 @@ -use linkml_runtime::{diff, load_yaml_file, patch}; +use linkml_runtime::{diff, load_json_str, load_yaml_file, patch}; use linkml_schemaview::identifier::{converter_from_schema, Identifier}; use linkml_schemaview::io::from_yaml; use linkml_schemaview::schemaview::SchemaView; @@ -160,126 +160,78 @@ fn diff_null_and_missing_semantics() { .unwrap(); // X -> null => update to null - if let LinkMLValue::Object { values, .. } = src.clone() { - let mut tgt_values = values.clone(); - let age_slot = class - .slots() - .iter() - .find(|s| s.name == "age") - .expect("age slot"); - tgt_values.insert( - "age".to_string(), - LinkMLValue::Null { - slot: age_slot.clone(), - class: Some(class.clone()), - sv: sv.clone(), - }, - ); - let deltas = diff( - &LinkMLValue::Object { - values: values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - &LinkMLValue::Object { - values: tgt_values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - false, - ); + if let LinkMLValue::Object { .. } = src.clone() { + let mut tgt_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = tgt_json { + m.insert("age".to_string(), serde_json::Value::Null); + } + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); + let deltas = diff(&src, &tgt, false); assert!(deltas .iter() .any(|d| d.path == vec!["age".to_string()] && d.new == Some(serde_json::Value::Null))); } // null -> X => update from null - if let LinkMLValue::Object { values, .. } = src.clone() { - let mut src_values = values.clone(); - let age_slot = class - .slots() - .iter() - .find(|s| s.name == "age") - .expect("age slot"); - src_values.insert( - "age".to_string(), - LinkMLValue::Null { - slot: age_slot.clone(), - class: Some(class.clone()), - sv: sv.clone(), - }, - ); - let deltas = diff( - &LinkMLValue::Object { - values: src_values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - &LinkMLValue::Object { - values: values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - false, - ); + if let LinkMLValue::Object { .. } = src.clone() { + let mut src_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = src_json { + m.insert("age".to_string(), serde_json::Value::Null); + } + let src_with_null = load_json_str( + &serde_json::to_string(&src_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); + let deltas = diff(&src_with_null, &src, false); assert!(deltas.iter().any(|d| d.path == vec!["age".to_string()] && d.old == Some(serde_json::Value::Null) && d.new.is_some())); } // missing -> X => add - if let LinkMLValue::Object { values, .. } = src.clone() { - let mut src_values = values.clone(); - src_values.remove("age"); - let deltas = diff( - &LinkMLValue::Object { - values: src_values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - &LinkMLValue::Object { - values: values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - false, - ); + if let LinkMLValue::Object { .. } = src.clone() { + let mut src_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = src_json { + m.remove("age"); + } + let src_missing = load_json_str( + &serde_json::to_string(&src_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); + let deltas = diff(&src_missing, &src, false); assert!(deltas .iter() .any(|d| d.path == vec!["age".to_string()] && d.old.is_none() && d.new.is_some())); } // X -> missing: ignored by default; produce update-to-null when treat_missing_as_null=true - if let LinkMLValue::Object { values, .. } = src.clone() { - let mut tgt_values = values.clone(); - tgt_values.remove("age"); - let deltas = diff( - &LinkMLValue::Object { - values: values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - &LinkMLValue::Object { - values: tgt_values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - false, - ); + if let LinkMLValue::Object { .. } = src.clone() { + let mut tgt_json = src.to_json(); + if let serde_json::Value::Object(ref mut m) = tgt_json { + m.remove("age"); + } + let tgt_missing = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &class, + &conv, + ) + .unwrap(); + let deltas = diff(&src, &tgt_missing, false); assert!(deltas.iter().all(|d| d.path != vec!["age".to_string()])); - let deltas2 = diff( - &LinkMLValue::Object { - values: values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - &LinkMLValue::Object { - values: tgt_values.clone(), - class: class.clone(), - sv: sv.clone(), - }, - true, - ); + let deltas2 = diff(&src, &tgt_missing, true); assert!(deltas2 .iter() .any(|d| d.path == vec!["age".to_string()] && d.new == Some(serde_json::Value::Null))) diff --git a/src/runtime/tests/trace.rs b/src/runtime/tests/trace.rs index ac6b55b..051aba7 100644 --- a/src/runtime/tests/trace.rs +++ b/src/runtime/tests/trace.rs @@ -25,6 +25,7 @@ fn collect_ids(v: &linkml_runtime::LinkMLValue, out: &mut Vec) { out.push(v.node_id()); match v { linkml_runtime::LinkMLValue::Scalar { .. } => {} + linkml_runtime::LinkMLValue::Null { .. } => {} linkml_runtime::LinkMLValue::List { values, .. } => { for c in values { collect_ids(c, out); From 91ec1de656543f52f5750fb4c0e2642f5440435d Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Thu, 11 Sep 2025 07:17:35 +0200 Subject: [PATCH 08/13] diff: treat identifier/key changes as object replacement; add tests for single, list, and mapping inlined cases --- src/runtime/src/diff.rs | 24 +++ src/runtime/tests/diff_identifier.rs | 246 +++++++++++++++++++++++++++ 2 files changed, 270 insertions(+) create mode 100644 src/runtime/tests/diff_identifier.rs diff --git a/src/runtime/src/diff.rs b/src/runtime/src/diff.rs index 853b960..0a3a5ea 100644 --- a/src/runtime/src/diff.rs +++ b/src/runtime/src/diff.rs @@ -81,6 +81,30 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, treat_missing_as_null: b .. }, ) => { + // If objects have an identifier or key slot and it changed, treat as whole-object replacement + // This applies for single-valued and list-valued inlined objects. + let key_slot_name = sc + .key_or_identifier_slot() + .or_else(|| tc.key_or_identifier_slot()) + .map(|s| s.name.clone()); + if let Some(ks) = key_slot_name { + let sid = sm.get(&ks); + let tid = tm.get(&ks); + if let ( + Some(LinkMLValue::Scalar { value: s_id, .. }), + Some(LinkMLValue::Scalar { value: t_id, .. }), + ) = (sid, tid) + { + if s_id != t_id { + out.push(Delta { + path: path.clone(), + old: Some(s.to_json()), + new: Some(t.to_json()), + }); + return; + } + } + } for (k, sv) in sm { let slot_view = sc .slots() diff --git a/src/runtime/tests/diff_identifier.rs b/src/runtime/tests/diff_identifier.rs new file mode 100644 index 0000000..7ba4960 --- /dev/null +++ b/src/runtime/tests/diff_identifier.rs @@ -0,0 +1,246 @@ +use linkml_runtime::{diff, load_json_str, load_yaml_file, patch}; +use linkml_schemaview::identifier::{converter_from_schema, Identifier}; +use linkml_schemaview::io::from_yaml; +use linkml_schemaview::schemaview::SchemaView; +use serde_json::Value as JsonValue; +use std::path::{Path, PathBuf}; + +fn data_path(name: &str) -> PathBuf { + let mut p = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push("tests"); + p.push("data"); + p.push(name); + p +} + +#[test] +fn single_inlined_object_identifier_change_is_replacement() { + // Use personinfo schema; diagnosis is an inlined object with identifier (via NamedThing) + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class(&Identifier::new("Container"), &conv) + .unwrap() + .expect("class not found"); + + let src = load_yaml_file( + Path::new(&data_path("example_personinfo_data.yaml")), + &sv, + &container, + &conv, + ) + .unwrap(); + + // Modify diagnosis.id of the first medical history event for P:002 + let mut tgt_json = src.to_json(); + if let JsonValue::Object(ref mut root) = tgt_json { + if let Some(JsonValue::Array(objects)) = root.get_mut("objects") { + if let Some(JsonValue::Object(p2)) = objects.get_mut(2) { + if let Some(JsonValue::Array(mh)) = p2.get_mut("has_medical_history") { + if let Some(JsonValue::Object(ev0)) = mh.get_mut(0) { + if let Some(JsonValue::Object(diag)) = ev0.get_mut("diagnosis") { + diag.insert( + "id".to_string(), + JsonValue::String("CODE:D9999".to_string()), + ); + } + } + } + } + } + } + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &container, + &conv, + ) + .unwrap(); + + let deltas = diff(&src, &tgt, false); + // Expect a single replacement at the diagnosis object path + assert_eq!(deltas.len(), 1); + let d = &deltas[0]; + assert_eq!( + d.path, + vec![ + "objects".to_string(), + "2".to_string(), + "has_medical_history".to_string(), + "0".to_string(), + "diagnosis".to_string() + ] + ); + assert!(d.old.is_some() && d.new.is_some()); + + // Patch should yield target + let (patched, _trace) = patch(&src, &deltas, &sv).unwrap(); + assert_eq!(patched.to_json(), tgt.to_json()); +} + +#[test] +fn single_inlined_object_non_identifier_change_is_field_delta() { + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class(&Identifier::new("Container"), &conv) + .unwrap() + .expect("class not found"); + + let src = load_yaml_file( + Path::new(&data_path("example_personinfo_data.yaml")), + &sv, + &container, + &conv, + ) + .unwrap(); + + // Modify diagnosis.name only + let mut tgt_json = src.to_json(); + if let JsonValue::Object(ref mut root) = tgt_json { + if let Some(JsonValue::Array(objects)) = root.get_mut("objects") { + if let Some(JsonValue::Object(p2)) = objects.get_mut(2) { + if let Some(JsonValue::Array(mh)) = p2.get_mut("has_medical_history") { + if let Some(JsonValue::Object(ev0)) = mh.get_mut(0) { + if let Some(JsonValue::Object(diag)) = ev0.get_mut("diagnosis") { + diag.insert( + "name".to_string(), + JsonValue::String("new name".to_string()), + ); + } + } + } + } + } + } + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &container, + &conv, + ) + .unwrap(); + + let deltas = diff(&src, &tgt, false); + assert!(deltas.iter().any(|d| d.path + == vec![ + "objects".to_string(), + "2".to_string(), + "has_medical_history".to_string(), + "0".to_string(), + "diagnosis".to_string(), + "name".to_string() + ])); + // Must not collapse to whole-object replacement here + assert!(!deltas.iter().any(|d| d.path + == vec![ + "objects".to_string(), + "2".to_string(), + "has_medical_history".to_string(), + "0".to_string(), + "diagnosis".to_string() + ])); + + let (patched, _trace) = patch(&src, &deltas, &sv).unwrap(); + assert_eq!(patched.to_json(), tgt.to_json()); +} + +#[test] +fn list_inlined_object_identifier_change_is_replacement() { + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class(&Identifier::new("Container"), &conv) + .unwrap() + .expect("class not found"); + + let src = load_yaml_file( + Path::new(&data_path("example_personinfo_data.yaml")), + &sv, + &container, + &conv, + ) + .unwrap(); + + // Change the id of the third object (P:002) + let mut tgt_json = src.to_json(); + if let JsonValue::Object(ref mut root) = tgt_json { + if let Some(JsonValue::Array(objects)) = root.get_mut("objects") { + if let Some(JsonValue::Object(p2)) = objects.get_mut(2) { + p2.insert("id".to_string(), JsonValue::String("P:099".to_string())); + } + } + } + let tgt = load_json_str( + &serde_json::to_string(&tgt_json).unwrap(), + &sv, + &container, + &conv, + ) + .unwrap(); + + let deltas = diff(&src, &tgt, false); + // Expect a single replacement at the list item path + assert!(deltas + .iter() + .any(|d| d.path == vec!["objects".to_string(), "2".to_string()])); + assert!(!deltas + .iter() + .any(|d| d.path == vec!["objects".to_string(), "2".to_string(), "id".to_string()])); + + let (patched, _trace) = patch(&src, &deltas, &sv).unwrap(); + assert_eq!(patched.to_json(), tgt.to_json()); +} + +#[test] +fn mapping_inlined_identifier_change_is_add_delete() { + // Use mapping schema with inlined_as_dict keyed by 'key' + let schema = from_yaml(Path::new(&data_path("mapping_schema.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let bag = sv + .get_class(&Identifier::new("Bag"), &conv) + .unwrap() + .expect("class not found"); + + let src = linkml_runtime::load_json_file( + Path::new(&data_path("mapping_data.json")), + &sv, + &bag, + &conv, + ) + .unwrap(); + + // Rename mapping key 'alpha' to 'alpha2' + let mut tgt_json = src.to_json(); + if let JsonValue::Object(ref mut root) = tgt_json { + if let Some(JsonValue::Object(things)) = root.get_mut("things") { + if let Some(alpha) = things.remove("alpha") { + things.insert("alpha2".to_string(), alpha); + } + } + } + let tgt = load_json_str(&serde_json::to_string(&tgt_json).unwrap(), &sv, &bag, &conv).unwrap(); + + let deltas = diff(&src, &tgt, false); + // Expect one delete and one add at mapping keys; no inner key-slot deltas + assert!(deltas + .iter() + .any(|d| d.path == vec!["things".to_string(), "alpha".to_string()] && d.new.is_none())); + assert!(deltas + .iter() + .any(|d| d.path == vec!["things".to_string(), "alpha2".to_string()] && d.old.is_none())); + assert!(!deltas + .iter() + .any(|d| d.path == vec!["things".to_string(), "alpha".to_string(), "key".to_string()])); + + let (patched, _trace) = patch(&src, &deltas, &sv).unwrap(); + assert_eq!(patched.to_json(), tgt.to_json()); +} From e078ce944f16743e358544383683d7efad9ba1a7 Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Thu, 11 Sep 2025 07:39:06 +0200 Subject: [PATCH 09/13] docs(runtime): clarify semantics of internal NodeId and PatchTrace fields --- src/runtime/src/diff.rs | 6 ++++++ src/runtime/src/lib.rs | 17 ++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/runtime/src/diff.rs b/src/runtime/src/diff.rs index 0a3a5ea..817f915 100644 --- a/src/runtime/src/diff.rs +++ b/src/runtime/src/diff.rs @@ -237,8 +237,14 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, treat_missing_as_null: b #[derive(Debug, Clone, Default)] pub struct PatchTrace { + /// Node IDs of subtrees that were newly created by the patch. + /// + /// See [`crate::NodeId`] for semantics: these are internal, ephemeral IDs + /// that are useful for tooling and provenance, not object identifiers. pub added: Vec, + /// Node IDs of subtrees that were removed by the patch. pub deleted: Vec, + /// Node IDs of nodes that were directly updated (e.g., parent containers, scalars). pub updated: Vec, } diff --git a/src/runtime/src/lib.rs b/src/runtime/src/lib.rs index 150354f..8d6627c 100644 --- a/src/runtime/src/lib.rs +++ b/src/runtime/src/lib.rs @@ -91,7 +91,18 @@ pub enum LinkMLValue { }, } -// Stable node identifiers assigned to every LinkMLValue node +/// Internal node identifier used for provenance and update tracking. +/// +/// Node IDs are assigned to every `LinkMLValue` node when values are constructed or +/// transformed. They exist solely as technical identifiers to help with patching and +/// provenance (for example, `PatchTrace.added`/`deleted` collect `NodeId`s of affected +/// subtrees). They are not intended to identify domain objects — for that, use LinkML +/// identifier or key slots as defined in the schema. +/// +/// Important properties: +/// - Local and ephemeral: loading the same data twice will yield different `NodeId`s. +/// - Non-persistent: never serialize or expose as a model identifier. +/// - Useful for tracking modifications within a single in-memory value. pub type NodeId = u64; static NEXT_NODE_ID: AtomicU64 = AtomicU64::new(1); @@ -101,6 +112,10 @@ fn new_node_id() -> NodeId { } impl LinkMLValue { + /// Returns the internal [`NodeId`] of this node. + /// + /// This ID is only for internal provenance/update tracking and is not a + /// semantic identifier of the represented object. pub fn node_id(&self) -> NodeId { match self { LinkMLValue::Scalar { node_id, .. } From 5c31a7bd0dca731faee12bfb3a025ad6d6118e47 Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Thu, 11 Sep 2025 11:30:14 +0200 Subject: [PATCH 10/13] runtime: add LinkMLValue::equals per Instances spec; tests: add equality coverage; revert CLI offline toggle and run tests with network --- AGENTS.md | 1 + src/runtime/src/lib.rs | 143 +++++++++++++++++++++++++++ src/runtime/tests/equality.rs | 177 ++++++++++++++++++++++++++++++++++ 3 files changed, 321 insertions(+) create mode 100644 src/runtime/tests/equality.rs diff --git a/AGENTS.md b/AGENTS.md index 06f1266..526760e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -25,6 +25,7 @@ - Python (bindings helpers): follow PEP 8; prefer type hints where feasible. ## Testing Guidelines +- When testing locally, always provide network access. never try to run the tests offline - Add integration tests under `src/runtime/tests/` when changing CLI/runtime behavior. - Prefer `assert_cmd` for CLI and `predicates` for output checks. Keep fixtures in `src/runtime/tests/data/`. - Run `cargo test --workspace` locally; ensure tests don’t rely on network input. diff --git a/src/runtime/src/lib.rs b/src/runtime/src/lib.rs index 8d6627c..13e5953 100644 --- a/src/runtime/src/lib.rs +++ b/src/runtime/src/lib.rs @@ -153,6 +153,149 @@ impl LinkMLValue { } Some(current) } + + /// Compare two LinkMLValue instances for semantic equality per the + /// LinkML Instances specification (Identity conditions). + /// + /// Key points implemented: + /// - Null equals Null. + /// - Scalars: equal iff same underlying atomic value and compatible typed context + /// (same Enum range when present; otherwise same TypeDefinition range name when present). + /// - Lists: equal iff same length and pairwise equal in order. + /// - Mappings: equal iff same keys and values equal for each key (order-insensitive). + /// - Objects: equal iff same instantiated class (by identity) and slot assignments match + /// after normalization (i.e., treating assignments with value Null as omitted), ignoring order. + pub fn equals(&self, other: &LinkMLValue) -> bool { + use LinkMLValue::*; + match (self, other) { + (Null { .. }, Null { .. }) => true, + ( + Scalar { + value: v1, + slot: s1, + .. + }, + Scalar { + value: v2, + slot: s2, + .. + }, + ) => { + // If either slot has an enum range, both must and enum names must match + let e1 = s1.get_range_enum(); + let e2 = s2.get_range_enum(); + if e1.is_some() || e2.is_some() { + match (e1, e2) { + (Some(ev1), Some(ev2)) => { + if ev1.schema_id() != ev2.schema_id() || ev1.name() != ev2.name() { + return false; + } + } + _ => return false, + } + } else { + // Compare type ranges if explicitly set on both + let t1 = s1.definition().range.as_ref(); + let t2 = s2.definition().range.as_ref(); + if let (Some(r1), Some(r2)) = (t1, t2) { + if r1 != r2 { + return false; + } + } + } + v1 == v2 + } + (List { values: a, .. }, List { values: b, .. }) => { + if a.len() != b.len() { + return false; + } + for (x, y) in a.iter().zip(b.iter()) { + if !x.equals(y) { + return false; + } + } + true + } + (Mapping { values: a, .. }, Mapping { values: b, .. }) => { + if a.len() != b.len() { + return false; + } + for (k, va) in a.iter() { + match b.get(k) { + Some(vb) => { + if !va.equals(vb) { + return false; + } + } + None => return false, + } + } + true + } + ( + Object { + values: a, + class: ca, + sv: sva, + .. + }, + Object { + values: b, + class: cb, + sv: svb, + .. + }, + ) => { + // Compare class identity via canonical URIs if possible + let ida = ca.canonical_uri(); + let idb = cb.canonical_uri(); + let class_equal = if let Some(conv) = sva.converter_for_schema(ca.schema_id()) { + // Use 'sva' for comparison; identifiers are global across schemas + sva.identifier_equals(&ida, &idb, conv).unwrap_or(false) + } else if let Some(conv) = svb.converter_for_schema(cb.schema_id()) { + svb.identifier_equals(&ida, &idb, conv).unwrap_or(false) + } else { + ca.name() == cb.name() + }; + if !class_equal { + return false; + } + + // Normalize conceptually by ignoring entries whose value is Null + let count_a = a.iter().filter(|(_, v)| !matches!(v, Null { .. })).count(); + let count_b = b.iter().filter(|(_, v)| !matches!(v, Null { .. })).count(); + if count_a != count_b { + return false; + } + for (k, va) in a.iter().filter(|(_, v)| !matches!(v, Null { .. })) { + match b.get(k) { + Some(vb) => { + if matches!(vb, Null { .. }) { + return false; + } + if !va.equals(vb) { + return false; + } + } + None => return false, + } + } + // Ensure b has no extra non-null keys not in a (counts already equal, so this is defensive) + for (k, _vb) in b.iter().filter(|(_, v)| !matches!(v, Null { .. })) { + match a.get(k) { + Some(va) => { + if matches!(va, Null { .. }) { + return false; + } + } + None => return false, + } + } + true + } + _ => false, + } + } fn find_scalar_slot_for_inlined_map( class: &ClassView, key_slot_name: &str, diff --git a/src/runtime/tests/equality.rs b/src/runtime/tests/equality.rs new file mode 100644 index 0000000..4637539 --- /dev/null +++ b/src/runtime/tests/equality.rs @@ -0,0 +1,177 @@ +use linkml_runtime::{load_json_str, load_yaml_str, LinkMLValue}; +use linkml_schemaview::identifier::converter_from_schema; +use linkml_schemaview::io::from_yaml; +use linkml_schemaview::schemaview::SchemaView; +use std::path::Path; + +fn data_path(name: &str) -> std::path::PathBuf { + let mut p = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + p.push("tests"); + p.push("data"); + p.push(name); + p +} + +#[test] +fn object_equality_ignores_null_assignments() { + // Load personinfo schema and Container class + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class( + &linkml_schemaview::identifier::Identifier::new("Container"), + &conv, + ) + .unwrap() + .expect("class not found"); + + let doc_with_null = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" + current_address: null +"#; + let doc_without_slot = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" +"#; + let v1 = load_yaml_str(doc_with_null, &sv, &container, &conv).unwrap(); + let v2 = load_yaml_str(doc_without_slot, &sv, &container, &conv).unwrap(); + let p1 = v1.navigate_path(["objects", "0"]).unwrap(); + let p2 = v2.navigate_path(["objects", "0"]).unwrap(); + assert!( + p1.equals(p2), + "Person with null assignment should equal omission" + ); +} + +#[test] +fn list_identity_is_order_sensitive() { + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class( + &linkml_schemaview::identifier::Identifier::new("Container"), + &conv, + ) + .unwrap() + .expect("class not found"); + + let doc_a = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" + has_employment_history: + - started_at_time: 2019-01-01 + is_current: true + - started_at_time: 2020-01-01 + is_current: false +"#; + let doc_b = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" + has_employment_history: + - started_at_time: 2020-01-01 + is_current: false + - started_at_time: 2019-01-01 + is_current: true +"#; + let v1 = load_yaml_str(doc_a, &sv, &container, &conv).unwrap(); + let v2 = load_yaml_str(doc_b, &sv, &container, &conv).unwrap(); + let p1 = v1.navigate_path(["objects", "0"]).unwrap(); + let p2 = v2.navigate_path(["objects", "0"]).unwrap(); + assert!(matches!(p1, LinkMLValue::Object { .. })); + assert!(matches!(p2, LinkMLValue::Object { .. })); + assert!(!p1.equals(p2), "List order must affect equality"); +} + +#[test] +fn mapping_equality_is_key_based_not_ordered() { + // Load mapping schema and Bag class + let schema = from_yaml(Path::new(&data_path("mapping_schema.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let bag = sv + .get_class( + &linkml_schemaview::identifier::Identifier::new("Bag"), + &conv, + ) + .unwrap() + .expect("class not found"); + + let doc1 = r#"{ + "things": { + "alpha": {"typeURI": "ThingA", "a_only": "foo", "common": "shared"}, + "beta": {"typeURI": "ThingB", "b_only": true, "common": "shared"} + } +}"#; + let doc2 = r#"{ + "things": { + "beta": {"typeURI": "ThingB", "b_only": true, "common": "shared"}, + "alpha": {"typeURI": "ThingA", "a_only": "foo", "common": "shared"} + } +}"#; + let v1 = load_json_str(doc1, &sv, &bag, &conv).unwrap(); + let v2 = load_json_str(doc2, &sv, &bag, &conv).unwrap(); + let m1 = v1.navigate_path(["things"]).unwrap(); + let m2 = v2.navigate_path(["things"]).unwrap(); + assert!(matches!(m1, LinkMLValue::Mapping { .. })); + assert!(matches!(m2, LinkMLValue::Mapping { .. })); + assert!(m1.equals(m2), "Mapping equality should ignore key order"); +} + +#[test] +fn enum_scalar_equality_respects_value_and_range() { + let schema = from_yaml(Path::new(&data_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class( + &linkml_schemaview::identifier::Identifier::new("Container"), + &conv, + ) + .unwrap() + .expect("class not found"); + + let doc1 = r#" +objects: + - objecttype: personinfo:Person + id: "P:1" + name: "Alice" + gender: "cisgender man" +"#; + let doc2 = r#" +objects: + - objecttype: personinfo:Person + id: "P:2" + name: "Bob" + gender: "cisgender man" +"#; + let doc3 = r#" +objects: + - objecttype: personinfo:Person + id: "P:3" + name: "Carol" + gender: "cisgender woman" +"#; + let v1 = load_yaml_str(doc1, &sv, &container, &conv).unwrap(); + let v2 = load_yaml_str(doc2, &sv, &container, &conv).unwrap(); + let v3 = load_yaml_str(doc3, &sv, &container, &conv).unwrap(); + let g1 = v1.navigate_path(["objects", "0", "gender"]).unwrap(); + let g2 = v2.navigate_path(["objects", "0", "gender"]).unwrap(); + let g3 = v3.navigate_path(["objects", "0", "gender"]).unwrap(); + assert!(g1.equals(g2)); + assert!(!g1.equals(g3)); +} From 661e9ad7649bdebecc22de49ec69604da8f696f2 Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Thu, 11 Sep 2025 11:39:13 +0200 Subject: [PATCH 11/13] python: expose LinkMLValue.equals(); tests: add python_equals coverage --- src/python/src/lib.rs | 5 +++ src/python/tests/python_equals.rs | 75 +++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 src/python/tests/python_equals.rs diff --git a/src/python/src/lib.rs b/src/python/src/lib.rs index 7c561f0..9370a05 100644 --- a/src/python/src/lib.rs +++ b/src/python/src/lib.rs @@ -478,6 +478,11 @@ impl Clone for PyLinkMLValue { #[pymethods] impl PyLinkMLValue { + /// Semantic equality per LinkML Instances spec. + /// Compares this value with another `LinkMLValue`. + fn equals(&self, other: &PyLinkMLValue) -> bool { + self.value.equals(&other.value) + } #[getter] fn slot_name(&self) -> Option { match &self.value { diff --git a/src/python/tests/python_equals.rs b/src/python/tests/python_equals.rs new file mode 100644 index 0000000..62a2ea7 --- /dev/null +++ b/src/python/tests/python_equals.rs @@ -0,0 +1,75 @@ +use linkml_runtime_python::runtime_module; +use pyo3::prelude::*; +use pyo3::types::PyDict; +use std::path::PathBuf; + +fn data_path(name: &str) -> PathBuf { + let base = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let candidates = [ + base.join("../runtime/tests/data").join(name), + base.join("../schemaview/tests/data").join(name), + base.join("tests/data").join(name), + ]; + for c in candidates { + if c.exists() { + return c; + } + } + panic!("test data not found: {}", name); +} + +#[test] +fn python_equals_api() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + let module = PyModule::new(py, "linkml_runtime").unwrap(); + runtime_module(&module).unwrap(); + let sys = py.import("sys").unwrap(); + let modules = sys.getattr("modules").unwrap(); + let sys_modules = modules.downcast::().unwrap(); + sys_modules.set_item("linkml_runtime", module).unwrap(); + + let locals = PyDict::new(py); + locals + .set_item( + "schema_path", + data_path("personinfo.yaml").to_str().unwrap(), + ) + .unwrap(); + + pyo3::py_run!( + py, + *locals, + r#" +import linkml_runtime as lr +import json +sv = lr.make_schema_view(schema_path) +cls = sv.get_class_view('Container') + +doc1 = { + 'objects': [ + { + 'objecttype': 'personinfo:Person', + 'id': 'P:1', + 'name': 'Alice', + 'current_address': None + } + ] +} +doc2 = { + 'objects': [ + { + 'objecttype': 'personinfo:Person', + 'id': 'P:1', + 'name': 'Alice' + } + ] +} + +v1 = lr.load_json(json.dumps(doc1), sv, cls) +v2 = lr.load_json(json.dumps(doc2), sv, cls) +assert v1['objects'][0].equals(v2['objects'][0]) +"# + ); + }); +} From 7c58d56104cc711f1122f32bf67e19e2566e723d Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Thu, 11 Sep 2025 12:17:22 +0200 Subject: [PATCH 12/13] runtime: equals(treat_missing_as_null) and patch no-op skipping; patch API takes treat_missing_as_null; CLI flag; Python equals/patch args; tests for equality and no-op semantics; preserve scalar node_ids --- src/python/src/lib.rs | 13 ++- src/python/tests/python_equals.rs | 2 +- src/runtime/src/diff.rs | 143 +++++++++++++++++---------- src/runtime/src/lib.rs | 74 ++++++++------ src/runtime/tests/diff.rs | 6 +- src/runtime/tests/diff_identifier.rs | 8 +- src/runtime/tests/equality.rs | 13 ++- src/runtime/tests/trace.rs | 58 ++++++++++- src/tools/src/bin/linkml_patch.rs | 5 +- 9 files changed, 222 insertions(+), 100 deletions(-) diff --git a/src/python/src/lib.rs b/src/python/src/lib.rs index 9370a05..591d12e 100644 --- a/src/python/src/lib.rs +++ b/src/python/src/lib.rs @@ -480,8 +480,9 @@ impl Clone for PyLinkMLValue { impl PyLinkMLValue { /// Semantic equality per LinkML Instances spec. /// Compares this value with another `LinkMLValue`. - fn equals(&self, other: &PyLinkMLValue) -> bool { - self.value.equals(&other.value) + #[pyo3(signature = (other, treat_missing_as_null = false))] + fn equals(&self, other: &PyLinkMLValue, treat_missing_as_null: bool) -> bool { + self.value.equals(&other.value, treat_missing_as_null) } #[getter] fn slot_name(&self) -> Option { @@ -763,11 +764,12 @@ fn py_diff( Ok(json_value_to_py(py, &JsonValue::Array(vals))) } -#[pyfunction(name = "patch")] +#[pyfunction(name = "patch", signature = (source, deltas, treat_missing_as_null = false))] fn py_patch( py: Python<'_>, source: &PyLinkMLValue, deltas: &Bound<'_, PyAny>, + treat_missing_as_null: bool, ) -> PyResult { let json_mod = PyModule::import(py, "json")?; let deltas_str: String = json_mod.call_method1("dumps", (deltas,))?.extract()?; @@ -775,8 +777,9 @@ fn py_patch( serde_json::from_str(&deltas_str).map_err(|e| PyException::new_err(e.to_string()))?; let sv_ref = source.sv.bind(py).borrow(); let rust_sv = sv_ref.as_rust(); - let (new_value, trace) = patch_internal(&source.value, &deltas_vec, rust_sv) - .map_err(|e| PyException::new_err(e.to_string()))?; + let (new_value, trace) = + patch_internal(&source.value, &deltas_vec, rust_sv, treat_missing_as_null) + .map_err(|e| PyException::new_err(e.to_string()))?; let trace_json = serde_json::json!({ "added": trace.added, "deleted": trace.deleted, diff --git a/src/python/tests/python_equals.rs b/src/python/tests/python_equals.rs index 62a2ea7..eb51043 100644 --- a/src/python/tests/python_equals.rs +++ b/src/python/tests/python_equals.rs @@ -68,7 +68,7 @@ doc2 = { v1 = lr.load_json(json.dumps(doc1), sv, cls) v2 = lr.load_json(json.dumps(doc2), sv, cls) -assert v1['objects'][0].equals(v2['objects'][0]) +assert v1['objects'][0].equals(v2['objects'][0], True) "# ); }); diff --git a/src/runtime/src/diff.rs b/src/runtime/src/diff.rs index 817f915..f94e47f 100644 --- a/src/runtime/src/diff.rs +++ b/src/runtime/src/diff.rs @@ -252,11 +252,19 @@ pub fn patch( source: &LinkMLValue, deltas: &[Delta], sv: &SchemaView, + treat_missing_as_null: bool, ) -> LResult<(LinkMLValue, PatchTrace)> { let mut out = source.clone(); let mut trace = PatchTrace::default(); for d in deltas { - apply_delta_linkml(&mut out, &d.path, &d.new, sv, &mut trace)?; + apply_delta_linkml( + &mut out, + &d.path, + &d.new, + sv, + &mut trace, + treat_missing_as_null, + )?; } Ok((out, trace)) } @@ -295,6 +303,7 @@ fn apply_delta_linkml( newv: &Option, sv: &SchemaView, trace: &mut PatchTrace, + treat_missing_as_null: bool, ) -> LResult<()> { if path.is_empty() { if let Some(v) = newv { @@ -308,6 +317,10 @@ fn apply_delta_linkml( let conv = sv.converter(); if let Some(cls) = class_opt { let new_node = LinkMLValue::from_json(v.clone(), cls, slot_opt, sv, &conv, false)?; + if current.equals(&new_node, treat_missing_as_null) { + // No-op delta; skip to preserve node IDs + return Ok(()); + } mark_deleted_subtree(current, trace); mark_added_subtree(&new_node, trace); *current = new_node; @@ -322,39 +335,44 @@ fn apply_delta_linkml( if path.len() == 1 { match newv { Some(v) => { + let conv = sv.converter(); + let slot = class.slots().iter().find(|s| s.name == *key).cloned(); + let new_child = LinkMLValue::from_json( + v.clone(), + class.clone(), + slot.clone(), + sv, + &conv, + false, + )?; if let Some(old_child) = values.get_mut(key) { - if let LinkMLValue::Scalar { value, .. } = old_child { - if !v.is_object() && !v.is_array() { - *value = v.clone(); + if old_child.equals(&new_child, treat_missing_as_null) { + // no-op; skip + return Ok(()); + } + match (&mut *old_child, &new_child) { + ( + LinkMLValue::Scalar { value: ov, .. }, + LinkMLValue::Scalar { value: nv, .. }, + ) if !v.is_object() && !v.is_array() => { + // In-place scalar update: keep node_id stable and mark child node + *ov = nv.clone(); trace.updated.push(old_child.node_id()); - return Ok(()); + } + _ => { + let old_snapshot = std::mem::replace(old_child, new_child); + mark_deleted_subtree(&old_snapshot, trace); + mark_added_subtree(old_child, trace); + trace.updated.push(current.node_id()); } } - let conv = sv.converter(); - let slot = class.slots().iter().find(|s| s.name == *key).cloned(); - let new_child = LinkMLValue::from_json( - v.clone(), - class.clone(), - slot, - sv, - &conv, - false, - )?; - let old_snapshot = std::mem::replace(old_child, new_child); - mark_deleted_subtree(&old_snapshot, trace); - mark_added_subtree(old_child, trace); - trace.updated.push(current.node_id()); } else { - let conv = sv.converter(); - let slot = class.slots().iter().find(|s| s.name == *key).cloned(); - let new_child = LinkMLValue::from_json( - v.clone(), - class.clone(), - slot, - sv, - &conv, - false, - )?; + // adding a Null assignment may be a no-op when treating missing as null + if treat_missing_as_null + && matches!(new_child, LinkMLValue::Null { .. }) + { + return Ok(()); + } // mark before insert mark_added_subtree(&new_child, trace); values.insert(key.clone(), new_child); @@ -362,6 +380,14 @@ fn apply_delta_linkml( } } None => { + if let Some(old_child) = values.get(key) { + if treat_missing_as_null + && matches!(old_child, LinkMLValue::Null { .. }) + { + // deleting a Null assignment: no-op + return Ok(()); + } + } if let Some(old_child) = values.remove(key) { mark_deleted_subtree(&old_child, trace); trace.updated.push(current.node_id()); @@ -369,7 +395,7 @@ fn apply_delta_linkml( } } } else if let Some(child) = values.get_mut(key) { - apply_delta_linkml(child, &path[1..], newv, sv, trace)?; + apply_delta_linkml(child, &path[1..], newv, sv, trace, treat_missing_as_null)?; } } LinkMLValue::Mapping { values, slot, .. } => { @@ -386,6 +412,9 @@ fn apply_delta_linkml( Vec::new(), )?; if let Some(old_child) = values.get(key) { + if old_child.equals(&new_child, treat_missing_as_null) { + return Ok(()); + } mark_deleted_subtree(old_child, trace); } // mark before insert @@ -401,7 +430,7 @@ fn apply_delta_linkml( } } } else if let Some(child) = values.get_mut(key) { - apply_delta_linkml(child, &path[1..], newv, sv, trace)?; + apply_delta_linkml(child, &path[1..], newv, sv, trace, treat_missing_as_null)?; } } LinkMLValue::List { @@ -417,27 +446,32 @@ fn apply_delta_linkml( match newv { Some(v) => { if idx < values.len() { - let is_scalar_target = - matches!(values[idx], LinkMLValue::Scalar { .. }); - if is_scalar_target && !v.is_object() && !v.is_array() { - if let LinkMLValue::Scalar { value, .. } = &mut values[idx] { - *value = v.clone(); + let conv = sv.converter(); + let new_child = LinkMLValue::build_list_item_for_slot( + slot, + class.as_ref(), + v.clone(), + sv, + &conv, + Vec::new(), + )?; + if values[idx].equals(&new_child, treat_missing_as_null) { + return Ok(()); + } + match (&mut values[idx], &new_child) { + ( + LinkMLValue::Scalar { value: ov, .. }, + LinkMLValue::Scalar { value: nv, .. }, + ) if !v.is_object() && !v.is_array() => { + *ov = nv.clone(); trace.updated.push(values[idx].node_id()); } - } else { - let conv = sv.converter(); - let new_child = LinkMLValue::build_list_item_for_slot( - slot, - class.as_ref(), - v.clone(), - sv, - &conv, - Vec::new(), - )?; - let old = std::mem::replace(&mut values[idx], new_child); - mark_deleted_subtree(&old, trace); - mark_added_subtree(&values[idx], trace); - trace.updated.push(current.node_id()); + _ => { + let old = std::mem::replace(&mut values[idx], new_child); + mark_deleted_subtree(&old, trace); + mark_added_subtree(&values[idx], trace); + trace.updated.push(current.node_id()); + } } } else if idx == values.len() { let conv = sv.converter(); @@ -470,7 +504,14 @@ fn apply_delta_linkml( } } } else if idx < values.len() { - apply_delta_linkml(&mut values[idx], &path[1..], newv, sv, trace)?; + apply_delta_linkml( + &mut values[idx], + &path[1..], + newv, + sv, + trace, + treat_missing_as_null, + )?; } } LinkMLValue::Scalar { .. } => {} diff --git a/src/runtime/src/lib.rs b/src/runtime/src/lib.rs index 13e5953..8d3d242 100644 --- a/src/runtime/src/lib.rs +++ b/src/runtime/src/lib.rs @@ -163,9 +163,10 @@ impl LinkMLValue { /// (same Enum range when present; otherwise same TypeDefinition range name when present). /// - Lists: equal iff same length and pairwise equal in order. /// - Mappings: equal iff same keys and values equal for each key (order-insensitive). - /// - Objects: equal iff same instantiated class (by identity) and slot assignments match - /// after normalization (i.e., treating assignments with value Null as omitted), ignoring order. - pub fn equals(&self, other: &LinkMLValue) -> bool { + /// - Objects: equal iff same instantiated class (by identity) and slot assignments match; when + /// `treat_missing_as_null` is true, Null is treated as omitted (normalized), otherwise Null is + /// distinct from missing. + pub fn equals(&self, other: &LinkMLValue, treat_missing_as_null: bool) -> bool { use LinkMLValue::*; match (self, other) { (Null { .. }, Null { .. }) => true, @@ -210,7 +211,7 @@ impl LinkMLValue { return false; } for (x, y) in a.iter().zip(b.iter()) { - if !x.equals(y) { + if !x.equals(y, treat_missing_as_null) { return false; } } @@ -223,7 +224,7 @@ impl LinkMLValue { for (k, va) in a.iter() { match b.get(k) { Some(vb) => { - if !va.equals(vb) { + if !va.equals(vb, treat_missing_as_null) { return false; } } @@ -261,37 +262,54 @@ impl LinkMLValue { return false; } - // Normalize conceptually by ignoring entries whose value is Null - let count_a = a.iter().filter(|(_, v)| !matches!(v, Null { .. })).count(); - let count_b = b.iter().filter(|(_, v)| !matches!(v, Null { .. })).count(); - if count_a != count_b { - return false; - } - for (k, va) in a.iter().filter(|(_, v)| !matches!(v, Null { .. })) { - match b.get(k) { - Some(vb) => { - if matches!(vb, Null { .. }) { - return false; + if treat_missing_as_null { + // Normalize conceptually by ignoring entries whose value is Null + let count_a = a.iter().filter(|(_, v)| !matches!(v, Null { .. })).count(); + let count_b = b.iter().filter(|(_, v)| !matches!(v, Null { .. })).count(); + if count_a != count_b { + return false; + } + for (k, va) in a.iter().filter(|(_, v)| !matches!(v, Null { .. })) { + match b.get(k) { + Some(vb) => { + if matches!(vb, Null { .. }) { + return false; + } + if !va.equals(vb, treat_missing_as_null) { + return false; + } } - if !va.equals(vb) { - return false; + None => return false, + } + } + // Ensure b has no extra non-null keys not in a + for (k, _vb) in b.iter().filter(|(_, v)| !matches!(v, Null { .. })) { + match a.get(k) { + Some(va) => { + if matches!(va, Null { .. }) { + return false; + } } + None => return false, } - None => return false, } - } - // Ensure b has no extra non-null keys not in a (counts already equal, so this is defensive) - for (k, _vb) in b.iter().filter(|(_, v)| !matches!(v, Null { .. })) { - match a.get(k) { - Some(va) => { - if matches!(va, Null { .. }) { - return false; + true + } else { + if a.len() != b.len() { + return false; + } + for (k, va) in a.iter() { + match b.get(k) { + Some(vb) => { + if !va.equals(vb, treat_missing_as_null) { + return false; + } } + None => return false, } - None => return false, } + true } - true } _ => false, } diff --git a/src/runtime/tests/diff.rs b/src/runtime/tests/diff.rs index 09a2bf2..15146c8 100644 --- a/src/runtime/tests/diff.rs +++ b/src/runtime/tests/diff.rs @@ -57,7 +57,7 @@ fn diff_and_patch_person() { } } - let (patched, _trace) = patch(&src, &deltas, &sv).unwrap(); + let (patched, _trace) = patch(&src, &deltas, &sv, false).unwrap(); let patched_json = patched.to_json(); let target_json = tgt.to_json(); let src_json = src.to_json(); @@ -93,7 +93,7 @@ fn diff_ignore_missing_target() { let deltas = diff(&src, &tgt, false); assert!(deltas.is_empty()); - let (patched, _trace) = patch(&src, &deltas, &sv).unwrap(); + let (patched, _trace) = patch(&src, &deltas, &sv, false).unwrap(); let patched_json = patched.to_json(); let src_json = src.to_json(); assert_eq!(patched_json, src_json); @@ -135,7 +135,7 @@ fn diff_and_patch_personinfo() { assert!(tgt.navigate_path(&d.path).is_some()); } } - let (patched, _trace) = patch(&src, &deltas, &sv).unwrap(); + let (patched, _trace) = patch(&src, &deltas, &sv, false).unwrap(); assert_eq!(patched.to_json(), tgt.to_json()); } diff --git a/src/runtime/tests/diff_identifier.rs b/src/runtime/tests/diff_identifier.rs index 7ba4960..10b73af 100644 --- a/src/runtime/tests/diff_identifier.rs +++ b/src/runtime/tests/diff_identifier.rs @@ -76,7 +76,7 @@ fn single_inlined_object_identifier_change_is_replacement() { assert!(d.old.is_some() && d.new.is_some()); // Patch should yield target - let (patched, _trace) = patch(&src, &deltas, &sv).unwrap(); + let (patched, _trace) = patch(&src, &deltas, &sv, false).unwrap(); assert_eq!(patched.to_json(), tgt.to_json()); } @@ -145,7 +145,7 @@ fn single_inlined_object_non_identifier_change_is_field_delta() { "diagnosis".to_string() ])); - let (patched, _trace) = patch(&src, &deltas, &sv).unwrap(); + let (patched, _trace) = patch(&src, &deltas, &sv, false).unwrap(); assert_eq!(patched.to_json(), tgt.to_json()); } @@ -194,7 +194,7 @@ fn list_inlined_object_identifier_change_is_replacement() { .iter() .any(|d| d.path == vec!["objects".to_string(), "2".to_string(), "id".to_string()])); - let (patched, _trace) = patch(&src, &deltas, &sv).unwrap(); + let (patched, _trace) = patch(&src, &deltas, &sv, false).unwrap(); assert_eq!(patched.to_json(), tgt.to_json()); } @@ -241,6 +241,6 @@ fn mapping_inlined_identifier_change_is_add_delete() { .iter() .any(|d| d.path == vec!["things".to_string(), "alpha".to_string(), "key".to_string()])); - let (patched, _trace) = patch(&src, &deltas, &sv).unwrap(); + let (patched, _trace) = patch(&src, &deltas, &sv, false).unwrap(); assert_eq!(patched.to_json(), tgt.to_json()); } diff --git a/src/runtime/tests/equality.rs b/src/runtime/tests/equality.rs index 4637539..e9520f5 100644 --- a/src/runtime/tests/equality.rs +++ b/src/runtime/tests/equality.rs @@ -45,7 +45,7 @@ objects: let p1 = v1.navigate_path(["objects", "0"]).unwrap(); let p2 = v2.navigate_path(["objects", "0"]).unwrap(); assert!( - p1.equals(p2), + p1.equals(p2, true), "Person with null assignment should equal omission" ); } @@ -92,7 +92,7 @@ objects: let p2 = v2.navigate_path(["objects", "0"]).unwrap(); assert!(matches!(p1, LinkMLValue::Object { .. })); assert!(matches!(p2, LinkMLValue::Object { .. })); - assert!(!p1.equals(p2), "List order must affect equality"); + assert!(!p1.equals(p2, true), "List order must affect equality"); } #[test] @@ -128,7 +128,10 @@ fn mapping_equality_is_key_based_not_ordered() { let m2 = v2.navigate_path(["things"]).unwrap(); assert!(matches!(m1, LinkMLValue::Mapping { .. })); assert!(matches!(m2, LinkMLValue::Mapping { .. })); - assert!(m1.equals(m2), "Mapping equality should ignore key order"); + assert!( + m1.equals(m2, true), + "Mapping equality should ignore key order" + ); } #[test] @@ -172,6 +175,6 @@ objects: let g1 = v1.navigate_path(["objects", "0", "gender"]).unwrap(); let g2 = v2.navigate_path(["objects", "0", "gender"]).unwrap(); let g3 = v3.navigate_path(["objects", "0", "gender"]).unwrap(); - assert!(g1.equals(g2)); - assert!(!g1.equals(g3)); + assert!(g1.equals(g2, true)); + assert!(!g1.equals(g3, true)); } diff --git a/src/runtime/tests/trace.rs b/src/runtime/tests/trace.rs index 051aba7..62c92a9 100644 --- a/src/runtime/tests/trace.rs +++ b/src/runtime/tests/trace.rs @@ -70,7 +70,7 @@ fn node_ids_preserved_scalar_update() { .unwrap(); let deltas = diff(&src, &tgt, false); - let (patched, trace) = linkml_runtime::patch(&src, &deltas, &sv).unwrap(); + let (patched, trace) = linkml_runtime::patch(&src, &deltas, &sv, false).unwrap(); assert!(trace.added.is_empty()); assert!(trace.deleted.is_empty()); @@ -123,7 +123,7 @@ fn patch_trace_add_in_list() { let deltas = diff(&base, &target, false); let mut pre = Vec::new(); collect_ids(&base, &mut pre); - let (patched, trace) = linkml_runtime::patch(&base, &deltas, &sv).unwrap(); + let (patched, trace) = linkml_runtime::patch(&base, &deltas, &sv, false).unwrap(); let mut post = Vec::new(); collect_ids(&patched, &mut post); @@ -134,3 +134,57 @@ fn patch_trace_add_in_list() { assert_eq!(added, trace_added); assert!(!added.is_empty()); } + +#[test] +fn patch_missing_to_null_semantics() { + use linkml_runtime::LinkMLValue; + // Use simple schema + let schema = from_yaml(Path::new(&data_path("schema.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let class = sv + .get_class(&Identifier::new("Person"), &conv) + .unwrap() + .expect("class not found"); + + let src = load_yaml_file( + Path::new(&data_path("person_partial.yaml")), + &sv, + &class, + &conv, + ) + .unwrap(); + // Build delta: set age to explicit null + let deltas = vec![linkml_runtime::Delta { + path: vec!["age".to_string()], + old: None, + new: Some(serde_json::Value::Null), + }]; + + // treat_missing_as_null = true => no-op; no trace changes, no node id changes + let pre_id = src.node_id(); + let (patched_same, trace_same) = linkml_runtime::patch(&src, &deltas, &sv, true).unwrap(); + assert!( + trace_same.added.is_empty() + && trace_same.deleted.is_empty() + && trace_same.updated.is_empty() + ); + assert_eq!(pre_id, patched_same.node_id()); + // Equality under treat_missing_as_null=true must hold + assert!(src.equals(&patched_same, true)); + // And age remains absent (since explicit null is treated as omitted) + if let LinkMLValue::Object { values, .. } = &patched_same { + assert!(!values.contains_key("age")); + } + + // treat_missing_as_null = false => apply explicit null + let (patched_null, trace_applied) = linkml_runtime::patch(&src, &deltas, &sv, false).unwrap(); + assert!(trace_applied.updated.contains(&patched_null.node_id())); + // age present as Null + if let LinkMLValue::Object { values, .. } = &patched_null { + assert!(matches!(values.get("age"), Some(LinkMLValue::Null { .. }))); + } else { + panic!("expected object root"); + } +} diff --git a/src/tools/src/bin/linkml_patch.rs b/src/tools/src/bin/linkml_patch.rs index cfe8c60..1123982 100644 --- a/src/tools/src/bin/linkml_patch.rs +++ b/src/tools/src/bin/linkml_patch.rs @@ -23,6 +23,9 @@ struct Args { /// Output patched file; defaults to stdout #[arg(short, long)] output: Option, + /// Treat missing assignments as equivalent to explicit null when determining no-ops + #[arg(long, default_value_t = false)] + treat_missing_as_null: bool, } fn load_value( @@ -92,7 +95,7 @@ fn main() -> Result<(), Box> { } else { serde_yaml::from_str(&delta_text)? }; - let (patched, _trace) = patch(&src, &deltas, &sv)?; + let (patched, _trace) = patch(&src, &deltas, &sv, args.treat_missing_as_null)?; write_value(args.output.as_deref(), &patched)?; Ok(()) } From 4b34691e19bdf705eb9420ab0e52ebfe0093c3b9 Mon Sep 17 00:00:00 2001 From: Frank Dekervel Date: Thu, 11 Sep 2025 15:29:24 +0200 Subject: [PATCH 13/13] runtime: introduce PatchOptions {ignore_no_ops, treat_missing_as_null} with defaults; make no-op skipping configurable; wire CLI --ignore-noop/--treat-missing-as-null; update Python patch signature; update tests --- src/python/src/lib.rs | 16 ++++++-- src/runtime/src/diff.rs | 61 ++++++++++++++++------------ src/runtime/tests/diff.rs | 33 +++++++++++++-- src/runtime/tests/diff_identifier.rs | 44 ++++++++++++++++++-- src/runtime/tests/trace.rs | 44 ++++++++++++++++++-- src/tools/src/bin/linkml_patch.rs | 17 ++++++-- 6 files changed, 171 insertions(+), 44 deletions(-) diff --git a/src/python/src/lib.rs b/src/python/src/lib.rs index 591d12e..ade3444 100644 --- a/src/python/src/lib.rs +++ b/src/python/src/lib.rs @@ -764,12 +764,13 @@ fn py_diff( Ok(json_value_to_py(py, &JsonValue::Array(vals))) } -#[pyfunction(name = "patch", signature = (source, deltas, treat_missing_as_null = false))] +#[pyfunction(name = "patch", signature = (source, deltas, treat_missing_as_null = true, ignore_no_ops = true))] fn py_patch( py: Python<'_>, source: &PyLinkMLValue, deltas: &Bound<'_, PyAny>, treat_missing_as_null: bool, + ignore_no_ops: bool, ) -> PyResult { let json_mod = PyModule::import(py, "json")?; let deltas_str: String = json_mod.call_method1("dumps", (deltas,))?.extract()?; @@ -777,9 +778,16 @@ fn py_patch( serde_json::from_str(&deltas_str).map_err(|e| PyException::new_err(e.to_string()))?; let sv_ref = source.sv.bind(py).borrow(); let rust_sv = sv_ref.as_rust(); - let (new_value, trace) = - patch_internal(&source.value, &deltas_vec, rust_sv, treat_missing_as_null) - .map_err(|e| PyException::new_err(e.to_string()))?; + let (new_value, trace) = patch_internal( + &source.value, + &deltas_vec, + rust_sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops, + treat_missing_as_null, + }, + ) + .map_err(|e| PyException::new_err(e.to_string()))?; let trace_json = serde_json::json!({ "added": trace.added, "deleted": trace.deleted, diff --git a/src/runtime/src/diff.rs b/src/runtime/src/diff.rs index f94e47f..5edbd44 100644 --- a/src/runtime/src/diff.rs +++ b/src/runtime/src/diff.rs @@ -248,23 +248,31 @@ pub struct PatchTrace { pub updated: Vec, } +#[derive(Debug, Clone, Copy)] +pub struct PatchOptions { + pub ignore_no_ops: bool, + pub treat_missing_as_null: bool, +} + +impl Default for PatchOptions { + fn default() -> Self { + Self { + ignore_no_ops: true, + treat_missing_as_null: true, + } + } +} + pub fn patch( source: &LinkMLValue, deltas: &[Delta], sv: &SchemaView, - treat_missing_as_null: bool, + opts: PatchOptions, ) -> LResult<(LinkMLValue, PatchTrace)> { let mut out = source.clone(); let mut trace = PatchTrace::default(); for d in deltas { - apply_delta_linkml( - &mut out, - &d.path, - &d.new, - sv, - &mut trace, - treat_missing_as_null, - )?; + apply_delta_linkml(&mut out, &d.path, &d.new, sv, &mut trace, opts)?; } Ok((out, trace)) } @@ -303,7 +311,7 @@ fn apply_delta_linkml( newv: &Option, sv: &SchemaView, trace: &mut PatchTrace, - treat_missing_as_null: bool, + opts: PatchOptions, ) -> LResult<()> { if path.is_empty() { if let Some(v) = newv { @@ -317,7 +325,7 @@ fn apply_delta_linkml( let conv = sv.converter(); if let Some(cls) = class_opt { let new_node = LinkMLValue::from_json(v.clone(), cls, slot_opt, sv, &conv, false)?; - if current.equals(&new_node, treat_missing_as_null) { + if opts.ignore_no_ops && current.equals(&new_node, opts.treat_missing_as_null) { // No-op delta; skip to preserve node IDs return Ok(()); } @@ -346,7 +354,9 @@ fn apply_delta_linkml( false, )?; if let Some(old_child) = values.get_mut(key) { - if old_child.equals(&new_child, treat_missing_as_null) { + if opts.ignore_no_ops + && old_child.equals(&new_child, opts.treat_missing_as_null) + { // no-op; skip return Ok(()); } @@ -368,7 +378,8 @@ fn apply_delta_linkml( } } else { // adding a Null assignment may be a no-op when treating missing as null - if treat_missing_as_null + if opts.ignore_no_ops + && opts.treat_missing_as_null && matches!(new_child, LinkMLValue::Null { .. }) { return Ok(()); @@ -381,7 +392,8 @@ fn apply_delta_linkml( } None => { if let Some(old_child) = values.get(key) { - if treat_missing_as_null + if opts.ignore_no_ops + && opts.treat_missing_as_null && matches!(old_child, LinkMLValue::Null { .. }) { // deleting a Null assignment: no-op @@ -395,7 +407,7 @@ fn apply_delta_linkml( } } } else if let Some(child) = values.get_mut(key) { - apply_delta_linkml(child, &path[1..], newv, sv, trace, treat_missing_as_null)?; + apply_delta_linkml(child, &path[1..], newv, sv, trace, opts)?; } } LinkMLValue::Mapping { values, slot, .. } => { @@ -412,7 +424,9 @@ fn apply_delta_linkml( Vec::new(), )?; if let Some(old_child) = values.get(key) { - if old_child.equals(&new_child, treat_missing_as_null) { + if opts.ignore_no_ops + && old_child.equals(&new_child, opts.treat_missing_as_null) + { return Ok(()); } mark_deleted_subtree(old_child, trace); @@ -430,7 +444,7 @@ fn apply_delta_linkml( } } } else if let Some(child) = values.get_mut(key) { - apply_delta_linkml(child, &path[1..], newv, sv, trace, treat_missing_as_null)?; + apply_delta_linkml(child, &path[1..], newv, sv, trace, opts)?; } } LinkMLValue::List { @@ -455,7 +469,9 @@ fn apply_delta_linkml( &conv, Vec::new(), )?; - if values[idx].equals(&new_child, treat_missing_as_null) { + if opts.ignore_no_ops + && values[idx].equals(&new_child, opts.treat_missing_as_null) + { return Ok(()); } match (&mut values[idx], &new_child) { @@ -504,14 +520,7 @@ fn apply_delta_linkml( } } } else if idx < values.len() { - apply_delta_linkml( - &mut values[idx], - &path[1..], - newv, - sv, - trace, - treat_missing_as_null, - )?; + apply_delta_linkml(&mut values[idx], &path[1..], newv, sv, trace, opts)?; } } LinkMLValue::Scalar { .. } => {} diff --git a/src/runtime/tests/diff.rs b/src/runtime/tests/diff.rs index 15146c8..e60a71c 100644 --- a/src/runtime/tests/diff.rs +++ b/src/runtime/tests/diff.rs @@ -57,7 +57,16 @@ fn diff_and_patch_person() { } } - let (patched, _trace) = patch(&src, &deltas, &sv, false).unwrap(); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); let patched_json = patched.to_json(); let target_json = tgt.to_json(); let src_json = src.to_json(); @@ -93,7 +102,16 @@ fn diff_ignore_missing_target() { let deltas = diff(&src, &tgt, false); assert!(deltas.is_empty()); - let (patched, _trace) = patch(&src, &deltas, &sv, false).unwrap(); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); let patched_json = patched.to_json(); let src_json = src.to_json(); assert_eq!(patched_json, src_json); @@ -135,7 +153,16 @@ fn diff_and_patch_personinfo() { assert!(tgt.navigate_path(&d.path).is_some()); } } - let (patched, _trace) = patch(&src, &deltas, &sv, false).unwrap(); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); assert_eq!(patched.to_json(), tgt.to_json()); } diff --git a/src/runtime/tests/diff_identifier.rs b/src/runtime/tests/diff_identifier.rs index 10b73af..56aa667 100644 --- a/src/runtime/tests/diff_identifier.rs +++ b/src/runtime/tests/diff_identifier.rs @@ -76,7 +76,16 @@ fn single_inlined_object_identifier_change_is_replacement() { assert!(d.old.is_some() && d.new.is_some()); // Patch should yield target - let (patched, _trace) = patch(&src, &deltas, &sv, false).unwrap(); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); assert_eq!(patched.to_json(), tgt.to_json()); } @@ -145,7 +154,16 @@ fn single_inlined_object_non_identifier_change_is_field_delta() { "diagnosis".to_string() ])); - let (patched, _trace) = patch(&src, &deltas, &sv, false).unwrap(); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); assert_eq!(patched.to_json(), tgt.to_json()); } @@ -194,7 +212,16 @@ fn list_inlined_object_identifier_change_is_replacement() { .iter() .any(|d| d.path == vec!["objects".to_string(), "2".to_string(), "id".to_string()])); - let (patched, _trace) = patch(&src, &deltas, &sv, false).unwrap(); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); assert_eq!(patched.to_json(), tgt.to_json()); } @@ -241,6 +268,15 @@ fn mapping_inlined_identifier_change_is_add_delete() { .iter() .any(|d| d.path == vec!["things".to_string(), "alpha".to_string(), "key".to_string()])); - let (patched, _trace) = patch(&src, &deltas, &sv, false).unwrap(); + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); assert_eq!(patched.to_json(), tgt.to_json()); } diff --git a/src/runtime/tests/trace.rs b/src/runtime/tests/trace.rs index 62c92a9..72a52ee 100644 --- a/src/runtime/tests/trace.rs +++ b/src/runtime/tests/trace.rs @@ -70,7 +70,16 @@ fn node_ids_preserved_scalar_update() { .unwrap(); let deltas = diff(&src, &tgt, false); - let (patched, trace) = linkml_runtime::patch(&src, &deltas, &sv, false).unwrap(); + let (patched, trace) = linkml_runtime::patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); assert!(trace.added.is_empty()); assert!(trace.deleted.is_empty()); @@ -123,7 +132,16 @@ fn patch_trace_add_in_list() { let deltas = diff(&base, &target, false); let mut pre = Vec::new(); collect_ids(&base, &mut pre); - let (patched, trace) = linkml_runtime::patch(&base, &deltas, &sv, false).unwrap(); + let (patched, trace) = linkml_runtime::patch( + &base, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); let mut post = Vec::new(); collect_ids(&patched, &mut post); @@ -164,7 +182,16 @@ fn patch_missing_to_null_semantics() { // treat_missing_as_null = true => no-op; no trace changes, no node id changes let pre_id = src.node_id(); - let (patched_same, trace_same) = linkml_runtime::patch(&src, &deltas, &sv, true).unwrap(); + let (patched_same, trace_same) = linkml_runtime::patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: true, + }, + ) + .unwrap(); assert!( trace_same.added.is_empty() && trace_same.deleted.is_empty() @@ -179,7 +206,16 @@ fn patch_missing_to_null_semantics() { } // treat_missing_as_null = false => apply explicit null - let (patched_null, trace_applied) = linkml_runtime::patch(&src, &deltas, &sv, false).unwrap(); + let (patched_null, trace_applied) = linkml_runtime::patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: true, + treat_missing_as_null: false, + }, + ) + .unwrap(); assert!(trace_applied.updated.contains(&patched_null.node_id())); // age present as Null if let LinkMLValue::Object { values, .. } = &patched_null { diff --git a/src/tools/src/bin/linkml_patch.rs b/src/tools/src/bin/linkml_patch.rs index 1123982..99a990d 100644 --- a/src/tools/src/bin/linkml_patch.rs +++ b/src/tools/src/bin/linkml_patch.rs @@ -23,9 +23,12 @@ struct Args { /// Output patched file; defaults to stdout #[arg(short, long)] output: Option, - /// Treat missing assignments as equivalent to explicit null when determining no-ops - #[arg(long, default_value_t = false)] + /// Treat missing assignments as equivalent to explicit null for equality + #[arg(long, default_value_t = true)] treat_missing_as_null: bool, + /// Skip deltas that do not change the value (no-ops) + #[arg(long, default_value_t = true)] + ignore_noop: bool, } fn load_value( @@ -95,7 +98,15 @@ fn main() -> Result<(), Box> { } else { serde_yaml::from_str(&delta_text)? }; - let (patched, _trace) = patch(&src, &deltas, &sv, args.treat_missing_as_null)?; + let (patched, _trace) = patch( + &src, + &deltas, + &sv, + linkml_runtime::diff::PatchOptions { + ignore_no_ops: args.ignore_noop, + treat_missing_as_null: args.treat_missing_as_null, + }, + )?; write_value(args.output.as_deref(), &patched)?; Ok(()) }