diff --git a/AGENTS.md b/AGENTS.md index d67458f..06f1266 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -28,6 +28,7 @@ - Add integration tests under `src/runtime/tests/` when changing CLI/runtime behavior. - Prefer `assert_cmd` for CLI and `predicates` for output checks. Keep fixtures in `src/runtime/tests/data/`. - Run `cargo test --workspace` locally; ensure tests don’t rely on network input. + - Prefer modifying existing tests over adding new ones for new code paths. Extend current scenarios with extra assertions/fixtures to avoid redundant tests proliferating. For example, if adding null-handling in diff/patch, enhance the existing diff tests rather than introducing separate "basic diff works" tests that become redundant. ## Commit & Pull Request Guidelines - Commits: short, imperative summary (e.g., “Add __repr__ for LinkMLValue”); group related changes. diff --git a/src/python/src/lib.rs b/src/python/src/lib.rs index cbfe6b5..8b6dfbc 100644 --- a/src/python/src/lib.rs +++ b/src/python/src/lib.rs @@ -483,6 +483,7 @@ impl PyLinkMLValue { match &self.value { LinkMLValue::Scalar { slot, .. } => Some(slot.name.clone()), LinkMLValue::List { slot, .. } => Some(slot.name.clone()), + LinkMLValue::Null { slot, .. } => Some(slot.name.clone()), _ => None, } } @@ -491,6 +492,7 @@ impl PyLinkMLValue { fn kind(&self) -> String { match &self.value { LinkMLValue::Scalar { .. } => "scalar".to_string(), + LinkMLValue::Null { .. } => "null".to_string(), LinkMLValue::List { .. } => "list".to_string(), LinkMLValue::Mapping { .. } => "mapping".to_string(), LinkMLValue::Object { .. } => "object".to_string(), @@ -502,6 +504,7 @@ impl PyLinkMLValue { match &self.value { LinkMLValue::Scalar { slot, .. } => Some(slot.definition().clone()), LinkMLValue::List { slot, .. } => Some(slot.definition().clone()), + LinkMLValue::Null { slot, .. } => Some(slot.definition().clone()), _ => None, } } @@ -512,6 +515,7 @@ impl PyLinkMLValue { LinkMLValue::Object { class, .. } => Some(class.def().clone()), LinkMLValue::Scalar { class: Some(c), .. } => Some(c.def().clone()), LinkMLValue::List { class: Some(c), .. } => Some(c.def().clone()), + LinkMLValue::Null { class: Some(c), .. } => Some(c.def().clone()), _ => None, } } @@ -522,6 +526,7 @@ impl PyLinkMLValue { LinkMLValue::Object { class, .. } => Some(class.def().name.clone()), LinkMLValue::Scalar { class: Some(c), .. } => Some(c.def().name.clone()), LinkMLValue::List { class: Some(c), .. } => Some(c.def().name.clone()), + LinkMLValue::Null { class: Some(c), .. } => Some(c.def().name.clone()), _ => None, } } @@ -529,6 +534,7 @@ impl PyLinkMLValue { fn __len__(&self) -> PyResult { Ok(match &self.value { LinkMLValue::Scalar { .. } => 0, + LinkMLValue::Null { .. } => 0, LinkMLValue::List { values, .. } => values.len(), LinkMLValue::Mapping { values, .. } => values.len(), LinkMLValue::Object { values, .. } => values.len(), @@ -645,6 +651,9 @@ impl PyLinkMLValue { LinkMLValue::Scalar { value, slot, .. } => { format!("LinkMLValue.Scalar(slot='{}', value={})", slot.name, value) } + LinkMLValue::Null { slot, .. } => { + format!("LinkMLValue.Null(slot='{}')", slot.name) + } LinkMLValue::List { values, slot, .. } => { format!( "LinkMLValue.List(slot='{}', len={})", @@ -725,17 +734,17 @@ fn load_json( Ok(PyLinkMLValue::new(v, sv)) } -#[pyfunction(name = "diff", signature = (source, target, ignore_missing_target=None))] +#[pyfunction(name = "diff", signature = (source, target, treat_missing_as_null=None))] fn py_diff( py: Python<'_>, source: &PyLinkMLValue, target: &PyLinkMLValue, - ignore_missing_target: Option, + treat_missing_as_null: Option, ) -> PyResult { let deltas = diff_internal( &source.value, &target.value, - ignore_missing_target.unwrap_or(false), + treat_missing_as_null.unwrap_or(false), ); let vals: Vec = deltas .iter() diff --git a/src/runtime/src/diff.rs b/src/runtime/src/diff.rs index a64068e..cf05210 100644 --- a/src/runtime/src/diff.rs +++ b/src/runtime/src/diff.rs @@ -27,6 +27,7 @@ impl LinkMLValue { pub fn to_json(&self) -> JsonValue { match self { LinkMLValue::Scalar { value, .. } => value.clone(), + LinkMLValue::Null { .. } => JsonValue::Null, LinkMLValue::List { values, .. } => { JsonValue::Array(values.iter().map(|v| v.to_json()).collect()) } @@ -46,13 +47,20 @@ impl LinkMLValue { } } -pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: bool) -> Vec { +/// Compute a semantic diff between two LinkMLValue trees. +/// +/// Semantics of nulls and missing values: +/// - X -> null: update to null (old = X, new = null). +/// - null -> X: update from null (old = null, new = X). +/// - missing -> X: add (old = None, new = X). +/// - X -> missing: ignored by default; if `treat_missing_as_null` is true, update to null (old = X, new = null). +pub fn diff(source: &LinkMLValue, target: &LinkMLValue, treat_missing_as_null: bool) -> Vec { fn inner( path: &mut Vec, slot: Option<&SlotView>, s: &LinkMLValue, t: &LinkMLValue, - ignore_missing: bool, + treat_missing_as_null: bool, out: &mut Vec, ) { if let Some(sl) = slot { @@ -81,14 +89,17 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b .or_else(|| tc.slots().iter().find(|s| s.name == *k)); path.push(k.clone()); match tm.get(k) { - Some(tv) => inner(path, slot_view, sv, tv, ignore_missing, out), + Some(tv) => inner(path, slot_view, sv, tv, treat_missing_as_null, out), None => { - if !ignore_missing && !slot_view.is_some_and(slot_is_ignored) { - out.push(Delta { - path: path.clone(), - old: Some(sv.to_json()), - new: None, - }); + if !slot_view.is_some_and(slot_is_ignored) { + // Missing target slot: either ignore (default) or treat as update to null + if treat_missing_as_null { + out.push(Delta { + path: path.clone(), + old: Some(sv.to_json()), + new: Some(JsonValue::Null), + }); + } } } } @@ -118,7 +129,9 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b for i in 0..max_len { path.push(i.to_string()); match (sl.get(i), tl.get(i)) { - (Some(sv), Some(tv)) => inner(path, None, sv, tv, ignore_missing, out), + (Some(sv), Some(tv)) => { + inner(path, None, sv, tv, treat_missing_as_null, out) + } (Some(sv), None) => out.push(Delta { path: path.clone(), old: Some(sv.to_json()), @@ -140,7 +153,9 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b for k in keys { path.push(k.clone()); match (sm.get(&k), tm.get(&k)) { - (Some(sv), Some(tv)) => inner(path, None, sv, tv, ignore_missing, out), + (Some(sv), Some(tv)) => { + inner(path, None, sv, tv, treat_missing_as_null, out) + } (Some(sv), None) => out.push(Delta { path: path.clone(), old: Some(sv.to_json()), @@ -156,14 +171,29 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b path.pop(); } } - _ => { - let sv = s.to_json(); - let tv = t.to_json(); - if sv != tv { + (LinkMLValue::Null { .. }, LinkMLValue::Null { .. }) => {} + (LinkMLValue::Null { .. }, tv) => { + out.push(Delta { + path: path.clone(), + old: Some(JsonValue::Null), + new: Some(tv.to_json()), + }); + } + (sv, LinkMLValue::Null { .. }) => { + out.push(Delta { + path: path.clone(), + old: Some(sv.to_json()), + new: Some(JsonValue::Null), + }); + } + (sv, tv) => { + let sj = sv.to_json(); + let tj = tv.to_json(); + if sj != tj { out.push(Delta { path: path.clone(), - old: Some(sv), - new: Some(tv), + old: Some(sj), + new: Some(tj), }); } } @@ -175,7 +205,7 @@ pub fn diff(source: &LinkMLValue, target: &LinkMLValue, ignore_missing_target: b None, source, target, - ignore_missing_target, + treat_missing_as_null, &mut out, ); out diff --git a/src/runtime/src/lib.rs b/src/runtime/src/lib.rs index 0fd45b5..be868ac 100644 --- a/src/runtime/src/lib.rs +++ b/src/runtime/src/lib.rs @@ -61,6 +61,11 @@ pub enum LinkMLValue { class: Option, sv: SchemaView, }, + Null { + slot: SlotView, + class: Option, + sv: SchemaView, + }, List { values: Vec, slot: SlotView, @@ -104,6 +109,7 @@ impl LinkMLValue { current = values.get(key)?; } LinkMLValue::Scalar { .. } => return None, + LinkMLValue::Null { .. } => return None, } } Some(current) @@ -253,6 +259,12 @@ impl LinkMLValue { sv: sv.clone(), }) } + // Preserve explicit null as a Null value for list-valued slot + (false, JsonValue::Null) => Ok(LinkMLValue::Null { + slot: sl.clone(), + class: Some(class.clone()), + sv: sv.clone(), + }), (false, other) => Err(LinkMLError(format!( "expected list for slot `{}`, found {:?} at {}", sl.name, @@ -371,6 +383,12 @@ impl LinkMLValue { sv: sv.clone(), }) } + // Preserve explicit null as a Null value for mapping-valued slot + JsonValue::Null => Ok(LinkMLValue::Null { + slot: sl.clone(), + class: class.clone(), + sv: sv.clone(), + }), other => Err(LinkMLError(format!( "expected mapping for slot `{}`, found {:?} at {}", sl.name, @@ -483,12 +501,20 @@ impl LinkMLValue { classview_name )) })?; - Ok(LinkMLValue::Scalar { - value, - slot: sl, - class: Some(class.clone()), - sv: sv.clone(), - }) + if value.is_null() { + Ok(LinkMLValue::Null { + slot: sl, + class: Some(class.clone()), + sv: sv.clone(), + }) + } else { + Ok(LinkMLValue::Scalar { + value, + slot: sl, + class: Some(class.clone()), + sv: sv.clone(), + }) + } } fn from_json_internal( @@ -616,6 +642,7 @@ fn validate_inner(value: &LinkMLValue) -> std::result::Result<(), String> { } Ok(()) } + LinkMLValue::Null { .. } => Ok(()), LinkMLValue::List { values, .. } => { for v in values { validate_inner(v)?; @@ -647,6 +674,7 @@ pub fn validate(value: &LinkMLValue) -> std::result::Result<(), String> { fn validate_collect(value: &LinkMLValue, errors: &mut Vec) { match value { LinkMLValue::Scalar { .. } => {} + LinkMLValue::Null { .. } => {} LinkMLValue::List { values, .. } => { for v in values { validate_collect(v, errors); diff --git a/src/runtime/src/turtle.rs b/src/runtime/src/turtle.rs index 073dcfc..2221a94 100644 --- a/src/runtime/src/turtle.rs +++ b/src/runtime/src/turtle.rs @@ -225,6 +225,9 @@ fn serialize_map( } } } + LinkMLValue::Null { .. } => { + // Null is treated as absent; emit nothing + } LinkMLValue::Object { values, class, .. } => { let class_ref = &class; let (obj, child_id) = @@ -288,6 +291,9 @@ fn serialize_map( } } } + LinkMLValue::Null { .. } => { + // Skip null items + } LinkMLValue::Object { values: mv, class, .. } => { @@ -364,6 +370,9 @@ fn serialize_map( } } } + LinkMLValue::Null { .. } => { + // nothing + } LinkMLValue::Object { values: mv, class, .. } => { @@ -523,6 +532,7 @@ pub fn write_turtle( formatter.serialize_triple(triple.as_ref())?; } } + LinkMLValue::Null { .. } => {} LinkMLValue::List { .. } => {} LinkMLValue::Mapping { .. } => {} } @@ -578,12 +588,16 @@ pub fn write_turtle( formatter.serialize_triple(triple.as_ref())?; } } + LinkMLValue::Null { .. } => { + // nothing + } LinkMLValue::List { .. } => {} LinkMLValue::Mapping { .. } => {} } } } LinkMLValue::Scalar { .. } => {} + LinkMLValue::Null { .. } => {} } let out_buf = formatter.finish()?; let mut out = String::from_utf8(out_buf).unwrap_or_default(); diff --git a/src/runtime/tests/data/example_personinfo_data_nulls.yaml b/src/runtime/tests/data/example_personinfo_data_nulls.yaml new file mode 100644 index 0000000..0921942 --- /dev/null +++ b/src/runtime/tests/data/example_personinfo_data_nulls.yaml @@ -0,0 +1,11 @@ +objects: + - id: P:100 + objecttype: https://w3id.org/linkml/examples/personinfo/Person + name: Null Collections Person + # multivalued scalar list + aliases: null + # inlined-as-list of class instances + has_employment_history: null + # inlined-as-dict of class instances + has_familial_relationships: null + diff --git a/src/runtime/tests/diff.rs b/src/runtime/tests/diff.rs index 2df1971..187cf73 100644 --- a/src/runtime/tests/diff.rs +++ b/src/runtime/tests/diff.rs @@ -91,7 +91,7 @@ fn diff_ignore_missing_target() { ) .unwrap(); - let deltas = diff(&src, &tgt, true); + let deltas = diff(&src, &tgt, false); assert!(deltas.is_empty()); let patched = patch(&src, &deltas, &sv); let patched_json = patched.to_json(); @@ -139,6 +139,153 @@ fn diff_and_patch_personinfo() { assert_eq!(patched.to_json(), tgt.to_json()); } +#[test] +fn diff_null_and_missing_semantics() { + use linkml_runtime::LinkMLValue; + let schema = from_yaml(Path::new(&data_path("schema.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let class = sv + .get_class(&Identifier::new("Person"), &conv) + .unwrap() + .expect("class not found"); + + let src = load_yaml_file( + Path::new(&data_path("person_valid.yaml")), + &sv, + &class, + &conv, + ) + .unwrap(); + + // X -> null => update to null + if let LinkMLValue::Object { values, .. } = src.clone() { + let mut tgt_values = values.clone(); + let age_slot = class + .slots() + .iter() + .find(|s| s.name == "age") + .expect("age slot"); + tgt_values.insert( + "age".to_string(), + LinkMLValue::Null { + slot: age_slot.clone(), + class: Some(class.clone()), + sv: sv.clone(), + }, + ); + let deltas = diff( + &LinkMLValue::Object { + values: values.clone(), + class: class.clone(), + sv: sv.clone(), + }, + &LinkMLValue::Object { + values: tgt_values.clone(), + class: class.clone(), + sv: sv.clone(), + }, + false, + ); + assert!(deltas + .iter() + .any(|d| d.path == vec!["age".to_string()] && d.new == Some(serde_json::Value::Null))); + } + + // null -> X => update from null + if let LinkMLValue::Object { values, .. } = src.clone() { + let mut src_values = values.clone(); + let age_slot = class + .slots() + .iter() + .find(|s| s.name == "age") + .expect("age slot"); + src_values.insert( + "age".to_string(), + LinkMLValue::Null { + slot: age_slot.clone(), + class: Some(class.clone()), + sv: sv.clone(), + }, + ); + let deltas = diff( + &LinkMLValue::Object { + values: src_values.clone(), + class: class.clone(), + sv: sv.clone(), + }, + &LinkMLValue::Object { + values: values.clone(), + class: class.clone(), + sv: sv.clone(), + }, + false, + ); + assert!(deltas.iter().any(|d| d.path == vec!["age".to_string()] + && d.old == Some(serde_json::Value::Null) + && d.new.is_some())); + } + + // missing -> X => add + if let LinkMLValue::Object { values, .. } = src.clone() { + let mut src_values = values.clone(); + src_values.remove("age"); + let deltas = diff( + &LinkMLValue::Object { + values: src_values.clone(), + class: class.clone(), + sv: sv.clone(), + }, + &LinkMLValue::Object { + values: values.clone(), + class: class.clone(), + sv: sv.clone(), + }, + false, + ); + assert!(deltas + .iter() + .any(|d| d.path == vec!["age".to_string()] && d.old.is_none() && d.new.is_some())); + } + + // X -> missing: ignored by default; produce update-to-null when treat_missing_as_null=true + if let LinkMLValue::Object { values, .. } = src.clone() { + let mut tgt_values = values.clone(); + tgt_values.remove("age"); + let deltas = diff( + &LinkMLValue::Object { + values: values.clone(), + class: class.clone(), + sv: sv.clone(), + }, + &LinkMLValue::Object { + values: tgt_values.clone(), + class: class.clone(), + sv: sv.clone(), + }, + false, + ); + assert!(deltas.iter().all(|d| d.path != vec!["age".to_string()])); + let deltas2 = diff( + &LinkMLValue::Object { + values: values.clone(), + class: class.clone(), + sv: sv.clone(), + }, + &LinkMLValue::Object { + values: tgt_values.clone(), + class: class.clone(), + sv: sv.clone(), + }, + true, + ); + assert!(deltas2 + .iter() + .any(|d| d.path == vec!["age".to_string()] && d.new == Some(serde_json::Value::Null))) + } +} + #[test] fn personinfo_invalid_fails() { let schema = from_yaml(Path::new(&info_path("personinfo.yaml"))).unwrap(); diff --git a/src/runtime/tests/validation.rs b/src/runtime/tests/validation.rs index b91163a..f7e05a1 100644 --- a/src/runtime/tests/validation.rs +++ b/src/runtime/tests/validation.rs @@ -51,3 +51,49 @@ fn validate_personinfo_example2() { .unwrap(); assert!(validate(&v).is_ok()); } + +#[test] +fn validate_personinfo_null_collections() { + let schema = from_yaml(Path::new(&info_path("personinfo.yaml"))).unwrap(); + let mut sv = SchemaView::new(); + sv.add_schema(schema.clone()).unwrap(); + let conv = converter_from_schema(&schema); + let container = sv + .get_class(&Identifier::new("Container"), &conv) + .unwrap() + .expect("class not found"); + let v = load_yaml_file( + Path::new(&info_path("example_personinfo_data_nulls.yaml")), + &sv, + &container, + &conv, + ) + .unwrap(); + assert!(validate(&v).is_ok()); + // Assert that nulls are preserved as LinkMLValue::Null (not empty collections) + if let linkml_runtime::LinkMLValue::Object { values, .. } = &v { + if let Some(linkml_runtime::LinkMLValue::List { values: objs, .. }) = values.get("objects") + { + if let Some(linkml_runtime::LinkMLValue::Object { values: person, .. }) = objs.first() { + assert!(matches!( + person.get("aliases"), + Some(linkml_runtime::LinkMLValue::Null { .. }) + )); + assert!(matches!( + person.get("has_employment_history"), + Some(linkml_runtime::LinkMLValue::Null { .. }) + )); + assert!(matches!( + person.get("has_familial_relationships"), + Some(linkml_runtime::LinkMLValue::Null { .. }) + )); + } else { + panic!("expected first object to be an Object"); + } + } else { + panic!("expected Container.objects to be a List"); + } + } else { + panic!("expected root to be an Object"); + } +}