From a3df32f27a3992e9556c87fe727c5bf6294dc9fb Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 18 May 2026 14:10:24 +0100 Subject: [PATCH 1/2] fix(security): enforce trust boundary before UMLS deserialize_unchecked Refs #1733 Convert advisory permission warning to hard error. An attacker who can write the artifact file can forge both bytes and checksums together, making the checksum gate useless for integrity. Changes: - medical_artifact.rs: world-writable and group-writable artifacts now bail before any data is read on Unix. - sharded_extractor.rs: update SAFETY comment to list both enforced gates (permission check + checksum verification). - Tests: replace advisory-success test with rejection tests for world-writable and group-writable, add secure-permissions test. --- .../src/medical_artifact.rs | 93 +++++++++++++++---- .../src/sharded_extractor.rs | 15 ++- 2 files changed, 86 insertions(+), 22 deletions(-) diff --git a/crates/terraphim_automata/src/medical_artifact.rs b/crates/terraphim_automata/src/medical_artifact.rs index 680a42530..2f5fbe2e8 100644 --- a/crates/terraphim_automata/src/medical_artifact.rs +++ b/crates/terraphim_automata/src/medical_artifact.rs @@ -104,22 +104,31 @@ pub fn save_umls_artifact( /// Load a UMLS artifact: returns (header, shard_bytes_list) pub fn load_umls_artifact(path: &Path) -> anyhow::Result<(ArtifactHeader, Vec>)> { - // Advisory permission check: a world-writable artifact file allows an - // attacker to forge both bytes and stored checksums together, bypassing - // the integrity gate. Log a warning but do not abort — this is P2 - // advisory (see Gitea #1587). Binding this to Unix keeps Windows builds clean. + // Permission enforcement: any write bit beyond the owner is a hard + // error. An attacker who can write the artifact can forge both bytes + // and stored checksums together, bypassing the integrity gate. The + // checksum guard is only effective when the file is owner-writable + // exclusively. Binding this to Unix keeps Windows builds clean; + // Windows callers should apply ACLs externally. #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; if let Ok(meta) = std::fs::metadata(path) { let mode = meta.permissions().mode(); + let mut reasons = Vec::new(); if mode & 0o002 != 0 { - log::warn!( - "Artifact {:?} is world-writable (mode {:04o}). \ - An attacker with filesystem write access could forge a \ - valid artifact and bypass checksum verification.", + reasons.push("world-writable".to_string()); + } + if mode & 0o020 != 0 { + reasons.push("group-writable".to_string()); + } + if !reasons.is_empty() { + anyhow::bail!( + "Artifact {:?} has insecure permissions (mode {:04o}): {}. \ + Restrict to owner-only write (e.g. chmod 600) and re-save.", path, - mode + mode, + reasons.join(", ") ); } } @@ -290,15 +299,13 @@ mod tests { assert!(msg.contains("checksum mismatch"), "error: {}", msg); } - /// Verify that a world-writable artifact still loads successfully. + /// Verify that a world-writable artifact is rejected as a hard error. /// - /// The permission check is advisory (P2, Gitea #1587): it emits a log - /// warning but does not abort loading. The checksum gate still protects - /// against _tampered_ bytes; the warning alerts operators to tighten - /// filesystem permissions. + /// An attacker who can write the file can forge both bytes and + /// checksums. The permission gate rejects before any data is read. #[test] #[cfg(unix)] - fn test_world_writable_artifact_loads_ok() { + fn test_world_writable_artifact_rejected() { use std::os::unix::fs::PermissionsExt; let dir = tempdir().unwrap(); let path = dir.path().join("world_writable.bin.zst"); @@ -312,11 +319,63 @@ mod tests { perms.set_mode(0o666); std::fs::set_permissions(&path, perms).unwrap(); - // Loading must succeed — the warning is advisory, not fatal + let result = load_umls_artifact(&path); + assert!(result.is_err(), "World-writable artifact must be rejected"); + let msg = result.err().unwrap().to_string(); + assert!( + msg.contains("world-writable"), + "error must mention world-writable, got: {msg}" + ); + } + + /// Verify that a group-writable artifact is rejected. + #[test] + #[cfg(unix)] + fn test_group_writable_artifact_rejected() { + use std::os::unix::fs::PermissionsExt; + let dir = tempdir().unwrap(); + let path = dir.path().join("group_writable.bin.zst"); + + let shard_bytes = vec![vec![1u8; 10], vec![2u8; 8]]; + let header = make_test_header(&shard_bytes); + save_umls_artifact(&header, &shard_bytes, &path).unwrap(); + + // Make group-writable (0o620) + let mut perms = std::fs::metadata(&path).unwrap().permissions(); + perms.set_mode(0o620); + std::fs::set_permissions(&path, perms).unwrap(); + + let result = load_umls_artifact(&path); + assert!(result.is_err(), "Group-writable artifact must be rejected"); + let msg = result.err().unwrap().to_string(); + assert!( + msg.contains("group-writable"), + "error must mention group-writable, got: {msg}" + ); + } + + /// Verify that an artifact with secure owner-only permissions (0o600) + /// loads successfully. + #[test] + #[cfg(unix)] + fn test_secure_permissions_artifact_loads_ok() { + use std::os::unix::fs::PermissionsExt; + let dir = tempdir().unwrap(); + let path = dir.path().join("secure.bin.zst"); + + let shard_bytes = vec![vec![1u8; 10], vec![2u8; 8]]; + let header = make_test_header(&shard_bytes); + save_umls_artifact(&header, &shard_bytes, &path).unwrap(); + + // Secure: owner read+write only + let mut perms = std::fs::metadata(&path).unwrap().permissions(); + perms.set_mode(0o600); + std::fs::set_permissions(&path, perms).unwrap(); + let result = load_umls_artifact(&path); assert!( result.is_ok(), - "World-writable artifact must still load (advisory warning only); got: {:?}", + "Owner-writable-only artifact must load; got: {:?}", result.err() ); let (loaded_header, loaded_shards) = result.unwrap(); diff --git a/crates/terraphim_automata/src/sharded_extractor.rs b/crates/terraphim_automata/src/sharded_extractor.rs index 3cdd4ffc6..8f8564006 100644 --- a/crates/terraphim_automata/src/sharded_extractor.rs +++ b/crates/terraphim_automata/src/sharded_extractor.rs @@ -214,11 +214,16 @@ impl ShardedUmlsExtractor { let shards: Vec> = shard_bytes .iter() .map(|bytes| { - // SAFETY: bytes were produced by `DoubleArrayAhoCorasick::serialize()` on - // this machine and have passed SHA-256 checksum verification inside - // `load_umls_artifact()` before reaching this point. The checksum gate - // guarantees the bytes are bit-for-bit identical to what `serialize()` - // wrote; no tampered or externally-sourced bytes can reach this call. + // SAFETY: bytes have passed two independent gates inside + // `load_umls_artifact()` before reaching this point: + // 1. Permission enforcement — the artifact file is owner-writable + // exclusively (world/group write bits rejected). An attacker + // who cannot write the file cannot forge its contents. + // 2. SHA-256 checksum verification — every shard's decompressed + // bytes match the digest stored in the header. Together with + // gate (1), this guarantees the bytes are bit-for-bit + // identical to what `serialize()` wrote; tampered or + // externally-sourced bytes cannot reach this call. let (automaton, _remaining) = unsafe { DoubleArrayAhoCorasick::::deserialize_unchecked(bytes) }; automaton From d3746fc01413c40aa4d92abc10611f45d4f39ea5 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 18 May 2026 15:14:48 +0200 Subject: [PATCH 2/2] fix(security): enforce trust boundary before UMLS deserialize_unchecked Refs #1733 --- .../src/medical_artifact.rs | 93 +++++++++++++++---- .../src/sharded_extractor.rs | 15 ++- 2 files changed, 86 insertions(+), 22 deletions(-) diff --git a/crates/terraphim_automata/src/medical_artifact.rs b/crates/terraphim_automata/src/medical_artifact.rs index 680a42530..2f5fbe2e8 100644 --- a/crates/terraphim_automata/src/medical_artifact.rs +++ b/crates/terraphim_automata/src/medical_artifact.rs @@ -104,22 +104,31 @@ pub fn save_umls_artifact( /// Load a UMLS artifact: returns (header, shard_bytes_list) pub fn load_umls_artifact(path: &Path) -> anyhow::Result<(ArtifactHeader, Vec>)> { - // Advisory permission check: a world-writable artifact file allows an - // attacker to forge both bytes and stored checksums together, bypassing - // the integrity gate. Log a warning but do not abort — this is P2 - // advisory (see Gitea #1587). Binding this to Unix keeps Windows builds clean. + // Permission enforcement: any write bit beyond the owner is a hard + // error. An attacker who can write the artifact can forge both bytes + // and stored checksums together, bypassing the integrity gate. The + // checksum guard is only effective when the file is owner-writable + // exclusively. Binding this to Unix keeps Windows builds clean; + // Windows callers should apply ACLs externally. #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; if let Ok(meta) = std::fs::metadata(path) { let mode = meta.permissions().mode(); + let mut reasons = Vec::new(); if mode & 0o002 != 0 { - log::warn!( - "Artifact {:?} is world-writable (mode {:04o}). \ - An attacker with filesystem write access could forge a \ - valid artifact and bypass checksum verification.", + reasons.push("world-writable".to_string()); + } + if mode & 0o020 != 0 { + reasons.push("group-writable".to_string()); + } + if !reasons.is_empty() { + anyhow::bail!( + "Artifact {:?} has insecure permissions (mode {:04o}): {}. \ + Restrict to owner-only write (e.g. chmod 600) and re-save.", path, - mode + mode, + reasons.join(", ") ); } } @@ -290,15 +299,13 @@ mod tests { assert!(msg.contains("checksum mismatch"), "error: {}", msg); } - /// Verify that a world-writable artifact still loads successfully. + /// Verify that a world-writable artifact is rejected as a hard error. /// - /// The permission check is advisory (P2, Gitea #1587): it emits a log - /// warning but does not abort loading. The checksum gate still protects - /// against _tampered_ bytes; the warning alerts operators to tighten - /// filesystem permissions. + /// An attacker who can write the file can forge both bytes and + /// checksums. The permission gate rejects before any data is read. #[test] #[cfg(unix)] - fn test_world_writable_artifact_loads_ok() { + fn test_world_writable_artifact_rejected() { use std::os::unix::fs::PermissionsExt; let dir = tempdir().unwrap(); let path = dir.path().join("world_writable.bin.zst"); @@ -312,11 +319,63 @@ mod tests { perms.set_mode(0o666); std::fs::set_permissions(&path, perms).unwrap(); - // Loading must succeed — the warning is advisory, not fatal + let result = load_umls_artifact(&path); + assert!(result.is_err(), "World-writable artifact must be rejected"); + let msg = result.err().unwrap().to_string(); + assert!( + msg.contains("world-writable"), + "error must mention world-writable, got: {msg}" + ); + } + + /// Verify that a group-writable artifact is rejected. + #[test] + #[cfg(unix)] + fn test_group_writable_artifact_rejected() { + use std::os::unix::fs::PermissionsExt; + let dir = tempdir().unwrap(); + let path = dir.path().join("group_writable.bin.zst"); + + let shard_bytes = vec![vec![1u8; 10], vec![2u8; 8]]; + let header = make_test_header(&shard_bytes); + save_umls_artifact(&header, &shard_bytes, &path).unwrap(); + + // Make group-writable (0o620) + let mut perms = std::fs::metadata(&path).unwrap().permissions(); + perms.set_mode(0o620); + std::fs::set_permissions(&path, perms).unwrap(); + + let result = load_umls_artifact(&path); + assert!(result.is_err(), "Group-writable artifact must be rejected"); + let msg = result.err().unwrap().to_string(); + assert!( + msg.contains("group-writable"), + "error must mention group-writable, got: {msg}" + ); + } + + /// Verify that an artifact with secure owner-only permissions (0o600) + /// loads successfully. + #[test] + #[cfg(unix)] + fn test_secure_permissions_artifact_loads_ok() { + use std::os::unix::fs::PermissionsExt; + let dir = tempdir().unwrap(); + let path = dir.path().join("secure.bin.zst"); + + let shard_bytes = vec![vec![1u8; 10], vec![2u8; 8]]; + let header = make_test_header(&shard_bytes); + save_umls_artifact(&header, &shard_bytes, &path).unwrap(); + + // Secure: owner read+write only + let mut perms = std::fs::metadata(&path).unwrap().permissions(); + perms.set_mode(0o600); + std::fs::set_permissions(&path, perms).unwrap(); + let result = load_umls_artifact(&path); assert!( result.is_ok(), - "World-writable artifact must still load (advisory warning only); got: {:?}", + "Owner-writable-only artifact must load; got: {:?}", result.err() ); let (loaded_header, loaded_shards) = result.unwrap(); diff --git a/crates/terraphim_automata/src/sharded_extractor.rs b/crates/terraphim_automata/src/sharded_extractor.rs index 3cdd4ffc6..8f8564006 100644 --- a/crates/terraphim_automata/src/sharded_extractor.rs +++ b/crates/terraphim_automata/src/sharded_extractor.rs @@ -214,11 +214,16 @@ impl ShardedUmlsExtractor { let shards: Vec> = shard_bytes .iter() .map(|bytes| { - // SAFETY: bytes were produced by `DoubleArrayAhoCorasick::serialize()` on - // this machine and have passed SHA-256 checksum verification inside - // `load_umls_artifact()` before reaching this point. The checksum gate - // guarantees the bytes are bit-for-bit identical to what `serialize()` - // wrote; no tampered or externally-sourced bytes can reach this call. + // SAFETY: bytes have passed two independent gates inside + // `load_umls_artifact()` before reaching this point: + // 1. Permission enforcement — the artifact file is owner-writable + // exclusively (world/group write bits rejected). An attacker + // who cannot write the file cannot forge its contents. + // 2. SHA-256 checksum verification — every shard's decompressed + // bytes match the digest stored in the header. Together with + // gate (1), this guarantees the bytes are bit-for-bit + // identical to what `serialize()` wrote; tampered or + // externally-sourced bytes cannot reach this call. let (automaton, _remaining) = unsafe { DoubleArrayAhoCorasick::::deserialize_unchecked(bytes) }; automaton