From 23088e769dbc91f0beb5ada78bd810167e689ef7 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Thu, 9 Apr 2026 13:13:03 +0200 Subject: [PATCH 1/7] Implement use-def maps --- Cargo.lock | 4 + Cargo.toml | 1 + crates/oak_index/Cargo.toml | 4 + crates/oak_index/src/builder.rs | 128 ++++- crates/oak_index/src/index_vec.rs | 9 + crates/oak_index/src/lib.rs | 1 + crates/oak_index/src/semantic_index.rs | 21 +- crates/oak_index/src/use_def_map.rs | 260 +++++++++ crates/oak_index/tests/use_def_map.rs | 704 +++++++++++++++++++++++++ crates/stdext/src/result.rs | 12 + 10 files changed, 1130 insertions(+), 14 deletions(-) create mode 100644 crates/oak_index/src/use_def_map.rs create mode 100644 crates/oak_index/tests/use_def_map.rs diff --git a/Cargo.lock b/Cargo.lock index 4fda3117c..8ff2e9fe8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2248,7 +2248,11 @@ dependencies = [ "air_r_parser", "air_r_syntax", "biome_rowan", + "itertools 0.10.5", + "log", "rustc-hash", + "smallvec", + "stdext", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 73df7d108..3b273b528 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -94,6 +94,7 @@ serde_json = { version = "1.0.94", features = ["preserve_order"] } serde_repr = "0.1.17" serde_with = "3.0.0" sha2 = "0.10.6" +smallvec = "1.13.2" stdext = { path = "crates/stdext" } streaming-iterator = "0.1.9" strum = "0.26.2" diff --git a/crates/oak_index/Cargo.toml b/crates/oak_index/Cargo.toml index e2bb4aa61..8d8436167 100644 --- a/crates/oak_index/Cargo.toml +++ b/crates/oak_index/Cargo.toml @@ -16,7 +16,11 @@ workspace = true [dependencies] aether_syntax.workspace = true biome_rowan.workspace = true +itertools.workspace = true +log.workspace = true rustc-hash.workspace = true +smallvec.workspace = true +stdext.workspace = true [dev-dependencies] aether_parser.workspace = true diff --git a/crates/oak_index/src/builder.rs b/crates/oak_index/src/builder.rs index 72f224921..3c2a62d8f 100644 --- a/crates/oak_index/src/builder.rs +++ b/crates/oak_index/src/builder.rs @@ -30,6 +30,8 @@ use crate::semantic_index::SymbolFlags; use crate::semantic_index::SymbolTableBuilder; use crate::semantic_index::Use; use crate::semantic_index::UseId; +use crate::use_def_map::UseDefMap; +use crate::use_def_map::UseDefMapBuilder; /// Build a [`SemanticIndex`] from a parsed R file. pub fn build(root: &RRoot) -> SemanticIndex { @@ -47,6 +49,9 @@ struct SemanticIndexBuilder { symbol_tables: IndexVec, definitions: IndexVec>, uses: IndexVec>, + use_def_maps: IndexVec, + current_use_def: UseDefMapBuilder, + use_def_stack: Vec, current_scope: ScopeId, } @@ -56,6 +61,7 @@ impl SemanticIndexBuilder { let mut symbol_tables = IndexVec::new(); let mut definitions = IndexVec::new(); let mut uses = IndexVec::new(); + let mut use_def_maps = IndexVec::new(); // The descendants range starts empty (`n+1..n+1`). `pop_scope` later // fills in `descendants.end` with the current arena length. Everything @@ -70,12 +76,16 @@ impl SemanticIndexBuilder { symbol_tables.push(SymbolTableBuilder::new()); definitions.push(IndexVec::new()); uses.push(IndexVec::new()); + use_def_maps.push(UseDefMap::empty()); Self { scopes, symbol_tables, definitions, uses, + use_def_maps, + current_use_def: UseDefMapBuilder::new(), + use_def_stack: Vec::new(), current_scope: file, } } @@ -99,6 +109,10 @@ impl SemanticIndexBuilder { self.symbol_tables.push(SymbolTableBuilder::new()); self.definitions.push(IndexVec::new()); self.uses.push(IndexVec::new()); + self.use_def_maps.push(UseDefMap::empty()); + + let parent_use_def = std::mem::replace(&mut self.current_use_def, UseDefMapBuilder::new()); + self.use_def_stack.push(parent_use_def); id } @@ -111,6 +125,13 @@ impl SemanticIndexBuilder { Some(parent) => parent, None => panic!("`pop_scope()` called on the file scope"), }; + + let parent_use_def = match self.use_def_stack.pop() { + Some(builder) => builder, + None => panic!("`pop_scope()` called with empty use-def stack"), + }; + let finalized = std::mem::replace(&mut self.current_use_def, parent_use_def).finish(); + self.use_def_maps[id] = finalized; } fn add_definition( @@ -120,17 +141,39 @@ impl SemanticIndexBuilder { kind: DefinitionKind, range: TextRange, ) { - let symbol = self.symbol_tables[self.current_scope].intern(name, flags); + let symbol_id = self.symbol_tables[self.current_scope].intern(name, flags); + let def_id = self.definitions[self.current_scope].push(Definition { + symbol: symbol_id, + kind, + range, + }); + + self.current_use_def.ensure_symbol(symbol_id); + self.current_use_def.record_binding(symbol_id, def_id); + } + + // Super-assignment is lexically in the current scope but binds in an + // ancestor. We record the definition here (for go-to-definition, rename) + // but skip use-def tracking since the binding doesn't affect local flow. + fn add_super_definition(&mut self, name: &str, kind: DefinitionKind, range: TextRange) { + let symbol_id = + self.symbol_tables[self.current_scope].intern(name, SymbolFlags::IS_SUPER_BOUND); self.definitions[self.current_scope].push(Definition { - symbol, + symbol: symbol_id, kind, range, }); } fn add_use(&mut self, name: &str, range: TextRange) { - let symbol = self.symbol_tables[self.current_scope].intern(name, SymbolFlags::IS_USED); - self.uses[self.current_scope].push(Use { symbol, range }); + let symbol_id = self.symbol_tables[self.current_scope].intern(name, SymbolFlags::IS_USED); + let use_id = self.uses[self.current_scope].push(Use { + symbol: symbol_id, + range, + }); + + self.current_use_def.ensure_symbol(symbol_id); + self.current_use_def.record_use(symbol_id, use_id); } // --- Recursive descent --- @@ -224,6 +267,10 @@ impl SemanticIndexBuilder { }, AnyRExpression::RForStatement(stmt) => { + // The for variable is always bound (R sets it to NULL for + // empty sequences), so its binding is recorded before the + // snapshot. Assignments inside the body are conditional + // (body may not execute for empty sequences). if let Ok(variable) = stmt.variable() { self.add_definition( &identifier_text(&variable), @@ -235,6 +282,61 @@ impl SemanticIndexBuilder { if let Ok(sequence) = stmt.sequence() { self.collect_expression(&sequence); } + + let pre_loop = self.current_use_def.snapshot(); + + if let Ok(body) = stmt.body() { + self.collect_expression(&body); + } + + self.current_use_def.merge(pre_loop); + }, + + AnyRExpression::RIfStatement(stmt) => { + // Condition is always evaluated + if let Ok(condition) = stmt.condition() { + self.collect_expression(&condition); + } + + let pre_if = self.current_use_def.snapshot(); + + // If-body (consequence) + if let Ok(consequence) = stmt.consequence() { + self.collect_expression(&consequence); + } + + let post_if = self.current_use_def.snapshot(); + self.current_use_def.restore(pre_if); + + // Else-body (alternative), if present. If absent, the + // "else path" is just the pre-if state we restored to. + if let Some(else_clause) = stmt.else_clause() { + if let Ok(alternative) = else_clause.alternative() { + self.collect_expression(&alternative); + } + } + + // After: definitions from both branches are live + self.current_use_def.merge(post_if); + }, + + AnyRExpression::RWhileStatement(stmt) => { + if let Ok(condition) = stmt.condition() { + self.collect_expression(&condition); + } + + let pre_loop = self.current_use_def.snapshot(); + + if let Ok(body) = stmt.body() { + self.collect_expression(&body); + } + + // Body may not execute + self.current_use_def.merge(pre_loop); + }, + + AnyRExpression::RRepeatStatement(stmt) => { + // Body always executes at least once, no snapshot needed if let Ok(body) = stmt.body() { self.collect_expression(&body); } @@ -244,8 +346,7 @@ impl SemanticIndexBuilder { // Generic fallback: walk over descendant nodes and collect their // `AnyRExpression` children, letting `collect_expression` - // handle their contents. This covers `RIfStatement`, - // `RWhileStatement`, `RRepeatStatement`, `RUnaryExpression`, + // handle their contents. This covers `RUnaryExpression`, // `RParenthesizedExpression`, `RReturnExpression`, literals, and // any future expression types without needing explicit arms. // @@ -379,9 +480,8 @@ impl SemanticIndexBuilder { }; if super_assign { - self.add_definition( + self.add_super_definition( &name, - SymbolFlags::IS_SUPER_BOUND, DefinitionKind::SuperAssignment(op.syntax().clone()), range, ); @@ -406,8 +506,18 @@ impl SemanticIndexBuilder { fn finish(mut self) -> SemanticIndex { self.scopes[ScopeId::from(0)].descendants.end = self.scopes.next_id(); + + let file_use_def_map = self.current_use_def.finish(); + self.use_def_maps[ScopeId::from(0)] = file_use_def_map; + let symbol_tables = self.symbol_tables.into_iter().map(|b| b.build()).collect(); - SemanticIndex::new(self.scopes, symbol_tables, self.definitions, self.uses) + SemanticIndex::new( + self.scopes, + symbol_tables, + self.definitions, + self.uses, + self.use_def_maps, + ) } } diff --git a/crates/oak_index/src/index_vec.rs b/crates/oak_index/src/index_vec.rs index 86979ce93..9d58cfd35 100644 --- a/crates/oak_index/src/index_vec.rs +++ b/crates/oak_index/src/index_vec.rs @@ -63,6 +63,15 @@ impl FromIterator for IndexVec { } } +impl Clone for IndexVec { + fn clone(&self) -> Self { + Self { + raw: self.raw.clone(), + _phantom: PhantomData, + } + } +} + impl Default for IndexVec { fn default() -> Self { Self::new() diff --git a/crates/oak_index/src/lib.rs b/crates/oak_index/src/lib.rs index a79d44530..ac0e2c5de 100644 --- a/crates/oak_index/src/lib.rs +++ b/crates/oak_index/src/lib.rs @@ -1,3 +1,4 @@ pub mod builder; pub(crate) mod index_vec; pub mod semantic_index; +pub mod use_def_map; diff --git a/crates/oak_index/src/semantic_index.rs b/crates/oak_index/src/semantic_index.rs index 72162b466..4c770140d 100644 --- a/crates/oak_index/src/semantic_index.rs +++ b/crates/oak_index/src/semantic_index.rs @@ -6,6 +6,7 @@ use rustc_hash::FxHashMap; use crate::index_vec::define_index; use crate::index_vec::IndexVec; +use crate::use_def_map::UseDefMap; // File-local scope identifier define_index!(ScopeId); @@ -48,11 +49,15 @@ pub struct SemanticIndex { // (a per-scope map from AST node positions to `ScopedUseId`). When we // introduce salsa, these lists may be restructured to match. // - // Use-def maps will layer on top of these lists, not replace them. A - // use-def map tracks which definitions reach each use through control flow, - // referencing `DefinitionId` and `UseId` indices into these arenas. + // Use-def maps layer on top of these lists. A use-def map tracks which + // definitions reach each use through control flow, referencing + // `DefinitionId` and `UseId` indices into these arenas. definitions: IndexVec>, uses: IndexVec>, + + // Per-scope flow-sensitive map from each use site to the set of definitions + // that can reach it. Built alongside the other arrays during the tree walk. + use_def_maps: IndexVec, } impl SemanticIndex { @@ -61,12 +66,14 @@ impl SemanticIndex { symbol_tables: IndexVec, definitions: IndexVec>, uses: IndexVec>, + use_def_maps: IndexVec, ) -> Self { Self { scopes, symbol_tables, definitions, uses, + use_def_maps, } } @@ -86,6 +93,10 @@ impl SemanticIndex { &self.uses[scope] } + pub fn use_def_map(&self, scope: ScopeId) -> &UseDefMap { + &self.use_def_maps[scope] + } + /// Find the innermost scope containing `offset`. pub fn scope_at(&self, offset: biome_rowan::TextSize) -> ScopeId { // Start at the file scope @@ -296,8 +307,8 @@ impl Symbol { // bound somewhere in this scope" but can't answer "which definition of x // reaches this point?" or "is x defined before this use?". Use-def maps // provide that precision. The flags remain useful for scope-level queries -// like `resolve_symbol` and `resolve_super_target` (which walk the scope -// chain checking `IS_BOUND`). They can also be useful as filters for +// like `resolve_symbol` (which walks the scope chain checking +// `IS_BOUND`). They can also be useful as filters for // short-circuiting unneeded expensive operations. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct SymbolFlags(u8); diff --git a/crates/oak_index/src/use_def_map.rs b/crates/oak_index/src/use_def_map.rs new file mode 100644 index 000000000..34214c6b9 --- /dev/null +++ b/crates/oak_index/src/use_def_map.rs @@ -0,0 +1,260 @@ +use itertools::EitherOrBoth; +use itertools::Itertools; +use smallvec::SmallVec; + +use crate::index_vec::Idx; +use crate::index_vec::IndexVec; +use crate::semantic_index::DefinitionId; +use crate::semantic_index::SymbolId; +use crate::semantic_index::UseId; + +// Use-def tracking answers: "at this use of `x`, which specific definitions +// could have run?" In straight-line code it's trivial: each definition shadows +// the previous one, and a use sees whatever definition came last. The +// complexity comes from branching. +// +// For each symbol in the current scope, we track a `Bindings`: the set of +// `DefinitionId`s that are "live". A fresh scope starts with every symbol in +// the "unbound" state: empty definition set, `may_be_unbound: true`. +// The "may_be_unbound" flag tracks whether there exists some control flow path +// where no definition was reached. +// +// ```r +// if (cond) { +// x <- 1 # def A +// } +// print(x) # may_be_unbound: true, definitions: {A} +// ``` +// +// - `record_binding()`: A definition like `x <- 1` kills all previous live +// definitions for that symbol and replaces them with a singleton. +// `may_be_unbound` becomes false. +// +// - `record_use()`: A use like `print(x)` freezes the current live state for +// that symbol. We clone the current `Bindings` and store it keyed by `use_id`. +// This operation doesn't change any state. +// +// The other operations (`snapshot()`, `restore()`, `merge()`) help dealing with +// control flow complications. +// +// ```r +// x <- 1 # def A +// if (cond) { +// x <- 2 # def B +// } else { +// x <- 3 # def C +// } +// print(x) # which defs reach this use? +// ``` +// +// 1. `snapshot()`: Clone the entire symbol state (all symbols' live +// definitions). This captures the state *before* either branch runs. Call +// this `pre_if`. +// 2. Visit the if-body: `x <- 2` shadows, so `x`'s state becomes `{B}`. +// 3. `snapshot()` again: capture the post-if-body state. Call this `post_if`. +// 4. `restore(pre_if)`: Reset to the state before the if-body ran. Now `x`'s +// state is back to `{A}`. +// 5. Visit the else-body. `x <- 3` shadows, so `x`'s state becomes `{C}`. +// 6. `merge(post_if)`: For each symbol, union the current state (post-else: +// `{C}`) with the snapshot (post-if: `{B}`). Result: `x` has `{B, C}`. +// +// After this, `print(x)` records a use that sees `{B, C}`. Def A is gone +// because both branches shadowed it. +// +// If there's no else clause, step 5 is skipped. The current state after +// restore is still `pre_if` (`{A}`). Merge unions `{A}` with `{B}` → `{A, B}`. +// The pre-if definition stays live because there's a path (the no-else path) +// where it wasn't shadowed. +// +// The same primitives can be used to implement other control flow constructs +// following similar considerations (the body of a loop might not execute). +// +// ## Interpreting `Bindings` +// +// Callers examine the two fields of `Bindings` at a use site to determine +// what happened along control flow: +// +// definitions | may_be_unbound | meaning +// ------------|----------------|-------- +// {A} | false | one definition, straight-line +// {B, C} | false | paths converge (e.g. both if/else branches) +// {A, B} | false | prior def + conditional redefinition +// {A} | true | conditional definition (e.g. if without else) +// {} | true | no local def, parent scope reference +// {A, B} | true | some paths define, some don't + +/// The set of definitions that can reach a particular point in control flow, +/// plus whether the symbol may be unbound (no definition on some path). +/// +/// Definitions are stored sorted by ID so that merge is a linear merge-join. The +/// `SmallVec<[DefinitionId; 2]>` avoids heap allocation for the common case of +/// 1-2 live definitions. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Bindings { + definitions: SmallVec<[DefinitionId; 2]>, + may_be_unbound: bool, +} + +impl Bindings { + fn unbound() -> Self { + Self { + definitions: SmallVec::new(), + may_be_unbound: true, + } + } + + pub fn definitions(&self) -> &[DefinitionId] { + &self.definitions + } + + pub fn contains_definition(&self, id: DefinitionId) -> bool { + self.definitions.contains(&id) + } + + pub fn definition_count(&self) -> usize { + self.definitions.len() + } + + pub fn may_be_unbound(&self) -> bool { + self.may_be_unbound + } + + /// Replace all live definitions with a single new one, marking the + /// symbol as definitely bound. + fn record_binding(&mut self, def_id: DefinitionId) { + self.definitions.clear(); + self.definitions.push(def_id); + self.may_be_unbound = false; + } + + /// Union definitions from `other` into `self`, OR the `may_be_unbound` + /// flags. Both sides are sorted by `DefinitionId`, so this is a linear + /// merge-join. + fn merge(&mut self, other: Bindings) { + self.definitions = sorted_union(&self.definitions, &other.definitions); + self.may_be_unbound |= other.may_be_unbound; + } +} + +/// Merge two sorted slices into a sorted `SmallVec` with no duplicates. +fn sorted_union(a: &[DefinitionId], b: &[DefinitionId]) -> SmallVec<[DefinitionId; 2]> { + a.iter() + .merge_join_by(b.iter(), |a, b| a.cmp(b)) + .map(|either| match either { + EitherOrBoth::Left(&id) | EitherOrBoth::Right(&id) | EitherOrBoth::Both(&id, _) => id, + }) + .collect() +} + +/// A snapshot of all symbol states at a particular point in control flow. +#[derive(Clone, Debug)] +pub(crate) struct FlowSnapshot { + symbol_states: IndexVec, +} + +/// The immutable use-def map for a single scope, produced by finalizing the +/// builder. For each use site, stores the set of definitions that can reach +/// it through control flow. +#[derive(Debug)] +pub struct UseDefMap { + bindings_by_use: IndexVec, +} + +impl UseDefMap { + pub(crate) fn empty() -> Self { + Self { + bindings_by_use: IndexVec::new(), + } + } + + pub fn bindings_at_use(&self, use_id: UseId) -> &Bindings { + &self.bindings_by_use[use_id] + } +} + +/// Mutable builder for constructing a [`UseDefMap`] during the tree walk. +/// One builder exists per scope. When entering a nested scope the current +/// builder is pushed onto a stack and a fresh one takes over. +#[derive(Debug)] +pub(crate) struct UseDefMapBuilder { + symbol_states: IndexVec, + bindings_by_use: IndexVec, +} + +impl UseDefMapBuilder { + pub(crate) fn new() -> Self { + Self { + symbol_states: IndexVec::new(), + bindings_by_use: IndexVec::new(), + } + } + + /// Ensure that `symbol_id` has an entry in `symbol_states`, growing the + /// vec with "unbound" entries as needed. Called after interning a symbol + /// so the use-def state stays in sync with the symbol table. + pub(crate) fn ensure_symbol(&mut self, symbol_id: SymbolId) { + // In practice this adds at most one entry (IDs are sequential), the + // `while` is defensive. + while self.symbol_states.len() <= symbol_id.index() { + self.symbol_states.push(Bindings::unbound()); + } + } + + /// Record a new binding for `symbol_id`. Replaces (shadows) all previous + /// live definitions for that symbol. + pub(crate) fn record_binding(&mut self, symbol_id: SymbolId, def_id: DefinitionId) { + self.symbol_states[symbol_id].record_binding(def_id); + } + + /// Record a use of `symbol_id`. Clones the current live bindings for that + /// symbol and associates them with `use_id`. + pub(crate) fn record_use(&mut self, symbol_id: SymbolId, use_id: UseId) { + let bindings = self.symbol_states[symbol_id].clone(); + let pushed_id = self.bindings_by_use.push(bindings); + stdext::soft_assert!(use_id == pushed_id); + } + + /// Take a snapshot of the current symbol states. + pub(crate) fn snapshot(&self) -> FlowSnapshot { + FlowSnapshot { + symbol_states: self.symbol_states.clone(), + } + } + + /// Restore state to a previously taken snapshot. + pub(crate) fn restore(&mut self, snapshot: FlowSnapshot) { + let num_symbols = self.symbol_states.len(); + self.symbol_states = snapshot.symbol_states; + + // New symbols may have been interned between snapshot and restore. + // Fill them in as "unbound" so IDs stay aligned. + while self.symbol_states.len() < num_symbols { + self.symbol_states.push(Bindings::unbound()); + } + } + + /// Merge a snapshot into the current state. For each symbol, union the + /// definition sets and OR the `may_be_unbound` flags. This reflects that + /// control flow could have taken either path to reach this point. + pub(crate) fn merge(&mut self, snapshot: FlowSnapshot) { + let mut snap_iter = snapshot.symbol_states.into_iter(); + + for i in 0..self.symbol_states.len() { + let id = SymbolId::new(i); + if let Some(snap_bindings) = snap_iter.next() { + self.symbol_states[id].merge(snap_bindings); + } else { + // Symbol didn't exist in the snapshot, so it was unbound on + // that path + self.symbol_states[id].merge(Bindings::unbound()); + } + } + } + + /// Finalize into an immutable [`UseDefMap`]. + pub(crate) fn finish(self) -> UseDefMap { + UseDefMap { + bindings_by_use: self.bindings_by_use, + } + } +} diff --git a/crates/oak_index/tests/use_def_map.rs b/crates/oak_index/tests/use_def_map.rs new file mode 100644 index 000000000..c842e9fdc --- /dev/null +++ b/crates/oak_index/tests/use_def_map.rs @@ -0,0 +1,704 @@ +use aether_parser::parse; +use aether_parser::RParserOptions; +use oak_index::builder::build; +use oak_index::semantic_index::DefinitionId; +use oak_index::semantic_index::ScopeId; +use oak_index::semantic_index::SemanticIndex; +use oak_index::semantic_index::UseId; +use stdext::assert_not; + +fn index(source: &str) -> SemanticIndex { + let parsed = parse(source, RParserOptions::default()); + build(&parsed.tree()) +} + +// --- Straight-line code --- + +#[test] +fn test_single_def_single_use() { + let index = index("x <- 1\nx"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_use_before_def_is_unbound() { + let index = index("x\nx <- 1"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert!(bindings.definitions().is_empty()); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_second_def_shadows_first() { + let index = index("x <- 1\nx <- 2\nx"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // The use of `x` should see only the second definition. + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_use_between_defs() { + let index = index("x <- 1\nx\nx <- 2\nx"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // First use sees def 0, second use sees def 1. + let use0 = map.bindings_at_use(UseId::from(0)); + assert_eq!(use0.definitions(), &[DefinitionId::from(0)]); + assert_not!(use0.may_be_unbound()); + + let use1 = map.bindings_at_use(UseId::from(1)); + assert_eq!(use1.definitions(), &[DefinitionId::from(1)]); + assert_not!(use1.may_be_unbound()); +} + +#[test] +fn test_rhs_use_sees_previous_binding() { + let index = index("x <- 1\nx <- x + 1\nx"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // `x` on the RHS of the second assignment (use 0) + let rhs_use = map.bindings_at_use(UseId::from(0)); + assert_eq!(rhs_use.definitions(), &[DefinitionId::from(0)]); + + // Final `x` (use 1) + let final_use = map.bindings_at_use(UseId::from(1)); + assert_eq!(final_use.definitions(), &[DefinitionId::from(1)]); +} + +#[test] +fn test_different_symbols_independent() { + let index = index("x <- 1\ny <- 2\nx\ny"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let use_x = map.bindings_at_use(UseId::from(0)); + assert_eq!(use_x.definitions(), &[DefinitionId::from(0)]); + assert_not!(use_x.may_be_unbound()); + + let use_y = map.bindings_at_use(UseId::from(1)); + assert_eq!(use_y.definitions(), &[DefinitionId::from(1)]); + assert_not!(use_y.may_be_unbound()); +} + +#[test] +fn test_unbound_symbol() { + let index = index("x"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert!(bindings.definitions().is_empty()); + assert!(bindings.may_be_unbound()); +} + +// --- If/else --- + +#[test] +fn test_if_else_both_branches_define() { + let index = index( + "\ +x <- 1 # def 0 +if (cond) { + x <- 2 # def 1 +} else { + x <- 3 # def 2 +} +x # use 1 -> {def 1, def 2} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(1), + DefinitionId::from(2) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_if_only_one_branch_defines() { + let index = index( + "\ +x <- 1 # def 0 +if (cond) { + x <- 2 # def 1 +} +x # use 1 -> {def 0, def 1} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_if_no_prior_def_one_branch() { + let index = index( + "\ +if (cond) { + x <- 1 # def 0 +} +x # use 1 -> {def 0}, may_be_unbound +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_if_no_prior_def_both_branches() { + let index = index( + "\ +if (cond) { + x <- 1 # def 0 +} else { + x <- 2 # def 1 +} +x # use 1 -> {def 0, def 1}, not unbound +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_if_condition_uses_see_pre_if_state() { + let index = index("x <- 1\nif (x) {}"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Use of `x` in condition sees def 0. + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_nested_if_else() { + let index = index( + "\ +if (c1) { + if (c2) { + x <- 1 # def 0 + } else { + x <- 2 # def 1 + } +} else { + x <- 3 # def 2 +} +x # use 2 -> {def 0, def 1, def 2} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: c1 is use 0, c2 is use 1, final x is use 2. + let bindings = map.bindings_at_use(UseId::from(2)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1), + DefinitionId::from(2) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_if_else_without_braces() { + let index = index("if (cond) x <- 1 else x <- 2\nx"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +// --- For loops --- + +#[test] +fn test_for_variable_is_definite() { + let index = index("for (i in xs) {}\ni"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // `i` is always bound (R sets to NULL for empty sequences). + // Uses: `xs` is use 0, `i` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_for_body_assignment_is_conditional() { + let index = index( + "\ +for (i in xs) { # def 0 (i) + x <- 1 # def 1 +} +x # use 1 -> {def 1}, may_be_unbound (body may not execute) +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `xs` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_for_body_assignment_merges_with_pre_loop() { + let index = index( + "\ +x <- 0 # def 0 +for (i in xs) { # def 1 (i) + x <- 1 # def 2 +} +x # use 1 -> {def 0, def 2} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `xs` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(2) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_for_variable_used_inside_body() { + let index = index( + "\ +for (i in xs) { + print(i) # use of `i` inside body sees for-variable def +} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `xs` is use 0, `print` is use 1, `i` is use 2. + let bindings = map.bindings_at_use(UseId::from(2)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +// --- While loops --- + +#[test] +fn test_while_body_is_conditional() { + let index = index( + "\ +while (cond) { + x <- 1 # def 0 +} +x # use 1 -> {def 0}, may_be_unbound +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_while_merges_with_pre_loop() { + let index = index( + "\ +x <- 0 # def 0 +while (cond) { + x <- 1 # def 1 +} +x # use 1 -> {def 0, def 1} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_while_condition_use() { + let index = index("x <- 1\nwhile (x) {}"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Condition use sees def 0. + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +// --- Repeat loops --- + +#[test] +fn test_repeat_body_is_definite() { + let index = index( + "\ +repeat { + x <- 1 # def 0 + break +} +x # use 0 -> {def 0}, not unbound +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_repeat_shadows_prior_def() { + let index = index( + "\ +x <- 0 # def 0 +repeat { + x <- 1 # def 1 + break +} +x # use 0 -> {def 1} (repeat always executes) +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert_not!(bindings.may_be_unbound()); +} + +// --- Function scopes --- + +#[test] +fn test_function_scope_independent_use_def() { + let index = index( + "\ +x <- 1 # file: def 0 +f <- function(y) { # file: def 1, fun: def 0 (y param) + y # fun: use 0 +} +", + ); + let fun = ScopeId::from(1); + let map = index.use_def_map(fun); + + // In function scope, `y` (use 0) sees the parameter (def 0). + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_function_parameter_shadows() { + let index = index( + "\ +function(x) { # def 0 (param) + x <- 1 # def 1 + x # use 0 -> def 1 +} +", + ); + let fun = ScopeId::from(1); + let map = index.use_def_map(fun); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_function_unbound_reference() { + let index = index( + "\ +function() { + x # use 0: not bound in this scope +} +", + ); + let fun = ScopeId::from(1); + let map = index.use_def_map(fun); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert!(bindings.definitions().is_empty()); + assert!(bindings.may_be_unbound()); +} + +// --- Super-assignment --- + +#[test] +fn test_super_assignment_not_in_function_use_def() { + let index = index( + "\ +function() { + x <<- 1 # recorded here with IS_SUPER_BOUND, skipped by use-def + x # use 0: unbound in function scope +} +", + ); + let fun = ScopeId::from(1); + let map = index.use_def_map(fun); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert!(bindings.definitions().is_empty()); + assert!(bindings.may_be_unbound()); +} + +// --- Combined control flow --- + +#[test] +fn test_if_inside_for() { + let index = index( + "\ +for (i in xs) { # def 0 (i) + if (cond) { + x <- 1 # def 1 + } +} +x # use 2: may_be_unbound (loop might not run, if might not match) +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `xs` is use 0, `cond` is use 1, final `x` is use 2. + let bindings = map.bindings_at_use(UseId::from(2)); + assert!(bindings.definitions().contains(&DefinitionId::from(1))); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_if_else_inside_while() { + let index = index( + "\ +x <- 0 # def 0 +while (cond) { + if (c2) { + x <- 1 # def 1 + } else { + x <- 2 # def 2 + } +} +x # use 2 -> {def 0, def 1, def 2} (while may not execute) +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, `c2` is use 1, final `x` is use 2. + let bindings = map.bindings_at_use(UseId::from(2)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1), + DefinitionId::from(2) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_assignment_in_if_condition() { + // In R, `if (x <- f()) x` is valid: the `<-` in the condition creates + // a binding. The use of `x` in the consequence should see it. + let index = index("if (x <- f()) x"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `f` is use 0. The `x` in consequence is use 1. + // Def 0 is `x <- f()`. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +// --- Duplicate definitions don't appear twice in merge --- + +#[test] +fn test_merge_deduplicates() { + let index = index( + "\ +x <- 1 # def 0 +if (cond) { + y <- 1 # def 1 (different symbol) +} +x # use 1: should see only {def 0}, not {def 0, def 0} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +// --- Control flow inside functions --- + +#[test] +fn test_if_else_in_function() { + let index = index( + "\ +function(x) { # def 0 (param) + if (x) { + y <- 1 # def 1 + } else { + y <- 2 # def 2 + } + y # use 1 -> {def 1, def 2} +} +", + ); + let fun = ScopeId::from(1); + let map = index.use_def_map(fun); + + // In function scope: uses are `x` (use 0), then `y` (use 1). + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(1), + DefinitionId::from(2) + ]); + assert_not!(bindings.may_be_unbound()); +} + +// --- Multiple unrelated symbols through control flow --- + +#[test] +fn test_different_symbols_through_if() { + let index = index( + "\ +x <- 1 # def 0 +if (cond) { + y <- 2 # def 1 +} +x # use 1: sees def 0, not unbound +y # use 2: sees def 1, may_be_unbound +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, `x` is use 1, `y` is use 2. + let x_bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(x_bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(x_bindings.may_be_unbound()); + + let y_bindings = map.bindings_at_use(UseId::from(2)); + assert_eq!(y_bindings.definitions(), &[DefinitionId::from(1)]); + assert!(y_bindings.may_be_unbound()); +} + +// --- Connected component patterns from the design doc --- + +#[test] +fn test_design_doc_disconnected_components() { + let index = index( + "\ +x <- 1 # def 0 +print(x) # use 0 (print), use 1 (x) -> {def 0} +if (cond) { + x <- 2 # def 1 +} else { + x <- 3 # def 2 +} +print(x) # use 2 (cond), use 3 (print), use 4 (x) -> {def 1, def 2} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let first_x = map.bindings_at_use(UseId::from(1)); + assert_eq!(first_x.definitions(), &[DefinitionId::from(0)]); + assert_not!(first_x.may_be_unbound()); + + let second_x = map.bindings_at_use(UseId::from(4)); + assert_eq!(second_x.definitions(), &[ + DefinitionId::from(1), + DefinitionId::from(2) + ]); + assert_not!(second_x.may_be_unbound()); +} + +#[test] +fn test_design_doc_connected_component() { + let index = index( + "\ +x <- 1 # def 0 +print(x) # use 0 (print), use 1 (x) -> {def 0} +if (cond) { + x <- 2 # def 1 +} +print(x) # use 2 (cond), use 3 (print), use 4 (x) -> {def 0, def 1} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let first_x = map.bindings_at_use(UseId::from(1)); + assert_eq!(first_x.definitions(), &[DefinitionId::from(0)]); + + // Linked to both def 0 and def 1. + let second_x = map.bindings_at_use(UseId::from(4)); + assert_eq!(second_x.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(second_x.may_be_unbound()); +} diff --git a/crates/stdext/src/result.rs b/crates/stdext/src/result.rs index d39fed779..9a607d41f 100644 --- a/crates/stdext/src/result.rs +++ b/crates/stdext/src/result.rs @@ -31,6 +31,18 @@ macro_rules! soft_assert { }; } +/// Like `assert!` but for negative conditions. `assert_not!(x)` reads +/// more clearly than `assert!(!x)`. +#[macro_export] +macro_rules! assert_not { + ( $cond:expr $(,)? ) => { + assert!(!$cond); + }; + ( $cond:expr, $($fmt_arg:tt)+ ) => { + assert!(!$cond, $($fmt_arg)+); + }; +} + // From https://github.com/zed-industries/zed/blob/a910c594/crates/util/src/util.rs#L554 pub trait ResultExt { type Ok; From 985b851f80203a4efd16bd054a263ac1c538e9a1 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 10 Apr 2026 11:50:53 +0200 Subject: [PATCH 2/7] More accurate tracking of use-def-maps in loops --- crates/oak_index/src/builder.rs | 147 ++++++++++++++++++++---- crates/oak_index/src/semantic_index.rs | 10 ++ crates/oak_index/src/use_def_map.rs | 73 ++++++++++++ crates/oak_index/tests/builder.rs | 8 +- crates/oak_index/tests/use_def_map.rs | 151 +++++++++++++++++++++---- 5 files changed, 342 insertions(+), 47 deletions(-) diff --git a/crates/oak_index/src/builder.rs b/crates/oak_index/src/builder.rs index 3c2a62d8f..c629eb2e4 100644 --- a/crates/oak_index/src/builder.rs +++ b/crates/oak_index/src/builder.rs @@ -4,6 +4,7 @@ use aether_syntax::AnyRValue; use aether_syntax::RArgumentList; use aether_syntax::RBinaryExpression; use aether_syntax::RExpressionList; +use aether_syntax::RForStatement; use aether_syntax::RFunctionDefinition; use aether_syntax::RParameter; use aether_syntax::RParameters; @@ -27,6 +28,7 @@ use crate::semantic_index::ScopeId; use crate::semantic_index::ScopeKind; use crate::semantic_index::SemanticIndex; use crate::semantic_index::SymbolFlags; +use crate::semantic_index::SymbolId; use crate::semantic_index::SymbolTableBuilder; use crate::semantic_index::Use; use crate::semantic_index::UseId; @@ -286,7 +288,10 @@ impl SemanticIndexBuilder { let pre_loop = self.current_use_def.snapshot(); if let Ok(body) = stmt.body() { + let first_use = self.uses[self.current_scope].next_id(); + let loop_header = self.build_loop_header(body.syntax()); self.collect_expression(&body); + self.finish_loop_header(&loop_header, first_use); } self.current_use_def.merge(pre_loop); @@ -328,7 +333,10 @@ impl SemanticIndexBuilder { let pre_loop = self.current_use_def.snapshot(); if let Ok(body) = stmt.body() { + let first_use = self.uses[self.current_scope].next_id(); + let loop_header = self.build_loop_header(body.syntax()); self.collect_expression(&body); + self.finish_loop_header(&loop_header, first_use); } // Body may not execute @@ -338,7 +346,10 @@ impl SemanticIndexBuilder { AnyRExpression::RRepeatStatement(stmt) => { // Body always executes at least once, no snapshot needed if let Ok(body) = stmt.body() { + let first_use = self.uses[self.current_scope].next_id(); + let loop_header = self.build_loop_header(body.syntax()); self.collect_expression(&body); + self.finish_loop_header(&loop_header, first_use); } }, @@ -455,28 +466,11 @@ impl SemanticIndexBuilder { let target = if right { op.right() } else { op.left() }; let Ok(target) = target else { return }; - let (name, range) = match &target { - AnyRExpression::RIdentifier(ident) => { - let name = identifier_text(ident); - let range = ident.syntax().text_trimmed_range(); - (name, range) - }, - - // `"x" <- 1` is equivalent to `x <- 1` in R - AnyRExpression::AnyRValue(AnyRValue::RStringValue(s)) => { - let Some(name) = string_value_text(s) else { - return; - }; - let range = s.syntax().text_trimmed_range(); - (name, range) - }, - + let Some((name, range)) = assignment_target_name(&target) else { // Complex target (`x$foo <- rhs`, `x[1] <- rhs`, etc.) does // not represent a binding. We recurse for uses. - other => { - self.collect_expression(other); - return; - }, + self.collect_expression(&target); + return; }; if super_assign { @@ -495,6 +489,87 @@ impl SemanticIndexBuilder { } } + // Pre-walk a loop body to find all symbols that will be bound, then + // create `LoopHeader` placeholder definitions for each. These are + // recorded as additional (non-shadowing) bindings so that uses at the + // top of the body can see definitions from a previous iteration. + // After the body is visited, `finish_loop_header` resolves each + // placeholder to the real definitions that are live at the end of + // the body. + // + // Skips function bodies (different scope) and super-assignments (don't + // affect local flow). + fn build_loop_header(&mut self, body: &RSyntaxNode) -> Vec<(SymbolId, DefinitionId)> { + let names = Self::collect_loop_bound_names(body); + let mut loop_header = Vec::new(); + + for name in names { + let symbol_id = + self.symbol_tables[self.current_scope].intern(&name, SymbolFlags::IS_BOUND); + let def_id = self.definitions[self.current_scope].push(Definition { + symbol: symbol_id, + kind: DefinitionKind::LoopHeader, + range: body.text_trimmed_range(), + }); + + self.current_use_def.ensure_symbol(symbol_id); + self.current_use_def.record_loop_binding(symbol_id, def_id); + loop_header.push((symbol_id, def_id)); + } + + loop_header + } + + fn finish_loop_header(&mut self, loop_header: &[(SymbolId, DefinitionId)], first_use: UseId) { + for &(symbol_id, placeholder_id) in loop_header { + self.current_use_def + .resolve_placeholder(symbol_id, placeholder_id, first_use); + } + } + + // Keep in sync with `collect_expression`: Every construct that creates + // a definition there must be matched here so that loop headers account + // for all bindings in the body. + fn collect_loop_bound_names(body: &RSyntaxNode) -> Vec { + let mut names = Vec::new(); + let mut preorder = body.preorder(); + + while let Some(event) = preorder.next() { + let WalkEvent::Enter(node) = event else { + continue; + }; + + match node.kind() { + // Function bodies are separate scopes. In the future we'll need + // an indirection here to handle other kinds of local scopes, in + // particular from NSE functions like `local()`. + RSyntaxKind::R_FUNCTION_DEFINITION => { + preorder.skip_subtree(); + }, + + RSyntaxKind::R_BINARY_EXPRESSION => { + let op: RBinaryExpression = node.cast().unwrap(); + if let Some((name, _)) = assignment_target(&op) { + names.push(name); + } + }, + + RSyntaxKind::R_FOR_STATEMENT => { + let for_stmt: RForStatement = node.cast().unwrap(); + if let Ok(variable) = for_stmt.variable() { + names.push(identifier_text(&variable)); + } + }, + + _ => {}, + } + } + + names.sort(); + names.dedup(); + names + } + fn collect_arguments(&mut self, args: &RArgumentList) { for item in args.iter() { let Ok(arg) = item else { continue }; @@ -570,6 +645,38 @@ fn string_value_text(s: &aether_syntax::RStringValue) -> Option { Some(text[1..text.len() - 1].to_string()) } +/// For a local (non-super) assignment, extract the binding name and range. +/// Returns `None` if the expression is not an assignment, is a +/// super-assignment, or has a complex target (`x$foo`, `x[1]`, etc.). +fn assignment_target(bin: &RBinaryExpression) -> Option<(String, TextRange)> { + if !is_assignment(bin) || is_super_assignment(bin) { + return None; + } + let right = is_right_assignment(bin); + let target = if right { bin.right() } else { bin.left() }.ok()?; + assignment_target_name(&target) +} + +/// Extract the binding name and range from an assignment target expression. +/// Returns `None` for complex targets (`x$foo`, `x[1]`, etc.) that don't +/// represent simple name bindings. +fn assignment_target_name(target: &AnyRExpression) -> Option<(String, TextRange)> { + match target { + AnyRExpression::RIdentifier(ident) => { + let name = identifier_text(ident); + let range = ident.syntax().text_trimmed_range(); + Some((name, range)) + }, + // `"x" <- 1` is equivalent to `x <- 1` in R + AnyRExpression::AnyRValue(AnyRValue::RStringValue(s)) => { + let name = string_value_text(s)?; + let range = s.syntax().text_trimmed_range(); + Some((name, range)) + }, + _ => None, + } +} + fn is_super_assignment(bin: &RBinaryExpression) -> bool { let Ok(op) = bin.operator() else { return false; diff --git a/crates/oak_index/src/semantic_index.rs b/crates/oak_index/src/semantic_index.rs index 4c770140d..dce370575 100644 --- a/crates/oak_index/src/semantic_index.rs +++ b/crates/oak_index/src/semantic_index.rs @@ -390,6 +390,16 @@ pub enum DefinitionKind { SuperAssignment(RSyntaxNode), Parameter(RSyntaxNode), ForVariable(RSyntaxNode), + // The "loop header" is the entry point of a loop in the control flow + // graph. This placeholder definition, created at the loop header before + // visiting the body, represents a value carried from a previous + // iteration so that uses at the top of the body can see definitions + // from the bottom. After the body is visited, each placeholder is + // resolved to the real definitions that are live at the end of the + // body (see `finish_loop_header`). Resolved placeholders no longer + // appear in use-def results but remain in the definition arena to + // preserve `DefinitionId` stability (the arena is append-only). + LoopHeader, } impl Definition { diff --git a/crates/oak_index/src/use_def_map.rs b/crates/oak_index/src/use_def_map.rs index 34214c6b9..564706566 100644 --- a/crates/oak_index/src/use_def_map.rs +++ b/crates/oak_index/src/use_def_map.rs @@ -127,6 +127,33 @@ impl Bindings { self.may_be_unbound = false; } + /// Add a definition to the live set without clearing existing ones and + /// without changing `may_be_unbound`. Used for `LoopHeader` placeholder + /// definitions that represent a value carried from a previous iteration. + fn add_definition(&mut self, def_id: DefinitionId) { + let pos = self.definitions.partition_point(|&id| id < def_id); + if pos >= self.definitions.len() || self.definitions[pos] != def_id { + self.definitions.insert(pos, def_id); + } + } + + fn remove_definition(&mut self, def_id: DefinitionId) { + if let Some(pos) = self.definitions.iter().position(|&id| id == def_id) { + self.definitions.remove(pos); + } + } + + /// Replace `old` with `replacements` in the definition set. If `old` is + /// not present, this is a no-op. Maintains sorted order and deduplicates. + fn replace_definition(&mut self, old: DefinitionId, replacements: &[DefinitionId]) { + if let Some(pos) = self.definitions.iter().position(|&id| id == old) { + self.definitions.remove(pos); + for &def_id in replacements { + self.add_definition(def_id); + } + } + } + /// Union definitions from `other` into `self`, OR the `may_be_unbound` /// flags. Both sides are sorted by `DefinitionId`, so this is a linear /// merge-join. @@ -206,6 +233,52 @@ impl UseDefMapBuilder { self.symbol_states[symbol_id].record_binding(def_id); } + /// Record a loop-carried binding. Adds the definition to the symbol's + /// live set without clearing existing definitions or changing + /// `may_be_unbound`. This represents a value that might arrive from a + /// previous loop iteration. + pub(crate) fn record_loop_binding(&mut self, symbol_id: SymbolId, def_id: DefinitionId) { + self.symbol_states[symbol_id].add_definition(def_id); + } + + /// Resolve a `LoopHeader` placeholder to the real definitions that + /// are live at the end of the loop body. Replaces the placeholder in + /// all `bindings_by_use` entries recorded during the body (from + /// `first_use` onwards) and removes it from the live symbol state. + /// + /// When Salsa is introduced, this eager rewriting will be replaced by + /// Salsa's `specify` mechanism: the synthesis step will create + /// Salsa-tracked `LoopToken` definitions (like ty's `LoopHeader`), + /// and this populate step will call `specify` on those tokens instead + /// of rewriting `bindings_by_use` in place. That lets downstream + /// queries lazily resolve loop headers and skip re-computation when + /// the set of reaching definitions hasn't changed. + pub(crate) fn resolve_placeholder( + &mut self, + symbol_id: SymbolId, + placeholder_id: DefinitionId, + first_use: UseId, + ) { + let replacements: SmallVec<[DefinitionId; 2]> = self.symbol_states[symbol_id] + .definitions() + .iter() + .filter(|&&id| id != placeholder_id) + .copied() + .collect(); + + for i in first_use.index()..self.bindings_by_use.len() { + let use_id = UseId::new(i); + self.bindings_by_use[use_id].replace_definition(placeholder_id, &replacements); + } + + // Prevent the placeholder from leaking into post-loop state. When the + // body unconditionally assigns the symbol, `record_binding()` already + // cleared the placeholder, so this is a no-op. When the assignment is + // conditional, the placeholder would survive and leak if we did not + // explicitly remove it here. + self.symbol_states[symbol_id].remove_definition(placeholder_id); + } + /// Record a use of `symbol_id`. Clones the current live bindings for that /// symbol and associates them with `use_id`. pub(crate) fn record_use(&mut self, symbol_id: SymbolId, use_id: UseId) { diff --git a/crates/oak_index/tests/builder.rs b/crates/oak_index/tests/builder.rs index 74a307f9f..5dbd78ef4 100644 --- a/crates/oak_index/tests/builder.rs +++ b/crates/oak_index/tests/builder.rs @@ -1157,7 +1157,10 @@ fn test_nested_for_loops() { let j = index.symbols(file).get("j").unwrap(); assert_eq!(j.flags(), SymbolFlags::IS_BOUND.union(SymbolFlags::IS_USED)); - assert_eq!(index.definitions(file).len(), 2); + // 2 real defs (i, j) + 1 placeholder LoopHeader (j from inner for, + // discovered by outer loop's pre-walk). Note the placeholder definitions + // remain in the Definition arena to preserve IDs. The arena is append-only. + assert_eq!(index.definitions(file).len(), 3); } // --- Assignment in loop body --- @@ -1173,5 +1176,6 @@ fn test_assignment_in_for_body() { let x = index.symbols(file).get("x").unwrap(); assert_eq!(x.flags(), SymbolFlags::IS_BOUND); - assert_eq!(index.definitions(file).len(), 2); + // 2 real defs (i, x) + 1 placeholder LoopHeader (x from body pre-walk) + assert_eq!(index.definitions(file).len(), 3); } diff --git a/crates/oak_index/tests/use_def_map.rs b/crates/oak_index/tests/use_def_map.rs index c842e9fdc..ac6cca17d 100644 --- a/crates/oak_index/tests/use_def_map.rs +++ b/crates/oak_index/tests/use_def_map.rs @@ -274,9 +274,9 @@ fn test_for_body_assignment_is_conditional() { let index = index( "\ for (i in xs) { # def 0 (i) - x <- 1 # def 1 + x <- 1 # def 2 } -x # use 1 -> {def 1}, may_be_unbound (body may not execute) +x # use 1 -> {def 2}, may_be_unbound (body may not execute) ", ); let file = ScopeId::from(0); @@ -284,7 +284,7 @@ x # use 1 -> {def 1}, may_be_unbound (body may not execute) // Uses: `xs` is use 0, final `x` is use 1. let bindings = map.bindings_at_use(UseId::from(1)); - assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert_eq!(bindings.definitions(), &[DefinitionId::from(2)]); assert!(bindings.may_be_unbound()); } @@ -294,9 +294,9 @@ fn test_for_body_assignment_merges_with_pre_loop() { "\ x <- 0 # def 0 for (i in xs) { # def 1 (i) - x <- 1 # def 2 + x <- 1 # def 3 } -x # use 1 -> {def 0, def 2} +x # use 1 -> {def 0, def 3} ", ); let file = ScopeId::from(0); @@ -306,7 +306,7 @@ x # use 1 -> {def 0, def 2} let bindings = map.bindings_at_use(UseId::from(1)); assert_eq!(bindings.definitions(), &[ DefinitionId::from(0), - DefinitionId::from(2) + DefinitionId::from(3) ]); assert_not!(bindings.may_be_unbound()); } @@ -336,9 +336,9 @@ fn test_while_body_is_conditional() { let index = index( "\ while (cond) { - x <- 1 # def 0 + x <- 1 # def 1 } -x # use 1 -> {def 0}, may_be_unbound +x # use 1 -> {def 1}, may_be_unbound ", ); let file = ScopeId::from(0); @@ -346,7 +346,7 @@ x # use 1 -> {def 0}, may_be_unbound // Uses: `cond` is use 0, final `x` is use 1. let bindings = map.bindings_at_use(UseId::from(1)); - assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); assert!(bindings.may_be_unbound()); } @@ -356,9 +356,9 @@ fn test_while_merges_with_pre_loop() { "\ x <- 0 # def 0 while (cond) { - x <- 1 # def 1 + x <- 1 # def 2 } -x # use 1 -> {def 0, def 1} +x # use 1 -> {def 0, def 2} ", ); let file = ScopeId::from(0); @@ -368,7 +368,7 @@ x # use 1 -> {def 0, def 1} let bindings = map.bindings_at_use(UseId::from(1)); assert_eq!(bindings.definitions(), &[ DefinitionId::from(0), - DefinitionId::from(1) + DefinitionId::from(2) ]); assert_not!(bindings.may_be_unbound()); } @@ -392,17 +392,17 @@ fn test_repeat_body_is_definite() { let index = index( "\ repeat { - x <- 1 # def 0 + x <- 1 # def 1 break } -x # use 0 -> {def 0}, not unbound +x # use 0 -> {def 1}, not unbound ", ); let file = ScopeId::from(0); let map = index.use_def_map(file); let bindings = map.bindings_at_use(UseId::from(0)); - assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); assert_not!(bindings.may_be_unbound()); } @@ -412,20 +412,120 @@ fn test_repeat_shadows_prior_def() { "\ x <- 0 # def 0 repeat { - x <- 1 # def 1 + x <- 1 # def 2 break } -x # use 0 -> {def 1} (repeat always executes) +x # use 0 -> {def 2} (repeat always executes) ", ); let file = ScopeId::from(0); let map = index.use_def_map(file); let bindings = map.bindings_at_use(UseId::from(0)); - assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert_eq!(bindings.definitions(), &[DefinitionId::from(2)]); + assert_not!(bindings.may_be_unbound()); +} + +// --- Loop-carried definitions --- +// +// Uses at the top of a loop body can see definitions from the bottom of the +// body (from a previous iteration). The builder synthesizes `LoopHeader` +// placeholders before visiting the body, then populates them with the real +// definitions that are live at the end of the body. The placeholders never +// appear in `bindings_at_use` results. + +#[test] +fn test_while_loop_carried_def() { + let index = index( + "\ +x <- 0 # def 0 +while (cond) { + x # use 1: sees {def 0, def 2} (pre-loop OR previous iteration) + x <- 1 # def 2 +} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, `x` inside body is use 1. + // def 1 is the LoopHeader for `x`, populated with {def 2}. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(2) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_for_loop_carried_def() { + let index = index( + "\ +x <- 0 # def 0 +for (i in xs) { # def 1 (i) + x # use 1: sees {def 0, def 3} + x <- 1 # def 3 +} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `xs` is use 0, `x` inside body is use 1. + // def 2 is the LoopHeader for `x`, populated with {def 3}. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(3) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_repeat_loop_carried_def() { + let index = index( + "\ +x <- 0 # def 0 +repeat { + x # use 0: sees {def 0, def 2} + x <- 1 # def 2 + break +} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // def 1 is the LoopHeader for `x`, populated with {def 2}. + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(2) + ]); assert_not!(bindings.may_be_unbound()); } +#[test] +fn test_loop_carried_unbound_before_loop() { + let index = index( + "\ +while (cond) { + x # use 1: sees {def 1}, may_be_unbound + x <- 1 # def 1 +} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, `x` inside body is use 1. + // def 0 is the LoopHeader for `x`, populated with {def 1}. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert!(bindings.may_be_unbound()); +} + // --- Function scopes --- #[test] @@ -510,7 +610,7 @@ fn test_if_inside_for() { "\ for (i in xs) { # def 0 (i) if (cond) { - x <- 1 # def 1 + x <- 1 # def 2 } } x # use 2: may_be_unbound (loop might not run, if might not match) @@ -520,8 +620,9 @@ x # use 2: may_be_unbound (loop might not run, if might not match) let map = index.use_def_map(file); // Uses: `xs` is use 0, `cond` is use 1, final `x` is use 2. + // def 1 is the LoopHeader for `x`, populated before merge. let bindings = map.bindings_at_use(UseId::from(2)); - assert!(bindings.definitions().contains(&DefinitionId::from(1))); + assert_eq!(bindings.definitions(), &[DefinitionId::from(2)]); assert!(bindings.may_be_unbound()); } @@ -532,12 +633,12 @@ fn test_if_else_inside_while() { x <- 0 # def 0 while (cond) { if (c2) { - x <- 1 # def 1 + x <- 1 # def 2 } else { - x <- 2 # def 2 + x <- 2 # def 3 } } -x # use 2 -> {def 0, def 1, def 2} (while may not execute) +x # use 2 -> {def 0, def 2, def 3} (while may not execute) ", ); let file = ScopeId::from(0); @@ -547,8 +648,8 @@ x # use 2 -> {def 0, def 1, def 2} (while may not execute) let bindings = map.bindings_at_use(UseId::from(2)); assert_eq!(bindings.definitions(), &[ DefinitionId::from(0), - DefinitionId::from(1), - DefinitionId::from(2) + DefinitionId::from(2), + DefinitionId::from(3) ]); assert_not!(bindings.may_be_unbound()); } From 7d7d85c37ce615423d9adbde4e2ceecd0f72b32e Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 10 Apr 2026 13:33:34 +0200 Subject: [PATCH 3/7] More accurate tracking of super assignments --- crates/oak_index/src/builder.rs | 72 ++++++++++++- crates/oak_index/src/use_def_map.rs | 16 +-- crates/oak_index/tests/builder.rs | 101 +++++++++++++------ crates/oak_index/tests/use_def_map.rs | 139 +++++++++++++++++++++++++- 4 files changed, 281 insertions(+), 47 deletions(-) diff --git a/crates/oak_index/src/builder.rs b/crates/oak_index/src/builder.rs index c629eb2e4..3c1b1fe37 100644 --- a/crates/oak_index/src/builder.rs +++ b/crates/oak_index/src/builder.rs @@ -151,20 +151,84 @@ impl SemanticIndexBuilder { }); self.current_use_def.ensure_symbol(symbol_id); - self.current_use_def.record_binding(symbol_id, def_id); + self.current_use_def.record_definition(symbol_id, def_id); } // Super-assignment is lexically in the current scope but binds in an - // ancestor. We record the definition here (for go-to-definition, rename) - // but skip use-def tracking since the binding doesn't affect local flow. + // ancestor. We record the definition in the current scope and append + // it to the target scope's use-def map (without shadowing prior + // definitions). + // + // R's `<<-` walks up the environment chain from the parent, targeting + // the first scope where the symbol is already bound. If no binding is + // found, it assigns in the global (file) scope. fn add_super_definition(&mut self, name: &str, kind: DefinitionKind, range: TextRange) { let symbol_id = self.symbol_tables[self.current_scope].intern(name, SymbolFlags::IS_SUPER_BOUND); self.definitions[self.current_scope].push(Definition { symbol: symbol_id, + kind: kind.clone(), + range, + }); + + let target_scope = self.resolve_super_target(name); + + let target_symbol = self.symbol_tables[target_scope].intern(name, SymbolFlags::IS_BOUND); + let target_def_id = self.definitions[target_scope].push(Definition { + symbol: target_symbol, kind, range, }); + + let builder = self.use_def_builder_mut(target_scope); + builder.ensure_symbol(target_symbol); + builder.append_definition(target_symbol, target_def_id); + } + + // Walk up from the parent scope looking for a scope where `name` already + // has `IS_BOUND`. Returns that scope, or the file scope if no binding is + // found (mirroring R's assignment to the global environment). + fn resolve_super_target(&self, name: &str) -> ScopeId { + let file_scope = ScopeId::from(0); + let mut scope = match self.scopes[self.current_scope].parent { + Some(parent) => parent, + None => return file_scope, + }; + + loop { + if let Some(id) = self.symbol_tables[scope].id(name) { + if self.symbol_tables[scope] + .symbol(id) + .flags() + .contains(SymbolFlags::IS_BOUND) + { + return scope; + } + } + scope = match self.scopes[scope].parent { + Some(parent) => parent, + None => return file_scope, + }; + } + } + + fn use_def_builder_mut(&mut self, target: ScopeId) -> &mut UseDefMapBuilder { + if target == self.current_scope { + return &mut self.current_use_def; + } + + let mut steps = 0; + let mut scope = self.current_scope; + while scope != target { + scope = match self.scopes[scope].parent { + Some(parent) => parent, + None => panic!("Target scope {target:?} is not an ancestor of current scope"), + }; + steps += 1; + } + + let index = self.use_def_stack.len() - steps; + &mut self.use_def_stack[index] } fn add_use(&mut self, name: &str, range: TextRange) { @@ -513,7 +577,7 @@ impl SemanticIndexBuilder { }); self.current_use_def.ensure_symbol(symbol_id); - self.current_use_def.record_loop_binding(symbol_id, def_id); + self.current_use_def.append_definition(symbol_id, def_id); loop_header.push((symbol_id, def_id)); } diff --git a/crates/oak_index/src/use_def_map.rs b/crates/oak_index/src/use_def_map.rs index 564706566..462e0be72 100644 --- a/crates/oak_index/src/use_def_map.rs +++ b/crates/oak_index/src/use_def_map.rs @@ -121,7 +121,7 @@ impl Bindings { /// Replace all live definitions with a single new one, marking the /// symbol as definitely bound. - fn record_binding(&mut self, def_id: DefinitionId) { + fn record_definition(&mut self, def_id: DefinitionId) { self.definitions.clear(); self.definitions.push(def_id); self.may_be_unbound = false; @@ -229,15 +229,15 @@ impl UseDefMapBuilder { /// Record a new binding for `symbol_id`. Replaces (shadows) all previous /// live definitions for that symbol. - pub(crate) fn record_binding(&mut self, symbol_id: SymbolId, def_id: DefinitionId) { - self.symbol_states[symbol_id].record_binding(def_id); + pub(crate) fn record_definition(&mut self, symbol_id: SymbolId, def_id: DefinitionId) { + self.symbol_states[symbol_id].record_definition(def_id); } - /// Record a loop-carried binding. Adds the definition to the symbol's - /// live set without clearing existing definitions or changing - /// `may_be_unbound`. This represents a value that might arrive from a - /// previous loop iteration. - pub(crate) fn record_loop_binding(&mut self, symbol_id: SymbolId, def_id: DefinitionId) { + /// Add a definition to the symbol's live set without clearing existing + /// definitions or changing `may_be_unbound`. Unlike `record_binding` + /// which shadows all prior definitions, this appends to the live set. + /// Used for loop-header placeholders and super-assignments. + pub(crate) fn append_definition(&mut self, symbol_id: SymbolId, def_id: DefinitionId) { self.symbol_states[symbol_id].add_definition(def_id); } diff --git a/crates/oak_index/tests/builder.rs b/crates/oak_index/tests/builder.rs index 5dbd78ef4..e46d83776 100644 --- a/crates/oak_index/tests/builder.rs +++ b/crates/oak_index/tests/builder.rs @@ -549,20 +549,28 @@ fn test_repeat_loop() { #[test] fn test_super_assignment_at_file_scope() { - // At file scope, `<<-` records in file scope with IS_SUPER_BOUND + // At file scope, `<<-` targets the file scope itself (no parent to + // walk to), so the symbol gets both IS_SUPER_BOUND and IS_BOUND. let index = index("x <<- 1"); let file = ScopeId::from(0); let x = index.symbols(file).get("x").unwrap(); - assert_eq!(x.flags(), SymbolFlags::IS_SUPER_BOUND); + assert_eq!( + x.flags(), + SymbolFlags::IS_SUPER_BOUND.union(SymbolFlags::IS_BOUND) + ); - assert_eq!(index.definitions(file).len(), 1); - let DefinitionKind::SuperAssignment(node) = - index.definitions(file)[DefinitionId::from(0)].kind() - else { - panic!("expected SuperAssignment"); - }; - assert_eq!(node.kind(), RSyntaxKind::R_BINARY_EXPRESSION); + // Two definitions: one from the current-scope recording, one from the + // target-scope recording (same scope in this case). + assert_eq!(index.definitions(file).len(), 2); + assert!(matches!( + index.definitions(file)[DefinitionId::from(0)].kind(), + DefinitionKind::SuperAssignment(_) + )); + assert!(matches!( + index.definitions(file)[DefinitionId::from(1)].kind(), + DefinitionKind::SuperAssignment(_) + )); assert_eq!(index.uses(file).len(), 0); } @@ -572,9 +580,12 @@ fn test_super_assignment_right_at_file_scope() { let file = ScopeId::from(0); let x = index.symbols(file).get("x").unwrap(); - assert_eq!(x.flags(), SymbolFlags::IS_SUPER_BOUND); + assert_eq!( + x.flags(), + SymbolFlags::IS_SUPER_BOUND.union(SymbolFlags::IS_BOUND) + ); - assert_eq!(index.definitions(file).len(), 1); + assert_eq!(index.definitions(file).len(), 2); assert!(matches!( index.definitions(file)[DefinitionId::from(0)].kind(), DefinitionKind::SuperAssignment(_) @@ -585,16 +596,23 @@ fn test_super_assignment_right_at_file_scope() { #[test] fn test_super_assignment_recorded_in_current_scope() { // `<<-` records the definition in the function scope where it lexically - // appears, not in an ancestor scope. + // appears AND an extra definition in the parent scope. let index = index("f <- function() { x <<- 1 }"); let file = ScopeId::from(0); let fun = ScopeId::from(1); - // File scope only has `f` - assert!(index.symbols(file).get("x").is_none()); - assert_eq!(index.definitions(file).len(), 1); + // File scope has `x` with IS_BOUND (extra definition from `<<-`) + // and `f` with IS_BOUND. The `x <<-` definition is added during + // function body processing, before `f <-`. + let x_file = index.symbols(file).get("x").unwrap(); + assert_eq!(x_file.flags(), SymbolFlags::IS_BOUND); + assert_eq!(index.definitions(file).len(), 2); assert!(matches!( index.definitions(file)[DefinitionId::from(0)].kind(), + DefinitionKind::SuperAssignment(_) + )); + assert!(matches!( + index.definitions(file)[DefinitionId::from(1)].kind(), DefinitionKind::Assignment(_) )); @@ -614,7 +632,8 @@ fn test_super_assignment_right_recorded_in_current_scope() { let file = ScopeId::from(0); let fun = ScopeId::from(1); - assert!(index.symbols(file).get("x").is_none()); + let x_file = index.symbols(file).get("x").unwrap(); + assert_eq!(x_file.flags(), SymbolFlags::IS_BOUND); let x = index.symbols(fun).get("x").unwrap(); assert_eq!(x.flags(), SymbolFlags::IS_SUPER_BOUND); @@ -623,12 +642,13 @@ fn test_super_assignment_right_recorded_in_current_scope() { #[test] fn test_super_assignment_does_not_pollute_ancestor() { // `x <- 1` is in file scope, `x <<- 2` is in the function. The `<<-` - // does NOT add a definition to the file scope. + // adds an extra definition to the file scope in addition to the + // existing `x <- 1` assignment. let index = index("x <- 1\nf <- function() { x <<- 2 }"); let file = ScopeId::from(0); let fun = ScopeId::from(1); - // File scope: `x` has IS_BOUND from the `<-`, `f` has IS_BOUND + // File scope: `x` has IS_BOUND (from both `<-` and `<<-`), `f` has IS_BOUND let x_file = index.symbols(file).get("x").unwrap(); assert_eq!(x_file.flags(), SymbolFlags::IS_BOUND); @@ -637,11 +657,15 @@ fn test_super_assignment_does_not_pollute_ancestor() { .iter() .filter(|(_, d)| index.symbols(file).symbol(d.symbol()).name() == "x") .collect(); - assert_eq!(x_file_defs.len(), 1); + assert_eq!(x_file_defs.len(), 2); assert!(matches!( x_file_defs[0].1.kind(), DefinitionKind::Assignment(_) )); + assert!(matches!( + x_file_defs[1].1.kind(), + DefinitionKind::SuperAssignment(_) + )); // Function scope: `x` has IS_SUPER_BOUND from the `<<-` let x_fun = index.symbols(fun).get("x").unwrap(); @@ -656,7 +680,8 @@ fn test_super_assignment_does_not_pollute_ancestor() { #[test] fn test_super_assignment_nested_recorded_in_inner_scope() { // `x` is bound in both file and outer function. `<<-` in the inner - // function records the definition in the inner scope, not in any ancestor. + // function targets the outer function scope (immediate parent), adding + // an extra definition there. let index = index("x <- 0\nf <- function() { x <- 1; g <- function() { x <<- 2 } }"); let file = ScopeId::from(0); let outer = ScopeId::from(1); @@ -666,7 +691,8 @@ fn test_super_assignment_nested_recorded_in_inner_scope() { let x_file = index.symbols(file).get("x").unwrap(); assert_eq!(x_file.flags(), SymbolFlags::IS_BOUND); - // Outer function: `x` has IS_BOUND (from `<-`), no super-assignment here + // Outer function: `x` has IS_BOUND (from both `x <- 1` and the `<<-` + // extra definition from the inner function) let x_outer = index.symbols(outer).get("x").unwrap(); assert_eq!(x_outer.flags(), SymbolFlags::IS_BOUND); @@ -675,11 +701,15 @@ fn test_super_assignment_nested_recorded_in_inner_scope() { .iter() .filter(|(_, d)| index.symbols(outer).symbol(d.symbol()).name() == "x") .collect(); - assert_eq!(x_outer_defs.len(), 1); + assert_eq!(x_outer_defs.len(), 2); assert!(matches!( x_outer_defs[0].1.kind(), DefinitionKind::Assignment(_) )); + assert!(matches!( + x_outer_defs[1].1.kind(), + DefinitionKind::SuperAssignment(_) + )); // Inner function: `x` has IS_SUPER_BOUND (from `<<-`) let x_inner = index.symbols(inner).get("x").unwrap(); @@ -693,8 +723,9 @@ fn test_super_assignment_nested_recorded_in_inner_scope() { #[test] fn test_super_assignment_coexists_with_use_in_ancestors() { - // Outer function uses `x` but doesn't bind it. `<<-` in the inner - // function records in the inner scope. Ancestors are unaffected. + // `<<-` in inner function walks up from outer, finds `x` bound in file + // scope (from `x <- 1`), so it targets the file scope -- not the outer + // function where `x` is only used. let index = index("x <- 1\nf <- function() { print(x); g <- function() { x <<- 2 } }"); let file = ScopeId::from(0); let outer = ScopeId::from(1); @@ -703,7 +734,8 @@ fn test_super_assignment_coexists_with_use_in_ancestors() { let x_file = index.symbols(file).get("x").unwrap(); assert_eq!(x_file.flags(), SymbolFlags::IS_BOUND); - // Outer function has `x` as `IS_USED` only (from `print(x)`) + // Outer function has `x` as IS_USED only (from `print(x)`). The `<<-` + // skips it because `x` is not bound here. let x_outer = index.symbols(outer).get("x").unwrap(); assert_eq!(x_outer.flags(), SymbolFlags::IS_USED); @@ -714,22 +746,24 @@ fn test_super_assignment_coexists_with_use_in_ancestors() { #[test] fn test_super_assignment_not_visible_to_resolve_symbol() { - // `<<-` does not create a binding visible to `resolve_symbol` (which - // checks IS_BOUND, not IS_SUPER_BOUND). Cross-scope effects of `<<-` - // are a runtime concern, not statically modelled. + // `<<-` creates an extra definition in the parent scope with IS_BOUND, + // which is visible to `resolve_symbol`. let index = index("f <- function() { x <<- 1 }"); let file = ScopeId::from(0); let fun = ScopeId::from(1); - // File scope has no `x` - assert!(index.symbols(file).get("x").is_none()); + // File scope has `x` with IS_BOUND (extra definition from `<<-`) + let x_file = index.symbols(file).get("x").unwrap(); + assert_eq!(x_file.flags(), SymbolFlags::IS_BOUND); // Function scope has `x` with IS_SUPER_BOUND let x = index.symbols(fun).get("x").unwrap(); assert_eq!(x.flags(), SymbolFlags::IS_SUPER_BOUND); - // resolve_symbol does not find `x` because no scope has IS_BOUND for it - assert!(index.resolve_symbol("x", fun).is_none()); + // `resolve_symbol` finds `x` in the file scope via the extra definition + let resolved = index.resolve_symbol("x", fun); + assert!(resolved.is_some()); + assert_eq!(resolved.unwrap().0, file); } #[test] @@ -1122,7 +1156,8 @@ fn test_string_super_assignment() { let file = ScopeId::from(0); let fun = ScopeId::from(1); - assert!(index.symbols(file).get("x").is_none()); + let x_file = index.symbols(file).get("x").unwrap(); + assert_eq!(x_file.flags(), SymbolFlags::IS_BOUND); let x = index.symbols(fun).get("x").unwrap(); assert_eq!(x.flags(), SymbolFlags::IS_SUPER_BOUND); diff --git a/crates/oak_index/tests/use_def_map.rs b/crates/oak_index/tests/use_def_map.rs index ac6cca17d..de63b64c0 100644 --- a/crates/oak_index/tests/use_def_map.rs +++ b/crates/oak_index/tests/use_def_map.rs @@ -589,8 +589,8 @@ fn test_super_assignment_not_in_function_use_def() { let index = index( "\ function() { - x <<- 1 # recorded here with IS_SUPER_BOUND, skipped by use-def - x # use 0: unbound in function scope + x <<- 1 # fun: def 0, recorded with IS_SUPER_BOUND, skipped by use-def + x # fun: use 0: unbound in function scope } ", ); @@ -602,6 +602,141 @@ function() { assert!(bindings.may_be_unbound()); } +#[test] +fn test_super_assignment_visible_in_parent_use_def() { + let index = index( + "\ +x <- 1 # file: def 0 +f <- function() { x <<- 2 } # file: def 1 (x <<- extra def), def 2 (f) +x # file: use 0 -> {def 0, def 1} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // The <<- extra definition (def 1) is recorded in the file scope + // during function body processing, before the `f <-` assignment (def 2). + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_super_assignment_without_prior_def() { + let index = index( + "\ +f <- function() { x <<- 1 } # file: def 0 (x <<- extra def), def 1 (f) +x # file: use 0 -> {def 0}, may_be_unbound +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_fixme_super_assignment_not_visible_before_function_def() { + let index = index( + "\ +x # file: use 0 -> unbound +f <- function() { x <<- 1 } # file: def 0 (f), def 1 (x <<- in parent) +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // The use of `x` precedes the `<<-` definition in the flow. + // FIXME: Ideally the use would be mapped to the extra definition. + let bindings = map.bindings_at_use(UseId::from(0)); + assert!(bindings.definitions().is_empty()); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_super_assignment_merges_with_if() { + let index = index( + "\ +x <- 1 # file: def 0 +if (cond) { + f <- function() { x <<- 2 } # file: def 1 (x <<- extra def), def 2 (f) +} +x # use 1 -> {def 0, def 1} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_super_assignment_targets_grandparent() { + let index = index( + "\ +x <- 1 # file: def 0 +f <- function() { # file: def 2 (f) + g <- function() { x <<- 2 } # file: def 1 (x <<-) +} +x # file: use 0 -> {def 0, def 1} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // `<<-` in g walks up: g's parent is f, f has no binding for x, + // so it continues to file scope where x has IS_BOUND. The extra + // binding lands in the file scope, skipping the intermediate f scope. + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_super_assignment_targets_intermediate_scope() { + let index = index( + "\ +x <- 1 # file: def 0 +f <- function() { + x <- 10 # outer: def 0 + g <- function() { x <<- 2 } # outer: def 1 (x <<-) +} +x # file: use 0 -> {def 0} only +", + ); + let file = ScopeId::from(0); + let outer = ScopeId::from(1); + + // `<<-` in g walks up: g's parent is f, f has x with IS_BOUND + // (from `x <- 10`), so it targets f -- not the file scope. + let file_map = index.use_def_map(file); + let bindings = file_map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); + + // The extra binding is in f's scope, not the file scope. + let outer_defs: Vec<_> = index + .definitions(outer) + .iter() + .filter(|(_, d)| index.symbols(outer).symbol(d.symbol()).name() == "x") + .collect(); + assert_eq!(outer_defs.len(), 2); +} + // --- Combined control flow --- #[test] From 574aadfb8b96717c0249d5e7359d62ee160f33f7 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 10 Apr 2026 14:14:39 +0200 Subject: [PATCH 4/7] Simpler fixup of deferred definitions --- crates/oak_index/src/builder.rs | 107 +------------- crates/oak_index/src/semantic_index.rs | 10 -- crates/oak_index/src/use_def_map.rs | 185 ++++++++++++++++--------- crates/oak_index/tests/builder.rs | 10 +- crates/oak_index/tests/use_def_map.rs | 91 ++++++------ 5 files changed, 175 insertions(+), 228 deletions(-) diff --git a/crates/oak_index/src/builder.rs b/crates/oak_index/src/builder.rs index 3c1b1fe37..0666f912e 100644 --- a/crates/oak_index/src/builder.rs +++ b/crates/oak_index/src/builder.rs @@ -4,7 +4,6 @@ use aether_syntax::AnyRValue; use aether_syntax::RArgumentList; use aether_syntax::RBinaryExpression; use aether_syntax::RExpressionList; -use aether_syntax::RForStatement; use aether_syntax::RFunctionDefinition; use aether_syntax::RParameter; use aether_syntax::RParameters; @@ -28,7 +27,6 @@ use crate::semantic_index::ScopeId; use crate::semantic_index::ScopeKind; use crate::semantic_index::SemanticIndex; use crate::semantic_index::SymbolFlags; -use crate::semantic_index::SymbolId; use crate::semantic_index::SymbolTableBuilder; use crate::semantic_index::Use; use crate::semantic_index::UseId; @@ -182,7 +180,7 @@ impl SemanticIndexBuilder { let builder = self.use_def_builder_mut(target_scope); builder.ensure_symbol(target_symbol); - builder.append_definition(target_symbol, target_def_id); + builder.record_deferred_definition(target_symbol, target_def_id); } // Walk up from the parent scope looking for a scope where `name` already @@ -353,9 +351,8 @@ impl SemanticIndexBuilder { if let Ok(body) = stmt.body() { let first_use = self.uses[self.current_scope].next_id(); - let loop_header = self.build_loop_header(body.syntax()); self.collect_expression(&body); - self.finish_loop_header(&loop_header, first_use); + self.current_use_def.finish_loop_defs(&pre_loop, first_use); } self.current_use_def.merge(pre_loop); @@ -398,9 +395,8 @@ impl SemanticIndexBuilder { if let Ok(body) = stmt.body() { let first_use = self.uses[self.current_scope].next_id(); - let loop_header = self.build_loop_header(body.syntax()); self.collect_expression(&body); - self.finish_loop_header(&loop_header, first_use); + self.current_use_def.finish_loop_defs(&pre_loop, first_use); } // Body may not execute @@ -410,10 +406,10 @@ impl SemanticIndexBuilder { AnyRExpression::RRepeatStatement(stmt) => { // Body always executes at least once, no snapshot needed if let Ok(body) = stmt.body() { + let pre_loop = self.current_use_def.snapshot(); let first_use = self.uses[self.current_scope].next_id(); - let loop_header = self.build_loop_header(body.syntax()); self.collect_expression(&body); - self.finish_loop_header(&loop_header, first_use); + self.current_use_def.finish_loop_defs(&pre_loop, first_use); } }, @@ -553,87 +549,6 @@ impl SemanticIndexBuilder { } } - // Pre-walk a loop body to find all symbols that will be bound, then - // create `LoopHeader` placeholder definitions for each. These are - // recorded as additional (non-shadowing) bindings so that uses at the - // top of the body can see definitions from a previous iteration. - // After the body is visited, `finish_loop_header` resolves each - // placeholder to the real definitions that are live at the end of - // the body. - // - // Skips function bodies (different scope) and super-assignments (don't - // affect local flow). - fn build_loop_header(&mut self, body: &RSyntaxNode) -> Vec<(SymbolId, DefinitionId)> { - let names = Self::collect_loop_bound_names(body); - let mut loop_header = Vec::new(); - - for name in names { - let symbol_id = - self.symbol_tables[self.current_scope].intern(&name, SymbolFlags::IS_BOUND); - let def_id = self.definitions[self.current_scope].push(Definition { - symbol: symbol_id, - kind: DefinitionKind::LoopHeader, - range: body.text_trimmed_range(), - }); - - self.current_use_def.ensure_symbol(symbol_id); - self.current_use_def.append_definition(symbol_id, def_id); - loop_header.push((symbol_id, def_id)); - } - - loop_header - } - - fn finish_loop_header(&mut self, loop_header: &[(SymbolId, DefinitionId)], first_use: UseId) { - for &(symbol_id, placeholder_id) in loop_header { - self.current_use_def - .resolve_placeholder(symbol_id, placeholder_id, first_use); - } - } - - // Keep in sync with `collect_expression`: Every construct that creates - // a definition there must be matched here so that loop headers account - // for all bindings in the body. - fn collect_loop_bound_names(body: &RSyntaxNode) -> Vec { - let mut names = Vec::new(); - let mut preorder = body.preorder(); - - while let Some(event) = preorder.next() { - let WalkEvent::Enter(node) = event else { - continue; - }; - - match node.kind() { - // Function bodies are separate scopes. In the future we'll need - // an indirection here to handle other kinds of local scopes, in - // particular from NSE functions like `local()`. - RSyntaxKind::R_FUNCTION_DEFINITION => { - preorder.skip_subtree(); - }, - - RSyntaxKind::R_BINARY_EXPRESSION => { - let op: RBinaryExpression = node.cast().unwrap(); - if let Some((name, _)) = assignment_target(&op) { - names.push(name); - } - }, - - RSyntaxKind::R_FOR_STATEMENT => { - let for_stmt: RForStatement = node.cast().unwrap(); - if let Ok(variable) = for_stmt.variable() { - names.push(identifier_text(&variable)); - } - }, - - _ => {}, - } - } - - names.sort(); - names.dedup(); - names - } - fn collect_arguments(&mut self, args: &RArgumentList) { for item in args.iter() { let Ok(arg) = item else { continue }; @@ -709,18 +624,6 @@ fn string_value_text(s: &aether_syntax::RStringValue) -> Option { Some(text[1..text.len() - 1].to_string()) } -/// For a local (non-super) assignment, extract the binding name and range. -/// Returns `None` if the expression is not an assignment, is a -/// super-assignment, or has a complex target (`x$foo`, `x[1]`, etc.). -fn assignment_target(bin: &RBinaryExpression) -> Option<(String, TextRange)> { - if !is_assignment(bin) || is_super_assignment(bin) { - return None; - } - let right = is_right_assignment(bin); - let target = if right { bin.right() } else { bin.left() }.ok()?; - assignment_target_name(&target) -} - /// Extract the binding name and range from an assignment target expression. /// Returns `None` for complex targets (`x$foo`, `x[1]`, etc.) that don't /// represent simple name bindings. diff --git a/crates/oak_index/src/semantic_index.rs b/crates/oak_index/src/semantic_index.rs index dce370575..4c770140d 100644 --- a/crates/oak_index/src/semantic_index.rs +++ b/crates/oak_index/src/semantic_index.rs @@ -390,16 +390,6 @@ pub enum DefinitionKind { SuperAssignment(RSyntaxNode), Parameter(RSyntaxNode), ForVariable(RSyntaxNode), - // The "loop header" is the entry point of a loop in the control flow - // graph. This placeholder definition, created at the loop header before - // visiting the body, represents a value carried from a previous - // iteration so that uses at the top of the body can see definitions - // from the bottom. After the body is visited, each placeholder is - // resolved to the real definitions that are live at the end of the - // body (see `finish_loop_header`). Resolved placeholders no longer - // appear in use-def results but remain in the definition arena to - // preserve `DefinitionId` stability (the arena is append-only). - LoopHeader, } impl Definition { diff --git a/crates/oak_index/src/use_def_map.rs b/crates/oak_index/src/use_def_map.rs index 462e0be72..f1199becc 100644 --- a/crates/oak_index/src/use_def_map.rs +++ b/crates/oak_index/src/use_def_map.rs @@ -26,7 +26,7 @@ use crate::semantic_index::UseId; // print(x) # may_be_unbound: true, definitions: {A} // ``` // -// - `record_binding()`: A definition like `x <- 1` kills all previous live +// - `record_definition()`: A definition like `x <- 1` kills all previous live // definitions for that symbol and replaces them with a singleton. // `may_be_unbound` becomes false. // @@ -66,8 +66,50 @@ use crate::semantic_index::UseId; // The pre-if definition stays live because there's a path (the no-else path) // where it wasn't shadowed. // -// The same primitives can be used to implement other control flow constructs -// following similar considerations (the body of a loop might not execute). +// The same primitives handle other control flow: a `while` body may not +// execute, so we snapshot before, visit the body, then merge (like an +// if-without-else). `for` is similar, except the loop variable is always +// bound before the snapshot. +// +// ## Retroactive fixups +// +// The snapshot/restore/merge model is forward-only: a use sees definitions +// recorded before it. Two situations need the opposite: definitions that +// are recorded *after* a use must retroactively reach it. Without this, +// features like rename and jump-to-definition would miss connections. +// +// ### Loop-carried definitions (`finish_loop_defs()`) +// +// ```r +// x <- 0 +// while (cond) { +// print(x) # should see def A (pre-loop) AND def B (previous iteration) +// x <- 1 # def B +// } +// ``` +// +// When visiting the body top-to-bottom, `print(x)` is recorded before +// `x <- 1`, so it only sees `{A}`. But in a second iteration, `x <- 1` +// from the first iteration should reach `print(x)`. After the body, +// `finish_loop_defs()` diffs the pre-loop and post-body symbol states. +// Any new definition (here, B) is retroactively added to uses of that +// symbol recorded during the body. Result: `print(x)` sees `{A, B}`. +// +// ### Deferred definitions (`record_deferred_definition()`) +// +// ```r +// x <- 0 # def A +// print(x) # should see def A AND def B +// f <- function() { +// x <<- 1 # def B (targets file scope) +// } +// ``` +// +// The `<<-` creates a definition in the file scope, but it's encountered +// during the function body walk, after `print(x)` was already recorded. +// `record_deferred_definition()` adds it to the live state (so future uses +// see it) and also stashes it. At finalization, `finish_deferred_defs()` +// retroactively adds it to all uses of that symbol, including `print(x)`. // // ## Interpreting `Bindings` // @@ -128,8 +170,8 @@ impl Bindings { } /// Add a definition to the live set without clearing existing ones and - /// without changing `may_be_unbound`. Used for `LoopHeader` placeholder - /// definitions that represent a value carried from a previous iteration. + /// without changing `may_be_unbound`. Used for loop-carried definitions + /// and scope-wide definitions (`<<-`). fn add_definition(&mut self, def_id: DefinitionId) { let pos = self.definitions.partition_point(|&id| id < def_id); if pos >= self.definitions.len() || self.definitions[pos] != def_id { @@ -137,23 +179,6 @@ impl Bindings { } } - fn remove_definition(&mut self, def_id: DefinitionId) { - if let Some(pos) = self.definitions.iter().position(|&id| id == def_id) { - self.definitions.remove(pos); - } - } - - /// Replace `old` with `replacements` in the definition set. If `old` is - /// not present, this is a no-op. Maintains sorted order and deduplicates. - fn replace_definition(&mut self, old: DefinitionId, replacements: &[DefinitionId]) { - if let Some(pos) = self.definitions.iter().position(|&id| id == old) { - self.definitions.remove(pos); - for &def_id in replacements { - self.add_definition(def_id); - } - } - } - /// Union definitions from `other` into `self`, OR the `may_be_unbound` /// flags. Both sides are sorted by `DefinitionId`, so this is a linear /// merge-join. @@ -206,6 +231,13 @@ impl UseDefMap { pub(crate) struct UseDefMapBuilder { symbol_states: IndexVec, bindings_by_use: IndexVec, + // Maps each use to its symbol, so retroactive fixups (for `<<-` and + // loop-carried definitions) can find which uses to patch for a given + // symbol. + symbol_for_use: IndexVec, + // Definitions whose effect on past uses is deferred to `finish()`. + // Currently used for `<<-` extra definitions in ancestor scopes. + deferred_defs: Vec<(SymbolId, DefinitionId)>, } impl UseDefMapBuilder { @@ -213,6 +245,8 @@ impl UseDefMapBuilder { Self { symbol_states: IndexVec::new(), bindings_by_use: IndexVec::new(), + symbol_for_use: IndexVec::new(), + deferred_defs: Vec::new(), } } @@ -233,50 +267,61 @@ impl UseDefMapBuilder { self.symbol_states[symbol_id].record_definition(def_id); } - /// Add a definition to the symbol's live set without clearing existing - /// definitions or changing `may_be_unbound`. Unlike `record_binding` - /// which shadows all prior definitions, this appends to the live set. - /// Used for loop-header placeholders and super-assignments. - pub(crate) fn append_definition(&mut self, symbol_id: SymbolId, def_id: DefinitionId) { + /// Record a definition whose effect on past uses is deferred to + /// `finish()`. The definition is added to the current flow state + /// immediately (so future uses see it), but uses already recorded + /// are patched up at finalization time. Used for `<<-` extra + /// definitions. + pub(crate) fn record_deferred_definition(&mut self, symbol_id: SymbolId, def_id: DefinitionId) { self.symbol_states[symbol_id].add_definition(def_id); + self.deferred_defs.push((symbol_id, def_id)); } - /// Resolve a `LoopHeader` placeholder to the real definitions that - /// are live at the end of the loop body. Replaces the placeholder in - /// all `bindings_by_use` entries recorded during the body (from - /// `first_use` onwards) and removes it from the live symbol state. + /// After visiting a loop body, retroactively patch uses so that + /// definitions from the bottom of the body reach uses at the top + /// (simulating a previous iteration). /// - /// When Salsa is introduced, this eager rewriting will be replaced by - /// Salsa's `specify` mechanism: the synthesis step will create - /// Salsa-tracked `LoopToken` definitions (like ty's `LoopHeader`), - /// and this populate step will call `specify` on those tokens instead - /// of rewriting `bindings_by_use` in place. That lets downstream - /// queries lazily resolve loop headers and skip re-computation when - /// the set of reaching definitions hasn't changed. - pub(crate) fn resolve_placeholder( - &mut self, - symbol_id: SymbolId, - placeholder_id: DefinitionId, - first_use: UseId, - ) { - let replacements: SmallVec<[DefinitionId; 2]> = self.symbol_states[symbol_id] - .definitions() - .iter() - .filter(|&&id| id != placeholder_id) - .copied() - .collect(); - - for i in first_use.index()..self.bindings_by_use.len() { - let use_id = UseId::new(i); - self.bindings_by_use[use_id].replace_definition(placeholder_id, &replacements); - } + /// Diffs each symbol's definitions before (`pre_loop`) and after the + /// body. Any definition present after but not before is new (it was + /// created inside the body). Those new definitions are added to all + /// uses of that symbol from `first_use` onwards, which covers exactly + /// the uses recorded during the body. + pub(crate) fn finish_loop_defs(&mut self, pre_loop: &FlowSnapshot, first_use: UseId) { + for i in 0..self.symbol_states.len() { + let symbol_id = SymbolId::new(i); + + let pre_defs = if i < pre_loop.symbol_states.len() { + pre_loop.symbol_states[symbol_id].definitions() + } else { + // Symbol was first interned during the body, so it had + // no definitions before the loop. + &[] + }; + let post_defs = self.symbol_states[symbol_id].definitions(); + + // Collect new definitions introduced in the body + let new_defs: SmallVec<[DefinitionId; 2]> = post_defs + .iter() + .filter(|d| !pre_defs.contains(d)) + .copied() + .collect(); + + // Most symbols are unchanged, exit early in that case + if new_defs.is_empty() { + continue; + } - // Prevent the placeholder from leaking into post-loop state. When the - // body unconditionally assigns the symbol, `record_binding()` already - // cleared the placeholder, so this is a no-op. When the assignment is - // conditional, the placeholder would survive and leak if we did not - // explicitly remove it here. - self.symbol_states[symbol_id].remove_definition(placeholder_id); + // Add new defs to uses recorded during the body (`first_use` + // onwards). Uses before the loop are unaffected. + for j in first_use.index()..self.bindings_by_use.len() { + let use_id = UseId::new(j); + if self.symbol_for_use[use_id] == symbol_id { + for &def_id in &new_defs { + self.bindings_by_use[use_id].add_definition(def_id); + } + } + } + } } /// Record a use of `symbol_id`. Clones the current live bindings for that @@ -284,6 +329,7 @@ impl UseDefMapBuilder { pub(crate) fn record_use(&mut self, symbol_id: SymbolId, use_id: UseId) { let bindings = self.symbol_states[symbol_id].clone(); let pushed_id = self.bindings_by_use.push(bindings); + self.symbol_for_use.push(symbol_id); stdext::soft_assert!(use_id == pushed_id); } @@ -325,9 +371,24 @@ impl UseDefMapBuilder { } /// Finalize into an immutable [`UseDefMap`]. - pub(crate) fn finish(self) -> UseDefMap { + pub(crate) fn finish(mut self) -> UseDefMap { + self.finish_deferred_defs(); UseDefMap { bindings_by_use: self.bindings_by_use, } } + + /// Retroactively add deferred definitions (from `<<-`) to all + /// uses of the corresponding symbol, including uses that were + /// recorded before the definition was encountered in the walk. + fn finish_deferred_defs(&mut self) { + for &(symbol_id, def_id) in &self.deferred_defs { + for i in 0..self.bindings_by_use.len() { + let use_id = UseId::new(i); + if self.symbol_for_use[use_id] == symbol_id { + self.bindings_by_use[use_id].add_definition(def_id); + } + } + } + } } diff --git a/crates/oak_index/tests/builder.rs b/crates/oak_index/tests/builder.rs index e46d83776..db6776b0a 100644 --- a/crates/oak_index/tests/builder.rs +++ b/crates/oak_index/tests/builder.rs @@ -1192,10 +1192,8 @@ fn test_nested_for_loops() { let j = index.symbols(file).get("j").unwrap(); assert_eq!(j.flags(), SymbolFlags::IS_BOUND.union(SymbolFlags::IS_USED)); - // 2 real defs (i, j) + 1 placeholder LoopHeader (j from inner for, - // discovered by outer loop's pre-walk). Note the placeholder definitions - // remain in the Definition arena to preserve IDs. The arena is append-only. - assert_eq!(index.definitions(file).len(), 3); + // 2 real defs (i, j), no LoopHeader placeholders. + assert_eq!(index.definitions(file).len(), 2); } // --- Assignment in loop body --- @@ -1211,6 +1209,6 @@ fn test_assignment_in_for_body() { let x = index.symbols(file).get("x").unwrap(); assert_eq!(x.flags(), SymbolFlags::IS_BOUND); - // 2 real defs (i, x) + 1 placeholder LoopHeader (x from body pre-walk) - assert_eq!(index.definitions(file).len(), 3); + // 2 real defs (i, x), no LoopHeader placeholders. + assert_eq!(index.definitions(file).len(), 2); } diff --git a/crates/oak_index/tests/use_def_map.rs b/crates/oak_index/tests/use_def_map.rs index de63b64c0..50e96454b 100644 --- a/crates/oak_index/tests/use_def_map.rs +++ b/crates/oak_index/tests/use_def_map.rs @@ -274,9 +274,9 @@ fn test_for_body_assignment_is_conditional() { let index = index( "\ for (i in xs) { # def 0 (i) - x <- 1 # def 2 + x <- 1 # def 1 } -x # use 1 -> {def 2}, may_be_unbound (body may not execute) +x # use 1 -> {def 1}, may_be_unbound (body may not execute) ", ); let file = ScopeId::from(0); @@ -284,7 +284,7 @@ x # use 1 -> {def 2}, may_be_unbound (body may not execute) // Uses: `xs` is use 0, final `x` is use 1. let bindings = map.bindings_at_use(UseId::from(1)); - assert_eq!(bindings.definitions(), &[DefinitionId::from(2)]); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); assert!(bindings.may_be_unbound()); } @@ -294,9 +294,9 @@ fn test_for_body_assignment_merges_with_pre_loop() { "\ x <- 0 # def 0 for (i in xs) { # def 1 (i) - x <- 1 # def 3 + x <- 1 # def 2 } -x # use 1 -> {def 0, def 3} +x # use 1 -> {def 0, def 2} ", ); let file = ScopeId::from(0); @@ -306,7 +306,7 @@ x # use 1 -> {def 0, def 3} let bindings = map.bindings_at_use(UseId::from(1)); assert_eq!(bindings.definitions(), &[ DefinitionId::from(0), - DefinitionId::from(3) + DefinitionId::from(2) ]); assert_not!(bindings.may_be_unbound()); } @@ -336,9 +336,9 @@ fn test_while_body_is_conditional() { let index = index( "\ while (cond) { - x <- 1 # def 1 + x <- 1 # def 0 } -x # use 1 -> {def 1}, may_be_unbound +x # use 1 -> {def 0}, may_be_unbound ", ); let file = ScopeId::from(0); @@ -346,7 +346,7 @@ x # use 1 -> {def 1}, may_be_unbound // Uses: `cond` is use 0, final `x` is use 1. let bindings = map.bindings_at_use(UseId::from(1)); - assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); assert!(bindings.may_be_unbound()); } @@ -356,9 +356,9 @@ fn test_while_merges_with_pre_loop() { "\ x <- 0 # def 0 while (cond) { - x <- 1 # def 2 + x <- 1 # def 1 } -x # use 1 -> {def 0, def 2} +x # use 1 -> {def 0, def 1} ", ); let file = ScopeId::from(0); @@ -368,7 +368,7 @@ x # use 1 -> {def 0, def 2} let bindings = map.bindings_at_use(UseId::from(1)); assert_eq!(bindings.definitions(), &[ DefinitionId::from(0), - DefinitionId::from(2) + DefinitionId::from(1) ]); assert_not!(bindings.may_be_unbound()); } @@ -392,17 +392,17 @@ fn test_repeat_body_is_definite() { let index = index( "\ repeat { - x <- 1 # def 1 + x <- 1 # def 0 break } -x # use 0 -> {def 1}, not unbound +x # use 0 -> {def 0}, not unbound ", ); let file = ScopeId::from(0); let map = index.use_def_map(file); let bindings = map.bindings_at_use(UseId::from(0)); - assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); assert_not!(bindings.may_be_unbound()); } @@ -412,17 +412,17 @@ fn test_repeat_shadows_prior_def() { "\ x <- 0 # def 0 repeat { - x <- 1 # def 2 + x <- 1 # def 1 break } -x # use 0 -> {def 2} (repeat always executes) +x # use 0 -> {def 1} (repeat always executes) ", ); let file = ScopeId::from(0); let map = index.use_def_map(file); let bindings = map.bindings_at_use(UseId::from(0)); - assert_eq!(bindings.definitions(), &[DefinitionId::from(2)]); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); assert_not!(bindings.may_be_unbound()); } @@ -440,8 +440,8 @@ fn test_while_loop_carried_def() { "\ x <- 0 # def 0 while (cond) { - x # use 1: sees {def 0, def 2} (pre-loop OR previous iteration) - x <- 1 # def 2 + x # use 1: sees {def 0, def 1} (pre-loop OR previous iteration) + x <- 1 # def 1 } ", ); @@ -449,11 +449,10 @@ while (cond) { let map = index.use_def_map(file); // Uses: `cond` is use 0, `x` inside body is use 1. - // def 1 is the LoopHeader for `x`, populated with {def 2}. let bindings = map.bindings_at_use(UseId::from(1)); assert_eq!(bindings.definitions(), &[ DefinitionId::from(0), - DefinitionId::from(2) + DefinitionId::from(1) ]); assert_not!(bindings.may_be_unbound()); } @@ -464,8 +463,8 @@ fn test_for_loop_carried_def() { "\ x <- 0 # def 0 for (i in xs) { # def 1 (i) - x # use 1: sees {def 0, def 3} - x <- 1 # def 3 + x # use 1: sees {def 0, def 2} + x <- 1 # def 2 } ", ); @@ -473,11 +472,10 @@ for (i in xs) { # def 1 (i) let map = index.use_def_map(file); // Uses: `xs` is use 0, `x` inside body is use 1. - // def 2 is the LoopHeader for `x`, populated with {def 3}. let bindings = map.bindings_at_use(UseId::from(1)); assert_eq!(bindings.definitions(), &[ DefinitionId::from(0), - DefinitionId::from(3) + DefinitionId::from(2) ]); assert_not!(bindings.may_be_unbound()); } @@ -488,8 +486,8 @@ fn test_repeat_loop_carried_def() { "\ x <- 0 # def 0 repeat { - x # use 0: sees {def 0, def 2} - x <- 1 # def 2 + x # use 0: sees {def 0, def 1} + x <- 1 # def 1 break } ", @@ -497,11 +495,10 @@ repeat { let file = ScopeId::from(0); let map = index.use_def_map(file); - // def 1 is the LoopHeader for `x`, populated with {def 2}. let bindings = map.bindings_at_use(UseId::from(0)); assert_eq!(bindings.definitions(), &[ DefinitionId::from(0), - DefinitionId::from(2) + DefinitionId::from(1) ]); assert_not!(bindings.may_be_unbound()); } @@ -511,8 +508,8 @@ fn test_loop_carried_unbound_before_loop() { let index = index( "\ while (cond) { - x # use 1: sees {def 1}, may_be_unbound - x <- 1 # def 1 + x # use 1: sees {def 0}, may_be_unbound + x <- 1 # def 0 } ", ); @@ -520,9 +517,8 @@ while (cond) { let map = index.use_def_map(file); // Uses: `cond` is use 0, `x` inside body is use 1. - // def 0 is the LoopHeader for `x`, populated with {def 1}. let bindings = map.bindings_at_use(UseId::from(1)); - assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); assert!(bindings.may_be_unbound()); } @@ -641,20 +637,20 @@ x # file: use 0 -> {def 0}, may_be_unbound } #[test] -fn test_fixme_super_assignment_not_visible_before_function_def() { +fn test_super_assignment_visible_before_function_def() { let index = index( "\ -x # file: use 0 -> unbound -f <- function() { x <<- 1 } # file: def 0 (f), def 1 (x <<- in parent) +x # file: use 0 -> {def 0}, may_be_unbound +f <- function() { x <<- 1 } # file: def 0 (x <<- in parent), def 1 (f) ", ); let file = ScopeId::from(0); let map = index.use_def_map(file); - // The use of `x` precedes the `<<-` definition in the flow. - // FIXME: Ideally the use would be mapped to the extra definition. + // `<<-` definitions are scope-wide, so the use of `x` before the + // function definition still sees the `<<-` binding. let bindings = map.bindings_at_use(UseId::from(0)); - assert!(bindings.definitions().is_empty()); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); assert!(bindings.may_be_unbound()); } @@ -745,7 +741,7 @@ fn test_if_inside_for() { "\ for (i in xs) { # def 0 (i) if (cond) { - x <- 1 # def 2 + x <- 1 # def 1 } } x # use 2: may_be_unbound (loop might not run, if might not match) @@ -755,9 +751,8 @@ x # use 2: may_be_unbound (loop might not run, if might not match) let map = index.use_def_map(file); // Uses: `xs` is use 0, `cond` is use 1, final `x` is use 2. - // def 1 is the LoopHeader for `x`, populated before merge. let bindings = map.bindings_at_use(UseId::from(2)); - assert_eq!(bindings.definitions(), &[DefinitionId::from(2)]); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); assert!(bindings.may_be_unbound()); } @@ -768,12 +763,12 @@ fn test_if_else_inside_while() { x <- 0 # def 0 while (cond) { if (c2) { - x <- 1 # def 2 + x <- 1 # def 1 } else { - x <- 2 # def 3 + x <- 2 # def 2 } } -x # use 2 -> {def 0, def 2, def 3} (while may not execute) +x # use 2 -> {def 0, def 1, def 2} (while may not execute) ", ); let file = ScopeId::from(0); @@ -783,8 +778,8 @@ x # use 2 -> {def 0, def 2, def 3} (while may not execute) let bindings = map.bindings_at_use(UseId::from(2)); assert_eq!(bindings.definitions(), &[ DefinitionId::from(0), - DefinitionId::from(2), - DefinitionId::from(3) + DefinitionId::from(1), + DefinitionId::from(2) ]); assert_not!(bindings.may_be_unbound()); } From f54d1b36906742e8fadb3eb37b4a529b533ada2e Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 10 Apr 2026 15:10:01 +0200 Subject: [PATCH 5/7] Reorder for top-to-bottom reading --- crates/oak_index/src/use_def_map.rs | 51 ++++++++++++++--------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/crates/oak_index/src/use_def_map.rs b/crates/oak_index/src/use_def_map.rs index f1199becc..105b82ed4 100644 --- a/crates/oak_index/src/use_def_map.rs +++ b/crates/oak_index/src/use_def_map.rs @@ -125,6 +125,25 @@ use crate::semantic_index::UseId; // {} | true | no local def, parent scope reference // {A, B} | true | some paths define, some don't +/// The immutable use-def map for a single scope. For each use site, stores the +/// set of definitions that can reach it through control flow. +#[derive(Debug)] +pub struct UseDefMap { + bindings_by_use: IndexVec, +} + +impl UseDefMap { + pub(crate) fn empty() -> Self { + Self { + bindings_by_use: IndexVec::new(), + } + } + + pub fn bindings_at_use(&self, use_id: UseId) -> &Bindings { + &self.bindings_by_use[use_id] + } +} + /// The set of definitions that can reach a particular point in control flow, /// plus whether the symbol may be unbound (no definition on some path). /// @@ -198,32 +217,6 @@ fn sorted_union(a: &[DefinitionId], b: &[DefinitionId]) -> SmallVec<[DefinitionI .collect() } -/// A snapshot of all symbol states at a particular point in control flow. -#[derive(Clone, Debug)] -pub(crate) struct FlowSnapshot { - symbol_states: IndexVec, -} - -/// The immutable use-def map for a single scope, produced by finalizing the -/// builder. For each use site, stores the set of definitions that can reach -/// it through control flow. -#[derive(Debug)] -pub struct UseDefMap { - bindings_by_use: IndexVec, -} - -impl UseDefMap { - pub(crate) fn empty() -> Self { - Self { - bindings_by_use: IndexVec::new(), - } - } - - pub fn bindings_at_use(&self, use_id: UseId) -> &Bindings { - &self.bindings_by_use[use_id] - } -} - /// Mutable builder for constructing a [`UseDefMap`] during the tree walk. /// One builder exists per scope. When entering a nested scope the current /// builder is pushed onto a stack and a fresh one takes over. @@ -392,3 +385,9 @@ impl UseDefMapBuilder { } } } + +/// A snapshot of all symbol states at a particular point in control flow. +#[derive(Clone, Debug)] +pub(crate) struct FlowSnapshot { + symbol_states: IndexVec, +} From 3a038791a353db2bd62efe83385069e26ad95e44 Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 10 Apr 2026 17:58:25 +0200 Subject: [PATCH 6/7] Motivate `<<-` behaviour --- crates/oak_index/src/use_def_map.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/crates/oak_index/src/use_def_map.rs b/crates/oak_index/src/use_def_map.rs index 105b82ed4..ba0974f2f 100644 --- a/crates/oak_index/src/use_def_map.rs +++ b/crates/oak_index/src/use_def_map.rs @@ -81,7 +81,7 @@ use crate::semantic_index::UseId; // ### Loop-carried definitions (`finish_loop_defs()`) // // ```r -// x <- 0 +// x <- 0 # def A // while (cond) { // print(x) # should see def A (pre-loop) AND def B (previous iteration) // x <- 1 # def B @@ -97,6 +97,12 @@ use crate::semantic_index::UseId; // // ### Deferred definitions (`record_deferred_definition()`) // +// `<<-` modifies a symbol that should already be bound in an ancestor scope (if +// there is no existing definition, R stores in the global environment, but +// we'll lint about it). For this reason, `<<-` _adds_ to the set of potential +// definitions reaching uses of that symbols, it doesn't overwrite like `<-` +// would. +// // ```r // x <- 0 # def A // print(x) # should see def A AND def B @@ -105,7 +111,7 @@ use crate::semantic_index::UseId; // } // ``` // -// The `<<-` creates a definition in the file scope, but it's encountered +// Here the `<<-` creates a definition in the file scope, but it's encountered // during the function body walk, after `print(x)` was already recorded. // `record_deferred_definition()` adds it to the live state (so future uses // see it) and also stashes it. At finalization, `finish_deferred_defs()` From 8ba3b796af521c12941257647f412335978473ba Mon Sep 17 00:00:00 2001 From: Lionel Henry Date: Fri, 10 Apr 2026 18:02:50 +0200 Subject: [PATCH 7/7] Mention why we fixup definitions after the loop walk --- crates/oak_index/src/use_def_map.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/oak_index/src/use_def_map.rs b/crates/oak_index/src/use_def_map.rs index ba0974f2f..4e9a25b51 100644 --- a/crates/oak_index/src/use_def_map.rs +++ b/crates/oak_index/src/use_def_map.rs @@ -285,6 +285,11 @@ impl UseDefMapBuilder { /// created inside the body). Those new definitions are added to all /// uses of that symbol from `first_use` onwards, which covers exactly /// the uses recorded during the body. + /// + /// This runs after the body (not eagerly at each definition) because + /// the body may contain branches. A diff at the end captures the + /// converged state after all snapshot/restore/merge within the body + /// has resolved. pub(crate) fn finish_loop_defs(&mut self, pre_loop: &FlowSnapshot, first_use: UseId) { for i in 0..self.symbol_states.len() { let symbol_id = SymbolId::new(i);