diff --git a/Cargo.lock b/Cargo.lock index 4fda3117c..8ff2e9fe8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2248,7 +2248,11 @@ dependencies = [ "air_r_parser", "air_r_syntax", "biome_rowan", + "itertools 0.10.5", + "log", "rustc-hash", + "smallvec", + "stdext", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 73df7d108..3b273b528 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -94,6 +94,7 @@ serde_json = { version = "1.0.94", features = ["preserve_order"] } serde_repr = "0.1.17" serde_with = "3.0.0" sha2 = "0.10.6" +smallvec = "1.13.2" stdext = { path = "crates/stdext" } streaming-iterator = "0.1.9" strum = "0.26.2" diff --git a/crates/oak_index/Cargo.toml b/crates/oak_index/Cargo.toml index e2bb4aa61..8d8436167 100644 --- a/crates/oak_index/Cargo.toml +++ b/crates/oak_index/Cargo.toml @@ -16,7 +16,11 @@ workspace = true [dependencies] aether_syntax.workspace = true biome_rowan.workspace = true +itertools.workspace = true +log.workspace = true rustc-hash.workspace = true +smallvec.workspace = true +stdext.workspace = true [dev-dependencies] aether_parser.workspace = true diff --git a/crates/oak_index/src/builder.rs b/crates/oak_index/src/builder.rs index 72f224921..0666f912e 100644 --- a/crates/oak_index/src/builder.rs +++ b/crates/oak_index/src/builder.rs @@ -30,6 +30,8 @@ use crate::semantic_index::SymbolFlags; use crate::semantic_index::SymbolTableBuilder; use crate::semantic_index::Use; use crate::semantic_index::UseId; +use crate::use_def_map::UseDefMap; +use crate::use_def_map::UseDefMapBuilder; /// Build a [`SemanticIndex`] from a parsed R file. pub fn build(root: &RRoot) -> SemanticIndex { @@ -47,6 +49,9 @@ struct SemanticIndexBuilder { symbol_tables: IndexVec, definitions: IndexVec>, uses: IndexVec>, + use_def_maps: IndexVec, + current_use_def: UseDefMapBuilder, + use_def_stack: Vec, current_scope: ScopeId, } @@ -56,6 +61,7 @@ impl SemanticIndexBuilder { let mut symbol_tables = IndexVec::new(); let mut definitions = IndexVec::new(); let mut uses = IndexVec::new(); + let mut use_def_maps = IndexVec::new(); // The descendants range starts empty (`n+1..n+1`). `pop_scope` later // fills in `descendants.end` with the current arena length. Everything @@ -70,12 +76,16 @@ impl SemanticIndexBuilder { symbol_tables.push(SymbolTableBuilder::new()); definitions.push(IndexVec::new()); uses.push(IndexVec::new()); + use_def_maps.push(UseDefMap::empty()); Self { scopes, symbol_tables, definitions, uses, + use_def_maps, + current_use_def: UseDefMapBuilder::new(), + use_def_stack: Vec::new(), current_scope: file, } } @@ -99,6 +109,10 @@ impl SemanticIndexBuilder { self.symbol_tables.push(SymbolTableBuilder::new()); self.definitions.push(IndexVec::new()); self.uses.push(IndexVec::new()); + self.use_def_maps.push(UseDefMap::empty()); + + let parent_use_def = std::mem::replace(&mut self.current_use_def, UseDefMapBuilder::new()); + self.use_def_stack.push(parent_use_def); id } @@ -111,6 +125,13 @@ impl SemanticIndexBuilder { Some(parent) => parent, None => panic!("`pop_scope()` called on the file scope"), }; + + let parent_use_def = match self.use_def_stack.pop() { + Some(builder) => builder, + None => panic!("`pop_scope()` called with empty use-def stack"), + }; + let finalized = std::mem::replace(&mut self.current_use_def, parent_use_def).finish(); + self.use_def_maps[id] = finalized; } fn add_definition( @@ -120,17 +141,103 @@ impl SemanticIndexBuilder { kind: DefinitionKind, range: TextRange, ) { - let symbol = self.symbol_tables[self.current_scope].intern(name, flags); + let symbol_id = self.symbol_tables[self.current_scope].intern(name, flags); + let def_id = self.definitions[self.current_scope].push(Definition { + symbol: symbol_id, + kind, + range, + }); + + self.current_use_def.ensure_symbol(symbol_id); + self.current_use_def.record_definition(symbol_id, def_id); + } + + // Super-assignment is lexically in the current scope but binds in an + // ancestor. We record the definition in the current scope and append + // it to the target scope's use-def map (without shadowing prior + // definitions). + // + // R's `<<-` walks up the environment chain from the parent, targeting + // the first scope where the symbol is already bound. If no binding is + // found, it assigns in the global (file) scope. + fn add_super_definition(&mut self, name: &str, kind: DefinitionKind, range: TextRange) { + let symbol_id = + self.symbol_tables[self.current_scope].intern(name, SymbolFlags::IS_SUPER_BOUND); self.definitions[self.current_scope].push(Definition { - symbol, + symbol: symbol_id, + kind: kind.clone(), + range, + }); + + let target_scope = self.resolve_super_target(name); + + let target_symbol = self.symbol_tables[target_scope].intern(name, SymbolFlags::IS_BOUND); + let target_def_id = self.definitions[target_scope].push(Definition { + symbol: target_symbol, kind, range, }); + + let builder = self.use_def_builder_mut(target_scope); + builder.ensure_symbol(target_symbol); + builder.record_deferred_definition(target_symbol, target_def_id); + } + + // Walk up from the parent scope looking for a scope where `name` already + // has `IS_BOUND`. Returns that scope, or the file scope if no binding is + // found (mirroring R's assignment to the global environment). + fn resolve_super_target(&self, name: &str) -> ScopeId { + let file_scope = ScopeId::from(0); + let mut scope = match self.scopes[self.current_scope].parent { + Some(parent) => parent, + None => return file_scope, + }; + + loop { + if let Some(id) = self.symbol_tables[scope].id(name) { + if self.symbol_tables[scope] + .symbol(id) + .flags() + .contains(SymbolFlags::IS_BOUND) + { + return scope; + } + } + scope = match self.scopes[scope].parent { + Some(parent) => parent, + None => return file_scope, + }; + } + } + + fn use_def_builder_mut(&mut self, target: ScopeId) -> &mut UseDefMapBuilder { + if target == self.current_scope { + return &mut self.current_use_def; + } + + let mut steps = 0; + let mut scope = self.current_scope; + while scope != target { + scope = match self.scopes[scope].parent { + Some(parent) => parent, + None => panic!("Target scope {target:?} is not an ancestor of current scope"), + }; + steps += 1; + } + + let index = self.use_def_stack.len() - steps; + &mut self.use_def_stack[index] } fn add_use(&mut self, name: &str, range: TextRange) { - let symbol = self.symbol_tables[self.current_scope].intern(name, SymbolFlags::IS_USED); - self.uses[self.current_scope].push(Use { symbol, range }); + let symbol_id = self.symbol_tables[self.current_scope].intern(name, SymbolFlags::IS_USED); + let use_id = self.uses[self.current_scope].push(Use { + symbol: symbol_id, + range, + }); + + self.current_use_def.ensure_symbol(symbol_id); + self.current_use_def.record_use(symbol_id, use_id); } // --- Recursive descent --- @@ -224,6 +331,10 @@ impl SemanticIndexBuilder { }, AnyRExpression::RForStatement(stmt) => { + // The for variable is always bound (R sets it to NULL for + // empty sequences), so its binding is recorded before the + // snapshot. Assignments inside the body are conditional + // (body may not execute for empty sequences). if let Ok(variable) = stmt.variable() { self.add_definition( &identifier_text(&variable), @@ -235,8 +346,70 @@ impl SemanticIndexBuilder { if let Ok(sequence) = stmt.sequence() { self.collect_expression(&sequence); } + + let pre_loop = self.current_use_def.snapshot(); + + if let Ok(body) = stmt.body() { + let first_use = self.uses[self.current_scope].next_id(); + self.collect_expression(&body); + self.current_use_def.finish_loop_defs(&pre_loop, first_use); + } + + self.current_use_def.merge(pre_loop); + }, + + AnyRExpression::RIfStatement(stmt) => { + // Condition is always evaluated + if let Ok(condition) = stmt.condition() { + self.collect_expression(&condition); + } + + let pre_if = self.current_use_def.snapshot(); + + // If-body (consequence) + if let Ok(consequence) = stmt.consequence() { + self.collect_expression(&consequence); + } + + let post_if = self.current_use_def.snapshot(); + self.current_use_def.restore(pre_if); + + // Else-body (alternative), if present. If absent, the + // "else path" is just the pre-if state we restored to. + if let Some(else_clause) = stmt.else_clause() { + if let Ok(alternative) = else_clause.alternative() { + self.collect_expression(&alternative); + } + } + + // After: definitions from both branches are live + self.current_use_def.merge(post_if); + }, + + AnyRExpression::RWhileStatement(stmt) => { + if let Ok(condition) = stmt.condition() { + self.collect_expression(&condition); + } + + let pre_loop = self.current_use_def.snapshot(); + + if let Ok(body) = stmt.body() { + let first_use = self.uses[self.current_scope].next_id(); + self.collect_expression(&body); + self.current_use_def.finish_loop_defs(&pre_loop, first_use); + } + + // Body may not execute + self.current_use_def.merge(pre_loop); + }, + + AnyRExpression::RRepeatStatement(stmt) => { + // Body always executes at least once, no snapshot needed if let Ok(body) = stmt.body() { + let pre_loop = self.current_use_def.snapshot(); + let first_use = self.uses[self.current_scope].next_id(); self.collect_expression(&body); + self.current_use_def.finish_loop_defs(&pre_loop, first_use); } }, @@ -244,8 +417,7 @@ impl SemanticIndexBuilder { // Generic fallback: walk over descendant nodes and collect their // `AnyRExpression` children, letting `collect_expression` - // handle their contents. This covers `RIfStatement`, - // `RWhileStatement`, `RRepeatStatement`, `RUnaryExpression`, + // handle their contents. This covers `RUnaryExpression`, // `RParenthesizedExpression`, `RReturnExpression`, literals, and // any future expression types without needing explicit arms. // @@ -354,34 +526,16 @@ impl SemanticIndexBuilder { let target = if right { op.right() } else { op.left() }; let Ok(target) = target else { return }; - let (name, range) = match &target { - AnyRExpression::RIdentifier(ident) => { - let name = identifier_text(ident); - let range = ident.syntax().text_trimmed_range(); - (name, range) - }, - - // `"x" <- 1` is equivalent to `x <- 1` in R - AnyRExpression::AnyRValue(AnyRValue::RStringValue(s)) => { - let Some(name) = string_value_text(s) else { - return; - }; - let range = s.syntax().text_trimmed_range(); - (name, range) - }, - + let Some((name, range)) = assignment_target_name(&target) else { // Complex target (`x$foo <- rhs`, `x[1] <- rhs`, etc.) does // not represent a binding. We recurse for uses. - other => { - self.collect_expression(other); - return; - }, + self.collect_expression(&target); + return; }; if super_assign { - self.add_definition( + self.add_super_definition( &name, - SymbolFlags::IS_SUPER_BOUND, DefinitionKind::SuperAssignment(op.syntax().clone()), range, ); @@ -406,8 +560,18 @@ impl SemanticIndexBuilder { fn finish(mut self) -> SemanticIndex { self.scopes[ScopeId::from(0)].descendants.end = self.scopes.next_id(); + + let file_use_def_map = self.current_use_def.finish(); + self.use_def_maps[ScopeId::from(0)] = file_use_def_map; + let symbol_tables = self.symbol_tables.into_iter().map(|b| b.build()).collect(); - SemanticIndex::new(self.scopes, symbol_tables, self.definitions, self.uses) + SemanticIndex::new( + self.scopes, + symbol_tables, + self.definitions, + self.uses, + self.use_def_maps, + ) } } @@ -460,6 +624,26 @@ fn string_value_text(s: &aether_syntax::RStringValue) -> Option { Some(text[1..text.len() - 1].to_string()) } +/// Extract the binding name and range from an assignment target expression. +/// Returns `None` for complex targets (`x$foo`, `x[1]`, etc.) that don't +/// represent simple name bindings. +fn assignment_target_name(target: &AnyRExpression) -> Option<(String, TextRange)> { + match target { + AnyRExpression::RIdentifier(ident) => { + let name = identifier_text(ident); + let range = ident.syntax().text_trimmed_range(); + Some((name, range)) + }, + // `"x" <- 1` is equivalent to `x <- 1` in R + AnyRExpression::AnyRValue(AnyRValue::RStringValue(s)) => { + let name = string_value_text(s)?; + let range = s.syntax().text_trimmed_range(); + Some((name, range)) + }, + _ => None, + } +} + fn is_super_assignment(bin: &RBinaryExpression) -> bool { let Ok(op) = bin.operator() else { return false; diff --git a/crates/oak_index/src/index_vec.rs b/crates/oak_index/src/index_vec.rs index 86979ce93..9d58cfd35 100644 --- a/crates/oak_index/src/index_vec.rs +++ b/crates/oak_index/src/index_vec.rs @@ -63,6 +63,15 @@ impl FromIterator for IndexVec { } } +impl Clone for IndexVec { + fn clone(&self) -> Self { + Self { + raw: self.raw.clone(), + _phantom: PhantomData, + } + } +} + impl Default for IndexVec { fn default() -> Self { Self::new() diff --git a/crates/oak_index/src/lib.rs b/crates/oak_index/src/lib.rs index a79d44530..ac0e2c5de 100644 --- a/crates/oak_index/src/lib.rs +++ b/crates/oak_index/src/lib.rs @@ -1,3 +1,4 @@ pub mod builder; pub(crate) mod index_vec; pub mod semantic_index; +pub mod use_def_map; diff --git a/crates/oak_index/src/semantic_index.rs b/crates/oak_index/src/semantic_index.rs index 72162b466..4c770140d 100644 --- a/crates/oak_index/src/semantic_index.rs +++ b/crates/oak_index/src/semantic_index.rs @@ -6,6 +6,7 @@ use rustc_hash::FxHashMap; use crate::index_vec::define_index; use crate::index_vec::IndexVec; +use crate::use_def_map::UseDefMap; // File-local scope identifier define_index!(ScopeId); @@ -48,11 +49,15 @@ pub struct SemanticIndex { // (a per-scope map from AST node positions to `ScopedUseId`). When we // introduce salsa, these lists may be restructured to match. // - // Use-def maps will layer on top of these lists, not replace them. A - // use-def map tracks which definitions reach each use through control flow, - // referencing `DefinitionId` and `UseId` indices into these arenas. + // Use-def maps layer on top of these lists. A use-def map tracks which + // definitions reach each use through control flow, referencing + // `DefinitionId` and `UseId` indices into these arenas. definitions: IndexVec>, uses: IndexVec>, + + // Per-scope flow-sensitive map from each use site to the set of definitions + // that can reach it. Built alongside the other arrays during the tree walk. + use_def_maps: IndexVec, } impl SemanticIndex { @@ -61,12 +66,14 @@ impl SemanticIndex { symbol_tables: IndexVec, definitions: IndexVec>, uses: IndexVec>, + use_def_maps: IndexVec, ) -> Self { Self { scopes, symbol_tables, definitions, uses, + use_def_maps, } } @@ -86,6 +93,10 @@ impl SemanticIndex { &self.uses[scope] } + pub fn use_def_map(&self, scope: ScopeId) -> &UseDefMap { + &self.use_def_maps[scope] + } + /// Find the innermost scope containing `offset`. pub fn scope_at(&self, offset: biome_rowan::TextSize) -> ScopeId { // Start at the file scope @@ -296,8 +307,8 @@ impl Symbol { // bound somewhere in this scope" but can't answer "which definition of x // reaches this point?" or "is x defined before this use?". Use-def maps // provide that precision. The flags remain useful for scope-level queries -// like `resolve_symbol` and `resolve_super_target` (which walk the scope -// chain checking `IS_BOUND`). They can also be useful as filters for +// like `resolve_symbol` (which walks the scope chain checking +// `IS_BOUND`). They can also be useful as filters for // short-circuiting unneeded expensive operations. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct SymbolFlags(u8); diff --git a/crates/oak_index/src/use_def_map.rs b/crates/oak_index/src/use_def_map.rs new file mode 100644 index 000000000..4e9a25b51 --- /dev/null +++ b/crates/oak_index/src/use_def_map.rs @@ -0,0 +1,404 @@ +use itertools::EitherOrBoth; +use itertools::Itertools; +use smallvec::SmallVec; + +use crate::index_vec::Idx; +use crate::index_vec::IndexVec; +use crate::semantic_index::DefinitionId; +use crate::semantic_index::SymbolId; +use crate::semantic_index::UseId; + +// Use-def tracking answers: "at this use of `x`, which specific definitions +// could have run?" In straight-line code it's trivial: each definition shadows +// the previous one, and a use sees whatever definition came last. The +// complexity comes from branching. +// +// For each symbol in the current scope, we track a `Bindings`: the set of +// `DefinitionId`s that are "live". A fresh scope starts with every symbol in +// the "unbound" state: empty definition set, `may_be_unbound: true`. +// The "may_be_unbound" flag tracks whether there exists some control flow path +// where no definition was reached. +// +// ```r +// if (cond) { +// x <- 1 # def A +// } +// print(x) # may_be_unbound: true, definitions: {A} +// ``` +// +// - `record_definition()`: A definition like `x <- 1` kills all previous live +// definitions for that symbol and replaces them with a singleton. +// `may_be_unbound` becomes false. +// +// - `record_use()`: A use like `print(x)` freezes the current live state for +// that symbol. We clone the current `Bindings` and store it keyed by `use_id`. +// This operation doesn't change any state. +// +// The other operations (`snapshot()`, `restore()`, `merge()`) help dealing with +// control flow complications. +// +// ```r +// x <- 1 # def A +// if (cond) { +// x <- 2 # def B +// } else { +// x <- 3 # def C +// } +// print(x) # which defs reach this use? +// ``` +// +// 1. `snapshot()`: Clone the entire symbol state (all symbols' live +// definitions). This captures the state *before* either branch runs. Call +// this `pre_if`. +// 2. Visit the if-body: `x <- 2` shadows, so `x`'s state becomes `{B}`. +// 3. `snapshot()` again: capture the post-if-body state. Call this `post_if`. +// 4. `restore(pre_if)`: Reset to the state before the if-body ran. Now `x`'s +// state is back to `{A}`. +// 5. Visit the else-body. `x <- 3` shadows, so `x`'s state becomes `{C}`. +// 6. `merge(post_if)`: For each symbol, union the current state (post-else: +// `{C}`) with the snapshot (post-if: `{B}`). Result: `x` has `{B, C}`. +// +// After this, `print(x)` records a use that sees `{B, C}`. Def A is gone +// because both branches shadowed it. +// +// If there's no else clause, step 5 is skipped. The current state after +// restore is still `pre_if` (`{A}`). Merge unions `{A}` with `{B}` → `{A, B}`. +// The pre-if definition stays live because there's a path (the no-else path) +// where it wasn't shadowed. +// +// The same primitives handle other control flow: a `while` body may not +// execute, so we snapshot before, visit the body, then merge (like an +// if-without-else). `for` is similar, except the loop variable is always +// bound before the snapshot. +// +// ## Retroactive fixups +// +// The snapshot/restore/merge model is forward-only: a use sees definitions +// recorded before it. Two situations need the opposite: definitions that +// are recorded *after* a use must retroactively reach it. Without this, +// features like rename and jump-to-definition would miss connections. +// +// ### Loop-carried definitions (`finish_loop_defs()`) +// +// ```r +// x <- 0 # def A +// while (cond) { +// print(x) # should see def A (pre-loop) AND def B (previous iteration) +// x <- 1 # def B +// } +// ``` +// +// When visiting the body top-to-bottom, `print(x)` is recorded before +// `x <- 1`, so it only sees `{A}`. But in a second iteration, `x <- 1` +// from the first iteration should reach `print(x)`. After the body, +// `finish_loop_defs()` diffs the pre-loop and post-body symbol states. +// Any new definition (here, B) is retroactively added to uses of that +// symbol recorded during the body. Result: `print(x)` sees `{A, B}`. +// +// ### Deferred definitions (`record_deferred_definition()`) +// +// `<<-` modifies a symbol that should already be bound in an ancestor scope (if +// there is no existing definition, R stores in the global environment, but +// we'll lint about it). For this reason, `<<-` _adds_ to the set of potential +// definitions reaching uses of that symbols, it doesn't overwrite like `<-` +// would. +// +// ```r +// x <- 0 # def A +// print(x) # should see def A AND def B +// f <- function() { +// x <<- 1 # def B (targets file scope) +// } +// ``` +// +// Here the `<<-` creates a definition in the file scope, but it's encountered +// during the function body walk, after `print(x)` was already recorded. +// `record_deferred_definition()` adds it to the live state (so future uses +// see it) and also stashes it. At finalization, `finish_deferred_defs()` +// retroactively adds it to all uses of that symbol, including `print(x)`. +// +// ## Interpreting `Bindings` +// +// Callers examine the two fields of `Bindings` at a use site to determine +// what happened along control flow: +// +// definitions | may_be_unbound | meaning +// ------------|----------------|-------- +// {A} | false | one definition, straight-line +// {B, C} | false | paths converge (e.g. both if/else branches) +// {A, B} | false | prior def + conditional redefinition +// {A} | true | conditional definition (e.g. if without else) +// {} | true | no local def, parent scope reference +// {A, B} | true | some paths define, some don't + +/// The immutable use-def map for a single scope. For each use site, stores the +/// set of definitions that can reach it through control flow. +#[derive(Debug)] +pub struct UseDefMap { + bindings_by_use: IndexVec, +} + +impl UseDefMap { + pub(crate) fn empty() -> Self { + Self { + bindings_by_use: IndexVec::new(), + } + } + + pub fn bindings_at_use(&self, use_id: UseId) -> &Bindings { + &self.bindings_by_use[use_id] + } +} + +/// The set of definitions that can reach a particular point in control flow, +/// plus whether the symbol may be unbound (no definition on some path). +/// +/// Definitions are stored sorted by ID so that merge is a linear merge-join. The +/// `SmallVec<[DefinitionId; 2]>` avoids heap allocation for the common case of +/// 1-2 live definitions. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Bindings { + definitions: SmallVec<[DefinitionId; 2]>, + may_be_unbound: bool, +} + +impl Bindings { + fn unbound() -> Self { + Self { + definitions: SmallVec::new(), + may_be_unbound: true, + } + } + + pub fn definitions(&self) -> &[DefinitionId] { + &self.definitions + } + + pub fn contains_definition(&self, id: DefinitionId) -> bool { + self.definitions.contains(&id) + } + + pub fn definition_count(&self) -> usize { + self.definitions.len() + } + + pub fn may_be_unbound(&self) -> bool { + self.may_be_unbound + } + + /// Replace all live definitions with a single new one, marking the + /// symbol as definitely bound. + fn record_definition(&mut self, def_id: DefinitionId) { + self.definitions.clear(); + self.definitions.push(def_id); + self.may_be_unbound = false; + } + + /// Add a definition to the live set without clearing existing ones and + /// without changing `may_be_unbound`. Used for loop-carried definitions + /// and scope-wide definitions (`<<-`). + fn add_definition(&mut self, def_id: DefinitionId) { + let pos = self.definitions.partition_point(|&id| id < def_id); + if pos >= self.definitions.len() || self.definitions[pos] != def_id { + self.definitions.insert(pos, def_id); + } + } + + /// Union definitions from `other` into `self`, OR the `may_be_unbound` + /// flags. Both sides are sorted by `DefinitionId`, so this is a linear + /// merge-join. + fn merge(&mut self, other: Bindings) { + self.definitions = sorted_union(&self.definitions, &other.definitions); + self.may_be_unbound |= other.may_be_unbound; + } +} + +/// Merge two sorted slices into a sorted `SmallVec` with no duplicates. +fn sorted_union(a: &[DefinitionId], b: &[DefinitionId]) -> SmallVec<[DefinitionId; 2]> { + a.iter() + .merge_join_by(b.iter(), |a, b| a.cmp(b)) + .map(|either| match either { + EitherOrBoth::Left(&id) | EitherOrBoth::Right(&id) | EitherOrBoth::Both(&id, _) => id, + }) + .collect() +} + +/// Mutable builder for constructing a [`UseDefMap`] during the tree walk. +/// One builder exists per scope. When entering a nested scope the current +/// builder is pushed onto a stack and a fresh one takes over. +#[derive(Debug)] +pub(crate) struct UseDefMapBuilder { + symbol_states: IndexVec, + bindings_by_use: IndexVec, + // Maps each use to its symbol, so retroactive fixups (for `<<-` and + // loop-carried definitions) can find which uses to patch for a given + // symbol. + symbol_for_use: IndexVec, + // Definitions whose effect on past uses is deferred to `finish()`. + // Currently used for `<<-` extra definitions in ancestor scopes. + deferred_defs: Vec<(SymbolId, DefinitionId)>, +} + +impl UseDefMapBuilder { + pub(crate) fn new() -> Self { + Self { + symbol_states: IndexVec::new(), + bindings_by_use: IndexVec::new(), + symbol_for_use: IndexVec::new(), + deferred_defs: Vec::new(), + } + } + + /// Ensure that `symbol_id` has an entry in `symbol_states`, growing the + /// vec with "unbound" entries as needed. Called after interning a symbol + /// so the use-def state stays in sync with the symbol table. + pub(crate) fn ensure_symbol(&mut self, symbol_id: SymbolId) { + // In practice this adds at most one entry (IDs are sequential), the + // `while` is defensive. + while self.symbol_states.len() <= symbol_id.index() { + self.symbol_states.push(Bindings::unbound()); + } + } + + /// Record a new binding for `symbol_id`. Replaces (shadows) all previous + /// live definitions for that symbol. + pub(crate) fn record_definition(&mut self, symbol_id: SymbolId, def_id: DefinitionId) { + self.symbol_states[symbol_id].record_definition(def_id); + } + + /// Record a definition whose effect on past uses is deferred to + /// `finish()`. The definition is added to the current flow state + /// immediately (so future uses see it), but uses already recorded + /// are patched up at finalization time. Used for `<<-` extra + /// definitions. + pub(crate) fn record_deferred_definition(&mut self, symbol_id: SymbolId, def_id: DefinitionId) { + self.symbol_states[symbol_id].add_definition(def_id); + self.deferred_defs.push((symbol_id, def_id)); + } + + /// After visiting a loop body, retroactively patch uses so that + /// definitions from the bottom of the body reach uses at the top + /// (simulating a previous iteration). + /// + /// Diffs each symbol's definitions before (`pre_loop`) and after the + /// body. Any definition present after but not before is new (it was + /// created inside the body). Those new definitions are added to all + /// uses of that symbol from `first_use` onwards, which covers exactly + /// the uses recorded during the body. + /// + /// This runs after the body (not eagerly at each definition) because + /// the body may contain branches. A diff at the end captures the + /// converged state after all snapshot/restore/merge within the body + /// has resolved. + pub(crate) fn finish_loop_defs(&mut self, pre_loop: &FlowSnapshot, first_use: UseId) { + for i in 0..self.symbol_states.len() { + let symbol_id = SymbolId::new(i); + + let pre_defs = if i < pre_loop.symbol_states.len() { + pre_loop.symbol_states[symbol_id].definitions() + } else { + // Symbol was first interned during the body, so it had + // no definitions before the loop. + &[] + }; + let post_defs = self.symbol_states[symbol_id].definitions(); + + // Collect new definitions introduced in the body + let new_defs: SmallVec<[DefinitionId; 2]> = post_defs + .iter() + .filter(|d| !pre_defs.contains(d)) + .copied() + .collect(); + + // Most symbols are unchanged, exit early in that case + if new_defs.is_empty() { + continue; + } + + // Add new defs to uses recorded during the body (`first_use` + // onwards). Uses before the loop are unaffected. + for j in first_use.index()..self.bindings_by_use.len() { + let use_id = UseId::new(j); + if self.symbol_for_use[use_id] == symbol_id { + for &def_id in &new_defs { + self.bindings_by_use[use_id].add_definition(def_id); + } + } + } + } + } + + /// Record a use of `symbol_id`. Clones the current live bindings for that + /// symbol and associates them with `use_id`. + pub(crate) fn record_use(&mut self, symbol_id: SymbolId, use_id: UseId) { + let bindings = self.symbol_states[symbol_id].clone(); + let pushed_id = self.bindings_by_use.push(bindings); + self.symbol_for_use.push(symbol_id); + stdext::soft_assert!(use_id == pushed_id); + } + + /// Take a snapshot of the current symbol states. + pub(crate) fn snapshot(&self) -> FlowSnapshot { + FlowSnapshot { + symbol_states: self.symbol_states.clone(), + } + } + + /// Restore state to a previously taken snapshot. + pub(crate) fn restore(&mut self, snapshot: FlowSnapshot) { + let num_symbols = self.symbol_states.len(); + self.symbol_states = snapshot.symbol_states; + + // New symbols may have been interned between snapshot and restore. + // Fill them in as "unbound" so IDs stay aligned. + while self.symbol_states.len() < num_symbols { + self.symbol_states.push(Bindings::unbound()); + } + } + + /// Merge a snapshot into the current state. For each symbol, union the + /// definition sets and OR the `may_be_unbound` flags. This reflects that + /// control flow could have taken either path to reach this point. + pub(crate) fn merge(&mut self, snapshot: FlowSnapshot) { + let mut snap_iter = snapshot.symbol_states.into_iter(); + + for i in 0..self.symbol_states.len() { + let id = SymbolId::new(i); + if let Some(snap_bindings) = snap_iter.next() { + self.symbol_states[id].merge(snap_bindings); + } else { + // Symbol didn't exist in the snapshot, so it was unbound on + // that path + self.symbol_states[id].merge(Bindings::unbound()); + } + } + } + + /// Finalize into an immutable [`UseDefMap`]. + pub(crate) fn finish(mut self) -> UseDefMap { + self.finish_deferred_defs(); + UseDefMap { + bindings_by_use: self.bindings_by_use, + } + } + + /// Retroactively add deferred definitions (from `<<-`) to all + /// uses of the corresponding symbol, including uses that were + /// recorded before the definition was encountered in the walk. + fn finish_deferred_defs(&mut self) { + for &(symbol_id, def_id) in &self.deferred_defs { + for i in 0..self.bindings_by_use.len() { + let use_id = UseId::new(i); + if self.symbol_for_use[use_id] == symbol_id { + self.bindings_by_use[use_id].add_definition(def_id); + } + } + } + } +} + +/// A snapshot of all symbol states at a particular point in control flow. +#[derive(Clone, Debug)] +pub(crate) struct FlowSnapshot { + symbol_states: IndexVec, +} diff --git a/crates/oak_index/tests/builder.rs b/crates/oak_index/tests/builder.rs index 74a307f9f..db6776b0a 100644 --- a/crates/oak_index/tests/builder.rs +++ b/crates/oak_index/tests/builder.rs @@ -549,20 +549,28 @@ fn test_repeat_loop() { #[test] fn test_super_assignment_at_file_scope() { - // At file scope, `<<-` records in file scope with IS_SUPER_BOUND + // At file scope, `<<-` targets the file scope itself (no parent to + // walk to), so the symbol gets both IS_SUPER_BOUND and IS_BOUND. let index = index("x <<- 1"); let file = ScopeId::from(0); let x = index.symbols(file).get("x").unwrap(); - assert_eq!(x.flags(), SymbolFlags::IS_SUPER_BOUND); + assert_eq!( + x.flags(), + SymbolFlags::IS_SUPER_BOUND.union(SymbolFlags::IS_BOUND) + ); - assert_eq!(index.definitions(file).len(), 1); - let DefinitionKind::SuperAssignment(node) = - index.definitions(file)[DefinitionId::from(0)].kind() - else { - panic!("expected SuperAssignment"); - }; - assert_eq!(node.kind(), RSyntaxKind::R_BINARY_EXPRESSION); + // Two definitions: one from the current-scope recording, one from the + // target-scope recording (same scope in this case). + assert_eq!(index.definitions(file).len(), 2); + assert!(matches!( + index.definitions(file)[DefinitionId::from(0)].kind(), + DefinitionKind::SuperAssignment(_) + )); + assert!(matches!( + index.definitions(file)[DefinitionId::from(1)].kind(), + DefinitionKind::SuperAssignment(_) + )); assert_eq!(index.uses(file).len(), 0); } @@ -572,9 +580,12 @@ fn test_super_assignment_right_at_file_scope() { let file = ScopeId::from(0); let x = index.symbols(file).get("x").unwrap(); - assert_eq!(x.flags(), SymbolFlags::IS_SUPER_BOUND); + assert_eq!( + x.flags(), + SymbolFlags::IS_SUPER_BOUND.union(SymbolFlags::IS_BOUND) + ); - assert_eq!(index.definitions(file).len(), 1); + assert_eq!(index.definitions(file).len(), 2); assert!(matches!( index.definitions(file)[DefinitionId::from(0)].kind(), DefinitionKind::SuperAssignment(_) @@ -585,16 +596,23 @@ fn test_super_assignment_right_at_file_scope() { #[test] fn test_super_assignment_recorded_in_current_scope() { // `<<-` records the definition in the function scope where it lexically - // appears, not in an ancestor scope. + // appears AND an extra definition in the parent scope. let index = index("f <- function() { x <<- 1 }"); let file = ScopeId::from(0); let fun = ScopeId::from(1); - // File scope only has `f` - assert!(index.symbols(file).get("x").is_none()); - assert_eq!(index.definitions(file).len(), 1); + // File scope has `x` with IS_BOUND (extra definition from `<<-`) + // and `f` with IS_BOUND. The `x <<-` definition is added during + // function body processing, before `f <-`. + let x_file = index.symbols(file).get("x").unwrap(); + assert_eq!(x_file.flags(), SymbolFlags::IS_BOUND); + assert_eq!(index.definitions(file).len(), 2); assert!(matches!( index.definitions(file)[DefinitionId::from(0)].kind(), + DefinitionKind::SuperAssignment(_) + )); + assert!(matches!( + index.definitions(file)[DefinitionId::from(1)].kind(), DefinitionKind::Assignment(_) )); @@ -614,7 +632,8 @@ fn test_super_assignment_right_recorded_in_current_scope() { let file = ScopeId::from(0); let fun = ScopeId::from(1); - assert!(index.symbols(file).get("x").is_none()); + let x_file = index.symbols(file).get("x").unwrap(); + assert_eq!(x_file.flags(), SymbolFlags::IS_BOUND); let x = index.symbols(fun).get("x").unwrap(); assert_eq!(x.flags(), SymbolFlags::IS_SUPER_BOUND); @@ -623,12 +642,13 @@ fn test_super_assignment_right_recorded_in_current_scope() { #[test] fn test_super_assignment_does_not_pollute_ancestor() { // `x <- 1` is in file scope, `x <<- 2` is in the function. The `<<-` - // does NOT add a definition to the file scope. + // adds an extra definition to the file scope in addition to the + // existing `x <- 1` assignment. let index = index("x <- 1\nf <- function() { x <<- 2 }"); let file = ScopeId::from(0); let fun = ScopeId::from(1); - // File scope: `x` has IS_BOUND from the `<-`, `f` has IS_BOUND + // File scope: `x` has IS_BOUND (from both `<-` and `<<-`), `f` has IS_BOUND let x_file = index.symbols(file).get("x").unwrap(); assert_eq!(x_file.flags(), SymbolFlags::IS_BOUND); @@ -637,11 +657,15 @@ fn test_super_assignment_does_not_pollute_ancestor() { .iter() .filter(|(_, d)| index.symbols(file).symbol(d.symbol()).name() == "x") .collect(); - assert_eq!(x_file_defs.len(), 1); + assert_eq!(x_file_defs.len(), 2); assert!(matches!( x_file_defs[0].1.kind(), DefinitionKind::Assignment(_) )); + assert!(matches!( + x_file_defs[1].1.kind(), + DefinitionKind::SuperAssignment(_) + )); // Function scope: `x` has IS_SUPER_BOUND from the `<<-` let x_fun = index.symbols(fun).get("x").unwrap(); @@ -656,7 +680,8 @@ fn test_super_assignment_does_not_pollute_ancestor() { #[test] fn test_super_assignment_nested_recorded_in_inner_scope() { // `x` is bound in both file and outer function. `<<-` in the inner - // function records the definition in the inner scope, not in any ancestor. + // function targets the outer function scope (immediate parent), adding + // an extra definition there. let index = index("x <- 0\nf <- function() { x <- 1; g <- function() { x <<- 2 } }"); let file = ScopeId::from(0); let outer = ScopeId::from(1); @@ -666,7 +691,8 @@ fn test_super_assignment_nested_recorded_in_inner_scope() { let x_file = index.symbols(file).get("x").unwrap(); assert_eq!(x_file.flags(), SymbolFlags::IS_BOUND); - // Outer function: `x` has IS_BOUND (from `<-`), no super-assignment here + // Outer function: `x` has IS_BOUND (from both `x <- 1` and the `<<-` + // extra definition from the inner function) let x_outer = index.symbols(outer).get("x").unwrap(); assert_eq!(x_outer.flags(), SymbolFlags::IS_BOUND); @@ -675,11 +701,15 @@ fn test_super_assignment_nested_recorded_in_inner_scope() { .iter() .filter(|(_, d)| index.symbols(outer).symbol(d.symbol()).name() == "x") .collect(); - assert_eq!(x_outer_defs.len(), 1); + assert_eq!(x_outer_defs.len(), 2); assert!(matches!( x_outer_defs[0].1.kind(), DefinitionKind::Assignment(_) )); + assert!(matches!( + x_outer_defs[1].1.kind(), + DefinitionKind::SuperAssignment(_) + )); // Inner function: `x` has IS_SUPER_BOUND (from `<<-`) let x_inner = index.symbols(inner).get("x").unwrap(); @@ -693,8 +723,9 @@ fn test_super_assignment_nested_recorded_in_inner_scope() { #[test] fn test_super_assignment_coexists_with_use_in_ancestors() { - // Outer function uses `x` but doesn't bind it. `<<-` in the inner - // function records in the inner scope. Ancestors are unaffected. + // `<<-` in inner function walks up from outer, finds `x` bound in file + // scope (from `x <- 1`), so it targets the file scope -- not the outer + // function where `x` is only used. let index = index("x <- 1\nf <- function() { print(x); g <- function() { x <<- 2 } }"); let file = ScopeId::from(0); let outer = ScopeId::from(1); @@ -703,7 +734,8 @@ fn test_super_assignment_coexists_with_use_in_ancestors() { let x_file = index.symbols(file).get("x").unwrap(); assert_eq!(x_file.flags(), SymbolFlags::IS_BOUND); - // Outer function has `x` as `IS_USED` only (from `print(x)`) + // Outer function has `x` as IS_USED only (from `print(x)`). The `<<-` + // skips it because `x` is not bound here. let x_outer = index.symbols(outer).get("x").unwrap(); assert_eq!(x_outer.flags(), SymbolFlags::IS_USED); @@ -714,22 +746,24 @@ fn test_super_assignment_coexists_with_use_in_ancestors() { #[test] fn test_super_assignment_not_visible_to_resolve_symbol() { - // `<<-` does not create a binding visible to `resolve_symbol` (which - // checks IS_BOUND, not IS_SUPER_BOUND). Cross-scope effects of `<<-` - // are a runtime concern, not statically modelled. + // `<<-` creates an extra definition in the parent scope with IS_BOUND, + // which is visible to `resolve_symbol`. let index = index("f <- function() { x <<- 1 }"); let file = ScopeId::from(0); let fun = ScopeId::from(1); - // File scope has no `x` - assert!(index.symbols(file).get("x").is_none()); + // File scope has `x` with IS_BOUND (extra definition from `<<-`) + let x_file = index.symbols(file).get("x").unwrap(); + assert_eq!(x_file.flags(), SymbolFlags::IS_BOUND); // Function scope has `x` with IS_SUPER_BOUND let x = index.symbols(fun).get("x").unwrap(); assert_eq!(x.flags(), SymbolFlags::IS_SUPER_BOUND); - // resolve_symbol does not find `x` because no scope has IS_BOUND for it - assert!(index.resolve_symbol("x", fun).is_none()); + // `resolve_symbol` finds `x` in the file scope via the extra definition + let resolved = index.resolve_symbol("x", fun); + assert!(resolved.is_some()); + assert_eq!(resolved.unwrap().0, file); } #[test] @@ -1122,7 +1156,8 @@ fn test_string_super_assignment() { let file = ScopeId::from(0); let fun = ScopeId::from(1); - assert!(index.symbols(file).get("x").is_none()); + let x_file = index.symbols(file).get("x").unwrap(); + assert_eq!(x_file.flags(), SymbolFlags::IS_BOUND); let x = index.symbols(fun).get("x").unwrap(); assert_eq!(x.flags(), SymbolFlags::IS_SUPER_BOUND); @@ -1157,6 +1192,7 @@ fn test_nested_for_loops() { let j = index.symbols(file).get("j").unwrap(); assert_eq!(j.flags(), SymbolFlags::IS_BOUND.union(SymbolFlags::IS_USED)); + // 2 real defs (i, j), no LoopHeader placeholders. assert_eq!(index.definitions(file).len(), 2); } @@ -1173,5 +1209,6 @@ fn test_assignment_in_for_body() { let x = index.symbols(file).get("x").unwrap(); assert_eq!(x.flags(), SymbolFlags::IS_BOUND); + // 2 real defs (i, x), no LoopHeader placeholders. assert_eq!(index.definitions(file).len(), 2); } diff --git a/crates/oak_index/tests/use_def_map.rs b/crates/oak_index/tests/use_def_map.rs new file mode 100644 index 000000000..50e96454b --- /dev/null +++ b/crates/oak_index/tests/use_def_map.rs @@ -0,0 +1,935 @@ +use aether_parser::parse; +use aether_parser::RParserOptions; +use oak_index::builder::build; +use oak_index::semantic_index::DefinitionId; +use oak_index::semantic_index::ScopeId; +use oak_index::semantic_index::SemanticIndex; +use oak_index::semantic_index::UseId; +use stdext::assert_not; + +fn index(source: &str) -> SemanticIndex { + let parsed = parse(source, RParserOptions::default()); + build(&parsed.tree()) +} + +// --- Straight-line code --- + +#[test] +fn test_single_def_single_use() { + let index = index("x <- 1\nx"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_use_before_def_is_unbound() { + let index = index("x\nx <- 1"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert!(bindings.definitions().is_empty()); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_second_def_shadows_first() { + let index = index("x <- 1\nx <- 2\nx"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // The use of `x` should see only the second definition. + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_use_between_defs() { + let index = index("x <- 1\nx\nx <- 2\nx"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // First use sees def 0, second use sees def 1. + let use0 = map.bindings_at_use(UseId::from(0)); + assert_eq!(use0.definitions(), &[DefinitionId::from(0)]); + assert_not!(use0.may_be_unbound()); + + let use1 = map.bindings_at_use(UseId::from(1)); + assert_eq!(use1.definitions(), &[DefinitionId::from(1)]); + assert_not!(use1.may_be_unbound()); +} + +#[test] +fn test_rhs_use_sees_previous_binding() { + let index = index("x <- 1\nx <- x + 1\nx"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // `x` on the RHS of the second assignment (use 0) + let rhs_use = map.bindings_at_use(UseId::from(0)); + assert_eq!(rhs_use.definitions(), &[DefinitionId::from(0)]); + + // Final `x` (use 1) + let final_use = map.bindings_at_use(UseId::from(1)); + assert_eq!(final_use.definitions(), &[DefinitionId::from(1)]); +} + +#[test] +fn test_different_symbols_independent() { + let index = index("x <- 1\ny <- 2\nx\ny"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let use_x = map.bindings_at_use(UseId::from(0)); + assert_eq!(use_x.definitions(), &[DefinitionId::from(0)]); + assert_not!(use_x.may_be_unbound()); + + let use_y = map.bindings_at_use(UseId::from(1)); + assert_eq!(use_y.definitions(), &[DefinitionId::from(1)]); + assert_not!(use_y.may_be_unbound()); +} + +#[test] +fn test_unbound_symbol() { + let index = index("x"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert!(bindings.definitions().is_empty()); + assert!(bindings.may_be_unbound()); +} + +// --- If/else --- + +#[test] +fn test_if_else_both_branches_define() { + let index = index( + "\ +x <- 1 # def 0 +if (cond) { + x <- 2 # def 1 +} else { + x <- 3 # def 2 +} +x # use 1 -> {def 1, def 2} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(1), + DefinitionId::from(2) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_if_only_one_branch_defines() { + let index = index( + "\ +x <- 1 # def 0 +if (cond) { + x <- 2 # def 1 +} +x # use 1 -> {def 0, def 1} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_if_no_prior_def_one_branch() { + let index = index( + "\ +if (cond) { + x <- 1 # def 0 +} +x # use 1 -> {def 0}, may_be_unbound +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_if_no_prior_def_both_branches() { + let index = index( + "\ +if (cond) { + x <- 1 # def 0 +} else { + x <- 2 # def 1 +} +x # use 1 -> {def 0, def 1}, not unbound +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_if_condition_uses_see_pre_if_state() { + let index = index("x <- 1\nif (x) {}"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Use of `x` in condition sees def 0. + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_nested_if_else() { + let index = index( + "\ +if (c1) { + if (c2) { + x <- 1 # def 0 + } else { + x <- 2 # def 1 + } +} else { + x <- 3 # def 2 +} +x # use 2 -> {def 0, def 1, def 2} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: c1 is use 0, c2 is use 1, final x is use 2. + let bindings = map.bindings_at_use(UseId::from(2)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1), + DefinitionId::from(2) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_if_else_without_braces() { + let index = index("if (cond) x <- 1 else x <- 2\nx"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +// --- For loops --- + +#[test] +fn test_for_variable_is_definite() { + let index = index("for (i in xs) {}\ni"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // `i` is always bound (R sets to NULL for empty sequences). + // Uses: `xs` is use 0, `i` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_for_body_assignment_is_conditional() { + let index = index( + "\ +for (i in xs) { # def 0 (i) + x <- 1 # def 1 +} +x # use 1 -> {def 1}, may_be_unbound (body may not execute) +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `xs` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_for_body_assignment_merges_with_pre_loop() { + let index = index( + "\ +x <- 0 # def 0 +for (i in xs) { # def 1 (i) + x <- 1 # def 2 +} +x # use 1 -> {def 0, def 2} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `xs` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(2) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_for_variable_used_inside_body() { + let index = index( + "\ +for (i in xs) { + print(i) # use of `i` inside body sees for-variable def +} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `xs` is use 0, `print` is use 1, `i` is use 2. + let bindings = map.bindings_at_use(UseId::from(2)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +// --- While loops --- + +#[test] +fn test_while_body_is_conditional() { + let index = index( + "\ +while (cond) { + x <- 1 # def 0 +} +x # use 1 -> {def 0}, may_be_unbound +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_while_merges_with_pre_loop() { + let index = index( + "\ +x <- 0 # def 0 +while (cond) { + x <- 1 # def 1 +} +x # use 1 -> {def 0, def 1} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_while_condition_use() { + let index = index("x <- 1\nwhile (x) {}"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Condition use sees def 0. + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +// --- Repeat loops --- + +#[test] +fn test_repeat_body_is_definite() { + let index = index( + "\ +repeat { + x <- 1 # def 0 + break +} +x # use 0 -> {def 0}, not unbound +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_repeat_shadows_prior_def() { + let index = index( + "\ +x <- 0 # def 0 +repeat { + x <- 1 # def 1 + break +} +x # use 0 -> {def 1} (repeat always executes) +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert_not!(bindings.may_be_unbound()); +} + +// --- Loop-carried definitions --- +// +// Uses at the top of a loop body can see definitions from the bottom of the +// body (from a previous iteration). The builder synthesizes `LoopHeader` +// placeholders before visiting the body, then populates them with the real +// definitions that are live at the end of the body. The placeholders never +// appear in `bindings_at_use` results. + +#[test] +fn test_while_loop_carried_def() { + let index = index( + "\ +x <- 0 # def 0 +while (cond) { + x # use 1: sees {def 0, def 1} (pre-loop OR previous iteration) + x <- 1 # def 1 +} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, `x` inside body is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_for_loop_carried_def() { + let index = index( + "\ +x <- 0 # def 0 +for (i in xs) { # def 1 (i) + x # use 1: sees {def 0, def 2} + x <- 1 # def 2 +} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `xs` is use 0, `x` inside body is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(2) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_repeat_loop_carried_def() { + let index = index( + "\ +x <- 0 # def 0 +repeat { + x # use 0: sees {def 0, def 1} + x <- 1 # def 1 + break +} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_loop_carried_unbound_before_loop() { + let index = index( + "\ +while (cond) { + x # use 1: sees {def 0}, may_be_unbound + x <- 1 # def 0 +} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, `x` inside body is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert!(bindings.may_be_unbound()); +} + +// --- Function scopes --- + +#[test] +fn test_function_scope_independent_use_def() { + let index = index( + "\ +x <- 1 # file: def 0 +f <- function(y) { # file: def 1, fun: def 0 (y param) + y # fun: use 0 +} +", + ); + let fun = ScopeId::from(1); + let map = index.use_def_map(fun); + + // In function scope, `y` (use 0) sees the parameter (def 0). + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_function_parameter_shadows() { + let index = index( + "\ +function(x) { # def 0 (param) + x <- 1 # def 1 + x # use 0 -> def 1 +} +", + ); + let fun = ScopeId::from(1); + let map = index.use_def_map(fun); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_function_unbound_reference() { + let index = index( + "\ +function() { + x # use 0: not bound in this scope +} +", + ); + let fun = ScopeId::from(1); + let map = index.use_def_map(fun); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert!(bindings.definitions().is_empty()); + assert!(bindings.may_be_unbound()); +} + +// --- Super-assignment --- + +#[test] +fn test_super_assignment_not_in_function_use_def() { + let index = index( + "\ +function() { + x <<- 1 # fun: def 0, recorded with IS_SUPER_BOUND, skipped by use-def + x # fun: use 0: unbound in function scope +} +", + ); + let fun = ScopeId::from(1); + let map = index.use_def_map(fun); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert!(bindings.definitions().is_empty()); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_super_assignment_visible_in_parent_use_def() { + let index = index( + "\ +x <- 1 # file: def 0 +f <- function() { x <<- 2 } # file: def 1 (x <<- extra def), def 2 (f) +x # file: use 0 -> {def 0, def 1} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // The <<- extra definition (def 1) is recorded in the file scope + // during function body processing, before the `f <-` assignment (def 2). + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_super_assignment_without_prior_def() { + let index = index( + "\ +f <- function() { x <<- 1 } # file: def 0 (x <<- extra def), def 1 (f) +x # file: use 0 -> {def 0}, may_be_unbound +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_super_assignment_visible_before_function_def() { + let index = index( + "\ +x # file: use 0 -> {def 0}, may_be_unbound +f <- function() { x <<- 1 } # file: def 0 (x <<- in parent), def 1 (f) +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // `<<-` definitions are scope-wide, so the use of `x` before the + // function definition still sees the `<<-` binding. + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_super_assignment_merges_with_if() { + let index = index( + "\ +x <- 1 # file: def 0 +if (cond) { + f <- function() { x <<- 2 } # file: def 1 (x <<- extra def), def 2 (f) +} +x # use 1 -> {def 0, def 1} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_super_assignment_targets_grandparent() { + let index = index( + "\ +x <- 1 # file: def 0 +f <- function() { # file: def 2 (f) + g <- function() { x <<- 2 } # file: def 1 (x <<-) +} +x # file: use 0 -> {def 0, def 1} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // `<<-` in g walks up: g's parent is f, f has no binding for x, + // so it continues to file scope where x has IS_BOUND. The extra + // binding lands in the file scope, skipping the intermediate f scope. + let bindings = map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_super_assignment_targets_intermediate_scope() { + let index = index( + "\ +x <- 1 # file: def 0 +f <- function() { + x <- 10 # outer: def 0 + g <- function() { x <<- 2 } # outer: def 1 (x <<-) +} +x # file: use 0 -> {def 0} only +", + ); + let file = ScopeId::from(0); + let outer = ScopeId::from(1); + + // `<<-` in g walks up: g's parent is f, f has x with IS_BOUND + // (from `x <- 10`), so it targets f -- not the file scope. + let file_map = index.use_def_map(file); + let bindings = file_map.bindings_at_use(UseId::from(0)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); + + // The extra binding is in f's scope, not the file scope. + let outer_defs: Vec<_> = index + .definitions(outer) + .iter() + .filter(|(_, d)| index.symbols(outer).symbol(d.symbol()).name() == "x") + .collect(); + assert_eq!(outer_defs.len(), 2); +} + +// --- Combined control flow --- + +#[test] +fn test_if_inside_for() { + let index = index( + "\ +for (i in xs) { # def 0 (i) + if (cond) { + x <- 1 # def 1 + } +} +x # use 2: may_be_unbound (loop might not run, if might not match) +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `xs` is use 0, `cond` is use 1, final `x` is use 2. + let bindings = map.bindings_at_use(UseId::from(2)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(1)]); + assert!(bindings.may_be_unbound()); +} + +#[test] +fn test_if_else_inside_while() { + let index = index( + "\ +x <- 0 # def 0 +while (cond) { + if (c2) { + x <- 1 # def 1 + } else { + x <- 2 # def 2 + } +} +x # use 2 -> {def 0, def 1, def 2} (while may not execute) +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, `c2` is use 1, final `x` is use 2. + let bindings = map.bindings_at_use(UseId::from(2)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1), + DefinitionId::from(2) + ]); + assert_not!(bindings.may_be_unbound()); +} + +#[test] +fn test_assignment_in_if_condition() { + // In R, `if (x <- f()) x` is valid: the `<-` in the condition creates + // a binding. The use of `x` in the consequence should see it. + let index = index("if (x <- f()) x"); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `f` is use 0. The `x` in consequence is use 1. + // Def 0 is `x <- f()`. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +// --- Duplicate definitions don't appear twice in merge --- + +#[test] +fn test_merge_deduplicates() { + let index = index( + "\ +x <- 1 # def 0 +if (cond) { + y <- 1 # def 1 (different symbol) +} +x # use 1: should see only {def 0}, not {def 0, def 0} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, final `x` is use 1. + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(bindings.may_be_unbound()); +} + +// --- Control flow inside functions --- + +#[test] +fn test_if_else_in_function() { + let index = index( + "\ +function(x) { # def 0 (param) + if (x) { + y <- 1 # def 1 + } else { + y <- 2 # def 2 + } + y # use 1 -> {def 1, def 2} +} +", + ); + let fun = ScopeId::from(1); + let map = index.use_def_map(fun); + + // In function scope: uses are `x` (use 0), then `y` (use 1). + let bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(bindings.definitions(), &[ + DefinitionId::from(1), + DefinitionId::from(2) + ]); + assert_not!(bindings.may_be_unbound()); +} + +// --- Multiple unrelated symbols through control flow --- + +#[test] +fn test_different_symbols_through_if() { + let index = index( + "\ +x <- 1 # def 0 +if (cond) { + y <- 2 # def 1 +} +x # use 1: sees def 0, not unbound +y # use 2: sees def 1, may_be_unbound +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + // Uses: `cond` is use 0, `x` is use 1, `y` is use 2. + let x_bindings = map.bindings_at_use(UseId::from(1)); + assert_eq!(x_bindings.definitions(), &[DefinitionId::from(0)]); + assert_not!(x_bindings.may_be_unbound()); + + let y_bindings = map.bindings_at_use(UseId::from(2)); + assert_eq!(y_bindings.definitions(), &[DefinitionId::from(1)]); + assert!(y_bindings.may_be_unbound()); +} + +// --- Connected component patterns from the design doc --- + +#[test] +fn test_design_doc_disconnected_components() { + let index = index( + "\ +x <- 1 # def 0 +print(x) # use 0 (print), use 1 (x) -> {def 0} +if (cond) { + x <- 2 # def 1 +} else { + x <- 3 # def 2 +} +print(x) # use 2 (cond), use 3 (print), use 4 (x) -> {def 1, def 2} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let first_x = map.bindings_at_use(UseId::from(1)); + assert_eq!(first_x.definitions(), &[DefinitionId::from(0)]); + assert_not!(first_x.may_be_unbound()); + + let second_x = map.bindings_at_use(UseId::from(4)); + assert_eq!(second_x.definitions(), &[ + DefinitionId::from(1), + DefinitionId::from(2) + ]); + assert_not!(second_x.may_be_unbound()); +} + +#[test] +fn test_design_doc_connected_component() { + let index = index( + "\ +x <- 1 # def 0 +print(x) # use 0 (print), use 1 (x) -> {def 0} +if (cond) { + x <- 2 # def 1 +} +print(x) # use 2 (cond), use 3 (print), use 4 (x) -> {def 0, def 1} +", + ); + let file = ScopeId::from(0); + let map = index.use_def_map(file); + + let first_x = map.bindings_at_use(UseId::from(1)); + assert_eq!(first_x.definitions(), &[DefinitionId::from(0)]); + + // Linked to both def 0 and def 1. + let second_x = map.bindings_at_use(UseId::from(4)); + assert_eq!(second_x.definitions(), &[ + DefinitionId::from(0), + DefinitionId::from(1) + ]); + assert_not!(second_x.may_be_unbound()); +} diff --git a/crates/stdext/src/result.rs b/crates/stdext/src/result.rs index d39fed779..9a607d41f 100644 --- a/crates/stdext/src/result.rs +++ b/crates/stdext/src/result.rs @@ -31,6 +31,18 @@ macro_rules! soft_assert { }; } +/// Like `assert!` but for negative conditions. `assert_not!(x)` reads +/// more clearly than `assert!(!x)`. +#[macro_export] +macro_rules! assert_not { + ( $cond:expr $(,)? ) => { + assert!(!$cond); + }; + ( $cond:expr, $($fmt_arg:tt)+ ) => { + assert!(!$cond, $($fmt_arg)+); + }; +} + // From https://github.com/zed-industries/zed/blob/a910c594/crates/util/src/util.rs#L554 pub trait ResultExt { type Ok;