Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ serde_json = { version = "1.0.94", features = ["preserve_order"] }
serde_repr = "0.1.17"
serde_with = "3.0.0"
sha2 = "0.10.6"
smallvec = "1.13.2"
stdext = { path = "crates/stdext" }
streaming-iterator = "0.1.9"
strum = "0.26.2"
Expand Down
4 changes: 4 additions & 0 deletions crates/oak_index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@ workspace = true
[dependencies]
aether_syntax.workspace = true
biome_rowan.workspace = true
itertools.workspace = true
log.workspace = true
rustc-hash.workspace = true
smallvec.workspace = true
stdext.workspace = true

[dev-dependencies]
aether_parser.workspace = true
242 changes: 213 additions & 29 deletions crates/oak_index/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ use crate::semantic_index::SymbolFlags;
use crate::semantic_index::SymbolTableBuilder;
use crate::semantic_index::Use;
use crate::semantic_index::UseId;
use crate::use_def_map::UseDefMap;
use crate::use_def_map::UseDefMapBuilder;

/// Build a [`SemanticIndex`] from a parsed R file.
pub fn build(root: &RRoot) -> SemanticIndex {
Expand All @@ -47,6 +49,9 @@ struct SemanticIndexBuilder {
symbol_tables: IndexVec<ScopeId, SymbolTableBuilder>,
definitions: IndexVec<ScopeId, IndexVec<DefinitionId, Definition>>,
uses: IndexVec<ScopeId, IndexVec<UseId, Use>>,
use_def_maps: IndexVec<ScopeId, UseDefMap>,
current_use_def: UseDefMapBuilder,
use_def_stack: Vec<UseDefMapBuilder>,
current_scope: ScopeId,
}

Expand All @@ -56,6 +61,7 @@ impl SemanticIndexBuilder {
let mut symbol_tables = IndexVec::new();
let mut definitions = IndexVec::new();
let mut uses = IndexVec::new();
let mut use_def_maps = IndexVec::new();

// The descendants range starts empty (`n+1..n+1`). `pop_scope` later
// fills in `descendants.end` with the current arena length. Everything
Expand All @@ -70,12 +76,16 @@ impl SemanticIndexBuilder {
symbol_tables.push(SymbolTableBuilder::new());
definitions.push(IndexVec::new());
uses.push(IndexVec::new());
use_def_maps.push(UseDefMap::empty());

Self {
scopes,
symbol_tables,
definitions,
uses,
use_def_maps,
current_use_def: UseDefMapBuilder::new(),
use_def_stack: Vec::new(),
current_scope: file,
}
}
Expand All @@ -99,6 +109,10 @@ impl SemanticIndexBuilder {
self.symbol_tables.push(SymbolTableBuilder::new());
self.definitions.push(IndexVec::new());
self.uses.push(IndexVec::new());
self.use_def_maps.push(UseDefMap::empty());

let parent_use_def = std::mem::replace(&mut self.current_use_def, UseDefMapBuilder::new());
self.use_def_stack.push(parent_use_def);

id
}
Expand All @@ -111,6 +125,13 @@ impl SemanticIndexBuilder {
Some(parent) => parent,
None => panic!("`pop_scope()` called on the file scope"),
};

let parent_use_def = match self.use_def_stack.pop() {
Some(builder) => builder,
None => panic!("`pop_scope()` called with empty use-def stack"),
};
let finalized = std::mem::replace(&mut self.current_use_def, parent_use_def).finish();
self.use_def_maps[id] = finalized;
}

fn add_definition(
Expand All @@ -120,17 +141,103 @@ impl SemanticIndexBuilder {
kind: DefinitionKind,
range: TextRange,
) {
let symbol = self.symbol_tables[self.current_scope].intern(name, flags);
let symbol_id = self.symbol_tables[self.current_scope].intern(name, flags);
let def_id = self.definitions[self.current_scope].push(Definition {
symbol: symbol_id,
kind,
range,
});

self.current_use_def.ensure_symbol(symbol_id);
self.current_use_def.record_definition(symbol_id, def_id);
}

// Super-assignment is lexically in the current scope but binds in an
// ancestor. We record the definition in the current scope and append
// it to the target scope's use-def map (without shadowing prior
// definitions).
//
// R's `<<-` walks up the environment chain from the parent, targeting
// the first scope where the symbol is already bound. If no binding is
// found, it assigns in the global (file) scope.
fn add_super_definition(&mut self, name: &str, kind: DefinitionKind, range: TextRange) {
let symbol_id =
self.symbol_tables[self.current_scope].intern(name, SymbolFlags::IS_SUPER_BOUND);
self.definitions[self.current_scope].push(Definition {
symbol,
symbol: symbol_id,
kind: kind.clone(),
range,
});

let target_scope = self.resolve_super_target(name);

let target_symbol = self.symbol_tables[target_scope].intern(name, SymbolFlags::IS_BOUND);
let target_def_id = self.definitions[target_scope].push(Definition {
symbol: target_symbol,
kind,
range,
});

let builder = self.use_def_builder_mut(target_scope);
builder.ensure_symbol(target_symbol);
builder.record_deferred_definition(target_symbol, target_def_id);
}

// Walk up from the parent scope looking for a scope where `name` already
// has `IS_BOUND`. Returns that scope, or the file scope if no binding is
// found (mirroring R's assignment to the global environment).
fn resolve_super_target(&self, name: &str) -> ScopeId {
let file_scope = ScopeId::from(0);
let mut scope = match self.scopes[self.current_scope].parent {
Some(parent) => parent,
None => return file_scope,
};

loop {
if let Some(id) = self.symbol_tables[scope].id(name) {
if self.symbol_tables[scope]
.symbol(id)
.flags()
.contains(SymbolFlags::IS_BOUND)
{
return scope;
}
}
scope = match self.scopes[scope].parent {
Some(parent) => parent,
None => return file_scope,
};
}
}

fn use_def_builder_mut(&mut self, target: ScopeId) -> &mut UseDefMapBuilder {
if target == self.current_scope {
return &mut self.current_use_def;
}

let mut steps = 0;
let mut scope = self.current_scope;
while scope != target {
scope = match self.scopes[scope].parent {
Some(parent) => parent,
None => panic!("Target scope {target:?} is not an ancestor of current scope"),
};
steps += 1;
}

let index = self.use_def_stack.len() - steps;
&mut self.use_def_stack[index]
}

fn add_use(&mut self, name: &str, range: TextRange) {
let symbol = self.symbol_tables[self.current_scope].intern(name, SymbolFlags::IS_USED);
self.uses[self.current_scope].push(Use { symbol, range });
let symbol_id = self.symbol_tables[self.current_scope].intern(name, SymbolFlags::IS_USED);
let use_id = self.uses[self.current_scope].push(Use {
symbol: symbol_id,
range,
});

self.current_use_def.ensure_symbol(symbol_id);
self.current_use_def.record_use(symbol_id, use_id);
}

// --- Recursive descent ---
Expand Down Expand Up @@ -224,6 +331,10 @@ impl SemanticIndexBuilder {
},

AnyRExpression::RForStatement(stmt) => {
// The for variable is always bound (R sets it to NULL for
// empty sequences), so its binding is recorded before the
// snapshot. Assignments inside the body are conditional
// (body may not execute for empty sequences).
if let Ok(variable) = stmt.variable() {
self.add_definition(
&identifier_text(&variable),
Expand All @@ -235,17 +346,78 @@ impl SemanticIndexBuilder {
if let Ok(sequence) = stmt.sequence() {
self.collect_expression(&sequence);
}

let pre_loop = self.current_use_def.snapshot();

if let Ok(body) = stmt.body() {
let first_use = self.uses[self.current_scope].next_id();
self.collect_expression(&body);
self.current_use_def.finish_loop_defs(&pre_loop, first_use);
}

self.current_use_def.merge(pre_loop);
},

AnyRExpression::RIfStatement(stmt) => {
// Condition is always evaluated
if let Ok(condition) = stmt.condition() {
self.collect_expression(&condition);
}

let pre_if = self.current_use_def.snapshot();

// If-body (consequence)
if let Ok(consequence) = stmt.consequence() {
self.collect_expression(&consequence);
}

let post_if = self.current_use_def.snapshot();
self.current_use_def.restore(pre_if);

// Else-body (alternative), if present. If absent, the
// "else path" is just the pre-if state we restored to.
if let Some(else_clause) = stmt.else_clause() {
if let Ok(alternative) = else_clause.alternative() {
self.collect_expression(&alternative);
}
}

// After: definitions from both branches are live
self.current_use_def.merge(post_if);
},

AnyRExpression::RWhileStatement(stmt) => {
if let Ok(condition) = stmt.condition() {
self.collect_expression(&condition);
}

let pre_loop = self.current_use_def.snapshot();

if let Ok(body) = stmt.body() {
let first_use = self.uses[self.current_scope].next_id();
self.collect_expression(&body);
self.current_use_def.finish_loop_defs(&pre_loop, first_use);
}

// Body may not execute
self.current_use_def.merge(pre_loop);
},

AnyRExpression::RRepeatStatement(stmt) => {
// Body always executes at least once, no snapshot needed
if let Ok(body) = stmt.body() {
let pre_loop = self.current_use_def.snapshot();
let first_use = self.uses[self.current_scope].next_id();
self.collect_expression(&body);
self.current_use_def.finish_loop_defs(&pre_loop, first_use);
}
},

AnyRExpression::RBogusExpression(_) => {},

// Generic fallback: walk over descendant nodes and collect their
// `AnyRExpression` children, letting `collect_expression`
// handle their contents. This covers `RIfStatement`,
// `RWhileStatement`, `RRepeatStatement`, `RUnaryExpression`,
// handle their contents. This covers `RUnaryExpression`,
// `RParenthesizedExpression`, `RReturnExpression`, literals, and
// any future expression types without needing explicit arms.
//
Expand Down Expand Up @@ -354,34 +526,16 @@ impl SemanticIndexBuilder {
let target = if right { op.right() } else { op.left() };
let Ok(target) = target else { return };

let (name, range) = match &target {
AnyRExpression::RIdentifier(ident) => {
let name = identifier_text(ident);
let range = ident.syntax().text_trimmed_range();
(name, range)
},

// `"x" <- 1` is equivalent to `x <- 1` in R
AnyRExpression::AnyRValue(AnyRValue::RStringValue(s)) => {
let Some(name) = string_value_text(s) else {
return;
};
let range = s.syntax().text_trimmed_range();
(name, range)
},

let Some((name, range)) = assignment_target_name(&target) else {
// Complex target (`x$foo <- rhs`, `x[1] <- rhs`, etc.) does
// not represent a binding. We recurse for uses.
other => {
self.collect_expression(other);
return;
},
self.collect_expression(&target);
return;
};

if super_assign {
self.add_definition(
self.add_super_definition(
&name,
SymbolFlags::IS_SUPER_BOUND,
DefinitionKind::SuperAssignment(op.syntax().clone()),
range,
);
Expand All @@ -406,8 +560,18 @@ impl SemanticIndexBuilder {

fn finish(mut self) -> SemanticIndex {
self.scopes[ScopeId::from(0)].descendants.end = self.scopes.next_id();

let file_use_def_map = self.current_use_def.finish();
self.use_def_maps[ScopeId::from(0)] = file_use_def_map;

let symbol_tables = self.symbol_tables.into_iter().map(|b| b.build()).collect();
SemanticIndex::new(self.scopes, symbol_tables, self.definitions, self.uses)
SemanticIndex::new(
self.scopes,
symbol_tables,
self.definitions,
self.uses,
self.use_def_maps,
)
}
}

Expand Down Expand Up @@ -460,6 +624,26 @@ fn string_value_text(s: &aether_syntax::RStringValue) -> Option<String> {
Some(text[1..text.len() - 1].to_string())
}

/// Extract the binding name and range from an assignment target expression.
/// Returns `None` for complex targets (`x$foo`, `x[1]`, etc.) that don't
/// represent simple name bindings.
fn assignment_target_name(target: &AnyRExpression) -> Option<(String, TextRange)> {
match target {
AnyRExpression::RIdentifier(ident) => {
let name = identifier_text(ident);
let range = ident.syntax().text_trimmed_range();
Some((name, range))
},
// `"x" <- 1` is equivalent to `x <- 1` in R
AnyRExpression::AnyRValue(AnyRValue::RStringValue(s)) => {
let name = string_value_text(s)?;
let range = s.syntax().text_trimmed_range();
Some((name, range))
},
_ => None,
}
}

fn is_super_assignment(bin: &RBinaryExpression) -> bool {
let Ok(op) = bin.operator() else {
return false;
Expand Down
Loading
Loading