Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/codegraph-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ tree-sitter-dart = "0.0.4"
tree-sitter-zig = "1"
tree-sitter-haskell = "0.23"
tree-sitter-ocaml = "0.24"
tree-sitter-fsharp = "0.3"
tree-sitter-clojure-orchard = "0.2"
rayon = "1"
ignore = "0.4"
Expand Down
3 changes: 1 addition & 2 deletions crates/codegraph-core/src/change_detection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@ mod tests {

#[test]
fn detect_removed_skips_unsupported_extensions() {
// Files in WASM-only languages (Gleam, Julia, F#) live in
// Files in WASM-only languages (Gleam, Julia) live in
// `file_hashes` because the JS-side WASM backfill writes them, but
// Rust's narrower file_collector never collects them. Without this
// skip, every incremental rebuild would flag them as removed and
Expand All @@ -783,7 +783,6 @@ mod tests {
for path in [
"tests/fixtures/gleam/main.gleam",
"tests/fixtures/julia/main.jl",
"tests/fixtures/fsharp/Main.fs",
] {
existing.insert(
path.to_string(),
Expand Down
287 changes: 287 additions & 0 deletions crates/codegraph-core/src/extractors/fsharp.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,287 @@
use tree_sitter::{Node, Tree};
use crate::cfg::build_function_cfg;
use crate::complexity::compute_all_metrics;
use crate::types::*;
use super::helpers::*;
use super::SymbolExtractor;

/// Stateless extractor that turns a parsed F# syntax tree into `FileSymbols`
/// (definitions, imports, calls and AST nodes) via its `SymbolExtractor` impl.
pub struct FSharpExtractor;

impl SymbolExtractor for FSharpExtractor {
    /// Builds the `FileSymbols` for a single F# file.
    ///
    /// * `tree` - parsed syntax tree of the file.
    /// * `source` - raw bytes the tree was parsed from.
    /// * `file_path` - path recorded on the returned `FileSymbols`.
    fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols {
        let root = tree.root_node();
        let mut symbols = FileSymbols::new(file_path.to_owned());

        // First pass: dispatch every node to the F#-specific handlers.
        walk_tree(&root, source, &mut symbols, match_fsharp_node);

        // Second pass: collect generic AST nodes using the F# config.
        walk_ast_nodes_with_config(&root, source, &mut symbols.ast_nodes, &FSHARP_AST_CONFIG);

        symbols
    }
}

fn match_fsharp_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) {
match node.kind() {
"named_module" => handle_named_module(node, source, symbols),
"function_declaration_left" => handle_function_decl(node, source, symbols),
"type_definition" => handle_type_def(node, source, symbols),
"import_decl" => handle_import_decl(node, source, symbols),
"application_expression" => handle_application(node, source, symbols),
"dot_expression" => handle_dot_expression(node, source, symbols),
_ => {}
}
}

/// Returns the `long_identifier` text of the `named_module` that encloses
/// `node`, or `None` when there is no such module or it has no identifier.
fn enclosing_module_name(node: &Node, source: &[u8]) -> Option<String> {
    find_parent_of_type(node, "named_module").and_then(|module| {
        find_child(&module, "long_identifier").map(|ident| node_text(&ident, source).to_string())
    })
}

/// Records a `module` definition for a `named_module` node, named after its
/// `long_identifier` child. Nodes without that child are skipped.
fn handle_named_module(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
    if let Some(ident) = find_child(node, "long_identifier") {
        let module_def = Definition {
            name: node_text(&ident, source).to_string(),
            kind: String::from("module"),
            // The definition spans the whole module node, not just its name.
            line: start_line(node),
            end_line: Some(end_line(node)),
            decorators: None,
            complexity: None,
            cfg: None,
            children: None,
        };
        symbols.definitions.push(module_def);
    }
}

/// Records a `function` definition for a `function_declaration_left` node.
///
/// The name comes from the node's `identifier` child and is prefixed with the
/// enclosing `named_module` name (`Module.func`) when one exists. Identifiers
/// under `argument_patterns` become the definition's parameter children.
/// Nodes without an `identifier` child are skipped.
fn handle_function_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
    if let Some(ident) = find_child(node, "identifier") {
        let short_name = node_text(&ident, source).to_string();

        // Start from "<module>." (or an empty prefix) and append the bare name.
        let mut name = enclosing_module_name(node, source)
            .map(|module| module + ".")
            .unwrap_or_default();
        name.push_str(&short_name);

        // The end line, metrics and CFG are taken from the parent node (the
        // function_or_value_defn) to stay in step with the JS extractor; the
        // node itself is the fallback when it has no parent.
        let scope = match node.parent() {
            Some(parent) => parent,
            None => *node,
        };

        let definition = Definition {
            name,
            kind: String::from("function"),
            line: start_line(node),
            end_line: Some(end_line(&scope)),
            decorators: None,
            complexity: compute_all_metrics(&scope, source, "fsharp"),
            cfg: build_function_cfg(&scope, "fsharp", source),
            children: opt_children(extract_fsharp_params(node, source)),
        };
        symbols.definitions.push(definition);
    }
}

/// Collects parameter definitions from the `argument_patterns` child of a
/// `function_declaration_left` node; returns an empty list when it is absent.
fn extract_fsharp_params(decl_left: &Node, source: &[u8]) -> Vec<Definition> {
    let patterns = match find_child(decl_left, "argument_patterns") {
        Some(found) => found,
        None => return Vec::new(),
    };

    let mut collected = Vec::new();
    collect_param_identifiers(&patterns, source, &mut collected);
    collected
}

/// Depth-first walk that appends one `parameter` child definition to `params`
/// for every `identifier` node at or below `node`, in child order.
fn collect_param_identifiers(node: &Node, source: &[u8], params: &mut Vec<Definition>) {
    if node.kind() != "identifier" {
        // Not a name itself: keep descending through each child.
        for child in (0..node.child_count()).filter_map(|idx| node.child(idx)) {
            collect_param_identifiers(&child, source, params);
        }
        return;
    }

    // An identifier is recorded as-is; its own children are not visited.
    let name = node_text(node, source).to_string();
    params.push(child_def(name, "parameter", start_line(node)));
}

/// Records one definition per concrete type body found directly under a
/// `type_definition` node (union, record, abbreviation, class, interface or
/// plain `type_defn`), including its union cases / record fields as children.
fn handle_type_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
    // Direct children of `type_definition` that carry an actual type body.
    const TYPE_BODY_KINDS: [&str; 6] = [
        "union_type_defn",
        "record_type_defn",
        "type_abbreviation_defn",
        "class_type_defn",
        "interface_type_defn",
        "type_defn",
    ];

    for defn in (0..node.child_count()).filter_map(|idx| node.child(idx)) {
        let defn_kind = defn.kind();
        if !TYPE_BODY_KINDS.contains(&defn_kind) {
            continue;
        }

        // Name lookup order: the identifier inside `type_name`, then the whole
        // `type_name` text, then a bare `identifier` child. No name, no entry.
        let name = if let Some(type_name) = find_child(&defn, "type_name") {
            match find_child(&type_name, "identifier") {
                Some(ident) => node_text(&ident, source).to_string(),
                None => node_text(&type_name, source).to_string(),
            }
        } else if let Some(ident) = find_child(&defn, "identifier") {
            node_text(&ident, source).to_string()
        } else {
            continue;
        };

        let mut members = Vec::new();
        extract_type_members(&defn, source, &mut members);

        let definition = Definition {
            name,
            kind: determine_type_kind(defn_kind).to_string(),
            line: start_line(&defn),
            end_line: Some(end_line(&defn)),
            decorators: None,
            complexity: None,
            cfg: None,
            children: opt_children(members),
        };
        symbols.definitions.push(definition);
    }
}

/// Maps a type-body grammar kind to the definition kind stored for it.
///
/// Unions map to `"enum"`, records to `"record"`, classes to `"class"` and
/// interfaces to `"interface"`; every other kind falls back to `"type"`.
fn determine_type_kind(node_kind: &str) -> &'static str {
    // (grammar node kind, definition kind) pairs with a dedicated label.
    const KIND_LABELS: [(&str, &str); 4] = [
        ("union_type_defn", "enum"),
        ("record_type_defn", "record"),
        ("class_type_defn", "class"),
        ("interface_type_defn", "interface"),
    ];

    KIND_LABELS
        .iter()
        .find(|(grammar_kind, _)| *grammar_kind == node_kind)
        .map_or("type", |&(_, label)| label)
}

/// Appends a `property` child definition to `children` for each union case
/// and record field under `type_defn`, descending through the
/// `union_type_cases` / `record_fields` container nodes.
fn extract_type_members(type_defn: &Node, source: &[u8], children: &mut Vec<Definition>) {
    for member in (0..type_defn.child_count()).filter_map(|idx| type_defn.child(idx)) {
        let member_kind = member.kind();

        // Containers only group cases/fields; their contents are what matter.
        if member_kind == "union_type_cases" || member_kind == "record_fields" {
            extract_type_members(&member, source, children);
            continue;
        }

        let name_node = match member_kind {
            "union_type_case" => find_child(&member, "identifier"),
            // Prefer the grammar's `name` field, then any identifier child.
            "record_field" => member
                .child_by_field_name("name")
                .or_else(|| find_child(&member, "identifier")),
            _ => None,
        };

        if let Some(name_node) = name_node {
            let name = node_text(&name_node, source).to_string();
            children.push(child_def(name, "property", start_line(&member)));
        }
    }
}

/// Records an import for an `import_decl` node: the full dotted
/// `long_identifier` text is the import source and its last dot-separated
/// segment is the single imported name. Nodes without a `long_identifier`
/// child are skipped.
fn handle_import_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
    if let Some(path_node) = find_child(node, "long_identifier") {
        let module_path = node_text(&path_node, source).to_string();

        // Scanning from the right yields the trailing segment directly.
        let leaf = module_path
            .rsplit('.')
            .next()
            .unwrap_or(&module_path)
            .to_string();

        let import = Import::new(module_path, vec![leaf], start_line(node));
        symbols.imports.push(import);
    }
}

/// Records a call for an `application_expression` node based on its first
/// child (the callee).
///
/// * `identifier` / `long_identifier` callees are stored verbatim in the
///   call's `name`; dotted names are not split.
/// * A `long_identifier_or_op` callee is unwrapped to its inner `identifier`,
///   or failing that its inner `long_identifier`; with neither, nothing is
///   recorded.
/// * Any other callee kind, or a node with no children, records nothing.
///
/// `receiver` and `dynamic` are always `None` on calls pushed here.
fn handle_application(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
let func_node = match node.child(0) {
Some(n) => n,
None => return,
};

// Mirrors the JS extractor's `handleApplication`: the full dotted name
// (e.g. `Service.createUser`) is stored in `name`. Splitting `name` into
// `(receiver, method)` would diverge from the JS engine's output and
// change which resolution rules fire downstream.
match func_node.kind() {
"identifier" | "long_identifier" => {
symbols.calls.push(Call {
name: node_text(&func_node, source).to_string(),
line: start_line(node),
dynamic: None,
receiver: None,
});
}
"long_identifier_or_op" => {
// Inner child is either `identifier` (bare, e.g. `validateUser`) or
// `long_identifier` (qualified, e.g. `Repository.save`). Order
// matches the JS extractor (`identifier` first). Operator forms
// like `( + )` have neither child; we emit nothing in that case,
// mirroring the JS extractor's silent skip.
if let Some(inner) = find_child(&func_node, "identifier")
.or_else(|| find_child(&func_node, "long_identifier"))
{
symbols.calls.push(Call {
name: node_text(&inner, source).to_string(),
line: start_line(node),
dynamic: None,
receiver: None,
});
}
}
Comment on lines +242 to +258
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Divergence from JS extractor in handle_application

Two behavioural differences exist versus the JS handleApplication that the PR claims to mirror:

  1. Search order flipped: The JS extractor tries identifier first, then long_identifier inside a long_identifier_or_op wrapper (findChild(funcNode, 'identifier') || findChild(funcNode, 'long_identifier')). The Rust version tries long_identifier first. For a node containing both kinds, the preferred result will differ.

  2. Extra fallback emits operator calls: When neither child is found (e.g., an operator expression like ( + )), JS emits nothing. Rust falls back to the raw text of func_node and still pushes a Call. This means every operator application in an F# file produces a spurious call entry in the native engine that the WASM engine never produces, diverging the two outputs.

Fix in Claude Code

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in the merge resolution commit. The handle_application branch for long_identifier_or_op now matches the JS extractor exactly:

  1. Search order is now identifier first, then long_identifier (matches findChild(funcNode, 'identifier') || findChild(funcNode, 'long_identifier') in the JS extractor).
  2. When neither child is present (operator forms like ( + )), the Rust extractor emits nothing — mirroring the JS extractor's silent skip. The previous fallback that pushed a Call with the raw func_node text has been removed.

See crates/codegraph-core/src/extractors/fsharp.rs:242-260.

_ => {}
}
}

fn handle_dot_expression(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
// Mirrors the JS extractor's `handleDotExpression`: collect identifier
// segments and emit `name = last`, `receiver = everything-before`.
let mut parts: Vec<String> = Vec::new();
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
match child.kind() {
"identifier" | "long_identifier" => {
parts.push(node_text(&child, source).to_string());
}
_ => {}
}
}
}
if parts.len() >= 2 {
let method = parts.last().cloned().unwrap_or_default();
let receiver = parts[..parts.len() - 1].join(".");
symbols.calls.push(Call {
name: method,
line: start_line(node),
dynamic: None,
receiver: Some(receiver),
});
}
}
12 changes: 12 additions & 0 deletions crates/codegraph-core/src/extractors/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,18 @@ pub const OCAML_AST_CONFIG: LangAstConfig = LangAstConfig {
string_prefixes: &[],
};

// AST-node configuration passed to `walk_ast_nodes_with_config` for F# files.
//
// F# string nodes in tree-sitter-fsharp surface under the `string` kind inside
// `const` literals. The grammar exposes no dedicated raw-string or regex form.
//
// Only `string_types` and `quote_chars` are populated; the `new`/`throw`/
// `await`/regex kind lists and `string_prefixes` are empty.
// NOTE(review): presumably this means only string literals are captured for
// F# — confirm against how the walker consumes the empty lists.
pub const FSHARP_AST_CONFIG: LangAstConfig = LangAstConfig {
new_types: &[],
throw_types: &[],
await_types: &[],
string_types: &["string"],
regex_types: &[],
quote_chars: &['"'],
string_prefixes: &[],
};

pub const CLOJURE_AST_CONFIG: LangAstConfig = LangAstConfig {
new_types: &[],
throw_types: &[],
Expand Down
4 changes: 4 additions & 0 deletions crates/codegraph-core/src/extractors/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pub mod cpp;
pub mod csharp;
pub mod dart;
pub mod elixir;
pub mod fsharp;
pub mod go;
pub mod haskell;
pub mod hcl;
Expand Down Expand Up @@ -127,6 +128,9 @@ pub fn extract_symbols_with_opts(
LanguageKind::Ocaml | LanguageKind::OcamlInterface => {
ocaml::OcamlExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes)
}
LanguageKind::FSharp => {
fsharp::FSharpExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes)
}
LanguageKind::Clojure => {
clojure::ClojureExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes)
}
Expand Down
2 changes: 1 addition & 1 deletion crates/codegraph-core/src/file_collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[
"js", "jsx", "mjs", "cjs", "ts", "tsx", "d.ts", "py", "pyi", "go", "rs", "java", "cs", "rb",
"rake", "gemspec", "php", "phtml", "tf", "hcl", "c", "h", "cpp", "cc", "cxx", "hpp", "kt",
"kts", "swift", "scala", "sh", "bash", "ex", "exs", "lua", "dart", "zig", "hs", "ml", "mli",
"clj", "cljs", "cljc",
"fs", "fsx", "fsi", "clj", "cljs", "cljc",
];

/// Returns whether `path` has an extension the Rust file_collector would accept.
Expand Down
Loading
Loading