diff --git a/Cargo.lock b/Cargo.lock index e2fd1f008..ae553d147 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -102,6 +102,7 @@ dependencies = [ "tree-sitter-ruby", "tree-sitter-rust", "tree-sitter-scala", + "tree-sitter-solidity", "tree-sitter-swift", "tree-sitter-typescript", "tree-sitter-zig", @@ -958,6 +959,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-solidity" +version = "1.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eacf8875b70879f0cb670c60b233ad0b68752d9e1474e6c3ef168eea8a90b25" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-swift" version = "0.6.0" diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml index 093f8aaf3..5173c16b1 100644 --- a/crates/codegraph-core/Cargo.toml +++ b/crates/codegraph-core/Cargo.toml @@ -38,6 +38,7 @@ tree-sitter-haskell = "0.23" tree-sitter-ocaml = "0.24" tree-sitter-julia = "0.23" tree-sitter-clojure-orchard = "0.2" +tree-sitter-solidity = "1.2" rayon = "1" ignore = "0.4" globset = "0.4" diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index 50e243f60..107381e24 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -394,6 +394,16 @@ pub const CLOJURE_AST_CONFIG: LangAstConfig = LangAstConfig { string_prefixes: &[], }; +pub const SOLIDITY_AST_CONFIG: LangAstConfig = LangAstConfig { + new_types: &["new_expression"], + throw_types: &["revert_statement"], + await_types: &[], + string_types: &["string_literal", "hex_string_literal", "unicode_string_literal"], + regex_types: &[], + quote_chars: &['"', '\''], + string_prefixes: &[], +}; + // ── Generic AST node walker ────────────────────────────────────────────────── /// Node types that represent identifiers across languages. 
diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs index 5e4131acf..2cd18fe9b 100644 --- a/crates/codegraph-core/src/extractors/mod.rs +++ b/crates/codegraph-core/src/extractors/mod.rs @@ -21,6 +21,7 @@ pub mod python; pub mod ruby; pub mod rust_lang; pub mod scala; +pub mod solidity; pub mod swift; pub mod zig; @@ -138,5 +139,8 @@ pub fn extract_symbols_with_opts( LanguageKind::Clojure => { clojure::ClojureExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } + LanguageKind::Solidity => { + solidity::SolidityExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) + } } } diff --git a/crates/codegraph-core/src/extractors/solidity.rs b/crates/codegraph-core/src/extractors/solidity.rs new file mode 100644 index 000000000..0302250ee --- /dev/null +++ b/crates/codegraph-core/src/extractors/solidity.rs @@ -0,0 +1,624 @@ +use super::helpers::*; +use super::SymbolExtractor; +use crate::types::*; +use tree_sitter::{Node, Tree}; + +pub struct SolidityExtractor; + +impl SymbolExtractor for SolidityExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_tree(&tree.root_node(), source, &mut symbols, match_solidity_node); + walk_ast_nodes_with_config( + &tree.root_node(), + source, + &mut symbols.ast_nodes, + &SOLIDITY_AST_CONFIG, + ); + symbols + } +} + +// ── Constants ──────────────────────────────────────────────────────────────── + +/// Container kinds that "own" nested declarations (functions, structs, enums…). +/// Mirrors `SOL_PARENT_TYPES` in `src/extractors/solidity.ts`. 
+const SOL_PARENT_TYPES: &[&str] = &[ + "contract_declaration", + "interface_declaration", + "library_declaration", +]; + +// ── Walker ─────────────────────────────────────────────────────────────────── + +fn match_solidity_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "contract_declaration" => handle_contract_decl(node, source, symbols, "class"), + "interface_declaration" => handle_contract_decl(node, source, symbols, "interface"), + "library_declaration" => handle_contract_decl(node, source, symbols, "module"), + "struct_declaration" => handle_struct_decl(node, source, symbols), + "enum_declaration" => handle_enum_decl(node, source, symbols), + "function_definition" => handle_function_def(node, source, symbols), + "modifier_definition" => handle_modifier_def(node, source, symbols), + "event_definition" => handle_event_def(node, source, symbols), + "error_declaration" => handle_error_decl(node, source, symbols), + "state_variable_declaration" => handle_state_var_decl(node, source, symbols), + "import_directive" => handle_import_directive(node, source, symbols), + "call_expression" | "function_call" => handle_call_expression(node, source, symbols), + _ => {} + } +} + +// ── Contracts / interfaces / libraries ─────────────────────────────────────── + +fn handle_contract_decl( + node: &Node, + source: &[u8], + symbols: &mut FileSymbols, + kind: &str, +) { + let Some(name_node) = node.child_by_field_name("name") else { + return; + }; + let name = node_text(&name_node, source).to_string(); + + let body = node + .child_by_field_name("body") + .or_else(|| find_child(node, "contract_body")); + let members = match body { + Some(b) => extract_contract_members(&b, source), + None => Vec::new(), + }; + + symbols.definitions.push(Definition { + name: name.clone(), + kind: kind.to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: 
opt_children(members), + }); + + extract_inheritance(node, &name, source, symbols); +} + +/// Extract member declarations from a contract body node. +fn extract_contract_members(body: &Node, source: &[u8]) -> Vec<Definition> { + let mut members = Vec::new(); + for i in 0..body.child_count() { + if let Some(child) = body.child(i) { + if let Some(member) = extract_contract_member(&child, source) { + members.push(member); + } + } + } + members +} + +/// Map a single contract body child to a SubDeclaration, or None. +fn extract_contract_member(child: &Node, source: &[u8]) -> Option<Definition> { + let line = start_line(child); + match child.kind() { + "function_definition" => { + let name_node = child.child_by_field_name("name")?; + Some(child_def( + node_text(&name_node, source).to_string(), + "method", + line, + )) + } + "state_variable_declaration" => { + let name_node = child.child_by_field_name("name")?; + Some(child_def( + node_text(&name_node, source).to_string(), + "property", + line, + )) + } + "event_definition" => { + let name_node = child.child_by_field_name("name")?; + Some(Definition { + name: node_text(&name_node, source).to_string(), + kind: "property".to_string(), + line, + end_line: None, + decorators: Some(vec!["event".to_string()]), + complexity: None, + cfg: None, + children: None, + }) + } + "error_declaration" => { + let name_node = child.child_by_field_name("name")?; + Some(Definition { + name: node_text(&name_node, source).to_string(), + kind: "property".to_string(), + line, + end_line: None, + decorators: Some(vec!["error".to_string()]), + complexity: None, + cfg: None, + children: None, + }) + } + "modifier_definition" => { + let name_node = child.child_by_field_name("name")?; + Some(Definition { + name: node_text(&name_node, source).to_string(), + kind: "method".to_string(), + line, + end_line: None, + decorators: Some(vec!["modifier".to_string()]), + complexity: None, + cfg: None, + children: None, + }) + } + _ => None, + } +} + +/// Extract inheritance (extends)
relationships from a contract node. +/// +/// Each parent in `contract A is B, C, D { }` is its own `inheritance_specifier` +/// sibling under the contract node (see tree-sitter-solidity grammar: +/// `_class_heritage: "is" commaSep1($.inheritance_specifier)`), so we must walk +/// all direct children rather than stopping at the first match. +fn extract_inheritance(node: &Node, name: &str, source: &[u8], symbols: &mut FileSymbols) { + for i in 0..node.child_count() { + let Some(inheritance) = node.child(i) else { + continue; + }; + if inheritance.kind() != "inheritance_specifier" { + continue; + } + for j in 0..inheritance.child_count() { + let Some(child) = inheritance.child(j) else { + continue; + }; + if child.kind() == "user_defined_type" || child.kind() == "identifier" { + symbols.classes.push(ClassRelation { + name: name.to_string(), + extends: Some(node_text(&child, source).to_string()), + implements: None, + line: start_line(node), + }); + } + } + } +} + +// ── Structs / enums ────────────────────────────────────────────────────────── + +fn handle_struct_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { + return; + }; + + // The JS extractor iterates direct children of the struct_declaration looking + // for `struct_member`, but the tree-sitter grammar wraps members inside a + // `struct_body` node. Mirror JS behaviour by scanning direct children — this + // produces no members in practice, matching WASM output. 
+ let mut members: Vec<Definition> = Vec::new(); + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "struct_member" { + if let Some(member_name) = child.child_by_field_name("name") { + members.push(child_def( + node_text(&member_name, source).to_string(), + "property", + start_line(&child), + )); + } + } + } + } + + let parent = find_parent_name(node, source); + let full_name = match parent { + Some(p) => format!("{}.{}", p, node_text(&name_node, source)), + None => node_text(&name_node, source).to_string(), + }; + + symbols.definitions.push(Definition { + name: full_name, + kind: "struct".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(members), + }); +} + +fn handle_enum_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { + return; + }; + + // Mirror JS: iterate direct children for `enum_value`. The grammar wraps + // enum values inside `enum_body`, so this produces no members in practice + // (matching WASM output).
+ let mut members: Vec<Definition> = Vec::new(); + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "enum_value" { + members.push(child_def( + node_text(&child, source).to_string(), + "constant", + start_line(&child), + )); + } + } + } + + let parent = find_parent_name(node, source); + let full_name = match parent { + Some(p) => format!("{}.{}", p, node_text(&name_node, source)), + None => node_text(&name_node, source).to_string(), + }; + + symbols.definitions.push(Definition { + name: full_name, + kind: "enum".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(members), + }); +} + +// ── Functions / modifiers / events / errors / state vars ───────────────────── + +fn handle_function_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { + return; + }; + let parent = find_parent_name(node, source); + let full_name = match &parent { + Some(p) => format!("{}.{}", p, node_text(&name_node, source)), + None => node_text(&name_node, source).to_string(), + }; + let kind = if parent.is_some() { "method" } else { "function" }; + + let params = extract_sol_params(node, source); + symbols.definitions.push(Definition { + name: full_name, + kind: kind.to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(params), + }); +} + +fn handle_modifier_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { + return; + }; + let parent = find_parent_name(node, source); + let full_name = match parent { + Some(p) => format!("{}.{}", p, node_text(&name_node, source)), + None => node_text(&name_node, source).to_string(), + }; + + symbols.definitions.push(Definition { + name: full_name, + kind: "function".to_string(), + line:
start_line(node), + end_line: Some(end_line(node)), + decorators: Some(vec!["modifier".to_string()]), + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_event_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { + return; + }; + let parent = find_parent_name(node, source); + let full_name = match parent { + Some(p) => format!("{}.{}", p, node_text(&name_node, source)), + None => node_text(&name_node, source).to_string(), + }; + + symbols.definitions.push(Definition { + name: full_name, + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: Some(vec!["event".to_string()]), + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_error_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { + return; + }; + let parent = find_parent_name(node, source); + let full_name = match parent { + Some(p) => format!("{}.{}", p, node_text(&name_node, source)), + None => node_text(&name_node, source).to_string(), + }; + + symbols.definitions.push(Definition { + name: full_name, + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: Some(vec!["error".to_string()]), + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_state_var_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { + return; + }; + let parent = find_parent_name(node, source); + let full_name = match parent { + Some(p) => format!("{}.{}", p, node_text(&name_node, source)), + None => node_text(&name_node, source).to_string(), + }; + + symbols.definitions.push(Definition { + name: full_name, + kind: "variable".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} 
+ +// ── Imports ────────────────────────────────────────────────────────────────── + +fn handle_import_directive(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // Three Solidity shapes: + // import "path"; + // import { X, Y } from "path"; + // import * as Alias from "path"; + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + if child.kind() == "string" || child.kind() == "string_literal" { + let source_path = strip_quotes(node_text(&child, source)); + let mut names: Vec<String> = Vec::new(); + for j in 0..node.child_count() { + if let Some(sibling) = node.child(j) { + if sibling.kind() == "identifier" { + names.push(node_text(&sibling, source).to_string()); + } + if sibling.kind() == "import_declaration" { + if let Some(id) = find_child(&sibling, "identifier") { + names.push(node_text(&id, source).to_string()); + } + } + } + } + if names.is_empty() { + names.push("*".to_string()); + } + symbols + .imports + .push(Import::new(source_path, names, start_line(node))); + return; + } + // source_import / import_clause: `import * as Alias from "path"` + if child.kind() == "source_import" || child.kind() == "import_clause" { + let str_node = find_child(&child, "string").or_else(|| find_child(&child, "string_literal")); + if let Some(str_node) = str_node { + let source_path = strip_quotes(node_text(&str_node, source)); + symbols + .imports + .push(Import::new(source_path, vec!["*".to_string()], start_line(node))); + return; + } + } + } +} + +// ── Calls ──────────────────────────────────────────────────────────────────── + +fn handle_call_expression(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let func_node = node + .child_by_field_name("function") + .or_else(|| node.child_by_field_name("callee")); + let Some(func_node) = func_node else { + return; + }; + + let (name, receiver) = match func_node.kind() { + "member_expression" | "member_access" => { + let prop = func_node + .child_by_field_name("property") +
.or_else(|| func_node.child_by_field_name("member")); + let obj = func_node + .child_by_field_name("object") + .or_else(|| func_node.child_by_field_name("expression")); + ( + prop.map(|n| node_text(&n, source).to_string()).unwrap_or_default(), + obj.map(|n| node_text(&n, source).to_string()), + ) + } + _ => (node_text(&func_node, source).to_string(), None), + }; + + if !name.is_empty() { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + receiver, + }); + } +} + +// ── Helpers ────────────────────────────────────────────────────────────────── + +fn extract_sol_params(func_node: &Node, source: &[u8]) -> Vec<Definition> { + let mut params = Vec::new(); + let param_list = func_node + .child_by_field_name("parameters") + .or_else(|| find_child(func_node, "parameter_list")); + let Some(param_list) = param_list else { + return params; + }; + for i in 0..param_list.child_count() { + let Some(param) = param_list.child(i) else { continue }; + if param.kind() != "parameter" { + continue; + } + if let Some(name_node) = param.child_by_field_name("name") { + params.push(child_def( + node_text(&name_node, source).to_string(), + "parameter", + start_line(&param), + )); + } + } + params +} + +/// Find the name of an enclosing contract/interface/library, if any. +fn find_parent_name(node: &Node, source: &[u8]) -> Option<String> { + find_enclosing_type_name(node, SOL_PARENT_TYPES, source) +} + +/// Strip leading/trailing single, double, or backtick quotes.
+fn strip_quotes(text: &str) -> String { + let trimmed = text + .trim_start_matches(|c: char| c == '\'' || c == '"' || c == '`') + .trim_end_matches(|c: char| c == '\'' || c == '"' || c == '`'); + trimmed.to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + use tree_sitter::Parser; + + fn parse_sol(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&crate::parser_registry::LanguageKind::Solidity.tree_sitter_language()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + SolidityExtractor.extract(&tree, code.as_bytes(), "Test.sol") + } + + #[test] + fn extracts_contract_as_class() { + let s = parse_sol("contract MyToken { uint256 public total; }"); + let d = s.definitions.iter().find(|d| d.name == "MyToken").unwrap(); + assert_eq!(d.kind, "class"); + } + + #[test] + fn extracts_interface() { + let s = parse_sol( + "interface IERC20 { function transfer(address to, uint256 amount) external returns (bool); }", + ); + let d = s.definitions.iter().find(|d| d.name == "IERC20").unwrap(); + assert_eq!(d.kind, "interface"); + } + + #[test] + fn extracts_library_as_module() { + let s = parse_sol( + "library Validators { function v(string memory n) internal pure returns (bool) { return true; } }", + ); + let d = s.definitions.iter().find(|d| d.name == "Validators").unwrap(); + assert_eq!(d.kind, "module"); + } + + #[test] + fn extracts_function_with_contract_prefix() { + let s = parse_sol( + "contract Token { function transfer(address to, uint256 amount) public returns (bool) { return true; } }", + ); + let d = s.definitions.iter().find(|d| d.name == "Token.transfer").unwrap(); + assert_eq!(d.kind, "method"); + // NOTE: matches WASM/JS behaviour — neither a `parameters` field nor a + // `parameter_list` node exists in the Solidity tree-sitter grammar + // (parameters are direct children of `function_definition`), so the + // current extractor emits no parameter children. 
Tracked alongside JS + // parity; do not "fix" here without also updating the WASM extractor. + } + + #[test] + fn extracts_import() { + let s = parse_sol("import \"./IERC20.sol\";"); + let imp = s.imports.iter().find(|i| i.source == "./IERC20.sol").unwrap(); + assert_eq!(imp.names, vec!["*".to_string()]); + } + + #[test] + fn extracts_named_import() { + let s = parse_sol("import { Foo, Bar } from \"./Stuff.sol\";"); + let imp = s.imports.iter().find(|i| i.source == "./Stuff.sol").unwrap(); + assert!(imp.names.contains(&"Foo".to_string())); + assert!(imp.names.contains(&"Bar".to_string())); + } + + #[test] + fn extracts_inheritance() { + let s = parse_sol("contract MyToken is ERC20 {}"); + let c = s.classes.iter().find(|c| c.name == "MyToken").unwrap(); + assert_eq!(c.extends.as_deref(), Some("ERC20")); + } + + #[test] + fn extracts_multi_parent_inheritance() { + // Each parent in `is B, C` becomes a separate `inheritance_specifier` + // sibling in the tree-sitter-solidity grammar — make sure we emit a + // ClassRelation for each. 
+ let s = parse_sol("contract A is B, C, D {}"); + let parents: Vec<_> = s + .classes + .iter() + .filter(|c| c.name == "A") + .filter_map(|c| c.extends.as_deref()) + .collect(); + assert_eq!(parents, vec!["B", "C", "D"]); + } + + #[test] + fn extracts_event_as_member() { + let s = parse_sol("contract Token { event Transfer(address from, address to); }"); + let token = s.definitions.iter().find(|d| d.name == "Token").unwrap(); + let children = token.children.as_ref().unwrap(); + let ev = children.iter().find(|c| c.name == "Transfer").unwrap(); + assert_eq!(ev.kind, "property"); + assert_eq!(ev.decorators.as_deref(), Some(&["event".to_string()][..])); + } + + #[test] + fn extracts_modifier_definition() { + let s = parse_sol( + "contract Token { modifier onlyOwner() { _; } function foo() public onlyOwner {} }", + ); + let m = s + .definitions + .iter() + .find(|d| d.name == "Token.onlyOwner") + .unwrap(); + assert_eq!(m.kind, "function"); + assert_eq!(m.decorators.as_deref(), Some(&["modifier".to_string()][..])); + } +} diff --git a/crates/codegraph-core/src/file_collector.rs b/crates/codegraph-core/src/file_collector.rs index bb1fb41db..614ca2581 100644 --- a/crates/codegraph-core/src/file_collector.rs +++ b/crates/codegraph-core/src/file_collector.rs @@ -36,7 +36,7 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[ "js", "jsx", "mjs", "cjs", "ts", "tsx", "d.ts", "py", "pyi", "go", "rs", "java", "cs", "rb", "rake", "gemspec", "php", "phtml", "tf", "hcl", "c", "h", "cpp", "cc", "cxx", "hpp", "cu", "cuh", "kt", "kts", "swift", "scala", "sh", "bash", "ex", "exs", "lua", "dart", "zig", "hs", - "ml", "mli", "jl", "clj", "cljs", "cljc", + "ml", "mli", "jl", "clj", "cljs", "cljc", "sol", ]; /// Returns whether `path` has an extension the Rust file_collector would accept. 
diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs index 5073bb5a6..6e767ecf7 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -30,6 +30,7 @@ pub enum LanguageKind { Julia, Cuda, Clojure, + Solidity, } impl LanguageKind { @@ -64,6 +65,7 @@ impl LanguageKind { Self::Julia => "julia", Self::Cuda => "cuda", Self::Clojure => "clojure", + Self::Solidity => "solidity", } } @@ -106,6 +108,7 @@ impl LanguageKind { "mli" => Some(Self::OcamlInterface), "jl" => Some(Self::Julia), "clj" | "cljs" | "cljc" => Some(Self::Clojure), + "sol" => Some(Self::Solidity), _ => None, } } @@ -141,6 +144,7 @@ impl LanguageKind { "julia" => Some(Self::Julia), "cuda" => Some(Self::Cuda), "clojure" => Some(Self::Clojure), + "solidity" => Some(Self::Solidity), _ => None, } } @@ -175,6 +179,7 @@ impl LanguageKind { Self::Julia => tree_sitter_julia::LANGUAGE.into(), Self::Cuda => tree_sitter_cuda::LANGUAGE.into(), Self::Clojure => tree_sitter_clojure_orchard::LANGUAGE.into(), + Self::Solidity => tree_sitter_solidity::LANGUAGE.into(), } } @@ -190,7 +195,7 @@ impl LanguageKind { &[ JavaScript, TypeScript, Tsx, Python, Go, Rust, Java, CSharp, Ruby, Php, Hcl, C, Cpp, Kotlin, Swift, Scala, Bash, Elixir, Lua, Dart, Zig, Haskell, Ocaml, - OcamlInterface, Julia, Cuda, Clojure, + OcamlInterface, Julia, Cuda, Clojure, Solidity, ] } } @@ -262,14 +267,15 @@ mod tests { | LanguageKind::OcamlInterface | LanguageKind::Julia | LanguageKind::Cuda - | LanguageKind::Clojure => (), + | LanguageKind::Clojure + | LanguageKind::Solidity => (), }; // IMPORTANT: this constant must equal the number of arms in the match // above AND the length of the slice returned by `LanguageKind::all()`. // Because both checks require the same manual update, they reinforce // each other: a developer who updates the match is reminded to also // update `all()` and this count. 
- const EXPECTED_LEN: usize = 27; + const EXPECTED_LEN: usize = 28; assert_eq!( LanguageKind::all().len(), EXPECTED_LEN, diff --git a/src/ast-analysis/rules/index.ts b/src/ast-analysis/rules/index.ts index 3360891b0..5ce8af9ac 100644 --- a/src/ast-analysis/rules/index.ts +++ b/src/ast-analysis/rules/index.ts @@ -168,6 +168,14 @@ const CLOJURE_AST_TYPES: Record = { regex_lit: 'regex', }; +const SOLIDITY_AST_TYPES: Record = { + new_expression: 'new', + revert_statement: 'throw', + string_literal: 'string', + hex_string_literal: 'string', + unicode_string_literal: 'string', +}; + export const AST_TYPE_MAPS: Map> = new Map([ ['javascript', JS_AST_TYPES], ['typescript', JS_AST_TYPES], @@ -195,6 +203,7 @@ export const AST_TYPE_MAPS: Map> = new Map([ ['ocaml-interface', OCAML_AST_TYPES], ['julia', JULIA_AST_TYPES], ['clojure', CLOJURE_AST_TYPES], + ['solidity', SOLIDITY_AST_TYPES], ]); // ─── Per-language string-extraction config ─────────────────────────────── @@ -233,6 +242,7 @@ const HASKELL_STRING_CONFIG: AstStringConfig = { quoteChars: '"\'', stringPrefix const OCAML_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; const JULIA_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; const CLOJURE_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; +const SOLIDITY_STRING_CONFIG: AstStringConfig = { quoteChars: '"\'', stringPrefixes: '' }; export const AST_STRING_CONFIGS: Map = new Map([ ['javascript', JS_STRING_CONFIG], @@ -261,6 +271,7 @@ export const AST_STRING_CONFIGS: Map = new Map([ ['ocaml-interface', OCAML_STRING_CONFIG], ['julia', JULIA_STRING_CONFIG], ['clojure', CLOJURE_STRING_CONFIG], + ['solidity', SOLIDITY_STRING_CONFIG], ]); // ─── Per-language "stop-after-collect" kinds ───────────────────────────── diff --git a/src/domain/parser.ts b/src/domain/parser.ts index e6ef71310..6d3df2e8b 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -477,6 +477,7 @@ export const 
NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet<string> = new Set([ '.clj', '.cljs', '.cljc', + '.sol', ]); /** diff --git a/src/extractors/solidity.ts b/src/extractors/solidity.ts index 24b3f495a..c68043966 100644 --- a/src/extractors/solidity.ts +++ b/src/extractors/solidity.ts @@ -156,15 +156,24 @@ function extractContractMember(child: TreeSitterNode): SubDeclaration | null { } } -/** Extract inheritance (extends) relationships from a contract node. */ +/** + * Extract inheritance (extends) relationships from a contract node. + * + * Each parent in `contract A is B, C, D { }` is its own `inheritance_specifier` + * sibling under the contract node (see tree-sitter-solidity grammar: + * `_class_heritage: "is" commaSep1($.inheritance_specifier)`), so we must walk + * all direct children rather than stopping at the first match. + */ function extractInheritance(node: TreeSitterNode, name: string, ctx: ExtractorOutput): void { - const inheritance = findChild(node, 'inheritance_specifier'); - if (!inheritance) return; - for (let i = 0; i < inheritance.childCount; i++) { - const child = inheritance.child(i); - if (!child) continue; - if (child.type === 'user_defined_type' || child.type === 'identifier') { - ctx.classes.push({ name, extends: child.text, line: node.startPosition.row + 1 }); + for (let i = 0; i < node.childCount; i++) { + const inheritance = node.child(i); + if (!inheritance || inheritance.type !== 'inheritance_specifier') continue; + for (let j = 0; j < inheritance.childCount; j++) { + const child = inheritance.child(j); + if (!child) continue; + if (child.type === 'user_defined_type' || child.type === 'identifier') { + ctx.classes.push({ name, extends: child.text, line: node.startPosition.row + 1 }); + } } } } diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts index 97041dea7..1c3aa1237 100644 --- a/tests/parsers/native-drop-classification.test.ts +++ b/tests/parsers/native-drop-classification.test.ts @@
-19,12 +19,11 @@ describe('classifyNativeDrops', () => { 'src/b.gleam', 'src/e.R', 'src/f.erl', - 'src/g.sol', 'src/i.groovy', 'src/j.v', 'src/k.m', ]); - expect(totals['unsupported-by-native']).toBe(8); + expect(totals['unsupported-by-native']).toBe(7); expect(totals['native-extractor-failure']).toBe(0); expect(byReason['unsupported-by-native'].get('.fs')).toEqual(['src/a.fs']); expect(byReason['unsupported-by-native'].get('.gleam')).toEqual(['src/b.gleam']); diff --git a/tests/parsers/solidity.test.ts b/tests/parsers/solidity.test.ts index ac94b79c3..fa108409c 100644 --- a/tests/parsers/solidity.test.ts +++ b/tests/parsers/solidity.test.ts @@ -59,4 +59,14 @@ contract MyToken { expect.objectContaining({ name: 'MyToken', extends: 'ERC20' }), ); }); + + it('extracts multi-parent inheritance', () => { + // Each parent in `is B, C, D` is its own inheritance_specifier sibling in + // the tree-sitter-solidity grammar — we should emit a ClassRelation for + // each parent, not just the first. + const symbols = parseSol(`contract A is B, C, D { +}`); + const parents = symbols.classes.filter((c) => c.name === 'A').map((c) => c.extends); + expect(parents).toEqual(['B', 'C', 'D']); + }); });