Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions cmd/code-index/cmd/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,16 @@ func runParse(cmd *cobra.Command, args []string) error {
return fmt.Errorf("parsing R source %s: %w", source.Path, err)
}

case "c", "cpp":
case "c":
parser := indexer.NewCParser(srcPath, source.Exclude)
if err := parser.Parse(result); err != nil {
return fmt.Errorf("parsing C/C++ source %s: %w", source.Path, err)
return fmt.Errorf("parsing C source %s: %w", source.Path, err)
}

case "cpp":
parser := indexer.NewCPPParser(srcPath, source.Exclude)
if err := parser.Parse(result); err != nil {
return fmt.Errorf("parsing C++ source %s: %w", source.Path, err)
}

Comment thread
cm421 marked this conversation as resolved.
case "markdown":
Expand Down
6 changes: 4 additions & 2 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,13 @@ Each source object defines a directory to index:
| `typescript` | `.ts`, `.tsx`, `.js`, `.jsx`, `.vue` | tree-sitter — functions, classes, interfaces, enums, JSDoc |
| `javascript` | `.js`, `.jsx` | tree-sitter (same as typescript) |
| `python` | `.py` | tree-sitter — functions, classes, decorators, docstrings |
| `c` | `.c`, `.h`, `.cpp`, `.cc`, `.hpp` | tree-sitter — functions, structs, classes, enums, typedefs, Doxygen |
| `cpp` | `.c`, `.h`, `.cpp`, `.cc`, `.hpp` | tree-sitter (same as c) |
| `c` | `.c`, `.h` | tree-sitter — functions, structs, enums, typedefs, Doxygen |
| `cpp` | `.cpp`, `.cc`, `.hpp`, `.cxx`, `.hxx` | tree-sitter — functions, classes, structs, namespaces, templates, enums, typedefs, Doxygen |
| `r` | `.R`, `.r` | Native Rscript with regex fallback — functions, roxygen, S4/R6 classes |
| `markdown` | `.md`, `.qmd` | Regex — headings as sections, YAML front matter |

> **Note:** `.h` files are handled by the `c` parser, not `cpp`. This avoids double-parsing when a project configures both `c` and `cpp` sources over the same tree. C++ projects with `.h` headers should configure a `c` source pointing at the header directories; because those headers are then parsed as C, C++-only constructs in them (classes, namespaces, templates) may not be indexed.

#### Vendor-aware Go indexing

For Go projects, you can include specific vendored dependencies in the index:
Expand Down
74 changes: 11 additions & 63 deletions indexer/parser_c.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,21 @@ import (

sitter "github.com/smacker/go-tree-sitter"
treesitterc "github.com/smacker/go-tree-sitter/c"
"github.com/smacker/go-tree-sitter/cpp"
)

// CParser extracts structured information from C/C++ source files using tree-sitter.
// CParser extracts structured information from C source files using tree-sitter.
type CParser struct {
srcRoot string
excludes []string
cLang *sitter.Language
cppLang *sitter.Language
}

// NewCParser creates a new C/C++ parser.
// NewCParser creates a new C parser.
func NewCParser(srcRoot string, excludes []string) *CParser {
return &CParser{
srcRoot: srcRoot,
excludes: excludes,
cLang: treesitterc.GetLanguage(),
cppLang: cpp.GetLanguage(),
}
}

Expand All @@ -47,9 +44,7 @@ func (p *CParser) Parse(result *ParseResult) error {
}

ext := filepath.Ext(path)
isCPP := ext == ".cpp" || ext == ".cc" || ext == ".hpp" || ext == ".cxx"
isC := ext == ".c" || ext == ".h"
if !isC && !isCPP {
if ext != ".c" && ext != ".h" {
return nil
}

Expand All @@ -67,22 +62,18 @@ func (p *CParser) Parse(result *ParseResult) error {
}
}

return p.parseFile(path, relPath, isCPP, result)
return p.parseFile(path, relPath, result)
})
}

func (p *CParser) parseFile(path, relPath string, isCPP bool, result *ParseResult) error {
func (p *CParser) parseFile(path, relPath string, result *ParseResult) error {
content, err := os.ReadFile(path)
if err != nil {
return nil
}

parser := sitter.NewParser()
if isCPP {
parser.SetLanguage(p.cppLang)
} else {
parser.SetLanguage(p.cLang)
}
parser.SetLanguage(p.cLang)

tree, err := parser.ParseCtx(context.Background(), nil, content)
if err != nil {
Expand All @@ -102,7 +93,7 @@ func (p *CParser) parseFile(path, relPath string, isCPP bool, result *ParseResul
ImportPath: importPath,
}

p.extractDeclarations(root, content, fileInfo, isCPP)
p.extractDeclarations(root, content, fileInfo)

fileInfo.ASTHash = hashString(string(content))

Expand All @@ -121,7 +112,7 @@ func (p *CParser) parseFile(path, relPath string, isCPP bool, result *ParseResul
return nil
}

func (p *CParser) extractDeclarations(node *sitter.Node, content []byte, fileInfo *FileInfo, isCPP bool) {
func (p *CParser) extractDeclarations(node *sitter.Node, content []byte, fileInfo *FileInfo) {
for i := 0; i < int(node.ChildCount()); i++ {
child := node.Child(i)
nodeType := child.Type()
Expand Down Expand Up @@ -153,26 +144,10 @@ func (p *CParser) extractDeclarations(node *sitter.Node, content []byte, fileInf
fileInfo.Types = append(fileInfo.Types, *t)
}

// C++ specific
case "class_specifier":
if isCPP {
if t := p.extractClass(child, content, fileInfo.Path); t != nil {
fileInfo.Types = append(fileInfo.Types, *t)
}
}

case "namespace_definition":
if isCPP {
// Recurse into namespace body.
if body := child.ChildByFieldName("body"); body != nil {
p.extractDeclarations(body, content, fileInfo, isCPP)
}
}

// Preprocessor blocks — recurse into their bodies to find declarations
// inside #ifndef/#ifdef/#if guards (common in header files).
case "preproc_ifdef", "preproc_if", "preproc_else", "preproc_elif":
p.extractDeclarations(child, content, fileInfo, isCPP)
p.extractDeclarations(child, content, fileInfo)
}
}
}
Expand Down Expand Up @@ -319,26 +294,6 @@ func (p *CParser) extractTypedef(node *sitter.Node, content []byte, filePath str
}
}

// extractClass builds a TypeInfo of kind "class" for the given node.
// It returns nil when the node has no "name" field. The recorded line is the
// node's start row plus one, and every class is marked as exported.
func (p *CParser) extractClass(node *sitter.Node, content []byte, filePath string) *TypeInfo {
	ident := node.ChildByFieldName("name")
	if ident == nil {
		return nil
	}

	info := TypeInfo{
		Name:     ident.Content(content),
		Kind:     "class",
		Doc:      extractPrecedingComment(node, content),
		File:     filePath,
		Line:     int(node.StartPoint().Row) + 1,
		Exported: true,
		// The hash covers the full source text of the class node.
		ASTHash: hashString(node.Content(content)),
	}
	return &info
}

// extractCName finds the function/variable name from a declarator node.
func extractCName(declarator *sitter.Node, content []byte) string {
// Walk down through pointer_declarator, function_declarator, etc.
Expand Down Expand Up @@ -371,19 +326,12 @@ func extractCSignature(node *sitter.Node, content []byte) string {
// Take everything before the function body.
body := node.ChildByFieldName("body")
if body != nil {
sig := strings.TrimSpace(string(content[node.StartByte():body.StartByte()]))
if len(sig) > 200 {
sig = sig[:200] + "..."
}
return sig
return truncateSignature(strings.TrimSpace(string(content[node.StartByte():body.StartByte()])))
}
// Fallback: first line.
text := node.Content(content)
if idx := strings.Index(text, "{"); idx > 0 {
text = strings.TrimSpace(text[:idx])
}
if len(text) > 200 {
text = text[:200] + "..."
}
return text
return truncateSignature(text)
}
11 changes: 11 additions & 0 deletions indexer/parser_common.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright (C) 2026 by Posit Software, PBC
package indexer

// maxSignatureLen is the maximum number of bytes of a signature that is kept
// before the "..." truncation marker is appended.
const maxSignatureLen = 200

// truncateSignature returns sig unchanged when it is at most maxSignatureLen
// bytes long. Longer signatures are cut to at most maxSignatureLen bytes and
// suffixed with "...".
//
// The cut never lands inside a multi-byte UTF-8 sequence: if the byte at the
// cut position is a continuation byte, the cut is moved back to the start of
// that character so the result stays valid UTF-8 whenever sig was valid.
func truncateSignature(sig string) string {
	if len(sig) <= maxSignatureLen {
		return sig
	}

	cut := maxSignatureLen
	// A UTF-8 sequence is at most 4 bytes, so the start of the character
	// containing sig[cut] is at most 3 bytes back. Continuation bytes have
	// the bit pattern 10xxxxxx.
	for back := 0; back < 3 && cut > 0 && sig[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	if sig[cut]&0xC0 == 0x80 {
		// No character start nearby: the input is not valid UTF-8 here, so
		// fall back to the plain byte cut rather than discarding more text.
		cut = maxSignatureLen
	}
	return sig[:cut] + "..."
}
Loading
Loading