From b45b9a6be29d0389420e9e8c7f3a7a99b77b462c Mon Sep 17 00:00:00 2001 From: craig Date: Fri, 10 Apr 2026 09:58:13 -0600 Subject: [PATCH 1/6] Add C++ parser --- cmd/code-index/cmd/parse.go | 10 +- indexer/parser_c.go | 63 +-- indexer/parser_cpp.go | 900 ++++++++++++++++++++++++++++++++++++ indexer/parser_test.go | 71 ++- testdata/c/container.hpp | 55 +++ 5 files changed, 1041 insertions(+), 58 deletions(-) create mode 100644 indexer/parser_cpp.go create mode 100644 testdata/c/container.hpp diff --git a/cmd/code-index/cmd/parse.go b/cmd/code-index/cmd/parse.go index 70422e3..2e8e959 100644 --- a/cmd/code-index/cmd/parse.go +++ b/cmd/code-index/cmd/parse.go @@ -86,10 +86,16 @@ func runParse(cmd *cobra.Command, args []string) error { return fmt.Errorf("parsing R source %s: %w", source.Path, err) } - case "c", "cpp": + case "c": parser := indexer.NewCParser(srcPath, source.Exclude) if err := parser.Parse(result); err != nil { - return fmt.Errorf("parsing C/C++ source %s: %w", source.Path, err) + return fmt.Errorf("parsing C source %s: %w", source.Path, err) + } + + case "cpp": + parser := indexer.NewCPPParser(srcPath, source.Exclude) + if err := parser.Parse(result); err != nil { + return fmt.Errorf("parsing C++ source %s: %w", source.Path, err) } case "markdown": diff --git a/indexer/parser_c.go b/indexer/parser_c.go index a2d553f..df26b38 100644 --- a/indexer/parser_c.go +++ b/indexer/parser_c.go @@ -10,24 +10,21 @@ import ( sitter "github.com/smacker/go-tree-sitter" treesitterc "github.com/smacker/go-tree-sitter/c" - "github.com/smacker/go-tree-sitter/cpp" ) -// CParser extracts structured information from C/C++ source files using tree-sitter. +// CParser extracts structured information from C source files using tree-sitter. type CParser struct { srcRoot string excludes []string cLang *sitter.Language - cppLang *sitter.Language } -// NewCParser creates a new C/C++ parser. +// NewCParser creates a new C parser. func NewCParser(srcRoot string, excludes []string) *CParser { return &CParser{ srcRoot: srcRoot, excludes: excludes, cLang: treesitterc.GetLanguage(), - cppLang: cpp.GetLanguage(), } } @@ -47,9 +44,7 @@ func (p *CParser) Parse(result *ParseResult) error { } ext := filepath.Ext(path) - isCPP := ext == ".cpp" || ext == ".cc" || ext == ".hpp" || ext == ".cxx" - isC := ext == ".c" || ext == ".h" - if !isC && !isCPP { + if ext != ".c" && ext != ".h" { return nil } @@ -67,22 +62,18 @@ func (p *CParser) Parse(result *ParseResult) error { } } - return p.parseFile(path, relPath, isCPP, result) + return p.parseFile(path, relPath, result) }) } -func (p *CParser) parseFile(path, relPath string, isCPP bool, result *ParseResult) error { +func (p *CParser) parseFile(path, relPath string, result *ParseResult) error { content, err := os.ReadFile(path) if err != nil { return nil } parser := sitter.NewParser() - if isCPP { - parser.SetLanguage(p.cppLang) - } else { - parser.SetLanguage(p.cLang) - } + parser.SetLanguage(p.cLang) tree, err := parser.ParseCtx(context.Background(), nil, content) if err != nil { @@ -102,7 +93,7 @@ func (p *CParser) parseFile(path, relPath string, isCPP bool, result *ParseResul ImportPath: importPath, } - p.extractDeclarations(root, content, fileInfo, isCPP) + p.extractDeclarations(root, content, fileInfo) fileInfo.ASTHash = hashString(string(content)) @@ -121,7 +112,7 @@ func (p *CParser) parseFile(path, relPath string, isCPP bool, result *ParseResul return nil } -func (p *CParser) extractDeclarations(node *sitter.Node, content []byte, fileInfo *FileInfo, isCPP bool) { +func (p *CParser) extractDeclarations(node *sitter.Node, content []byte, fileInfo *FileInfo) { for i := 0; i < int(node.ChildCount()); i++ { child := node.Child(i) nodeType := child.Type() @@ -153,26 +144,10 @@ func (p *CParser) extractDeclarations(node *sitter.Node, content []byte, fileInf fileInfo.Types = append(fileInfo.Types, *t) } - // C++ specific - case "class_specifier": - if isCPP { - if t := p.extractClass(child, content, fileInfo.Path); t != nil { - fileInfo.Types = append(fileInfo.Types, *t) - } - } - - case "namespace_definition": - if isCPP { - // Recurse into namespace body. - if body := child.ChildByFieldName("body"); body != nil { - p.extractDeclarations(body, content, fileInfo, isCPP) - } - } - // Preprocessor blocks — recurse into their bodies to find declarations // inside #ifndef/#ifdef/#if guards (common in header files). case "preproc_ifdef", "preproc_if", "preproc_else", "preproc_elif": - p.extractDeclarations(child, content, fileInfo, isCPP) + p.extractDeclarations(child, content, fileInfo) } } } @@ -319,26 +294,6 @@ func (p *CParser) extractTypedef(node *sitter.Node, content []byte, filePath str } } -func (p *CParser) extractClass(node *sitter.Node, content []byte, filePath string) *TypeInfo { - nameNode := node.ChildByFieldName("name") - if nameNode == nil { - return nil - } - - name := nameNode.Content(content) - doc := extractPrecedingComment(node, content) - - return &TypeInfo{ - Name: name, - Kind: "class", - Doc: doc, - File: filePath, - Line: int(node.StartPoint().Row) + 1, - Exported: true, - ASTHash: hashString(node.Content(content)), - } -} - // extractCName finds the function/variable name from a declarator node. func extractCName(declarator *sitter.Node, content []byte) string { // Walk down through pointer_declarator, function_declarator, etc. diff --git a/indexer/parser_cpp.go b/indexer/parser_cpp.go new file mode 100644 index 0000000..af1708b --- /dev/null +++ b/indexer/parser_cpp.go @@ -0,0 +1,900 @@ +// Copyright (C) 2026 by Posit Software, PBC +package indexer + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + + sitter "github.com/smacker/go-tree-sitter" + "github.com/smacker/go-tree-sitter/cpp" +) + +// CPPParser extracts structured information from C++ source files using tree-sitter. +type CPPParser struct { + srcRoot string + excludes []string + lang *sitter.Language +} + +// NewCPPParser creates a new C++ parser. +func NewCPPParser(srcRoot string, excludes []string) *CPPParser { + return &CPPParser{ + srcRoot: srcRoot, + excludes: excludes, + lang: cpp.GetLanguage(), + } +} + +// Parse walks the source tree and extracts all file, function, and type information. +func (p *CPPParser) Parse(result *ParseResult) error { + return filepath.Walk(p.srcRoot, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if info.IsDir() { + base := filepath.Base(path) + if base == "build" || base == "CMakeFiles" || strings.HasPrefix(base, ".") { + return filepath.SkipDir + } + return nil + } + + ext := filepath.Ext(path) + if ext != ".cpp" && ext != ".cc" && ext != ".hpp" && ext != ".cxx" && ext != ".hxx" && ext != ".h" { + return nil + } + + relPath, err := filepath.Rel(p.srcRoot, path) + if err != nil { + return nil + } + + for _, pattern := range p.excludes { + if matched, err := filepath.Match(pattern, relPath); err == nil && matched { + return nil + } + if matched, err := filepath.Match(pattern, filepath.Base(path)); err == nil && matched { + return nil + } + } + + return p.parseFile(path, relPath, result) + }) +} + +func (p *CPPParser) parseFile(path, relPath string, result *ParseResult) error { + content, err := os.ReadFile(path) + if err != nil { + return nil + } + + parser := sitter.NewParser() + parser.SetLanguage(p.lang) + + tree, err := parser.ParseCtx(context.Background(), nil, content) + if err != nil { + fmt.Fprintf(os.Stderr, "warning: skipping %s: %v\n", relPath, err) + return nil + } + defer tree.Close() + + root := tree.RootNode() + + dir := filepath.Dir(relPath) + importPath := filepath.ToSlash(dir) + + fileInfo := &FileInfo{ + Path: filepath.ToSlash(relPath), + Package: filepath.Base(dir), + ImportPath: importPath, + } + + // Extract declarations with namespace tracking. + ctx := &cppParseContext{ + content: content, + filePath: fileInfo.Path, + namespaces: nil, + } + p.extractDeclarations(root, ctx, fileInfo) + + fileInfo.ASTHash = hashString(string(content)) + + if len(fileInfo.Functions) > 0 || len(fileInfo.Types) > 0 { + result.Files[fileInfo.Path] = fileInfo + + if _, ok := result.Packages[importPath]; !ok { + result.Packages[importPath] = &PackageInfo{ + ImportPath: importPath, + Dir: dir, + } + } + result.Packages[importPath].Files = append(result.Packages[importPath].Files, fileInfo.Path) + } + + return nil +} + +// cppParseContext tracks parsing state like namespace nesting and template parameters. +type cppParseContext struct { + content []byte + filePath string + namespaces []string // Current namespace stack (e.g., ["rstudio", "core"]). + template string // Current template parameters (e.g., "template"). +} + +// qualifiedName returns the fully qualified name given current namespace context. +func (ctx *cppParseContext) qualifiedName(name string) string { + if len(ctx.namespaces) == 0 { + return name + } + return strings.Join(ctx.namespaces, "::") + "::" + name +} + +// pushNamespace adds a namespace to the stack. +func (ctx *cppParseContext) pushNamespace(ns string) { + ctx.namespaces = append(ctx.namespaces, ns) +} + +// popNamespace removes the last namespace from the stack. +func (ctx *cppParseContext) popNamespace() { + if len(ctx.namespaces) > 0 { + ctx.namespaces = ctx.namespaces[:len(ctx.namespaces)-1] + } +} + +func (p *CPPParser) extractDeclarations(node *sitter.Node, ctx *cppParseContext, fileInfo *FileInfo) { + for i := 0; i < int(node.ChildCount()); i++ { + child := node.Child(i) + nodeType := child.Type() + + switch nodeType { + case "function_definition": + if fn := p.extractFunction(child, ctx, ""); fn != nil { + fileInfo.Functions = append(fileInfo.Functions, *fn) + } + + case "declaration": + // Function declarations (prototypes) in headers. + if fn := p.extractFunctionDeclaration(child, ctx, ""); fn != nil { + fileInfo.Functions = append(fileInfo.Functions, *fn) + } + + case "struct_specifier", "union_specifier": + if t := p.extractStructOrUnion(child, ctx); t != nil { + fileInfo.Types = append(fileInfo.Types, *t) + } + + case "enum_specifier": + if t := p.extractEnum(child, ctx); t != nil { + fileInfo.Types = append(fileInfo.Types, *t) + } + + case "type_definition": + if t := p.extractTypedef(child, ctx); t != nil { + fileInfo.Types = append(fileInfo.Types, *t) + } + + case "class_specifier": + if t := p.extractClass(child, ctx); t != nil { + fileInfo.Types = append(fileInfo.Types, *t) + } + + case "namespace_definition": + p.extractNamespace(child, ctx, fileInfo) + + case "template_declaration": + p.extractTemplate(child, ctx, fileInfo) + + // Preprocessor blocks - recurse into their bodies to find declarations + // inside #ifndef/#ifdef/#if guards (common in header files). + case "preproc_ifdef", "preproc_if", "preproc_else", "preproc_elif": + p.extractDeclarations(child, ctx, fileInfo) + } + } +} + +// extractNamespace handles namespace definitions and recurses into their body. +func (p *CPPParser) extractNamespace(node *sitter.Node, ctx *cppParseContext, fileInfo *FileInfo) { + nameNode := node.ChildByFieldName("name") + if nameNode == nil { + // Anonymous namespace - still recurse. + if body := node.ChildByFieldName("body"); body != nil { + p.extractDeclarations(body, ctx, fileInfo) + } + return + } + + nsName := nameNode.Content(ctx.content) + ctx.pushNamespace(nsName) + defer ctx.popNamespace() + + if body := node.ChildByFieldName("body"); body != nil { + p.extractDeclarations(body, ctx, fileInfo) + } +} + +// extractTemplate handles template declarations (classes, functions, structs). +func (p *CPPParser) extractTemplate(node *sitter.Node, ctx *cppParseContext, fileInfo *FileInfo) { + // Extract template parameters. + templateParams := "" + for i := 0; i < int(node.ChildCount()); i++ { + child := node.Child(i) + if child.Type() == "template_parameter_list" { + templateParams = "template" + child.Content(ctx.content) + break + } + } + + oldTemplate := ctx.template + ctx.template = templateParams + defer func() { ctx.template = oldTemplate }() + + // Find the actual declaration inside the template. + for i := 0; i < int(node.ChildCount()); i++ { + child := node.Child(i) + switch child.Type() { + case "class_specifier": + if t := p.extractClass(child, ctx); t != nil { + fileInfo.Types = append(fileInfo.Types, *t) + } + case "struct_specifier": + if t := p.extractStructOrUnion(child, ctx); t != nil { + fileInfo.Types = append(fileInfo.Types, *t) + } + case "function_definition": + if fn := p.extractFunction(child, ctx, ""); fn != nil { + fileInfo.Functions = append(fileInfo.Functions, *fn) + } + case "declaration": + if fn := p.extractFunctionDeclaration(child, ctx, ""); fn != nil { + fileInfo.Functions = append(fileInfo.Functions, *fn) + } + } + } +} + +func (p *CPPParser) extractFunction(node *sitter.Node, ctx *cppParseContext, receiver string) *FunctionInfo { + declarator := node.ChildByFieldName("declarator") + if declarator == nil { + return nil + } + + name, qualifiedReceiver := extractCPPNameWithQualifier(declarator, ctx.content) + if name == "" { + return nil + } + + // If the declarator has a qualified name (e.g., Foo::bar), use that as receiver. + if qualifiedReceiver != "" { + receiver = qualifiedReceiver + } + + sig := p.buildSignature(node, ctx) + doc := extractPrecedingComment(node, ctx.content) + line := int(node.StartPoint().Row) + 1 + + qualName := name + if receiver == "" && len(ctx.namespaces) > 0 { + qualName = ctx.qualifiedName(name) + } + + return &FunctionInfo{ + Name: qualName, + Receiver: receiver, + Signature: sig, + Doc: doc, + File: ctx.filePath, + Line: line, + Exported: true, + ASTHash: hashString(node.Content(ctx.content)), + SigHash: hashString(sig), + } +} + +func (p *CPPParser) extractFunctionDeclaration( + node *sitter.Node, + ctx *cppParseContext, + receiver string, +) *FunctionInfo { + declarator := node.ChildByFieldName("declarator") + if declarator == nil { + return nil + } + + // Only match function declarators (with parameter list). + if declarator.Type() != "function_declarator" { + // Check for pointer_declarator wrapping function_declarator. + found := false + for j := 0; j < int(declarator.ChildCount()); j++ { + if declarator.Child(j).Type() == "function_declarator" { + declarator = declarator.Child(j) + found = true + break + } + } + if !found { + return nil + } + } + + name, qualifiedReceiver := extractCPPNameWithQualifier(declarator, ctx.content) + if name == "" { + return nil + } + + if qualifiedReceiver != "" { + receiver = qualifiedReceiver + } + + sig := p.buildDeclarationSignature(node, ctx) + doc := extractPrecedingComment(node, ctx.content) + line := int(node.StartPoint().Row) + 1 + + qualName := name + if receiver == "" && len(ctx.namespaces) > 0 { + qualName = ctx.qualifiedName(name) + } + + return &FunctionInfo{ + Name: qualName, + Receiver: receiver, + Signature: sig, + Doc: doc, + File: ctx.filePath, + Line: line, + Exported: true, + ASTHash: hashString(node.Content(ctx.content)), + SigHash: hashString(sig), + } +} + +func (p *CPPParser) extractStructOrUnion(node *sitter.Node, ctx *cppParseContext) *TypeInfo { + nameNode := node.ChildByFieldName("name") + if nameNode == nil { + return nil // Anonymous struct/union. + } + + name := nameNode.Content(ctx.content) + kind := "struct" + if node.Type() == "union_specifier" { + kind = "union" + } + doc := extractPrecedingComment(node, ctx.content) + + qualName := name + if len(ctx.namespaces) > 0 { + qualName = ctx.qualifiedName(name) + } + if ctx.template != "" { + qualName = ctx.template + " " + qualName + } + + typeInfo := &TypeInfo{ + Name: qualName, + Kind: kind, + Doc: doc, + File: ctx.filePath, + Line: int(node.StartPoint().Row) + 1, + Exported: true, + ASTHash: hashString(node.Content(ctx.content)), + } + + // Extract fields and methods from struct body. + if body := node.ChildByFieldName("body"); body != nil { + p.extractClassMembers(body, ctx, typeInfo, name) + } + + return typeInfo +} + +func (p *CPPParser) extractEnum(node *sitter.Node, ctx *cppParseContext) *TypeInfo { + nameNode := node.ChildByFieldName("name") + if nameNode == nil { + return nil + } + + name := nameNode.Content(ctx.content) + doc := extractPrecedingComment(node, ctx.content) + + qualName := name + if len(ctx.namespaces) > 0 { + qualName = ctx.qualifiedName(name) + } + + return &TypeInfo{ + Name: qualName, + Kind: "enum", + Doc: doc, + File: ctx.filePath, + Line: int(node.StartPoint().Row) + 1, + Exported: true, + ASTHash: hashString(node.Content(ctx.content)), + } +} + +func (p *CPPParser) extractTypedef(node *sitter.Node, ctx *cppParseContext) *TypeInfo { + declarator := node.ChildByFieldName("declarator") + if declarator == nil { + return nil + } + + name, _ := extractCPPNameWithQualifier(declarator, ctx.content) + if name == "" { + return nil + } + + doc := extractPrecedingComment(node, ctx.content) + + qualName := name + if len(ctx.namespaces) > 0 { + qualName = ctx.qualifiedName(name) + } + + return &TypeInfo{ + Name: qualName, + Kind: "typedef", + Doc: doc, + File: ctx.filePath, + Line: int(node.StartPoint().Row) + 1, + Exported: true, + ASTHash: hashString(node.Content(ctx.content)), + } +} + +func (p *CPPParser) extractClass(node *sitter.Node, ctx *cppParseContext) *TypeInfo { + nameNode := node.ChildByFieldName("name") + if nameNode == nil { + return nil + } + + name := nameNode.Content(ctx.content) + doc := extractPrecedingComment(node, ctx.content) + + qualName := name + if len(ctx.namespaces) > 0 { + qualName = ctx.qualifiedName(name) + } + if ctx.template != "" { + qualName = ctx.template + " " + qualName + } + + typeInfo := &TypeInfo{ + Name: qualName, + Kind: "class", + Doc: doc, + File: ctx.filePath, + Line: int(node.StartPoint().Row) + 1, + Exported: true, + ASTHash: hashString(node.Content(ctx.content)), + } + + // Extract methods and fields from class body. + if body := node.ChildByFieldName("body"); body != nil { + p.extractClassMembers(body, ctx, typeInfo, name) + } + + return typeInfo +} + +// extractClassMembers extracts methods and fields from a class/struct body. +func (p *CPPParser) extractClassMembers( + body *sitter.Node, + ctx *cppParseContext, + typeInfo *TypeInfo, + className string, +) { + currentAccess := "private" + if typeInfo.Kind == "struct" { + currentAccess = "public" + } + + for i := 0; i < int(body.ChildCount()); i++ { + child := body.Child(i) + nodeType := child.Type() + + switch nodeType { + case "access_specifier": + // Update access level (public, private, protected). + for j := 0; j < int(child.ChildCount()); j++ { + c := child.Child(j) + switch c.Type() { + case "public", "private", "protected": + currentAccess = c.Type() + } + } + + case "field_declaration": + // Check if this is a method declaration or a field. + if method := p.extractMethodFromField(child, ctx, className, currentAccess); method != nil { + typeInfo.Methods = append(typeInfo.Methods, *method) + } else if field := p.extractFieldFromDecl(child, ctx); field != nil { + typeInfo.Fields = append(typeInfo.Fields, *field) + } + + case "function_definition": + // Inline method definition. + if method := p.extractInlineMethod(child, ctx, className, currentAccess); method != nil { + typeInfo.Methods = append(typeInfo.Methods, *method) + } + + case "declaration": + // Constructor/destructor declarations. + if method := p.extractConstructorDecl(child, ctx, className, currentAccess); method != nil { + typeInfo.Methods = append(typeInfo.Methods, *method) + } + + case "template_declaration": + // Template method inside class. + p.extractTemplateMethod(child, ctx, typeInfo, className, currentAccess) + } + } +} + +// extractMethodFromField extracts a method declaration from a field_declaration. +func (p *CPPParser) extractMethodFromField( + node *sitter.Node, + ctx *cppParseContext, + className string, + access string, +) *FunctionInfo { + // Look for function_declarator to identify this as a method. + var funcDecl *sitter.Node + for i := 0; i < int(node.ChildCount()); i++ { + child := node.Child(i) + if child.Type() == "function_declarator" { + funcDecl = child + break + } + } + if funcDecl == nil { + return nil + } + + name := "" + for i := 0; i < int(funcDecl.ChildCount()); i++ { + child := funcDecl.Child(i) + if child.Type() == "field_identifier" || child.Type() == "identifier" { + name = child.Content(ctx.content) + break + } + } + if name == "" { + return nil + } + + sig := p.buildFieldDeclSignature(node, ctx) + doc := extractPrecedingComment(node, ctx.content) + + return &FunctionInfo{ + Name: name, + Receiver: className, + Signature: sig, + Doc: doc, + File: ctx.filePath, + Line: int(node.StartPoint().Row) + 1, + Exported: access == "public", + ASTHash: hashString(node.Content(ctx.content)), + SigHash: hashString(sig), + } +} + +// extractInlineMethod extracts an inline method definition. +func (p *CPPParser) extractInlineMethod( + node *sitter.Node, + ctx *cppParseContext, + className string, + access string, +) *FunctionInfo { + declarator := node.ChildByFieldName("declarator") + if declarator == nil { + return nil + } + + name := "" + for declarator != nil { + switch declarator.Type() { + case "function_declarator": + for i := 0; i < int(declarator.ChildCount()); i++ { + child := declarator.Child(i) + if child.Type() == "field_identifier" || child.Type() == "identifier" { + name = child.Content(ctx.content) + break + } + } + if name != "" { + break + } + declarator = declarator.ChildByFieldName("declarator") + case "field_identifier", "identifier": + name = declarator.Content(ctx.content) + declarator = nil + default: + declarator = declarator.ChildByFieldName("declarator") + } + if name != "" { + break + } + } + + if name == "" { + return nil + } + + sig := p.buildSignature(node, ctx) + doc := extractPrecedingComment(node, ctx.content) + + return &FunctionInfo{ + Name: name, + Receiver: className, + Signature: sig, + Doc: doc, + File: ctx.filePath, + Line: int(node.StartPoint().Row) + 1, + Exported: access == "public", + ASTHash: hashString(node.Content(ctx.content)), + SigHash: hashString(sig), + } +} + +// extractConstructorDecl extracts constructor/destructor declarations. +func (p *CPPParser) extractConstructorDecl( + node *sitter.Node, + ctx *cppParseContext, + className string, + access string, +) *FunctionInfo { + declarator := node.ChildByFieldName("declarator") + if declarator == nil { + return nil + } + + // Check for function_declarator. + if declarator.Type() != "function_declarator" { + return nil + } + + name := "" + for i := 0; i < int(declarator.ChildCount()); i++ { + child := declarator.Child(i) + switch child.Type() { + case "identifier": + name = child.Content(ctx.content) + case "destructor_name": + // ~ClassName. + for j := 0; j < int(child.ChildCount()); j++ { + if child.Child(j).Type() == "identifier" { + name = "~" + child.Child(j).Content(ctx.content) + break + } + } + } + if name != "" { + break + } + } + + if name == "" { + return nil + } + + sig := strings.TrimSpace(strings.TrimSuffix(node.Content(ctx.content), ";")) + if len(sig) > 200 { + sig = sig[:200] + "..." + } + doc := extractPrecedingComment(node, ctx.content) + + return &FunctionInfo{ + Name: name, + Receiver: className, + Signature: sig, + Doc: doc, + File: ctx.filePath, + Line: int(node.StartPoint().Row) + 1, + Exported: access == "public", + ASTHash: hashString(node.Content(ctx.content)), + SigHash: hashString(sig), + } +} + +// extractTemplateMethod extracts template methods inside a class. +func (p *CPPParser) extractTemplateMethod( + node *sitter.Node, + ctx *cppParseContext, + typeInfo *TypeInfo, + className string, + access string, +) { + // Get template parameters. + templateParams := "" + for i := 0; i < int(node.ChildCount()); i++ { + child := node.Child(i) + if child.Type() == "template_parameter_list" { + templateParams = "template" + child.Content(ctx.content) + break + } + } + + // Find the function inside. + for i := 0; i < int(node.ChildCount()); i++ { + child := node.Child(i) + switch child.Type() { + case "function_definition": + if method := p.extractInlineMethod(child, ctx, className, access); method != nil { + method.Signature = templateParams + " " + method.Signature + typeInfo.Methods = append(typeInfo.Methods, *method) + } + case "field_declaration": + if method := p.extractMethodFromField(child, ctx, className, access); method != nil { + method.Signature = templateParams + " " + method.Signature + typeInfo.Methods = append(typeInfo.Methods, *method) + } + } + } +} + +// extractFieldFromDecl extracts a field (non-method) from a field_declaration. +func (p *CPPParser) extractFieldFromDecl(node *sitter.Node, ctx *cppParseContext) *FieldInfo { + // If it has a function_declarator, it's a method, not a field. + for i := 0; i < int(node.ChildCount()); i++ { + if node.Child(i).Type() == "function_declarator" { + return nil + } + } + + var typeName string + var fieldName string + + for i := 0; i < int(node.ChildCount()); i++ { + child := node.Child(i) + switch child.Type() { + case "primitive_type", "type_identifier", "qualified_identifier": + typeName = child.Content(ctx.content) + case "field_identifier": + fieldName = child.Content(ctx.content) + } + } + + if fieldName == "" { + return nil + } + + doc := extractPrecedingComment(node, ctx.content) + + return &FieldInfo{ + Name: fieldName, + Type: typeName, + Doc: doc, + } +} + +// buildSignature builds a function signature from a function_definition. +func (p *CPPParser) buildSignature(node *sitter.Node, ctx *cppParseContext) string { + // Take everything before the function body. + body := node.ChildByFieldName("body") + var sig string + if body != nil { + sig = strings.TrimSpace(string(ctx.content[node.StartByte():body.StartByte()])) + } else { + // Fallback: first line. + text := node.Content(ctx.content) + if idx := strings.Index(text, "{"); idx > 0 { + sig = strings.TrimSpace(text[:idx]) + } else { + sig = text + } + } + + // Prepend template if present. + if ctx.template != "" { + sig = ctx.template + " " + sig + } + + if len(sig) > 200 { + sig = sig[:200] + "..." + } + return sig +} + +// buildDeclarationSignature builds a signature from a declaration (prototype). +func (p *CPPParser) buildDeclarationSignature(node *sitter.Node, ctx *cppParseContext) string { + sig := strings.TrimSpace(strings.TrimSuffix(node.Content(ctx.content), ";")) + + if ctx.template != "" { + sig = ctx.template + " " + sig + } + + if len(sig) > 200 { + sig = sig[:200] + "..." + } + return sig +} + +// buildFieldDeclSignature builds a signature from a field_declaration (method decl). +func (p *CPPParser) buildFieldDeclSignature(node *sitter.Node, ctx *cppParseContext) string { + sig := strings.TrimSpace(strings.TrimSuffix(node.Content(ctx.content), ";")) + if len(sig) > 200 { + sig = sig[:200] + "..." + } + return sig +} + +// extractCPPNameWithQualifier extracts the name and any class qualifier from a declarator. +// For "Foo::bar", returns ("bar", "Foo"). For "bar", returns ("bar", ""). +func extractCPPNameWithQualifier(declarator *sitter.Node, content []byte) (string, string) { + for declarator != nil { + switch declarator.Type() { + case "identifier", "field_identifier", "type_identifier": + return declarator.Content(content), "" + + case "qualified_identifier": + // Handle Foo::bar or rstudio::core::Foo::bar. + return extractCPPQualifiedName(declarator, content) + + case "function_declarator", "pointer_declarator", "array_declarator", + "parenthesized_declarator": + declarator = declarator.ChildByFieldName("declarator") + if declarator == nil { + return "", "" + } + + default: + // Try to find an identifier child. + for i := 0; i < int(declarator.ChildCount()); i++ { + child := declarator.Child(i) + switch child.Type() { + case "identifier", "field_identifier", "type_identifier": + return child.Content(content), "" + case "qualified_identifier": + return extractCPPQualifiedName(child, content) + } + } + return "", "" + } + } + return "", "" +} + +// extractCPPQualifiedName handles qualified identifiers like Foo::bar or ns::Foo::bar. +// Returns (name, receiver) where receiver is the immediate class qualifier if present. +func extractCPPQualifiedName(node *sitter.Node, content []byte) (string, string) { + // Collect all parts of the qualified name. + var parts []string + collectCPPQualifiedParts(node, content, &parts) + + if len(parts) == 0 { + return "", "" + } + if len(parts) == 1 { + return parts[0], "" + } + + // Last part is the name, second-to-last is the class (receiver). + name := parts[len(parts)-1] + receiver := parts[len(parts)-2] + return name, receiver +} + +// collectCPPQualifiedParts recursively collects parts of a qualified identifier. +func collectCPPQualifiedParts(node *sitter.Node, content []byte, parts *[]string) { + for i := 0; i < int(node.ChildCount()); i++ { + child := node.Child(i) + switch child.Type() { + case "namespace_identifier", "identifier", "type_identifier": + *parts = append(*parts, child.Content(content)) + case "template_type": + // Handle Container. + for j := 0; j < int(child.ChildCount()); j++ { + if child.Child(j).Type() == "type_identifier" { + *parts = append(*parts, child.Child(j).Content(content)) + break + } + } + case "qualified_identifier": + collectCPPQualifiedParts(child, content, parts) + } + } +} diff --git a/indexer/parser_test.go b/indexer/parser_test.go index e26bd68..52885a7 100644 --- a/indexer/parser_test.go +++ b/indexer/parser_test.go @@ -258,7 +258,7 @@ func TestPythonParser(t *testing.T) { } } -// --- C/C++ Parser --- +// --- C Parser --- func TestCParser(t *testing.T) { srcRoot := filepath.Join(testdataDir(), "c") @@ -303,8 +303,23 @@ func TestCParser(t *testing.T) { if create == nil { t.Error("function hash_table_create not found in header") } +} + +// --- C++ Parser --- + +func TestCPPParser(t *testing.T) { + srcRoot := filepath.Join(testdataDir(), "c") + parser := NewCPPParser(srcRoot, nil) + result := NewParseResult() + if err := parser.Parse(result); err != nil { + t.Fatalf("Parse() error: %v", err) + } - // Check C++ file. + if len(result.Files) == 0 { + t.Fatal("expected at least one file parsed") + } + + // Check .cpp file. var cppInfo *FileInfo for _, f := range result.Files { if filepath.Base(f.Path) == "string_pool.cpp" { @@ -323,6 +338,58 @@ func TestCParser(t *testing.T) { } else if stringPool.Kind != "class" { t.Errorf("StringPool.Kind = %q, want %q", stringPool.Kind, "class") } + + // Check .hpp header file. + var hppInfo *FileInfo + for _, f := range result.Files { + if filepath.Base(f.Path) == "container.hpp" { + hppInfo = f + break + } + } + if hppInfo == nil { + t.Fatal("container.hpp not found in parsed files") + } + + // Should have template class with namespace-qualified name. + container := findType(hppInfo.Types, "template testlib::collections::Container") + if container == nil { + t.Error("template class Container not found in container.hpp") + } else { + if container.Kind != "class" { + t.Errorf("Container.Kind = %q, want %q", container.Kind, "class") + } + // Should have methods. + if len(container.Methods) == 0 { + t.Error("Container should have methods") + } + } + + // Should have struct with namespace-qualified name. + fileEntry := findType(hppInfo.Types, "testlib::collections::FileEntry") + if fileEntry == nil { + t.Error("struct FileEntry not found in container.hpp") + } else { + if fileEntry.Kind != "struct" { + t.Errorf("FileEntry.Kind = %q, want %q", fileEntry.Kind, "struct") + } + // Should have fields. + if len(fileEntry.Fields) == 0 { + t.Error("FileEntry should have fields") + } + // Should have methods. + if len(fileEntry.Methods) == 0 { + t.Error("FileEntry should have methods") + } + } + + // Should have enum with namespace-qualified name. + status := findType(hppInfo.Types, "testlib::collections::Status") + if status == nil { + t.Error("enum Status not found in container.hpp") + } else if status.Kind != "enum" { + t.Errorf("Status.Kind = %q, want %q", status.Kind, "enum") + } } // --- R Parser --- diff --git a/testdata/c/container.hpp b/testdata/c/container.hpp new file mode 100644 index 0000000..cf5ef21 --- /dev/null +++ b/testdata/c/container.hpp @@ -0,0 +1,55 @@ +#ifndef CONTAINER_HPP +#define CONTAINER_HPP + +#include +#include + +namespace testlib { +namespace collections { + +/** + * A generic container with basic operations. + */ +template +class Container { +public: + Container(); + explicit Container(size_t capacity); + ~Container(); + + void add(const T& item); + T get(size_t index) const; + size_t size() const { return items_.size(); } + bool empty() const; + +private: + std::vector items_; + size_t capacity_; +}; + +/** + * Metadata about a file in the index. + */ +struct FileEntry { + std::string path; + size_t size; + bool indexed; + + void markIndexed(); + std::string displayName() const; +}; + +/** + * Status codes for operations. + */ +enum class Status { + Ok, + NotFound, + PermissionDenied, + IoError +}; + +} // namespace collections +} // namespace testlib + +#endif // CONTAINER_HPP From 4548992ff02a2bbfbadb38231bfcfeab1044fdea Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 17:39:52 +0000 Subject: [PATCH 2/6] docs: update supported languages table to reflect c/cpp parser split Agent-Logs-Url: https://github.com/posit-dev/code-index/sessions/fd6b6fd0-d232-4b04-91f7-0951b3514988 Co-authored-by: cm421 <56326543+cm421@users.noreply.github.com> --- docs/configuration.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 9fb389c..cbb974e 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -90,8 +90,8 @@ Each source object defines a directory to index: | `typescript` | `.ts`, `.tsx`, `.js`, `.jsx`, `.vue` | tree-sitter — functions, classes, interfaces, enums, JSDoc | | `javascript` | `.js`, `.jsx` | tree-sitter (same as typescript) | | `python` | `.py` | tree-sitter — functions, classes, decorators, docstrings | -| `c` | `.c`, `.h`, `.cpp`, `.cc`, `.hpp` | tree-sitter — functions, structs, classes, enums, typedefs, Doxygen | -| `cpp` | `.c`, `.h`, `.cpp`, `.cc`, `.hpp` | tree-sitter (same as c) | +| `c` | `.c`, `.h` | tree-sitter — functions, structs, enums, typedefs, Doxygen | +| `cpp` | `.cpp`, `.cc`, `.hpp`, `.cxx`, `.hxx`, `.h` | tree-sitter — functions, classes, structs, namespaces, templates, enums, typedefs, Doxygen | | `r` | `.R`, `.r` | Native Rscript with regex fallback — functions, roxygen, S4/R6 classes | | `markdown` | `.md`, `.qmd` | Regex — headings as sections, YAML front matter | From 08e513833e458c9f0bd8fd5592b1cde7adedd90e Mon Sep 17 00:00:00 2001 From: Craig <56326543+cm421@users.noreply.github.com> Date: Wed, 22 Apr 2026 15:25:15 -0600 Subject: [PATCH 3/6] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- indexer/parser_cpp.go | 60 ++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/indexer/parser_cpp.go b/indexer/parser_cpp.go index af1708b..678340b 100644 --- a/indexer/parser_cpp.go +++ b/indexer/parser_cpp.go @@ -44,7 +44,7 @@ func (p *CPPParser) Parse(result *ParseResult) error { } ext := filepath.Ext(path) - if ext != ".cpp" && ext != ".cc" && ext != ".hpp" && ext != ".cxx" && ext != ".hxx" && ext != ".h" { + if ext != ".cpp" && ext != ".cc" && ext != ".hpp" && ext != ".cxx" && ext != ".hxx" { return nil } @@ -859,37 +859,67 @@ func extractCPPNameWithQualifier(declarator *sitter.Node, content []byte) (strin } // extractCPPQualifiedName handles qualified identifiers like Foo::bar or ns::Foo::bar. -// Returns (name, receiver) where receiver is the immediate class qualifier if present. +// Returns (name, receiver). For member functions, receiver is the full qualifier +// (for example, ns::Foo). For namespace-qualified free functions, the full +// qualified name is returned in name and receiver is empty. func extractCPPQualifiedName(node *sitter.Node, content []byte) (string, string) { - // Collect all parts of the qualified name. - var parts []string + var parts []cppQualifiedPart collectCPPQualifiedParts(node, content, &parts) if len(parts) == 0 { return "", "" } if len(parts) == 1 { - return parts[0], "" + return parts[0].text, "" } - // Last part is the name, second-to-last is the class (receiver). - name := parts[len(parts)-1] - receiver := parts[len(parts)-2] - return name, receiver + name := parts[len(parts)-1].text + qualifierParts := parts[:len(parts)-1] + lastQualifier := qualifierParts[len(qualifierParts)-1] + + qualifierTexts := make([]string, 0, len(qualifierParts)) + for _, part := range qualifierParts { + qualifierTexts = append(qualifierTexts, part.text) + } + + switch lastQualifier.kind { + case "namespace_identifier": + // Namespace-qualified free function: keep the full qualifier in the name. + return strings.Join(append(qualifierTexts, name), "::"), "" + case "type_identifier", "template_type", "identifier": + // Member function: keep the full qualifier chain as the receiver. + return name, strings.Join(qualifierTexts, "::") + default: + // Fall back to preserving the full qualifier in the name. + return strings.Join(append(qualifierTexts, name), "::"), "" + } +} + +type cppQualifiedPart struct { + text string + kind string } -// collectCPPQualifiedParts recursively collects parts of a qualified identifier. -func collectCPPQualifiedParts(node *sitter.Node, content []byte, parts *[]string) { +// collectCPPQualifiedParts recursively collects parts of a qualified identifier +// along with their syntactic kinds so namespaces can be distinguished from types. +func collectCPPQualifiedParts(node *sitter.Node, content []byte, parts *[]cppQualifiedPart) { for i := 0; i < int(node.ChildCount()); i++ { child := node.Child(i) switch child.Type() { case "namespace_identifier", "identifier", "type_identifier": - *parts = append(*parts, child.Content(content)) + *parts = append(*parts, cppQualifiedPart{ + text: child.Content(content), + kind: child.Type(), + }) case "template_type": - // Handle Container. + // Handle Container by recording the template type name as a type-like qualifier. for j := 0; j < int(child.ChildCount()); j++ { - if child.Child(j).Type() == "type_identifier" { - *parts = append(*parts, child.Child(j).Content(content)) + grandchild := child.Child(j) + if grandchild.Type() == "type_identifier" { + *parts = append(*parts, cppQualifiedPart{ + text: grandchild.Content(content), + kind: "template_type", + }) break } } From 20c42cdb5a4ae83522a3e42ecd91af1a144f207a Mon Sep 17 00:00:00 2001 From: craig Date: Thu, 23 Apr 2026 12:03:38 -0600 Subject: [PATCH 4/6] Update docs --- docs/configuration.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index cbb974e..1542837 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -91,7 +91,9 @@ Each source object defines a directory to index: | `javascript` | `.js`, `.jsx` | tree-sitter (same as typescript) | | `python` | `.py` | tree-sitter — functions, classes, decorators, docstrings | | `c` | `.c`, `.h` | tree-sitter — functions, structs, enums, typedefs, Doxygen | -| `cpp` | `.cpp`, `.cc`, `.hpp`, `.cxx`, `.hxx`, `.h` | tree-sitter — functions, classes, structs, namespaces, templates, enums, typedefs, Doxygen | +| `cpp` | `.cpp`, `.cc`, `.hpp`, `.cxx`, `.hxx` | tree-sitter — functions, classes, structs, namespaces, templates, enums, typedefs, Doxygen | + +> **Note:** `.h` files are handled by the `c` parser, not `cpp`. This avoids double-parsing when a project configures both `c` and `cpp` sources over the same tree. C++ projects with `.h` headers should configure a `c` source pointing at the header directories. | `r` | `.R`, `.r` | Native Rscript with regex fallback — functions, roxygen, S4/R6 classes | | `markdown` | `.md`, `.qmd` | Regex — headings as sections, YAML front matter | From f31f1d3bcf4e8d88b1298b2806506b3712892f8e Mon Sep 17 00:00:00 2001 From: craig Date: Thu, 23 Apr 2026 13:13:59 -0600 Subject: [PATCH 5/6] Move type signature from Name to Signature, and add truncateSignature --- indexer/parser_c.go | 11 ++----- indexer/parser_cpp.go | 68 +++++++++++++++++++----------------------- indexer/parser_test.go | 8 +++-- indexer/types.go | 2 ++ 4 files changed, 41 insertions(+), 48 deletions(-) diff --git a/indexer/parser_c.go b/indexer/parser_c.go index df26b38..8a2d171 100644 --- a/indexer/parser_c.go +++ b/indexer/parser_c.go @@ -326,19 +326,12 @@ func extractCSignature(node *sitter.Node, content []byte) string { // Take everything before the function body. body := node.ChildByFieldName("body") if body != nil { - sig := strings.TrimSpace(string(content[node.StartByte():body.StartByte()])) - if len(sig) > 200 { - sig = sig[:200] + "..." - } - return sig + return truncateSignature(strings.TrimSpace(string(content[node.StartByte():body.StartByte()]))) } // Fallback: first line. text := node.Content(content) if idx := strings.Index(text, "{"); idx > 0 { text = strings.TrimSpace(text[:idx]) } - if len(text) > 200 { - text = text[:200] + "..." - } - return text + return truncateSignature(text) } diff --git a/indexer/parser_cpp.go b/indexer/parser_cpp.go index 678340b..1aa8a26 100644 --- a/indexer/parser_cpp.go +++ b/indexer/parser_cpp.go @@ -12,6 +12,17 @@ import ( "github.com/smacker/go-tree-sitter/cpp" ) +// maxSignatureLen caps stored signatures to keep index entries compact. +const maxSignatureLen = 200 + +// truncateSignature caps a signature at maxSignatureLen, appending "..." when trimmed. +func truncateSignature(sig string) string { + if len(sig) <= maxSignatureLen { + return sig + } + return sig[:maxSignatureLen] + "..." +} + // CPPParser extracts structured information from C++ source files using tree-sitter. type CPPParser struct { srcRoot string @@ -369,18 +380,16 @@ func (p *CPPParser) extractStructOrUnion(node *sitter.Node, ctx *cppParseContext if len(ctx.namespaces) > 0 { qualName = ctx.qualifiedName(name) } - if ctx.template != "" { - qualName = ctx.template + " " + qualName - } typeInfo := &TypeInfo{ - Name: qualName, - Kind: kind, - Doc: doc, - File: ctx.filePath, - Line: int(node.StartPoint().Row) + 1, - Exported: true, - ASTHash: hashString(node.Content(ctx.content)), + Name: qualName, + Kind: kind, + Signature: ctx.template, + Doc: doc, + File: ctx.filePath, + Line: int(node.StartPoint().Row) + 1, + Exported: true, + ASTHash: hashString(node.Content(ctx.content)), } // Extract fields and methods from struct body. @@ -458,18 +467,16 @@ func (p *CPPParser) extractClass(node *sitter.Node, ctx *cppParseContext) *TypeI if len(ctx.namespaces) > 0 { qualName = ctx.qualifiedName(name) } - if ctx.template != "" { - qualName = ctx.template + " " + qualName - } typeInfo := &TypeInfo{ - Name: qualName, - Kind: "class", - Doc: doc, - File: ctx.filePath, - Line: int(node.StartPoint().Row) + 1, - Exported: true, - ASTHash: hashString(node.Content(ctx.content)), + Name: qualName, + Kind: "class", + Signature: ctx.template, + Doc: doc, + File: ctx.filePath, + Line: int(node.StartPoint().Row) + 1, + Exported: true, + ASTHash: hashString(node.Content(ctx.content)), } // Extract methods and fields from class body. @@ -681,10 +688,7 @@ func (p *CPPParser) extractConstructorDecl( return nil } - sig := strings.TrimSpace(strings.TrimSuffix(node.Content(ctx.content), ";")) - if len(sig) > 200 { - sig = sig[:200] + "..." - } + sig := truncateSignature(strings.TrimSpace(strings.TrimSuffix(node.Content(ctx.content), ";"))) doc := extractPrecedingComment(node, ctx.content) return &FunctionInfo{ @@ -793,10 +797,7 @@ func (p *CPPParser) buildSignature(node *sitter.Node, ctx *cppParseContext) stri sig = ctx.template + " " + sig } - if len(sig) > 200 { - sig = sig[:200] + "..." - } - return sig + return truncateSignature(sig) } // buildDeclarationSignature builds a signature from a declaration (prototype). @@ -807,19 +808,12 @@ func (p *CPPParser) buildDeclarationSignature(node *sitter.Node, ctx *cppParseCo sig = ctx.template + " " + sig } - if len(sig) > 200 { - sig = sig[:200] + "..." - } - return sig + return truncateSignature(sig) } // buildFieldDeclSignature builds a signature from a field_declaration (method decl). func (p *CPPParser) buildFieldDeclSignature(node *sitter.Node, ctx *cppParseContext) string { - sig := strings.TrimSpace(strings.TrimSuffix(node.Content(ctx.content), ";")) - if len(sig) > 200 { - sig = sig[:200] + "..." - } - return sig + return truncateSignature(strings.TrimSpace(strings.TrimSuffix(node.Content(ctx.content), ";"))) } // extractCPPNameWithQualifier extracts the name and any class qualifier from a declarator. diff --git a/indexer/parser_test.go b/indexer/parser_test.go index 52885a7..40311a3 100644 --- a/indexer/parser_test.go +++ b/indexer/parser_test.go @@ -351,14 +351,18 @@ func TestCPPParser(t *testing.T) { t.Fatal("container.hpp not found in parsed files") } - // Should have template class with namespace-qualified name. - container := findType(hppInfo.Types, "template testlib::collections::Container") + // Should have template class with namespace-qualified name and + // template parameters carried in the Signature field. + container := findType(hppInfo.Types, "testlib::collections::Container") if container == nil { t.Error("template class Container not found in container.hpp") } else { if container.Kind != "class" { t.Errorf("Container.Kind = %q, want %q", container.Kind, "class") } + if container.Signature != "template" { + t.Errorf("Container.Signature = %q, want %q", container.Signature, "template") + } // Should have methods. if len(container.Methods) == 0 { t.Error("Container should have methods") diff --git a/indexer/types.go b/indexer/types.go index 11bfc10..184e263 100644 --- a/indexer/types.go +++ b/indexer/types.go @@ -63,6 +63,8 @@ type TypeInfo struct { Name string `json:"name"` // Kind describes the type (e.g., "struct", "interface", "class", "enum", "typedef"). Kind string `json:"kind"` + // Signature is the full type signature (e.g., C++ template parameters like "template"). + Signature string `json:"signature,omitempty"` // Doc is the documentation comment. Doc string `json:"doc,omitempty"` // File is the relative file path. From ee2894ca1dfd7d7dcaea9c77d7b5c3845cf1280e Mon Sep 17 00:00:00 2001 From: craig Date: Thu, 23 Apr 2026 13:57:33 -0600 Subject: [PATCH 6/6] refactor: move truncateSignature function to parser_common.go and update C++ parser --- docs/configuration.md | 4 ++-- indexer/parser_common.go | 11 +++++++++++ indexer/parser_cpp.go | 11 ----------- 3 files changed, 13 insertions(+), 13 deletions(-) create mode 100644 indexer/parser_common.go diff --git a/docs/configuration.md b/docs/configuration.md index 1542837..dd2a30e 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -92,11 +92,11 @@ Each source object defines a directory to index: | `python` | `.py` | tree-sitter — functions, classes, decorators, docstrings | | `c` | `.c`, `.h` | tree-sitter — functions, structs, enums, typedefs, Doxygen | | `cpp` | `.cpp`, `.cc`, `.hpp`, `.cxx`, `.hxx` | tree-sitter — functions, classes, structs, namespaces, templates, enums, typedefs, Doxygen | - -> **Note:** `.h` files are handled by the `c` parser, not `cpp`. This avoids double-parsing when a project configures both `c` and `cpp` sources over the same tree. C++ projects with `.h` headers should configure a `c` source pointing at the header directories. | `r` | `.R`, `.r` | Native Rscript with regex fallback — functions, roxygen, S4/R6 classes | | `markdown` | `.md`, `.qmd` | Regex — headings as sections, YAML front matter | +> **Note:** `.h` files are handled by the `c` parser, not `cpp`. This avoids double-parsing when a project configures both `c` and `cpp` sources over the same tree. C++ projects with `.h` headers should configure a `c` source pointing at the header directories. + #### Vendor-aware Go indexing For Go projects, you can include specific vendored dependencies in the index: diff --git a/indexer/parser_common.go b/indexer/parser_common.go new file mode 100644 index 0000000..aa01f0f --- /dev/null +++ b/indexer/parser_common.go @@ -0,0 +1,11 @@ +// Copyright (C) 2026 by Posit Software, PBC +package indexer + +const maxSignatureLen = 200 + +func truncateSignature(sig string) string { + if len(sig) <= maxSignatureLen { + return sig + } + return sig[:maxSignatureLen] + "..." +} diff --git a/indexer/parser_cpp.go b/indexer/parser_cpp.go index 1aa8a26..7c1b8b8 100644 --- a/indexer/parser_cpp.go +++ b/indexer/parser_cpp.go @@ -12,17 +12,6 @@ import ( "github.com/smacker/go-tree-sitter/cpp" ) -// maxSignatureLen caps stored signatures to keep index entries compact. -const maxSignatureLen = 200 - -// truncateSignature caps a signature at maxSignatureLen, appending "..." when trimmed. -func truncateSignature(sig string) string { - if len(sig) <= maxSignatureLen { - return sig - } - return sig[:maxSignatureLen] + "..." -} - // CPPParser extracts structured information from C++ source files using tree-sitter. type CPPParser struct { srcRoot string