diff --git a/sourcecode-parser/dsl/ir_types.go b/sourcecode-parser/dsl/ir_types.go
index 81464106..729f86f2 100644
--- a/sourcecode-parser/dsl/ir_types.go
+++ b/sourcecode-parser/dsl/ir_types.go
@@ -30,6 +30,18 @@ func (c *CallMatcherIR) GetType() IRType {
 	return IRTypeCallMatcher
 }
 
+// VariableMatcherIR represents variable_matcher JSON IR.
+type VariableMatcherIR struct {
+	Type     string `json:"type"`     // "variable_matcher"
+	Pattern  string `json:"pattern"`  // "user_input" or "user_*"
+	Wildcard bool   `json:"wildcard"` // true if pattern has *
+}
+
+// GetType returns the IR type.
+func (v *VariableMatcherIR) GetType() IRType {
+	return IRTypeVariableMatcher
+}
+
 // RuleIR represents a complete rule with metadata.
 type RuleIR struct {
 	Rule struct {
diff --git a/sourcecode-parser/dsl/variable_matcher.go b/sourcecode-parser/dsl/variable_matcher.go
new file mode 100644
index 00000000..84a77a19
--- /dev/null
+++ b/sourcecode-parser/dsl/variable_matcher.go
@@ -0,0 +1,114 @@
+package dsl
+
+import (
+	"strings"
+
+	"github.com/shivasurya/code-pathfinder/sourcecode-parser/graph/callgraph"
+)
+
+// VariableMatcherExecutor executes variable_matcher IR.
+type VariableMatcherExecutor struct {
+	IR        *VariableMatcherIR   // Pattern and wildcard flag to match with
+	CallGraph *callgraph.CallGraph // Call graph whose CallSites are scanned
+}
+
+// NewVariableMatcherExecutor creates a new executor.
+func NewVariableMatcherExecutor(ir *VariableMatcherIR, cg *callgraph.CallGraph) *VariableMatcherExecutor {
+	return &VariableMatcherExecutor{
+		IR:        ir,
+		CallGraph: cg,
+	}
+}
+
+// Execute finds all variable references matching the pattern.
+//
+// Algorithm:
+//  1. Iterate over callGraph.CallSites
+//  2. For each call site, check arguments for variable references
+//  3. Match argument values against pattern (with wildcard support)
+//  4. Return list of matching call sites with argument positions
+//
+// The result is always non-nil. An executor whose IR or CallGraph is nil
+// reports no matches instead of panicking. Call sites are visited in map
+// iteration order, so the order of the results is not deterministic.
+func (e *VariableMatcherExecutor) Execute() []VariableMatchResult {
+	matches := []VariableMatchResult{}
+
+	// Without a pattern or a call graph there is nothing to match.
+	if e.IR == nil || e.CallGraph == nil {
+		return matches
+	}
+
+	for functionFQN, callSites := range e.CallGraph.CallSites {
+		for _, callSite := range callSites {
+			// Check each argument
+			for _, arg := range callSite.Arguments {
+				if arg.IsVariable && e.matchesPattern(arg.Value) {
+					matches = append(matches, VariableMatchResult{
+						CallSite:     callSite,
+						VariableName: arg.Value,
+						ArgumentPos:  arg.Position,
+						FunctionFQN:  functionFQN,
+						SourceFile:   callSite.Location.File,
+						Line:         callSite.Location.Line,
+					})
+				}
+			}
+		}
+	}
+
+	return matches
+}
+
+// VariableMatchResult contains match information.
+type VariableMatchResult struct {
+	CallSite     callgraph.CallSite
+	VariableName string // The matched variable name
+	ArgumentPos  int    // Position in argument list
+	FunctionFQN  string // Key of CallGraph.CallSites the call site was listed under
+	SourceFile   string // callSite.Location.File
+	Line         int    // callSite.Location.Line
+}
+
+// matchesPattern checks if variable name matches pattern.
+//
+// With Wildcard unset, or when the pattern contains no "*", the name must
+// equal the pattern exactly. Otherwise every "*" matches any run of
+// characters (including none), wherever it appears in the pattern:
+// "user_*" (prefix), "*_id" (suffix), "*input*" (substring), "user_*_id"
+// (prefix and suffix) and "*" (anything).
+func (e *VariableMatcherExecutor) matchesPattern(varName string) bool {
+	pattern := e.IR.Pattern
+
+	if !e.IR.Wildcard || !strings.Contains(pattern, "*") {
+		return varName == pattern
+	}
+
+	// Splitting on "*" leaves the literal pieces that must appear in order.
+	// The pattern contains at least one "*", so there are at least two pieces.
+	segments := strings.Split(pattern, "*")
+	head, tail := segments[0], segments[len(segments)-1]
+
+	// The first piece is anchored at the start of the name.
+	if !strings.HasPrefix(varName, head) {
+		return false
+	}
+	rest := varName[len(head):]
+
+	// The last piece is anchored at the end and may not overlap the first.
+	if !strings.HasSuffix(rest, tail) {
+		return false
+	}
+	rest = rest[:len(rest)-len(tail)]
+
+	// Interior pieces are matched leftmost-first in what remains.
+	for _, segment := range segments[1 : len(segments)-1] {
+		idx := strings.Index(rest, segment)
+		if idx < 0 {
+			return false
+		}
+		rest = rest[idx+len(segment):]
+	}
+
+	return true
+}
diff --git a/sourcecode-parser/dsl/variable_matcher_test.go b/sourcecode-parser/dsl/variable_matcher_test.go
new file mode 100644
index 00000000..ed2bb6ec
--- /dev/null
+++ b/sourcecode-parser/dsl/variable_matcher_test.go
@@ -0,0 +1,134 @@
+package dsl
+
+import (
+	"testing"
+
+	"github.com/shivasurya/code-pathfinder/sourcecode-parser/graph/callgraph"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestVariableMatcherExecutor_Execute(t *testing.T) {
+	cg := callgraph.NewCallGraph()
+
+	cg.CallSites["test.main"] = []callgraph.CallSite{
+		{
+			Target: "eval",
+			Arguments: []callgraph.Argument{
+				{Value: "user_input", IsVariable: true, Position: 0},
+			},
+			Location: callgraph.Location{File: "test.py", Line: 10},
+		},
+		{
+			Target: "print",
+			Arguments: []callgraph.Argument{
+				{Value: "\"hello\"", IsVariable: false, Position: 0},
+			},
+			Location: callgraph.Location{File: "test.py", Line: 15},
+		},
+	}
+
+	t.Run("exact match", func(t *testing.T) {
+		ir := &VariableMatcherIR{
+			Pattern:  "user_input",
+			Wildcard: false,
+		}
+
+		executor := NewVariableMatcherExecutor(ir, cg)
+		matches := executor.Execute()
+
+		// assert.Len does not stop the test; bail out before indexing.
+		if !assert.Len(t, matches, 1) {
+			return
+		}
+		assert.Equal(t, "user_input", matches[0].VariableName)
+		assert.Equal(t, 0, matches[0].ArgumentPos)
+		assert.Equal(t, "test.main", matches[0].FunctionFQN)
+		assert.Equal(t, "test.py", matches[0].SourceFile)
+		assert.Equal(t, 10, matches[0].Line)
+	})
+
+	t.Run("wildcard prefix", func(t *testing.T) {
+		cg2 := callgraph.NewCallGraph()
+		cg2.CallSites["test.main"] = []callgraph.CallSite{
+			{
+				Target: "process",
+				Arguments: []callgraph.Argument{
+					{Value: "user_input", IsVariable: true},
+					{Value: "user_id", IsVariable: true},
+					{Value: "admin_name", IsVariable: true},
+				},
+			},
+		}
+
+		ir := &VariableMatcherIR{
+			Pattern:  "user_*",
+			Wildcard: true,
+		}
+
+		executor := NewVariableMatcherExecutor(ir, cg2)
+		matches := executor.Execute()
+
+		assert.Len(t, matches, 2) // user_input, user_id
+	})
+
+	t.Run("no matches - literal argument", func(t *testing.T) {
+		ir := &VariableMatcherIR{
+			Pattern:  "user_input",
+			Wildcard: false,
+		}
+
+		cg2 := callgraph.NewCallGraph()
+		cg2.CallSites["test.main"] = []callgraph.CallSite{
+			{
+				Target: "print",
+				Arguments: []callgraph.Argument{
+					{Value: "\"literal\"", IsVariable: false}, // NOT a variable
+				},
+			},
+		}
+
+		executor := NewVariableMatcherExecutor(ir, cg2)
+		matches := executor.Execute()
+
+		assert.Empty(t, matches)
+	})
+
+	t.Run("nil call graph", func(t *testing.T) {
+		ir := &VariableMatcherIR{Pattern: "user_input"}
+
+		executor := NewVariableMatcherExecutor(ir, nil)
+
+		assert.Empty(t, executor.Execute())
+	})
+}
+
+func TestVariableMatcherExecutor_matchesPattern(t *testing.T) {
+	tests := []struct {
+		name     string
+		pattern  string
+		wildcard bool
+		varName  string
+		want     bool
+	}{
+		{"exact", "user_input", false, "user_input", true},
+		{"exact ignores star", "user_*", false, "user_input", false},
+		{"match all", "*", true, "anything", true},
+		{"prefix", "user_*", true, "user_id", true},
+		{"prefix miss", "user_*", true, "admin_id", false},
+		{"suffix", "*_id", true, "user_id", true},
+		{"substring", "*input*", true, "raw_input_str", true},
+		{"interior", "user_*_id", true, "user_session_id", true},
+		{"interior miss", "user_*_id", true, "user_name", false},
+		{"no overlap", "ab*ba", true, "aba", false},
+		{"wildcard without star", "user_id", true, "user_id", true},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ir := &VariableMatcherIR{Pattern: tt.pattern, Wildcard: tt.wildcard}
+			executor := NewVariableMatcherExecutor(ir, nil)
+
+			assert.Equal(t, tt.want, executor.matchesPattern(tt.varName))
+		})
+	}
+}