// Reconstructed from a whitespace-collapsed "new file" diff of
// sourcecode-parser/graph/callgraph/patterns.go. The original file declares
// the callgraph package and imports "strings".

// PatternType categorizes security patterns for analysis.
type PatternType string

const (
	// PatternTypeSourceSink detects tainted data flow from source to sink.
	PatternTypeSourceSink PatternType = "source-sink"

	// PatternTypeMissingSanitizer detects missing sanitization between source and sink.
	PatternTypeMissingSanitizer PatternType = "missing-sanitizer"

	// PatternTypeDangerousFunction detects calls to dangerous functions.
	PatternTypeDangerousFunction PatternType = "dangerous-function"
)

// Severity indicates the risk level of a security pattern match.
type Severity string

// Severity levels that can be attached to a Pattern.
const (
	SeverityCritical Severity = "critical"
	SeverityHigh     Severity = "high"
	SeverityMedium   Severity = "medium"
	SeverityLow      Severity = "low"
)

// Pattern represents a security pattern to detect in the call graph.
//
// Which of the function-name lists is consulted depends on Type: Sources,
// Sinks and Sanitizers describe flow-based patterns, while DangerousFunctions
// is used by PatternTypeDangerousFunction.
type Pattern struct {
	ID          string      // Unique identifier (e.g., "SQL-INJECTION-001")
	Name        string      // Human-readable name
	Description string      // What this pattern detects
	Type        PatternType // Pattern category
	Severity    Severity    // Risk level

	// Sources are function names that introduce tainted data.
	Sources []string

	// Sinks are function names that consume tainted data dangerously.
	Sinks []string

	// Sanitizers are function names that clean tainted data.
	Sanitizers []string

	// DangerousFunctions lists the function names flagged by
	// PatternTypeDangerousFunction patterns.
	DangerousFunctions []string

	CWE   string // Common Weakness Enumeration
	OWASP string // OWASP Top 10 category
}

// PatternRegistry manages security patterns.
//
// Patterns indexes every registered pattern by ID and PatternsByType groups
// the same pointers by Pattern.Type. AddPattern keeps the two in step.
type PatternRegistry struct {
	Patterns       map[string]*Pattern        // Pattern ID -> Pattern
	PatternsByType map[PatternType][]*Pattern // Type -> Patterns
}

// NewPatternRegistry creates a new, empty pattern registry with both indexes
// initialized.
func NewPatternRegistry() *PatternRegistry {
	return &PatternRegistry{
		Patterns:       make(map[string]*Pattern),
		PatternsByType: make(map[PatternType][]*Pattern),
	}
}

// AddPattern registers a pattern in the registry.
//
// A nil pattern is ignored. Registering a pattern whose ID is already present
// replaces the earlier one in both indexes, so PatternsByType never keeps an
// entry that Patterns no longer refers to. The maps are created on demand, so
// a zero-value PatternRegistry can be used without NewPatternRegistry.
func (pr *PatternRegistry) AddPattern(pattern *Pattern) {
	if pattern == nil {
		return
	}
	if pr.Patterns == nil {
		pr.Patterns = make(map[string]*Pattern)
	}
	if pr.PatternsByType == nil {
		pr.PatternsByType = make(map[PatternType][]*Pattern)
	}

	if previous, ok := pr.Patterns[pattern.ID]; ok {
		pr.dropFromTypeIndex(previous)
	}

	pr.Patterns[pattern.ID] = pattern
	pr.PatternsByType[pattern.Type] = append(pr.PatternsByType[pattern.Type], pattern)
}

// dropFromTypeIndex removes pattern from the PatternsByType bucket of its type.
func (pr *PatternRegistry) dropFromTypeIndex(pattern *Pattern) {
	bucket := pr.PatternsByType[pattern.Type]

	// Build a fresh slice instead of shifting in place: GetPatternsByType
	// hands the stored slice to callers, and they must not see it change.
	kept := make([]*Pattern, 0, len(bucket))
	for _, p := range bucket {
		if p != pattern {
			kept = append(kept, p)
		}
	}

	if len(kept) == 0 {
		delete(pr.PatternsByType, pattern.Type)
		return
	}
	pr.PatternsByType[pattern.Type] = kept
}

// GetPattern retrieves a pattern by ID. The boolean reports whether the ID is
// registered.
func (pr *PatternRegistry) GetPattern(id string) (*Pattern, bool) {
	pattern, exists := pr.Patterns[id]
	return pattern, exists
}

// GetPatternsByType retrieves all patterns of a specific type, in registration
// order. The result is nil when no pattern of that type is registered.
func (pr *PatternRegistry) GetPatternsByType(patternType PatternType) []*Pattern {
	return pr.PatternsByType[patternType]
}

// LoadDefaultPatterns loads the hardcoded example pattern.
// Additional patterns will be loaded from queries in future PRs.
func (pr *PatternRegistry) LoadDefaultPatterns() {
	// Example hardcoded pattern: Code injection via eval()
	pr.AddPattern(&Pattern{
		ID:          "CODE-INJECTION-001",
		Name:        "Code injection via eval with user input",
		Description: "Detects code injection when user input flows to eval() without sanitization",
		Type:        PatternTypeMissingSanitizer,
		Severity:    SeverityCritical,
		Sources:     []string{"request.GET", "request.POST", "input", "raw_input"},
		Sinks:       []string{"eval", "exec"},
		Sanitizers:  []string{"sanitize", "escape", "validate"},
		CWE:         "CWE-94",
		OWASP:       "A03:2021-Injection",
	})
}

// MatchPattern checks if a call graph matches a pattern.
+func (pr *PatternRegistry) MatchPattern(pattern *Pattern, callGraph *CallGraph) bool { + switch pattern.Type { + case PatternTypeDangerousFunction: + return pr.matchDangerousFunction(pattern, callGraph) + case PatternTypeSourceSink: + return pr.matchSourceSink(pattern, callGraph) + case PatternTypeMissingSanitizer: + return pr.matchMissingSanitizer(pattern, callGraph) + default: + return false + } +} + +// matchDangerousFunction checks if any dangerous function is called. +func (pr *PatternRegistry) matchDangerousFunction(pattern *Pattern, callGraph *CallGraph) bool { + for _, callSites := range callGraph.CallSites { + for _, callSite := range callSites { + for _, dangerousFunc := range pattern.DangerousFunctions { + if matchesFunctionName(callSite.TargetFQN, dangerousFunc) || + matchesFunctionName(callSite.Target, dangerousFunc) { + return true + } + } + } + } + return false +} + +// matchSourceSink checks if there's a path from source to sink. +func (pr *PatternRegistry) matchSourceSink(pattern *Pattern, callGraph *CallGraph) bool { + sourceCalls := pr.findCallsByFunctions(pattern.Sources, callGraph) + if len(sourceCalls) == 0 { + return false + } + + sinkCalls := pr.findCallsByFunctions(pattern.Sinks, callGraph) + if len(sinkCalls) == 0 { + return false + } + + for _, source := range sourceCalls { + for _, sink := range sinkCalls { + if pr.hasPath(source.caller, sink.caller, callGraph) { + return true + } + } + } + + return false +} + +// matchMissingSanitizer checks if there's a path from source to sink without sanitization. 
+func (pr *PatternRegistry) matchMissingSanitizer(pattern *Pattern, callGraph *CallGraph) bool { + sourceCalls := pr.findCallsByFunctions(pattern.Sources, callGraph) + if len(sourceCalls) == 0 { + return false + } + + sinkCalls := pr.findCallsByFunctions(pattern.Sinks, callGraph) + if len(sinkCalls) == 0 { + return false + } + + sanitizerCalls := pr.findCallsByFunctions(pattern.Sanitizers, callGraph) + + for _, source := range sourceCalls { + for _, sink := range sinkCalls { + if pr.hasPath(source.caller, sink.caller, callGraph) { + hasSanitizer := false + for _, sanitizer := range sanitizerCalls { + if pr.hasPath(source.caller, sanitizer.caller, callGraph) && + pr.hasPath(sanitizer.caller, sink.caller, callGraph) { + hasSanitizer = true + break + } + } + if !hasSanitizer { + return true + } + } + } + } + + return false +} + +// callInfo stores information about a function call location. +type callInfo struct { + caller string + target string +} + +// findCallsByFunctions finds all calls to specific functions. +func (pr *PatternRegistry) findCallsByFunctions(functionNames []string, callGraph *CallGraph) []callInfo { + var calls []callInfo + for caller, callSites := range callGraph.CallSites { + for _, callSite := range callSites { + for _, funcName := range functionNames { + if matchesFunctionName(callSite.TargetFQN, funcName) || + matchesFunctionName(callSite.Target, funcName) { + calls = append(calls, callInfo{caller: caller, target: callSite.TargetFQN}) + } + } + } + } + return calls +} + +// hasPath checks if there's a path from caller to callee in the call graph. +func (pr *PatternRegistry) hasPath(from, to string, callGraph *CallGraph) bool { + if from == to { + return true + } + + visited := make(map[string]bool) + return pr.dfsPath(from, to, callGraph, visited) +} + +// dfsPath performs depth-first search to find a path. 
+func (pr *PatternRegistry) dfsPath(current, target string, callGraph *CallGraph, visited map[string]bool) bool { + if current == target { + return true + } + + if visited[current] { + return false + } + + visited[current] = true + + callees := callGraph.GetCallees(current) + for _, callee := range callees { + if pr.dfsPath(callee, target, callGraph, visited) { + return true + } + } + + return false +} + +// matchesFunctionName checks if a function name matches a pattern. +// Supports exact matches and suffix matches. +func matchesFunctionName(fqn, pattern string) bool { + if fqn == pattern { + return true + } + + if strings.HasSuffix(fqn, "."+pattern) { + return true + } + + if strings.Contains(fqn, pattern) { + return true + } + + return false +} diff --git a/sourcecode-parser/graph/callgraph/patterns_test.go b/sourcecode-parser/graph/callgraph/patterns_test.go new file mode 100644 index 00000000..d2694a3e --- /dev/null +++ b/sourcecode-parser/graph/callgraph/patterns_test.go @@ -0,0 +1,301 @@ +package callgraph + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewPatternRegistry(t *testing.T) { + registry := NewPatternRegistry() + + assert.NotNil(t, registry) + assert.NotNil(t, registry.Patterns) + assert.NotNil(t, registry.PatternsByType) + assert.Empty(t, registry.Patterns) + assert.Empty(t, registry.PatternsByType) +} + +func TestPatternRegistry_AddPattern(t *testing.T) { + registry := NewPatternRegistry() + + pattern := &Pattern{ + ID: "TEST-001", + Name: "Test Pattern", + Type: PatternTypeDangerousFunction, + Severity: SeverityHigh, + } + + registry.AddPattern(pattern) + + assert.Len(t, registry.Patterns, 1) + assert.Equal(t, pattern, registry.Patterns["TEST-001"]) + assert.Len(t, registry.PatternsByType[PatternTypeDangerousFunction], 1) +} + +func TestPatternRegistry_GetPattern(t *testing.T) { + registry := NewPatternRegistry() + + pattern := &Pattern{ID: "TEST-001", Name: "Test"} + 
registry.AddPattern(pattern) + + retrieved, exists := registry.GetPattern("TEST-001") + assert.True(t, exists) + assert.Equal(t, pattern, retrieved) + + _, exists = registry.GetPattern("NONEXISTENT") + assert.False(t, exists) +} + +func TestPatternRegistry_GetPatternsByType(t *testing.T) { + registry := NewPatternRegistry() + + p1 := &Pattern{ID: "P1", Type: PatternTypeDangerousFunction} + p2 := &Pattern{ID: "P2", Type: PatternTypeDangerousFunction} + p3 := &Pattern{ID: "P3", Type: PatternTypeSourceSink} + + registry.AddPattern(p1) + registry.AddPattern(p2) + registry.AddPattern(p3) + + dangerous := registry.GetPatternsByType(PatternTypeDangerousFunction) + assert.Len(t, dangerous, 2) + + sourceSink := registry.GetPatternsByType(PatternTypeSourceSink) + assert.Len(t, sourceSink, 1) +} + +func TestPatternRegistry_LoadDefaultPatterns(t *testing.T) { + registry := NewPatternRegistry() + registry.LoadDefaultPatterns() + + pattern, exists := registry.GetPattern("CODE-INJECTION-001") + require.True(t, exists) + assert.Equal(t, "Code injection via eval with user input", pattern.Name) + assert.Equal(t, PatternTypeMissingSanitizer, pattern.Type) + assert.Equal(t, SeverityCritical, pattern.Severity) + assert.Contains(t, pattern.Sources, "input") + assert.Contains(t, pattern.Sinks, "eval") + assert.Contains(t, pattern.Sanitizers, "sanitize") +} + +func TestMatchesFunctionName(t *testing.T) { + tests := []struct { + name string + fqn string + pattern string + expected bool + }{ + {"Exact match", "eval", "eval", true}, + {"Suffix match", "myapp.utils.eval", "eval", true}, + {"Contains match", "myapp.request.GET", "request.GET", true}, + {"No match", "myapp.safe_function", "eval", false}, + {"Partial no match", "evaluation", "eval", true}, // Contains matches + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := matchesFunctionName(tt.fqn, tt.pattern) + assert.Equal(t, tt.expected, result) + }) + } +} + +func 
TestPatternRegistry_MatchDangerousFunction(t *testing.T) { + registry := NewPatternRegistry() + pattern := &Pattern{ + ID: "TEST-DANGEROUS", + Type: PatternTypeDangerousFunction, + DangerousFunctions: []string{"eval", "exec"}, + } + + callGraph := NewCallGraph() + callGraph.AddCallSite("myapp.views.process", CallSite{ + Target: "eval", + TargetFQN: "builtins.eval", + }) + + matched := registry.MatchPattern(pattern, callGraph) + assert.True(t, matched) +} + +func TestPatternRegistry_MatchDangerousFunction_NoMatch(t *testing.T) { + registry := NewPatternRegistry() + pattern := &Pattern{ + ID: "TEST-DANGEROUS", + Type: PatternTypeDangerousFunction, + DangerousFunctions: []string{"eval", "exec"}, + } + + callGraph := NewCallGraph() + callGraph.AddCallSite("myapp.views.process", CallSite{ + Target: "safe_function", + TargetFQN: "myapp.utils.safe_function", + }) + + matched := registry.MatchPattern(pattern, callGraph) + assert.False(t, matched) +} + +func TestPatternRegistry_MatchSourceSink(t *testing.T) { + registry := NewPatternRegistry() + pattern := &Pattern{ + ID: "TEST-SOURCE-SINK", + Type: PatternTypeSourceSink, + Sources: []string{"input"}, + Sinks: []string{"eval"}, + } + + callGraph := NewCallGraph() + + // Create a path: get_input() -> process() -> execute_code() + // get_input calls input(), execute_code calls eval() + callGraph.AddCallSite("myapp.get_input", CallSite{ + Target: "input", + TargetFQN: "builtins.input", + }) + + callGraph.AddCallSite("myapp.execute_code", CallSite{ + Target: "eval", + TargetFQN: "builtins.eval", + }) + + callGraph.AddEdge("myapp.get_input", "myapp.process") + callGraph.AddEdge("myapp.process", "myapp.execute_code") + + matched := registry.MatchPattern(pattern, callGraph) + assert.True(t, matched) +} + +func TestPatternRegistry_MatchMissingSanitizer_WithSanitizer(t *testing.T) { + registry := NewPatternRegistry() + pattern := &Pattern{ + ID: "TEST-SANITIZER", + Type: PatternTypeMissingSanitizer, + Sources: []string{"input"}, + 
Sinks: []string{"eval"}, + Sanitizers: []string{"sanitize"}, + } + + callGraph := NewCallGraph() + + // Path with sanitizer: get_input() -> sanitize_input() -> execute_code() + callGraph.AddCallSite("myapp.get_input", CallSite{ + Target: "input", + TargetFQN: "builtins.input", + }) + + callGraph.AddCallSite("myapp.sanitize_input", CallSite{ + Target: "sanitize", + TargetFQN: "myapp.utils.sanitize", + }) + + callGraph.AddCallSite("myapp.execute_code", CallSite{ + Target: "eval", + TargetFQN: "builtins.eval", + }) + + callGraph.AddEdge("myapp.get_input", "myapp.sanitize_input") + callGraph.AddEdge("myapp.sanitize_input", "myapp.execute_code") + + matched := registry.MatchPattern(pattern, callGraph) + assert.False(t, matched) // Should not match because sanitizer is present +} + +func TestPatternRegistry_MatchMissingSanitizer_WithoutSanitizer(t *testing.T) { + registry := NewPatternRegistry() + pattern := &Pattern{ + ID: "TEST-SANITIZER", + Type: PatternTypeMissingSanitizer, + Sources: []string{"input"}, + Sinks: []string{"eval"}, + Sanitizers: []string{"sanitize"}, + } + + callGraph := NewCallGraph() + + // Path without sanitizer: get_input() -> execute_code() + callGraph.AddCallSite("myapp.get_input", CallSite{ + Target: "input", + TargetFQN: "builtins.input", + }) + + callGraph.AddCallSite("myapp.execute_code", CallSite{ + Target: "eval", + TargetFQN: "builtins.eval", + }) + + callGraph.AddEdge("myapp.get_input", "myapp.execute_code") + + matched := registry.MatchPattern(pattern, callGraph) + assert.True(t, matched) // Should match because sanitizer is missing +} + +func TestPatternRegistry_HasPath(t *testing.T) { + registry := NewPatternRegistry() + callGraph := NewCallGraph() + + // Create path: A -> B -> C + callGraph.AddEdge("A", "B") + callGraph.AddEdge("B", "C") + + assert.True(t, registry.hasPath("A", "A", callGraph)) + assert.True(t, registry.hasPath("A", "B", callGraph)) + assert.True(t, registry.hasPath("A", "C", callGraph)) + assert.False(t, 
registry.hasPath("C", "A", callGraph)) + assert.False(t, registry.hasPath("B", "A", callGraph)) +} + +func TestPatternRegistry_HasPath_Cycle(t *testing.T) { + registry := NewPatternRegistry() + callGraph := NewCallGraph() + + // Create cycle: A -> B -> C -> A + callGraph.AddEdge("A", "B") + callGraph.AddEdge("B", "C") + callGraph.AddEdge("C", "A") + + assert.True(t, registry.hasPath("A", "C", callGraph)) + assert.True(t, registry.hasPath("B", "A", callGraph)) +} + +func TestPatternRegistry_FindCallsByFunctions(t *testing.T) { + registry := NewPatternRegistry() + callGraph := NewCallGraph() + + callGraph.AddCallSite("myapp.func1", CallSite{ + Target: "input", + TargetFQN: "builtins.input", + }) + + callGraph.AddCallSite("myapp.func2", CallSite{ + Target: "eval", + TargetFQN: "builtins.eval", + }) + + callGraph.AddCallSite("myapp.func3", CallSite{ + Target: "print", + TargetFQN: "builtins.print", + }) + + calls := registry.findCallsByFunctions([]string{"input", "eval"}, callGraph) + + assert.Len(t, calls, 2) + callers := []string{calls[0].caller, calls[1].caller} + assert.Contains(t, callers, "myapp.func1") + assert.Contains(t, callers, "myapp.func2") +} + +func TestSeverityConstants(t *testing.T) { + assert.Equal(t, Severity("critical"), SeverityCritical) + assert.Equal(t, Severity("high"), SeverityHigh) + assert.Equal(t, Severity("medium"), SeverityMedium) + assert.Equal(t, Severity("low"), SeverityLow) +} + +func TestPatternTypeConstants(t *testing.T) { + assert.Equal(t, PatternType("source-sink"), PatternTypeSourceSink) + assert.Equal(t, PatternType("missing-sanitizer"), PatternTypeMissingSanitizer) + assert.Equal(t, PatternType("dangerous-function"), PatternTypeDangerousFunction) +}