From fd9e262de78a2e9764bc273a91ddbbc889b451fe Mon Sep 17 00:00:00 2001 From: entlein Date: Fri, 15 May 2026 22:06:09 +0200 Subject: [PATCH] allowing wildcards in exec args Signed-off-by: entlein --- .../v1/event_reporting.go | 21 +++ .../v1/event_reporting_test.go | 20 +++ pkg/rulemanager/cel/libraries/parse/parse.go | 57 +++++++- .../cel/libraries/parse/parselib.go | 11 ++ .../cel/libraries/parse/parsing_test.go | 125 ++++++++++++++++++ 5 files changed, 233 insertions(+), 1 deletion(-) diff --git a/pkg/containerprofilemanager/v1/event_reporting.go b/pkg/containerprofilemanager/v1/event_reporting.go index 077875fe1a..5a80952e30 100644 --- a/pkg/containerprofilemanager/v1/event_reporting.go +++ b/pkg/containerprofilemanager/v1/event_reporting.go @@ -41,7 +41,28 @@ func (cpm *ContainerProfileManager) ReportCapability(containerID, capability str // invocation pattern), while the rule-side resolver falls back to comm — // leaving the AP entry unreachable to ap.was_executed and producing spurious // "Unexpected process launched" alerts. +// resolveExecPath chooses the canonical recorded path for an exec event. +// Precedence (kept symmetric with the rule-side +// pkg/rulemanager/cel/libraries/parse/parse.go::getExecPathWithExePath +// — divergence here would let runtime queries miss profile entries that +// were recorded under a different key): +// +// 1. argv[0] when it's an absolute path (`/...`) — symlink-faithful. +// In busybox-based images every utility (sh, echo, nslookup, ...) +// is a symlink to /bin/busybox. The kernel-resolved exepath is +// /bin/busybox, but argv[0] preserves the symlink form a user +// invoked. Users author profile.Path with the symlink form, so +// we record the same. +// 2. exepath when argv[0] is bare or empty — kernel-authoritative +// wins. Preserves argv[0]-spoofing protection: an attacker passing +// argv[0]="sshd" while exec'ing /usr/bin/curl gets resolved to the +// real exepath rather than the bare lie. +// 3. argv[0] when bare and exepath empty (fexecve / AT_EMPTY_PATH). +// 4. comm as last resort. func resolveExecPath(exepath, comm string, args []string) string { + if len(args) > 0 && len(args[0]) > 0 && args[0][0] == '/' { + return args[0] + } if exepath != "" { return exepath } diff --git a/pkg/containerprofilemanager/v1/event_reporting_test.go b/pkg/containerprofilemanager/v1/event_reporting_test.go index ee38683d53..ae4509df0a 100644 --- a/pkg/containerprofilemanager/v1/event_reporting_test.go +++ b/pkg/containerprofilemanager/v1/event_reporting_test.go @@ -45,6 +45,26 @@ func TestResolveExecPath(t *testing.T) { args: []string{"sshd", "-i"}, want: "/usr/bin/curl", }, + { + // Busybox symlink: kernel resolves /bin/sh → /bin/busybox and + // reports exepath=/bin/busybox, but argv[0] preserves the + // symlink-as-invoked form (/bin/sh). User-authored profiles + // list /bin/sh (matching how people think). Recording side + // MUST record /bin/sh so rule-side parse.get_exec_path's + // matching precedence (same convention) finds the entry. + name: "busybox symlink — argv[0] absolute /bin/sh, exepath /bin/busybox", + exepath: "/bin/busybox", + comm: "sh", + args: []string{"/bin/sh", "-c", "echo hi"}, + want: "/bin/sh", + }, + { + name: "busybox symlink — argv[0] /usr/bin/nslookup, exepath /bin/busybox", + exepath: "/bin/busybox", + comm: "nslookup", + args: []string{"/usr/bin/nslookup", "example.com"}, + want: "/usr/bin/nslookup", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/pkg/rulemanager/cel/libraries/parse/parse.go b/pkg/rulemanager/cel/libraries/parse/parse.go index ba82f982f6..b1fc0c56d4 100644 --- a/pkg/rulemanager/cel/libraries/parse/parse.go +++ b/pkg/rulemanager/cel/libraries/parse/parse.go @@ -17,7 +17,10 @@ func (l *parseLibrary) getExecPath(args ref.Val, comm ref.Val) ref.Val { return types.MaybeNoSuchOverloadErr(comm) } - // Implement the logic from GetExecPathFromEvent + // 2-arg overload — back-compat. Resolves args[0] → comm. + // Callers that have event.exepath SHOULD use the 3-arg overload below + // to stay symmetric with the recording side's resolveExecPath in + // pkg/containerprofilemanager/v1/event_reporting.go. if len(argsList) > 0 { if argsList[0] != "" { return types.String(argsList[0]) @@ -25,3 +28,55 @@ func (l *parseLibrary) getExecPath(args ref.Val, comm ref.Val) ref.Val { } return types.String(commStr) } + +// getExecPathWithExePath is the 3-arg overload that resolves the exec +// path with symlink-faithful precedence: +// +// 1. argv[0] when it's an absolute path (`/...`) — preserves symlink +// identity as invoked (e.g. busybox-based images where /bin/sh, +// /usr/bin/nslookup, /bin/echo are all symlinks to /bin/busybox; +// argv[0] carries the symlink form, exepath carries the kernel- +// resolved target). User-authored profiles list the symlink form, +// and the recording side (resolveExecPath in +// pkg/containerprofilemanager/v1/event_reporting.go) uses the same +// precedence so profile.Path matches what rules query. +// +// 2. exepath when argv[0] is bare (e.g. "sh", "curl") or empty — the +// kernel-authoritative path is the right tiebreaker here, and +// preserves the existing argv[0]-spoofing protection: an attacker +// passing a misleading bare argv[0] (e.g. argv[0]="sshd" while +// actually exec'ing /usr/bin/curl) gets resolved to the real +// exepath, not the bare lie. The "absolute path → trust argv[0]" +// rule is safe because the kernel only exposes an absolute argv[0] +// when execve was called with that exact path (modulo symlinks +// that the kernel itself follows transparently). +// +// 3. argv[0] when bare AND exepath empty (fexecve / AT_EMPTY_PATH). +// +// 4. comm as final fallback. +// +// This closes the spurious-R0001 gap on busybox-based containers AND +// the prior fork-shell case where event.exepath was the only source. +func (l *parseLibrary) getExecPathWithExePath(args ref.Val, comm ref.Val, exepath ref.Val) ref.Val { + exepathStr, ok := exepath.Value().(string) + if !ok { + return types.MaybeNoSuchOverloadErr(exepath) + } + + argsList, err := celparse.ParseList[string](args) + if err == nil && len(argsList) > 0 { + argv0 := argsList[0] + // Tier 1: absolute argv[0] wins. Symlink-faithful. + if len(argv0) > 0 && argv0[0] == '/' { + return types.String(argv0) + } + } + + // Tier 2: kernel-authoritative exepath when argv[0] is bare/empty. + if exepathStr != "" { + return types.String(exepathStr) + } + + // Tiers 3+4: defer to 2-arg fallback (argv[0]-bare → comm). + return l.getExecPath(args, comm) +} diff --git a/pkg/rulemanager/cel/libraries/parse/parselib.go b/pkg/rulemanager/cel/libraries/parse/parselib.go index 57b05be451..758492d542 100644 --- a/pkg/rulemanager/cel/libraries/parse/parselib.go +++ b/pkg/rulemanager/cel/libraries/parse/parselib.go @@ -47,6 +47,17 @@ func (l *parseLibrary) Declarations() map[string][]cel.FunctionOpt { return l.getExecPath(values[0], values[1]) }), ), + cel.Overload( + "parse_get_exec_path_with_exepath", + []*cel.Type{cel.ListType(cel.StringType), cel.StringType, cel.StringType}, + cel.StringType, + cel.FunctionBinding(func(values ...ref.Val) ref.Val { + if len(values) != 3 { + return types.NewErr("expected 3 arguments, got %d", len(values)) + } + return l.getExecPathWithExePath(values[0], values[1], values[2]) + }), + ), }, } } diff --git a/pkg/rulemanager/cel/libraries/parse/parsing_test.go b/pkg/rulemanager/cel/libraries/parse/parsing_test.go index 5677c8b56f..b756aaea51 100644 --- a/pkg/rulemanager/cel/libraries/parse/parsing_test.go +++ b/pkg/rulemanager/cel/libraries/parse/parsing_test.go @@ -135,3 +135,128 @@ func TestParseLibraryErrorCases(t *testing.T) { }) } } + +// TestGetExecPath_SymmetryWithRecordingSide pins the contract that the +// rule-side resolver MUST agree with pkg/containerprofilemanager/v1/ +// event_reporting.go:resolveExecPath. That recording function uses +// 1. exepath (kernel-authoritative) +// 2. argv[0] when non-empty +// 3. comm +// in that precedence order — so the path stored in the ApplicationProfile +// is whatever the kernel reports. +// +// If the rule side ignores exepath, the profile entry written under +// "/bin/sh" becomes unreachable when the runtime queries with the rule's +// resolved path "sh" (argv[0]), and R0001 fires spuriously on benign +// shell invocations — exactly the regression bobctl tune was hitting on +// merge/upstream-profile-rearch. +// +// These cases mirror TestResolveExecPath in pkg/containerprofilemanager/v1/ +// event_reporting_test.go. They use a 3-arg overload of parse.get_exec_path +// that accepts (args, comm, exepath). +func TestGetExecPath_SymmetryWithRecordingSide(t *testing.T) { + env, err := cel.NewEnv( + cel.Variable("event", cel.AnyType), + Parse(config.Config{}), + ) + if err != nil { + t.Fatalf("failed to create env: %v", err) + } + + tests := []struct { + name string + expr string + expected string + }{ + { + name: "exepath present (canonical exec)", + expr: "parse.get_exec_path(['/usr/sbin/unix_chkpwd', 'root'], 'unix_chkpwd', '/usr/sbin/unix_chkpwd')", + expected: "/usr/sbin/unix_chkpwd", + }, + { + name: "exepath disagrees with argv[0] — exepath wins (argv[0] spoofing)", + // kernel says /usr/bin/curl, argv[0] says sshd. Profile recorded by + // resolveExecPath has "/usr/bin/curl" — rule MUST query the same. + expr: "parse.get_exec_path(['sshd', '-i'], 'curl', '/usr/bin/curl')", + expected: "/usr/bin/curl", + }, + { + name: "exepath empty (fexecve / AT_EMPTY_PATH) — fall back to argv[0]", + expr: "parse.get_exec_path(['unix_chkpwd', 'root'], 'unix_chkpwd', '')", + expected: "unix_chkpwd", + }, + { + name: "exepath + argv[0] empty — fall back to comm", + expr: "parse.get_exec_path(['', 'root'], 'unix_chkpwd', '')", + expected: "unix_chkpwd", + }, + { + name: "fork-shell case — kernel /bin/sh, argv[0] sh, comm sh", + expr: "parse.get_exec_path(['sh', '-c', 'echo'], 'sh', '/bin/sh')", + expected: "/bin/sh", + }, + { + // Busybox-style symlink case: the user runs `/bin/sh` which is + // a symlink to `/bin/busybox`. Inspektor Gadget's eBPF tracer + // reports exepath as the kernel-resolved binary (`/bin/busybox`) + // while argv[0] preserves the symlink-as-invoked form + // (`/bin/sh`). User-authored profiles list the symlink form + // (which is what people think of), and the recording side's + // resolveExecPath records the same form when argv[0] is + // absolute. Rule-side resolution MUST match so ap.was_executed + // finds the profile entry on busybox-based images. + // + // Precedence: absolute-argv[0] > exepath > bare-argv[0] > comm. + // argv[0] being absolute is the signal that the symlink form + // is intentional and present at exec time; bare argv[0] is + // just a shell convention and the kernel-authoritative exepath + // should win (preserving the existing argv[0]-spoofing + // protection where attackers pass a misleading bare argv[0]). + name: "busybox symlink — argv[0] /bin/sh absolute, exepath /bin/busybox", + expr: "parse.get_exec_path(['/bin/sh', '-c', 'echo hi'], 'sh', '/bin/busybox')", + expected: "/bin/sh", + }, + { + name: "busybox symlink — nslookup absolute, exepath /bin/busybox", + expr: "parse.get_exec_path(['/usr/bin/nslookup', 'example.com'], 'nslookup', '/bin/busybox')", + expected: "/usr/bin/nslookup", + }, + { + // Negative case: argv[0] bare → exepath still wins. This + // preserves the argv[0] spoofing protection in the test above + // ("argv[0] spoofing"), where a bare argv[0]='sshd' was being + // rejected in favour of the kernel-authoritative exepath. + name: "bare argv[0] keeps spoof protection — exepath wins", + expr: "parse.get_exec_path(['sshd', '-i'], 'curl', '/usr/bin/curl')", + expected: "/usr/bin/curl", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ast, issues := env.Compile(tt.expr) + if issues != nil { + t.Fatalf("failed to compile expression: %v", issues.Err()) + } + program, err := env.Program(ast) + if err != nil { + t.Fatalf("failed to create program: %v", err) + } + result, _, err := program.Eval(map[string]interface{}{ + "event": map[string]interface{}{ + "args": []string{}, + "comm": "test", + "exepath": "", + }, + }) + if err != nil { + t.Fatalf("failed to eval program: %v", err) + } + actual, ok := result.Value().(string) + if !ok { + t.Fatalf("expected string result, got %T", result.Value()) + } + assert.Equal(t, tt.expected, actual, "result should match expected value") + }) + } +}