-
Notifications
You must be signed in to change notification settings - Fork 4.8k
/
ig-daemonset.yaml.tmpl
199 lines (192 loc) · 7.56 KB
/
ig-daemonset.yaml.tmpl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# DaemonSet "gadget" (namespace "gadget").
#
# Runs a single "gadget" container per selected node. The pod is restricted to
# Linux nodes, shares the host PID and network namespaces, and mounts several
# host paths (/, /run, /lib/modules, /sys/kernel/debug, /sys/fs/cgroup and
# /sys/fs/bpf) into the container.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: gadget
  namespace: gadget
  labels:
    k8s-app: gadget
spec:
  selector:
    matchLabels:
      k8s-app: gadget
  template:
    metadata:
      labels:
        k8s-app: gadget
      annotations:
        # We need to set the gadget container as unconfined so it is able to
        # write to /sys/fs/bpf as well as /sys/kernel/debug/tracing.
        # Otherwise, we can get an error like:
        # "failed to create server failed to create folder for pinning bpf maps: mkdir /sys/fs/bpf/gadget: permission denied"
        # (For reference, see: https://github.com/inspektor-gadget/inspektor-gadget/runs/3966318270?check_suite_focus=true#step:20:221)
        # NOTE(review): newer Kubernetes releases deprecate this beta
        # annotation in favor of securityContext.appArmorProfile; it is kept
        # here for compatibility with older clusters - confirm before changing.
        container.apparmor.security.beta.kubernetes.io/gadget: "unconfined"
        inspektor-gadget.kinvolk.io/option-hook-mode: "auto"
    spec:
      # "serviceAccount" is a deprecated alias; "serviceAccountName" is the
      # supported PodSpec field and selects the same service account.
      serviceAccountName: gadget
      hostPID: true
      hostNetwork: true
      nodeSelector:
        kubernetes.io/os: "linux"
      containers:
        - name: gadget
          terminationMessagePolicy: FallbackToLogsOnError
          # Rendered by the Go template engine before the file is parsed as
          # YAML: registry prefix (custom registry, else image repository,
          # else default registry) followed by the image reference.
          image: {{.CustomRegistries.InspektorGadget | default .ImageRepository | default .Registries.InspektorGadget }}{{.Images.InspektorGadget}}
          imagePullPolicy: "Always"
          command: ["/entrypoint"]
          lifecycle:
            preStop:
              exec:
                command:
                  - "/cleanup"
          # Readiness and liveness use the same check command.
          readinessProbe:
            periodSeconds: 5
            timeoutSeconds: 2
            exec:
              command:
                - /bin/gadgettracermanager
                - -liveness
          livenessProbe:
            periodSeconds: 5
            timeoutSeconds: 2
            exec:
              command:
                - /bin/gadgettracermanager
                - -liveness
          env:
            # Values taken from the pod's own fields (fieldRef).
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: GADGET_POD_UID
              valueFrom:
                fieldRef:
                  fieldPath: metadata.uid
            - name: TRACELOOP_NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: TRACELOOP_POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: TRACELOOP_POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            # NOTE(review): the image name and version below are hard-coded,
            # while the container image above is templated - confirm they are
            # kept in sync when the image is bumped.
            - name: GADGET_IMAGE
              value: "ghcr.io/inspektor-gadget/inspektor-gadget"
            - name: INSPEKTOR_GADGET_VERSION
              value: "v0.16.1"
            - name: INSPEKTOR_GADGET_OPTION_HOOK_MODE
              value: "auto"
            - name: INSPEKTOR_GADGET_OPTION_FALLBACK_POD_INFORMER
              value: "true"
            # Make sure to keep these settings in sync with pkg/container-utils/runtime-client/interface.go
            - name: INSPEKTOR_GADGET_CONTAINERD_SOCKETPATH
              value: "/run/containerd/containerd.sock"
            - name: INSPEKTOR_GADGET_CRIO_SOCKETPATH
              value: "/run/crio/crio.sock"
            - name: INSPEKTOR_GADGET_DOCKER_SOCKETPATH
              value: "/run/docker.sock"
            # Matches the mountPath of the "host" volume below.
            - name: HOST_ROOT
              value: "/host"
          securityContext:
            capabilities:
              add:
                # We need CAP_NET_ADMIN to be able to create BPF link.
                # Indeed, link_create is called with prog->type which equals
                # BPF_PROG_TYPE_CGROUP_SKB.
                # This value is then checked in
                # bpf_prog_attach_check_attach_type() which also checks if we have
                # CAP_NET_ADMIN:
                # https://elixir.bootlin.com/linux/v5.14.14/source/kernel/bpf/syscall.c#L4099
                # https://elixir.bootlin.com/linux/v5.14.14/source/kernel/bpf/syscall.c#L2967
                - NET_ADMIN
                # We need CAP_SYS_ADMIN to use Python-BCC gadgets because bcc
                # internally calls bpf_get_map_fd_by_id() which contains the
                # following snippet:
                #     if (!capable(CAP_SYS_ADMIN))
                #         return -EPERM;
                # (https://elixir.bootlin.com/linux/v5.10.73/source/kernel/bpf/syscall.c#L3254)
                #
                # Details about this are given in:
                # > The important design decision is to allow ID->FD transition for
                #   CAP_SYS_ADMIN only. What it means that user processes can run
                #   with CAP_BPF and CAP_NET_ADMIN and they will not be able to affect each
                #   other unless they pass FDs via scm_rights or via pinning in bpffs.
                #   ID->FD is a mechanism for human override and introspection.
                #   An admin can do 'sudo bpftool prog ...'. It's possible to enforce via LSM that
                #   only bpftool binary does bpf syscall with CAP_SYS_ADMIN and the rest of user
                #   space processes do bpf syscall with CAP_BPF isolating bpf objects (progs, maps,
                #   links) that are owned by such processes from each other.
                # (https://lwn.net/Articles/820560/)
                #
                # Note that even with a kernel providing CAP_BPF, the above
                # statement is still true.
                - SYS_ADMIN
                # We need this capability to get addresses from /proc/kallsyms.
                # Without it, addresses displayed when reading this file will be
                # 0.
                # Thus, bcc_procutils_each_ksym will never call callback, so KSyms
                # syms_ vector will be empty and it will return false.
                # As a consequence, no prefix will be found in
                # get_syscall_prefix(), so a default prefix (_sys) will be
                # returned.
                # Sadly, this default prefix is not used by the running kernel,
                # which instead uses: __x64_sys_
                - SYSLOG
                # The traceloop gadget uses strace, which in turn uses the
                # ptrace() syscall.
                # Within kernel code, ptrace() calls ptrace_attach() which in
                # turn calls __ptrace_may_access() which calls ptrace_has_cap()
                # where CAP_SYS_PTRACE is finally checked:
                # https://elixir.bootlin.com/linux/v5.14.14/source/kernel/ptrace.c#L284
                - SYS_PTRACE
                # Needed by setrlimit in gadgettracermanager and by the traceloop
                # gadget.
                - SYS_RESOURCE
                # Needed for gadgets that don't bump the memory rlimit.
                # (Currently only applies to BCC python-based gadgets)
                - IPC_LOCK
                # Needed by BCC python-based gadgets to load the kheaders module:
                # https://github.com/iovisor/bcc/blob/v0.24.0/src/cc/frontends/clang/kbuild_helper.cc#L158
                - SYS_MODULE
                # Needed by gadgets that open a raw sock like dns and snisnoop
                - NET_RAW
          # Each mount refers to a hostPath volume declared under "volumes".
          volumeMounts:
            - name: host
              mountPath: /host
            - name: run
              mountPath: /run
            - name: modules
              mountPath: /lib/modules
            - name: debugfs
              mountPath: /sys/kernel/debug
            - name: cgroup
              mountPath: /sys/fs/cgroup
            - name: bpffs
              mountPath: /sys/fs/bpf
      # No key is given, so each entry tolerates every taint with that effect.
      tolerations:
        - effect: NoSchedule
          operator: Exists
        - effect: NoExecute
          operator: Exists
      volumes:
        - name: host
          hostPath:
            path: /
        - name: run
          hostPath:
            path: /run
        - name: cgroup
          hostPath:
            path: /sys/fs/cgroup
        - name: modules
          hostPath:
            path: /lib/modules
        - name: bpffs
          hostPath:
            path: /sys/fs/bpf
        - name: debugfs
          hostPath:
            path: /sys/kernel/debug