diff --git a/.dockerignore b/.dockerignore index 9987431760..2f366a23d2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -6,3 +6,6 @@ Dockerfile /images /.github /tmp +/local_workspace +/.git +/pkg/agent/*.bpf.o diff --git a/.gitignore b/.gitignore index e0cff5b537..75b9b14d54 100644 --- a/.gitignore +++ b/.gitignore @@ -4,5 +4,12 @@ /tmp /out /bin -pkg/agent/parca-agent.bpf.o +pkg/agent/*.bpf.o TODO.md +local_workspace +NOTES.md +minikube.iso +minikube-*.iso +/*.txt +.kernel_config +/*.service diff --git a/.golangci.yml b/.golangci.yml index f5edd089ed..076bd63bea 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -3,6 +3,7 @@ run: skip-dirs: - internal/pprof - internal/go + - pkg/stack/frame linters-settings: errcheck: exclude: ./.errcheck_excludes.txt diff --git a/Dockerfile b/Dockerfile index a3c156dff1..090c141130 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,7 +17,8 @@ WORKDIR /parca-agent COPY go.mod go.sum /parca-agent/ RUN go mod download -modcacherw -COPY parca-agent.bpf.c vmlinux.h Makefile /parca-agent/ +COPY Makefile /parca-agent/ +COPY ./bpf /parca-agent/bpf COPY ./3rdparty /parca-agent/3rdparty RUN make bpf diff --git a/Dockerfile.dev b/Dockerfile.dev index ccf7a84a9e..5bbb88ac73 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -21,15 +21,19 @@ WORKDIR /parca-agent COPY go.mod go.sum /parca-agent/ RUN go mod download -modcacherw -COPY parca-agent.bpf.c vmlinux.h Makefile /parca-agent/ +COPY Makefile /parca-agent/ +COPY ./bpf /parca-agent/bpf COPY ./3rdparty /parca-agent/3rdparty -RUN make bpf +RUN make clean bpf COPY . 
/parca-agent RUN make build -# TODO(kakkoyun): Add Delve +RUN go install github.com/go-delve/delve/cmd/dlv@v1.7.2 +RUN cp /go/bin/dlv /bin/dlv RUN cp /parca-agent/dist/parca-agent /bin/parca-agent -CMD ["/bin/parca-agent"] +EXPOSE 7071 + +ENTRYPOINT ["/bin/dlv", "--listen=:40000", "--headless=true", "--api-version=2", "--accept-multiclient", "exec", "--continue", "--"] diff --git a/Makefile b/Makefile index cc91208f4c..5c9736e1a1 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ .PHONY: all -all: bpf build +all: clean bpf build # tools: CMD_LLC ?= llc @@ -29,9 +29,10 @@ OUT_DIR ?= dist GO_SRC := $(shell find . -type f -name '*.go') OUT_BIN := $(OUT_DIR)/parca-agent OUT_BIN_DEBUG_INFO := $(OUT_DIR)/debug-info -BPF_SRC := parca-agent.bpf.c -VMLINUX := vmlinux.h -OUT_BPF := pkg/agent/parca-agent.bpf.o +BPF_ROOT := bpf +BPF_SRC := $(BPF_ROOT)/*.bpf.c +VMLINUX := $(BPF_ROOT)/vmlinux.h +BPF_OBJECTS := cpu_profiler cpu_profiler_with_unwinding BPF_HEADERS := 3rdparty/include BPF_BUNDLE := $(OUT_DIR)/parca-agent.bpf.tar.gz LIBBPF_SRC := 3rdparty/libbpf/src @@ -98,11 +99,11 @@ $(BPF_BUNDLE): $(BPF_SRC) $(LIBBPF_HEADERS)/bpf $(BPF_HEADERS) cp $$(find $^ -type f) $(bpf_bundle_dir) .PHONY: bpf -bpf: $(OUT_BPF) +bpf: $(BPF_OBJECTS) linux_arch := $(ARCH:x86_64=x86) ifndef DOCKER -$(OUT_BPF): $(BPF_SRC) $(LIBBPF_HEADERS) $(LIBBPF_OBJ) | $(OUT_DIR) $(bpf_compile_tools) +$(BPF_OBJECTS): %: bpf/%.bpf.c $(LIBBPF_HEADERS) $(LIBBPF_OBJ) | $(OUT_DIR) $(bpf_compile_tools) mkdir -p pkg/agent @v=$$($(CMD_CLANG) --version); test $$(echo $${v#*version} | head -n1 | cut -d '.' 
-f1) -ge '9' || (echo 'required minimum clang version: 9' ; false) $(CMD_CLANG) -S \ @@ -130,7 +131,7 @@ $(OUT_BPF): $(BPF_SRC) $(LIBBPF_HEADERS) $(LIBBPF_OBJ) | $(OUT_DIR) $(bpf_compil -nostdinc \ -target bpf \ -O2 -emit-llvm -c -g $< -o $(@:.o=.ll) - $(CMD_LLC) -march=bpf -filetype=obj -o $@ $(@:.o=.ll) + $(CMD_LLC) -march=bpf -filetype=obj -o pkg/agent/$@.bpf.o $(@:.o=.ll) rm $(@:.o=.ll) else $(OUT_BPF): $(DOCKER_BUILDER) | $(OUT_DIR) @@ -180,7 +181,7 @@ mostlyclean: .PHONY: clean clean: - rm pkg/agent/parca-agent.bpf.o + -rm pkg/agent/*.bpf.o -FILE="$(docker_builder_file)" ; \ if [ -r "$$FILE" ] ; then \ $(CMD_DOCKER) rmi "$$(< $$FILE)" ; \ @@ -227,7 +228,7 @@ README.md: $(CMD_EMBEDMD) $(OUT_DIR)/help.txt deploy/manifests $(CMD_EMBEDMD) -w README.md .PHONY: format -format: go-fmt check-license +format: go-fmt c-fmt check-license .PHONY: c-fmt c-fmt: @@ -235,7 +236,7 @@ c-fmt: .PHONY: go-fmt go-fmt: - go fmt $(shell go list ./... | grep -E -v "pkg/internal/pprof|pkg/internal/go") + go fmt $(shell go list ./... 
| grep -E -v "pkg/internal/pprof|pkg/internal/go|pkg/stack/frame") .PHONY: check-license check-license: diff --git a/Tiltfile b/Tiltfile index 8d0906a16d..82dc81d6af 100644 --- a/Tiltfile +++ b/Tiltfile @@ -1,11 +1,14 @@ -docker_prune_settings(num_builds=5) +docker_prune_settings(num_builds=2) + +# allow_k8s_contexts('admin@k8s-festive-perlman') +# default_registry('ttl.sh/tilt-parca-dev') ## Parca Agent docker_build( 'parca.io/parca/parca-agent:dev', '', dockerfile='Dockerfile.dev', - only=['./cmd', './pkg', './3rdparty', './go.mod', './go.sum', './parca-agent.bpf.c', './vmlinux.h', './Makefile'], + only=['./cmd', './pkg', './3rdparty', './go.mod', './go.sum', './bpf', './Makefile'], ) k8s_yaml('deploy/tilt/parca-agent-daemonSet.yaml') -k8s_resource('parca-agent', port_forwards=[7071]) +k8s_resource('parca-agent', port_forwards=[7071, 40000]) diff --git a/parca-agent.bpf.c b/bpf/cpu_profiler.bpf.c similarity index 75% rename from parca-agent.bpf.c rename to bpf/cpu_profiler.bpf.c index 59ac876367..d56f853944 100644 --- a/parca-agent.bpf.c +++ b/bpf/cpu_profiler.bpf.c @@ -17,7 +17,6 @@ #define KBUILD_MODNAME "parca-agent" #undef container_of -//#include "bpf_core_read.h" #include #include #include @@ -33,6 +32,7 @@ #define MAX_STACK_ADDRESSES 1024 // Max depth of each stack trace to track #define MAX_STACK_DEPTH 127 +#define MAX_ENTRIES 10240 #define BPF_MAP(_name, _type, _key_type, _value_type, _max_entries) \ struct bpf_map_def SEC ("maps") _name = { \ @@ -53,8 +53,26 @@ .max_entries = _max_entries, \ }; -#define BPF_HASH(_name, _key_type, _value_type) \ - BPF_MAP (_name, BPF_MAP_TYPE_HASH, _key_type, _value_type, 10240); +#define BPF_HASH(_name, _key_type, _value_type, _max_entries) \ + BPF_MAP (_name, BPF_MAP_TYPE_HASH, _key_type, _value_type, _max_entries); + +#define LOG(_pid, _msg) \ + { \ + log_event_t e = { .pid = _pid, .message = _msg }; \ + bpf_ringbuf_output (&events, &e, sizeof (e), 0); \ + } + +typedef struct log_event +{ + u32 pid; + char 
message[32]; + // u64 addr; +} log_event_t; + +struct bpf_map_def SEC ("maps") events = { + .type = BPF_MAP_TYPE_RINGBUF, + .max_entries = 1 << 24, +}; /*============================= INTERNAL STRUCTS ============================*/ @@ -67,7 +85,7 @@ typedef struct stack_count_key /*================================ MAPS =====================================*/ -BPF_HASH (counts, stack_count_key_t, u64); +BPF_HASH (counts, stack_count_key_t, u64, MAX_ENTRIES); BPF_STACK_TRACE (stack_traces, MAX_STACK_ADDRESSES); /*=========================== HELPER FUNCTIONS ==============================*/ @@ -90,7 +108,8 @@ bpf_map_lookup_or_try_init (void *map, const void *key, const void *init) return bpf_map_lookup_elem (map, key); } -// This code gets a bit complex. Probably not suitable for casual hacking. +/*=========================== BPF FUNCTIONS ==============================*/ + SEC ("perf_event") int do_sample (struct bpf_perf_event_data *ctx) @@ -105,18 +124,22 @@ do_sample (struct bpf_perf_event_data *ctx) // create map key stack_count_key_t key = { .pid = tgid }; - // get stacks - key.user_stack_id = bpf_get_stackid (ctx, &stack_traces, BPF_F_USER_STACK); + // get user stack + u32 zero = 0; // First element is the PID to lookup. 
+ key.user_stack_id = 0; + int stack_id = bpf_get_stackid (ctx, &stack_traces, BPF_F_USER_STACK); + if (stack_id >= 0) + key.user_stack_id = stack_id; + + // get kernel stack key.kernel_stack_id = bpf_get_stackid (ctx, &stack_traces, 0); - u64 zero = 0; u64 *count; count = bpf_map_lookup_or_try_init (&counts, &key, &zero); if (!count) return 0; __sync_fetch_and_add (count, 1); - return 0; } diff --git a/bpf/cpu_profiler_with_unwinding.bpf.c b/bpf/cpu_profiler_with_unwinding.bpf.c new file mode 100644 index 0000000000..cfd74471eb --- /dev/null +++ b/bpf/cpu_profiler_with_unwinding.bpf.c @@ -0,0 +1,338 @@ +// +build ignore +// ^^ this is a golang build tag meant to exclude this C file from compilation +// by the CGO compiler + +/* In Linux 5.4 asm_inline was introduced, but it's not supported by clang. + * Redefine it to just asm to enable successful compilation. + * see + * https://github.com/iovisor/bcc/commit/2d1497cde1cc9835f759a707b42dea83bee378b8 + * for more details + */ +#include "vmlinux.h" +#ifdef asm_inline +#undef asm_inline +#define asm_inline asm +#endif + +#define KBUILD_MODNAME "parca-agent" + +#undef container_of +#include +#include +#include +#include + +#if defined(bpf_target_x86) +#define PT_REGS_PARM6(ctx) ((ctx)->r9) +#elif defined(bpf_target_arm64) +#define PT_REGS_PARM6(x) (((PT_REGS_ARM64 *)(x))->regs[5]) +#endif + +// Max amount of different stack trace addresses to buffer in the Map +#define MAX_STACK_ADDRESSES 1024 +// Max depth of each stack trace to track +#define MAX_STACK_DEPTH 127 +// TODO(kakkoyun): Explain. +#define MAX_PID_MAP_SIZE 1024 +// TODO(kakkoyun): Explain. +#define MAX_ENTRIES 10240 +// TODO(kakkoyun): Explain. +#define MAX_BINARY_SEARCH_DEPTH 24 + +/* Maximum value an `unsigned long int' can hold. (Minimum is 0.) 
*/ +#if __WORDSIZE == 64 +#define ULONG_MAX 18446744073709551615UL +#else +#define ULONG_MAX 4294967295UL +#endif + +#define BPF_MAP(_name, _type, _key_type, _value_type, _max_entries) \ + struct bpf_map_def SEC ("maps") _name = { \ + .type = _type, \ + .key_size = sizeof (_key_type), \ + .value_size = sizeof (_value_type), \ + .max_entries = _max_entries, \ + }; +// __uint(map_flags, BPF_F_NO_PREALLOC); + +// Stack Traces are slightly different +// in that the value is 1 big byte array +// of the stack addresses +#define BPF_STACK_TRACE(_name, _max_entries) \ + struct bpf_map_def SEC ("maps") _name = { \ + .type = BPF_MAP_TYPE_STACK_TRACE, \ + .key_size = sizeof (u32), \ + .value_size = sizeof (size_t) * MAX_STACK_DEPTH, \ + .max_entries = _max_entries, \ + }; + +#define BPF_HASH(_name, _key_type, _value_type, _max_entries) \ + BPF_MAP (_name, BPF_MAP_TYPE_HASH, _key_type, _value_type, _max_entries); + +#define BPF_ARRAY(_name, _value_type, _max_entries) \ + struct bpf_map_def SEC ("maps") _name = { \ + .type = BPF_MAP_TYPE_ARRAY, \ + .key_size = sizeof (u32), \ + .value_size = sizeof (_value_type), \ + .max_entries = _max_entries, \ + }; + +// .map_flags = BPF_F_NO_PREALLOC + +//// Value size must be u32 because it is inner map id +//#define BPF_PID_HASH_OF_MAP(_name, _max_entries) \ +// struct bpf_map_def SEC ("maps") _name = { \ +// .type = BPF_MAP_TYPE_HASH_OF_MAPS, \ +// .key_size = sizeof (u32), \ +// .value_size = sizeof (u32), \ +// .max_entries = _max_entries, \ +// }; + +#define LOG(_pid, _msg) \ + { \ + log_event_t e = { .pid = _pid, .message = _msg }; \ + bpf_ringbuf_output (&events, &e, sizeof (e), 0); \ + } + +typedef struct log_event +{ + u32 pid; + char message[32]; + // u64 addr; +} log_event_t; + +struct bpf_map_def SEC ("maps") events = { + .type = BPF_MAP_TYPE_RINGBUF, + .max_entries = 1 << 24, +}; + +/*============================= INTERNAL STRUCTS ============================*/ + +typedef struct stack_count_key +{ + u32 pid; + int 
user_stack_id; + int kernel_stack_id; +} stack_count_key_t; + +typedef struct stack_unwind_instruction +{ + u8 op; + u64 reg; + s64 offset; +} stack_unwind_instruction_t; + +/*================================ MAPS =====================================*/ + +BPF_HASH (counts, stack_count_key_t, u64, MAX_ENTRIES); +BPF_STACK_TRACE (stack_traces, MAX_STACK_ADDRESSES); + +BPF_ARRAY (chosen, u32, 2); // TODO(kakkoyun): Remove later. +BPF_ARRAY (pcs, u64, 200000); // 0xffffff // 200_000 +BPF_ARRAY (rips, stack_unwind_instruction_t, 200000); +BPF_ARRAY (rsps, stack_unwind_instruction_t, 200000); + +BPF_ARRAY (unwinded_stack_traces, u64, MAX_STACK_DEPTH); + +// BPF_PID_HASH_OF_MAP (pcs, MAX_PID_MAP_SIZE); +// BPF_PID_HASH_OF_MAP (rips, MAX_PID_MAP_SIZE); +// BPF_PID_HASH_OF_MAP (rsps, MAX_PID_MAP_SIZE); + +/*=========================== HELPER FUNCTIONS ==============================*/ + +static __always_inline void * +bpf_map_lookup_or_try_init (void *map, const void *key, const void *init) +{ + void *val; + long err; + + val = bpf_map_lookup_elem (map, key); + if (val) + return val; + + err = bpf_map_update_elem (map, key, init, BPF_NOEXIST); + // 17 == EEXIST + if (err && err != -17) + return 0; + + return bpf_map_lookup_elem (map, key); +} + +static __always_inline u32 * +find (u64 target) +{ + // TODO(kakkoyun): Is there a better way to get current size? + u32 right = MAX_ENTRIES - 1; + u32 one = 1; // Second element is the size of the unwind table. 
+ u32 *val; + val = bpf_map_lookup_elem (&chosen, &one); + if (val) + right = *val; + + u32 left = 0; + static u32 mid; + int i = 0; + // #pragma clang loop unroll(full) + while (i < MAX_BINARY_SEARCH_DEPTH && left <= right) + { + i++; + + mid = left + (right - left) / 2; + + u64 *val; + val = bpf_map_lookup_elem (&pcs, &mid); + u64 guess; + if (val) + guess = *val; + else + guess = ULONG_MAX; + + if (guess == target) + return ∣ + else if (guess < target) + left = mid + 1; + else + right = mid - 1; + } + return NULL; +} + +static __always_inline u64 +execute (stack_unwind_instruction_t *ins, u64 rip, u64 rsp, u64 cfa) +{ + u64 addr; + u64 unsafe_ptr = cfa + ins->offset; + u64 res = 0; + switch (ins->op) + { + case 1: // OpUndefined: Undefined register. + if (bpf_probe_read (&addr, 8, &unsafe_ptr) == 0) + res = addr; + case 2: // OpCfaOffset + res = rip + ins->offset; // Value stored at some offset from `CFA`. + case 3: // OpRegister + res = rsp + ins->offset; // Value of a machine register plus offset. + default: + res = 0; + } + return res; +} + +static __always_inline void * +backtrace (bpf_user_pt_regs_t *regs, u32 stack_id) +{ + bpf_printk ("backtrace"); + long unsigned int rip = regs->ip; + long unsigned int rsp = regs->sp; + // TODO(kakkoyun): Bigger than 512B limit. + // - Figure out how to use stack traces map. + // long unsigned int stack[MAX_STACK_DEPTH]; + // #pragma clang loop unroll(full) + for (int d = 0; d < MAX_STACK_DEPTH; d++) + { + bpf_printk ("backtrace, depth: %d, %u", d, rip); + if (rip == 0) + break; + + bpf_printk ("backtrace, step 1, depth: %d, %u", d, rip); + // Push the found return address. 
+ // stack[d] = rip; + if (bpf_map_update_elem (&unwinded_stack_traces, &d, &rip, BPF_ANY) < 0) + { + bpf_printk ("backtrace, failed to update stack trace\n"); + break; + } + + // bpf_printk ("backtrace, step 2, depth: %d, %u", d, rip); + u32 *val = find (rip); + if (val == NULL) + { + bpf_printk ("backtrace, NOT FOUND, depth: %d, %u", d, rip); + break; + } + + bpf_printk ("backtrace, step 3 (FOUND), depth: %d, %u", d, rip); + u32 key = *val; + stack_unwind_instruction_t *ins; + ins = bpf_map_lookup_elem (&rsps, &key); + if (ins == NULL) + break; + + bpf_printk ("backtrace, step 4, depth: %d, %u", d, rip); + u64 cfa; + cfa = execute (ins, rip, rsp, 0); + if (cfa == 0) + break; + + bpf_printk ("backtrace, step 5, depth: %d, %u", d, rip); + ins = bpf_map_lookup_elem (&rips, &key); + if (ins == NULL) + break; + + bpf_printk ("backtrace, step 6, depth: %d, %u", d, rip); + rip = execute (ins, rip, rsp, cfa); + rsp = cfa; + } + + bpf_printk ("backtrace, done, %u\n", rip); + // if (bpf_map_update_elem (&stack_traces, &stack_id, &stack, BPF_ANY) < 0) + // { + // bpf_printk ("backtrace, failed to update stack trace\n"); + // return NULL; + // } + return 0; +} + +/*=========================== BPF FUNCTIONS ==============================*/ + +SEC ("perf_event") +int +do_sample (struct bpf_perf_event_data *ctx) +{ + u64 id = bpf_get_current_pid_tgid (); + u32 tgid = id >> 32; + u32 pid = id; + + if (pid == 0) + return 0; + + // create map key + stack_count_key_t key = { .pid = tgid }; + + // get user stack + key.user_stack_id = 0; + int stack_id = bpf_get_stackid (ctx, &stack_traces, BPF_F_USER_STACK); + if (stack_id >= 0) + key.user_stack_id = stack_id; + + // First element is the PID to lookup. + u32 zero = 0; + u32 *val; + val = bpf_map_lookup_elem (&chosen, &zero); + // // TODO(kakkoyun): Test if we can unwind the stack using frame pointers. 
+ if (val && pid == *val) + { + // TODO(kakkoyun): which appears to correspond to a 32-bit hash of the + // instruction pointer addresses that comprise the stack for the current + // context + // https://github.com/torvalds/linux/blob/5bfc75d92efd494db37f5c4c173d3639d4772966/kernel/bpf/stackmap.c?_pjax=%23js-repo-pjax-container%2C%20div%5Bitemtype%3D%22http%3A%2F%2Fschema.org%2FSoftwareSourceCode%22%5D%20main%2C%20%5Bdata-pjax-container%5D#L252 + // stack_id = bpf_get_prandom_u32 (); + LOG (pid, "attempt to backtrace"); + bpf_printk ("do sample: %d\n", pid); + backtrace (&ctx->regs, stack_id); + } + + // get kernel stack + key.kernel_stack_id = bpf_get_stackid (ctx, &stack_traces, 0); + + // u64 zero = 0; + u64 *count; + count = bpf_map_lookup_or_try_init (&counts, &key, &zero); + if (!count) + return 0; + + __sync_fetch_and_add (count, 1); + return 0; +} + +char LICENSE[] SEC ("license") = "GPL"; diff --git a/vmlinux.h b/bpf/vmlinux.h similarity index 100% rename from vmlinux.h rename to bpf/vmlinux.h diff --git a/cmd/debug-info/main.go b/cmd/debug-info/main.go index d0a5bb1ed5..27f4efcd6d 100644 --- a/cmd/debug-info/main.go +++ b/cmd/debug-info/main.go @@ -162,7 +162,6 @@ func main() { os.Exit(1) } - // TODO(kakkoyun): Shall we eleminate? 
g.Add(run.SignalHandler(ctx, os.Interrupt, os.Kill)) if err := g.Run(); err != nil { level.Error(logger).Log("err", err) diff --git a/deploy/dev.jsonnet b/deploy/dev.jsonnet index 034b3c7fd3..f07060ee6a 100644 --- a/deploy/dev.jsonnet +++ b/deploy/dev.jsonnet @@ -14,6 +14,7 @@ function(serverVersion='v0.4.2') version: serverVersion, replicas: 1, corsAllowedOrigins: '*', + logLevel: 'debug', }); local agent = (import 'parca-agent/parca-agent.libsonnet')({ @@ -25,7 +26,10 @@ function(serverVersion='v0.4.2') logLevel: 'debug', insecure: true, insecureSkipVerify: true, - tempDir: 'tmp', + tempDir: '/tmp', + podLabelSelector: { + 'app.kubernetes.io/name': 'redpanda', + }, }); { diff --git a/deploy/tilt/parca-agent-daemonSet.yaml b/deploy/tilt/parca-agent-daemonSet.yaml index 85f87d18c4..56a37ca60c 100644 --- a/deploy/tilt/parca-agent-daemonSet.yaml +++ b/deploy/tilt/parca-agent-daemonSet.yaml @@ -31,7 +31,7 @@ spec: - --store-address=parca.parca.svc.cluster.local:7070 - --insecure - --insecure-skip-verify - - --temp-dir=tmp + - --temp-dir=/tmp env: - name: NODE_NAME valueFrom: diff --git a/deploy/tilt/parca-server-deployment.yaml b/deploy/tilt/parca-server-deployment.yaml index 883b73f5cb..cfaebe9a92 100644 --- a/deploy/tilt/parca-server-deployment.yaml +++ b/deploy/tilt/parca-server-deployment.yaml @@ -27,7 +27,7 @@ spec: - args: - /parca - --config-path=/var/parca/parca.yaml - - --log-level=info + - --log-level=debug - --cors-allowed-origins=* image: ghcr.io/parca-dev/parca:v0.5.0 livenessProbe: diff --git a/go.mod b/go.mod index 2742f843d1..e0c774f0e7 100644 --- a/go.mod +++ b/go.mod @@ -5,14 +5,18 @@ go 1.16 require ( github.com/alecthomas/kong v0.2.18 github.com/aquasecurity/libbpfgo v0.2.2-libbpf-0.5.0 + github.com/blang/semver/v4 v4.0.0 // indirect github.com/cespare/xxhash/v2 v2.1.2 github.com/containerd/containerd v1.5.7 // indirect github.com/docker/docker v20.10.11+incompatible + github.com/dustin/go-humanize v1.0.0 + github.com/go-delve/delve v1.7.2 
github.com/go-kit/log v0.2.0 github.com/google/pprof v0.0.0-20210609004039-a478d1d731e9 github.com/gorilla/mux v1.8.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639 + github.com/matishsiao/goInfo v0.0.0-20210923090445-da2e3fa8d45f // indirect github.com/minio/highwayhash v1.0.2 github.com/moby/term v0.0.0-20201216013528-df9cb8a40635 // indirect github.com/oklog/run v1.1.0 diff --git a/go.sum b/go.sum index a7ba7b3119..8ab39cbc97 100644 --- a/go.sum +++ b/go.sum @@ -172,6 +172,7 @@ github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kd github.com/apache/arrow/go/arrow v0.0.0-20191024131854-af6fa24be0db/go.mod h1:VTxUBvSJ3s3eHAg65PNgrsn5BtqCRPdmyXh6rAfdxN0= github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= +github.com/aquasecurity/libbpfgo v0.1.2-0.20210708203834-4928d36fafac/go.mod h1:/+clceXE103FaXvVTIY2HAkQjxNtkra4DRWvZYr2SKw= github.com/aquasecurity/libbpfgo v0.2.2-libbpf-0.5.0 h1:Qecy9Qvj4TG0LK7sfuJWzd1QlwMozHo7H0AyZMGjLg8= github.com/aquasecurity/libbpfgo v0.2.2-libbpf-0.5.0/go.mod h1:/+clceXE103FaXvVTIY2HAkQjxNtkra4DRWvZYr2SKw= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= @@ -403,6 +404,7 @@ github.com/cortexproject/cortex v1.7.1-0.20210316085356-3fedc1108a49/go.mod h1:/ github.com/cortexproject/cortex v1.8.1-0.20210422151339-cf1c444e0905/go.mod h1:xxm4/CLvTmDxwE7yXwtClR4dIvkG4S09o5DygPOgc1U= github.com/cortexproject/cortex v1.9.1-0.20210601081042-d7d87369965a h1:Yzto+L/snfYehPt5IRktODwcSzY0IJSXRe17UiTtdiQ= github.com/cortexproject/cortex v1.9.1-0.20210601081042-d7d87369965a/go.mod h1:JxSr6ouD6hK0Wlfe4mv8YFeVyrADZZzP5daI3SnEpYM= +github.com/cosiner/argv v0.1.0/go.mod h1:EusR6TucWKX+zFgtdUsKT2Cvg45K5rtpCcWz4hK06d8= github.com/cpuguy83/go-md2man/v2 
v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= @@ -431,6 +433,7 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/denisenkom/go-mssqldb v0.0.0-20190515213511-eb9f6a1743f3/go.mod h1:zAg7JM8CkOJ43xKXIj7eRO9kmWm/TW578qo+oDO6tuM= github.com/denverdino/aliyungo v0.0.0-20190125010748-a747050bb1ba/go.mod h1:dV8lFg6daOBZbT6/BDGIz6Y3WFGn8juu6G+CQ6LHtl0= +github.com/derekparker/trie v0.0.0-20200317170641-1fdf38b7b0e9/go.mod h1:D6ICZm05D9VN1n/8iOtBxLpXtoGp6HDFUJ1RNVieOSE= github.com/desertbit/timer v0.0.0-20180107155436-c41aec40b27f/go.mod h1:xH/i4TFMt8koVQZ6WFms69WAsDWr2XsYL3Hkl7jkoLE= github.com/dgraph-io/sroar v0.0.0-20210806151611-9ba13da43734/go.mod h1:bdNPtQmcxoIQVkZEWZvX0n0/IDlHFab397xdBlP4OoE= github.com/dgrijalva/jwt-go v0.0.0-20170104182250-a601269ab70c/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= @@ -535,6 +538,8 @@ github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd/go.mod github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24= github.com/go-chi/cors v1.2.0/go.mod h1:sSbTewc+6wYHBBCW7ytsFSn836hqM7JxpglAy2Vzc58= +github.com/go-delve/delve v1.7.2 h1:QTDJlgx9OwUVYVm7xthyf2XHKrZcTQu3wkRbovktidM= +github.com/go-delve/delve v1.7.2/go.mod h1:CHdOd8kuHlQxtBJr1HmJX5h+KmmWd/7Lk5d+D1zHn4E= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod 
h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= @@ -800,6 +805,7 @@ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-dap v0.5.1-0.20210713061233-c91b005e3987/go.mod h1:5q8aYQFnHOAZEMP+6vmq25HKYAEwE+LF5yh7JKrrhSQ= github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= @@ -1076,6 +1082,8 @@ github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJ github.com/markbates/oncer v0.0.0-20181203154359-bf2de49a0be2/go.mod h1:Ld9puTsIW75CHf65OeIOkyKbteujpZVXDpWK6YGZbxE= github.com/markbates/safe v1.0.1/go.mod h1:nAqgmRi7cY2nqMc92/bSEeQA+R4OheNU2T1kNSCBdG0= github.com/marstr/guid v1.1.0/go.mod h1:74gB1z2wpxxInTG6yaqA7KrtM0NZ+RbrcqDvYHefzho= +github.com/matishsiao/goInfo v0.0.0-20210923090445-da2e3fa8d45f h1:B0OD7nYl2FPQEVrw8g2uyc1lGEzNbvrKh7fspGZcbvY= +github.com/matishsiao/goInfo v0.0.0-20210923090445-da2e3fa8d45f/go.mod h1:aEt7p9Rvh67BYApmZwNDPpgircTO2kgdmDUoF/1QmwA= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= @@ -1290,6 +1298,7 @@ github.com/pelletier/go-toml v1.7.0/go.mod h1:vwGMzjaWMwyfHwgIBhI2YUM4fB6nL6lVAv github.com/pelletier/go-toml v1.8.1/go.mod 
h1:T2/BmBdy8dvIRq1a/8aqjN41wvWlN4lrapLU/GW4pbc= github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9iaPbIdPPGyKcA8hKdoy6hAWba7Yac= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= +github.com/peterh/liner v0.0.0-20170317030525-88609521dc4b/go.mod h1:xIteQHvHuaLYG9IFj6mSxM0fCKrs34IrEQUhOYuGPHc= github.com/peterh/liner v1.0.1-0.20180619022028-8c1271fcf47f/go.mod h1:xIteQHvHuaLYG9IFj6mSxM0fCKrs34IrEQUhOYuGPHc= github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= @@ -1460,6 +1469,7 @@ github.com/spf13/cobra v0.0.2-0.20171109065643-2da4a54c5cee/go.mod h1:1l0Ry5zgKv github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE= github.com/spf13/cobra v1.1.1/go.mod h1:WnodtKOvamDL/PwE2M4iKs8aMDBZ5Q5klgD3qfVJQMI= +github.com/spf13/cobra v1.1.3/go.mod h1:pGADOWyqRD/YMrPZigI/zbliZ2wVD/23d+is3pSWzOo= github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.1-0.20171106142849-4c012f6dcd95/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= @@ -1638,6 +1648,7 @@ go.opentelemetry.io/otel/oteltest v0.20.0/go.mod h1:L7bgKf9ZB7qCwT9Up7i9/pn0PWIa go.opentelemetry.io/otel/trace v0.20.0 h1:1DL6EXUdcg95gukhuRRvLDO/4X5THh/5dIV52lqtnbw= go.opentelemetry.io/otel/trace v0.20.0/go.mod h1:6GjCW8zgDjwGHGa6GkyeB8+/5vjT16gUEi0Nf1iBdgw= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= +go.starlark.net v0.0.0-20200821142938-949cc6f4b097/go.mod h1:f0znQkUKRrkk36XxWbGjMqQM8wGv/xHBVE2qc3B5oFU= go.uber.org/atomic v1.3.2/go.mod 
h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= @@ -1661,6 +1672,7 @@ go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= go.uber.org/zap v1.14.1/go.mod h1:Mb2vm2krFEG5DV0W9qcHBYFtp/Wku1cvYaqPsS/WYfc= go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= +golang.org/x/arch v0.0.0-20190927153633-4e8777c89be4/go.mod h1:flIaEI6LNU6xOCD5PaJvn9wGP0agmIOqjrtsKGRguv4= golang.org/x/crypto v0.0.0-20171113213409-9f005a07e0d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20180608092829-8ac0e0d97ce4/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= @@ -2027,6 +2039,7 @@ golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191127201027-ecd32218bd7f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191203134012-c197fd4bf371/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191216052735-49a3e744a425/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= diff --git a/pkg/agent/podmanager.go b/pkg/agent/podmanager.go index fc221621a2..e48c69d994 100644 --- a/pkg/agent/podmanager.go +++ 
b/pkg/agent/podmanager.go @@ -138,8 +138,8 @@ func (g *PodManager) Run(ctx context.Context) error { containerIDs[container.ContainerId] = containerProfiler g.mtx.Unlock() go func() { - err := containerProfiler.Run(ctx) - if err != nil { + // TODO(kakkoyun): Fail if all the profilers fail to start. + if err := containerProfiler.Run(ctx); err != nil { level.Error(logger).Log("msg", "running container profiler failed", "err", err) } }() diff --git a/pkg/agent/profile.go b/pkg/agent/profile.go index 5960dec03d..7acf689fc1 100644 --- a/pkg/agent/profile.go +++ b/pkg/agent/profile.go @@ -24,16 +24,22 @@ import ( "math" "os" "runtime" + "sort" "sync" + "syscall" "time" "unsafe" "C" bpf "github.com/aquasecurity/libbpfgo" + "github.com/blang/semver/v4" + "github.com/dustin/go-humanize" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/google/pprof/profile" + "github.com/matishsiao/goInfo" + "github.com/parca-dev/parca-agent/pkg/stack/unwind" profilestorepb "github.com/parca-dev/parca/gen/proto/go/parca/profilestore/v1alpha1" "golang.org/x/sys/unix" "google.golang.org/grpc" @@ -45,14 +51,18 @@ import ( "github.com/parca-dev/parca-agent/pkg/perf" ) -//go:embed parca-agent.bpf.o -var bpfObj []byte +//go:embed cpu_profiler.bpf.o +var cpuProfilerBPFObj []byte + +//go:embed cpu_profiler_with_unwinding.bpf.o +var cpuProfilerWithUnwindingBPFObj []byte var seps = []byte{'\xff'} const ( - stackDepth = 127 // Always needs to be sync with MAX_STACK_DEPTH in parca-agent.bpf.c - doubleStackDepth = 254 + // Always needs to be sync with MAX_STACK_DEPTH in cpu_profiler.bpf.c/cpu_profiler_with_unwinding.bpf.c + stackDepth = 127 + doubleStackDepth = 2 * stackDepth ) type Record struct { @@ -62,6 +72,8 @@ type Record struct { type CgroupProfilingTarget interface { PerfEventCgroupPath() string + PID() int + Labels() []*profilestorepb.Label } @@ -93,6 +105,7 @@ type CgroupProfiler struct { lastError error perfCache *perf.PerfCache + unwinder *unwind.Unwinder } func 
NewCgroupProfiler( @@ -104,8 +117,8 @@ func NewCgroupProfiler( target CgroupProfilingTarget, profilingDuration time.Duration, sink func(Record), - tmp string, -) *CgroupProfiler { + tmp string) *CgroupProfiler { + pidMappingFileCache := maps.NewPidMappingFileCache(logger) return &CgroupProfiler{ logger: logger, externalLabels: externalLabels, @@ -113,7 +126,7 @@ func NewCgroupProfiler( target: target, profilingDuration: profilingDuration, sink: sink, - pidMappingFileCache: maps.NewPidMappingFileCache(logger), + pidMappingFileCache: pidMappingFileCache, perfCache: perf.NewPerfCache(logger), writeClient: writeClient, debugInfoExtractor: debuginfo.NewExtractor( @@ -121,17 +134,11 @@ func NewCgroupProfiler( debugInfoClient, tmp, ), - mtx: &sync.RWMutex{}, + mtx: &sync.RWMutex{}, + unwinder: unwind.NewUnwinder(logger, pidMappingFileCache), } } -func (p *CgroupProfiler) loopReport(lastProfileTakenAt time.Time, lastError error) { - p.mtx.Lock() - defer p.mtx.Unlock() - p.lastProfileTakenAt = lastProfileTakenAt - p.lastError = lastError -} - func (p *CgroupProfiler) LastProfileTakenAt() time.Time { p.mtx.RLock() defer p.mtx.RUnlock() @@ -169,22 +176,13 @@ func (p *CgroupProfiler) Labels() []*profilestorepb.Label { func (p *CgroupProfiler) Run(ctx context.Context) error { level.Debug(p.logger).Log("msg", "starting cgroup profiler") - ctx, p.cancel = context.WithCancel(ctx) - m, err := bpf.NewModuleFromBufferArgs(bpf.NewModuleArgs{ - BPFObjBuff: bpfObj, - BPFObjName: "parca", - }) + m, err := p.initAndLoadBPFModule() if err != nil { return fmt.Errorf("new bpf module: %w", err) } defer m.Close() - err = m.BPFLoadObject() - if err != nil { - return fmt.Errorf("load bpf object: %w", err) - } - cgroup, err := os.Open(p.target.PerfEventCgroupPath()) if err != nil { return fmt.Errorf("open cgroup: %w", err) @@ -220,6 +218,38 @@ func (p *CgroupProfiler) Run(ctx context.Context) error { } } + logEvents := make(chan []byte) + rb, err := m.InitRingBuf("events", logEvents) + if err != 
nil { + return fmt.Errorf("init ring buffer: %w", err) + } + + rb.Start() + defer rb.Stop() + + ctx, p.cancel = context.WithCancel(ctx) + go func() { + byteOrder := byteorder.GetHostByteOrder() + + for { + select { + case <-ctx.Done(): + return + case eb := <-logEvents: + if len(eb) > 0 { + pid := int(byteOrder.Uint32(eb[0:4])) + msg := string(bytes.TrimRight(eb[4:], "\x00")) + // TODO(kakkoyun): Add labels to identify profiler. + level.Debug(p.logger).Log( + "msg", "message received from kernel space", + "message", msg, + "pid", pid, + ) + } + } + } + }() + counts, err := m.GetMap("counts") if err != nil { return fmt.Errorf("get counts map: %w", err) @@ -230,6 +260,11 @@ func (p *CgroupProfiler) Run(ctx context.Context) error { return fmt.Errorf("get stack traces map: %w", err) } + unwindedStackTraces, err := m.GetMap("unwinded_stack_traces") + if err != nil { + level.Warn(p.logger).Log("msg", "failed to get unwinded stack trace", "err", err) + } + ticker := time.NewTicker(p.profilingDuration) defer ticker.Stop() @@ -241,13 +276,216 @@ func (p *CgroupProfiler) Run(ctx context.Context) error { } t := time.Now() - err := p.profileLoop(ctx, t, counts, stackTraces) + err := p.profileLoop(ctx, t, counts, stackTraces, unwindedStackTraces) p.loopReport(t, err) } } -func (p *CgroupProfiler) profileLoop(ctx context.Context, now time.Time, counts, stackTraces *bpf.BPFMap) error { +func (p *CgroupProfiler) initAndLoadBPFModule() (*bpf.Module, error) { + btfSupported, err := p.isBTFSupported() + if err != nil { + level.Warn(p.logger).Log("msg", "failed to determine whether BTF supported", "err", err) + } + + pid := uint32(p.target.PID()) + var tables map[profile.Mapping]unwind.PlanTable + if btfSupported { + level.Info(p.logger).Log("msg", "linux version supports BTF") + tables, err = p.unwinder.UnwindTableForPid(pid) + if err != nil { + level.Warn(p.logger).Log("msg", "failed to build unwind tables for process", "err", err, "pid", pid) + } + } + + unwindingPossible := false 
+ if len(tables) != 0 { + unwindingPossible = true + } else { + level.Warn(p.logger).Log("msg", "unwinding tables are empty", "pid", pid) + } + + var bpfObj []byte + if unwindingPossible { + bpfObj = cpuProfilerWithUnwindingBPFObj + level.Info(p.logger).Log("msg", "using CPU profiler with stack unwinding support", "pid", pid) + } else { + bpfObj = cpuProfilerBPFObj + level.Info(p.logger).Log("msg", "using simple CPU profiler", "pid", pid) + } + mod, err := bpf.NewModuleFromBufferArgs(bpf.NewModuleArgs{ + BPFObjBuff: bpfObj, + BPFObjName: "parca", + }) + if err != nil { + return nil, fmt.Errorf("new bpf module: %w", err) + } + + if err := mod.BPFLoadObject(); err != nil { + return nil, fmt.Errorf("load bpf object: %w", err) + } + + if unwindingPossible { + if err := p.prepareUnwindBPFMaps(mod, pid, tables); err != nil { + return nil, err + } + } + + return mod, nil +} + +func (p *CgroupProfiler) isBTFSupported() (bool, error) { + info, err := goInfo.GetInfo() + if err != nil { + return false, fmt.Errorf("failed to fetch OS version, using simplest module: %w", err) + } + v, err := semver.Parse(info.Core) + expectedRange, err := semver.ParseRange(">=5.2.0") + if err != nil { + return false, fmt.Errorf("failed to parse OS version, using simplest module: %w", err) + } + return expectedRange(v), nil +} + +func (p *CgroupProfiler) prepareUnwindBPFMaps(m *bpf.Module, pid uint32, tables map[profile.Mapping]unwind.PlanTable) error { + if err := p.bumpMemlockRlimit(); err != nil { + return fmt.Errorf("bump memlock rlimit: %w", err) + } + // TODO(kakkoyun): Make sure BPF_MAP_HASH_MAPs are properly initialized. 
+ // Needs CREATE_MAP https://github.com/aquasecurity/libbpfgo/issues/93 + + cfg, err := m.GetMap("chosen") + if err != nil { + return fmt.Errorf("get config map: %w", err) + } + + pcs, err := m.GetMap("pcs") + if err != nil { + return fmt.Errorf("get pcs map: %w", err) + } + + rips, err := m.GetMap("rips") + if err != nil { + return fmt.Errorf("get rips map: %w", err) + } + + rsps, err := m.GetMap("rsps") + if err != nil { + return fmt.Errorf("get rsps map: %w", err) + } + + table := unwind.PlanTable{} + var size int + for _, t := range tables { + size += len(t) + } + level.Debug(p.logger).Log("msg", "building unwind tables", "size", size) + for m, t := range tables { + // TODO(kakkoyun): Any mapping calculation should be done here. + dbgPCS := make([]uint64, 10) + for i, row := range t { + if i < 10 { + dbgPCS[i] = row.Begin + } + } + level.Debug(p.logger).Log("msg", "PCs", "pid", pid, "pcs", fmt.Sprintf("%v", dbgPCS), "size", len(t), "start", m.Start, "offset", m.Offset, "limit", m.Limit) + table = append(table, t...) + + // TODO(kakkoyun): Clean up. + //if err := p.updateUnwindBPFMaps(cfg, pcs, rips, rsps, m, pid, t); err != nil { + // level.Debug(p.logger).Log("msg", "failed to build unwind table", + // "pid", pid, "size", len(t), "err", err) + // continue + //} + //level.Debug(p.logger).Log("msg", "unwind table built", + // "pid", pid, "buildid", m.BuildID, "size", len(t)) + //// TODO(kakkoyun): For we only consider first successful mapping. + //// TODO(kakkoyun): Be more clever and ignore library mappings. Or send everything to the kernel space? 
+ //return nil + } + sort.Sort(table) + if err := p.updateUnwindBPFMaps(cfg, pcs, rips, rsps, pid, table); err != nil { + level.Debug(p.logger).Log("msg", "failed to build unwind table", "pid", pid, "size", len(table), "err", err) + return fmt.Errorf("update unwind maps: %w", err) + } + level.Debug(p.logger).Log("msg", "unwind table built", "pid", pid, "size", len(table)) + return nil +} + +func (p *CgroupProfiler) updateUnwindBPFMaps(cfg *bpf.BPFMap, pcs *bpf.BPFMap, rips *bpf.BPFMap, rsps *bpf.BPFMap, pid uint32, table unwind.PlanTable) error { + // TODO(kakkoyun): Update after BPF map of maps. + //if m.BuildID != p.buildID { + // //level.Debug(logger).Log("msg", "skipping unwind table update", "buildid", m.BuildID, "expected_buildid", p.buildID) + // return errors.New("skipping unwind table update") + //} + + level.Debug(p.logger).Log("msg", "found a process with given build id", "pid", pid, "size", len(table)) + + byteOrder := byteorder.GetHostByteOrder() + + zero := uint32(0) + pidBytes, err := cfg.GetValue(unsafe.Pointer(&zero)) + if err != nil { + level.Debug(p.logger).Log("msg", "failed to get config value", "err", err, "pid", pid) + } else { + existingPID := byteOrder.Uint32(pidBytes) + if existingPID == pid { + return nil + } + } + + value := pid + if err := cfg.Update(unsafe.Pointer(&zero), unsafe.Pointer(&value)); err != nil { + // or break and clean? + return fmt.Errorf("failed to update config: %w", err) + } + + one := uint32(1) + size := len(table) + if err := cfg.Update(unsafe.Pointer(&one), unsafe.Pointer(&size)); err != nil { + // or break and clean? + return fmt.Errorf("failed to update config: %w", err) + } + + for i, row := range table { + key := uint32(i) + + pc := row.Begin // + m.Start + if err := pcs.Update(unsafe.Pointer(&key), unsafe.Pointer(&pc)); err != nil { + // or break and clean? 
+ return fmt.Errorf("failed to update PCs: %w", err) + } + + rip := row.RIP.Bytes(byteOrder) + if err := rips.Update(unsafe.Pointer(&key), unsafe.Pointer(&rip[0])); err != nil { + // or break and clean? + return fmt.Errorf("failed to update RIPs: %w", err) + } + + rsp := row.RSP.Bytes(byteOrder) + if err := rsps.Update(unsafe.Pointer(&key), unsafe.Pointer(&rsp[0])); err != nil { + // or break and clean? + return fmt.Errorf("failed to update RSPs: %w", err) + } + } + + dbgPCs := make([]uint64, 10) + for i := 0; i < 10; i++ { + key := uint32(i) + if valueBytes, err := pcs.GetValue(unsafe.Pointer(&key)); err != nil { + level.Debug(p.logger).Log("msg", "failed to get PC value", "err", err, "pid", pid) + } else { + dbgPCs[i] = byteOrder.Uint64(valueBytes) + } + } + level.Debug(p.logger).Log("msg", "written PCs", "pcs", fmt.Sprintf("%v", dbgPCs), "pid", pid) + + level.Debug(p.logger).Log("msg", "BPF maps updated", "pid", pid, "size", len(table)) + return nil +} + +// TODO(kakkoyun): This method is too long. Separate it into smaller methods. 
+func (p *CgroupProfiler) profileLoop(ctx context.Context, now time.Time, counts, stackTraces, unwindedStackTrace *bpf.BPFMap) error { prof := &profile.Profile{ SampleType: []*profile.ValueType{{ Type: "samples", @@ -265,6 +503,7 @@ func (p *CgroupProfiler) profileLoop(ctx context.Context, now time.Time, counts, } mapping := maps.NewMapping(p.pidMappingFileCache) + kernelMapping := &profile.Mapping{ File: "[kernel.kallsyms]", } @@ -316,28 +555,35 @@ func (p *CgroupProfiler) profileLoop(ctx context.Context, now time.Time, counts, } value := byteOrder.Uint64(valueBytes) + if userStackID == 0 { + // this means that the stack trace is not available for this process + level.Debug(p.logger).Log("msg", "user stack ID is 0", "pid", pid) + continue + } + level.Debug(p.logger).Log("msg", "user stack found", "stackID", userStackID, "pid", pid) + stackBytes, err := stackTraces.GetValue(unsafe.Pointer(&userStackID)) if err != nil { + // TODO(kakkoyun): Add metric. //profile.MissingStacks++ continue } // Twice the stack depth because we have a user and a potential Kernel stack. stack := [doubleStackDepth]uint64{} - err = binary.Read(bytes.NewBuffer(stackBytes), byteOrder, stack[:stackDepth]) - if err != nil { + if err := binary.Read(bytes.NewBuffer(stackBytes), byteOrder, stack[:stackDepth]); err != nil { return fmt.Errorf("read user stack trace: %w", err) } if kernelStackID >= 0 { stackBytes, err = stackTraces.GetValue(unsafe.Pointer(&kernelStackID)) if err != nil { - //profile.MissingStacks++ + // TODO(kakkoyun): Add metric. 
+ // profile.MissingStacks++ continue } - err = binary.Read(bytes.NewBuffer(stackBytes), byteOrder, stack[stackDepth:]) - if err != nil { + if err = binary.Read(bytes.NewBuffer(stackBytes), byteOrder, stack[stackDepth:]); err != nil { return fmt.Errorf("read kernel stack trace: %w", err) } } @@ -350,10 +596,9 @@ func (p *CgroupProfiler) profileLoop(ctx context.Context, now time.Time, counts, continue } - sampleLocations := []*profile.Location{} - // Kernel stack - for _, addr := range stack[stackDepth:] { + sampleLocations := []*profile.Location{} + for _, addr := range stack[stackDepth:] { // Kernel stack if addr != uint64(0) { key := [2]uint64{0, addr} // PID 0 not possible so we'll use it to identify the kernel. @@ -381,7 +626,8 @@ func (p *CgroupProfiler) profileLoop(ctx context.Context, now time.Time, counts, // the perf map. level.Debug(p.logger).Log("msg", "no perfmap", "err", err) } - for _, addr := range stack[:stackDepth] { + + buildLocation := func(addr uint64) { if addr != uint64(0) { key := [2]uint64{uint64(pid), addr} locationIndex, ok := locationIndices[key] @@ -419,6 +665,30 @@ func (p *CgroupProfiler) profileLoop(ctx context.Context, now time.Time, counts, } } + if unwindedStackTrace != nil { + userStack := [stackDepth]uint64{} + for d := 0; d < stackDepth; d++ { + key := uint32(d) + valueBytes, err := unwindedStackTrace.GetValue(unsafe.Pointer(&key)) + if err != nil { + return fmt.Errorf("get unwind stack trace value: %w", err) + } + value := byteOrder.Uint64(valueBytes) + userStack[d] = value + + buildLocation(value) + } + // TODO(kakkoyun): Remove! + level.Debug(p.logger).Log("msg", "unwinded user stack trace", "stackid", userStackID, "stack", fmt.Sprintf("%v", userStack)) + } else { + for _, addr := range stack[:stackDepth] { // User stack + buildLocation(addr) + } + } + + // TODO(kakkoyun): Remove! 
+ level.Debug(p.logger).Log("msg", "user stack trace", "stackid", userStackID, "stack", fmt.Sprintf("%v", stack[:stackDepth])) + sample = &profile.Sample{ Value: []int64{int64(value)}, Location: sampleLocations, @@ -472,23 +742,23 @@ func (p *CgroupProfiler) profileLoop(ctx context.Context, now time.Time, counts, prof.Function = append(prof.Function, f) } + // TODO(kakkoyun): Make it async. p.debugInfoExtractor.EnsureUploaded(ctx, buildIDFiles) buf := bytes.NewBuffer(nil) - err = prof.Write(buf) - if err != nil { + if err = prof.Write(buf); err != nil { return err } + labels := p.Labels() - _, err = p.writeClient.WriteRaw(ctx, &profilestorepb.WriteRawRequest{ + if _, err = p.writeClient.WriteRaw(ctx, &profilestorepb.WriteRawRequest{ Series: []*profilestorepb.RawProfileSeries{{ Labels: &profilestorepb.LabelSet{Labels: labels}, Samples: []*profilestorepb.RawSample{{ RawProfile: buf.Bytes(), }}, }}, - }) - if err != nil { + }); err != nil { level.Error(p.logger).Log("msg", "failed to send profile", "err", err) } @@ -500,7 +770,6 @@ func (p *CgroupProfiler) profileLoop(ctx context.Context, now time.Time, counts, // BPF iterators need the previous value to iterate to the next, so we // can only delete the "previous" item once we've already iterated to // the next. 
- it = stackTraces.Iterator() var prev []byte = nil for it.Next() { @@ -522,6 +791,17 @@ func (p *CgroupProfiler) profileLoop(ctx context.Context, now time.Time, counts, } } + if unwindedStackTrace != nil { + zero := uint64(0) + for d := 0; d < stackDepth; d++ { + key := uint32(d) + err := unwindedStackTrace.Update(unsafe.Pointer(&key), unsafe.Pointer(&zero)) + if err != nil { + level.Warn(p.logger).Log("msg", "failed to delete unwind stack trace", "err", err) + } + } + } + it = counts.Iterator() prev = nil for it.Next() { @@ -546,6 +826,37 @@ func (p *CgroupProfiler) profileLoop(ctx context.Context, now time.Time, counts, return nil } +func (p *CgroupProfiler) loopReport(lastProfileTakenAt time.Time, lastError error) { + p.mtx.Lock() + defer p.mtx.Unlock() + p.lastProfileTakenAt = lastProfileTakenAt + p.lastError = lastError + if lastError != nil { + level.Debug(p.logger).Log("msg", "cgroup profiler loop report", "lastProfileTakenAt", lastProfileTakenAt, "lastError", lastError) + } +} + +func (p *CgroupProfiler) bumpMemlockRlimit() error { + limit := 2048 << 20 // 2GB + rLimit := syscall.Rlimit{ + Cur: uint64(limit), + Max: uint64(limit), + } + + // RLIMIT_MEMLOCK is 0x8. 
+ if err := syscall.Setrlimit(0x8, &rLimit); err != nil { + return fmt.Errorf("failed to increase rlimit: %w", err) + } + + rLimit = syscall.Rlimit{} + if err := syscall.Getrlimit(0x8, &rLimit); err != nil { + return fmt.Errorf("failed to get rlimit: %w", err) + } + level.Debug(p.logger).Log("msg", "increased max memory locked rlimit", "limit", humanize.Bytes(rLimit.Cur)) + + return nil +} + func probabilisticSampling(ratio float64, labels []*profilestorepb.Label) bool { if ratio == 1.0 { return true diff --git a/pkg/agent/systemdmanager.go b/pkg/agent/systemdmanager.go index b8d6909f3f..2ae28d25a8 100644 --- a/pkg/agent/systemdmanager.go +++ b/pkg/agent/systemdmanager.go @@ -55,6 +55,11 @@ type SystemdUnitTarget struct { NodeName string } +func (t *SystemdUnitTarget) PID() int { + //TODO(kakkoyun): implement me + return -1 +} + func (t *SystemdUnitTarget) Labels() []*profilestorepb.Label { return []*profilestorepb.Label{{ Name: "node", @@ -213,8 +218,8 @@ func (m *SystemdManager) reconcileUnit(ctx context.Context, unit string) error { m.mtx.Unlock() go func() { - err := p.Run(ctx) - if err != nil { + // TODO(kakkoyun): Fail if all the profilers fail to start. + if err := p.Run(ctx); err != nil { level.Error(m.logger).Log("msg", "running systemd-unit profiler failed", "err", err) } }() diff --git a/pkg/debuginfo/debuginfo.go b/pkg/debuginfo/debuginfo.go index fe50863a37..bd0acfc89e 100644 --- a/pkg/debuginfo/debuginfo.go +++ b/pkg/debuginfo/debuginfo.go @@ -137,6 +137,7 @@ func (di *Extractor) Extract(ctx context.Context, buildIDFiles map[string]string } func (di *Extractor) EnsureUploaded(ctx context.Context, buildIDFiles map[string]maps.BuildIDFile) { + // TODO(kakkoyun): Make async. 
for buildID, buildIDFile := range buildIDFiles { exists, err := di.Client.Exists(ctx, buildID) if err != nil { @@ -155,11 +156,11 @@ func (di *Extractor) EnsureUploaded(ctx context.Context, buildIDFiles map[string } if !hasDebugInfo { + level.Debug(di.logger).Log("msg", "could not find symbols in binary, checking for additional debuginfo file", "buildid", buildID, "file", file) // The object does not have debug symbols, but maybe debuginfos // have been installed separately, typically in /usr/lib/debug, so // we try to discover if there is a debuginfo file, that has the // same build ID as the object. - level.Debug(di.logger).Log("msg", "could not find symbols in binary, checking for additional debuginfo file", "buildid", buildID, "file", file) dbgInfo, err := di.findDebugInfo(buildID, buildIDFile) if err != nil { if !errors.Is(err, errNotFound) { @@ -197,6 +198,11 @@ func (di *Extractor) findDebugInfo(buildID string, buildIDFile maps.BuildIDFile) found = false file string ) + // TODO(kakkoyun): Debian has a special structure for debuginfo files. Under .build-id/ directory, + // there is a directory for each build ID. + // Under this directory, there is a dedicated directory for, first two letters of build ID as directory name. + // And then the rest of the build ID as file name. And .debug as file extension. + // /usr/lib/debug is a symlink to /usr/lib/debug/usr/lib/debug/lib. err := filepath.Walk(path.Join(buildIDFile.Root(), "/usr/lib/debug"), func(path string, info os.FileInfo, err error) error { if err != nil { return err @@ -285,6 +291,9 @@ func (di *Extractor) useStrip(ctx context.Context, dir string, file string) (*ex interimFile := path.Join(dir, "binary.stripped") cmd := exec.CommandContext(ctx, "eu-strip", "--strip-debug", + // TODO(kakkoyun): We don't need these if we can unwind in agent. 
+ // "--keep-section", ".eh_frame", // unwind info + // "--keep-section", ".eh_frame_hdr", // binary search index for unwind info "--remove-section", ".debug_gdb_scripts", // causes some trouble when it's set to SHT_NOBITS "-f", debugInfoFile, "-o", interimFile, @@ -306,6 +315,8 @@ func (di *Extractor) useObjcopy(ctx context.Context, dir string, file string) (* // NOTICE: Keep debug information till we find a better for symbolizing Go binaries without DWARF. //"-R", ".zdebug_*", //"-R", ".debug_*", + // TODO(kakkoyun): We don't need these if we can unwind in agent. + // "--keep-section", ".eh_frame*", // unwind info "--remove-section", ".text", // executable "--remove-section", ".rodata*", // constants "--remove-section", ".debug_gdb_scripts", // causes some trouble when it's set to SHT_NOBITS diff --git a/pkg/k8s/k8s.go b/pkg/k8s/k8s.go index af183dcc02..084382c085 100644 --- a/pkg/k8s/k8s.go +++ b/pkg/k8s/k8s.go @@ -145,6 +145,10 @@ type ContainerDefinition struct { Pid int } +func (c *ContainerDefinition) PID() int { + return c.Pid +} + func (c *ContainerDefinition) Labels() []*profilestorepb.Label { return []*profilestorepb.Label{{ Name: "node", diff --git a/pkg/stack/frame/README.md b/pkg/stack/frame/README.md new file mode 100644 index 0000000000..1292000342 --- /dev/null +++ b/pkg/stack/frame/README.md @@ -0,0 +1,9 @@ +# NOTICE + +This package was directly copied from Delve project. + +https://github.com/go-delve/delve/tree/master/pkg/dwarf/frame + +All rights reserved to Delve developer. + +https://github.com/go-delve/delve/blob/master/LICENSE diff --git a/pkg/stack/frame/entries.go b/pkg/stack/frame/entries.go new file mode 100644 index 0000000000..bfac3e6b02 --- /dev/null +++ b/pkg/stack/frame/entries.go @@ -0,0 +1,152 @@ +package frame + +import ( + "encoding/binary" + "fmt" + "sort" +) + +// CommonInformationEntry represents a Common Information Entry in +// the Dwarf .debug_frame section. 
+type CommonInformationEntry struct { + Length uint32 + CIE_id uint32 + Version uint8 + Augmentation string + CodeAlignmentFactor uint64 + DataAlignmentFactor int64 + ReturnAddressRegister uint64 + InitialInstructions []byte + staticBase uint64 + + // eh_frame pointer encoding + ptrEncAddr ptrEnc +} + +// FrameDescriptionEntry represents a Frame Descriptor Entry in the +// Dwarf .debug_frame section. +type FrameDescriptionEntry struct { + Length uint32 + CIE *CommonInformationEntry + Instructions []byte + begin, size uint64 + order binary.ByteOrder +} + +// Cover returns whether or not the given address is within the +// bounds of this frame. +func (fde *FrameDescriptionEntry) Cover(addr uint64) bool { + return (addr - fde.begin) < fde.size +} + +// Begin returns address of first location for this frame. +func (fde *FrameDescriptionEntry) Begin() uint64 { + return fde.begin +} + +// End returns address of last location for this frame. +func (fde *FrameDescriptionEntry) End() uint64 { + return fde.begin + fde.size +} + +// Translate moves the beginning of fde forward by delta. +func (fde *FrameDescriptionEntry) Translate(delta uint64) { + fde.begin += delta +} + +// EstablishFrame set up frame for the given PC. +func (fde *FrameDescriptionEntry) EstablishFrame(pc uint64) *FrameContext { + return executeDwarfProgramUntilPC(fde, pc) +} + +type FrameDescriptionEntries []*FrameDescriptionEntry + +func newFrameIndex() FrameDescriptionEntries { + return make(FrameDescriptionEntries, 0, 1000) +} + +// ErrNoFDEForPC FDE for PC not found error +type ErrNoFDEForPC struct { + PC uint64 +} + +func (err *ErrNoFDEForPC) Error() string { + return fmt.Sprintf("could not find FDE for PC %#v", err.PC) +} + +// FDEForPC returns the Frame Description Entry for the given PC. 
+func (fdes FrameDescriptionEntries) FDEForPC(pc uint64) (*FrameDescriptionEntry, error) {
+	// Entries are kept sorted by Begin (see Append), so binary-search for the
+	// first FDE that either covers pc or starts at/after it.
+	idx := sort.Search(len(fdes), func(i int) bool {
+		return fdes[i].Cover(pc) || fdes[i].Begin() >= pc
+	})
+	if idx == len(fdes) || !fdes[idx].Cover(pc) {
+		return nil, &ErrNoFDEForPC{pc}
+	}
+	return fdes[idx], nil
+}
+
+// Append appends otherFDEs to fdes and returns the combined entries, sorted
+// by Begin and with exact duplicates (same Begin and End) removed.
+func (fdes FrameDescriptionEntries) Append(otherFDEs FrameDescriptionEntries) FrameDescriptionEntries {
+	r := append(fdes, otherFDEs...)
+	sort.SliceStable(r, func(i, j int) bool {
+		return r[i].Begin() < r[j].Begin()
+	})
+	// Remove duplicates from the merged, sorted slice. Note: we must iterate
+	// r (the merged result), not the original fdes, and return the
+	// deduplicated slice — otherwise the dedup work is discarded.
+	uniqFDEs := r[:0]
+	for _, fde := range r {
+		if len(uniqFDEs) > 0 {
+			last := uniqFDEs[len(uniqFDEs)-1]
+			if last.Begin() == fde.Begin() && last.End() == fde.End() {
+				continue
+			}
+		}
+		uniqFDEs = append(uniqFDEs, fde)
+	}
+	return uniqFDEs
+}
+
+// ptrEnc represents a pointer encoding value, used during eh_frame decoding
+// to determine how pointers were encoded.
+// Least significant 4 bits (0xf) encode the size as well as its
+// signed-ness, most significant 4 bits (0xf0) are flags describing how
+// the value should be interpreted (absolute, relative...)
+// See https://www.airs.com/blog/archives/460.
+type ptrEnc uint8 + +const ( + ptrEncAbs ptrEnc = 0x00 // pointer-sized unsigned integer + ptrEncOmit ptrEnc = 0xff // omitted + ptrEncUleb ptrEnc = 0x01 // ULEB128 + ptrEncUdata2 ptrEnc = 0x02 // 2 bytes + ptrEncUdata4 ptrEnc = 0x03 // 4 bytes + ptrEncUdata8 ptrEnc = 0x04 // 8 bytes + ptrEncSigned ptrEnc = 0x08 // pointer-sized signed integer + ptrEncSleb ptrEnc = 0x09 // SLEB128 + ptrEncSdata2 ptrEnc = 0x0a // 2 bytes, signed + ptrEncSdata4 ptrEnc = 0x0b // 4 bytes, signed + ptrEncSdata8 ptrEnc = 0x0c // 8 bytes, signed + + ptrEncPCRel ptrEnc = 0x10 // value is relative to the memory address where it appears + ptrEncTextRel ptrEnc = 0x20 // value is relative to the address of the text section + ptrEncDataRel ptrEnc = 0x30 // value is relative to the address of the data section + ptrEncFuncRel ptrEnc = 0x40 // value is relative to the start of the function + ptrEncAligned ptrEnc = 0x50 // value should be aligned + ptrEncIndirect ptrEnc = 0x80 // value is an address where the real value of the pointer is stored +) + +// Supported returns true if this pointer encoding is supported. 
+func (ptrEnc ptrEnc) Supported() bool { + if ptrEnc != ptrEncOmit { + szenc := ptrEnc & 0x0f + if ((szenc > ptrEncUdata8) && (szenc < ptrEncSigned)) || (szenc > ptrEncSdata8) { + // These values aren't defined at the moment + return false + } + val := ptrEnc&0xf0 + if val != ptrEncPCRel { + // Currently only the PC relative flag is supported + return false + } + } + return true +} diff --git a/pkg/stack/frame/entries_test.go b/pkg/stack/frame/entries_test.go new file mode 100644 index 0000000000..593d451689 --- /dev/null +++ b/pkg/stack/frame/entries_test.go @@ -0,0 +1,76 @@ +package frame + +import ( + "encoding/binary" + "io/ioutil" + "os" + "testing" + "unsafe" +) + +func ptrSizeByRuntimeArch() int { + return int(unsafe.Sizeof(uintptr(0))) +} + +func TestFDEForPC(t *testing.T) { + frames := newFrameIndex() + frames = append(frames, + &FrameDescriptionEntry{begin: 10, size: 40}, + &FrameDescriptionEntry{begin: 50, size: 50}, + &FrameDescriptionEntry{begin: 100, size: 100}, + &FrameDescriptionEntry{begin: 300, size: 10}) + + for _, test := range []struct { + pc uint64 + fde *FrameDescriptionEntry + }{ + {0, nil}, + {9, nil}, + {10, frames[0]}, + {35, frames[0]}, + {49, frames[0]}, + {50, frames[1]}, + {75, frames[1]}, + {100, frames[2]}, + {199, frames[2]}, + {200, nil}, + {299, nil}, + {300, frames[3]}, + {309, frames[3]}, + {310, nil}, + {400, nil}} { + + out, err := frames.FDEForPC(test.pc) + if test.fde != nil { + if err != nil { + t.Fatal(err) + } + if out != test.fde { + t.Errorf("[pc = %#x] got incorrect fde\noutput:\t%#v\nexpected:\t%#v", test.pc, out, test.fde) + } + } else { + if err == nil { + t.Errorf("[pc = %#x] expected error got fde %#v", test.pc, out) + } + } + } +} + +func BenchmarkFDEForPC(b *testing.B) { + f, err := os.Open("testdata/frame") + if err != nil { + b.Fatal(err) + } + defer f.Close() + + data, err := ioutil.ReadAll(f) + if err != nil { + b.Fatal(err) + } + fdes, _ := Parse(data, binary.BigEndian, 0, ptrSizeByRuntimeArch(), 0) + + 
for i := 0; i < b.N; i++ { + // bench worst case, exhaustive search + _, _ = fdes.FDEForPC(0x455555555) + } +} diff --git a/pkg/stack/frame/expression_constants.go b/pkg/stack/frame/expression_constants.go new file mode 100644 index 0000000000..95240cfea9 --- /dev/null +++ b/pkg/stack/frame/expression_constants.go @@ -0,0 +1,164 @@ +package frame + +// Operation opcodes +const ( + DW_OP_addr = 0x03 + DW_OP_const1s = 0x09 +) + +const ( + DW_OP_const2u = 0x0a + DW_OP_const2s = 0x0b + DW_OP_const4u = iota + DW_OP_const4s + DW_OP_const8u + DW_OP_const8s + DW_OP_constu + DW_OP_consts + DW_OP_dup + DW_OP_drop + DW_OP_over + DW_OP_pick + DW_OP_swap + DW_OP_rot + DW_OP_xderef + DW_OP_abs + DW_OP_and + DW_OP_div + DW_OP_minus + DW_OP_mod + DW_OP_mul + DW_OP_neg + DW_OP_not + DW_OP_or + DW_OP_plus + DW_OP_plus_uconst + DW_OP_shl + DW_OP_shr + DW_OP_shra + DW_OP_xor + DW_OP_skip + DW_OP_bra + DW_OP_eq + DW_OP_ge + DW_OP_gt + DW_OP_le + DW_OP_lt + DW_OP_ne +) + +const ( + DW_OP_lit0 = 0x30 + DW_OP_lit1 = 0x31 + DW_OP_lit2 = iota + DW_OP_lit3 + DW_OP_lit4 + DW_OP_lit5 + DW_OP_lit6 + DW_OP_lit7 + DW_OP_lit8 + DW_OP_lit9 + DW_OP_lit10 + DW_OP_lit11 + DW_OP_lit12 + DW_OP_lit13 + DW_OP_lit14 + DW_OP_lit15 + DW_OP_lit16 + DW_OP_lit17 + DW_OP_lit18 + DW_OP_lit19 + DW_OP_lit20 + DW_OP_lit21 + DW_OP_lit22 + DW_OP_lit23 + DW_OP_lit24 + DW_OP_lit25 + DW_OP_lit26 + DW_OP_lit27 + DW_OP_lit28 + DW_OP_lit29 + DW_OP_lit30 + DW_OP_lit31 + DW_OP_reg0 + DW_OP_reg1 + DW_OP_reg2 + DW_OP_reg3 + DW_OP_reg4 + DW_OP_reg5 + DW_OP_reg6 + DW_OP_reg7 + DW_OP_reg8 + DW_OP_reg9 + DW_OP_reg10 + DW_OP_reg11 + DW_OP_reg12 + DW_OP_reg13 + DW_OP_reg14 + DW_OP_reg15 + DW_OP_reg16 + DW_OP_reg17 + DW_OP_reg18 + DW_OP_reg19 + DW_OP_reg20 + DW_OP_reg21 + DW_OP_reg22 + DW_OP_reg23 + DW_OP_reg24 + DW_OP_reg25 + DW_OP_reg26 + DW_OP_reg27 + DW_OP_reg28 + DW_OP_reg29 + DW_OP_reg30 + DW_OP_reg31 + DW_OP_breg0 + DW_OP_breg1 + DW_OP_breg2 + DW_OP_breg3 + DW_OP_breg4 + DW_OP_breg5 + DW_OP_breg6 + DW_OP_breg7 + DW_OP_breg8 + 
DW_OP_breg9 + DW_OP_breg10 + DW_OP_breg11 + DW_OP_breg12 + DW_OP_breg13 + DW_OP_breg14 + DW_OP_breg15 + DW_OP_breg16 + DW_OP_breg17 + DW_OP_breg18 + DW_OP_breg19 + DW_OP_breg20 + DW_OP_breg21 + DW_OP_breg22 + DW_OP_breg23 + DW_OP_breg24 + DW_OP_breg25 + DW_OP_breg26 + DW_OP_breg27 + DW_OP_breg28 + DW_OP_breg29 + DW_OP_breg30 + DW_OP_breg31 + DW_OP_regx + DW_OP_fbreg + DW_OP_bregx + DW_OP_piece + DW_OP_deref_size + DW_OP_xderef_size + DW_OP_nop + DW_OP_push_object_address + DW_OP_call2 + DW_OP_call4 + DW_OP_call_ref + DW_OP_form_tls_address + DW_OP_call_frame_cfa + DW_OP_bit_piece + + DW_OP_lo_user = 0xe0 + DW_OP_hi_user = 0xff +) diff --git a/pkg/stack/frame/parser.go b/pkg/stack/frame/parser.go new file mode 100644 index 0000000000..3357a7dead --- /dev/null +++ b/pkg/stack/frame/parser.go @@ -0,0 +1,291 @@ +// Package frame contains data structures and +// related functions for parsing and searching +// through Dwarf .debug_frame data. +package frame + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + + "github.com/go-delve/delve/pkg/dwarf/util" +) + +type parsefunc func(*parseContext) parsefunc + +type parseContext struct { + staticBase uint64 + + buf *bytes.Buffer + totalLen int + entries FrameDescriptionEntries + ciemap map[int]*CommonInformationEntry + common *CommonInformationEntry + frame *FrameDescriptionEntry + length uint32 + ptrSize int + ehFrameAddr uint64 + err error + warn error +} + +// Parse takes in data (a byte slice) and returns FrameDescriptionEntries, +// which is a slice of FrameDescriptionEntry. Each FrameDescriptionEntry +// has a pointer to CommonInformationEntry. +// If ehFrameAddr is not zero the .eh_frame format will be used, a minor variant of DWARF described at https://www.airs.com/blog/archives/460. 
+// The value of ehFrameAddr will be used as the address at which eh_frame will be mapped into memory +func Parse(data []byte, order binary.ByteOrder, staticBase uint64, ptrSize int, ehFrameAddr uint64) (FrameDescriptionEntries, error) { + var ( + buf = bytes.NewBuffer(data) + pctx = &parseContext{buf: buf, totalLen: len(data), entries: newFrameIndex(), staticBase: staticBase, ptrSize: ptrSize, ehFrameAddr: ehFrameAddr, ciemap: map[int]*CommonInformationEntry{}} + ) + + for fn := parselength; buf.Len() != 0; { + fn = fn(pctx) + if pctx.err != nil { + return nil, pctx.err + } + //if pctx.warn != nil { + // TODO(kakkoyun): Figure out logging or? + //fmt.Println(pctx.warn) + //} + } + + for i := range pctx.entries { + pctx.entries[i].order = order + } + + return pctx.entries, nil +} + +func (ctx *parseContext) parsingEHFrame() bool { + return ctx.ehFrameAddr > 0 +} + +func (ctx *parseContext) cieEntry(cieid uint32) bool { + if ctx.parsingEHFrame() { + return cieid == 0x00 + } + return cieid == 0xffffffff +} + +func (ctx *parseContext) offset() int { + return ctx.totalLen - ctx.buf.Len() +} + +func parselength(ctx *parseContext) parsefunc { + start := ctx.offset() + binary.Read(ctx.buf, binary.LittleEndian, &ctx.length) //TODO(aarzilli): this does not support 64bit DWARF + + if ctx.length == 0 { + // ZERO terminator + return parselength + } + + var cieid uint32 + binary.Read(ctx.buf, binary.LittleEndian, &cieid) + + ctx.length -= 4 // take off the length of the CIE id / CIE pointer. 
+ + if ctx.cieEntry(cieid) { + ctx.common = &CommonInformationEntry{Length: ctx.length, staticBase: ctx.staticBase, CIE_id: cieid} + ctx.ciemap[start] = ctx.common + return parseCIE + } + + if ctx.ehFrameAddr > 0 { + cieid = uint32(start - int(cieid) + 4) + } + + common := ctx.ciemap[int(cieid)] + + if common == nil { + ctx.err = fmt.Errorf("unknown CIE_id %#x at %#x", cieid, start) + } + + ctx.frame = &FrameDescriptionEntry{Length: ctx.length, CIE: common} + return parseFDE +} + +func parseFDE(ctx *parseContext) parsefunc { + startOff := ctx.offset() + r := ctx.buf.Next(int(ctx.length)) + + reader := bytes.NewReader(r) + num := ctx.readEncodedPtr(addrSum(ctx.ehFrameAddr+uint64(startOff), reader), reader, ctx.frame.CIE.ptrEncAddr) + ctx.frame.begin = num + ctx.staticBase + + // For the size field in .eh_frame only the size encoding portion of the + // address pointer encoding is considered. + // See decode_frame_entry_1 in gdb/dwarf2-frame.c. + // For .debug_frame ptrEncAddr is always ptrEncAbs and never has flags. + sizePtrEnc := ctx.frame.CIE.ptrEncAddr & 0x0f + ctx.frame.size = ctx.readEncodedPtr(0, reader, sizePtrEnc) + + // Insert into the tree after setting address range begin + // otherwise compares won't work. + ctx.entries = append(ctx.entries, ctx.frame) + + if ctx.parsingEHFrame() && len(ctx.frame.CIE.Augmentation) > 0 { + // If we are parsing a .eh_frame and we saw an agumentation string then we + // need to read the augmentation data, which are encoded as a ULEB128 + // size followed by 'size' bytes. + n, _ := util.DecodeULEB128(reader) + reader.Seek(int64(n), io.SeekCurrent) + } + + // The rest of this entry consists of the instructions + // so we can just grab all of the data from the buffer + // cursor to length. 
+ + off, _ := reader.Seek(0, io.SeekCurrent) + ctx.frame.Instructions = r[off:] + ctx.length = 0 + + return parselength +} + +func addrSum(base uint64, buf *bytes.Reader) uint64 { + n, _ := buf.Seek(0, io.SeekCurrent) + return base + uint64(n) +} + +func parseCIE(ctx *parseContext) parsefunc { + data := ctx.buf.Next(int(ctx.length)) + buf := bytes.NewBuffer(data) + // parse version + ctx.common.Version, _ = buf.ReadByte() + + // parse augmentation + ctx.common.Augmentation, _ = util.ParseString(buf) + + if ctx.parsingEHFrame() { + if ctx.common.Augmentation == "eh" { + ctx.err = fmt.Errorf("unsupported 'eh' augmentation at %#x", ctx.offset()) + } + if len(ctx.common.Augmentation) > 0 && ctx.common.Augmentation[0] != 'z' { + ctx.err = fmt.Errorf("unsupported augmentation at %#x (does not start with 'z')", ctx.offset()) + } + } + + // parse code alignment factor + ctx.common.CodeAlignmentFactor, _ = util.DecodeULEB128(buf) + + // parse data alignment factor + ctx.common.DataAlignmentFactor, _ = util.DecodeSLEB128(buf) + + // parse return address register + if ctx.parsingEHFrame() && ctx.common.Version == 1 { + b, _ := buf.ReadByte() + ctx.common.ReturnAddressRegister = uint64(b) + } else { + ctx.common.ReturnAddressRegister, _ = util.DecodeULEB128(buf) + } + + ctx.common.ptrEncAddr = ptrEncAbs + + if ctx.parsingEHFrame() && len(ctx.common.Augmentation) > 0 { + _, _ = util.DecodeULEB128(buf) // augmentation data length + for i := 1; i < len(ctx.common.Augmentation); i++ { + switch ctx.common.Augmentation[i] { + case 'L': + _, _ = buf.ReadByte() // LSDA pointer encoding, we don't support this. + case 'R': + // Pointer encoding, describes how begin and size fields of FDEs are encoded. 
+ b, _ := buf.ReadByte() + ctx.common.ptrEncAddr = ptrEnc(b) + if !ctx.common.ptrEncAddr.Supported() { + ctx.err = fmt.Errorf("pointer encoding not supported %#x at %#x", ctx.common.ptrEncAddr, ctx.offset()) + return nil + } + case 'S': + // Signal handler invocation frame, we don't support this but there is no associated data to read. + case 'P': + // Personality function encoded as a pointer encoding byte followed by + // the pointer to the personality function encoded as specified by the + // pointer encoding. + // We don't support this but have to read it anyway. + e, _ := buf.ReadByte() + if !ptrEnc(e).Supported() { + // TODO(kakkoyun): Is this enough to just read and skip? + ctx.warn = fmt.Errorf("pointer encoding not supported %#x at %#x", e, ctx.offset()) + } + ctx.readEncodedPtr(0, buf, ptrEnc(e)) + default: + ctx.err = fmt.Errorf("unsupported augmentation character %c at %#x", ctx.common.Augmentation[i], ctx.offset()) + return nil + } + } + } + + // parse initial instructions + // The rest of this entry consists of the instructions + // so we can just grab all of the data from the buffer + // cursor to length. + ctx.common.InitialInstructions = buf.Bytes() //ctx.buf.Next(int(ctx.length)) + ctx.length = 0 + + return parselength +} + +// readEncodedPtr reads a pointer from buf encoded as specified by ptrEnc. +// This function is used to read pointers from a .eh_frame section, when +// used to parse a .debug_frame section ptrEnc will always be ptrEncAbs. +// The parameter addr is the address that the current byte of 'buf' will be +// mapped to when the executable file containing the eh_frame section being +// parse is loaded in memory. 
+func (ctx *parseContext) readEncodedPtr(addr uint64, buf util.ByteReaderWithLen, ptrEnc ptrEnc) uint64 { + if ptrEnc == ptrEncOmit { + return 0 + } + + var ptr uint64 + + switch ptrEnc & 0xf { + case ptrEncAbs, ptrEncSigned: + ptr, _ = util.ReadUintRaw(buf, binary.LittleEndian, ctx.ptrSize) + case ptrEncUleb: + ptr, _ = util.DecodeULEB128(buf) + case ptrEncUdata2: + ptr, _ = util.ReadUintRaw(buf, binary.LittleEndian, 2) + case ptrEncSdata2: + ptr, _ = util.ReadUintRaw(buf, binary.LittleEndian, 2) + ptr = uint64(int16(ptr)) + case ptrEncUdata4: + ptr, _ = util.ReadUintRaw(buf, binary.LittleEndian, 4) + case ptrEncSdata4: + ptr, _ = util.ReadUintRaw(buf, binary.LittleEndian, 4) + ptr = uint64(int32(ptr)) + case ptrEncUdata8, ptrEncSdata8: + ptr, _ = util.ReadUintRaw(buf, binary.LittleEndian, 8) + case ptrEncSleb: + n, _ := util.DecodeSLEB128(buf) + ptr = uint64(n) + } + + if ptrEnc&0xf0 == ptrEncPCRel { + ptr += addr + } + + return ptr +} + +// DwarfEndian determines the endianness of the DWARF by using the version number field in the debug_info section +// Trick borrowed from "debug/dwarf".New() +func DwarfEndian(infoSec []byte) binary.ByteOrder { + if len(infoSec) < 6 { + return binary.BigEndian + } + x, y := infoSec[4], infoSec[5] + switch { + case x == 0 && y == 0: + return binary.BigEndian + case x == 0: + return binary.BigEndian + case y == 0: + return binary.LittleEndian + default: + return binary.BigEndian + } +} diff --git a/pkg/stack/frame/parser_test.go b/pkg/stack/frame/parser_test.go new file mode 100644 index 0000000000..752ff3b5f2 --- /dev/null +++ b/pkg/stack/frame/parser_test.go @@ -0,0 +1,59 @@ +package frame + +import ( + "bytes" + "encoding/binary" + "io/ioutil" + "os" + "testing" +) + +func TestParseCIE(t *testing.T) { + ctx := &parseContext{ + buf: bytes.NewBuffer([]byte{3, 0, 1, 124, 16, 12, 7, 8, 5, 16, 2, 0, 36, 0, 0, 0, 0, 0, 0, 0, 0, 16, 64, 0, 0, 0, 0, 0}), + common: &CommonInformationEntry{Length: 12}, + length: 12, + } + ctx.totalLen = 
ctx.buf.Len() + _ = parseCIE(ctx) + + common := ctx.common + + if common.Version != 3 { + t.Fatalf("Expected Version 3, but get %d", common.Version) + } + if common.Augmentation != "" { + t.Fatalf("Expected Augmentation \"\", but get %s", common.Augmentation) + } + if common.CodeAlignmentFactor != 1 { + t.Fatalf("Expected CodeAlignmentFactor 1, but get %d", common.CodeAlignmentFactor) + } + if common.DataAlignmentFactor != -4 { + t.Fatalf("Expected DataAlignmentFactor -4, but get %d", common.DataAlignmentFactor) + } + if common.ReturnAddressRegister != 16 { + t.Fatalf("Expected ReturnAddressRegister 16, but get %d", common.ReturnAddressRegister) + } + initialInstructions := []byte{12, 7, 8, 5, 16, 2, 0} + if !bytes.Equal(common.InitialInstructions, initialInstructions) { + t.Fatalf("Expected InitialInstructions %v, but get %v", initialInstructions, common.InitialInstructions) + } +} + +func BenchmarkParse(b *testing.B) { + f, err := os.Open("testdata/frame") + if err != nil { + b.Fatal(err) + } + defer f.Close() + + data, err := ioutil.ReadAll(f) + if err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + Parse(data, binary.BigEndian, 0, ptrSizeByRuntimeArch(), 0) + } +} diff --git a/pkg/stack/frame/table.go b/pkg/stack/frame/table.go new file mode 100644 index 0000000000..f1f1729b26 --- /dev/null +++ b/pkg/stack/frame/table.go @@ -0,0 +1,483 @@ +package frame + +import ( + "bytes" + "encoding/binary" + "fmt" + + "github.com/go-delve/delve/pkg/dwarf/util" +) + +// DWRule wrapper of rule defined for register values. 
+type DWRule struct { + Rule Rule + Offset int64 + Reg uint64 + Expression []byte +} + +// FrameContext wrapper of FDE context +type FrameContext struct { + loc uint64 + order binary.ByteOrder + address uint64 + CFA DWRule + Regs map[uint64]DWRule + initialRegs map[uint64]DWRule + prevRegs map[uint64]DWRule + buf *bytes.Buffer + cie *CommonInformationEntry + RetAddrReg uint64 + codeAlignment uint64 + dataAlignment int64 +} + +// Instructions used to recreate the table from the .debug_frame data. +const ( + DW_CFA_nop = 0x0 // No ops + DW_CFA_set_loc = 0x01 // op1: address + DW_CFA_advance_loc1 = iota // op1: 1-bytes delta + DW_CFA_advance_loc2 // op1: 2-byte delta + DW_CFA_advance_loc4 // op1: 4-byte delta + DW_CFA_offset_extended // op1: ULEB128 register, op2: ULEB128 offset + DW_CFA_restore_extended // op1: ULEB128 register + DW_CFA_undefined // op1: ULEB128 register + DW_CFA_same_value // op1: ULEB128 register + DW_CFA_register // op1: ULEB128 register, op2: ULEB128 register + DW_CFA_remember_state // No ops + DW_CFA_restore_state // No ops + DW_CFA_def_cfa // op1: ULEB128 register, op2: ULEB128 offset + DW_CFA_def_cfa_register // op1: ULEB128 register + DW_CFA_def_cfa_offset // op1: ULEB128 offset + DW_CFA_def_cfa_expression // op1: BLOCK + DW_CFA_expression // op1: ULEB128 register, op2: BLOCK + DW_CFA_offset_extended_sf // op1: ULEB128 register, op2: SLEB128 BLOCK + DW_CFA_def_cfa_sf // op1: ULEB128 register, op2: SLEB128 offset + DW_CFA_def_cfa_offset_sf // op1: SLEB128 offset + DW_CFA_val_offset // op1: ULEB128, op2: ULEB128 + DW_CFA_val_offset_sf // op1: ULEB128, op2: SLEB128 + DW_CFA_val_expression // op1: ULEB128, op2: BLOCK + DW_CFA_lo_user = 0x1c // op1: BLOCK + DW_CFA_hi_user = 0x3f // op1: ULEB128 register, op2: BLOCK + DW_CFA_advance_loc = (0x1 << 6) // High 2 bits: 0x1, low 6: delta + DW_CFA_offset = (0x2 << 6) // High 2 bits: 0x2, low 6: register + DW_CFA_restore = (0x3 << 6) // High 2 bits: 0x3, low 6: register + // TODO(kakkoyun): Find 
corresponding values in the spec. + // TODO(kakkoyun): Implement noop funcs that skips corresponding bytes. + DW_CFA_MIPS_advance_loc8 = 0x1d + DW_CFA_GNU_window_save = 0x2d + DW_CFA_GNU_args_size = 0x2e + DW_CFA_GNU_negative_offset_extended = 0x2f +) + +// Rule rule defined for register values. +type Rule byte + +const ( + RuleUndefined Rule = iota + RuleSameVal + RuleOffset + RuleValOffset + RuleRegister + RuleExpression + RuleValExpression + RuleArchitectural + RuleCFA // Value is rule.Reg + rule.Offset + RuleFramePointer // Value is stored at address rule.Reg + rule.Offset, but only if it's less than the current CFA, otherwise same value +) + +const low_6_offset = 0x3f + +type instruction func(frame *FrameContext) + +// // Mapping from DWARF opcode to function. +var fnlookup = map[byte]instruction{ + DW_CFA_advance_loc: advanceloc, + DW_CFA_offset: offset, + DW_CFA_restore: restore, + DW_CFA_set_loc: setloc, + DW_CFA_advance_loc1: advanceloc1, + DW_CFA_advance_loc2: advanceloc2, + DW_CFA_advance_loc4: advanceloc4, + DW_CFA_offset_extended: offsetextended, + DW_CFA_restore_extended: restoreextended, + DW_CFA_undefined: undefined, + DW_CFA_same_value: samevalue, + DW_CFA_register: register, + DW_CFA_remember_state: rememberstate, + DW_CFA_restore_state: restorestate, + DW_CFA_def_cfa: defcfa, + DW_CFA_def_cfa_register: defcfaregister, + DW_CFA_def_cfa_offset: defcfaoffset, + DW_CFA_def_cfa_expression: defcfaexpression, + DW_CFA_expression: expression, + DW_CFA_offset_extended_sf: offsetextendedsf, + DW_CFA_def_cfa_sf: defcfasf, + DW_CFA_def_cfa_offset_sf: defcfaoffsetsf, + DW_CFA_val_offset: valoffset, + DW_CFA_val_offset_sf: valoffsetsf, + DW_CFA_val_expression: valexpression, + DW_CFA_lo_user: louser, + DW_CFA_hi_user: hiuser, + DW_CFA_GNU_args_size: gnuargsize, +} + +func executeCIEInstructions(cie *CommonInformationEntry) *FrameContext { + initialInstructions := make([]byte, len(cie.InitialInstructions)) + copy(initialInstructions, cie.InitialInstructions) + 
frame := &FrameContext{ + cie: cie, + Regs: make(map[uint64]DWRule), + RetAddrReg: cie.ReturnAddressRegister, + initialRegs: make(map[uint64]DWRule), + prevRegs: make(map[uint64]DWRule), + codeAlignment: cie.CodeAlignmentFactor, + dataAlignment: cie.DataAlignmentFactor, + buf: bytes.NewBuffer(initialInstructions), + } + + frame.executeDwarfProgram() + return frame +} + +// Unwind the stack to find the return address register. +func executeDwarfProgramUntilPC(fde *FrameDescriptionEntry, pc uint64) *FrameContext { + frame := executeCIEInstructions(fde.CIE) + frame.order = fde.order + frame.loc = fde.Begin() + frame.address = pc + frame.ExecuteUntilPC(fde.Instructions) + + return frame +} + +// Unwind the stack to find the return address register. +func ExecuteDwarfProgram(fde *FrameDescriptionEntry) *FrameContext { + // TODO(kakkoyun): Consider using sync.Pool for FrameContext. + frame := executeCIEInstructions(fde.CIE) + frame.order = fde.order + frame.loc = fde.Begin() + // frame.address = pc + frame.Execute(fde.Instructions) + + return frame +} + +func (frame *FrameContext) executeDwarfProgram() { + for frame.buf.Len() > 0 { + executeDwarfInstruction(frame) + } +} + +// ExecuteUntilPC execute dwarf instructions. +func (frame *FrameContext) ExecuteUntilPC(instructions []byte) { + frame.buf.Truncate(0) + frame.buf.Write(instructions) + + // We only need to execute the instructions until + // ctx.loc > ctx.address (which is the address we + // are currently at in the traced process). + for frame.address >= frame.loc && frame.buf.Len() > 0 { + executeDwarfInstruction(frame) + } +} + +// Execute execute dwarf instructions. +func (frame *FrameContext) Execute(instructions []byte) { + frame.buf.Truncate(0) + frame.buf.Write(instructions) + + // TODO(kakkoyun): Cleanup. + + // We only need to execute the instructions until + // ctx.loc > ctx.address (which is the address we + // are currently at in the traced process). 
+ // for frame.address >= frame.loc && + + // TODO(kakkoyun): What are the implications without a PC? + for frame.buf.Len() > 0 { + executeDwarfInstruction(frame) + } +} + +func executeDwarfInstruction(frame *FrameContext) { + instruction, err := frame.buf.ReadByte() + if err != nil { + panic("Could not read from instruction buffer") + } + + if instruction == DW_CFA_nop { + return + } + + fn := lookupFunc(instruction, frame.buf) + + fn(frame) +} + +func lookupFunc(instruction byte, buf *bytes.Buffer) instruction { + const high_2_bits = 0xc0 + var restore bool + + // Special case the 3 opcodes that have their argument encoded in the opcode itself. + switch instruction & high_2_bits { + case DW_CFA_advance_loc: + instruction = DW_CFA_advance_loc + restore = true + + case DW_CFA_offset: + instruction = DW_CFA_offset + restore = true + + case DW_CFA_restore: + instruction = DW_CFA_restore + restore = true + } + + if restore { + // Restore the last byte as it actually contains the argument for the opcode. + err := buf.UnreadByte() + if err != nil { + panic("Could not unread byte") + } + } + + fn, ok := fnlookup[instruction] + if !ok { + //msg := fmt.Sprintf("Encountered an unexpected DWARF CFA opcode: %#v", instruction) + //fmt.Println(msg) + //return unknown + + // TODO(kakkoyun): Why do we have to panic? 
+ panic(fmt.Sprintf("Encountered an unexpected DWARF CFA opcode: %#v", instruction)) + } + + return fn +} + +func advanceloc(frame *FrameContext) { + b, err := frame.buf.ReadByte() + if err != nil { + panic("Could not read byte") + } + + delta := b & low_6_offset + frame.loc += uint64(delta) * frame.codeAlignment +} + +func advanceloc1(frame *FrameContext) { + delta, err := frame.buf.ReadByte() + if err != nil { + panic("Could not read byte") + } + + frame.loc += uint64(delta) * frame.codeAlignment +} + +func advanceloc2(frame *FrameContext) { + var delta uint16 + binary.Read(frame.buf, frame.order, &delta) + + frame.loc += uint64(delta) * frame.codeAlignment +} + +func advanceloc4(frame *FrameContext) { + var delta uint32 + binary.Read(frame.buf, frame.order, &delta) + + frame.loc += uint64(delta) * frame.codeAlignment +} + +func offset(frame *FrameContext) { + b, err := frame.buf.ReadByte() + if err != nil { + panic(err) + } + + var ( + reg = b & low_6_offset + offset, _ = util.DecodeULEB128(frame.buf) + ) + + frame.Regs[uint64(reg)] = DWRule{Offset: int64(offset) * frame.dataAlignment, Rule: RuleOffset} +} + +func restore(frame *FrameContext) { + b, err := frame.buf.ReadByte() + if err != nil { + panic(err) + } + + reg := uint64(b & low_6_offset) + oldrule, ok := frame.initialRegs[reg] + if ok { + frame.Regs[reg] = DWRule{Offset: oldrule.Offset, Rule: RuleOffset} + } else { + frame.Regs[reg] = DWRule{Rule: RuleUndefined} + } +} + +func setloc(frame *FrameContext) { + var loc uint64 + binary.Read(frame.buf, frame.order, &loc) + + frame.loc = loc + frame.cie.staticBase +} + +func offsetextended(frame *FrameContext) { + var ( + reg, _ = util.DecodeULEB128(frame.buf) + offset, _ = util.DecodeULEB128(frame.buf) + ) + + frame.Regs[reg] = DWRule{Offset: int64(offset) * frame.dataAlignment, Rule: RuleOffset} +} + +func undefined(frame *FrameContext) { + reg, _ := util.DecodeULEB128(frame.buf) + frame.Regs[reg] = DWRule{Rule: RuleUndefined} +} + +func samevalue(frame 
*FrameContext) { + reg, _ := util.DecodeULEB128(frame.buf) + frame.Regs[reg] = DWRule{Rule: RuleSameVal} +} + +func register(frame *FrameContext) { + reg1, _ := util.DecodeULEB128(frame.buf) + reg2, _ := util.DecodeULEB128(frame.buf) + frame.Regs[reg1] = DWRule{Reg: reg2, Rule: RuleRegister} +} + +func rememberstate(frame *FrameContext) { + frame.prevRegs = frame.Regs +} + +func restorestate(frame *FrameContext) { + frame.Regs = frame.prevRegs +} + +func restoreextended(frame *FrameContext) { + reg, _ := util.DecodeULEB128(frame.buf) + + oldrule, ok := frame.initialRegs[reg] + if ok { + frame.Regs[reg] = DWRule{Offset: oldrule.Offset, Rule: RuleOffset} + } else { + frame.Regs[reg] = DWRule{Rule: RuleUndefined} + } +} + +func defcfa(frame *FrameContext) { + reg, _ := util.DecodeULEB128(frame.buf) + offset, _ := util.DecodeULEB128(frame.buf) + + frame.CFA.Rule = RuleCFA + frame.CFA.Reg = reg + frame.CFA.Offset = int64(offset) +} + +func defcfaregister(frame *FrameContext) { + reg, _ := util.DecodeULEB128(frame.buf) + frame.CFA.Reg = reg +} + +func defcfaoffset(frame *FrameContext) { + offset, _ := util.DecodeULEB128(frame.buf) + frame.CFA.Offset = int64(offset) +} + +func defcfasf(frame *FrameContext) { + reg, _ := util.DecodeULEB128(frame.buf) + offset, _ := util.DecodeSLEB128(frame.buf) + + frame.CFA.Rule = RuleCFA + frame.CFA.Reg = reg + frame.CFA.Offset = offset * frame.dataAlignment +} + +func defcfaoffsetsf(frame *FrameContext) { + offset, _ := util.DecodeSLEB128(frame.buf) + offset *= frame.dataAlignment + frame.CFA.Offset = offset +} + +func defcfaexpression(frame *FrameContext) { + var ( + l, _ = util.DecodeULEB128(frame.buf) + expr = frame.buf.Next(int(l)) + ) + + frame.CFA.Expression = expr + frame.CFA.Rule = RuleExpression +} + +func expression(frame *FrameContext) { + var ( + reg, _ = util.DecodeULEB128(frame.buf) + l, _ = util.DecodeULEB128(frame.buf) + expr = frame.buf.Next(int(l)) + ) + + frame.Regs[reg] = DWRule{Rule: RuleExpression, Expression: 
expr} +} + +func offsetextendedsf(frame *FrameContext) { + var ( + reg, _ = util.DecodeULEB128(frame.buf) + offset, _ = util.DecodeSLEB128(frame.buf) + ) + + frame.Regs[reg] = DWRule{Offset: offset * frame.dataAlignment, Rule: RuleOffset} +} + +func valoffset(frame *FrameContext) { + var ( + reg, _ = util.DecodeULEB128(frame.buf) + offset, _ = util.DecodeULEB128(frame.buf) + ) + + frame.Regs[reg] = DWRule{Offset: int64(offset), Rule: RuleValOffset} +} + +func valoffsetsf(frame *FrameContext) { + var ( + reg, _ = util.DecodeULEB128(frame.buf) + offset, _ = util.DecodeSLEB128(frame.buf) + ) + + frame.Regs[reg] = DWRule{Offset: offset * frame.dataAlignment, Rule: RuleValOffset} +} + +func valexpression(frame *FrameContext) { + var ( + reg, _ = util.DecodeULEB128(frame.buf) + l, _ = util.DecodeULEB128(frame.buf) + expr = frame.buf.Next(int(l)) + ) + + frame.Regs[reg] = DWRule{Rule: RuleValExpression, Expression: expr} +} + +func louser(frame *FrameContext) { + frame.buf.Next(1) +} + +func hiuser(frame *FrameContext) { + frame.buf.Next(1) +} + +func gnuargsize(frame *FrameContext) { + // The DW_CFA_GNU_args_size instruction takes an unsigned LEB128 operand representing an argument size. + // Just read and do nothing. + // TODO(kakkoyun): !! + _, _ = util.DecodeSLEB128(frame.buf) +} + +// TODO(kakkoyun): ? How to move cursor without corrupting? Do we actually need to do this? 
+func unknown(frame *FrameContext) { + _, err := frame.buf.ReadByte() + if err != nil { + panic("Could not read byte") + } +} diff --git a/pkg/stack/frame/testdata/frame b/pkg/stack/frame/testdata/frame new file mode 100644 index 0000000000..a500558a79 Binary files /dev/null and b/pkg/stack/frame/testdata/frame differ diff --git a/pkg/stack/stack.go b/pkg/stack/stack.go new file mode 100644 index 0000000000..105c545181 --- /dev/null +++ b/pkg/stack/stack.go @@ -0,0 +1,14 @@ +// Copyright 2021 The Parca Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stack diff --git a/pkg/stack/unwind/testdata/redpanda.frame b/pkg/stack/unwind/testdata/redpanda.frame new file mode 100755 index 0000000000..093b443244 Binary files /dev/null and b/pkg/stack/unwind/testdata/redpanda.frame differ diff --git a/pkg/stack/unwind/unwind.go b/pkg/stack/unwind/unwind.go new file mode 100644 index 0000000000..22afa4f7e9 --- /dev/null +++ b/pkg/stack/unwind/unwind.go @@ -0,0 +1,213 @@ +// Copyright 2021 The Parca Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package unwind + +import ( + "bytes" + "debug/elf" + "encoding/binary" + "fmt" + "path" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/google/pprof/profile" + "github.com/parca-dev/parca-agent/pkg/buildid" + "github.com/parca-dev/parca-agent/pkg/maps" + "github.com/parca-dev/parca-agent/pkg/stack/frame" +) + +type Unwinder struct { + logger log.Logger + fileCache *maps.PidMappingFileCache +} + +type Op uint8 // TODO(kakkoyun): A better type? + +// TODO(kakkoyun): Clean up comments. +const ( + // This type of register is not supported. + OpUnimplemented Op = iota + // Undefined register. The value will be defined at some later IP in the same DIE. + OpUndefined + // Value stored at some offset from `CFA`. + OpCfaOffset + // Value of a machine register plus offset. + OpRegister +) + +type Instruction struct { + Op Op + Reg uint64 + Off int64 +} + +func (i Instruction) Bytes(order binary.ByteOrder) []byte { + buf := new(bytes.Buffer) + var data = []interface{}{ + uint8(i.Op), + i.Reg, + i.Off, + } + for _, v := range data { + err := binary.Write(buf, order, v) + if err != nil { + fmt.Println("binary.Write failed:", err) + } + } + return buf.Bytes() +} + +type PlanTableRow struct { + Begin, End uint64 + RIP, RSP Instruction +} + +type PlanTable []PlanTableRow + +func (t PlanTable) Len() int { return len(t) } +func (t PlanTable) Less(i, j int) bool { return t[i].Begin < t[j].Begin } +func (t PlanTable) Swap(i, j int) { t[i], t[j] = t[j], t[i] } + +func NewUnwinder(logger log.Logger, fileCache *maps.PidMappingFileCache) *Unwinder { + return &Unwinder{logger: logger, fileCache: fileCache} +} + +func (u *Unwinder) UnwindTableForPid(pid uint32) (map[profile.Mapping]PlanTable, error) { + level.Warn(u.logger).Log("msg", "unwind.UnwindTableForPid", "pid", pid) + mappings, err := u.fileCache.MappingForPid(pid) + if err != nil { + return nil, 
err + } + + if len(mappings) == 0 { + return nil, fmt.Errorf("no mapping found for pid %d", pid) + } + + // TODO(kakkoyun): Remove. + level.Debug(u.logger).Log("msg", "unwind.UnwindTableForPid", "pid", pid, "mappings", len(mappings)) + res := map[profile.Mapping]PlanTable{} + for _, m := range mappings { + if m.BuildID == "" || m.File == "[vdso]" || m.File == "[vsyscall]" { + continue + } + + // TODO(kakkoyun): Only read the executable. + abs := path.Join(fmt.Sprintf("/proc/%d/root", pid), m.File) + fdes, err := readFDEs(abs, m.Start) + if err != nil { + level.Warn(u.logger).Log("msg", "failed to read frame description entries", "obj", abs, "err", err) + continue + } + + res[*m] = buildTable(fdes) + } + + return res, nil +} + +var fdeCache = map[string]frame.FrameDescriptionEntries{} + +func readFDEs(path string, start uint64) (frame.FrameDescriptionEntries, error) { + buildID, err := buildid.BuildID(path) + if err != nil { + return nil, err + } + + if fde, ok := fdeCache[buildID]; ok { + return fde, nil + } + + obj, err := elf.Open(path) + if err != nil { + return nil, fmt.Errorf("failed to open elf: %w", err) + } + defer obj.Close() + + // TODO(kakkoyun): Consider using the following section as a fallback. + // unwind, err := obj.Section(".debug_frame").Data() + + sec := obj.Section(".eh_frame") + if sec == nil { + return nil, fmt.Errorf("failed to find .eh_frame section") + } + + ehFrame, err := sec.Data() + if err != nil { + return nil, fmt.Errorf("failed to read .eh_frame section: %w", err) + } + + // TODO(kakkoyun): Cache the unwind plan table. + // TODO(kakkoyun): Can we assume byte order of ELF file same with .eh_frame? We can, right?! 
+ fde, err := frame.Parse(ehFrame, obj.ByteOrder, start, pointerSize(obj.Machine), sec.Addr) + if err != nil { + return nil, fmt.Errorf("failed to parse frame data: %w", err) + } + + fdeCache[buildID] = fde + return fde, nil +} + +func buildTable(fdes frame.FrameDescriptionEntries) PlanTable { + table := make(PlanTable, 0, len(fdes)) + for _, fde := range fdes { + table = append(table, buildTableRow(fde)) + } + + return table +} + +func buildTableRow(fde *frame.FrameDescriptionEntry) PlanTableRow { + row := PlanTableRow{ + Begin: fde.Begin(), + End: fde.End(), + } + + fc := frame.ExecuteDwarfProgram(fde) + + // TODO(kakkoyun): Validate. + // TODO(kakkoyun): Filter noop instructions. + + // RetAddrReg is populated by frame.ExecuteDwarfProgram executeCIEInstructions. + // TODO(kakkoyun): Is this enough do we need to any arch specific look up? + // - https://github.com/go-delve/delve/blob/master/pkg/dwarf/regnum + rule, found := fc.Regs[fc.RetAddrReg] + if found { + switch rule.Rule { + case frame.RuleOffset: + row.RIP = Instruction{Op: OpCfaOffset, Off: rule.Offset} + case frame.RuleUndefined: + row.RIP = Instruction{Op: OpUndefined} + default: + row.RIP = Instruction{Op: OpUnimplemented} + } + } else { + row.RIP = Instruction{Op: OpUnimplemented} + } + + row.RSP = Instruction{Op: OpRegister, Reg: fc.CFA.Reg, Off: fc.CFA.Offset} + + return row +} + +func pointerSize(arch elf.Machine) int { + switch arch { + case elf.EM_386: + return 4 + case elf.EM_AARCH64, elf.EM_X86_64: + return 8 + default: + return 0 + } +} diff --git a/pkg/stack/unwind/unwind_test.go b/pkg/stack/unwind/unwind_test.go new file mode 100644 index 0000000000..7965055cee --- /dev/null +++ b/pkg/stack/unwind/unwind_test.go @@ -0,0 +1,32 @@ +// Copyright 2021 The Parca Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package unwind + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestBuildUnwindTable(t *testing.T) { + fdes, err := readFDEs("testdata/redpanda.frame", 0) + require.NoError(t, err) + + planTable := buildTable(fdes) + require.Equal(t, len(fdes), len(planTable)) + require.Equal(t, uint64(0xfb6960), planTable[0].Begin) + require.Equal(t, uint64(0xfb698f), planTable[0].End) + require.Equal(t, Instruction{Op: OpUndefined}, planTable[0].RIP) + require.Equal(t, Instruction{Op: 3, Reg: 0x7, Off: 8}, planTable[0].RSP) +} diff --git a/scripts/check-license.sh b/scripts/check-license.sh index 993cbeb3a1..ed8023c803 100755 --- a/scripts/check-license.sh +++ b/scripts/check-license.sh @@ -7,7 +7,7 @@ set -o pipefail set -u licRes=$( - find . -type f -iname '*.go' ! -path '*/vendor/*' ! -path '*/pkg/internal/pprof/*' -exec \ + find . -type f -iname '*.go' ! -path '*/vendor/*' ! -path '*/pkg/internal/pprof/*' ! -path '*/pkg/stack/frame/*' -exec \ sh -c 'head -n3 $1 | grep -Eq "(Copyright|generated|GENERATED)" || echo -e $1' {} {} \; ) diff --git a/scripts/local-dev.sh b/scripts/local-dev.sh index 2640802d4d..0cfdf7fec2 100644 --- a/scripts/local-dev.sh +++ b/scripts/local-dev.sh @@ -17,9 +17,18 @@ function up() { echo "----------------------------------------------------------" else ctlptl create registry ctlptl-registry || echo 'Registry already exists' + FILE=$(pwd)/minikube.iso + if [ -f "$FILE" ]; then + echo "$FILE exists." + else + echo "$FILE does not exist. Downloading..." 
+ wget https://github.com/eiffel-fl/minikube/releases/download/v42-prerelease/minikube-5.10.iso + mv minikube-5.10.iso minikube.iso + fi # kvm2, hyperkit, hyperv, vmwarefusion, virtualbox, vmware, xhyve minikube start -p parca-agent \ - --driver=virtualbox \ + --driver=kvm2 \ + --iso-url=file://$(pwd)/minikube.iso \ --kubernetes-version=v1.22.3 \ --cpus=12 \ --memory=40gb \ @@ -36,6 +45,7 @@ function up() { # Tears down a local minikube cluster function down() { + docker rm -f ctlptl-registry minikube delete -p parca-agent }