Use 64 bit hash and fix out of bounds access (#2102)
There don't seem to be any statistically significant changes in
performance. To exercise the profiler, I filtered for Ruby processes and
ran `loop { puts :a }` (see test plan).
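
For reference, the workload is a one-liner; assuming a stock Ruby interpreter, the invocation below reproduces it (the `ruby -e` wrapper is an assumption, only the loop body comes from this message):

```
ruby -e 'loop { puts :a }'
```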

Test Plan
=========

https://pprof.me/fc0fff3/

```
sudo bpftool prog | grep profile_cpu
```
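
For context, `run_time_ns` and `run_cnt` are cumulative counters, so the per-run figures annotated below are simply their ratio (e.g. 136221451 / 24408 ≈ 5581.02 ns per run). bpftool only reports them while BPF statistics collection is enabled:

```
sudo sysctl -w kernel.bpf_stats_enabled=1
```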

```
2938: perf_event  name profile_cpu  tag 4554576c1d62dc61  gpl run_time_ns 136221451 run_cnt 24408 -> 5581.02 ns / run
      2268 run_cnt
  60823512 cycles       -> 26818.13 cycles / run
  42618949 instructions -> 18791.42 instructions / run  # 0.70 insns per cycle
```

```
3004: perf_event  name profile_cpu  tag 75f4ac73b948f3c0  gpl run_time_ns 144678180 run_cnt 23700 -> 6104.56 ns / run
      2279 run_cnt
  60910573 cycles       -> 26726.89 cycles / run
  42422810 instructions -> 18614.66 instructions / run  # 0.70 insns per cycle
```
javierhonduco committed Oct 4, 2023
2 parents 3ad4223 + cc0c09e commit 442eead
Showing 8 changed files with 24 additions and 56 deletions.
1 change: 1 addition & 0 deletions bpf/Makefile
```diff
@@ -61,6 +61,7 @@ BPF_CFLAGS = -Wno-address-of-packed-member \
 	-Wdate-time \
 	-Wunused \
 	-Wall \
+	-Werror \
 	-fno-stack-protector \
 	-fno-jump-tables \
 	-fno-unwind-tables \
```
56 changes: 12 additions & 44 deletions bpf/unwinders/hash.h
```diff
@@ -1,58 +1,26 @@
 #include "common.h"
 
-// Avoid pulling in any other headers.
-typedef unsigned int uint32_t;
-
 // murmurhash2 from
 // https://github.com/aappleby/smhasher/blob/92cf3702fcfaadc84eb7bef59825a23e0cd84f56/src/MurmurHash2.cpp
-uint32_t MurmurHash2(const void *key, int len, uint32_t seed) {
-  /* 'm' and 'r' are mixing constants generated offline.
-     They're not really 'magic', they just happen to work well. */
-
-  const uint32_t m = 0x5bd1e995;
-  const int r = 24;
-
-  /* Initialize the hash to a 'random' value */
-
-  uint32_t h = seed ^ len;
+unsigned long long hash_stack(stack_trace_t *stack, int seed) {
+  const unsigned long long m = 0xc6a4a7935bd1e995LLU;
+  const int r = 47;
+  unsigned long long hash = seed ^ (stack->len * m);
 
-  /* Mix 4 bytes at a time into the hash */
-
-  const unsigned char *data = (const unsigned char *)key;
-  // MAX_STACK_DEPTH * 2 = 256 (because we hash 32 bits at a time).
-  for (int i = 0; i < 256; i++) {
-    if (len < 4) {
-      break;
-    }
-    uint32_t k = *(uint32_t *)data;
+  for(int i=0; i<MAX_STACK_DEPTH; i++){
+    unsigned long long k = stack->addresses[i];
 
     k *= m;
     k ^= k >> r;
     k *= m;
 
-    h *= m;
-    h ^= k;
-
-    data += 4;
-    len -= 4;
+    hash ^= k;
+    hash *= m;
   }
 
-  /* Handle the last few bytes of the input array */
-
-  switch (len) {
-  case 3:
-    h ^= data[2] << 16;
-  case 2:
-    h ^= data[1] << 8;
-  case 1:
-    h ^= data[0];
-    h *= m;
-  };
-
-  /* Do a few final mixes of the hash to ensure the last few
-     bytes are well-incorporated. */
-
-  h ^= h >> 13;
-  h *= m;
-  h ^= h >> 15;
-
-  return h;
+  return hash;
 }
```
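
To make the new hash easier to eyeball, here is a self-contained userspace sketch: the function body follows the diff above, while `stack_trace_t`, `MAX_STACK_DEPTH`, and the `main` harness are stand-ins for the real definitions in `common.h`. It is a fixed-length variant of MurmurHash64A: every slot is mixed, so the loop bound is a compile-time constant independent of `stack->len`.

```c
#include <stdio.h>

// Stand-ins for the definitions in common.h; layout assumed for illustration.
#define MAX_STACK_DEPTH 127

typedef struct {
    unsigned long long len;
    unsigned long long addresses[MAX_STACK_DEPTH];
} stack_trace_t;

// 64-bit stack hash as in the diff above: seed with the stack length,
// then mix all MAX_STACK_DEPTH address slots unconditionally.
unsigned long long hash_stack(stack_trace_t *stack, int seed) {
    const unsigned long long m = 0xc6a4a7935bd1e995LLU;
    const int r = 47;
    unsigned long long hash = seed ^ (stack->len * m);

    for (int i = 0; i < MAX_STACK_DEPTH; i++) {
        unsigned long long k = stack->addresses[i];

        k *= m;
        k ^= k >> r;
        k *= m;

        hash ^= k;
        hash *= m;
    }

    return hash;
}

int main(void) {
    stack_trace_t a = {.len = 2, .addresses = {0x1000, 0x2000}};
    stack_trace_t b = {.len = 2, .addresses = {0x2000, 0x1000}};
    // The same addresses in a different order should hash differently.
    printf("%llx\n%llx\n", hash_stack(&a, 0), hash_stack(&b, 0));
    return 0;
}
```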
5 changes: 2 additions & 3 deletions bpf/unwinders/native.bpf.c
```diff
@@ -27,7 +27,6 @@
 #define MAX_TAIL_CALLS 19
 
 // Maximum number of frames.
-#define MAX_STACK_DEPTH 127
 _Static_assert(MAX_TAIL_CALLS *MAX_STACK_DEPTH_PER_PROGRAM >= MAX_STACK_DEPTH, "enough iterations to traverse the whole stack");
 // Number of unique stacks.
 #define MAX_STACK_TRACES_ENTRIES 64000
@@ -213,7 +212,7 @@ BPF_HASH(debug_threads_ids, int, u8, 1); // Table size will be updated in usersp
 BPF_HASH(process_info, int, process_info_t, MAX_PROCESSES);
 
 BPF_STACK_TRACE(stack_traces, MAX_STACK_TRACES_ENTRIES);
-BPF_HASH(dwarf_stack_traces, int, stack_trace_t, MAX_STACK_TRACES_ENTRIES);
+BPF_HASH(dwarf_stack_traces, u64, stack_trace_t, MAX_STACK_TRACES_ENTRIES);
 
 BPF_HASH(unwind_info_chunks, u64, unwind_info_chunks_t,
          5 * 1000); // Mapping of executable ID to unwind info chunks.
@@ -630,7 +629,7 @@ static __always_inline void add_stack(struct bpf_perf_event_data *ctx, u64 pid_t
   stack_key->tgid = per_thread_id;
 
   if (method == STACK_WALKING_METHOD_DWARF) {
-    int stack_hash = MurmurHash2((u32 *)unwind_state->stack.addresses, MAX_STACK_DEPTH * sizeof(u64) / sizeof(u32), 0);
+    u64 stack_hash = hash_stack(&unwind_state->stack, 0);
     LOG("native stack hash %d", stack_hash);
     stack_key->user_stack_id_dwarf_id = stack_hash;
 
```
2 changes: 1 addition & 1 deletion bpf/unwinders/pyperf.bpf.c
```diff
@@ -393,7 +393,7 @@ int walk_python_stack(struct bpf_perf_event_data *ctx) {
   LOG("[stop] walk_python_stack");
 
   // Hash stack.
-  int stack_hash = MurmurHash2((u32 *)state->sample.stack.addresses, MAX_STACK * sizeof(u64) / sizeof(u32), 0);
+  u64 stack_hash = hash_stack(&state->sample.stack, 0);
   LOG("[debug] stack hash: %d", stack_hash);
 
   // Insert stack.
```
2 changes: 1 addition & 1 deletion bpf/unwinders/rbperf.bpf.c
```diff
@@ -267,7 +267,7 @@ int walk_ruby_stack(struct bpf_perf_event_data *ctx) {
   }
 
   // Hash stack.
-  int ruby_stack_hash = MurmurHash2((u32 *)state->stack.frames.addresses, MAX_STACK * sizeof(u64) / sizeof(u32), 0);
+  u64 ruby_stack_hash = hash_stack(&state->stack.frames, 0);
   LOG("[debug] ruby stack hash: %d", ruby_stack_hash);
 
   unwind_state_t *unwind_state = bpf_map_lookup_elem(&heap, &zero);
```
6 changes: 3 additions & 3 deletions bpf/unwinders/shared.h
```diff
@@ -14,8 +14,8 @@ typedef struct {
   int tgid;
   int user_stack_id;
   int kernel_stack_id;
-  int user_stack_id_dwarf_id;
-  int interpreter_stack_id;
+  u64 user_stack_id_dwarf_id;
+  u64 interpreter_stack_id;
 } stack_count_key_t;
 
 typedef struct {
@@ -47,7 +47,7 @@ struct {
 struct {
   __uint(type, BPF_MAP_TYPE_HASH);
   __uint(max_entries, MAX_STACK_COUNTS_ENTRIES);
-  __type(key, int);
+  __type(key, u64);
   __type(value, stack_trace_t);
 } interpreter_stack_traces SEC(".maps"); // TODO think about this.
 
```
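
One thing worth keeping in mind with this widening: `stack_count_key_t` is used as a BPF hash-map key, and map keys are compared as raw bytes, so any compiler-inserted padding must be zeroed consistently on both the BPF side and the Go side. An illustrative check, not code from the commit, limited to the fields visible in this hunk (the real struct may have more):

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

// Fields from the shared.h hunk above; illustration only.
typedef struct {
    int tgid;
    int user_stack_id;
    int kernel_stack_id;
    u64 user_stack_id_dwarf_id;
    u64 interpreter_stack_id;
} stack_count_key_t;

int main(void) {
    // With three leading ints, the u64 members force 4 bytes of padding
    // on LP64 targets: sizeof is 32 while the members sum to 28.
    printf("sizeof=%zu members=%zu dwarf_id offset=%zu\n",
           sizeof(stack_count_key_t),
           3 * sizeof(int) + 2 * sizeof(u64),
           offsetof(stack_count_key_t, user_stack_id_dwarf_id));
    return 0;
}
```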
4 changes: 2 additions & 2 deletions pkg/profiler/cpu/bpf/maps/maps.go
```diff
@@ -928,7 +928,7 @@ func (m *Maps) ReadUserStack(userStackID int32, stack *bpfprograms.CombinedStack
 }
 
 // ReadUserStackWithDwarf reads the DWARF walked user stack traces into the given buffer.
-func (m *Maps) ReadUserStackWithDwarf(userStackID int32, stack *bpfprograms.CombinedStack) error {
+func (m *Maps) ReadUserStackWithDwarf(userStackID uint64, stack *bpfprograms.CombinedStack) error {
 	if userStackID == 0 {
 		return ErrUnwindFailed
 	}
@@ -975,7 +975,7 @@ func cStringToGo(in []uint8) string {
 }
 
 // ReadInterpreterStack fills in the stack with the interpreter frame ids.
-func (m *Maps) ReadInterpreterStack(interpreterStackID int32, stack []uint64) (map[uint32]*profile.Function, error) {
+func (m *Maps) ReadInterpreterStack(interpreterStackID uint64, stack []uint64) (map[uint32]*profile.Function, error) {
 	var res map[uint32]*profile.Function
 
 	if interpreterStackID == 0 {
```
4 changes: 2 additions & 2 deletions pkg/profiler/cpu/cpu.go
```diff
@@ -912,8 +912,8 @@ type (
 		TID                int32
 		UserStackID        int32
 		KernelStackID      int32
-		UserStackIDDWARF   int32
-		InterpreterStackID int32
+		UserStackIDDWARF   uint64
+		InterpreterStackID uint64
 	}
 )
 
```
