Skip to content

Commit

Permalink
Implement an 'aggs' per-CPU BPF map to store aggregation data
Browse files Browse the repository at this point in the history
Aggregation data consists of data items (all uint64_t) that are updated
when an aggregation function is executed.  This is quite different from
other data recording actions that append data to an output buffer.

We need support for concurrency control to allow reading the data during
non-atomic updates.  We use a latch mechanism, i.e. a multiversion
concurrency control mechanism, to satisfy these requirements.

Simply put, the per-CPU aggregation data area stores a sequence id that
tracks the generation of the data updates.  Since only one aggregation
can be updated at a time on a single CPU, one latch is sufficient for
all aggregations (per CPU-buffer).  We also allocate two copies of each
aggregation, which we will call A and B.

When we update the aggregation data, we first modify A and then we modify
B, ensuring that while one copy is being modified, the reader is directed
to read from the other.

Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
Reviewed-by: Eugene Loh <eugene.loh@oracle.com>
  • Loading branch information
kvanhees committed Nov 30, 2020
1 parent 1ca381c commit 287e4bf
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 22 deletions.
20 changes: 19 additions & 1 deletion libdtrace/dt_bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ set_task_offsets(dtrace_hdl_t *dtp)
* - state: DTrace session state, used to communicate state between BPF
* programs and userspace. The content of the map is defined in
* dt_state.h.
* - aggs: Aggregation data buffer map, associated with each CPU. The
* map is implemented as a global per-CPU map with a singleton
* element (key 0). Every aggregation is stored with two copies
* of its data to provide a lockless latch-based mechanism for
* atomic reading and writing.
* - buffers: Perf event output buffer map, associating a perf event output
* buffer with each CPU. The map is indexed by CPU id.
* - cpuinfo: CPU information map, associating a cpuinfo_t structure with
Expand Down Expand Up @@ -191,7 +196,7 @@ set_task_offsets(dtrace_hdl_t *dtp)
int
dt_bpf_gmap_create(dtrace_hdl_t *dtp)
{
int gvarc, tvarc;
int gvarc, tvarc, aggsz;
int ci_mapfd;
uint32_t key = 0;

Expand All @@ -202,6 +207,9 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
/* Mark global maps creation as completed. */
dt_gmap_done = 1;

/* Determine the aggregation buffer size. */
aggsz = dt_idhash_nextoff(dtp->dt_aggs, 1, 0);

/* Determine the number of global and TLS variables. */
gvarc = dt_idhash_peekid(dtp->dt_globals) - DIF_VAR_OTHER_UBASE;
tvarc = dt_idhash_peekid(dtp->dt_tls) - DIF_VAR_OTHER_UBASE;
Expand All @@ -214,6 +222,16 @@ dt_bpf_gmap_create(dtrace_hdl_t *dtp)
if (dtp->dt_stmap_fd == -1)
return -1; /* dt_errno is set for us */

/*
* If there is aggregation data to be collected, we need to add a
* uint64_t to the map value size to hold a latch sequence number (seq)
* for concurrent access to the data.
*/
if (aggsz > 0 &&
create_gmap(dtp, "aggs", BPF_MAP_TYPE_PERCPU_ARRAY,
sizeof(uint32_t), sizeof(uint64_t) + aggsz, 1) == -1)
return -1; /* dt_errno is set for us */

if (create_gmap(dtp, "buffers", BPF_MAP_TYPE_PERF_EVENT_ARRAY,
sizeof(uint32_t), sizeof(uint32_t),
dtp->dt_conf.num_online_cpus) == -1)
Expand Down
16 changes: 9 additions & 7 deletions libdtrace/dt_dctx.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,14 @@ typedef struct dt_dctx {
dt_activity_t *act; /* pointer to activity state */
dt_mstate_t *mst; /* DTrace machine state */
char *buf; /* Output buffer scratch memory */
char *agg; /* Aggregation data */
} dt_dctx_t;

#define DCTX_CTX offsetof(dt_dctx_t, ctx)
#define DCTX_ACT offsetof(dt_dctx_t, act)
#define DCTX_MST offsetof(dt_dctx_t, mst)
#define DCTX_BUF offsetof(dt_dctx_t, buf)
#define DCTX_AGG offsetof(dt_dctx_t, agg)
#define DCTX_SIZE ((int16_t)sizeof(dt_dctx_t))

/*
Expand Down Expand Up @@ -92,23 +94,23 @@ typedef struct dt_dctx {
* +----------------+
* SCRATCH_BASE = -512 | Scratch Memory |
* +----------------+
* LVAR_END = LVAR(n) = -256 | LVAR n | (n = DT_LVAR_MAX = 18)
* LVAR_END = LVAR(n) = -256 | LVAR n | (n = DT_LVAR_MAX = 17)
* +----------------+
* | ... |
* +----------------+
* LVAR(1) = -120 | LVAR 1 |
* LVAR(1) = -128 | LVAR 1 |
* +----------------+
* LVAR_BASE = LVAR(0) = -112 | LVAR 0 |
* LVAR_BASE = LVAR(0) = -120 | LVAR 0 |
* +----------------+
* SPILL(n) = -104 | %r8 | (n = DT_STK_NREGS - 1 = 8)
* SPILL(n) = -112 | %r8 | (n = DT_STK_NREGS - 1 = 8)
* +----------------+
* | ... |
* +----------------+
* SPILL(1) = -48 | %r1 |
* SPILL(1) = -56 | %r1 |
* +----------------+
* SPILL_BASE = SPILL(0) = -40 | %r0 |
* SPILL_BASE = SPILL(0) = -48 | %r0 |
* +----------------+
* DCTX = -32 | DTrace Context | -1
* DCTX = -40 | DTrace Context | -1
* +----------------+
*/
#define DT_STK_BASE ((int16_t)0)
Expand Down
1 change: 1 addition & 0 deletions libdtrace/dt_dlibs.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ static const dt_ident_t dt_bpf_symbols[] = {
DT_BPF_SYMBOL(dt_set_tvar, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_strnlen, DT_IDENT_SYMBOL),
/* BPF maps */
DT_BPF_SYMBOL(aggs, DT_IDENT_PTR),
DT_BPF_SYMBOL(buffers, DT_IDENT_PTR),
DT_BPF_SYMBOL(cpuinfo, DT_IDENT_PTR),
DT_BPF_SYMBOL(gvars, DT_IDENT_PTR),
Expand Down
28 changes: 14 additions & 14 deletions test/unittest/codegen/tst.stack_layout.r
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
Base: 0
dctx: -32
%r0: -40
%r1: -48
%r2: -56
%r3: -64
%r4: -72
%r5: -80
%r6: -88
%r7: -96
%r8: -104
lvar[ -1]: -111 (ID -1)
lvar[ 0]: -112 (ID 0)
lvar[ 1]: -120 (ID 1)
lvar[ 18]: -256 (ID 18)
dctx: -40
%r0: -48
%r1: -56
%r2: -64
%r3: -72
%r4: -80
%r5: -88
%r6: -96
%r7: -104
%r8: -112
lvar[ -1]: -119 (ID -1)
lvar[ 0]: -120 (ID 0)
lvar[ 1]: -128 (ID 1)
lvar[ 17]: -256 (ID 17)
lvar[ -1]: -257 (ID -1)
scratch: -257 .. -512

0 comments on commit 287e4bf

Please sign in to comment.