Skip to content

Commit

Permalink
Store strings in the string table with varint length prefix
Browse files Browse the repository at this point in the history
With the introduction of variable length integers, strings in D (and
then also BPF) can be stored with their length embedded in the byte
stream.  This makes it possible to perform operations on strings
without the need to recalculate the length of the string multiple
times.

The string constant table stored in DIFOs contains strings that will
be made available to the BPF program by loading the string constant
table into a BPF map.  It makes sense to already store strings using
the variable length prefix when the string constant table is
constructed, which then also requires code that uses it to know
about the variable length integer prefixing each string.

This patch introduces a dt_difo_getstr() function that returns a
regular C char * entity given a DIFO and a string offset into the
string constant table for that DIFO.  Any code that needs to access
a string constant from a DIFO is updated to make use of this new
function.

The string constant table creation code is updated to account for
the fact that the empty string will now occupy 2 bytes (one for the
length, which is 0, and one for the terminating 0-byte).

Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
Reviewed-by: Eugene Loh <eugene.loh@oracle.com>
  • Loading branch information
kvanhees committed Jun 18, 2021
1 parent 71f26e7 commit aa9b705
Show file tree
Hide file tree
Showing 8 changed files with 104 additions and 51 deletions.
4 changes: 2 additions & 2 deletions libdtrace/dt_bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ dt_bpf_reloc_prog(dtrace_hdl_t *dtp, const dtrace_difo_t *dp)
struct bpf_insn *text = dp->dtdo_buf;

for (; len != 0; len--, rp++) {
char *name = &dp->dtdo_strtab[rp->dofr_name];
const char *name = dt_difo_getstr(dp, rp->dofr_name);
dt_ident_t *idp = dt_idhash_lookup(dtp->dt_bpfsyms, name);
int ioff = rp->dofr_offset /
sizeof(struct bpf_insn);
Expand Down Expand Up @@ -423,7 +423,7 @@ dt_bpf_reloc_error_prog(dtrace_hdl_t *dtp, dtrace_difo_t *dp)
struct bpf_insn *text = dp->dtdo_buf;

for (; len != 0; len--, rp++) {
char *name = &dp->dtdo_strtab[rp->dofr_name];
const char *name = dt_difo_getstr(dp, rp->dofr_name);
dt_ident_t *idp = dt_idhash_lookup(dtp->dt_bpfsyms, name);
int ioff = rp->dofr_offset /
sizeof(struct bpf_insn);
Expand Down
12 changes: 6 additions & 6 deletions libdtrace/dt_cc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2188,8 +2188,8 @@ dt_link_layout(dtrace_hdl_t *dtp, const dtrace_difo_t *dp, uint_t *pcp,
return pc;

for (; len != 0; len--, rp++) {
char *name = &dp->dtdo_strtab[rp->dofr_name];
dtrace_difo_t *rdp;
const char *name = dt_difo_getstr(dp, rp->dofr_name);
dtrace_difo_t *rdp;
int ipc;

idp = dt_dlib_get_func(dtp, name);
Expand Down Expand Up @@ -2251,7 +2251,7 @@ dt_link_construct(dtrace_hdl_t *dtp, const dt_probe_t *prp, dtrace_difo_t *dp,
*/
(*vcp) += vlen;
for (; vlen != 0; vlen--, vp++, nvp++) {
const char *name = &sdp->dtdo_strtab[vp->dtdv_name];
const char *name = dt_difo_getstr(sdp, vp->dtdv_name);

*nvp = *vp;
nvp->dtdv_name = dt_strtab_insert(stab, name);
Expand All @@ -2268,7 +2268,7 @@ dt_link_construct(dtrace_hdl_t *dtp, const dt_probe_t *prp, dtrace_difo_t *dp,
*/
(*rcp) += len;
for (; len != 0; len--, rp++, nrp++) {
const char *name = &sdp->dtdo_strtab[rp->dofr_name];
const char *name = dt_difo_getstr(sdp, rp->dofr_name);
dt_ident_t *idp = dt_dlib_get_func(dtp, name);

nrp->dofr_name = dt_strtab_insert(stab, name);
Expand All @@ -2294,7 +2294,7 @@ dt_link_construct(dtrace_hdl_t *dtp, const dt_probe_t *prp, dtrace_difo_t *dp,
rp = sdp->dtdo_breltab;
nrp = &dp->dtdo_breltab[rc];
for (; len != 0; len--, rp++, nrp++) {
const char *name = &sdp->dtdo_strtab[rp->dofr_name];
const char *name = dt_difo_getstr(sdp, rp->dofr_name);
dtrace_difo_t *rdp;
dtrace_epid_t nepid;
int ipc;
Expand Down Expand Up @@ -2376,7 +2376,7 @@ dt_link_resolve(dtrace_hdl_t *dtp, dtrace_difo_t *dp)
const dof_relodesc_t *rp = dp->dtdo_breltab;

for (; len != 0; len--, rp++) {
const char *name = &dp->dtdo_strtab[rp->dofr_name];
const char *name = dt_difo_getstr(dp, rp->dofr_name);
dt_ident_t *idp = dt_dlib_get_sym(dtp, name);
uint_t ioff = rp->dofr_offset /
sizeof(struct bpf_insn);
Expand Down
26 changes: 13 additions & 13 deletions libdtrace/dt_dis.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ dt_dis_varname_id(const dtrace_difo_t *dp, uint_t id, uint_t scope, uint_t addr)
if (dvp->dtdv_id == id && dvp->dtdv_scope == scope &&
dvp->dtdv_insn_from <= addr && addr <= dvp->dtdv_insn_to) {
if (dvp->dtdv_name < dp->dtdo_strlen)
return dp->dtdo_strtab + dvp->dtdv_name;
return dt_difo_getstr(dp, dvp->dtdv_name);
break;
}
}
Expand All @@ -73,7 +73,7 @@ dt_dis_varname_off(const dtrace_difo_t *dp, uint_t off, uint_t scope, uint_t add
if (dvp->dtdv_offset == off && dvp->dtdv_scope == scope &&
dvp->dtdv_insn_from <= addr && addr <= dvp->dtdv_insn_to) {
if (dvp->dtdv_name < dp->dtdo_strlen)
return dp->dtdo_strtab + dvp->dtdv_name;
return dt_difo_getstr(dp, dvp->dtdv_name);
break;
}
}
Expand Down Expand Up @@ -296,8 +296,6 @@ static char *
dt_dis_bpf_args(const dtrace_difo_t *dp, const char *fn,
const struct bpf_insn *in, char *buf, size_t len, uint_t addr)
{
char *s;

if (strcmp(fn, "dt_get_bvar") == 0) {
/*
* We know that the previous instruction exists and assigns
Expand All @@ -321,6 +319,9 @@ dt_dis_bpf_args(const dtrace_difo_t *dp, const char *fn,
DIFV_SCOPE_THREAD, addr));
return buf;
} else if (strcmp(fn, "dt_get_string") == 0) {
const char *s;
char *se;

/*
* We know that the previous instruction exists and assigns
* the string offset to %r1 (because we wrote the code
Expand All @@ -330,11 +331,10 @@ dt_dis_bpf_args(const dtrace_difo_t *dp, const char *fn,
if (in->imm >= dp->dtdo_strlen)
return NULL;

s = dp->dtdo_strtab + in->imm;
s = strchr2esc(s, strlen(s));
snprintf(buf, len, "\"%s\"n",
s ? s : dp->dtdo_strtab + in->imm);
free(s);
s = dt_difo_getstr(dp, in->imm);
se = strchr2esc(s, strlen(s));
snprintf(buf, len, "\"%s\"n", se ? se : s);
free(se);
return buf;
}

Expand Down Expand Up @@ -501,12 +501,12 @@ dt_dis_rtab(const char *rtag, const dtrace_difo_t *dp, FILE *fp,
fprintf(fp, "%-17s %-8llu %-8llu %s\n", tstr,
(u_longlong_t)rp->dofr_offset,
(u_longlong_t)rp->dofr_data,
&dp->dtdo_strtab[rp->dofr_name]);
dt_difo_getstr(dp, rp->dofr_name));
else
fprintf(fp, "%-17s %-8llu %-8s %s\n", tstr,
(u_longlong_t)rp->dofr_offset,
"*UND*",
&dp->dtdo_strtab[rp->dofr_name]);
dt_difo_getstr(dp, rp->dofr_name));
}
}

Expand Down Expand Up @@ -693,7 +693,7 @@ dt_dis_difo(const dtrace_difo_t *dp, FILE *fp, const dt_ident_t *idp,
if (rp->dofr_offset < i * sizeof(uint64_t))
continue;
if (rp->dofr_offset == i * sizeof(uint64_t))
rname = &dp->dtdo_strtab[rp->dofr_name];
rname = dt_difo_getstr(dp, rp->dofr_name);

break;
}
Expand Down Expand Up @@ -764,7 +764,7 @@ dt_dis_difo(const dtrace_difo_t *dp, FILE *fp, const dt_ident_t *idp,
strcat(flags, "/w");

fprintf(fp, "%-16s %-4x %-6s %-3s %-3s %-11s %-4s %s\n",
&dp->dtdo_strtab[v->dtdv_name], v->dtdv_id,
dt_difo_getstr(dp, v->dtdv_name), v->dtdv_id,
offset, kind, scope, range, flags + 1,
dt_dis_typestr(&v->dtdv_type, type, sizeof(type)));
}
Expand Down
1 change: 1 addition & 0 deletions libdtrace/dt_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,7 @@ extern void *dt_calloc(dtrace_hdl_t *, size_t, size_t);
extern void *dt_alloc(dtrace_hdl_t *, size_t);
extern void dt_free(dtrace_hdl_t *, void *);
extern void dt_difo_free(dtrace_hdl_t *, dtrace_difo_t *);
extern const char *dt_difo_getstr(const dtrace_difo_t *, ssize_t);

extern void dt_conf_init(dtrace_hdl_t *);

Expand Down
10 changes: 5 additions & 5 deletions libdtrace/dt_link.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Oracle Linux DTrace.
* Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
Expand Down Expand Up @@ -1255,7 +1255,7 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
* populate our string table and count the number of extra
* symbols we'll require.
*/
strtab = dt_strtab_create(1);
strtab = dt_strtab_create(BUFSIZ);
nsym = 0;
isym = data_sym->d_size / symsize;
istr = data_str->d_size;
Expand Down Expand Up @@ -1337,10 +1337,10 @@ process_obj(dtrace_hdl_t *dtp, const char *obj, int *eprobesp)
*/
if (nsym > 0) {
/*
* The first byte of the string table is reserved for
* the \0 entry.
* The first two bytes of the string table are reserved
* for the \0 entry.
*/
len = dt_strtab_size(strtab) - 1;
len = dt_strtab_size(strtab) - 2;

assert(len > 0);
assert(dt_strtab_index(strtab, "") == 0);
Expand Down
1 change: 1 addition & 0 deletions libdtrace/dt_open.c
Original file line number Diff line number Diff line change
Expand Up @@ -1262,6 +1262,7 @@ dtrace_close(dtrace_hdl_t *dtp)
free(dtp->dt_module_path);
free(dtp->dt_kernpaths);
free(dtp->dt_provs);
free(dtp->dt_strtab);
free(dtp);

dt_debug_dump(0);
Expand Down
90 changes: 66 additions & 24 deletions libdtrace/dt_strtab.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include <dt_strtab.h>
#include <dt_string.h>
#include <dt_varint.h>
#include <dt_impl.h>

static int
Expand Down Expand Up @@ -40,8 +41,9 @@ dt_strtab_grow(dt_strtab_t *sp)
dt_strtab_t *
dt_strtab_create(size_t bufsz)
{
dt_strtab_t *sp = malloc(sizeof(dt_strtab_t));
uint_t nbuckets = _dtrace_strbuckets;
dt_strtab_t *sp = malloc(sizeof(dt_strtab_t));
uint_t nbuckets = _dtrace_strbuckets;
int n;

assert(bufsz != 0);

Expand Down Expand Up @@ -71,7 +73,12 @@ dt_strtab_create(size_t bufsz)
* at offset 0. We use this guarantee in dt_strtab_insert() and
* dt_strtab_index().
*/
n = dt_int2vint(0, sp->str_ptr);
sp->str_ptr += n;
*sp->str_ptr++ = '\0';
sp->str_size = n + 1;
sp->str_nstrs = 1;

return sp;

err:
Expand Down Expand Up @@ -169,17 +176,9 @@ dt_strtab_copyin(dt_strtab_t *sp, const char *str, size_t len)
}

ssize_t
dt_strtab_index(dt_strtab_t *sp, const char *str)
dt_strtab_xindex(dt_strtab_t *sp, const char *str, size_t len, ulong_t h)
{
dt_strhash_t *hp;
size_t len;
ulong_t h;

if (str == NULL || str[0] == '\0')
return 0; /* The empty string is always at offset 0. */

len = strlen(str);
h = str2hval(str, 0) % sp->str_hashsz;
dt_strhash_t *hp;

for (hp = sp->str_hash[h]; hp != NULL; hp = hp->str_next) {
if (dt_strtab_compare(sp, hp, str, len + 1) == 0)
Expand All @@ -189,45 +188,88 @@ dt_strtab_index(dt_strtab_t *sp, const char *str)
return -1;
}

/*
 * Look up str in the string table sp and return its byte offset.
 *
 * Strings in the table carry a variable-length integer length prefix, so a
 * temporary lookup key of the same shape (prefix, string bytes, terminating
 * 0-byte) is built here and handed to dt_strtab_xindex() for comparison
 * against the entries on the selected hash chain.
 *
 * Returns 0 for NULL or the empty string, -1 if the temporary key cannot be
 * allocated, and otherwise whatever dt_strtab_xindex() reports (-1 when no
 * entry on the chain matches).
 */
ssize_t
dt_strtab_index(dt_strtab_t *sp, const char *str)
{
	size_t	nbytes, vlen;
	ulong_t	bucket;
	ssize_t	off;
	char	*key;

	/* The empty string is always at offset 0. */
	if (str == NULL || *str == '\0')
		return 0;

	/* Room for the largest possible prefix, the string, and its NUL. */
	nbytes = strlen(str);
	if ((key = malloc(VARINT_MAX_BYTES + nbytes + 1)) == NULL)
		return -1L;

	/* Encode the length prefix, then append the string with its NUL. */
	vlen = dt_int2vint(nbytes, key);
	memcpy(&key[vlen], str, nbytes + 1);

	/*
	 * The bucket is derived from the unprefixed string and its length;
	 * the comparison itself uses the prefixed key.
	 */
	bucket = str2hval(str, nbytes) % sp->str_hashsz;
	off = dt_strtab_xindex(sp, key, vlen + nbytes, bucket);

	free(key);
	return off;
}

ssize_t
dt_strtab_insert(dt_strtab_t *sp, const char *str)
{
dt_strhash_t *hp;
size_t len;
ssize_t off;
ulong_t h;
dt_strhash_t *hp;
size_t slen, plen;
ssize_t off;
ulong_t h;
char *s;

if (str == NULL || str[0] == '\0')
return 0; /* The empty string is always at offset 0. */

if ((off = dt_strtab_index(sp, str)) != -1)
return off;
slen = strlen(str);
s = malloc(VARINT_MAX_BYTES + slen + 1);
if (s == NULL)
return -1L;

plen = dt_int2vint(slen, s);
memcpy(s + plen, str, slen + 1);

len = strlen(str);
h = str2hval(str, 0) % sp->str_hashsz;
h = str2hval(str, slen) % sp->str_hashsz;
slen += plen;
off = dt_strtab_xindex(sp, s, slen, h);
if (off != -1) {
free(s);
return off;
}

/*
* Create a new hash bucket, initialize it, and insert it at the front
* of the hash chain for the appropriate bucket.
*/
if ((hp = malloc(sizeof(dt_strhash_t))) == NULL)
if ((hp = malloc(sizeof(dt_strhash_t))) == NULL) {
free(s);
return -1L;
}

hp->str_data = sp->str_ptr;
hp->str_buf = sp->str_nbufs - 1;
hp->str_off = sp->str_size;
hp->str_len = len;
hp->str_len = slen;
hp->str_next = sp->str_hash[h];

/*
* Now copy the string data into our buffer list, and then update
* the global counts of strings and bytes. Return str's byte offset.
*/
if (dt_strtab_copyin(sp, str, len + 1) == -1)
if (dt_strtab_copyin(sp, s, slen + 1) == -1) {
free(s);
free(hp);
return -1L;
}
free(s);

sp->str_nstrs++;
sp->str_size += len + 1;
sp->str_size += slen + 1;
sp->str_hash[h] = hp;

return hp->str_off;
Expand Down
11 changes: 10 additions & 1 deletion libdtrace/dt_subr.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Oracle Linux DTrace.
* Copyright (c) 2010, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2010, 2021, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
Expand All @@ -23,6 +23,7 @@
#include <sys/ioctl.h>
#include <port.h>

#include <dt_varint.h>
#include <dt_impl.h>
#include <sys/dtrace.h>

Expand Down Expand Up @@ -747,6 +748,14 @@ dt_difo_free(dtrace_hdl_t *dtp, dtrace_difo_t *dp)
dt_free(dtp, dp);
}

/*
 * Return the string constant stored at offset idx in the string table of the
 * DIFO dp.
 *
 * The offset refers to the start of the entry, i.e. its variable-length
 * integer prefix; the returned pointer is what dt_vint_skip() yields for that
 * position, so callers can treat it as a regular C string.
 *
 * idx is signed, so both bounds are asserted: a negative value (such as a
 * propagated -1 failure code) must never be used to index the table.
 */
const char *
dt_difo_getstr(const dtrace_difo_t *dp, ssize_t idx)
{
	assert(idx >= 0 && idx < dp->dtdo_strlen);

	return dt_vint_skip(&dp->dtdo_strtab[idx]);
}

/*
* dt_gmatch() is similar to gmatch(3GEN) and dtrace(7D) globbing, but also
* implements the behavior that an empty pattern matches any string.
Expand Down

0 comments on commit aa9b705

Please sign in to comment.