Skip to content

Commit

Permalink
Add support for NULL strings
Browse files Browse the repository at this point in the history
In C, strings are (char*), and it is possible to have a NULL pointer.

In D, strings have fixed length strsize.  There may still be NULL
pointers -- say, as inputs to strtok() or as the results of unsuccessful
calls to strchr() (and other functions).

Such NULL pointers are not a problem for storing strings as dynamic
variables (thread-local variables or associative arrays), since storing
0 clears a dynamic element and reading a cleared element means 0.

Static (global and local) variables are a different story.  Each
string has strsize bytes allocated for it, and every possible string
value is legal:  bytes up to the first NUL are part of the string
and subsequent bytes are ignored.  There is no value that represents
a NULL string.

Specifically, there is an important semantic difference between NULL
and empty strings.  The former are NULL pointers, while the latter are
strings that simply start with the NUL terminating char.

Add code to handle store and load of NULL-pointer strings to and from
global and local variables.

Specifically, define a byte string DT_NULL_STRING whose first byte is
0x00 but with extra nonzero bytes to distinguish between empty and NULL
strings.  An empty string stored as a static variable will have its
first bytes all zero.  A NULL string will have its first bytes be
DT_NULL_STRING.  That is, both will have initial byte 0, and then we
have to go further to distinguish the two cases.

We require strsize >= sizeof(DT_NULL_STRING), which is reasonable.

Note that comparisons between NULL and empty strings should work the
same way as comparisons between NULL strings and any other non-NULL
strings.  Solaris and legacy DTrace on Linux incorrectly treated NULL
and empty strings as equal in such comparisons.

Signed-off-by: Eugene Loh <eugene.loh@oracle.com>
Reviewed-by: Nick Alcock <nick.alcock@oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees@oracle.com>
  • Loading branch information
euloh authored and kvanhees committed May 25, 2023
1 parent ddd1454 commit 832b3d4
Show file tree
Hide file tree
Showing 29 changed files with 555 additions and 22 deletions.
127 changes: 118 additions & 9 deletions libdtrace/dt_cg.c
Original file line number Diff line number Diff line change
Expand Up @@ -2747,7 +2747,32 @@ dt_cg_load_var(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
emit(dlp, BPF_LOAD(BPF_DW, dnp->dn_reg, dnp->dn_reg, DCTX_GVARS));

/* load the variable value or address */
if (dnp->dn_flags & DT_NF_REF) {
if (dt_node_is_string(dnp)) {
/*
* Strings are a special case of by-reference. If we have
* a NULL string, we want to set the pointer to 0.
*/
size_t size = sizeof(DT_NULL_STRING);
int reg;
uint_t L1 = dt_irlist_label(dlp);
uint_t L2 = dt_irlist_label(dlp);

assert(dnp->dn_flags & DT_NF_REF);
assert(size > 0 && size <= 8 &&
(size & (size - 1)) == 0);

if ((reg = dt_regset_alloc(drp)) == -1)
longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
emit(dlp, BPF_LOAD(ldstw[size], reg, dnp->dn_reg, idp->di_offset));
emit(dlp, BPF_BRANCH_IMM(BPF_JNE, reg, DT_NULL_STRING, L1));
emit(dlp, BPF_MOV_IMM(dnp->dn_reg, 0));
emit(dlp, BPF_JUMP(L2));
emitl(dlp, L1,
BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, idp->di_offset));
emitl(dlp, L2,
BPF_NOP());
dt_regset_free(drp, reg);
} else if (dnp->dn_flags & DT_NF_REF) {
assert(!(dnp->dn_flags & DT_NF_ALLOCA));
emit(dlp, BPF_ALU64_IMM(BPF_ADD, dnp->dn_reg, idp->di_offset));
} else {
Expand Down Expand Up @@ -3382,11 +3407,64 @@ dt_cg_store_var(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
else
emit(dlp, BPF_LOAD(BPF_DW, reg, reg, DCTX_GVARS));

/* store by value or by reference */
if (dnp->dn_flags & DT_NF_REF) {
size_t srcsz;
/* Store */
if (dt_node_is_string(dnp) && dt_node_is_integer(dnp->dn_right)) {
/* Store a compile-time NULL pointer (string = NULL) */
/*
* The parser has already ensured the rhs is 0. In
* dt_cook_op2() for DT_TOK_ASGN, it checks dt_node_is_argcompat().
* Since rhs is not string compatible (string or char*), we move on
* to dt_node_is_ptrcompat(lp, rp, NULL, NULL). There we see a test
* if (rp_is_int && (rp->dn_kind != DT_NODE_INT || rp->dn_value != 0))
* return 0; // fail if rp is an integer that is not 0 constant
*/

size = sizeof(DT_NULL_STRING);

assert(dt_node_is_string(dnp) == dt_node_is_string(dnp->dn_left));
assert(idp->di_size == dt_node_type_size(dnp->dn_left));
assert(dt_node_is_integer(dnp->dn_right) == (dnp->dn_right->dn_kind == DT_NODE_INT));
assert(dnp->dn_right->dn_value == 0);
assert(size > 0 && size <= 8 &&
(size & (size - 1)) == 0);

emit(dlp, BPF_ALU64_IMM(BPF_ADD, reg, idp->di_offset));
emit(dlp, BPF_STORE_IMM(ldstw[size], reg, 0, DT_NULL_STRING));

/*
* Since strings are passed by value, we need to force
* the value of the assignment to be the destination
* address.
*/
dt_regset_free(drp, dnp->dn_reg);
dnp->dn_reg = reg;
} else if (dt_node_is_string(dnp)) {
/* General store to string */
uint_t Lnull = 0, Ldone = 0;
size_t srcsz;

assert(dt_node_is_string(dnp) == dt_node_is_string(dnp->dn_left));
assert(idp->di_size == dt_node_type_size(dnp->dn_left));
assert(dnp->dn_reg == dnp->dn_right->dn_reg);

emit(dlp, BPF_ALU64_IMM(BPF_ADD, reg, idp->di_offset));

/* Check if we are storing a NULL */
if (!(dnp->dn_right->dn_flags & DT_NF_ALLOCA)) {
Lnull = dt_irlist_label(dlp);
Ldone = dt_irlist_label(dlp);

emit(dlp, BPF_BRANCH_IMM(BPF_JEQ, dnp->dn_reg, 0, Lnull));
}

/* Normal store to string */

/* Start by zeroing out the first bytes */
size = sizeof(DT_NULL_STRING);
assert(size > 0 && size <= 8 &&
(size & (size - 1)) == 0);

emit(dlp, BPF_STORE_IMM(ldstw[size], reg, 0, 0));

/*
* Determine the amount of data to be copied. It is
Expand All @@ -3397,19 +3475,50 @@ dt_cg_store_var(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp,
size = MIN(srcsz, idp->di_size);

dt_cg_check_ptr_arg(dlp, drp, dnp->dn_right, NULL);

dt_cg_memcpy(dlp, drp, reg, dnp->dn_reg, size);

if (!(dnp->dn_right->dn_flags & DT_NF_ALLOCA)) {
emit(dlp, BPF_JUMP(Ldone));

/* Store a null pointer */
emitl(dlp, Lnull,
BPF_NOP());
size = sizeof(DT_NULL_STRING);
assert(size > 0 && size <= 8 && (size & (size - 1)) == 0);
emit(dlp, BPF_STORE_IMM(ldstw[size], reg, 0, DT_NULL_STRING));
emitl(dlp, Ldone,
BPF_NOP());
}

/*
* Since strings are passed by value, we need to force
* the value of the assignment to be the destination
* address.
*/
if (dt_node_is_string(dnp)) {
dt_regset_free(drp, dnp->dn_reg);
dnp->dn_reg = reg;
} else
dt_regset_free(drp, reg);

dt_regset_free(drp, dnp->dn_reg);
dnp->dn_reg = reg;
} else if (dnp->dn_flags & DT_NF_REF) {
/* Store by reference (copy) */
size_t srcsz;

emit(dlp, BPF_ALU64_IMM(BPF_ADD, reg, idp->di_offset));

/*
* Determine the amount of data to be copied. It is
* the lesser of the size of the identifier and the
* size of the data being copied in.
*/
srcsz = dt_node_type_size(dnp->dn_right);
size = MIN(srcsz, idp->di_size);

dt_cg_check_ptr_arg(dlp, drp, dnp->dn_right, NULL);
dt_cg_memcpy(dlp, drp, reg, dnp->dn_reg, size);

dt_regset_free(drp, reg);
} else {
/* Store by value */
size = idp->di_size;

assert(size > 0 && size <= 8 &&
Expand Down
23 changes: 23 additions & 0 deletions libdtrace/dt_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,29 @@ extern "C" {
# define __stringify(x) __stringify_(x)
#endif

/*
 * Encoding of a NULL string.
 *
 * An empty string (string = "") has 0x00 as its first byte.  A NULL string
 * -- basically, (char *)0 -- is stored with 0x00 as its first byte as well,
 * so it is the bytes that follow that tell the two apart: they are 0s for an
 * empty string, and a special set of nonzero bytes for a NULL string.
 *
 * The leading bytes of a NULL string are DT_NULL_STRING, which is chosen
 * such that:
 *	- its first byte is 0x00
 *	- its remaining bytes are nonzero
 *	- it is at least 2 bytes wide
 *	  (so there is at least one nonzero byte)
 *	- it is at most 4 bytes wide
 *	  (so it can be used as an IMM)
 *	- its highest bit is 0
 *	  (so it does not get sign extended when used as an IMM)
 *
 * The constant is spelled differently per byte order so that the 0x00 byte
 * comes first in memory and the 0x7f byte second (assuming _BIG_ENDIAN is
 * defined exactly on big-endian targets).
 *
 * strsize must be large enough to hold DT_NULL_STRING.
 */
#if !defined(_BIG_ENDIAN)
# define DT_NULL_STRING	((uint16_t)0x7f00)
#else
# define DT_NULL_STRING	((uint16_t)0x007f)
#endif

struct dt_module; /* see below */
struct dt_pfdict; /* see <dt_printf.h> */
struct dt_arg; /* see below */
Expand Down
5 changes: 5 additions & 0 deletions libdtrace/dt_options.c
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,11 @@ dt_opt_strsize(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
if (dt_opt_size(dtp, arg, option) != 0)
return -1; /* dt_errno is set for us */

if (dtp->dt_options[option] < sizeof(DT_NULL_STRING)) {
dtp->dt_options[option] = val;
return dt_set_errno(dtp, EDT_BADOPTVAL);
}

if (dtp->dt_options[option] > UINT_MAX) {
dtp->dt_options[option] = val;
return dt_set_errno(dtp, EOVERFLOW);
Expand Down
30 changes: 30 additions & 0 deletions test/unittest/codegen/err.deref_string-assoc.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Oracle Linux DTrace.
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/

/*
 * ASSERTION: Indexing into a string held in an associative array element
 *	      works for a regular string, and reports an error once the
 *	      element has been set to NULL.
 *
 * The expected results are the value 66 (ASCII 'B', the character at index 1
 * of "ABCDEFG") from the first clause, followed by an "invalid address"
 * error for the second clause.
 */

#pragma D option quiet

/* Regular string: index 1 is a valid character. */
BEGIN
{
s[1234] = "ABCDEFG";
trace(s[1234][1]);
}

/* NULL string: indexing it is expected to trigger the ERROR probe. */
BEGIN
{
s[1234] = NULL;
trace(s[1234][1]);
}

BEGIN
{
exit(0);
}

/* Any error terminates the script with exit status 1. */
ERROR
{
exit(1);
}
3 changes: 3 additions & 0 deletions test/unittest/codegen/err.deref_string-assoc.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
66
-- @@stderr --
dtrace: error on enabled probe ID 4 (ID 1: dtrace:::BEGIN): invalid address (1) in action #2 at BPF pc NNN
6 changes: 6 additions & 0 deletions test/unittest/codegen/err.deref_string-assoc.r.p
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/sed -f

# runtest.sh looks for "0x" to filter out pointer values.
# Strip the 0x so that the illegal address will not be filtered out;
# we want the address to be checked.
# Only the first "0x" on each line is removed (the substitution has no g flag).
s/0x//
30 changes: 30 additions & 0 deletions test/unittest/codegen/err.deref_string-gvar.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Oracle Linux DTrace.
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/

/*
 * ASSERTION: Indexing into a string held in a global variable works for a
 *	      regular string, and reports an error once the variable has
 *	      been set to NULL.
 *
 * The expected results are the value 66 (ASCII 'B', the character at index 1
 * of "ABCDEFG") from the first clause, followed by an "invalid address"
 * error for the second clause.
 */

#pragma D option quiet

/* Regular string: index 1 is a valid character. */
BEGIN
{
s = "ABCDEFG";
trace(s[1]);
}

/* NULL string: indexing it is expected to trigger the ERROR probe. */
BEGIN
{
s = NULL;
trace(s[1]);
}

BEGIN
{
exit(0);
}

/* Any error terminates the script with exit status 1. */
ERROR
{
exit(1);
}
3 changes: 3 additions & 0 deletions test/unittest/codegen/err.deref_string-gvar.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
66
-- @@stderr --
dtrace: error on enabled probe ID 4 (ID 1: dtrace:::BEGIN): invalid address (1) in action #2 at BPF pc NNN
6 changes: 6 additions & 0 deletions test/unittest/codegen/err.deref_string-gvar.r.p
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/sed -f

# runtest.sh looks for "0x" to filter out pointer values.
# Strip the 0x so that the illegal address will not be filtered out;
# we want the address to be checked.
# Only the first "0x" on each line is removed (the substitution has no g flag).
s/0x//
30 changes: 30 additions & 0 deletions test/unittest/codegen/err.deref_string-lvar.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Oracle Linux DTrace.
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/

/*
 * ASSERTION: Indexing into a string held in a clause-local variable
 *	      (this->s) works for a regular string, and reports an error
 *	      once the variable has been set to NULL.
 *
 * The expected results are the value 66 (ASCII 'B', the character at index 1
 * of "ABCDEFG") from the first clause, followed by an "invalid address"
 * error for the second clause.
 */

#pragma D option quiet

/* Regular string: index 1 is a valid character. */
BEGIN
{
this->s = "ABCDEFG";
trace(this->s[1]);
}

/* NULL string: indexing it is expected to trigger the ERROR probe. */
BEGIN
{
this->s = NULL;
trace(this->s[1]);
}

BEGIN
{
exit(0);
}

/* Any error terminates the script with exit status 1. */
ERROR
{
exit(1);
}
3 changes: 3 additions & 0 deletions test/unittest/codegen/err.deref_string-lvar.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
66
-- @@stderr --
dtrace: error on enabled probe ID 4 (ID 1: dtrace:::BEGIN): invalid address (1) in action #2 at BPF pc NNN
6 changes: 6 additions & 0 deletions test/unittest/codegen/err.deref_string-lvar.r.p
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/sed -f

# runtest.sh looks for "0x" to filter out pointer values.
# Strip the 0x so that the illegal address will not be filtered out;
# we want the address to be checked.
# Only the first "0x" on each line is removed (the substitution has no g flag).
s/0x//
30 changes: 30 additions & 0 deletions test/unittest/codegen/err.deref_string-tvar.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Oracle Linux DTrace.
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/

/*
 * ASSERTION: Indexing into a string held in a thread-local variable
 *	      (self->s) works for a regular string, and reports an error
 *	      once the variable has been set to NULL.
 *
 * The expected results are the value 66 (ASCII 'B', the character at index 1
 * of "ABCDEFG") from the first clause, followed by an "invalid address"
 * error for the second clause.
 */

#pragma D option quiet

/* Regular string: index 1 is a valid character. */
BEGIN
{
self->s = "ABCDEFG";
trace(self->s[1]);
}

/* NULL string: indexing it is expected to trigger the ERROR probe. */
BEGIN
{
self->s = NULL;
trace(self->s[1]);
}

BEGIN
{
exit(0);
}

/* Any error terminates the script with exit status 1. */
ERROR
{
exit(1);
}
3 changes: 3 additions & 0 deletions test/unittest/codegen/err.deref_string-tvar.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
66
-- @@stderr --
dtrace: error on enabled probe ID 4 (ID 1: dtrace:::BEGIN): invalid address (1) in action #2 at BPF pc NNN
6 changes: 6 additions & 0 deletions test/unittest/codegen/err.deref_string-tvar.r.p
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/sed -f

# runtest.sh looks for "0x" to filter out pointer values.
# Strip the 0x so that the illegal address will not be filtered out;
# we want the address to be checked.
# Only the first "0x" on each line is removed (the substitution has no g flag).
s/0x//

0 comments on commit 832b3d4

Please sign in to comment.