Skip to content

Commit

Permalink
Add support for rindex() subroutine
Browse files Browse the repository at this point in the history
Signed-off-by: Eugene Loh <eugene.loh@oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees@oracle.com>
  • Loading branch information
euloh authored and kvanhees committed Oct 14, 2021
1 parent 5f8c410 commit 67e980d
Show file tree
Hide file tree
Showing 7 changed files with 345 additions and 4 deletions.
1 change: 1 addition & 0 deletions bpf/Build
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ bpf_dlib_SOURCES = \
get_tvar.c set_tvar.c \
index.S \
probe_error.c \
rindex.S \
speculation.c \
strchr.S \
strcmp.S \
Expand Down
168 changes: 168 additions & 0 deletions bpf/rindex.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
*/

/*
* This approach to implementing rindex(s, t[, start]) mimics the approach
* used for index(). Notably, it uses dt_index_match().
*
* The rindex() semantics are to find the highest index where t appears in s,
* but at most the start value.
*
* The user semantics are that if the start is not specified, it is as though
* len(s)-len(t) were used. A negative start means no match will be found.
* These user semantics are modified internally, within the implementation:
* - In dt_cg.c, start<0 is immediately rejected.
* - In the call to rindex.S, start<0 means to use the default.
*/

#define DT_STRLEN_BYTES 2

#define BPF_FUNC_probe_read 4
#define BPF_FUNC_probe_read_str 45

.text
/*
* int dt_rindex(const char *s, const char *t, int start, char *tmp1, char *tmp2)
* {
* uint64_t r0, tlen;
* uint64_t buflen;
*
* // ignore length prefix
* s += DT_STRLEN_BYTES;
* t += DT_STRLEN_BYTES;
*
* // round buflen for dt_index_match()
* buflen = STRSZ rounded up to multiple of 8;
*
* // keep a copy of t in tmp2
* tlen = bpf_probe_read_str(tmp2, buflen, t);
*
* // determine actual start index
* r0 = bpf_probe_read_str(tmp1, buflen, s);
* r0 -= tlen; // maximum possible index value
* if (start < 0) start = r0;
* if (start > r0) start = r0;
*
* // drop terminating NULL
* tlen--;
*
* // Fill end of tmp1 with contents from tmp2
* // to suppress spurious mismatches.
* bpf_probe_read(tmp1 + tlen, buflen - tlen, tmp2 + tlen);
*
* // Apparently, the BPF verifier prefers incrementing loop counts.
* // So loop cnt goes from 0 to start; idx goes from start to 0.
* cnt = 0;
* Lloop:
* // check loop
* if (cnt > start) return -1;
* idx = start - cnt;
*
* // fill start of tmp1 with s, starting at the proposed index
* bpf_probe_read(tmp1, tlen, s + idx);
*
* // keep looping if not a match
* r0 = dt_index_match(tmp1, tmp2, buflen);
* cnt++;
* if (r0 == 0) goto Lloop;
*
* return idx;
* }
*
* Some variables are kept in registers or spilled to the stack:
* r6 = idx [%fp+-8] = s
* r7 = tmp1 [%fp+-16] = buflen
* r8 = tmp2 [%fp+-24] = start
* r9 = tlen [%fp+-32] = cnt
* but t is not needed once we have copied its contents to tmp2.
*/
.align 4
.global dt_rindex
.type dt_rindex, @function
dt_rindex:
add %r1, DT_STRLEN_BYTES /* s += DT_STRLEN_BYTES */
add %r2, DT_STRLEN_BYTES /* t += DT_STRLEN_BYTES */

lddw %r6, STRSZ
add %r6, 7
and %r6, -8
stxdw [%fp+-16], %r6 /* buflen = STRSZ rounded up to multiple of 8 */

stxdw [%fp+-8], %r1 /* stash copies of some variables */
stxdw [%fp+-24], %r3
mov %r7, %r4
mov %r8, %r5

mov %r3, %r2
ldxdw %r2, [%fp+-16]
mov %r1, %r8
call BPF_FUNC_probe_read_str /* tlen = bpf_probe_read_str(tmp2, buflen, t) */
jsle %r0, 0, .Lerror
mov %r9, %r0

mov %r1, %r7
ldxdw %r2, [%fp+-16]
ldxdw %r3, [%fp+-8]
call BPF_FUNC_probe_read_str /* r0 = bpf_probe_read_str(tmp1, buflen, s) */

sub %r0, %r9 /* r0 -= tlen */
jslt %r0, 0, .Lerror
ldxdw %r3, [%fp+-24]
jsge %r3, 0, 1 /* if (start < 0) start = r0 */
mov %r3, %r0
jle %r3, %r0, 1 /* if (start > r0) start = r0 */
mov %r3, %r0
stxdw [%fp+-24], %r3

sub %r9, 1 /* tlen-- */

mov %r1, %r7
add %r1, %r9
ldxdw %r2, [%fp+-16]
sub %r2, %r9
mov %r3, %r8
add %r3, %r9
call BPF_FUNC_probe_read /* bpf_probe_read(tmp1 + tlen, buflen - tlen, tmp2 + tlen) */

mov %r0, 0
stxdw [%fp+-32], %r0 /* cnt = 0 */
.Lloop:
ldxdw %r0, [%fp+-32]
ldxdw %r6, [%fp+-24]
jgt %r0, %r6, .Lerror /* if (cnt > start) return -1 */

/* help the BPF verifier */
ldxdw %r2, [%fp+-16]
jge %r0, %r2, .Lerror /* if (cnt >= buflen) return -1 */

sub %r6, %r0 /* idx = start - cnt */
jlt %r6, 0, .Lerror
jge %r6, %r2, .Lerror

mov %r1, %r7
mov %r2, %r9
ldxdw %r3, [%fp+-8]
add %r3, %r6
call BPF_FUNC_probe_read /* bpf_probe_read(tmp1, tlen, s + idx) */

mov %r1, %r7
mov %r2, %r8
ldxdw %r3, [%fp+-16]
call dt_index_match /* r0 = dt_index_match(tmp1, tmp2, buflen) */

ldxdw %r1, [%fp+-32]
add %r1, 1 /* cnt++ */
stxdw [%fp+-32], %r1

jeq %r0, 0, .Lloop /* if (r0 == 0) goto Lloop */

/* done */
mov %r0, %r6 /* return idx */
exit

.Lerror:
mov %r0, -1
exit
.size dt_rindex, .-dt_rindex
87 changes: 86 additions & 1 deletion libdtrace/dt_cg.c
Original file line number Diff line number Diff line change
Expand Up @@ -3303,6 +3303,91 @@ dt_cg_subr_index(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
TRACE_REGSET(" subr-index:End ");
}

static void
dt_cg_subr_rindex(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
{
dt_node_t *s = dnp->dn_args;
dt_node_t *t = s->dn_list;
dt_node_t *start = t->dn_list;
dt_ident_t *idp = dt_dlib_get_func(yypcb->pcb_hdl, "dt_rindex");
uint64_t off1, off2;
uint_t Lneg_start = dt_irlist_label(dlp);

assert(idp != NULL);

TRACE_REGSET(" subr-rindex:Begin");

/* evaluate arguments to D subroutine rindex() */
dt_cg_node(s, dlp, drp);
dt_cg_check_notnull(dlp, drp, s->dn_reg);
dt_cg_node(t, dlp, drp);
dt_cg_check_notnull(dlp, drp, t->dn_reg);
if (start != NULL)
dt_cg_node(start, dlp, drp);

/* start setting up function call to BPF function dt_rindex() */
if (dt_regset_xalloc_args(drp) == -1)
longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);

/* string s and substring t */
emit(dlp, BPF_MOV_REG(BPF_REG_1, s->dn_reg));
dt_regset_free(drp, s->dn_reg);
if (s->dn_tstring)
dt_cg_tstring_free(yypcb, s);
emit(dlp, BPF_MOV_REG(BPF_REG_2, t->dn_reg));
dt_regset_free(drp, t->dn_reg);
if (t->dn_tstring)
dt_cg_tstring_free(yypcb, t);

/* scratch memory */
off1 = dt_cg_tstring_xalloc(yypcb);
off2 = dt_cg_tstring_xalloc(yypcb);

emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_FP, DT_STK_DCTX));
emit(dlp, BPF_LOAD(BPF_DW, BPF_REG_4, BPF_REG_4, DCTX_MEM));
emit(dlp, BPF_MOV_REG(BPF_REG_5, BPF_REG_4));
emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, off1));
emit(dlp, BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, off2));

/* allocate return register */
dt_regset_xalloc(drp, BPF_REG_0);

/*
* Now deal with the "start" argument. If it's negative, we will
* bypass the function call. That means we set all those other
* arguments up for nothing, but doing it this way simplifies
* code generation and start<0 should be a rare condition.
*
* If there is no "start" argument, then specify -1. That is,
* we are modifying the semantics of start<0 for internal purposes.
*/
if (start) {
emit(dlp, BPF_MOV_IMM(BPF_REG_0, -1));
emit(dlp, BPF_BRANCH_IMM(BPF_JSLT, start->dn_reg, 0, Lneg_start));
emit(dlp, BPF_MOV_REG(BPF_REG_3, start->dn_reg));
dt_regset_free(drp, start->dn_reg);
} else
emit(dlp, BPF_MOV_IMM(BPF_REG_3, -1));

/* function call */
emite(dlp, BPF_CALL_FUNC(idp->di_id), idp);
emitl(dlp, Lneg_start,
BPF_NOP());
dt_regset_free_args(drp);

/* clean up */
dt_cg_tstring_xfree(yypcb, off1);
dt_cg_tstring_xfree(yypcb, off2);

dnp->dn_reg = dt_regset_alloc(drp);
if (dnp->dn_reg == -1)
longjmp(yypcb->pcb_jmpbuf, EDT_NOREG);
emit(dlp, BPF_MOV_REG(dnp->dn_reg, BPF_REG_0));
dt_regset_free(drp, BPF_REG_0);

TRACE_REGSET(" subr-rindex:End ");
}

static void
dt_cg_subr_speculation(dt_node_t *dnp, dt_irlist_t *dlp, dt_regset_t *drp)
{
Expand Down Expand Up @@ -3619,7 +3704,7 @@ static dt_cg_subr_f *_dt_cg_subr[DIF_SUBR_MAX + 1] = {
[DIF_SUBR_STRTOK] = NULL,
[DIF_SUBR_SUBSTR] = &dt_cg_subr_substr,
[DIF_SUBR_INDEX] = &dt_cg_subr_index,
[DIF_SUBR_RINDEX] = NULL,
[DIF_SUBR_RINDEX] = &dt_cg_subr_rindex,
[DIF_SUBR_HTONS] = &dt_cg_subr_htons,
[DIF_SUBR_HTONL] = &dt_cg_subr_htonl,
[DIF_SUBR_HTONLL] = &dt_cg_subr_htonll,
Expand Down
1 change: 1 addition & 0 deletions libdtrace/dt_dlibs.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ static const dt_ident_t dt_bpf_symbols[] = {
DT_BPF_SYMBOL(dt_get_tvar, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_set_tvar, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_index, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_rindex, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_strchr, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_strcmp, DT_IDENT_SYMBOL),
DT_BPF_SYMBOL(dt_strjoin, DT_IDENT_SYMBOL),
Expand Down
5 changes: 2 additions & 3 deletions test/stress/fbtsafety/tst.shortstr.d
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
/*
* Oracle Linux DTrace.
* Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
/* @@skip: dtv2 */
/* @@no-xfail */
/* @@xfail: dtv2 */

#pragma D option quiet
#pragma D option strsize=16
Expand Down
60 changes: 60 additions & 0 deletions test/unittest/funcs/tst.rindex.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Oracle Linux DTrace.
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/

/*
* Start at the prescribed index and work backwards for the first match.
* The default start is len(str)-len(substr).
*/

#pragma D option quiet

/* cut string size back a little to ease pressure on BPF verifier */
#pragma D option strsize=144

BEGIN {
x = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#";
y = "abcdefghijklmnopqrstuvwxyz";
printf(" 64 %3d\n", rindex(x, y));
printf(" -1 %3d\n", rindex(x, y, -1));
printf(" 0 %3d\n", rindex(x, y, 0));
printf(" 0 %3d\n", rindex(x, y, 1));
printf(" 64 %3d\n", rindex(x, y, 70));
printf(" 64 %3d\n", rindex(x, y, 200));

y = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#a";
printf(" -1 %3d\n", rindex(x, y, -1));
printf(" 0 %3d\n", rindex(x, y));
printf(" 0 %3d\n", rindex(x, y, 1));

x = "";
y = "klmnopqrstuvw";
printf(" -1 %3d\n", rindex(x, y));
printf(" 13 %3d\n", rindex(y, x));

x = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
y = "klmnopqrstuvw";
printf("114 %3d\n", rindex(x, y));
printf(" -1 %3d\n", rindex(x, y, -1));
printf(" -1 %3d\n", rindex(x, y, 0));
printf(" -1 %3d\n", rindex(x, y, 5));
printf(" 10 %3d\n", rindex(x, y, 10));
printf(" 10 %3d\n", rindex(x, y, 30));
printf(" 36 %3d\n", rindex(x, y, 40));
printf(" 36 %3d\n", rindex(x, y, 60));
printf(" 62 %3d\n", rindex(x, y, 70));
printf(" 62 %3d\n", rindex(x, y, 80));
printf(" 88 %3d\n", rindex(x, y, 90));
printf("114 %3d\n", rindex(x, y, 120));
printf("114 %3d\n", rindex(x, y, 200));

x = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
y = "aaaa";
printf(" 28 %3d\n", rindex(x, y));
printf(" -1 %3d\n", rindex(y, x));

exit(0);
}
27 changes: 27 additions & 0 deletions test/unittest/funcs/tst.rindex.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
64 64
-1 -1
0 0
0 0
64 64
64 64
-1 -1
0 0
0 0
-1 -1
13 13
114 114
-1 -1
-1 -1
-1 -1
10 10
10 10
36 36
36 36
62 62
62 62
88 88
114 114
114 114
28 28
-1 -1

0 comments on commit 67e980d

Please sign in to comment.