Skip to content

Commit

Permalink
Fix length stored by substr() and optimize the implementation
Browse files Browse the repository at this point in the history
In certain cases, substr() would store an incorrect length for the
result string.  This problem is resolved with this patch.

Better comments have been added as well to make the code easier to
follow.

The code handling the case where (idx < 0) failed to recognize that
when the adjusted idx value was more negative than the value of cnt,
the result would always be the empty string.  This has been corrected
which improved code performance as well.

Another important change can be found in label .Lcheck_idx (formerly
.Ladjust_cnt).  The original four conditionals were overkill, and the
case for (cnt > 0) was jumping to .Lcnt_pos incorrectly (causing the
incorrect string length value).  It now correctly jumps to .Lcopy.
In fact, the .Lcnt_pos code was unnecessary.

Copying the substring now makes use of the probe_read_str() helper
rather than the probe_read() helper.

10 new tests are added to test various special conditions.

Signed-off-by: Kris Van Hees <kris.van.hees@oracle.com>
Reviewed-by: Eugene Loh <eugene.loh@oracle.com>
  • Loading branch information
kvanhees committed Nov 20, 2021
1 parent 40a9fe5 commit b4baa3e
Show file tree
Hide file tree
Showing 21 changed files with 550 additions and 44 deletions.
132 changes: 88 additions & 44 deletions bpf/substr.S
Original file line number Diff line number Diff line change
Expand Up @@ -5,88 +5,132 @@

#define DT_STRLEN_BYTES 2

#define BPF_FUNC_probe_read 4
#define BPF_FUNC_probe_read_str 45

/*
* void dt_substr(char *dst, const char *src, int32_t idx, int32_t cnt,
* uint64_t argc)
*
* %r1 = dst, %r2 = src, %r3 = idx, %r4 = cnt, %r5 = argc
*/
.text
.align 4
.global dt_substr
.type dt_substr, @function
dt_substr :
/* Store the arguments (sign-extend idx). */
mov %r9, %r1 /* %r9 = dst */
stxdw [%fp+-8], %r2 /* Spill src */
lsh %r3, 32 /* Sign-extend idx */
arsh %r3, 32
mov %r6, %r3 /* %r6 = idx */

lddw %r8, STRSZ /* %r8 = STRSZ (temporary) */
jgt %r5, 2, .Lhave_cnt
mov %r4, %r8 /* cnt = STRSZ */
/* Initialize the string length with its maximum value. */
lddw %r8, STRSZ /* %r8 = len = STRSZ */

/*
* If we do not have a cnt, use the maximum value.
* Otherwise, sign-extend cnt.
*/
jgt %r5, 2, .Lhave_cnt /* if (argc > 2) goto Lhave_cnt; */
mov %r7, %r8 /* cnt = STRSZ */
ja .Lcnt_set
.Lhave_cnt:
lsh %r4, 32 /* Sign-extend cnt */
arsh %r4, 32
mov %r7, %r4 /* %r7 = cnt */
.Lcnt_set:

/*
* Get the source string length and validate it. If the length is 0,
* the result is the empty string. If the length is greater than the
* maximum string length (STRSZ), cap it at that value.
* Get the source string length and validate it.
* If the length is 0, the result is the empty string.
* If the length is less than the maximum string length (STRSZ), use
* the length we got. (The length is initialized in %r8 as the default
* string length.)
*/
ldxdw %r1, [%fp+-8]
call dt_strlen /* len = dt_strlen(src) */
jeq %r0, 0, .Lempty
mov %r1, %r8 /* %r1 = STRSZ */
jle %r0, %r8, .Llen_ok
mov %r0, %r8 /* len = STRSZ */
.Llen_ok:
jeq %r0, 0, .Lempty /* if (len == 0) goto Lempty; */
jge %r0, %r8, .Llen_set /* if (len >= STRSZ) goto Llen_set; */
mov %r8, %r0 /* %r8 = len */
.Llen_set:

jsge %r6, 0, .Ladjust_cnt
jsge %r6, 0, .Lcheck_idx /* if (idx s>= 0) goto Lcheck_idx; */

/*
* If idx is negative it is a count from the end of the string. Turn
* it into the equivalent character offset (len + idx).
* If the offset is not negative, we can use it as the index.
* If not, we need to determine whether idx + cnt falls within the
* [0, len[ interval. If it does not, the result is the empty string.
* If it does, we will copy cnt + idx characters, starting from idx 0..
*/
add %r6, %r8 /* idx += len */
jsge %r6, 0, .Ladjust_cnt
jsge %r6, 0, .Lcheck_idx /* if (idx s>= 0) goto Lcheck_idx; */
mov %r0, 0
sub32 %r0, %r6 /* neg messes up the verifier */
jsle %r7, %r0, .Ladjust_cnt
add32 %r7, %r6 /* cnt += idx */
sub %r0, %r6 /* neg messes up the verifier */
jsle %r7, %r0, .Lempty /* if (cnt s<= -idx) goto Lempty; */
add %r7, %r6 /* cnt += idx */
mov %r6, 0 /* idx = 0 */

.Ladjust_cnt:
jsge %r6, %r1, .Lempty
jsge %r6, %r8, .Lempty
jslt %r6, 0, .Lempty
jsge %r7, 0, .Lcnt_pos
mov %r0, %r8
sub32 %r0, %r6
add32 %r7, %r0 /* cnt += len - idx */
lsh %r7, 32
arsh %r7, 32
jsle %r7, 0, .Lempty
.Lcnt_pos:
sub %r1, %r6
jsge %r1, %r7, .Lcopy
mov %r7, %r1 /* cnt = STRSZ - idx */
.Lcheck_idx:
/*
* Validate the idx value. If idx is greater than the string length,
* we get the empty string.
*/
jsge %r6, %r8, .Lempty /* if (idx s>= len) goto Lempty; */

.Lcopy:
ldxdw %r8, [%fp+-8]
add %r8, DT_STRLEN_BYTES
add %r8, %r6 /* %r8 = src + DT_STRLEN_BYTES + idx */
/* If cnt is positive (or 0), we are ready to copy the slice. */
jsge %r7, 0, .Lcopy /* if (cnt s>= 0) goto Lcopy; */

mov %r1, %r7
mov %r2, %r9
call dt_strlen_store /* dt_strlen_store(cnt, dst) */
add %r9, DT_STRLEN_BYTES /* %r9 = dst + DT_STRLEN_BYTES */
/*
* If cnt is negative it is a count from the last character in string.
* Use the equivalent character offset to calculate the true (positive)
* cnt:
* start = idx
* end = (len - 1) - (-cnt)
* new_cnt = end - start + 1
* = (len - 1) - (-cnt) - idx + 1
* = len - 1 + cnt - idx + 1
* = len + cnt - idx
* which is equivalent with:
* cnt += len - idx
*/
mov %r0, %r8
sub %r0, %r6
add %r7, %r0 /* cnt += len - idx */
jsle %r7, 0, .Lempty /* if (cnt s<= 0) goto Lempty; */

.Lcopy:
/*
* Use the probe_read_str() BPF helper to copy (cnt + 1) bytes from
* &src[DT_STRLEN_BYTES + idx] to &dst[DT_STRLEN_BYTES]. We ensure
* that cnt is capped at STRSZ.
*/
mov %r1, %r9
add %r1, DT_STRLEN_BYTES
mov %r2, %r7
mov %r3, %r8
call BPF_FUNC_probe_read
lddw %r0, STRSZ
jle %r2, %r0, .Lcnt_ok /* if (cnt <= STRSZ) goto Lcnt_ok; */
mov %r2, %r0 /* cnt = STRSZ */
.Lcnt_ok:
add %r2, 1
ldxdw %r3, [%fp+-8]
add %r3, DT_STRLEN_BYTES
add %r3, %r6
call BPF_FUNC_probe_read_str /*
* rc = probe_read_str(
* &dst[DT_STRLEN_BYTES],
* cnt + 1,
* &src[DT_STRLEN_BYTES + idx]);
*/

/* Store the result string length (rc - 1) at dst. */
mov %r1, %r0
sub %r1, 1
mov %r2, %r9
call dt_strlen_store /* dt_strlen_store(rc - 1, dst) */

add %r9, %r7
stb [%r9+0], 0
exit

.Lempty:
Expand Down
30 changes: 30 additions & 0 deletions test/unittest/funcs/substr/tst.substr-multi-const-idx-neg-no-cnt.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Oracle Linux DTrace.
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/

/*
* ASSERTION: The substr() subroutine supports negative index values passed as
* constants.
*
* SECTION: Actions and Subroutines/substr()
*/

#pragma D option quiet

BEGIN
{
printf("\n% 3d %s", -10, substr("abcdefghijklmnop", -10));
printf("\n% 3d %s", -9, substr("abcdefghijklmnop", -9));
printf("\n% 3d %s", -8, substr("abcdefghijklmnop", -8));
printf("\n% 3d %s", -7, substr("abcdefghijklmnop", -7));
printf("\n% 3d %s", -6, substr("abcdefghijklmnop", -6));
printf("\n% 3d %s", -5, substr("abcdefghijklmnop", -5));
printf("\n% 3d %s", -4, substr("abcdefghijklmnop", -4));
printf("\n% 3d %s", -3, substr("abcdefghijklmnop", -3));
printf("\n% 3d %s", -2, substr("abcdefghijklmnop", -2));
printf("\n% 3d %s", -1, substr("abcdefghijklmnop", -1));
exit(0);
}
11 changes: 11 additions & 0 deletions test/unittest/funcs/substr/tst.substr-multi-const-idx-neg-no-cnt.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

-10 ghijklmnop
-9 hijklmnop
-8 ijklmnop
-7 jklmnop
-6 klmnop
-5 lmnop
-4 mnop
-3 nop
-2 op
-1 p
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Oracle Linux DTrace.
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/

/*
* ASSERTION: The substr() subroutine supports negative index values passed as
* constants, with some values out of range.
*
* SECTION: Actions and Subroutines/substr()
*/

#pragma D option quiet

BEGIN
{
printf("\n% 3d %s", -10, substr("abcde", -10));
printf("\n% 3d %s", -9, substr("abcde", -9));
printf("\n% 3d %s", -8, substr("abcde", -8));
printf("\n% 3d %s", -7, substr("abcde", -7));
printf("\n% 3d %s", -6, substr("abcde", -6));
printf("\n% 3d %s", -5, substr("abcde", -5));
printf("\n% 3d %s", -4, substr("abcde", -4));
printf("\n% 3d %s", -3, substr("abcde", -3));
printf("\n% 3d %s", -2, substr("abcde", -2));
printf("\n% 3d %s", -1, substr("abcde", -1));
exit(0);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

-10 abcde
-9 abcde
-8 abcde
-7 abcde
-6 abcde
-5 abcde
-4 bcde
-3 cde
-2 de
-1 e
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Oracle Linux DTrace.
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/

/*
* ASSERTION: The substr() subroutine supports negative index values passed as
* constants, with some values out of range.
*
* SECTION: Actions and Subroutines/substr()
*/

#pragma D option quiet

BEGIN
{
printf("\n% 3d % 3d %s", -10, 3, substr("abcde", -10, 3));
printf("\n% 3d % 3d %s", -9, 3, substr("abcde", -9, 3));
printf("\n% 3d % 3d %s", -8, 3, substr("abcde", -8, 3));
printf("\n% 3d % 3d %s", -7, 3, substr("abcde", -7, 3));
printf("\n% 3d % 3d %s", -6, 3, substr("abcde", -6, 3));
printf("\n% 3d % 3d %s", -5, 3, substr("abcde", -5, 3));
printf("\n% 3d % 3d %s", -4, 3, substr("abcde", -4, 3));
printf("\n% 3d % 3d %s", -3, 3, substr("abcde", -3, 3));
printf("\n% 3d % 3d %s", -2, 3, substr("abcde", -2, 3));
printf("\n% 3d % 3d %s", -1, 3, substr("abcde", -1, 3));
exit(0);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

-10 3
-9 3
-8 3
-7 3 a
-6 3 ab
-5 3 abc
-4 3 bcd
-3 3 cde
-2 3 de
-1 3 e
31 changes: 31 additions & 0 deletions test/unittest/funcs/substr/tst.substr-multi-const-idx-pos-no-cnt.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Oracle Linux DTrace.
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/

/*
* ASSERTION: The substr() subroutine supports positive index values passed as
* constants.
*
* SECTION: Actions and Subroutines/substr()
*/

#pragma D option quiet

BEGIN
{
printf("\n% 3d %s", 0, substr("abcdefghijklmnop", 0));
printf("\n% 3d %s", 1, substr("abcdefghijklmnop", 1));
printf("\n% 3d %s", 2, substr("abcdefghijklmnop", 2));
printf("\n% 3d %s", 3, substr("abcdefghijklmnop", 3));
printf("\n% 3d %s", 4, substr("abcdefghijklmnop", 4));
printf("\n% 3d %s", 5, substr("abcdefghijklmnop", 5));
printf("\n% 3d %s", 6, substr("abcdefghijklmnop", 6));
printf("\n% 3d %s", 7, substr("abcdefghijklmnop", 7));
printf("\n% 3d %s", 8, substr("abcdefghijklmnop", 8));
printf("\n% 3d %s", 9, substr("abcdefghijklmnop", 9));
printf("\n% 3d %s", 10, substr("abcdefghijklmnop", 10));
exit(0);
}
12 changes: 12 additions & 0 deletions test/unittest/funcs/substr/tst.substr-multi-const-idx-pos-no-cnt.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@

0 abcdefghijklmnop
1 bcdefghijklmnop
2 cdefghijklmnop
3 defghijklmnop
4 efghijklmnop
5 fghijklmnop
6 ghijklmnop
7 hijklmnop
8 ijklmnop
9 jklmnop
10 klmnop
40 changes: 40 additions & 0 deletions test/unittest/funcs/substr/tst.substr-multi-var-idx-neg-no-cnt.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Oracle Linux DTrace.
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/

/*
* ASSERTION: The substr() subroutine supports negative index values passed by
* variable.
*
* SECTION: Actions and Subroutines/substr()
*/

#pragma D option quiet

BEGIN
{
idx = -10;
printf("\n% 3d %s", idx, substr("abcdefghijklmnop", idx));
idx = -9;
printf("\n% 3d %s", idx, substr("abcdefghijklmnop", idx));
idx = -8;
printf("\n% 3d %s", idx, substr("abcdefghijklmnop", idx));
idx = -7;
printf("\n% 3d %s", idx, substr("abcdefghijklmnop", idx));
idx = -6;
printf("\n% 3d %s", idx, substr("abcdefghijklmnop", idx));
idx = -5;
printf("\n% 3d %s", idx, substr("abcdefghijklmnop", idx));
idx = -4;
printf("\n% 3d %s", idx, substr("abcdefghijklmnop", idx));
idx = -3;
printf("\n% 3d %s", idx, substr("abcdefghijklmnop", idx));
idx = -2;
printf("\n% 3d %s", idx, substr("abcdefghijklmnop", idx));
idx = -1;
printf("\n% 3d %s", idx, substr("abcdefghijklmnop", idx));
exit(0);
}

0 comments on commit b4baa3e

Please sign in to comment.