Skip to content

Commit

Permalink
Rollup merge of #84751 - Soveu:is_char_boundary_opt, r=Amanieu
Browse files Browse the repository at this point in the history
str::is_char_boundary - slight optimization

Current `str::is_char_boundary` implementation emits slightly more instructions, because it includes an additional branch for `index == s.len()`
```rust
pub fn is_char_boundary(s: &str, index: usize) -> bool {
    if index == 0 || index == s.len() {
        return true;
    }
    match s.as_bytes().get(index) {
        None => false,
        Some(&b) => (b as i8) >= -0x40,
    }
}
```
Just changing the place of `index == s.len()` merges it with `index < s.len()` from `s.as_bytes().get(index)`
```rust
pub fn is_char_boundary2(s: &str, index: usize) -> bool {
    if index == 0 {
        return true;
    }

    match s.as_bytes().get(index) {
        // For some reason, LLVM likes this comparison here more
        None => index == s.len(),
        // This is bit magic equivalent to: b < 128 || b >= 192
        Some(&b) => (b as i8) >= -0x40,
    }
}
```
This one has better codegen on every platform, except powerpc
<details><summary>x86 codegen</summary>
<p>

```nasm
example::is_char_boundary:
        mov     al, 1
        test    rdx, rdx
        je      .LBB0_5
        cmp     rsi, rdx
        je      .LBB0_5
        cmp     rsi, rdx
        jbe     .LBB0_3
        cmp     byte ptr [rdi + rdx], -65
        setg    al
.LBB0_5:
        ret
.LBB0_3:
        xor     eax, eax
        ret

example::is_char_boundary2:
        test    rdx, rdx
        je      .LBB1_1
        cmp     rsi, rdx
        jbe     .LBB1_4
        cmp     byte ptr [rdi + rdx], -65
        setg    al
        ret
.LBB1_1:  ; technically this branch is the same as LBB1_4
        mov     al, 1
        ret
.LBB1_4:
        sete    al
        ret
 ```
</p>
</details>

<details><summary>aarch64 codegen</summary>
<p>

```as
example::is_char_boundary:
        mov     x8, x0
        mov     w0, #1
        cbz     x2, .LBB0_4
        cmp     x1, x2
        b.eq    .LBB0_4
        b.ls    .LBB0_5
        ldrsb   w8, [x8, x2]
        cmn     w8, #65
        cset    w0, gt
.LBB0_4:
        ret
.LBB0_5:
        mov     w0, wzr
        ret

example::is_char_boundary2:
        cbz     x2, .LBB1_3
        cmp     x1, x2
        b.ls    .LBB1_4
        ldrsb   w8, [x0, x2]
        cmn     w8, #65
        cset    w0, gt
        ret
.LBB1_3:
        mov     w0, #1
        ret
.LBB1_4:
        cset    w0, eq
        ret
```

</p>
</details>

<details><summary>riscv64gc codegen</summary>
<p>

example::is_char_boundary:
        seqz    a3, a2
        xor     a4, a1, a2
        seqz    a4, a4
        or      a4, a4, a3
        addi    a3, zero, 1
        bnez    a4, .LBB0_3
        bgeu    a2, a1, .LBB0_4
        add     a0, a0, a2
        lb      a0, 0(a0)
        addi    a1, zero, -65
        slt     a3, a1, a0
.LBB0_3:
        mv      a0, a3
        ret
.LBB0_4:
        mv      a0, zero
        ret

example::is_char_boundary2:
        beqz    a2, .LBB1_3
        bgeu    a2, a1, .LBB1_4
        add     a0, a0, a2
        lb      a0, 0(a0)
        addi    a1, zero, -65
        slt     a0, a1, a0
        ret
.LBB1_3:
        addi    a0, zero, 1
        ret
.LBB1_4:
        xor     a0, a1, a2
        seqz    a0, a0
        ret

</p>
</details>

[Link to godbolt](https://godbolt.org/z/K8avEz8Gr)

`@rustbot` label: A-codegen
  • Loading branch information
GuillaumeGomez committed May 15, 2021
2 parents 2a245f4 + 7bd9d9f commit 62b834f
Showing 1 changed file with 15 additions and 3 deletions.
18 changes: 15 additions & 3 deletions library/core/src/str/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,14 +192,26 @@ impl str {
#[stable(feature = "is_char_boundary", since = "1.9.0")]
#[inline]
pub fn is_char_boundary(&self, index: usize) -> bool {
// 0 and len are always ok.
// 0 is always ok.
// Test for 0 explicitly so that it can optimize out the check
// easily and skip reading string data for that case.
if index == 0 || index == self.len() {
// Note that optimizing `self.get(..index)` relies on this.
if index == 0 {
return true;
}

match self.as_bytes().get(index) {
None => false,
// For `None` we have two options:
//
// - index == self.len()
// Empty strings are valid, so return true
// - index > self.len()
// In this case return false
//
// The check is placed exactly here, because it improves generated
// code on higher opt-levels. See PR #84751 for more details.
None => index == self.len(),

// This is bit magic equivalent to: b < 128 || b >= 192
Some(&b) => (b as i8) >= -0x40,
}
Expand Down

0 comments on commit 62b834f

Please sign in to comment.