Skip to content

Commit

Permalink
8296602: RISC-V: improve performance of copy_memory stub
Browse files Browse the repository at this point in the history
Reviewed-by: fyang
  • Loading branch information
Vladimir Kempik committed Nov 17, 2022
1 parent 4527dc6 commit bd57e21
Showing 1 changed file with 69 additions and 29 deletions.
98 changes: 69 additions & 29 deletions src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
Expand Up @@ -881,7 +881,11 @@ class StubGenerator: public StubCodeGenerator {
//
/*
* if (is_aligned) {
* goto copy_8_bytes;
* if (count >= 32)
* goto copy32_loop;
* if (count >= 8)
* goto copy8_loop;
* goto copy_small;
* }
* bool is_backwards = step < 0;
* int granularity = uabs(step);
Expand All @@ -899,9 +903,12 @@ class StubGenerator: public StubCodeGenerator {
*
* if ((dst % 8) == (src % 8)) {
* aligned;
* goto copy8;
* goto copy_big;
* }
*
* copy_big:
* if the amount to copy is more than (or equal to) 32 bytes goto copy32_loop
* else goto copy8_loop
* copy_small:
* load element one by one;
* done;
Expand Down Expand Up @@ -962,10 +969,10 @@ class StubGenerator: public StubCodeGenerator {
bool is_backwards = step < 0;
int granularity = uabs(step);

const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17;
const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17, tmp5 = x14, tmp6 = x13;

Label same_aligned;
Label copy8, copy_small, done;
Label copy_big, copy32_loop, copy8_loop, copy_small, done;

copy_insn ld_arr = NULL, st_arr = NULL;
switch (granularity) {
Expand Down Expand Up @@ -1000,36 +1007,69 @@ class StubGenerator: public StubCodeGenerator {
}

if (is_aligned) {
__ addi(tmp, cnt, -32);
__ bgez(tmp, copy32_loop);
__ addi(tmp, cnt, -8);
__ bgez(tmp, copy8);
__ bgez(tmp, copy8_loop);
__ j(copy_small);
}

__ mv(tmp, 16);
__ blt(cnt, tmp, copy_small);

__ xorr(tmp, src, dst);
__ andi(tmp, tmp, 0b111);
__ bnez(tmp, copy_small);
} else {
__ mv(tmp, 16);
__ blt(cnt, tmp, copy_small);

__ xorr(tmp, src, dst);
__ andi(tmp, tmp, 0b111);
__ bnez(tmp, copy_small);

__ bind(same_aligned);
__ andi(tmp, src, 0b111);
__ beqz(tmp, copy_big);
if (is_backwards) {
__ addi(src, src, step);
__ addi(dst, dst, step);
}
(_masm->*ld_arr)(tmp3, Address(src), t0);
(_masm->*st_arr)(tmp3, Address(dst), t0);
if (!is_backwards) {
__ addi(src, src, step);
__ addi(dst, dst, step);
}
__ addi(cnt, cnt, -granularity);
__ beqz(cnt, done);
__ j(same_aligned);

__ bind(same_aligned);
__ andi(tmp, src, 0b111);
__ beqz(tmp, copy8);
__ bind(copy_big);
__ mv(tmp, 32);
__ blt(cnt, tmp, copy8_loop);
}
__ bind(copy32_loop);
if (is_backwards) {
__ addi(src, src, step);
__ addi(dst, dst, step);
__ addi(src, src, -wordSize * 4);
__ addi(dst, dst, -wordSize * 4);
}
(_masm->*ld_arr)(tmp3, Address(src), t0);
(_masm->*st_arr)(tmp3, Address(dst), t0);
// we first load 32 bytes, then write it, so the direction here doesn't matter
__ ld(tmp3, Address(src));
__ ld(tmp4, Address(src, 8));
__ ld(tmp5, Address(src, 16));
__ ld(tmp6, Address(src, 24));
__ sd(tmp3, Address(dst));
__ sd(tmp4, Address(dst, 8));
__ sd(tmp5, Address(dst, 16));
__ sd(tmp6, Address(dst, 24));

if (!is_backwards) {
__ addi(src, src, step);
__ addi(dst, dst, step);
__ addi(src, src, wordSize * 4);
__ addi(dst, dst, wordSize * 4);
}
__ addi(cnt, cnt, -granularity);
__ beqz(cnt, done);
__ j(same_aligned);
__ addi(tmp, cnt, -(32 + wordSize * 4));
__ addi(cnt, cnt, -wordSize * 4);
__ bgez(tmp, copy32_loop); // cnt >= 32, do next loop

__ beqz(cnt, done); // if that's all - done

__ addi(tmp, cnt, -8); // if not - copy the remainder
__ bltz(tmp, copy_small); // cnt < 8, go to copy_small, else fall through to copy8_loop

__ bind(copy8);
__ bind(copy8_loop);
if (is_backwards) {
__ addi(src, src, -wordSize);
__ addi(dst, dst, -wordSize);
Expand All @@ -1040,11 +1080,11 @@ class StubGenerator: public StubCodeGenerator {
__ addi(src, src, wordSize);
__ addi(dst, dst, wordSize);
}
__ addi(tmp, cnt, -(8 + wordSize));
__ addi(cnt, cnt, -wordSize);
__ addi(tmp4, cnt, -8);
__ bgez(tmp4, copy8); // cnt >= 8, do next loop
__ bgez(tmp, copy8_loop); // cnt >= 8, do next loop

__ beqz(cnt, done);
__ beqz(cnt, done); // if that's all - done

__ bind(copy_small);
if (is_backwards) {
Expand Down

3 comments on commit bd57e21

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@VladimirKempik
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/backport jdk19u

@openjdk
Copy link

@openjdk openjdk bot commented on bd57e21 Nov 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@VladimirKempik the backport was successfully created on the branch VladimirKempik-backport-bd57e213 in my personal fork of openjdk/jdk19u. To create a pull request with this backport targeting openjdk/jdk19u:master, just click the following link:

➡️ Create pull request

The title of the pull request is automatically filled in correctly and below you find a suggestion for the pull request body:

Hi all,

This pull request contains a backport of commit bd57e213 from the openjdk/jdk repository.

The commit being backported was authored by Vladimir Kempik on 17 Nov 2022 and was reviewed by Fei Yang.

Thanks!

If you need to update the source branch of the pull then run the following commands in a local clone of your personal fork of openjdk/jdk19u:

$ git fetch https://github.com/openjdk-bots/jdk19u VladimirKempik-backport-bd57e213:VladimirKempik-backport-bd57e213
$ git checkout VladimirKempik-backport-bd57e213
# make changes
$ git add paths/to/changed/files
$ git commit --message 'Describe additional changes made'
$ git push https://github.com/openjdk-bots/jdk19u VladimirKempik-backport-bd57e213

Please sign in to comment.