Skip to content
This repository has been archived by the owner before Nov 9, 2022. It is now read-only.
Permalink
Browse files
8257436: [aarch64] Regressions in ArrayCopyUnalignedDst.testByte/test…
…Char for 65-78 bytes when UseSIMDForMemoryOps is on

Reviewed-by: simonis
  • Loading branch information
eastig authored and simonis committed Dec 3, 2020
1 parent b170c83 commit e83639620739749834d91e026cfecb56529aa295
Showing 1 changed file with 23 additions and 2 deletions.
@@ -1094,10 +1094,10 @@ class StubGenerator: public StubCodeGenerator {
Register count, Register tmp, int step) {
copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
bool is_backwards = step < 0;
int granularity = uabs(step);
unsigned int granularity = uabs(step);
const Register t0 = r3, t1 = r4;

// <= 96 bytes do inline. Direction doesn't matter because we always
// <= 80 (or 96 for SIMD) bytes do inline. Direction doesn't matter because we always
// load all the data before writing anything
Label copy4, copy8, copy16, copy32, copy80, copy_big, finish;
const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8;
@@ -1154,7 +1154,28 @@ class StubGenerator: public StubCodeGenerator {
if (UseSIMDForMemoryOps) {
__ ldpq(v0, v1, Address(s, 0));
__ ldpq(v2, v3, Address(s, 32));
// Unaligned pointers can be an issue for copying.
// The issue has more chances to happen when granularity of data is
// less than 4(sizeof(jint)). Pointers for arrays of jint are at least
// 4 byte aligned. Pointers for arrays of jlong are 8 byte aligned.
// The most performance drop has been seen for the range 65-80 bytes.
// For such cases using the pair of ldp/stp instead of the third pair of
// ldpq/stpq fixes the performance issue.
if (granularity < sizeof (jint)) {
Label copy96;
__ cmp(count, u1(80/granularity));
__ br(Assembler::HI, copy96);
__ ldp(t0, t1, Address(send, -16));

__ stpq(v0, v1, Address(d, 0));
__ stpq(v2, v3, Address(d, 32));
__ stp(t0, t1, Address(dend, -16));
__ b(finish);

__ bind(copy96);
}
__ ldpq(v4, v5, Address(send, -32));

__ stpq(v0, v1, Address(d, 0));
__ stpq(v2, v3, Address(d, 32));
__ stpq(v4, v5, Address(dend, -32));

0 comments on commit e836396

Please sign in to comment.