Skip to content

Commit f3abc40

Browse files
Yi-Fan Tsai authored and Paul Hohensee committed
8302783: Improve CRC32C intrinsic with crypto pmull on AArch64
Reviewed-by: simonis, phh
1 parent 45d8a17 commit f3abc40

File tree

3 files changed

+78
-1
lines changed

3 files changed

+78
-1
lines changed

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

+63-1
Original file line numberDiff line numberDiff line change
@@ -3968,6 +3968,64 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
39683968
mvnw(crc, crc);
39693969
}
39703970

3971+
// Emits code that updates `crc` with the CRC-32C of `len` bytes starting at
// `buf`, using the crypto PMULL folding helper for large inputs and the
// scalar crc32c{x,w,b} instructions for everything else.
//
//   crc         in/out  running CRC value, updated in place
//   buf         in      read cursor; advanced past every byte consumed
//   len         in      remaining byte count; modified while counting down
//   tmp0..tmp3  scratch registers, clobbered
//
// Control flow:
//   len >= 384  -> CRC_by128_pre (fold helper), then fall back to the scalar
//                  path for whatever the helper leaves in `len`
//   otherwise   -> 32-byte loop, then 4-byte loop, then 1-byte loop
void MacroAssembler::kernel_crc32c_using_crypto_pmull(Register crc, Register buf,
        Register len, Register tmp0, Register tmp1, Register tmp2, Register tmp3) {
  Label CRC_by4_loop, CRC_by1_loop, CRC_less128, CRC_by128_pre, CRC_by32_loop, CRC_less32, L_exit;
  // tmp3 holds the fourth doubleword in CRC_by32_loop, so it has to take part
  // in the aliasing check together with the other temporaries.
  assert_different_registers(crc, buf, len, tmp0, tmp1, tmp2, tmp3);

  // Only the flags matter here; the difference written to tmp0 is not used.
  subs(tmp0, len, 384);
  br(Assembler::GE, CRC_by128_pre);

  BIND(CRC_less128);
  // Bias len by -32 so the 32-byte loop can test the sign after each step.
  subs(len, len, 32);
  br(Assembler::GE, CRC_by32_loop);

  BIND(CRC_less32);
  // Undo the -32 bias and apply a -4 bias for the word loop in one add.
  adds(len, len, 32 - 4);
  br(Assembler::GE, CRC_by4_loop);
  // Fewer than 4 bytes left: remove the -4 bias to get the true remainder.
  adds(len, len, 4);
  br(Assembler::GT, CRC_by1_loop);
  b(L_exit);

  // 32 bytes per iteration; loads are interleaved with the CRC updates.
  BIND(CRC_by32_loop);
  ldp(tmp0, tmp1, Address(buf));
  crc32cx(crc, crc, tmp0);
  ldr(tmp2, Address(buf, 16));
  crc32cx(crc, crc, tmp1);
  ldr(tmp3, Address(buf, 24));
  crc32cx(crc, crc, tmp2);
  add(buf, buf, 32);
  subs(len, len, 32);
  crc32cx(crc, crc, tmp3);
  br(Assembler::GE, CRC_by32_loop);
  // len is now in [-32, -1]; exactly -32 means nothing is left over.
  cmn(len, (u1)32);
  br(Assembler::NE, CRC_less32);
  b(L_exit);

  // 4 bytes per iteration; entered with len biased by -4.
  BIND(CRC_by4_loop);
  ldrw(tmp0, Address(post(buf, 4)));
  subs(len, len, 4);
  crc32cw(crc, crc, tmp0);
  br(Assembler::GE, CRC_by4_loop);
  // Remove the -4 bias; a result <= 0 means no trailing bytes remain.
  adds(len, len, 4);
  br(Assembler::LE, L_exit);

  // Trailing bytes, one at a time.
  BIND(CRC_by1_loop);
  ldrb(tmp0, Address(post(buf, 1)));
  subs(len, len, 1);
  crc32cb(crc, crc, tmp0);
  br(Assembler::GT, CRC_by1_loop);
  b(L_exit);

  BIND(CRC_by128_pre);
  // NOTE(review): the last argument is a byte offset handed to the fold
  // helper; presumably it skips the earlier entries of
  // StubRoutines::aarch64::_crc_table to reach the CRC-32C PMULL constants --
  // confirm against the table layout.
  kernel_crc32_common_fold_using_crypto_pmull(crc, buf, len, tmp0, tmp1, tmp2,
          4*256*sizeof(juint) + 8*sizeof(juint) + 0x50);
  // NOTE(review): the helper is defined elsewhere; the code below relies on
  // it leaving its result in tmp0 and tmp1, which are reduced here starting
  // from a zero CRC.
  mov(crc, 0);
  crc32cx(crc, crc, tmp0);
  crc32cx(crc, crc, tmp1);

  // Anything the helper did not consume goes through the scalar path.
  cbnz(len, CRC_less128);

  BIND(L_exit);
}
4028+
39714029
void MacroAssembler::kernel_crc32c_using_crc32c(Register crc, Register buf,
39724030
Register len, Register tmp0, Register tmp1, Register tmp2,
39734031
Register tmp3) {
@@ -4074,7 +4132,11 @@ void MacroAssembler::kernel_crc32c_using_crc32c(Register crc, Register buf,
40744132
// Entry point for CRC-32C code generation. Picks the implementation based on
// the UseCryptoPmullForCRC32 flag and forwards crc, buf, len and the four
// table registers (used as temporaries by both kernels). The tmp, tmp2 and
// tmp3 parameters are not referenced here.
void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len,
        Register table0, Register table1, Register table2, Register table3,
        Register tmp, Register tmp2, Register tmp3) {
  if (!UseCryptoPmullForCRC32) {
    // Plain crc32c-instruction kernel.
    kernel_crc32c_using_crc32c(crc, buf, len, table0, table1, table2, table3);
    return;
  }
  // Crypto PMULL kernel.
  kernel_crc32c_using_crypto_pmull(crc, buf, len, table0, table1, table2, table3);
}
40794141

40804142
void MacroAssembler::kernel_crc32_common_fold_using_crypto_pmull(Register crc, Register buf,

src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -1429,6 +1429,9 @@ class MacroAssembler: public Assembler {
14291429
void kernel_crc32_using_crc32(Register crc, Register buf,
14301430
Register len, Register tmp0, Register tmp1, Register tmp2,
14311431
Register tmp3);
1432+
// CRC-32C kernel variant that uses crypto PMULL (8302783). Takes the running
// crc, the buffer pointer, the byte length and four temporary registers.
void kernel_crc32c_using_crypto_pmull(Register crc, Register buf,
1433+
Register len, Register tmp0, Register tmp1, Register tmp2,
1434+
Register tmp3);
14321435
void kernel_crc32c_using_crc32c(Register crc, Register buf,
14331436
Register len, Register tmp0, Register tmp1, Register tmp2,
14341437
Register tmp3);

src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,18 @@ ATTRIBUTE_ALIGNED(4096) juint StubRoutines::aarch64::_crc_table[] =
302302
0x5a546366UL, 0x00000001UL,
303303
0x751997d0UL, 0x00000001UL,
304304
0xccaa009eUL, 0x00000000UL,
305+
306+
// Constants for CRC-32C crypto pmull implementation
307+
0x6992cea2UL, 0x00000000UL,
308+
0x0d3b6092UL, 0x00000000UL,
309+
0x740eef02UL, 0x00000000UL,
310+
0x9e4addf8UL, 0x00000000UL,
311+
0x1c291d04UL, 0x00000000UL,
312+
0xd82c63daUL, 0x00000001UL,
313+
0x384aa63aUL, 0x00000001UL,
314+
0xba4fc28eUL, 0x00000000UL,
315+
0xf20c0dfeUL, 0x00000000UL,
316+
0x4cd00bd6UL, 0x00000001UL,
305317
};
306318

307319
// Accumulation coefficients for adler32 upper 16 bits

0 commit comments

Comments
 (0)