Skip to content

Commit 2f4f6cc

Browse files
Arseny Bochkarev, luhenry
Arseny Bochkarev
authored and committed
8317721: RISC-V: Implement CRC32 intrinsic
Reviewed-by: vkempik, rehn
1 parent 3ca2bcd commit 2f4f6cc

8 files changed

+553
-4
lines changed

src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1607,7 +1607,22 @@ void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) {
16071607
__ la(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no));
16081608
}
16091609

1610-
void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { Unimplemented(); }
1610+
// Emits the inline code for the single-byte CRC32 update LIR op:
//   res = ~update_byte_crc32(~crc, val, crc_table)
// All three operands must be distinct CPU registers. Both `crc` and `val` are
// overwritten. `res` is first used as the CRC table pointer and only receives
// the result in the final instruction.
void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
1611+
assert(op->crc()->is_single_cpu(), "crc must be register");
1612+
assert(op->val()->is_single_cpu(), "byte value must be register");
1613+
assert(op->result_opr()->is_single_cpu(), "result must be register");
1614+
Register crc = op->crc()->as_register();
1615+
Register val = op->val()->as_register();
1616+
Register res = op->result_opr()->as_register();
1617+
1618+
assert_different_registers(val, crc, res);
1619+
__ la(res, ExternalAddress(StubRoutines::crc_table_addr())); // res = CRC table base (scratch use until the last instruction)
1620+
1621+
__ notr(crc, crc); // ~crc (pre-inversion of the incoming CRC)
1622+
__ zero_extend(crc, crc, 32); // presumably clears bits 63..32 left set by the NOT above - TODO confirm
1623+
__ update_byte_crc32(crc, val, res); // crc = table[(val ^ crc) & 0xFF] ^ (crc >> 8); clobbers val
1624+
__ notr(res, crc); // res = ~crc (post-inversion; overwrites the table pointer)
1625+
}
16111626

16121627
void LIR_Assembler::check_conflict(ciKlass* exact_klass, intptr_t current_klass,
16131628
Register tmp, Label &next, Label &none,

src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -781,7 +781,79 @@ void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
781781
}
782782

783783
// Generates LIR for the three CRC32 intrinsics handled here:
//  - _updateCRC32: emits the update_crc32 LIR op on (crc, val) -> result.
//  - _updateBytesCRC32 / _updateByteBufferCRC32: computes the address of the
//    first byte to process and calls the StubRoutines::updateBytesCRC32()
//    stub as a leaf runtime call with the C signature (int, address, int).
// Any other intrinsic id hits ShouldNotReachHere().
void LIRGenerator::do_update_CRC32(Intrinsic* x) {
784-
ShouldNotReachHere();
784+
assert(UseCRC32Intrinsics, "why are we here?");
785+
// Make all state_for calls early since they can emit code
// NOTE(review): no state_for call appears in this function - confirm whether the comment above still applies.
786+
LIR_Opr result = rlock_result(x);
787+
switch (x->id()) {
788+
case vmIntrinsics::_updateCRC32: {
789+
LIRItem crc(x->argument_at(0), this);
790+
LIRItem val(x->argument_at(1), this);
791+
// val is destroyed by update_crc32
792+
val.set_destroys_register();
793+
crc.load_item();
794+
val.load_item();
795+
__ update_crc32(crc.result(), val.result(), result);
796+
break;
797+
}
798+
case vmIntrinsics::_updateBytesCRC32:
799+
case vmIntrinsics::_updateByteBufferCRC32: {
800+
bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32);
801+
802+
LIRItem crc(x->argument_at(0), this);
803+
LIRItem buf(x->argument_at(1), this);
804+
LIRItem off(x->argument_at(2), this);
805+
LIRItem len(x->argument_at(3), this);
806+
buf.load_item();
807+
off.load_nonconstant();
808+
809+
LIR_Opr index = off.result();
810+
int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; // byte[] variant skips the array header; the ByteBuffer variant adds nothing
811+
if (off.result()->is_constant()) { // constant offset: fold it into the displacement and drop the index
812+
index = LIR_OprFact::illegalOpr;
813+
offset += off.result()->as_jint();
814+
}
815+
LIR_Opr base_op = buf.result();
816+
817+
if (index->is_valid()) { // non-constant offset: widen the int index to long for address arithmetic
818+
LIR_Opr tmp = new_register(T_LONG);
819+
__ convert(Bytecodes::_i2l, index, tmp);
820+
index = tmp;
821+
}
822+
823+
if (offset) { // add the displacement explicitly so the LIR_Address below carries displacement 0
824+
LIR_Opr tmp = new_pointer_register();
825+
__ add(base_op, LIR_OprFact::intConst(offset), tmp);
826+
base_op = tmp;
827+
offset = 0;
828+
}
829+
830+
LIR_Address* a = new LIR_Address(base_op,
831+
index,
832+
offset,
833+
T_BYTE);
834+
BasicTypeList signature(3); // stub arguments in order: crc, buffer address, length
835+
signature.append(T_INT);
836+
signature.append(T_ADDRESS);
837+
signature.append(T_INT);
838+
CallingConvention* cc = frame_map()->c_calling_convention(&signature);
839+
const LIR_Opr result_reg = result_register_for(x->type());
840+
841+
LIR_Opr addr = new_pointer_register();
842+
__ leal(LIR_OprFact::address(a), addr); // addr = effective address of the first byte to checksum
843+
844+
crc.load_item_force(cc->at(0));
845+
__ move(addr, cc->at(1));
846+
len.load_item_force(cc->at(2));
847+
848+
__ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args());
849+
__ move(result_reg, result); // copy the stub's return value into the intrinsic's result operand
850+
851+
break;
852+
}
853+
default: {
854+
ShouldNotReachHere();
855+
}
856+
}
785857
}
786858

787859
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {

src/hotspot/cpu/riscv/macroAssembler_riscv.cpp

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1428,6 +1428,175 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
14281428
return count;
14291429
}
14301430

1431+
static const int64_t right_32_bits = right_n_bits(32); // loaded into a register by kernel_crc32 and used with andn on the crc; presumably a mask of the low 32 bits - TODO confirm right_n_bits
1432+
static const int64_t right_8_bits = right_n_bits(8); // andi immediate that isolates one byte (the "& 0xFF" of the CRC pseudo-code)
1433+
1434+
/**
1435+
* Emits code to update CRC-32 with a byte value according to constants in table
1436+
*
1437+
* @param [in,out]crc Register containing the crc.
1438+
* @param [in]val Register containing the byte to fold into the CRC.
*                 Overwritten: it is reused for the table index and the loaded entry.
1439+
* @param [in]table Register containing the table of crc constants.
1440+
*
1441+
* uint32_t crc;
1442+
* val = crc_table[(val ^ crc) & 0xFF];
1443+
* crc = val ^ (crc >> 8);
1444+
*
* All three registers must be distinct (asserted).
1445+
*/
1446+
void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
1447+
assert_different_registers(crc, val, table);
1448+
1449+
xorr(val, val, crc);
1450+
andi(val, val, right_8_bits); // val = (val ^ crc) & 0xFF, the table index
1451+
shadd(val, val, table, val, 2); // presumably val = table + (val << 2), i.e. 4-byte entries - TODO confirm shadd operand roles
1452+
lwu(val, Address(val)); // val = table entry
1453+
srli(crc, crc, 8);
1454+
xorr(crc, val, crc); // crc = entry ^ (crc >> 8)
1455+
}
1456+
1457+
/**
1458+
* Emits code to update CRC-32 with a 32-bit value according to tables 0 to 3
1459+
*
1460+
* @param [in,out]crc Register containing the crc.
1461+
* @param [in]v Register containing the 32-bit value to fold into the CRC.
*               Overwritten: it is shifted/xor-ed in place, and when upper is
*               true it is also reused as a scratch register.
* @param tmp1 Scratch register.
* @param tmp2 Scratch register.
* @param tmp3 Scratch register.
1462+
* @param [in]table0 Register containing table 0 of crc constants.
1463+
* @param [in]table1 Register containing table 1 of crc constants.
1464+
* @param [in]table2 Register containing table 2 of crc constants.
1465+
* @param [in]table3 Register containing table 3 of crc constants.
* @param upper When true, v is first shifted right by 32 so that the word
*              taken from the upper half of v is the one folded in.
1466+
*
1467+
* uint32_t crc;
1468+
* v = crc ^ v
1469+
* crc = table3[v&0xff]^table2[(v>>8)&0xff]^table1[(v>>16)&0xff]^table0[v>>24]
1470+
*
* All register arguments must be distinct (asserted).
1471+
*/
1472+
void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp1, Register tmp2, Register tmp3,
1473+
Register table0, Register table1, Register table2, Register table3, bool upper) {
1474+
assert_different_registers(crc, v, tmp1, tmp2, tmp3, table0, table1, table2, table3);
1475+
1476+
if (upper)
1477+
srli(v, v, 32); // select the upper 32-bit half of v
1478+
xorr(v, v, crc); // v = crc ^ v
1479+
1480+
andi(tmp1, v, right_8_bits); // byte 0 of v -> index into table3
1481+
shadd(tmp1, tmp1, table3, tmp2, 2);
1482+
lwu(crc, Address(tmp1)); // crc = table3[v & 0xff]; the old crc is already folded into v
1483+
1484+
slli(tmp1, v, 16);
1485+
slli(tmp3, v, 8);
1486+
1487+
srliw(tmp1, tmp1, 24); // tmp1 = (v >> 8) & 0xff, assuming srliw is a 32-bit logical shift
1488+
srliw(tmp3, tmp3, 24); // tmp3 = (v >> 16) & 0xff, same assumption
1489+
1490+
shadd(tmp1, tmp1, table2, tmp1, 2);
1491+
lwu(tmp2, Address(tmp1)); // tmp2 = table2[(v >> 8) & 0xff]
1492+
1493+
shadd(tmp3, tmp3, table1, tmp3, 2);
1494+
xorr(crc, crc, tmp2);
1495+
1496+
lwu(tmp2, Address(tmp3)); // tmp2 = table1[(v >> 16) & 0xff]
1497+
if (upper) {
1498+
tmp1 = v; // rebinds the local name only: v's register is used as scratch from here on
1499+
srli(tmp1, v, 24);
1500+
}
1501+
else
1502+
srliw(tmp1, v, 24); // 32-bit shift; in this mode v may still carry data in its upper half
1503+
1504+
// no need to clear bits other than lowest two
1505+
shadd(tmp1, tmp1, table0, tmp1, 2);
1506+
xorr(crc, crc, tmp2);
1507+
lwu(tmp2, Address(tmp1)); // tmp2 = table0[v >> 24]
1508+
xorr(crc, crc, tmp2);
1509+
}
1510+
1511+
/**
* Emits code that folds len bytes starting at buf into crc using the CRC
* lookup tables. The crc is complemented on entry and again on exit.
* Structure: an unrolled loop consuming unroll_words bytes per iteration,
* then a 4-bytes-at-a-time loop, then a byte-wise tail.
*
1512+
* @param crc register containing existing CRC (32-bit); updated in place with the result
1513+
* @param buf register pointing to input byte buffer (byte*); advanced by the loops
1514+
* @param len register containing number of bytes; clobbered
1515+
* @param table0..table3 registers that will contain the addresses of CRC tables 0 to 3
*        (table0 = StubRoutines::crc_table_addr(), each following table is
*        256 * sizeof(juint) bytes further on)
1516+
* @param tmp1..tmp6 scratch registers
*
* All register arguments must be distinct (asserted).
1517+
*/
1518+
void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len,
1519+
Register table0, Register table1, Register table2, Register table3,
1520+
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6) {
1521+
assert_different_registers(crc, buf, len, table0, table1, table2, table3, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
1522+
Label L_by16_loop, L_unroll_loop, L_unroll_loop_entry, L_by4, L_by4_loop, L_by1, L_by1_loop, L_exit; // NOTE(review): L_by16_loop, L_by4 and L_by1 are never bound or referenced below
1523+
1524+
const int64_t unroll = 16; // number of 64-bit loads per unrolled iteration
1525+
const int64_t unroll_words = unroll*wordSize; // despite the name, this is the number of BYTES consumed per unrolled iteration
1526+
mv(tmp5, right_32_bits); // tmp5 keeps this mask until L_exit
1527+
subw(len, len, unroll_words); // bias len: it is >= 0 iff at least one full unrolled iteration fits
1528+
andn(crc, tmp5, crc); // presumably crc = tmp5 & ~crc, a 32-bit complement - TODO confirm andn operand order
1529+
1530+
const ExternalAddress table_addr = StubRoutines::crc_table_addr();
1531+
la(table0, table_addr);
1532+
add(table1, table0, 1*256*sizeof(juint), tmp1);
1533+
add(table2, table0, 2*256*sizeof(juint), tmp1);
1534+
add(table3, table2, 1*256*sizeof(juint), tmp1);
1535+
1536+
bge(len, zr, L_unroll_loop_entry);
1537+
addiw(len, len, unroll_words-4); // re-bias: len is now (remaining bytes - 4)
1538+
bge(len, zr, L_by4_loop);
1539+
addiw(len, len, 4); // undo the bias: len is now the remaining byte count
1540+
bgt(len, zr, L_by1_loop);
1541+
j(L_exit);
1542+
1543+
align(CodeEntryAlignment);
1544+
bind(L_unroll_loop_entry);
1545+
const Register buf_end = tmp3;
1546+
add(buf_end, buf, len); // buf_end will be used as endpoint for loop below
1547+
andi(len, len, unroll_words-1); // len = (len % unroll_words)
1548+
sub(len, len, unroll_words); // Length after all iterations
1549+
bind(L_unroll_loop);
1550+
for (int i = 0; i < unroll; i++) { // assembly-time loop: emits `unroll` copies of the load/fold sequence
1551+
ld(tmp1, Address(buf, i*wordSize));
1552+
update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, false); // fold the word in the low half of tmp1
1553+
update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, true); // then the word in the high half
1554+
}
1555+
1556+
addi(buf, buf, unroll_words);
1557+
ble(buf, buf_end, L_unroll_loop); // repeat while another full unrolled iteration still fits
1558+
addiw(len, len, unroll_words-4); // same re-biasing sequence as in the dispatch code above
1559+
bge(len, zr, L_by4_loop);
1560+
addiw(len, len, 4);
1561+
bgt(len, zr, L_by1_loop);
1562+
j(L_exit);
1563+
1564+
bind(L_by4_loop);
1565+
lwu(tmp1, Address(buf));
1566+
update_word_crc32(crc, tmp1, tmp2, tmp4, tmp6, table0, table1, table2, table3, false);
1567+
subw(len, len, 4);
1568+
addi(buf, buf, 4);
1569+
bge(len, zr, L_by4_loop);
1570+
addiw(len, len, 4); // undo the bias: len is now the number of leftover bytes
1571+
ble(len, zr, L_exit);
1572+
1573+
bind(L_by1_loop);
// NOTE(review): every path into this label appears to arrive with 1..3 bytes
// left, yet the lwu below loads a full 32-bit word - confirm that reading past
// the last valid byte is acceptable here.
1574+
subw(len, len, 1);
1575+
lwu(tmp1, Address(buf));
1576+
andi(tmp2, tmp1, right_8_bits); // byte 0 of the loaded word
1577+
update_byte_crc32(crc, tmp2, table0);
1578+
ble(len, zr, L_exit);
1579+
1580+
subw(len, len, 1);
1581+
srli(tmp2, tmp1, 8);
1582+
andi(tmp2, tmp2, right_8_bits); // byte 1
1583+
update_byte_crc32(crc, tmp2, table0);
1584+
ble(len, zr, L_exit);
1585+
1586+
subw(len, len, 1);
1587+
srli(tmp2, tmp1, 16);
1588+
andi(tmp2, tmp2, right_8_bits); // byte 2
1589+
update_byte_crc32(crc, tmp2, table0);
1590+
ble(len, zr, L_exit);
1591+
1592+
srli(tmp2, tmp1, 24); // byte 3. NOTE(review): appears unreachable if len is always 1..3 on entry to L_by1_loop - confirm
1593+
andi(tmp2, tmp2, right_8_bits);
1594+
update_byte_crc32(crc, tmp2, table0);
1595+
1596+
bind(L_exit);
1597+
andn(crc, tmp5, crc); // complement crc again (same operation as on entry)
1598+
}
1599+
14311600
#ifdef COMPILER2
14321601
// Push vector registers in the bitset supplied.
14331602
// Return the number of words pushed

src/hotspot/cpu/riscv/macroAssembler_riscv.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1288,6 +1288,15 @@ class MacroAssembler: public Assembler {
12881288
void compute_match_mask(Register src, Register pattern, Register match_mask,
12891289
Register mask1, Register mask2);
12901290

1291+
// CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
// kernel_crc32 emits the complete buffer loop; update_word_crc32 and
// update_byte_crc32 emit the per-word / per-byte table-lookup steps it uses.
1292+
void kernel_crc32(Register crc, Register buf, Register len,
1293+
Register table0, Register table1, Register table2, Register table3,
1294+
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6);
1295+
void update_word_crc32(Register crc, Register v, Register tmp1, Register tmp2, Register tmp3,
1296+
Register table0, Register table1, Register table2, Register table3,
1297+
bool upper); // upper selects the high 32-bit half of v instead of the low half
1298+
void update_byte_crc32(Register crc, Register val, Register table); // val is overwritten by the emitted code
1299+
12911300
#ifdef COMPILER2
12921301
void mul_add(Register out, Register in, Register offset,
12931302
Register len, Register k, Register tmp);
@@ -1317,6 +1326,7 @@ class MacroAssembler: public Assembler {
13171326
Register z, Register tmp0,
13181327
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
13191328
Register tmp5, Register tmp6, Register product_hi);
1329+
13201330
#endif
13211331

13221332
void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);

src/hotspot/cpu/riscv/stubGenerator_riscv.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5313,6 +5313,52 @@ static const int64_t right_3_bits = right_n_bits(3);
53135313

53145314
#endif // COMPILER2
53155315

5316+
/**
* Generates the "updateBytesCRC32" stub: a frame set up with enter()/leave()
* around the code emitted by MacroAssembler::kernel_crc32.
*
1317+
* Arguments:
1318+
*
1319+
* Inputs:
1320+
* c_rarg0 - int crc
1321+
* c_rarg1 - byte* buf
1322+
* c_rarg2 - int length
1323+
*
1324+
* Output:
1325+
* c_rarg0 - int crc result
*
* Registers used by the emitted code beyond the inputs:
* c_rarg3..c_rarg6 (table addresses), c_rarg7, t2 and x28..x31 (scratch).
*
* Returns the entry address of the generated stub.
1326+
*/
1327+
address generate_updateBytesCRC32() {
1328+
assert(UseCRC32Intrinsics, "what are we doing here?");
1329+
1330+
__ align(CodeEntryAlignment);
1331+
StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
1332+
1333+
address start = __ pc();
1334+
1335+
const Register crc = c_rarg0; // crc
1336+
const Register buf = c_rarg1; // source java byte array address
1337+
const Register len = c_rarg2; // length
1338+
const Register table0 = c_rarg3; // crc_table address
1339+
const Register table1 = c_rarg4; // tables 1..3: their addresses are computed inside kernel_crc32
1340+
const Register table2 = c_rarg5;
1341+
const Register table3 = c_rarg6;
1342+
1343+
const Register tmp1 = c_rarg7; // scratch registers handed to kernel_crc32
1344+
const Register tmp2 = t2;
1345+
const Register tmp3 = x28; // t3
1346+
const Register tmp4 = x29; // t4
1347+
const Register tmp5 = x30; // t5
1348+
const Register tmp6 = x31; // t6
1349+
1350+
BLOCK_COMMENT("Entry:");
1351+
__ enter(); // required for proper stackwalking of RuntimeStub frame
1352+
1353+
__ kernel_crc32(crc, buf, len, table0, table1, table2,
1354+
table3, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); // updates crc (c_rarg0) in place, which is the documented output register
1355+
1356+
__ leave(); // required for proper stackwalking of RuntimeStub frame
1357+
__ ret();
1358+
1359+
return start;
1360+
}
5361+
53165362
#if INCLUDE_JFR
53175363

53185364
static void jfr_prologue(address the_pc, MacroAssembler* _masm, Register thread) {
@@ -5559,6 +5605,12 @@ static const int64_t right_3_bits = right_n_bits(3);
55595605
generate_throw_exception("delayed StackOverflowError throw_exception",
55605606
CAST_FROM_FN_PTR(address,
55615607
SharedRuntime::throw_delayed_StackOverflowError));
5608+
5609+
if (UseCRC32Intrinsics) {
5610+
// set the table address before generating the stubs that use it
5611+
StubRoutines::_crc_table_adr = (address)StubRoutines::riscv::_crc_table;
5612+
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
5613+
}
55625614
}
55635615

55645616
void generate_continuation_stubs() {

0 commit comments

Comments
 (0)