Skip to content

Commit e470f47

Browse files
author
Hamlin Li
committed
8350095: RISC-V: Refactor string_compare
Reviewed-by: fyang
1 parent f53de92 commit e470f47

File tree

4 files changed

+207
-170
lines changed

4 files changed

+207
-170
lines changed

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

Lines changed: 183 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -1382,15 +1382,183 @@ void C2_MacroAssembler::string_indexof_linearscan(Register haystack, Register ne
13821382
bind(DONE);
13831383
}
13841384

1385+
// Compare longwords of two strings that share the same encoding (LL or UU)
1386+
void C2_MacroAssembler::string_compare_long_same_encoding(Register result, Register str1, Register str2,
1387+
const bool isLL, Register cnt1, Register cnt2,
1388+
Register tmp1, Register tmp2, Register tmp3,
1389+
const int STUB_THRESHOLD, Label *STUB, Label *SHORT_STRING, Label *DONE) {
1390+
Label TAIL_CHECK, TAIL, NEXT_WORD, DIFFERENCE;
1391+
1392+
const int base_offset = isLL ? arrayOopDesc::base_offset_in_bytes(T_BYTE)
1393+
: arrayOopDesc::base_offset_in_bytes(T_CHAR);
1394+
assert((base_offset % (UseCompactObjectHeaders ? 4 :
1395+
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
1396+
1397+
const int minCharsInWord = isLL ? wordSize : wordSize / 2;
1398+
1399+
// load first parts of strings and finish initialization while loading
1400+
beq(str1, str2, *DONE);
1401+
// Alignment
1402+
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
1403+
lwu(tmp1, Address(str1));
1404+
lwu(tmp2, Address(str2));
1405+
bne(tmp1, tmp2, DIFFERENCE);
1406+
addi(str1, str1, 4);
1407+
addi(str2, str2, 4);
1408+
subi(cnt2, cnt2, minCharsInWord / 2);
1409+
1410+
// A very short string
1411+
mv(t0, minCharsInWord);
1412+
ble(cnt2, t0, *SHORT_STRING);
1413+
}
1414+
#ifdef ASSERT
1415+
if (AvoidUnalignedAccesses) {
1416+
Label align_ok;
1417+
orr(t0, str1, str2);
1418+
andi(t0, t0, 0x7);
1419+
beqz(t0, align_ok);
1420+
stop("bad alignment");
1421+
bind(align_ok);
1422+
}
1423+
#endif
1424+
// load 8 bytes once to compare
1425+
ld(tmp1, Address(str1));
1426+
ld(tmp2, Address(str2));
1427+
mv(t0, STUB_THRESHOLD);
1428+
bge(cnt2, t0, *STUB);
1429+
subi(cnt2, cnt2, minCharsInWord);
1430+
beqz(cnt2, TAIL_CHECK);
1431+
// convert cnt2 from characters to bytes
1432+
if (!isLL) {
1433+
slli(cnt2, cnt2, 1);
1434+
}
1435+
add(str2, str2, cnt2);
1436+
add(str1, str1, cnt2);
1437+
sub(cnt2, zr, cnt2);
1438+
addi(cnt2, cnt2, 8);
1439+
bne(tmp1, tmp2, DIFFERENCE);
1440+
bgez(cnt2, TAIL);
1441+
1442+
// main loop
1443+
bind(NEXT_WORD);
1444+
// 8-byte aligned loads when AvoidUnalignedAccesses is enabled
1445+
add(t0, str1, cnt2);
1446+
ld(tmp1, Address(t0));
1447+
add(t0, str2, cnt2);
1448+
ld(tmp2, Address(t0));
1449+
addi(cnt2, cnt2, 8);
1450+
bne(tmp1, tmp2, DIFFERENCE);
1451+
bltz(cnt2, NEXT_WORD);
1452+
1453+
bind(TAIL);
1454+
load_long_misaligned(tmp1, Address(str1), tmp3, isLL ? 1 : 2);
1455+
load_long_misaligned(tmp2, Address(str2), tmp3, isLL ? 1 : 2);
1456+
1457+
bind(TAIL_CHECK);
1458+
beq(tmp1, tmp2, *DONE);
1459+
1460+
// Find the first different characters in the longwords and
1461+
// compute their difference.
1462+
bind(DIFFERENCE);
1463+
xorr(tmp3, tmp1, tmp2);
1464+
// count the number of bits in the trailing zero chars of the XOR result
1465+
ctzc_bits(result, tmp3, isLL);
1466+
srl(tmp1, tmp1, result);
1467+
srl(tmp2, tmp2, result);
1468+
if (isLL) {
1469+
zext(tmp1, tmp1, 8);
1470+
zext(tmp2, tmp2, 8);
1471+
} else {
1472+
zext(tmp1, tmp1, 16);
1473+
zext(tmp2, tmp2, 16);
1474+
}
1475+
sub(result, tmp1, tmp2);
1476+
1477+
j(*DONE);
1478+
}
1479+
1480+
// Compare longwords of two strings with different encodings (LU or UL)
1481+
void C2_MacroAssembler::string_compare_long_different_encoding(Register result, Register str1, Register str2,
1482+
bool isLU, Register cnt1, Register cnt2,
1483+
Register tmp1, Register tmp2, Register tmp3,
1484+
const int STUB_THRESHOLD, Label *STUB, Label *DONE) {
1485+
Label TAIL, NEXT_WORD, DIFFERENCE;
1486+
1487+
const int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR);
1488+
assert((base_offset % (UseCompactObjectHeaders ? 4 :
1489+
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
1490+
1491+
Register strL = isLU ? str1 : str2;
1492+
Register strU = isLU ? str2 : str1;
1493+
Register tmpL = tmp1, tmpU = tmp2;
1494+
1495+
// load first parts of strings and finish initialization while loading
1496+
mv(t0, STUB_THRESHOLD);
1497+
bge(cnt2, t0, *STUB);
1498+
lwu(tmpL, Address(strL));
1499+
load_long_misaligned(tmpU, Address(strU), tmp3, (base_offset % 8) != 0 ? 4 : 8);
1500+
subi(cnt2, cnt2, 4);
1501+
add(strL, strL, cnt2);
1502+
sub(cnt1, zr, cnt2);
1503+
slli(cnt2, cnt2, 1);
1504+
add(strU, strU, cnt2);
1505+
inflate_lo32(tmp3, tmpL);
1506+
mv(tmpL, tmp3);
1507+
sub(cnt2, zr, cnt2);
1508+
addi(cnt1, cnt1, 4);
1509+
addi(cnt2, cnt2, 8);
1510+
bne(tmpL, tmpU, DIFFERENCE);
1511+
bgez(cnt2, TAIL);
1512+
1513+
// main loop
1514+
bind(NEXT_WORD);
1515+
add(t0, strL, cnt1);
1516+
lwu(tmpL, Address(t0));
1517+
add(t0, strU, cnt2);
1518+
load_long_misaligned(tmpU, Address(t0), tmp3, (base_offset % 8) != 0 ? 4 : 8);
1519+
addi(cnt1, cnt1, 4);
1520+
inflate_lo32(tmp3, tmpL);
1521+
mv(tmpL, tmp3);
1522+
addi(cnt2, cnt2, 8);
1523+
bne(tmpL, tmpU, DIFFERENCE);
1524+
bltz(cnt2, NEXT_WORD);
1525+
1526+
bind(TAIL);
1527+
load_int_misaligned(tmpL, Address(strL), tmp3, false);
1528+
load_long_misaligned(tmpU, Address(strU), tmp3, 2);
1529+
inflate_lo32(tmp3, tmpL);
1530+
mv(tmpL, tmp3);
1531+
1532+
beq(tmpL, tmpU, *DONE);
1533+
1534+
// Find the first different characters in the longwords and
1535+
// compute their difference.
1536+
bind(DIFFERENCE);
1537+
xorr(tmp3, tmpL, tmpU);
1538+
// count the number of bits in the trailing zero chars of the XOR result
1539+
ctzc_bits(result, tmp3);
1540+
srl(tmpL, tmpL, result);
1541+
srl(tmpU, tmpU, result);
1542+
zext(tmpL, tmpL, 16);
1543+
zext(tmpU, tmpU, 16);
1544+
if (isLU) {
1545+
sub(result, tmpL, tmpU);
1546+
} else {
1547+
sub(result, tmpU, tmpL);
1548+
}
1549+
1550+
j(*DONE);
1551+
}
1552+
13851553
// Compare strings.
13861554
void C2_MacroAssembler::string_compare(Register str1, Register str2,
13871555
Register cnt1, Register cnt2, Register result,
13881556
Register tmp1, Register tmp2, Register tmp3,
13891557
int ae)
13901558
{
1391-
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
1392-
DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
1393-
SHORT_LOOP_START, TAIL_CHECK, L;
1559+
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, STUB,
1560+
SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
1561+
SHORT_LOOP_START, L;
13941562

13951563
const int STUB_THRESHOLD = 64 + 8;
13961564
bool isLL = ae == StrIntrinsicNode::LL;
@@ -1409,14 +1577,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
14091577
load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
14101578
load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu;
14111579

1412-
int base_offset1 = arrayOopDesc::base_offset_in_bytes(T_BYTE);
1413-
int base_offset2 = arrayOopDesc::base_offset_in_bytes(T_CHAR);
1414-
1415-
assert((base_offset1 % (UseCompactObjectHeaders ? 4 :
1416-
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
1417-
assert((base_offset2 % (UseCompactObjectHeaders ? 4 :
1418-
(UseCompressedClassPointers ? 8 : 4))) == 0, "Must be");
1419-
14201580
BLOCK_COMMENT("string_compare {");
14211581

14221582
// Bizarrely, the counts are passed in bytes, regardless of whether they
@@ -1434,154 +1594,23 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
14341594
mv(cnt2, cnt1);
14351595
bind(L);
14361596

1437-
// Load 4 bytes once to compare for alignment before main loop. Note that this
1438-
// is only possible for LL/UU case. We need to resort to load_long_misaligned
1439-
// for both LU and UL cases.
1440-
if (str1_isL == str2_isL) { // LL or UU
1441-
beq(str1, str2, DONE);
1442-
int base_offset = isLL ? base_offset1 : base_offset2;
1443-
if (AvoidUnalignedAccesses && (base_offset % 8) != 0) {
1444-
mv(t0, minCharsInWord / 2);
1445-
ble(cnt2, t0, SHORT_STRING);
1446-
lwu(tmp1, Address(str1));
1447-
lwu(tmp2, Address(str2));
1448-
bne(tmp1, tmp2, DIFFERENCE);
1449-
addi(str1, str1, 4);
1450-
addi(str2, str2, 4);
1451-
subi(cnt2, cnt2, minCharsInWord / 2);
1452-
}
1453-
}
1454-
14551597
// A very short string
14561598
mv(t0, minCharsInWord);
14571599
ble(cnt2, t0, SHORT_STRING);
14581600

14591601
// Compare longwords
1460-
// load first parts of strings and finish initialization while loading
14611602
{
14621603
if (str1_isL == str2_isL) { // LL or UU
1463-
#ifdef ASSERT
1464-
if (AvoidUnalignedAccesses) {
1465-
Label align_ok;
1466-
orr(t0, str1, str2);
1467-
andi(t0, t0, 0x7);
1468-
beqz(t0, align_ok);
1469-
stop("bad alignment");
1470-
bind(align_ok);
1471-
}
1472-
#endif
1473-
// load 8 bytes once to compare
1474-
ld(tmp1, Address(str1));
1475-
ld(tmp2, Address(str2));
1476-
mv(t0, STUB_THRESHOLD);
1477-
bge(cnt2, t0, STUB);
1478-
subi(cnt2, cnt2, minCharsInWord);
1479-
beqz(cnt2, TAIL_CHECK);
1480-
// convert cnt2 from characters to bytes
1481-
if (!str1_isL) {
1482-
slli(cnt2, cnt2, 1);
1483-
}
1484-
add(str2, str2, cnt2);
1485-
add(str1, str1, cnt2);
1486-
sub(cnt2, zr, cnt2);
1487-
} else if (isLU) { // LU case
1488-
mv(t0, STUB_THRESHOLD);
1489-
bge(cnt2, t0, STUB);
1490-
lwu(tmp1, Address(str1));
1491-
load_long_misaligned(tmp2, Address(str2), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
1492-
subi(cnt2, cnt2, 4);
1493-
add(str1, str1, cnt2);
1494-
sub(cnt1, zr, cnt2);
1495-
slli(cnt2, cnt2, 1);
1496-
add(str2, str2, cnt2);
1497-
inflate_lo32(tmp3, tmp1);
1498-
mv(tmp1, tmp3);
1499-
sub(cnt2, zr, cnt2);
1500-
addi(cnt1, cnt1, 4);
1501-
} else { // UL case
1502-
mv(t0, STUB_THRESHOLD);
1503-
bge(cnt2, t0, STUB);
1504-
load_long_misaligned(tmp1, Address(str1), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
1505-
lwu(tmp2, Address(str2));
1506-
subi(cnt2, cnt2, 4);
1507-
slli(t0, cnt2, 1);
1508-
sub(cnt1, zr, t0);
1509-
add(str1, str1, t0);
1510-
add(str2, str2, cnt2);
1511-
inflate_lo32(tmp3, tmp2);
1512-
mv(tmp2, tmp3);
1513-
sub(cnt2, zr, cnt2);
1514-
addi(cnt1, cnt1, 8);
1515-
}
1516-
addi(cnt2, cnt2, isUL ? 4 : 8);
1517-
bne(tmp1, tmp2, DIFFERENCE);
1518-
bgez(cnt2, TAIL);
1519-
1520-
// main loop
1521-
bind(NEXT_WORD);
1522-
if (str1_isL == str2_isL) { // LL or UU
1523-
// 8-byte aligned loads when AvoidUnalignedAccesses is enabled
1524-
add(t0, str1, cnt2);
1525-
ld(tmp1, Address(t0));
1526-
add(t0, str2, cnt2);
1527-
ld(tmp2, Address(t0));
1528-
addi(cnt2, cnt2, 8);
1529-
} else if (isLU) { // LU case
1530-
add(t0, str1, cnt1);
1531-
lwu(tmp1, Address(t0));
1532-
add(t0, str2, cnt2);
1533-
load_long_misaligned(tmp2, Address(t0), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
1534-
addi(cnt1, cnt1, 4);
1535-
inflate_lo32(tmp3, tmp1);
1536-
mv(tmp1, tmp3);
1537-
addi(cnt2, cnt2, 8);
1538-
} else { // UL case
1539-
add(t0, str2, cnt2);
1540-
lwu(tmp2, Address(t0));
1541-
add(t0, str1, cnt1);
1542-
load_long_misaligned(tmp1, Address(t0), tmp3, (base_offset2 % 8) != 0 ? 4 : 8);
1543-
inflate_lo32(tmp3, tmp2);
1544-
mv(tmp2, tmp3);
1545-
addi(cnt1, cnt1, 8);
1546-
addi(cnt2, cnt2, 4);
1604+
string_compare_long_same_encoding(result,
1605+
str1, str2, isLL,
1606+
cnt1, cnt2, tmp1, tmp2, tmp3,
1607+
STUB_THRESHOLD, &STUB, &SHORT_STRING, &DONE);
1608+
} else { // LU or UL
1609+
string_compare_long_different_encoding(result,
1610+
str1, str2, isLU,
1611+
cnt1, cnt2, tmp1, tmp2, tmp3,
1612+
STUB_THRESHOLD, &STUB, &DONE);
15471613
}
1548-
bne(tmp1, tmp2, DIFFERENCE);
1549-
bltz(cnt2, NEXT_WORD);
1550-
bind(TAIL);
1551-
if (str1_isL == str2_isL) { // LL or UU
1552-
load_long_misaligned(tmp1, Address(str1), tmp3, isLL ? 1 : 2);
1553-
load_long_misaligned(tmp2, Address(str2), tmp3, isLL ? 1 : 2);
1554-
} else if (isLU) { // LU case
1555-
load_int_misaligned(tmp1, Address(str1), tmp3, false);
1556-
load_long_misaligned(tmp2, Address(str2), tmp3, 2);
1557-
inflate_lo32(tmp3, tmp1);
1558-
mv(tmp1, tmp3);
1559-
} else { // UL case
1560-
load_int_misaligned(tmp2, Address(str2), tmp3, false);
1561-
load_long_misaligned(tmp1, Address(str1), tmp3, 2);
1562-
inflate_lo32(tmp3, tmp2);
1563-
mv(tmp2, tmp3);
1564-
}
1565-
bind(TAIL_CHECK);
1566-
beq(tmp1, tmp2, DONE);
1567-
1568-
// Find the first different characters in the longwords and
1569-
// compute their difference.
1570-
bind(DIFFERENCE);
1571-
xorr(tmp3, tmp1, tmp2);
1572-
// count bits of trailing zero chars
1573-
ctzc_bits(result, tmp3, isLL);
1574-
srl(tmp1, tmp1, result);
1575-
srl(tmp2, tmp2, result);
1576-
if (isLL) {
1577-
zext(tmp1, tmp1, 8);
1578-
zext(tmp2, tmp2, 8);
1579-
} else {
1580-
zext(tmp1, tmp1, 16);
1581-
zext(tmp2, tmp2, 16);
1582-
}
1583-
sub(result, tmp1, tmp2);
1584-
j(DONE);
15851614
}
15861615

15871616
bind(STUB);
@@ -2636,7 +2665,7 @@ void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register
26362665

26372666
int minCharsInWord = encLL ? wordSize : wordSize / 2;
26382667

2639-
BLOCK_COMMENT("string_compare {");
2668+
BLOCK_COMMENT("string_compare_v {");
26402669

26412670
// for Latin strings, 1 byte for 1 character
26422671
// for UTF16 strings, 2 bytes for 1 character
@@ -2696,6 +2725,8 @@ void C2_MacroAssembler::string_compare_v(Register str1, Register str2, Register
26962725
sub(result, tmp1, tmp2);
26972726

26982727
bind(DONE);
2728+
2729+
BLOCK_COMMENT("} string_compare_v");
26992730
}
27002731

27012732
void C2_MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) {

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,15 @@
3939
VectorRegister vrs,
4040
bool is_latin, Label& DONE, Assembler::LMUL lmul);
4141

42+
void string_compare_long_same_encoding(Register result, Register str1, Register str2,
43+
const bool isLL, Register cnt1, Register cnt2,
44+
Register tmp1, Register tmp2, Register tmp3,
45+
const int STUB_THRESHOLD, Label *STUB, Label *SHORT_STRING, Label *DONE);
46+
void string_compare_long_different_encoding(Register result, Register str1, Register str2,
47+
bool isLU, Register cnt1, Register cnt2,
48+
Register tmp1, Register tmp2, Register tmp3,
49+
const int STUB_THRESHOLD, Label *STUB, Label *DONE);
50+
4251
public:
4352
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
4453
void fast_lock(Register object, Register box,

0 commit comments

Comments
 (0)