Skip to content

Commit cb7875d

Browse files
author
Hamlin Li
committed
8318218: RISC-V: C2 CompressBits
Reviewed-by: fyang, fjiang
1 parent 1535528 commit cb7875d

File tree

4 files changed

+105
-0
lines changed

4 files changed

+105
-0
lines changed

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

+38
Original file line numberDiff line numberDiff line change
@@ -1676,6 +1676,44 @@ void C2_MacroAssembler::signum_fp(FloatRegister dst, FloatRegister one, bool is_
16761676
bind(done);
16771677
}
16781678

1679+
// Emits an RVV instruction sequence that performs the bit-compress operation
// (see j.l.Integer/Long::compress) of `src` under `mask`, leaving the result in `dst`.
//
//   dst     - general register that receives the compressed bits
//   src     - general register holding the value whose bits are selected
//   mask    - general register holding the selection mask
//   is_long - true for the 64-bit variant, false for the 32-bit variant
//
// Approach: expand every bit of src into one byte (0 or 1), let vcompress pack the
// bytes selected by the mask bits, then fold the packed bytes back into bits.
// Scratch state written here: t0, v0, and the vector register groups that start
// at v4 and v8 (callers must treat them as clobbered).
void C2_MacroAssembler::compress_bits_v(Register dst, Register src, Register mask, bool is_long) {
1680+
// Element width used whenever a scalar operand is moved into or out of v0.
Assembler::SEW sew = is_long ? Assembler::e64 : Assembler::e32;
1681+
// intrinsic is enabled when MaxVectorSize >= 16
1682+
// Register grouping for the byte-per-bit phase: `len` e8 elements are needed, so with
// 16-byte vector registers that is 2 registers for int (32 bytes) and 4 for long (64 bytes).
Assembler::LMUL lmul = is_long ? Assembler::m4 : Assembler::m2;
1683+
// Number of bits in the operand, which is also the number of e8 elements processed.
long len = is_long ? 64 : 32;
1684+
1685+
// load the src data(in bits) to be compressed.
1686+
// vl = 1 at the operand's element width: src lands in element 0 of v0, so its bits
// can subsequently be read as mask bits.
vsetivli(x0, 1, sew, Assembler::m1);
1687+
vmv_s_x(v0, src);
1688+
// reset the src data(in bytes) to zero.
1689+
// t0 holds the element count; it is reused by the second vsetvli below, so it must
// not be overwritten in between.
mv(t0, len);
1690+
vsetvli(x0, t0, Assembler::e8, lmul);
1691+
vmv_v_i(v4, 0);
1692+
// convert the src data from bits to bytes.
1693+
// Byte i of v4 becomes 1 where bit i of v0 (i.e. of src) is set, and stays 0 otherwise.
vmerge_vim(v4, v4, 1); // v0 as the implicit mask register
1694+
// reset the dst data(in bytes) to zero.
1695+
// NOTE(review): vcompress only writes as many elements as there are set mask bits, so the
// zeroing presumably supplies the remaining (upper) result bytes — confirm against the
// tail policy selected by vsetvli.
vmv_v_i(v8, 0);
1696+
// load the mask data(in bits).
1697+
// v0 is overwritten here; the expanded copy of src already lives in v4.
vsetivli(x0, 1, sew, Assembler::m1);
1698+
vmv_s_x(v0, mask);
1699+
// compress the src data(in bytes) to dst(in bytes).
1700+
// Switch back to `len` e8 elements (t0 still holds len).
vsetvli(x0, t0, Assembler::e8, lmul);
1701+
vcompress_vm(v8, v4, v0);
1702+
// convert the dst data from bytes to bits.
1703+
// Mask bit i of v0 is set where byte i of v8 equals 1.
vmseq_vi(v0, v8, 1);
1704+
// store result back.
1705+
// Read element 0 of v0 at the operand's element width into the general register.
vsetivli(x0, 1, sew, Assembler::m1);
1706+
vmv_x_s(dst, v0);
1707+
}
}
1708+
1709+
// 32-bit (int) entry point for the RVV bit-compress sequence:
// forwards to compress_bits_v with is_long == false.
void C2_MacroAssembler::compress_bits_i_v(Register dst, Register src, Register mask) {
1710+
compress_bits_v(dst, src, mask, /* is_long */ false);
1711+
}
1712+
1713+
// 64-bit (long) entry point for the RVV bit-compress sequence:
// forwards to compress_bits_v with is_long == true.
void C2_MacroAssembler::compress_bits_l_v(Register dst, Register src, Register mask) {
1714+
compress_bits_v(dst, src, mask, /* is_long */ true);
1715+
}
1716+
16791717
void C2_MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2,
16801718
VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) {
16811719
Label loop;

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp

+8
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@
3838
VectorRegister vr1, VectorRegister vr2,
3939
VectorRegister vrs,
4040
bool is_latin, Label& DONE);
41+
42+
// Shared implementation behind compress_bits_i_v / compress_bits_l_v;
// is_long selects the 64-bit (true) or 32-bit (false) variant.
void compress_bits_v(Register dst, Register src, Register mask, bool is_long);
43+
4144
public:
4245
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
4346
// See full description in macroAssembler_riscv.cpp.
@@ -160,6 +163,11 @@
160163
void signum_fp(FloatRegister dst, FloatRegister one, bool is_double);
161164

162165
// intrinsic methods implemented by rvv instructions
166+
167+
// compress bits, i.e. j.l.Integer/Long::compress.
// The _i_ variant handles the 32-bit (Integer) case, the _l_ variant the 64-bit (Long) case;
// both take the value in src, the bit mask in mask, and write the result to dst.
168+
void compress_bits_i_v(Register dst, Register src, Register mask);
169+
void compress_bits_l_v(Register dst, Register src, Register mask);
170+
163171
void string_equals_v(Register r1, Register r2,
164172
Register result, Register cnt1,
165173
int elem_size);

src/hotspot/cpu/riscv/riscv.ad

+4
Original file line numberDiff line numberDiff line change
@@ -1893,6 +1893,10 @@ bool Matcher::match_rule_supported(int opcode) {
18931893
case Op_CountPositives:
18941894
return UseRVV;
18951895

1896+
case Op_CompressBits:
1897+
guarantee(UseRVV == (MaxVectorSize >= 16), "UseRVV and MaxVectorSize not matched");
1898+
return UseRVV;
1899+
18961900
case Op_EncodeISOArray:
18971901
return UseRVV && SpecialEncodeISOArray;
18981902

src/hotspot/cpu/riscv/riscv_v.ad

+55
Original file line numberDiff line numberDiff line change
@@ -2880,6 +2880,61 @@ instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy,
28802880
ins_pipe(pipe_class_memory);
28812881
%}
28822882

2883+
// CompressBits of Long & Integer
2884+
2885+
// Matches CompressBits on int operands when UseRVV is set and emits
// C2_MacroAssembler::compress_bits_i_v. The vector operands are not inputs or outputs:
// they are listed only so that the effect() clause can mark them TEMP, since the
// emitted sequence writes v0 and the e8/m2 register groups starting at v4 and v8
// (v5 and v9 are presumably the second registers of those groups).
// The format text mirrors the instruction sequence emitted by compress_bits_v for is_long == false.
instruct compressBitsI(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask, vRegMask_V0 v0,
2886+
vReg_V4 v4, vReg_V5 v5, vReg_V8 v8, vReg_V9 v9) %{
2887+
predicate(UseRVV);
2888+
match(Set dst (CompressBits src mask));
2889+
effect(TEMP v0, TEMP v4, TEMP v5, TEMP v8, TEMP v9);
2890+
format %{ "vsetivli x0, 1, e32, m1, tu, mu\t#@compressBitsI\n\t"
2891+
"vmv.s.x $v0, $src\n\t"
2892+
"mv t0, 32\n\t"
2893+
"vsetvli x0, t0, e8, m2, tu, mu\n\t"
2894+
"vmv.v.i $v4, 0\n\t"
2895+
"vmerge.vim $v4, $v4, 1, $v0\n\t"
2896+
"vmv.v.i $v8, 0\n\t"
2897+
"vsetivli x0, 1, e32, m1, tu, mu\n\t"
2898+
"vmv.s.x $v0, $mask\n\t"
2899+
"vsetvli x0, t0, e8, m2, tu, mu\n\t"
2900+
"vcompress.vm $v8, $v4, $v0\n\t"
2901+
"vmseq.vi $v0, $v8, 1\n\t"
2902+
"vsetivli x0, 1, e32, m1, tu, mu\n\t"
2903+
"vmv.x.s $dst, $v0\t#@compressBitsI\n\t"
2904+
%}
2905+
ins_encode %{
2906+
__ compress_bits_i_v(as_Register($dst$$reg), as_Register($src$$reg), as_Register($mask$$reg));
2907+
%}
2908+
ins_pipe(pipe_slow);
2909+
%}
2910+
2911+
// Matches CompressBits on long operands when UseRVV is set and emits
// C2_MacroAssembler::compress_bits_l_v. The vector operands are not inputs or outputs:
// they are listed only so that the effect() clause can mark them TEMP, since the
// emitted sequence writes v0 and the e8/m4 register groups starting at v4 and v8
// (v5-v7 and v9-v11 are presumably the remaining registers of those groups).
// The format text mirrors the instruction sequence emitted by compress_bits_v for is_long == true.
instruct compressBitsL(iRegLNoSp dst, iRegL src, iRegL mask, vRegMask_V0 v0,
2912+
vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7,
2913+
vReg_V8 v8, vReg_V9 v9, vReg_V10 v10, vReg_V11 v11) %{
2914+
predicate(UseRVV);
2915+
match(Set dst (CompressBits src mask));
2916+
effect(TEMP v0, TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP v8, TEMP v9, TEMP v10, TEMP v11);
2917+
format %{ "vsetivli x0, 1, e64, m1, tu, mu\t#@compressBitsL\n\t"
2918+
"vmv.s.x $v0, $src\n\t"
2919+
"mv t0, 64\n\t"
2920+
"vsetvli x0, t0, e8, m4, tu, mu\n\t"
2921+
"vmv.v.i $v4, 0\n\t"
2922+
"vmerge.vim $v4, $v4, 1, $v0\n\t"
2923+
"vmv.v.i $v8, 0\n\t"
2924+
"vsetivli x0, 1, e64, m1, tu, mu\n\t"
2925+
"vmv.s.x $v0, $mask\n\t"
2926+
"vsetvli x0, t0, e8, m4, tu, mu\n\t"
2927+
"vcompress.vm $v8, $v4, $v0\n\t"
2928+
"vmseq.vi $v0, $v8, 1\n\t"
2929+
"vsetivli x0, 1, e64, m1, tu, mu\n\t"
2930+
"vmv.x.s $dst, $v0\t#@compressBitsL\n\t"
2931+
%}
2932+
ins_encode %{
2933+
__ compress_bits_l_v(as_Register($dst$$reg), as_Register($src$$reg), as_Register($mask$$reg));
2934+
%}
2935+
ins_pipe(pipe_slow);
2936+
%}
2937+
28832938
// Vector Load Const
28842939
instruct vloadcon(vReg dst, immI0 src) %{
28852940
match(Set dst (VectorLoadConst src));

0 commit comments

Comments
 (0)