Skip to content

Commit 5d82d67

Browse files
author
Jatin Bhateja
committed
8290034: Auto vectorize reverse bit operations.
Reviewed-by: xgong, kvn
1 parent 348a052 commit 5d82d67

File tree

18 files changed

+425
-0
lines changed

18 files changed

+425
-0
lines changed

src/hotspot/cpu/x86/assembler_x86.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10115,6 +10115,14 @@ void Assembler::evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegist
1011510115
emit_int8(imm8);
1011610116
}
1011710117

10118+
// Emits the two-operand gf2p8affineqb instruction: dst is both the destination
// and the first source, src is the second source and imm8 is the trailing
// immediate byte. Debug builds assert that the CPU reports GFNI and SSE support.
void Assembler::gf2p8affineqb(XMMRegister dst, XMMRegister src, int imm8) {
10119+
assert(VM_Version::supports_gfni(), "");
10120+
assert(VM_Version::supports_sse(), "");
10121+
// 128-bit operation with rex_w set, no opmask register and no EVEX vector-length use.
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
10122+
// dst is passed twice so that it serves as destination and first source operand.
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
10123+
// Opcode byte 0xCE, register-direct ModRM byte (0xC0 | encode), then the immediate.
emit_int24((unsigned char)0xCE, (unsigned char)(0xC0 | encode), imm8);
10124+
}
10125+
1011810126
void Assembler::vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len) {
1011910127
assert(VM_Version::supports_gfni(), "requires GFNI support");
1012010128
assert(VM_Version::supports_sse(), "");

src/hotspot/cpu/x86/assembler_x86.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2801,6 +2801,7 @@ class Assembler : public AbstractAssembler {
28012801
void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
28022802

28032803
// Galois field affine transformation instructions.
2804+
void gf2p8affineqb(XMMRegister dst, XMMRegister src, int imm8);
28042805
void vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len);
28052806

28062807
protected:

src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5484,6 +5484,90 @@ void C2_MacroAssembler::udivmodI(Register rax, Register divisor, Register rdx, R
54845484
}
54855485

54865486
#ifdef _LP64
5487+
// Emits code that reverses the bit order of the 32-bit value in src and leaves
// the result in dst.
//
//   dst          - result register
//   src          - input register
//   xtmp1, xtmp2 - vector temporaries, only touched on the GFNI path
//   rtmp         - general purpose temporary, overwritten on both paths
//
// Both paths first reverse the bits inside every byte and then share the final
// bswapl, which reverses the order of the four bytes.
// On the non-GFNI path rtmp must not alias dst: dst is reloaded from src while
// rtmp still holds the other half of the intermediate value.
void C2_MacroAssembler::reverseI(Register dst, Register src, XMMRegister xtmp1,
5488+
XMMRegister xtmp2, Register rtmp) {
5489+
if(VM_Version::supports_gfni()) {
5490+
// Galois field instruction based bit reversal based on following algorithm.
5491+
// http://0x80.pl/articles/avx512-galois-field-for-bit-shuffling.html
5492+
// rtmp holds the 8x8 bit-matrix constant that is fed to gf2p8affineqb as its
// second operand (per the article above it selects a per-byte bit reversal).
mov64(rtmp, 0x8040201008040201L);
5493+
movq(xtmp1, src);
5494+
movq(xtmp2, rtmp);
5495+
gf2p8affineqb(xtmp1, xtmp2, 0);
5496+
movq(dst, xtmp1);
5497+
} else {
5498+
// Swap even and odd numbered bits.
5499+
// rtmp = (src & 0x55555555) << 1, dst = (src & 0xAAAAAAAA) >>> 1, dst |= rtmp.
movl(rtmp, src);
5500+
andl(rtmp, 0x55555555);
5501+
shll(rtmp, 1);
5502+
movl(dst, src);
5503+
andl(dst, 0xAAAAAAAA);
5504+
shrl(dst, 1);
5505+
orl(dst, rtmp);
5506+
5507+
// Swap LSB and MSB 2 bits of each nibble.
5508+
movl(rtmp, dst);
5509+
andl(rtmp, 0x33333333);
5510+
shll(rtmp, 2);
5511+
andl(dst, 0xCCCCCCCC);
5512+
shrl(dst, 2);
5513+
orl(dst, rtmp);
5514+
5515+
// Swap LSB and MSB 4 bits of each byte.
5516+
movl(rtmp, dst);
5517+
andl(rtmp, 0x0F0F0F0F);
5518+
shll(rtmp, 4);
5519+
andl(dst, 0xF0F0F0F0);
5520+
shrl(dst, 4);
5521+
orl(dst, rtmp);
5522+
}
5523+
// Bits are now reversed within each byte; reversing the byte order completes
// the 32-bit reversal.
bswapl(dst);
5524+
}
5525+
5526+
// Emits code that reverses the bit order of the 64-bit value in src and leaves
// the result in dst.
//
//   dst          - result register
//   src          - input register
//   xtmp1, xtmp2 - vector temporaries, only touched on the GFNI path
//   rtmp1        - general purpose temporary, overwritten on both paths
//   rtmp2        - general purpose temporary holding the 64-bit masks; only
//                  touched on the non-GFNI path
//
// Both paths first reverse the bits inside every byte and then share the final
// bswapq, which reverses the order of the eight bytes.
void C2_MacroAssembler::reverseL(Register dst, Register src, XMMRegister xtmp1,
5527+
XMMRegister xtmp2, Register rtmp1, Register rtmp2) {
5528+
if(VM_Version::supports_gfni()) {
5529+
// Galois field instruction based bit reversal based on following algorithm.
5530+
// http://0x80.pl/articles/avx512-galois-field-for-bit-shuffling.html
5531+
// rtmp1 holds the 8x8 bit-matrix constant that is fed to gf2p8affineqb as its
// second operand (per the article above it selects a per-byte bit reversal).
mov64(rtmp1, 0x8040201008040201L);
5532+
movq(xtmp1, src);
5533+
movq(xtmp2, rtmp1);
5534+
gf2p8affineqb(xtmp1, xtmp2, 0);
5535+
movq(dst, xtmp1);
5536+
} else {
5537+
// Swap even and odd numbered bits.
5538+
// Each step loads one mask into rtmp2, uses it for the half that is shifted
// left, then inverts it in place (notq) to obtain the complementary mask for
// the half that is shifted right.
movq(rtmp1, src);
5539+
mov64(rtmp2, 0x5555555555555555L);
5540+
andq(rtmp1, rtmp2);
5541+
shlq(rtmp1, 1);
5542+
movq(dst, src);
5543+
notq(rtmp2);
5544+
andq(dst, rtmp2);
5545+
shrq(dst, 1);
5546+
orq(dst, rtmp1);
5547+
5548+
// Swap LSB and MSB 2 bits of each nibble.
5549+
movq(rtmp1, dst);
5550+
mov64(rtmp2, 0x3333333333333333L);
5551+
andq(rtmp1, rtmp2);
5552+
shlq(rtmp1, 2);
5553+
notq(rtmp2);
5554+
andq(dst, rtmp2);
5555+
shrq(dst, 2);
5556+
orq(dst, rtmp1);
5557+
5558+
// Swap LSB and MSB 4 bits of each byte.
5559+
movq(rtmp1, dst);
5560+
mov64(rtmp2, 0x0F0F0F0F0F0F0F0FL);
5561+
andq(rtmp1, rtmp2);
5562+
shlq(rtmp1, 4);
5563+
notq(rtmp2);
5564+
andq(dst, rtmp2);
5565+
shrq(dst, 4);
5566+
orq(dst, rtmp1);
5567+
}
5568+
// Bits are now reversed within each byte; reversing the byte order completes
// the 64-bit reversal.
bswapq(dst);
5569+
}
5570+
54875571
void C2_MacroAssembler::udivL(Register rax, Register divisor, Register rdx) {
54885572
Label done;
54895573
Label neg_divisor_fastpath;

src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,10 @@
368368
void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
369369

370370
#ifdef _LP64
371+
void reverseI(Register dst, Register src, XMMRegister xtmp1,
372+
XMMRegister xtmp2, Register rtmp);
373+
void reverseL(Register dst, Register src, XMMRegister xtmp1,
374+
XMMRegister xtmp2, Register rtmp1, Register rtmp2);
371375
void udivL(Register rax, Register divisor, Register rdx);
372376
void umodL(Register rax, Register divisor, Register rdx);
373377
void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);

src/hotspot/cpu/x86/x86_64.ad

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6721,6 +6721,50 @@ instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
67216721
ins_pipe(ialu_reg);
67226722
%}
67236723

6724+
//--------------- Reverse Operation Instructions ----------------
6725+
// ReverseI on CPUs without GFNI: selected only when the predicate reports no
// GFNI support. dst and rtmp are reserved as temporaries and the flags register
// is killed. No vector temporaries are needed, so xnoreg is passed for both.
instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
6726+
predicate(!VM_Version::supports_gfni());
6727+
match(Set dst (ReverseI src));
6728+
effect(TEMP dst, TEMP rtmp, KILL cr);
6729+
format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
6730+
ins_encode %{
6731+
__ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
6732+
%}
6733+
ins_pipe( ialu_reg );
6734+
%}
6735+
6736+
// ReverseI on CPUs with GFNI: selected only when the predicate reports GFNI
// support. Reserves two vector temporaries plus a long general purpose
// temporary (rtmp is rRegL because reverseI loads a 64-bit constant into it),
// and kills the flags register.
instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, regF xtmp1, regF xtmp2, rRegL rtmp, rFlagsReg cr) %{
6737+
predicate(VM_Version::supports_gfni());
6738+
match(Set dst (ReverseI src));
6739+
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
6740+
format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
6741+
ins_encode %{
6742+
__ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
6743+
%}
6744+
ins_pipe( ialu_reg );
6745+
%}
6746+
6747+
// ReverseL on CPUs without GFNI: selected only when the predicate reports no
// GFNI support. dst, rtmp1 and rtmp2 are reserved as temporaries and the flags
// register is killed. No vector temporaries are needed, so xnoreg is passed for
// both.
instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
6748+
predicate(!VM_Version::supports_gfni());
6749+
match(Set dst (ReverseL src));
6750+
effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
6751+
format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
6752+
ins_encode %{
6753+
__ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
6754+
%}
6755+
ins_pipe( ialu_reg );
6756+
%}
6757+
6758+
// ReverseL on CPUs with GFNI: selected only when the predicate reports GFNI
// support. Reserves two vector temporaries and one general purpose temporary,
// and kills the flags register. noreg is passed for the second general purpose
// temporary of reverseL.
instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, regD xtmp1, regD xtmp2, rRegL rtmp, rFlagsReg cr) %{
6759+
predicate(VM_Version::supports_gfni());
6760+
match(Set dst (ReverseL src));
6761+
effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
6762+
format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
6763+
ins_encode %{
6764+
__ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
6765+
%}
6766+
ins_pipe( ialu_reg );
6767+
%}
67246768

67256769
//---------- Population Count Instructions -------------------------------------
67266770

src/hotspot/share/classfile/vmIntrinsics.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,9 @@ class methodHandle;
246246
do_intrinsic(_expand_i, java_lang_Integer, expand_name, int2_int_signature, F_S) \
247247
do_intrinsic(_expand_l, java_lang_Long, expand_name, long2_long_signature, F_S) \
248248
\
249+
do_intrinsic(_reverse_i, java_lang_Integer, reverse_name, int_int_signature, F_S) \
250+
do_name( reverse_name, "reverse") \
251+
do_intrinsic(_reverse_l, java_lang_Long, reverse_name, long_long_signature, F_S) \
249252
do_intrinsic(_reverseBytes_i, java_lang_Integer, reverseBytes_name, int_int_signature, F_S) \
250253
do_name( reverseBytes_name, "reverseBytes") \
251254
do_intrinsic(_reverseBytes_l, java_lang_Long, reverseBytes_name, long_long_signature, F_S) \

src/hotspot/share/opto/c2compiler.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,12 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
263263
case vmIntrinsics::_numberOfTrailingZeros_l:
264264
if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false;
265265
break;
266+
case vmIntrinsics::_reverse_i:
267+
if (!Matcher::match_rule_supported(Op_ReverseI)) return false;
268+
break;
269+
case vmIntrinsics::_reverse_l:
270+
if (!Matcher::match_rule_supported(Op_ReverseL)) return false;
271+
break;
266272
case vmIntrinsics::_reverseBytes_c:
267273
if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return false;
268274
break;

src/hotspot/share/opto/library_call.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
525525
case vmIntrinsics::_numberOfTrailingZeros_l:
526526
case vmIntrinsics::_bitCount_i:
527527
case vmIntrinsics::_bitCount_l:
528+
case vmIntrinsics::_reverse_i:
529+
case vmIntrinsics::_reverse_l:
528530
case vmIntrinsics::_reverseBytes_i:
529531
case vmIntrinsics::_reverseBytes_l:
530532
case vmIntrinsics::_reverseBytes_s:
@@ -2060,6 +2062,8 @@ bool LibraryCallKit::inline_number_methods(vmIntrinsics::ID id) {
20602062
case vmIntrinsics::_reverseBytes_s: n = new ReverseBytesSNode( 0, arg); break;
20612063
case vmIntrinsics::_reverseBytes_i: n = new ReverseBytesINode( 0, arg); break;
20622064
case vmIntrinsics::_reverseBytes_l: n = new ReverseBytesLNode( 0, arg); break;
2065+
case vmIntrinsics::_reverse_i: n = new ReverseINode(0, arg); break;
2066+
case vmIntrinsics::_reverse_l: n = new ReverseLNode(0, arg); break;
20632067
default: fatal_unexpected_iid(id); break;
20642068
}
20652069
set_result(_gvn.transform(n));

src/hotspot/share/opto/subnode.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1899,3 +1899,50 @@ const Type* SqrtFNode::Value(PhaseGVN* phase) const {
18991899
if( f < 0.0f ) return Type::FLOAT;
19001900
return TypeF::make( (float)sqrt( (double)f ) );
19011901
}
1902+
1903+
static jlong reverse_bits(jlong val) {
1904+
jlong res = ((val & 0xF0F0F0F0F0F0F0F0L) >> 4) | ((val & 0x0F0F0F0F0F0F0F0F) << 4);
1905+
res = ((res & 0xCCCCCCCCCCCCCCCCL) >> 2) | ((res & 0x3333333333333333L) << 2);
1906+
res = ((res & 0xAAAAAAAAAAAAAAAAL) >> 1) | ((res & 0x5555555555555555L) << 1);
1907+
return res;
1908+
}
1909+
1910+
const Type* ReverseINode::Value(PhaseGVN* phase) const {
1911+
const Type *t1 = phase->type( in(1) );
1912+
if (t1 == Type::TOP) {
1913+
return Type::TOP;
1914+
}
1915+
const TypeInt* t1int = t1->isa_int();
1916+
if (t1int && t1int->is_con()) {
1917+
jint res = reverse_bits(t1int->get_con());
1918+
return TypeInt::make(res);
1919+
}
1920+
return t1int;
1921+
}
1922+
1923+
const Type* ReverseLNode::Value(PhaseGVN* phase) const {
1924+
const Type *t1 = phase->type( in(1) );
1925+
if (t1 == Type::TOP) {
1926+
return Type::TOP;
1927+
}
1928+
const TypeLong* t1long = t1->isa_long();
1929+
if (t1long && t1long->is_con()) {
1930+
jint res = reverse_bits(t1long->get_con());
1931+
return TypeLong::make(res);
1932+
}
1933+
return t1long;
1934+
}
1935+
1936+
// Reversing the bits twice yields the original value, so a ReverseI whose
// input is itself a ReverseI collapses to that inner node's input. Any other
// node is returned unchanged.
Node* ReverseINode::Identity(PhaseGVN* phase) {
  Node* const operand = in(1);
  const bool operand_is_reverse = (operand->Opcode() == Op_ReverseI);
  return operand_is_reverse ? operand->in(1) : this;
}
1942+
1943+
// Reversing the bits twice yields the original value, so a ReverseL whose
// input is itself a ReverseL collapses to that inner node's input. Any other
// node is returned unchanged.
Node* ReverseLNode::Identity(PhaseGVN* phase) {
  Node* const operand = in(1);
  const bool operand_is_reverse = (operand->Opcode() == Op_ReverseL);
  return operand_is_reverse ? operand->in(1) : this;
}

src/hotspot/share/opto/subnode.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,8 @@ class ReverseINode : public Node {
580580
virtual int Opcode() const;
581581
const Type *bottom_type() const { return TypeInt::INT; }
582582
virtual uint ideal_reg() const { return Op_RegI; }
583+
virtual Node* Identity(PhaseGVN* phase);
584+
virtual const Type* Value(PhaseGVN* phase) const;
583585
};
584586

585587
//-------------------------------ReverseLNode--------------------------------
@@ -590,6 +592,8 @@ class ReverseLNode : public Node {
590592
virtual int Opcode() const;
591593
const Type *bottom_type() const { return TypeLong::LONG; }
592594
virtual uint ideal_reg() const { return Op_RegL; }
595+
virtual Node* Identity(PhaseGVN* phase);
596+
virtual const Type* Value(PhaseGVN* phase) const;
593597
};
594598

595599
#endif // SHARE_OPTO_SUBNODE_HPP

0 commit comments

Comments
 (0)