Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 58 additions & 4 deletions src/hotspot/cpu/x86/assembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,8 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
tail_size = 1;
break;

case 0x10: // movups
case 0x11: // movups
case 0x12: // movlps
case 0x28: // movaps
case 0x2E: // ucomiss
Expand Down Expand Up @@ -2538,10 +2540,22 @@ void Assembler::movddup(XMMRegister dst, XMMRegister src) {
emit_int16(0x12, 0xC0 | encode);
}

// MOVDDUP xmm, m64 (F2 0F 12): load a 64-bit value from memory and
// duplicate it into both 64-bit lanes of dst.
void Assembler::movddup(XMMRegister dst, Address src) {
// MOVDDUP is SSE3; only 32-bit VMs need the explicit feature check.
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
InstructionMark im(this);
// EVEX_DUP tuple with a 64-bit memory operand — controls disp8 scaling
// when the EVEX encoding is selected.
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_DUP, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x12);
emit_operand(dst, src);
}

void Assembler::vmovddup(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_DUP, /* input_size_in_bits */ EVEX_64bit);
attributes.set_rex_vex_w_reverted();
simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x12);
Expand Down Expand Up @@ -3482,6 +3496,46 @@ void Assembler::movswl(Register dst, Register src) { // movsxw
emit_int24(0x0F, (unsigned char)0xBF, (0xC0 | encode));
}

// MOVUPS xmm, m128 (0F 10): unaligned load of 4 packed single-precision
// floats from memory into dst.
void Assembler::movups(XMMRegister dst, Address src) {
// MOVUPS is baseline SSE; only 32-bit VMs need the explicit check.
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
// Full-vector memory tuple (FVM) with 32-bit element granularity for
// EVEX disp8 scaling.
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_32bit);
simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8(0x10);
emit_operand(dst, src);
}

// VMOVUPS xmm/ymm/zmm, m (VEX/EVEX 0F 10): unaligned packed-single load
// at the requested vector length.
void Assembler::vmovups(XMMRegister dst, Address src, int vector_len) {
// The 512-bit form requires EVEX (AVX-512); narrower forms need only AVX.
assert(vector_len == AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_32bit);
simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8(0x10);
emit_operand(dst, src);
}

// MOVUPS m128, xmm (0F 11): unaligned store of 4 packed single-precision
// floats from src to memory.
void Assembler::movups(Address dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_32bit);
// Note the swapped operand order below: the register operand (src) goes
// into the ModRM reg field, the memory operand (dst) into rm.
simd_prefix(src, xnoreg, dst, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8(0x11);
emit_operand(src, dst);
}

// VMOVUPS m, xmm/ymm/zmm (VEX/EVEX 0F 11): unaligned packed-single store
// at the requested vector length.
void Assembler::vmovups(Address dst, XMMRegister src, int vector_len) {
// The 512-bit form requires EVEX (AVX-512); narrower forms need only AVX.
assert(vector_len == AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_32bit);
// Register operand in ModRM.reg, memory operand in rm — hence src/dst order.
simd_prefix(src, xnoreg, dst, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8(0x11);
emit_operand(src, dst);
}

void Assembler::movw(Address dst, int imm16) {
InstructionMark im(this);

Expand Down Expand Up @@ -5133,30 +5187,30 @@ void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, i
emit_int24(0x43, (0xC0 | encode), imm8 & 0xFF);
}

void Assembler::pshufpd(XMMRegister dst, XMMRegister src, int imm8) {
// SHUFPD xmm, xmm, imm8 (66 0F C6): shuffle packed doubles; the low two
// bits of imm8 pick the source qword for each destination lane.
void Assembler::shufpd(XMMRegister dst, XMMRegister src, int imm8) {
assert(isByte(imm8), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
// dst doubles as the first source (non-destructive VEX form uses dst, dst, src).
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
}

void Assembler::vpshufpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
// VSHUFPD (VEX/EVEX 66 0F C6): three-operand shuffle of packed doubles.
// NOTE(review): no CPU-feature assert here — callers are presumably
// expected to have checked AVX support; confirm against call sites.
void Assembler::vshufpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_rex_vex_w_reverted();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
}

void Assembler::pshufps(XMMRegister dst, XMMRegister src, int imm8) {
// SHUFPS xmm, xmm, imm8 (0F C6): shuffle packed singles; each 2-bit field
// of imm8 selects a source dword for the corresponding destination lane.
void Assembler::shufps(XMMRegister dst, XMMRegister src, int imm8) {
assert(isByte(imm8), "invalid value");
// NOTE(review): SHUFPS is an SSE1 instruction; the SSE2 assert below is
// conservative (carried over from the pshufps predecessor).
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
}

void Assembler::vpshufps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
void Assembler::vshufps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
Expand Down
14 changes: 10 additions & 4 deletions src/hotspot/cpu/x86/assembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1477,6 +1477,7 @@ class Assembler : public AbstractAssembler {
void movb(Register dst, Address src);

void movddup(XMMRegister dst, XMMRegister src);
void movddup(XMMRegister dst, Address src);
void vmovddup(XMMRegister dst, Address src, int vector_len);

void kandbl(KRegister dst, KRegister src1, KRegister src2);
Expand Down Expand Up @@ -1648,6 +1649,11 @@ class Assembler : public AbstractAssembler {
void movswq(Register dst, Register src);
#endif

void movups(XMMRegister dst, Address src);
void vmovups(XMMRegister dst, Address src, int vector_len);
void movups(Address dst, XMMRegister src);
void vmovups(Address dst, XMMRegister src, int vector_len);

void movw(Address dst, int imm16);
void movw(Register dst, Address src);
void movw(Address dst, Register src);
Expand Down Expand Up @@ -1927,10 +1933,10 @@ class Assembler : public AbstractAssembler {
void pshuflw(XMMRegister dst, Address src, int mode);

//shuffle floats and doubles
void pshufps(XMMRegister, XMMRegister, int);
void pshufpd(XMMRegister, XMMRegister, int);
void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);
void shufps(XMMRegister, XMMRegister, int);
void shufpd(XMMRegister, XMMRegister, int);
void vshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
void vshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);

// Shuffle packed values at 128 bit granularity
void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
Expand Down
48 changes: 40 additions & 8 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1643,12 +1643,12 @@ void C2_MacroAssembler::load_vector_mask(KRegister dst, XMMRegister src, XMMRegi

// Load a vlen_in_bytes-wide vector from memory into dst, choosing the
// narrowest move instruction that covers the requested width.
void C2_MacroAssembler::load_vector(XMMRegister dst, Address src, int vlen_in_bytes) {
switch (vlen_in_bytes) {
// NOTE(review): this span is a rendered diff — the next six 'case' lines
// are the pre-change arms (deleted; 64-byte case used evmovdquq), and the
// six after them are the post-change, live arms (64-byte case uses
// evmovdqul). Only one set exists in the merged file.
case 4: movdl(dst, src); break;
case 8: movq(dst, src); break;
case 16: movdqu(dst, src); break;
case 32: vmovdqu(dst, src); break;
case 64: evmovdquq(dst, src, Assembler::AVX_512bit); break;
default: ShouldNotReachHere();
case 4: movdl(dst, src); break;
case 8: movq(dst, src); break;
case 16: movdqu(dst, src); break;
case 32: vmovdqu(dst, src); break;
case 64: evmovdqul(dst, src, Assembler::AVX_512bit); break;
default: ShouldNotReachHere();
}
}

Expand All @@ -1661,6 +1661,38 @@ void C2_MacroAssembler::load_vector(XMMRegister dst, AddressLiteral src, int vle
}
}

// Broadcast a scalar constant at src into dst as a vlen-byte vector whose
// element type is bt, picking the cheapest broadcast the CPU supports.
// NOTE(review): noreg is passed as the scratch register to the broadcast
// helpers — this assumes src is always RIP-reachable; confirm at call sites.
void C2_MacroAssembler::load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen) {
int vlen_enc = vector_length_encoding(vlen);
if (VM_Version::supports_avx()) {
if (bt == T_LONG) {
// 64-bit integer broadcast needs AVX2 (VPBROADCASTQ); on plain AVX
// fall back to MOVDDUP, which duplicates a 64-bit bit pattern.
if (VM_Version::supports_avx2()) {
vpbroadcastq(dst, src, vlen_enc, noreg);
} else {
vmovddup(dst, src, vlen_enc, noreg);
}
} else if (bt == T_DOUBLE) {
// VBROADCASTSD has no 128-bit form, so use MOVDDUP for XMM width.
if (vlen_enc != Assembler::AVX_128bit) {
vbroadcastsd(dst, src, vlen_enc, noreg);
} else {
vmovddup(dst, src, vlen_enc, noreg);
}
} else {
// 32-bit elements: integer broadcast (VPBROADCASTD) needs AVX2;
// otherwise VBROADCASTSS replicates the same 32-bit pattern.
if (VM_Version::supports_avx2() && is_integral_type(bt)) {
vpbroadcastd(dst, src, vlen_enc, noreg);
} else {
vbroadcastss(dst, src, vlen_enc, noreg);
}
}
} else if (VM_Version::supports_sse3()) {
// Pre-AVX with SSE3: MOVDDUP fills the full 128-bit register.
// Assumes the constant in memory is (at least) 8 bytes with the
// element already replicated — TODO confirm against constant tables.
movddup(dst, src);
} else {
// SSE2 fallback: 64-bit load, then duplicate the low qword when a
// full 16-byte vector was requested.
movq(dst, src);
if (vlen == 16) {
punpcklqdq(dst, dst);
}
}
}

void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) {
ExternalAddress addr(StubRoutines::x86::vector_iota_indices());
if (vlen_in_bytes <= 4) {
Expand Down Expand Up @@ -2317,9 +2349,9 @@ void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src
if (typ == T_FLOAT) {
if (UseAVX == 0) {
movdqu(dst, src);
pshufps(dst, dst, eindex);
shufps(dst, dst, eindex);
} else {
vpshufps(dst, src, src, eindex, Assembler::AVX_128bit);
vshufps(dst, src, src, eindex, Assembler::AVX_128bit);
}
} else {
if (UseAVX == 0) {
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@

void load_vector(XMMRegister dst, Address src, int vlen_in_bytes);
void load_vector(XMMRegister dst, AddressLiteral src, int vlen_in_bytes, Register rscratch = rscratch1);
void load_constant_vector(BasicType bt, XMMRegister dst, InternalAddress src, int vlen);
void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);

// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
Expand Down
36 changes: 31 additions & 5 deletions src/hotspot/cpu/x86/macroAssembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2732,6 +2732,15 @@ void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) {
}
}

// Broadcast the 64-bit value at literal address src into both halves of dst.
// If src is outside RIP-relative range, materialize it in rscratch first.
void MacroAssembler::movddup(XMMRegister dst, AddressLiteral src, Register rscratch) {
  if (!reachable(src)) {
    lea(rscratch, src);
    Assembler::movddup(dst, Address(rscratch, 0));
  } else {
    Assembler::movddup(dst, as_Address(src));
  }
}

void MacroAssembler::vmovddup(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
if (reachable(src)) {
Assembler::vmovddup(dst, as_Address(src), vector_len);
Expand Down Expand Up @@ -3288,9 +3297,13 @@ void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src,
}
}

void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
Assembler::vpbroadcastw(dst, src, vector_len);
// Broadcast the 32-bit integer at literal address src across dst.
// Falls back to lea + register-indirect when src is not RIP-reachable.
void MacroAssembler::vpbroadcastd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
  if (!reachable(src)) {
    lea(rscratch, src);
    Assembler::vpbroadcastd(dst, Address(rscratch, 0), vector_len);
  } else {
    Assembler::vpbroadcastd(dst, as_Address(src), vector_len);
  }
}

void MacroAssembler::vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
Expand All @@ -3311,6 +3324,15 @@ void MacroAssembler::vbroadcastsd(XMMRegister dst, AddressLiteral src, int vecto
}
}

// Broadcast the 32-bit float at literal address src across dst.
// Falls back to lea + register-indirect when src is not RIP-reachable.
void MacroAssembler::vbroadcastss(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
  if (!reachable(src)) {
    lea(rscratch, src);
    Assembler::vbroadcastss(dst, Address(rscratch, 0), vector_len);
  } else {
    Assembler::vbroadcastss(dst, as_Address(src), vector_len);
  }
}

void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
Assembler::vpcmpeqb(dst, nds, src, vector_len);
Expand Down Expand Up @@ -4364,10 +4386,14 @@ void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file,

// Set every bit of dst to 1 for the given vector length.
void MacroAssembler::vallones(XMMRegister dst, int vector_len) {
if (UseAVX > 2 && (vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) {
// Only pcmpeq has dependency breaking treatment (i.e the execution can begin without
// waiting for the previous result on dst), not vpcmpeqd, so just use vpternlog
vpternlogd(dst, 0xFF, dst, dst, vector_len);
} else if (VM_Version::supports_avx()) {
vpcmpeqd(dst, dst, dst, vector_len);
} else {
// NOTE(review): rendered diff — the next two lines are the pre-change
// branch (deleted), the two after them are the live SSE2 fallback.
assert(UseAVX > 0, "");
vpcmpeqb(dst, dst, dst, vector_len);
assert(VM_Version::supports_sse2(), "");
pcmpeqd(dst, dst);
}
}

Expand Down
26 changes: 14 additions & 12 deletions src/hotspot/cpu/x86/macroAssembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1122,6 +1122,12 @@ class MacroAssembler: public Assembler {
void addpd(XMMRegister dst, Address src) { Assembler::addpd(dst, src); }
void addpd(XMMRegister dst, AddressLiteral src);

using Assembler::vbroadcastsd;
void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);

using Assembler::vbroadcastss;
void vbroadcastss(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);

void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); }
void divsd(XMMRegister dst, AddressLiteral src);
Expand Down Expand Up @@ -1158,14 +1164,18 @@ class MacroAssembler: public Assembler {
void kmov(Register dst, KRegister src);
void kmov(KRegister dst, Register src);

using Assembler::movddup;
void movddup(XMMRegister dst, AddressLiteral src, Register rscratch = rscratch1);
using Assembler::vmovddup;
void vmovddup(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);

// AVX Unaligned forms
void vmovdqu(Address dst, XMMRegister src);
void vmovdqu(XMMRegister dst, Address src);
void vmovdqu(XMMRegister dst, XMMRegister src);
void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg, int vector_len);


// AVX512 Unaligned
void evmovdqu(BasicType type, KRegister kmask, Address dst, XMMRegister src, bool merge, int vector_len);
void evmovdqu(BasicType type, KRegister kmask, XMMRegister dst, Address src, bool merge, int vector_len);
Expand Down Expand Up @@ -1237,9 +1247,6 @@ class MacroAssembler: public Assembler {
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, AddressLiteral src);

using Assembler::vmovddup;
void vmovddup(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);

void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); }
void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); }
void mulpd(XMMRegister dst, AddressLiteral src);
Expand Down Expand Up @@ -1345,16 +1352,11 @@ class MacroAssembler: public Assembler {
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);

void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); }
using Assembler::vpbroadcastd;
void vpbroadcastd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);

using Assembler::vbroadcastsd;
void vbroadcastsd(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
using Assembler::vpbroadcastq;
void vpbroadcastq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch = rscratch1);
void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpbroadcastq(dst, src, vector_len); }
void vpbroadcastq(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastq(dst, src, vector_len); }



void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

Expand Down
Loading