8238681: Make -XX:UseSSE flag x86-specific
Reviewed-by: dholmes, kvn
Vladimir Ivanov committed Mar 10, 2020
1 parent 8208b9c commit 072cfd2e48fc2fb16e07a98ad8fbce3dfe82d56c
src/hotspot/cpu/ppc/vm_version_ppc.cpp
@@ -192,8 +192,6 @@ void VM_Version::initialize() {
   _supports_atomic_getset8 = true;
   _supports_atomic_getadd8 = true;
 
-  UseSSE = 0; // Only on x86 and x64
-
   intx cache_line_size = L1_data_cache_line_size();
 
   if (PowerArchitecturePPC64 >= 9) {
src/hotspot/cpu/sparc/vm_version_sparc.cpp
@@ -68,8 +68,6 @@ void VM_Version::initialize() {
     }
   }
 
-  UseSSE = false; // Only used on x86 and x64.
-
   _supports_cx8 = true;            // All SPARC V9 implementations.
   _supports_atomic_getset4 = true; // Using the 'swap' instruction.
 
src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp
@@ -93,9 +93,13 @@ LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) {
     case intTag:     opr = FrameMap::rax_opr;          break;
     case objectTag:  opr = FrameMap::rax_oop_opr;      break;
     case longTag:    opr = FrameMap::long0_opr;        break;
+#ifdef _LP64
+    case floatTag:   opr = FrameMap::xmm0_float_opr;   break;
+    case doubleTag:  opr = FrameMap::xmm0_double_opr;  break;
+#else
     case floatTag:   opr = UseSSE >= 1 ? FrameMap::xmm0_float_opr  : FrameMap::fpu0_float_opr;   break;
     case doubleTag:  opr = UseSSE >= 2 ? FrameMap::xmm0_double_opr : FrameMap::fpu0_double_opr;  break;
-
+#endif // _LP64
     case addressTag:
     default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr;
   }
@@ -356,6 +360,7 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
     left.dont_load_item();
   }
 
+#ifndef _LP64
   // do not load right operand if it is a constant. only 0 and 1 are
   // loaded because there are special instructions for loading them
   // without memory access (not needed for SSE2 instructions)
@@ -371,13 +376,18 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
       must_load_right = UseSSE < 2 && (c->is_one_double() || c->is_zero_double());
     }
   }
+#endif // !LP64
 
   if (must_load_both) {
     // frem and drem destroy also right operand, so move it to a new register
     right.set_destroys_register();
     right.load_item();
-  } else if (right.is_register() || must_load_right) {
+  } else if (right.is_register()) {
     right.load_item();
+#ifndef _LP64
+  } else if (must_load_right) {
+    right.load_item();
+#endif // !LP64
   } else {
     right.dont_load_item();
   }
@@ -788,9 +798,11 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
   LIRItem value(x->argument_at(0), this);
 
   bool use_fpu = false;
+#ifndef _LP64
   if (UseSSE < 2) {
     value.set_destroys_register();
   }
+#endif // !LP64
   value.load_item();
 
   LIR_Opr calc_input = value.result();
@@ -1552,10 +1564,12 @@ void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result,
     LIR_Opr temp_double = new_register(T_DOUBLE);
     __ volatile_move(LIR_OprFact::address(address), temp_double, T_LONG, info);
     __ volatile_move(temp_double, result, T_LONG);
+#ifndef _LP64
     if (UseSSE < 2) {
       // no spill slot needed in SSE2 mode because xmm->cpu register move is possible
       set_vreg_flag(result, must_start_in_memory);
     }
+#endif // !LP64
   } else {
     __ load(address, result, info);
   }
src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
@@ -367,6 +367,7 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
 #endif
 
   if (save_fpu_registers) {
+#ifndef _LP64
     if (UseSSE < 2) {
       int fpu_off = float_regs_as_doubles_off;
       for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
@@ -379,7 +380,18 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
         fpu_off += 2;
       }
       assert(fpu_off == fpu_state_off, "incorrect number of fpu stack slots");
+
+      if (UseSSE == 1) {
+        int xmm_off = xmm_regs_as_doubles_off;
+        for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
+          VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
+          map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
+          xmm_off += 2;
+        }
+        assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
+      }
     }
+#endif // !LP64
 
     if (UseSSE >= 2) {
       int xmm_off = xmm_regs_as_doubles_off;
@@ -395,15 +407,6 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
         xmm_off += 2;
       }
       assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
-
-    } else if (UseSSE == 1) {
-      int xmm_off = xmm_regs_as_doubles_off;
-      for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
-        VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
-        map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
-        xmm_off += 2;
-      }
-      assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
     }
   }
 
@@ -454,6 +457,16 @@ void C1_MacroAssembler::save_live_registers_no_oop_map(bool save_fpu_registers)
       __ fstp_d(Address(rsp, float_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
       offset += 8;
     }
+
+    if (UseSSE == 1) {
+      // save XMM registers as float because double not supported without SSE2(num MMX == num fpu)
+      int offset = 0;
+      for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
+        XMMRegister xmm_name = as_XMMRegister(n);
+        __ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
+        offset += 8;
+      }
+    }
   }
 #endif // !_LP64
 
@@ -475,16 +488,6 @@ void C1_MacroAssembler::save_live_registers_no_oop_map(bool save_fpu_registers)
       __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
       offset += 8;
     }
-#ifndef _LP64
-  } else if (UseSSE == 1) {
-    // save XMM registers as float because double not supported without SSE2(num MMX == num fpu)
-    int offset = 0;
-    for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
-      XMMRegister xmm_name = as_XMMRegister(n);
-      __ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
-      offset += 8;
-    }
-#endif // !_LP64
   }
 }
 
src/hotspot/cpu/x86/globals_x86.hpp
@@ -103,6 +103,10 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
   product(bool, UseStoreImmI16, true,                                      \
           "Use store immediate 16-bits value instruction on x86")          \
                                                                            \
+  product(intx, UseSSE, 99,                                                \
+          "Highest supported SSE instructions set on x86/x64")             \
+          range(0, 99)                                                     \
+                                                                           \
   product(intx, UseAVX, 3,                                                 \
           "Highest supported AVX instructions set on x86/x64")             \
           range(0, 99)                                                     \
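Note: the default of 99 means "use the highest SSE level the hardware reports"; VM startup lowers the flag during feature detection. A minimal self-contained sketch of that clamping follows. The struct and function names are illustrative stand-ins, not the real HotSpot API; the actual logic lives in VM_Version::get_processor_features() in vm_version_x86.cpp and reads CPUID.

#include <cstdint>

// Hedged sketch of the flag's contract: UseSSE defaults to 99 and is
// clamped at startup to the highest SSE level the CPU supports.
struct CpuFeatures {
  bool sse, sse2, sse3, sse4_1;  // stand-ins for CPUID-derived feature bits
};

static intptr_t UseSSE = 99;  // mirrors the product flag's default

static void clamp_use_sse(const CpuFeatures& f) {
  intptr_t highest = 0;
  if (f.sse)    highest = 1;
  if (f.sse2)   highest = 2;
  if (f.sse3)   highest = 3;
  if (f.sse4_1) highest = 4;
  if (UseSSE > highest) {
    UseSSE = highest;  // e.g. the default 99 collapses to the CPU maximum
  }
}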
src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -2724,17 +2724,6 @@ void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) {
   }
 }
 
-#ifndef _LP64
-void MacroAssembler::empty_FPU_stack() {
-  if (VM_Version::supports_mmx()) {
-    emms();
-  } else {
-    for (int i = 8; i-- > 0; ) ffree(i);
-  }
-}
-#endif // !LP64
-
-
 void MacroAssembler::enter() {
   push(rbp);
   mov(rbp, rsp);
@@ -2753,7 +2742,7 @@ void MacroAssembler::fat_nop() {
   }
 }
 
-#if !defined(_LP64)
+#ifndef _LP64
 void MacroAssembler::fcmp(Register tmp) {
   fcmp(tmp, 1, true, true);
 }
@@ -2856,6 +2845,14 @@ void MacroAssembler::fremr(Register tmp) {
   fxch(1);
   fpop();
 }
+
+void MacroAssembler::empty_FPU_stack() {
+  if (VM_Version::supports_mmx()) {
+    emms();
+  } else {
+    for (int i = 8; i-- > 0; ) ffree(i);
+  }
+}
 #endif // !LP64
 
 void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
@@ -2868,39 +2865,51 @@ void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
   }
 }
 
 void MacroAssembler::load_float(Address src) {
+#ifdef _LP64
+  movflt(xmm0, src);
+#else
   if (UseSSE >= 1) {
     movflt(xmm0, src);
   } else {
-    LP64_ONLY(ShouldNotReachHere());
-    NOT_LP64(fld_s(src));
+    fld_s(src);
   }
+#endif // LP64
 }
 
 void MacroAssembler::store_float(Address dst) {
+#ifdef _LP64
+  movflt(dst, xmm0);
+#else
   if (UseSSE >= 1) {
     movflt(dst, xmm0);
   } else {
-    LP64_ONLY(ShouldNotReachHere());
-    NOT_LP64(fstp_s(dst));
+    fstp_s(dst);
   }
+#endif // LP64
 }
 
 void MacroAssembler::load_double(Address src) {
+#ifdef _LP64
+  movdbl(xmm0, src);
+#else
   if (UseSSE >= 2) {
     movdbl(xmm0, src);
   } else {
-    LP64_ONLY(ShouldNotReachHere());
-    NOT_LP64(fld_d(src));
+    fld_d(src);
   }
+#endif // LP64
 }
 
 void MacroAssembler::store_double(Address dst) {
+#ifdef _LP64
+  movdbl(dst, xmm0);
+#else
   if (UseSSE >= 2) {
     movdbl(dst, xmm0);
   } else {
-    LP64_ONLY(ShouldNotReachHere());
-    NOT_LP64(fstp_d(dst));
+    fstp_d(dst);
   }
+#endif // LP64
 }
 
 // dst = c = a * b + c
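For context on the removed LP64_ONLY(...)/NOT_LP64(...) calls: these HotSpot macros, defined in utilities/globalDefinitions.hpp and paraphrased below, expand to their argument only on the matching word size, which is how a single body previously served both builds. The explicit #ifdef blocks above make the 64-bit SSE-only path and the 32-bit UseSSE dispatch visually distinct.

// Paraphrased from src/hotspot/share/utilities/globalDefinitions.hpp:
// each macro expands to its argument on the matching word size and to
// nothing otherwise.
#ifdef _LP64
#define LP64_ONLY(code) code
#define NOT_LP64(code)
#else
#define LP64_ONLY(code)
#define NOT_LP64(code) code
#endif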
src/hotspot/cpu/x86/x86.ad
@@ -1295,14 +1295,6 @@ const bool Matcher::match_rule_supported(int opcode) {
        return false;
      }
      break;
-    case Op_AddReductionVF:
-    case Op_AddReductionVD:
-    case Op_MulReductionVF:
-    case Op_MulReductionVD:
-      if (UseSSE < 1) { // requires at least SSE
-        return false;
-      }
-      break;
     case Op_SqrtVD:
     case Op_SqrtVF:
       if (UseAVX < 1) { // enabled for AVX only
@@ -1338,14 +1330,6 @@ const bool Matcher::match_rule_supported(int opcode) {
        return false;
      }
      break;
-    case Op_MulAddVS2VI:
-    case Op_RShiftVL:
-    case Op_AbsVD:
-    case Op_NegVD:
-      if (UseSSE < 2) {
-        return false;
-      }
-      break;
     case Op_MulVB:
     case Op_LShiftVB:
     case Op_RShiftVB:
@@ -1381,6 +1365,24 @@ const bool Matcher::match_rule_supported(int opcode) {
        return false; // 128bit vroundpd is not available
      }
      break;
+#ifndef _LP64
+    case Op_AddReductionVF:
+    case Op_AddReductionVD:
+    case Op_MulReductionVF:
+    case Op_MulReductionVD:
+      if (UseSSE < 1) { // requires at least SSE
+        return false;
+      }
+      break;
+    case Op_MulAddVS2VI:
+    case Op_RShiftVL:
+    case Op_AbsVD:
+    case Op_NegVD:
+      if (UseSSE < 2) {
+        return false;
+      }
+      break;
+#endif // !LP64
   }
   return true; // Match rules are supported by default.
 }
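Why moving these reduction and shift cases under #ifndef _LP64 is safe: a 64-bit x86 VM always runs with at least SSE2 (the x86-64 ABI requires it, and startup feature detection enforces it), so the UseSSE < 1 and UseSSE < 2 guards could never fire on x64. An illustrative restatement of that invariant; the assert placement is hypothetical and not part of the commit.

#include <cassert>

static long UseSSE = 2;  // stand-in for the real flag after startup clamping

static void check_x64_sse_invariant() {
#ifdef _LP64
  // On x64, startup pins UseSSE to at least 2, so UseSSE-based
  // match_rule_supported() guards are vacuous and compile away.
  assert(UseSSE >= 2 && "x64 requires at least SSE2");
#endif
}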
