Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
Browse files
8258134: assert(size == calc_size) failed: incorrect size calculation…
… on x86_32 with AVX512 machines

Reviewed-by: kvn, thartmann
  • Loading branch information
DamonFool committed Dec 18, 2020
1 parent 38593a4 commit 45a150b8dcf61158d6e95be550282e8a6bfc9cee
Showing with 21 additions and 113 deletions.
  1. +3 −3 src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
  2. +4 −68 src/hotspot/cpu/x86/x86.ad
  3. +9 −37 src/hotspot/cpu/x86/x86_32.ad
  4. +5 −5 src/hotspot/cpu/x86/x86_64.ad
@@ -374,7 +374,7 @@ OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::N
}

// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
extern int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
extern int vec_spill_helper(CodeBuffer *cbuf, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);

#undef __
@@ -435,13 +435,13 @@ class ZSaveLiveRegisters {
const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
_spill_offset -= reg_data._size;
vec_spill_helper(__ code(), false /* do_size */, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
vec_spill_helper(__ code(), false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
}

void xmm_register_restore(const XMMRegisterData& reg_data) {
const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
vec_spill_helper(__ code(), false /* do_size */, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
vec_spill_helper(__ code(), true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
_spill_offset += reg_data._size;
}

@@ -2129,18 +2129,14 @@ static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_
}

// Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st) {
// In 64-bit VM size calculation is very complex. Emitting instructions
// into scratch buffer is used to get size in 64-bit VM.
LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
assert(ireg == Op_VecS || // 32bit vector
(src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
(dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
"no non-adjacent vector moves" );
if (cbuf) {
C2_MacroAssembler _masm(cbuf);
int offset = __ offset();
switch (ireg) {
case Op_VecS: // copy whole register
case Op_VecD:
@@ -2172,14 +2168,8 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
default:
ShouldNotReachHere();
}
int size = __ offset() - offset;
#ifdef ASSERT
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
assert(!do_size || size == 4, "incorrect size calculattion");
#endif
return size;
#ifndef PRODUCT
} else if (!do_size) {
} else {
switch (ireg) {
case Op_VecS:
case Op_VecD:
@@ -2195,18 +2185,12 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
}
#endif
}
// VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
return (UseAVX > 2) ? 6 : 4;
}

int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st) {
// In 64-bit VM size calculation is very complex. Emitting instructions
// into scratch buffer is used to get size in 64-bit VM.
LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
if (cbuf) {
C2_MacroAssembler _masm(cbuf);
int offset = __ offset();
if (is_load) {
switch (ireg) {
case Op_VecS:
@@ -2284,15 +2268,8 @@ int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
ShouldNotReachHere();
}
}
int size = __ offset() - offset;
#ifdef ASSERT
int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
#endif
return size;
#ifndef PRODUCT
} else if (!do_size) {
} else {
if (is_load) {
switch (ireg) {
case Op_VecS:
@@ -2332,47 +2309,6 @@ int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
}
#endif
}
bool is_single_byte = false;
int vec_len = 0;
if ((UseAVX > 2) && (stack_offset != 0)) {
int tuple_type = Assembler::EVEX_FVM;
int input_size = Assembler::EVEX_32bit;
switch (ireg) {
case Op_VecS:
tuple_type = Assembler::EVEX_T1S;
break;
case Op_VecD:
tuple_type = Assembler::EVEX_T1S;
input_size = Assembler::EVEX_64bit;
break;
case Op_VecX:
break;
case Op_VecY:
vec_len = 1;
break;
case Op_VecZ:
vec_len = 2;
break;
}
is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
}
int offset_size = 0;
int size = 5;
if (UseAVX > 2 ) {
if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
size += 2; // Need an additional two bytes for EVEX encoding
} else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
} else {
offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
size += 2; // Need an additional two bytes for EVEX encodding
}
} else {
offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
}
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
return size+offset_size;
}

static inline jlong replicate8_imm(int con, int width) {
@@ -945,41 +945,16 @@ static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st);

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);

static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
int dst_offset, uint ireg, outputStream* st) {
int calc_size = 0;
int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
switch (ireg) {
case Op_VecS:
calc_size = 3+src_offset_size + 3+dst_offset_size;
break;
case Op_VecD: {
calc_size = 3+src_offset_size + 3+dst_offset_size;
int tmp_src_offset = src_offset + 4;
int tmp_dst_offset = dst_offset + 4;
src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
calc_size += 3+src_offset_size + 3+dst_offset_size;
break;
}
case Op_VecX:
case Op_VecY:
case Op_VecZ:
calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
break;
default:
ShouldNotReachHere();
}
if (cbuf) {
MacroAssembler _masm(cbuf);
int offset = __ offset();
switch (ireg) {
case Op_VecS:
__ pushl(Address(rsp, src_offset));
@@ -1012,11 +987,8 @@ static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_off
default:
ShouldNotReachHere();
}
int size = __ offset() - offset;
assert(size == calc_size, "incorrect size calculation");
return size;
#ifndef PRODUCT
} else if (!do_size) {
} else {
switch (ireg) {
case Op_VecS:
st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
@@ -1056,7 +1028,6 @@ static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_off
}
#endif
}
return calc_size;
}

uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
@@ -1088,18 +1059,19 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
// mem -> mem
int src_offset = ra_->reg2offset(src_first);
int dst_offset = ra_->reg2offset(dst_first);
return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
} else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
} else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
int stack_offset = ra_->reg2offset(dst_first);
return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
} else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
int stack_offset = ra_->reg2offset(src_first);
return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
} else {
ShouldNotReachHere();
}
return 0;
}

// --------------------------------------
@@ -1033,10 +1033,10 @@ static enum RC rc_class(OptoReg::Name reg)
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st);

int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);

static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
@@ -1151,13 +1151,13 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
int dst_offset = ra_->reg2offset(dst_first);
vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
} else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
} else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
int stack_offset = ra_->reg2offset(dst_first);
vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
} else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
int stack_offset = ra_->reg2offset(src_first);
vec_spill_helper(cbuf, false, true, stack_offset, dst_first, ireg, st);
vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
} else {
ShouldNotReachHere();
}

1 comment on commit 45a150b

@openjdk-notifier

This comment has been minimized.

Copy link

@openjdk-notifier openjdk-notifier bot commented on 45a150b Dec 18, 2020

Please sign in to comment.