This repository has been archived by the owner on Sep 2, 2022. It is now read-only.

8258134: assert(size == calc_size) failed: incorrect size calculation on x86_32 with AVX512 machines #21

Closed · wants to merge 1 commit
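
For context on the failure named in the title: the 32-bit spill helpers changed below pre-compute an expected instruction length (calc_size) and assert that it matches the number of bytes actually emitted (see the assert(size == calc_size) removed from x86_32.ad further down). This change drops that hand-counted size path and the do_size parameter. The short stand-alone C++ sketch below uses hypothetical names and a simulated code buffer rather than the real HotSpot assembler; it only illustrates why a hand-counted length can diverge from the emitted length once a longer prefix such as EVEX is in play on AVX-512 machines.

#include <cstddef>
#include <cstdio>
#include <vector>

static std::vector<unsigned char> code_buffer;    // stand-in for the assembler's CodeBuffer

static void emit_vector_move(bool use_evex) {
  // Pretend emitter: the prefix length depends on the encoding in use, the way
  // a 2-byte VEX prefix differs from a 4-byte EVEX prefix on AVX-512 hardware.
  int prefix_bytes = use_evex ? 4 : 2;
  for (int i = 0; i < prefix_bytes + 3; ++i) {
    code_buffer.push_back(0x90);                  // prefix + opcode/modrm/disp bytes
  }
}

int main() {
  const bool use_evex = true;                     // AVX-512 capable machine
  const std::size_t calc_size = 2 + 3;            // hand-counted bytes, written for the VEX encoding
  const std::size_t start = code_buffer.size();
  emit_vector_move(use_evex);
  const std::size_t emitted = code_buffer.size() - start;
  // Shape of the removed check: assert(size == calc_size, "incorrect size calculation").
  // Here the pre-computed count no longer matches, so the check would fire.
  std::printf("emitted %zu bytes, precomputed %zu bytes\n", emitted, calc_size);
  return emitted == calc_size ? 0 : 1;
}
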
6 changes: 3 additions & 3 deletions src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
@@ -374,7 +374,7 @@ OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::N
}

// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
-extern int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+extern int vec_spill_helper(CodeBuffer *cbuf, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);

#undef __
@@ -435,13 +435,13 @@ class ZSaveLiveRegisters {
const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
_spill_offset -= reg_data._size;
-vec_spill_helper(__ code(), false /* do_size */, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
+vec_spill_helper(__ code(), false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
}

void xmm_register_restore(const XMMRegisterData& reg_data) {
const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
-vec_spill_helper(__ code(), false /* do_size */, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
+vec_spill_helper(__ code(), true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
_spill_offset += reg_data._size;
}

72 changes: 4 additions & 68 deletions src/hotspot/cpu/x86/x86.ad
@@ -2129,18 +2129,14 @@ static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_
}

// Helper methods for MachSpillCopyNode::implementation().
-static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st) {
-// In 64-bit VM size calculation is very complex. Emitting instructions
-// into scratch buffer is used to get size in 64-bit VM.
-LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
assert(ireg == Op_VecS || // 32bit vector
(src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
(dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
"no non-adjacent vector moves" );
if (cbuf) {
C2_MacroAssembler _masm(cbuf);
-int offset = __ offset();
switch (ireg) {
case Op_VecS: // copy whole register
case Op_VecD:
@@ -2172,14 +2168,8 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
default:
ShouldNotReachHere();
}
-int size = __ offset() - offset;
-#ifdef ASSERT
-// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
-assert(!do_size || size == 4, "incorrect size calculattion");
-#endif
-return size;
#ifndef PRODUCT
-} else if (!do_size) {
+} else {
switch (ireg) {
case Op_VecS:
case Op_VecD:
@@ -2195,18 +2185,12 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
}
#endif
}
-// VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
-return (UseAVX > 2) ? 6 : 4;
}

-int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st) {
-// In 64-bit VM size calculation is very complex. Emitting instructions
-// into scratch buffer is used to get size in 64-bit VM.
-LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
if (cbuf) {
C2_MacroAssembler _masm(cbuf);
-int offset = __ offset();
if (is_load) {
switch (ireg) {
case Op_VecS:
@@ -2284,15 +2268,8 @@ int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
ShouldNotReachHere();
}
}
-int size = __ offset() - offset;
-#ifdef ASSERT
-int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
-// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
-assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
-#endif
-return size;
#ifndef PRODUCT
-} else if (!do_size) {
+} else {
if (is_load) {
switch (ireg) {
case Op_VecS:
@@ -2332,47 +2309,6 @@ int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
}
#endif
}
-bool is_single_byte = false;
-int vec_len = 0;
-if ((UseAVX > 2) && (stack_offset != 0)) {
-int tuple_type = Assembler::EVEX_FVM;
-int input_size = Assembler::EVEX_32bit;
-switch (ireg) {
-case Op_VecS:
-tuple_type = Assembler::EVEX_T1S;
-break;
-case Op_VecD:
-tuple_type = Assembler::EVEX_T1S;
-input_size = Assembler::EVEX_64bit;
-break;
-case Op_VecX:
-break;
-case Op_VecY:
-vec_len = 1;
-break;
-case Op_VecZ:
-vec_len = 2;
-break;
-}
-is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
-}
-int offset_size = 0;
-int size = 5;
-if (UseAVX > 2 ) {
-if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
-offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
-size += 2; // Need an additional two bytes for EVEX encoding
-} else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
-offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
-} else {
-offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
-size += 2; // Need an additional two bytes for EVEX encodding
-}
-} else {
-offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
-}
-// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
-return size+offset_size;
}

static inline jlong replicate8_imm(int con, int width) {
46 changes: 9 additions & 37 deletions src/hotspot/cpu/x86/x86_32.ad
@@ -945,41 +945,16 @@ static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
-static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st);

-static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);

-static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
+static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
int dst_offset, uint ireg, outputStream* st) {
-int calc_size = 0;
-int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
-int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
-switch (ireg) {
-case Op_VecS:
-calc_size = 3+src_offset_size + 3+dst_offset_size;
-break;
-case Op_VecD: {
-calc_size = 3+src_offset_size + 3+dst_offset_size;
-int tmp_src_offset = src_offset + 4;
-int tmp_dst_offset = dst_offset + 4;
-src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
-dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
-calc_size += 3+src_offset_size + 3+dst_offset_size;
-break;
-}
-case Op_VecX:
-case Op_VecY:
-case Op_VecZ:
-calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
-break;
-default:
-ShouldNotReachHere();
-}
if (cbuf) {
MacroAssembler _masm(cbuf);
-int offset = __ offset();
switch (ireg) {
case Op_VecS:
__ pushl(Address(rsp, src_offset));
@@ -1012,11 +987,8 @@ static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_off
default:
ShouldNotReachHere();
}
-int size = __ offset() - offset;
-assert(size == calc_size, "incorrect size calculation");
-return size;
#ifndef PRODUCT
-} else if (!do_size) {
+} else {
switch (ireg) {
case Op_VecS:
st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
@@ -1056,7 +1028,6 @@ static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_off
}
#endif
}
-return calc_size;
}

uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
@@ -1088,18 +1059,19 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
// mem -> mem
int src_offset = ra_->reg2offset(src_first);
int dst_offset = ra_->reg2offset(dst_first);
-return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
+vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
} else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
-return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
+vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
} else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
int stack_offset = ra_->reg2offset(dst_first);
-return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
+vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
} else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
int stack_offset = ra_->reg2offset(src_first);
-return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
+vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
} else {
ShouldNotReachHere();
}
+return 0;
}

// --------------------------------------
10 changes: 5 additions & 5 deletions src/hotspot/cpu/x86/x86_64.ad
@@ -1033,10 +1033,10 @@ static enum RC rc_class(OptoReg::Name reg)
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
-static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st);

-int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
int stack_offset, int reg, uint ireg, outputStream* st);

static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
@@ -1151,13 +1151,13 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
int dst_offset = ra_->reg2offset(dst_first);
vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
} else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
-vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
+vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
} else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
int stack_offset = ra_->reg2offset(dst_first);
-vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
+vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
} else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
int stack_offset = ra_->reg2offset(src_first);
-vec_spill_helper(cbuf, false, true, stack_offset, dst_first, ireg, st);
+vec_spill_helper(cbuf, true, stack_offset, dst_first, ireg, st);
} else {
ShouldNotReachHere();
}