Skip to content
Permalink
Browse files
8277617: Adjust AVX3Threshold for copy/fill stubs
Reviewed-by: jbhateja, dholmes, neliasso, jiefu
  • Loading branch information
Sandhya Viswanathan committed Dec 3, 2021
1 parent 2b87c2b commit 24e16ac637095d7dee1d6fe34f996b68eedfa8bc
Showing 5 changed files with 29 additions and 14 deletions.
@@ -5021,7 +5021,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
// cnt - number of qwords (8-byte words).
// base - start address, qword aligned.
Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
bool use64byteVector = MaxVectorSize == 64 && AVX3Threshold == 0;
bool use64byteVector = (MaxVectorSize == 64) && (VM_Version::avx3_threshold() == 0);
if (use64byteVector) {
vpxor(xtmp, xtmp, xtmp, AVX_512bit);
} else if (MaxVectorSize >= 32) {
@@ -5085,7 +5085,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
// Clearing constant sized memory using YMM/ZMM registers.
void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
assert(UseAVX > 2 && VM_Version::supports_avx512vlbw(), "");
bool use64byteVector = MaxVectorSize > 32 && AVX3Threshold == 0;
bool use64byteVector = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);

int vector64_count = (cnt & (~0x7)) >> 3;
cnt = cnt & 0x7;
@@ -5328,8 +5328,8 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
// Fill 64-byte chunks
Label L_fill_64_bytes_loop_avx3, L_check_fill_64_bytes_avx2;

// If number of bytes to fill < AVX3Threshold, perform fill using AVX2
cmpl(count, AVX3Threshold);
// If number of bytes to fill < VM_Version::avx3_threshold(), perform fill using AVX2
cmpl(count, VM_Version::avx3_threshold());
jccb(Assembler::below, L_check_fill_64_bytes_avx2);

vpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit);
@@ -8717,6 +8717,7 @@ void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register va
Label L_fill_zmm_sequence;

int shift = -1;
int avx3threshold = VM_Version::avx3_threshold();
switch(type) {
case T_BYTE: shift = 0;
break;
@@ -8732,10 +8733,10 @@ void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register va
fatal("Unhandled type: %s\n", type2name(type));
}

if (AVX3Threshold != 0 || MaxVectorSize == 32) {
if ((avx3threshold != 0) || (MaxVectorSize == 32)) {

if (MaxVectorSize == 64) {
cmpq(count, AVX3Threshold >> shift);
cmpq(count, avx3threshold >> shift);
jcc(Assembler::greater, L_fill_zmm_sequence);
}

@@ -114,7 +114,7 @@ void MacroAssembler::arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KReg
bool use64byteVector, Label& L_entry, Label& L_exit) {
Label L_entry_64, L_entry_96, L_entry_128;
Label L_entry_160, L_entry_192;
bool avx3 = MaxVectorSize > 32 && AVX3Threshold == 0;
bool avx3 = (MaxVectorSize > 32) && (VM_Version::avx3_threshold() == 0);

int size_mat[][6] = {
/* T_BYTE */ {32 , 64, 96 , 128 , 160 , 192 },
@@ -1384,8 +1384,8 @@ class StubGenerator: public StubCodeGenerator {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();

bool use64byteVector = MaxVectorSize > 32 && AVX3Threshold == 0;
int avx3threshold = VM_Version::avx3_threshold();
bool use64byteVector = (MaxVectorSize > 32) && (avx3threshold == 0);
Label L_main_loop, L_main_loop_64bytes, L_tail, L_tail64, L_exit, L_entry;
Label L_repmovs, L_main_pre_loop, L_main_pre_loop_64bytes, L_pre_main_post_64;
const Register from = rdi; // source array address
@@ -1448,7 +1448,7 @@ class StubGenerator: public StubCodeGenerator {
// PRE-MAIN-POST loop for aligned copy.
__ BIND(L_entry);

if (AVX3Threshold != 0) {
if (avx3threshold != 0) {
__ cmpq(count, threshold[shift]);
if (MaxVectorSize == 64) {
// Copy using 64 byte vectors.
@@ -1460,7 +1460,7 @@ class StubGenerator: public StubCodeGenerator {
}
}

if (MaxVectorSize < 64 || AVX3Threshold != 0) {
if ((MaxVectorSize < 64) || (avx3threshold != 0)) {
// Partial copy to make dst address 32 byte aligned.
__ movq(temp2, to);
__ andq(temp2, 31);
@@ -1603,7 +1603,8 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();

bool use64byteVector = MaxVectorSize > 32 && AVX3Threshold == 0;
int avx3threshold = VM_Version::avx3_threshold();
bool use64byteVector = (MaxVectorSize > 32) && (avx3threshold == 0);

Label L_main_pre_loop, L_main_pre_loop_64bytes, L_pre_main_post_64;
Label L_main_loop, L_main_loop_64bytes, L_tail, L_tail64, L_exit, L_entry;
@@ -1668,12 +1669,12 @@ class StubGenerator: public StubCodeGenerator {
// PRE-MAIN-POST loop for aligned copy.
__ BIND(L_entry);

if (MaxVectorSize > 32 && AVX3Threshold != 0) {
if ((MaxVectorSize > 32) && (avx3threshold != 0)) {
__ cmpq(temp1, threshold[shift]);
__ jcc(Assembler::greaterEqual, L_pre_main_post_64);
}

if (MaxVectorSize < 64 || AVX3Threshold != 0) {
if ((MaxVectorSize < 64) || (avx3threshold != 0)) {
// Partial copy to make dst address 32 byte aligned.
__ leaq(temp2, Address(to, temp1, (Address::ScaleFactor)(shift), 0));
__ andq(temp2, 31);
@@ -1878,6 +1878,17 @@ void VM_Version::check_virtualizations() {
}
}

// Returns the effective byte threshold at which 64-byte instructions are
// used when implementing the array copy, fill and clear operations.
//
// Intel platforms that support the serialize instruction have an improved
// implementation of 64-byte loads/stores, so on those platforms the default
// threshold is 0. Whenever AVX3Threshold is not at its default setting, or
// the platform does not match, the AVX3Threshold value is returned unchanged.
int VM_Version::avx3_threshold() {
  // Evaluated in the same short-circuit order as the CPU checks require:
  // vendor/family first, then the feature bit, then the flag origin.
  const bool zero_by_default = is_intel_family_core() &&
                               supports_serialize() &&
                               FLAG_IS_DEFAULT(AVX3Threshold);
  if (zero_by_default) {
    return 0;
  }
  return AVX3Threshold;
}

void VM_Version::initialize() {
ResourceMark rm;
// Making this stub must be FIRST use of assembler
@@ -911,6 +911,8 @@ enum Extended_Family {
// True only for an Intel core-family CPU whose extended CPU model equals
// CPU_MODEL_SKYLAKE.
static bool is_intel_skylake() {
  if (!is_intel_family_core()) {
    return false;
  }
  return extended_cpu_model() == CPU_MODEL_SKYLAKE;
}

// Effective threshold for using 64-byte instructions in the array copy,
// fill and clear code. Yields either 0 or the AVX3Threshold flag value;
// callers should use this accessor instead of reading AVX3Threshold directly.
static int avx3_threshold();

static bool is_intel_tsc_synched_at_init() {
if (is_intel_family_core()) {
uint32_t ext_model = extended_cpu_model();

1 comment on commit 24e16ac

@openjdk-notifier
Copy link

@openjdk-notifier openjdk-notifier bot commented on 24e16ac Dec 3, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.