Skip to content
Closed
6 changes: 4 additions & 2 deletions src/hotspot/cpu/x86/globals_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
"Highest supported AVX instructions set on x86/x64") \
range(0, 3) \
\
\
product(bool, UseAPX, false, EXPERIMENTAL, \
"Use Intel Advanced Performance Extensions") \
\
product(bool, UseKNLSetting, false, DIAGNOSTIC, \
"Control whether Knights platform setting should be used") \
\
Expand Down Expand Up @@ -234,8 +238,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
"Turn off JVM mitigations related to Intel micro code " \
"mitigations for the Intel JCC erratum") \
\
product(bool, UseAPX, false, EXPERIMENTAL, \
"Use Advanced Performance Extensions on x86") \
// end of ARCH_FLAGS

#endif // CPU_X86_GLOBALS_X86_HPP
123 changes: 110 additions & 13 deletions src/hotspot/cpu/x86/vm_version_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,22 @@ const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEAT
address VM_Version::_cpuinfo_segv_addr = 0;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = 0;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = 0;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = 0;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
// Entry-point signatures of the runtime-generated stubs used by VM_Version.
// CPU info stub: takes a single untyped pointer
// (presumably the buffer the stub fills with CPUID data -- confirm against the caller).
typedef void (*get_cpu_info_stub_t)(void*);
// Virtualization detection stub: takes a uint32_t and a uint32_t*
// (the meaning of the arguments is not visible in this part of the file).
typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
// APX test-state clearing stub: takes no arguments and returns nothing.
typedef void (*clear_apx_test_state_t)(void);
}
// Stub entry points. All start out as nullptr; detect_virt_stub and
// clear_apx_test_state_stub are assigned in VM_Version::initialize().
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;

#ifdef _LP64

Expand Down Expand Up @@ -102,6 +108,27 @@ class VM_Version_StubGenerator: public StubCodeGenerator {

VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

address clear_apx_test_state() {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need clear_apx_test_state? Registers r16 onwards are not callee-saved. Also, checking r15 save/restore is not needed, so we could remove the r15 changes altogether.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, EGPRs are call-clobbered registers, but here we are trying to ascertain whether their values are preserved across signal handling. Explicitly clearing r16 and r31 during signal handling guarantees that register values found preserved after signal handling were restored by the operating system, and did not survive merely because nothing modified them.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment about that.

# define __ _masm->
address start = __ pc();
// EGPRs are call-clobbered registers. Explicitly clearing r16 and r31 during signal
// handling guarantees that register values found preserved after signal handling
// were restored by the operating system, rather than merely left unmodified.

/* FIXME Uncomment following code after OS enablement of
bool save_apx = UseAPX;
VM_Version::set_apx_cpuFeatures();
UseAPX = true;
// EGPR state save/restoration.
__ mov64(r16, 0L);
__ mov64(r31, 0L);
UseAPX = save_apx;
VM_Version::clean_cpuFeatures();
*/
__ ret(0);
return start;
}

address generate_get_cpu_info() {
// Flags to test CPU type.
const uint32_t HS_EFL_AC = 0x40000;
Expand All @@ -113,7 +140,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
Expand Down Expand Up @@ -288,7 +316,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 4), rdx);

//
// cpuid(0x7) Structured Extended Features
// cpuid(0x7) Structured Extended Features Enumeration Leaf.
//
__ bind(sef_cpuid);
__ movl(rax, 7);
Expand All @@ -303,12 +331,16 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi, 12), rdx);

// ECX = 1
//
// cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
//
__ bind(sefsl1_cpuid);
__ movl(rax, 7);
__ movl(rcx, 1);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_ecx1_offset())));
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rdx);

//
// Extended cpuid(0x80000000)
Expand Down Expand Up @@ -387,6 +419,46 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);

#ifndef PRODUCT
//
// Check if OS has enabled XGETBV instruction to access XCR0
// (OSXSAVE feature flag) and CPU supports APX
//
// To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
// and XCR0[19] bit for OS support to save/restore extended GPR state.
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
__ movl(rax, 0x200000);
__ andl(rax, Address(rsi, 4));
__ cmpl(rax, 0x200000);
__ jcc(Assembler::notEqual, vector_save_restore);
// check _cpuid_info.xem_xcr0_eax.bits.apx_f
__ movl(rax, 0x80000);
__ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
__ cmpl(rax, 0x80000);
__ jcc(Assembler::notEqual, vector_save_restore);

/* FIXME: Uncomment while integrating JDK-8329032
bool save_apx = UseAPX;
Comment on lines +440 to +441
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are you missing to uncomment this code?
JDK-8329032 is about .ad file changes. It should not affect execution of this code.
You need changes in the register_x86.* files, and maybe somewhere else, but you don't need C2 changes for this code to work.

Copy link
Member Author

@jatin-bhateja jatin-bhateja Jun 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we already have that in place with #19042, which will be open for review after this patch. I added it as commented-out code since this piece of logic is centered on the CPUID feature check and is pertinent to this patch.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay.

VM_Version::set_apx_cpuFeatures();
UseAPX = true;
__ mov64(r16, VM_Version::egpr_test_value());
__ mov64(r31, VM_Version::egpr_test_value());
*/
__ xorl(rsi, rsi);
VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
// Generate SEGV
__ movl(rax, Address(rsi, 0));

VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
/* FIXME: Uncomment after integration of JDK-8329032
__ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
__ movq(Address(rsi, 0), r16);
__ movq(Address(rsi, 8), r31);

UseAPX = save_apx;
*/
#endif
__ bind(vector_save_restore);
//
// Check if OS has enabled XGETBV instruction to access XCR0
// (OSXSAVE feature flag) and CPU supports AVX
Expand Down Expand Up @@ -580,6 +652,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ vmovdqu(xmm7, Address(rsp, 0));
__ addptr(rsp, 32);
#endif // _WINDOWS

generate_vzeroupper(wrapup);
VM_Version::clean_cpuFeatures();
UseAVX = saved_useavx;
Expand Down Expand Up @@ -940,6 +1013,7 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
}
}

if (UseAVX > use_avx_limit) {
if (UseSSE < 4) {
warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
Expand All @@ -963,6 +1037,16 @@ void VM_Version::get_processor_features() {
_features &= ~CPU_AVX512_VBMI2;
_features &= ~CPU_AVX512_BITALG;
_features &= ~CPU_AVX512_IFMA;
_features &= ~CPU_APX_F;
}

// Currently APX support is only enabled for targets supporting AVX512VL feature.
bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
if (UseAPX && !apx_supported) {
warning("UseAPX is not supported on this CPU, setting it to false");
FLAG_SET_DEFAULT(UseAPX, false);
} else if (FLAG_IS_DEFAULT(UseAPX)) {
FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
}

if (UseAVX < 2) {
Expand Down Expand Up @@ -1002,14 +1086,6 @@ void VM_Version::get_processor_features() {
}
}

// APX support not enabled yet
if (UseAPX) {
if (!FLAG_IS_DEFAULT(UseAPX)) {
warning("APX is not supported on this CPU.");
}
FLAG_SET_DEFAULT(UseAPX, false);
}

if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
_has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
} else {
Expand Down Expand Up @@ -2143,6 +2219,10 @@ int VM_Version::avx3_threshold() {
FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
}

// Runs the generated clear_apx_test_state stub through the function pointer that
// VM_Version::initialize() installs. The pointer is nullptr until then, so this
// must not be called before initialization.
// NOTE: while the EGPR-clearing instructions in the stub generator remain
// commented out (see the FIXME there), the generated stub consists of a bare ret.
void VM_Version::clear_apx_test_state() {
clear_apx_test_state_stub();
}

static bool _vm_version_initialized = false;

void VM_Version::initialize() {
Expand All @@ -2160,6 +2240,8 @@ void VM_Version::initialize() {
detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
g.generate_detect_virt());

clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
g.clear_apx_test_state());
get_processor_features();

LP64_ONLY(Assembler::precompute_instructions();)
Expand Down Expand Up @@ -2958,6 +3040,10 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const {
result |= CPU_SSE4_2;
if (std_cpuid1_ecx.bits.popcnt != 0)
result |= CPU_POPCNT;
if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
xem_xcr0_eax.bits.apx_f != 0) {
result |= CPU_APX_F;
}
if (std_cpuid1_ecx.bits.avx != 0 &&
std_cpuid1_ecx.bits.osxsave != 0 &&
xem_xcr0_eax.bits.sse != 0 &&
Expand All @@ -2968,7 +3054,7 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const {
result |= CPU_F16C;
if (sef_cpuid7_ebx.bits.avx2 != 0) {
result |= CPU_AVX2;
if (sef_cpuid7_ecx1_eax.bits.avx_ifma != 0)
if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
result |= CPU_AVX_IFMA;
}
if (sef_cpuid7_ecx.bits.gfni != 0)
Expand Down Expand Up @@ -3142,6 +3228,17 @@ bool VM_Version::os_supports_avx_vectors() {
return retVal;
}

bool VM_Version::os_supports_apx_egprs() {
  // Reports whether the OS preserved the extended GPRs across the APX test signal.
  // True only when the APX_F feature is present and both slots of
  // _cpuid_info.apx_save still hold egpr_test_value(). The slots are meant to
  // receive r16 and r31 after the test SEGV in the cpu-info stub.
  // Evaluation short-circuits left to right, so the saved slots are not
  // inspected when APX_F is absent.
  const bool egprs_preserved =
      supports_apx_f() &&
      _cpuid_info.apx_save[0] == egpr_test_value() &&
      _cpuid_info.apx_save[1] == egpr_test_value();
  return egprs_preserved;
}

uint VM_Version::cores_per_cpu() {
uint result = 1;
if (is_intel()) {
Expand Down
Loading