Commit e9d9cc6

8290027: Move inline functions from vm_version_x86.hpp to cpp
Reviewed-by: kbarrett, dholmes
1 parent 87aa3ce commit e9d9cc6

File tree

2 files changed: +327 -318 lines changed

src/hotspot/cpu/x86/vm_version_x86.cpp

+318
@@ -2843,3 +2843,321 @@ int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
   return _max_qualified_cpu_frequency;
 }
 
+uint64_t VM_Version::feature_flags() {
+  uint64_t result = 0;
+  if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
+    result |= CPU_CX8;
+  if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
+    result |= CPU_CMOV;
+  if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0)
+    result |= CPU_FLUSH;
+#ifdef _LP64
+  // clflush should always be available on x86_64
+  // if not we are in real trouble because we rely on it
+  // to flush the code cache.
+  assert ((result & CPU_FLUSH) != 0, "clflush should be available");
+#endif
+  if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
+      _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0))
+    result |= CPU_FXSR;
+  // HT flag is set for multi-core processors also.
+  if (threads_per_core() > 1)
+    result |= CPU_HT;
+  if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
+      _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
+    result |= CPU_MMX;
+  if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
+    result |= CPU_SSE;
+  if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
+    result |= CPU_SSE2;
+  if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
+    result |= CPU_SSE3;
+  if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
+    result |= CPU_SSSE3;
+  if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
+    result |= CPU_SSE4_1;
+  if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
+    result |= CPU_SSE4_2;
+  if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
+    result |= CPU_POPCNT;
+  if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
+      _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
+      _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
+      _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
+    result |= CPU_AVX;
+    result |= CPU_VZEROUPPER;
+    if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
+      result |= CPU_AVX2;
+    if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
+        _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
+        _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
+        _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
+      result |= CPU_AVX512F;
+      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
+        result |= CPU_AVX512CD;
+      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
+        result |= CPU_AVX512DQ;
+      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
+        result |= CPU_AVX512PF;
+      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
+        result |= CPU_AVX512ER;
+      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
+        result |= CPU_AVX512BW;
+      if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
+        result |= CPU_AVX512VL;
+      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
+        result |= CPU_AVX512_VPOPCNTDQ;
+      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
+        result |= CPU_AVX512_VPCLMULQDQ;
+      if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
+        result |= CPU_AVX512_VAES;
+      if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0)
+        result |= CPU_GFNI;
+      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
+        result |= CPU_AVX512_VNNI;
+      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0)
+        result |= CPU_AVX512_BITALG;
+      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
+        result |= CPU_AVX512_VBMI;
+      if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
+        result |= CPU_AVX512_VBMI2;
+    }
+  }
+  if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0)
+    result |= CPU_HV;
+  if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
+    result |= CPU_BMI1;
+  if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
+    result |= CPU_TSC;
+  if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
+    result |= CPU_TSCINV_BIT;
+  if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
+    result |= CPU_AES;
+  if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
+    result |= CPU_ERMS;
+  if (_cpuid_info.sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
+    result |= CPU_FSRM;
+  if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
+    result |= CPU_CLMUL;
+  if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
+    result |= CPU_RTM;
+  if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
+    result |= CPU_ADX;
+  if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
+    result |= CPU_BMI2;
+  if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
+    result |= CPU_SHA;
+  if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
+    result |= CPU_FMA;
+  if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0)
+    result |= CPU_FLUSHOPT;
+  if (_cpuid_info.ext_cpuid1_edx.bits.rdtscp != 0)
+    result |= CPU_RDTSCP;
+  if (_cpuid_info.sef_cpuid7_ecx.bits.rdpid != 0)
+    result |= CPU_RDPID;
+
+  // AMD|Hygon features.
+  if (is_amd_family()) {
+    if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
+        (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
+      result |= CPU_3DNOW_PREFETCH;
+    if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
+      result |= CPU_LZCNT;
+    if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
+      result |= CPU_SSE4A;
+  }
+
+  // Intel features.
+  if (is_intel()) {
+    if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
+      result |= CPU_LZCNT;
+    }
+    if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
+      result |= CPU_3DNOW_PREFETCH;
+    }
+    if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) {
+      result |= CPU_CLWB;
+    }
+    if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
+      result |= CPU_SERIALIZE;
+  }
+
+  // ZX features.
+  if (is_zx()) {
+    if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
+      result |= CPU_LZCNT;
+    }
+    if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
+      result |= CPU_3DNOW_PREFETCH;
+    }
+  }
+
+  // Composite features.
+  if (supports_tscinv_bit() &&
+      ((is_amd_family() && !is_amd_Barcelona()) ||
+       is_intel_tsc_synched_at_init())) {
+    result |= CPU_TSCINV;
+  }
+
+  return result;
+}
+
+bool VM_Version::os_supports_avx_vectors() {
+  bool retVal = false;
+  int nreg = 2 LP64_ONLY(+2);
+  if (supports_evex()) {
+    // Verify that OS save/restore all bits of EVEX registers
+    // during signal processing.
+    retVal = true;
+    for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
+      if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
+        retVal = false;
+        break;
+      }
+    }
+  } else if (supports_avx()) {
+    // Verify that OS save/restore all bits of AVX registers
+    // during signal processing.
+    retVal = true;
+    for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
+      if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
+        retVal = false;
+        break;
+      }
+    }
+    // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
+    if (retVal == false) {
+      // Verify that OS save/restore all bits of EVEX registers
+      // during signal processing.
+      retVal = true;
+      for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
+        if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
+          retVal = false;
+          break;
+        }
+      }
+    }
+  }
+  return retVal;
+}
+
+uint VM_Version::cores_per_cpu() {
+  uint result = 1;
+  if (is_intel()) {
+    bool supports_topology = supports_processor_topology();
+    if (supports_topology) {
+      result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
+               _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
+    }
+    if (!supports_topology || result == 0) {
+      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
+    }
+  } else if (is_amd_family()) {
+    result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
+  } else if (is_zx()) {
+    bool supports_topology = supports_processor_topology();
+    if (supports_topology) {
+      result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
+               _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
+    }
+    if (!supports_topology || result == 0) {
+      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
+    }
+  }
+  return result;
+}
+
+uint VM_Version::threads_per_core() {
+  uint result = 1;
+  if (is_intel() && supports_processor_topology()) {
+    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
+  } else if (is_zx() && supports_processor_topology()) {
+    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
+  } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
+    if (cpu_family() >= 0x17) {
+      result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
+    } else {
+      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
+               cores_per_cpu();
+    }
+  }
+  return (result == 0 ? 1 : result);
+}
+
+intx VM_Version::L1_line_size() {
+  intx result = 0;
+  if (is_intel()) {
+    result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
+  } else if (is_amd_family()) {
+    result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
+  } else if (is_zx()) {
+    result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
+  }
+  if (result < 32) // not defined ?
+    result = 32;   // 32 bytes by default on x86 and other x64
+  return result;
+}
+
+bool VM_Version::is_intel_tsc_synched_at_init() {
+  if (is_intel_family_core()) {
+    uint32_t ext_model = extended_cpu_model();
+    if (ext_model == CPU_MODEL_NEHALEM_EP     ||
+        ext_model == CPU_MODEL_WESTMERE_EP    ||
+        ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
+        ext_model == CPU_MODEL_IVYBRIDGE_EP) {
+      // <= 2-socket invariant tsc support. EX versions are usually used
+      // in > 2-socket systems and likely don't synchronize tscs at
+      // initialization.
+      // Code that uses tsc values must be prepared for them to arbitrarily
+      // jump forward or backward.
+      return true;
+    }
+  }
+  return false;
+}
+
+intx VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
+  // Hardware prefetching (distance/size in bytes):
+  // Pentium 3 -  64 /  32
+  // Pentium 4 - 256 / 128
+  // Athlon    -  64 /  32 ????
+  // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
+  // Core      - 128 /  64
+  //
+  // Software prefetching (distance in bytes / instruction with best score):
+  // Pentium 3 - 128 / prefetchnta
+  // Pentium 4 - 512 / prefetchnta
+  // Athlon    - 128 / prefetchnta
+  // Opteron   - 256 / prefetchnta
+  // Core      - 256 / prefetchnta
+  // It will be used only when AllocatePrefetchStyle > 0
+
+  if (is_amd_family()) { // AMD | Hygon
+    if (supports_sse2()) {
+      return 256; // Opteron
+    } else {
+      return 128; // Athlon
+    }
+  } else { // Intel
+    if (supports_sse3() && cpu_family() == 6) {
+      if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
+        return 192;
+      } else if (use_watermark_prefetch) { // watermark prefetching on Core
+#ifdef _LP64
+        return 384;
+#else
+        return 320;
+#endif
+      }
+    }
+    if (supports_sse2()) {
+      if (cpu_family() == 6) {
+        return 256; // Pentium M, Core, Core2
+      } else {
+        return 512; // Pentium 4
+      }
+    } else {
+      return 128; // Pentium 3 (and all other old CPUs)
+    }
+  }
+}
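
For readers who want to poke at the same bits outside HotSpot: the CPUID leaves that feature_flags() consumes (std_cpuid1_ecx/_edx, sef_cpuid7_ebx/_ecx/_edx, and so on) are filled in by HotSpot's own generated stub, but the individual bit tests can be reproduced with a small standalone program. The sketch below is illustrative only and not part of this change; it assumes a GCC/Clang toolchain providing <cpuid.h>, and the bit positions (SSE4.2 = CPUID.1:ECX[20], POPCNT = [23], AES = [25], OSXSAVE = [27], AVX = [28]; AVX2 = CPUID.(7,0):EBX[5], BMI2 = [8]) follow the Intel SDM.

// Standalone sketch (not HotSpot code): query a few of the CPUID bits that
// VM_Version::feature_flags() folds into its feature mask.
#include <cpuid.h>
#include <cstdio>

int main() {
  unsigned int eax, ebx, ecx, edx;

  // CPUID leaf 1: basic feature bits (the std_cpuid1_ecx/_edx fields above).
  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
    std::printf("sse4.2 : %d\n", (ecx >> 20) & 1);  // CPUID.1:ECX.SSE4_2[20]
    std::printf("popcnt : %d\n", (ecx >> 23) & 1);  // CPUID.1:ECX.POPCNT[23]
    std::printf("aes    : %d\n", (ecx >> 25) & 1);  // CPUID.1:ECX.AESNI[25]
    std::printf("osxsave: %d\n", (ecx >> 27) & 1);  // OS enabled XSAVE/XGETBV
    std::printf("avx    : %d\n", (ecx >> 28) & 1);  // CPU-side AVX bit only
  }

  // CPUID leaf 7, subleaf 0: structured extended features (sef_cpuid7_* above).
  if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
    std::printf("avx2   : %d\n", (ebx >> 5) & 1);   // CPUID.(7,0):EBX.AVX2[5]
    std::printf("bmi2   : %d\n", (ebx >> 8) & 1);   // CPUID.(7,0):EBX.BMI2[8]
  }
  return 0;
}

Note that, exactly as in feature_flags(), the CPU-side AVX bit alone is not sufficient to use 256-bit vectors; the OS must also have enabled the extended register state, which is what the next sketch covers.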

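The AVX branch of feature_flags() deliberately checks more than the CPUID bit: it also requires osxsave plus the sse and ymm fields of XCR0 (xem_xcr0_eax), i.e. evidence that the OS saves and restores YMM state, the same concern os_supports_avx_vectors() re-verifies against register contents captured during signal processing. Below is a minimal standalone sketch of that XCR0 gate, again illustrative only, assuming GCC/Clang inline assembly and the architectural XCR0 layout (bit 1 XMM state, bit 2 YMM state, bits 5-7 AVX-512 opmask/ZMM state).

// Standalone sketch (not HotSpot code): the OS-support half of the AVX check.
#include <cpuid.h>
#include <cstdio>

static unsigned long long read_xcr0() {
  unsigned int lo, hi;
  // XGETBV with ECX = 0 reads XCR0; only legal once CPUID.1:ECX.OSXSAVE is set.
  __asm__ volatile("xgetbv" : "=a"(lo), "=d"(hi) : "c"(0));
  return ((unsigned long long)hi << 32) | lo;
}

int main() {
  unsigned int eax, ebx, ecx, edx;
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) return 1;

  bool cpu_avx = ((ecx >> 28) & 1) != 0;  // CPU implements AVX
  bool osxsave = ((ecx >> 27) & 1) != 0;  // OS enabled XGETBV/XSAVE
  bool os_avx = false, os_avx512 = false;
  if (osxsave) {
    unsigned long long xcr0 = read_xcr0();
    os_avx    = (xcr0 & 0x06) == 0x06;  // XMM + YMM state restored by the OS
    os_avx512 = (xcr0 & 0xe6) == 0xe6;  // plus opmask, ZMM_Hi256, Hi16_ZMM
  }
  std::printf("AVX usable      : %d\n", (cpu_avx && os_avx) ? 1 : 0);
  std::printf("AVX-512 state ok: %d\n", os_avx512 ? 1 : 0);
  return 0;
}

This mirrors only the XCR0 part of the gate; HotSpot additionally verifies the saved ymm_save/zmm_save buffers filled in during signal processing, which a user-level sketch like this cannot reproduce faithfully.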