@@ -2843,3 +2843,321 @@ int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2843
2843
return _max_qualified_cpu_frequency;
2844
2844
}
2845
2845
2846
+ uint64_t VM_Version::feature_flags () {
2847
+ uint64_t result = 0 ;
2848
+ if (_cpuid_info.std_cpuid1_edx .bits .cmpxchg8 != 0 )
2849
+ result |= CPU_CX8;
2850
+ if (_cpuid_info.std_cpuid1_edx .bits .cmov != 0 )
2851
+ result |= CPU_CMOV;
2852
+ if (_cpuid_info.std_cpuid1_edx .bits .clflush != 0 )
2853
+ result |= CPU_FLUSH;
2854
+ #ifdef _LP64
2855
+ // clflush should always be available on x86_64
2856
+ // if not we are in real trouble because we rely on it
2857
+ // to flush the code cache.
2858
+ assert ((result & CPU_FLUSH) != 0 , " clflush should be available" );
2859
+ #endif
2860
+ if (_cpuid_info.std_cpuid1_edx .bits .fxsr != 0 || (is_amd_family () &&
2861
+ _cpuid_info.ext_cpuid1_edx .bits .fxsr != 0 ))
2862
+ result |= CPU_FXSR;
2863
+ // HT flag is set for multi-core processors also.
2864
+ if (threads_per_core () > 1 )
2865
+ result |= CPU_HT;
2866
+ if (_cpuid_info.std_cpuid1_edx .bits .mmx != 0 || (is_amd_family () &&
2867
+ _cpuid_info.ext_cpuid1_edx .bits .mmx != 0 ))
2868
+ result |= CPU_MMX;
2869
+ if (_cpuid_info.std_cpuid1_edx .bits .sse != 0 )
2870
+ result |= CPU_SSE;
2871
+ if (_cpuid_info.std_cpuid1_edx .bits .sse2 != 0 )
2872
+ result |= CPU_SSE2;
2873
+ if (_cpuid_info.std_cpuid1_ecx .bits .sse3 != 0 )
2874
+ result |= CPU_SSE3;
2875
+ if (_cpuid_info.std_cpuid1_ecx .bits .ssse3 != 0 )
2876
+ result |= CPU_SSSE3;
2877
+ if (_cpuid_info.std_cpuid1_ecx .bits .sse4_1 != 0 )
2878
+ result |= CPU_SSE4_1;
2879
+ if (_cpuid_info.std_cpuid1_ecx .bits .sse4_2 != 0 )
2880
+ result |= CPU_SSE4_2;
2881
+ if (_cpuid_info.std_cpuid1_ecx .bits .popcnt != 0 )
2882
+ result |= CPU_POPCNT;
2883
+ if (_cpuid_info.std_cpuid1_ecx .bits .avx != 0 &&
2884
+ _cpuid_info.std_cpuid1_ecx .bits .osxsave != 0 &&
2885
+ _cpuid_info.xem_xcr0_eax .bits .sse != 0 &&
2886
+ _cpuid_info.xem_xcr0_eax .bits .ymm != 0 ) {
2887
+ result |= CPU_AVX;
2888
+ result |= CPU_VZEROUPPER;
2889
+ if (_cpuid_info.sef_cpuid7_ebx .bits .avx2 != 0 )
2890
+ result |= CPU_AVX2;
2891
+ if (_cpuid_info.sef_cpuid7_ebx .bits .avx512f != 0 &&
2892
+ _cpuid_info.xem_xcr0_eax .bits .opmask != 0 &&
2893
+ _cpuid_info.xem_xcr0_eax .bits .zmm512 != 0 &&
2894
+ _cpuid_info.xem_xcr0_eax .bits .zmm32 != 0 ) {
2895
+ result |= CPU_AVX512F;
2896
+ if (_cpuid_info.sef_cpuid7_ebx .bits .avx512cd != 0 )
2897
+ result |= CPU_AVX512CD;
2898
+ if (_cpuid_info.sef_cpuid7_ebx .bits .avx512dq != 0 )
2899
+ result |= CPU_AVX512DQ;
2900
+ if (_cpuid_info.sef_cpuid7_ebx .bits .avx512pf != 0 )
2901
+ result |= CPU_AVX512PF;
2902
+ if (_cpuid_info.sef_cpuid7_ebx .bits .avx512er != 0 )
2903
+ result |= CPU_AVX512ER;
2904
+ if (_cpuid_info.sef_cpuid7_ebx .bits .avx512bw != 0 )
2905
+ result |= CPU_AVX512BW;
2906
+ if (_cpuid_info.sef_cpuid7_ebx .bits .avx512vl != 0 )
2907
+ result |= CPU_AVX512VL;
2908
+ if (_cpuid_info.sef_cpuid7_ecx .bits .avx512_vpopcntdq != 0 )
2909
+ result |= CPU_AVX512_VPOPCNTDQ;
2910
+ if (_cpuid_info.sef_cpuid7_ecx .bits .avx512_vpclmulqdq != 0 )
2911
+ result |= CPU_AVX512_VPCLMULQDQ;
2912
+ if (_cpuid_info.sef_cpuid7_ecx .bits .vaes != 0 )
2913
+ result |= CPU_AVX512_VAES;
2914
+ if (_cpuid_info.sef_cpuid7_ecx .bits .gfni != 0 )
2915
+ result |= CPU_GFNI;
2916
+ if (_cpuid_info.sef_cpuid7_ecx .bits .avx512_vnni != 0 )
2917
+ result |= CPU_AVX512_VNNI;
2918
+ if (_cpuid_info.sef_cpuid7_ecx .bits .avx512_bitalg != 0 )
2919
+ result |= CPU_AVX512_BITALG;
2920
+ if (_cpuid_info.sef_cpuid7_ecx .bits .avx512_vbmi != 0 )
2921
+ result |= CPU_AVX512_VBMI;
2922
+ if (_cpuid_info.sef_cpuid7_ecx .bits .avx512_vbmi2 != 0 )
2923
+ result |= CPU_AVX512_VBMI2;
2924
+ }
2925
+ }
2926
+ if (_cpuid_info.std_cpuid1_ecx .bits .hv != 0 )
2927
+ result |= CPU_HV;
2928
+ if (_cpuid_info.sef_cpuid7_ebx .bits .bmi1 != 0 )
2929
+ result |= CPU_BMI1;
2930
+ if (_cpuid_info.std_cpuid1_edx .bits .tsc != 0 )
2931
+ result |= CPU_TSC;
2932
+ if (_cpuid_info.ext_cpuid7_edx .bits .tsc_invariance != 0 )
2933
+ result |= CPU_TSCINV_BIT;
2934
+ if (_cpuid_info.std_cpuid1_ecx .bits .aes != 0 )
2935
+ result |= CPU_AES;
2936
+ if (_cpuid_info.sef_cpuid7_ebx .bits .erms != 0 )
2937
+ result |= CPU_ERMS;
2938
+ if (_cpuid_info.sef_cpuid7_edx .bits .fast_short_rep_mov != 0 )
2939
+ result |= CPU_FSRM;
2940
+ if (_cpuid_info.std_cpuid1_ecx .bits .clmul != 0 )
2941
+ result |= CPU_CLMUL;
2942
+ if (_cpuid_info.sef_cpuid7_ebx .bits .rtm != 0 )
2943
+ result |= CPU_RTM;
2944
+ if (_cpuid_info.sef_cpuid7_ebx .bits .adx != 0 )
2945
+ result |= CPU_ADX;
2946
+ if (_cpuid_info.sef_cpuid7_ebx .bits .bmi2 != 0 )
2947
+ result |= CPU_BMI2;
2948
+ if (_cpuid_info.sef_cpuid7_ebx .bits .sha != 0 )
2949
+ result |= CPU_SHA;
2950
+ if (_cpuid_info.std_cpuid1_ecx .bits .fma != 0 )
2951
+ result |= CPU_FMA;
2952
+ if (_cpuid_info.sef_cpuid7_ebx .bits .clflushopt != 0 )
2953
+ result |= CPU_FLUSHOPT;
2954
+ if (_cpuid_info.ext_cpuid1_edx .bits .rdtscp != 0 )
2955
+ result |= CPU_RDTSCP;
2956
+ if (_cpuid_info.sef_cpuid7_ecx .bits .rdpid != 0 )
2957
+ result |= CPU_RDPID;
2958
+
2959
+ // AMD|Hygon features.
2960
+ if (is_amd_family ()) {
2961
+ if ((_cpuid_info.ext_cpuid1_edx .bits .tdnow != 0 ) ||
2962
+ (_cpuid_info.ext_cpuid1_ecx .bits .prefetchw != 0 ))
2963
+ result |= CPU_3DNOW_PREFETCH;
2964
+ if (_cpuid_info.ext_cpuid1_ecx .bits .lzcnt != 0 )
2965
+ result |= CPU_LZCNT;
2966
+ if (_cpuid_info.ext_cpuid1_ecx .bits .sse4a != 0 )
2967
+ result |= CPU_SSE4A;
2968
+ }
2969
+
2970
+ // Intel features.
2971
+ if (is_intel ()) {
2972
+ if (_cpuid_info.ext_cpuid1_ecx .bits .lzcnt != 0 ) {
2973
+ result |= CPU_LZCNT;
2974
+ }
2975
+ if (_cpuid_info.ext_cpuid1_ecx .bits .prefetchw != 0 ) {
2976
+ result |= CPU_3DNOW_PREFETCH;
2977
+ }
2978
+ if (_cpuid_info.sef_cpuid7_ebx .bits .clwb != 0 ) {
2979
+ result |= CPU_CLWB;
2980
+ }
2981
+ if (_cpuid_info.sef_cpuid7_edx .bits .serialize != 0 )
2982
+ result |= CPU_SERIALIZE;
2983
+ }
2984
+
2985
+ // ZX features.
2986
+ if (is_zx ()) {
2987
+ if (_cpuid_info.ext_cpuid1_ecx .bits .lzcnt != 0 ) {
2988
+ result |= CPU_LZCNT;
2989
+ }
2990
+ if (_cpuid_info.ext_cpuid1_ecx .bits .prefetchw != 0 ) {
2991
+ result |= CPU_3DNOW_PREFETCH;
2992
+ }
2993
+ }
2994
+
2995
+ // Composite features.
2996
+ if (supports_tscinv_bit () &&
2997
+ ((is_amd_family () && !is_amd_Barcelona ()) ||
2998
+ is_intel_tsc_synched_at_init ())) {
2999
+ result |= CPU_TSCINV;
3000
+ }
3001
+
3002
+ return result;
3003
+ }
3004
+
3005
+ bool VM_Version::os_supports_avx_vectors () {
3006
+ bool retVal = false ;
3007
+ int nreg = 2 LP64_ONLY (+2 );
3008
+ if (supports_evex ()) {
3009
+ // Verify that OS save/restore all bits of EVEX registers
3010
+ // during signal processing.
3011
+ retVal = true ;
3012
+ for (int i = 0 ; i < 16 * nreg; i++) { // 64 bytes per zmm register
3013
+ if (_cpuid_info.zmm_save [i] != ymm_test_value ()) {
3014
+ retVal = false ;
3015
+ break ;
3016
+ }
3017
+ }
3018
+ } else if (supports_avx ()) {
3019
+ // Verify that OS save/restore all bits of AVX registers
3020
+ // during signal processing.
3021
+ retVal = true ;
3022
+ for (int i = 0 ; i < 8 * nreg; i++) { // 32 bytes per ymm register
3023
+ if (_cpuid_info.ymm_save [i] != ymm_test_value ()) {
3024
+ retVal = false ;
3025
+ break ;
3026
+ }
3027
+ }
3028
+ // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3029
+ if (retVal == false ) {
3030
+ // Verify that OS save/restore all bits of EVEX registers
3031
+ // during signal processing.
3032
+ retVal = true ;
3033
+ for (int i = 0 ; i < 16 * nreg; i++) { // 64 bytes per zmm register
3034
+ if (_cpuid_info.zmm_save [i] != ymm_test_value ()) {
3035
+ retVal = false ;
3036
+ break ;
3037
+ }
3038
+ }
3039
+ }
3040
+ }
3041
+ return retVal;
3042
+ }
3043
+
3044
+ uint VM_Version::cores_per_cpu () {
3045
+ uint result = 1 ;
3046
+ if (is_intel ()) {
3047
+ bool supports_topology = supports_processor_topology ();
3048
+ if (supports_topology) {
3049
+ result = _cpuid_info.tpl_cpuidB1_ebx .bits .logical_cpus /
3050
+ _cpuid_info.tpl_cpuidB0_ebx .bits .logical_cpus ;
3051
+ }
3052
+ if (!supports_topology || result == 0 ) {
3053
+ result = (_cpuid_info.dcp_cpuid4_eax .bits .cores_per_cpu + 1 );
3054
+ }
3055
+ } else if (is_amd_family ()) {
3056
+ result = (_cpuid_info.ext_cpuid8_ecx .bits .cores_per_cpu + 1 );
3057
+ } else if (is_zx ()) {
3058
+ bool supports_topology = supports_processor_topology ();
3059
+ if (supports_topology) {
3060
+ result = _cpuid_info.tpl_cpuidB1_ebx .bits .logical_cpus /
3061
+ _cpuid_info.tpl_cpuidB0_ebx .bits .logical_cpus ;
3062
+ }
3063
+ if (!supports_topology || result == 0 ) {
3064
+ result = (_cpuid_info.dcp_cpuid4_eax .bits .cores_per_cpu + 1 );
3065
+ }
3066
+ }
3067
+ return result;
3068
+ }
3069
+
3070
+ uint VM_Version::threads_per_core () {
3071
+ uint result = 1 ;
3072
+ if (is_intel () && supports_processor_topology ()) {
3073
+ result = _cpuid_info.tpl_cpuidB0_ebx .bits .logical_cpus ;
3074
+ } else if (is_zx () && supports_processor_topology ()) {
3075
+ result = _cpuid_info.tpl_cpuidB0_ebx .bits .logical_cpus ;
3076
+ } else if (_cpuid_info.std_cpuid1_edx .bits .ht != 0 ) {
3077
+ if (cpu_family () >= 0x17 ) {
3078
+ result = _cpuid_info.ext_cpuid1E_ebx .bits .threads_per_core + 1 ;
3079
+ } else {
3080
+ result = _cpuid_info.std_cpuid1_ebx .bits .threads_per_cpu /
3081
+ cores_per_cpu ();
3082
+ }
3083
+ }
3084
+ return (result == 0 ? 1 : result);
3085
+ }
3086
+
3087
+ intx VM_Version::L1_line_size () {
3088
+ intx result = 0 ;
3089
+ if (is_intel ()) {
3090
+ result = (_cpuid_info.dcp_cpuid4_ebx .bits .L1_line_size + 1 );
3091
+ } else if (is_amd_family ()) {
3092
+ result = _cpuid_info.ext_cpuid5_ecx .bits .L1_line_size ;
3093
+ } else if (is_zx ()) {
3094
+ result = (_cpuid_info.dcp_cpuid4_ebx .bits .L1_line_size + 1 );
3095
+ }
3096
+ if (result < 32 ) // not defined ?
3097
+ result = 32 ; // 32 bytes by default on x86 and other x64
3098
+ return result;
3099
+ }
3100
+
3101
+ bool VM_Version::is_intel_tsc_synched_at_init () {
3102
+ if (is_intel_family_core ()) {
3103
+ uint32_t ext_model = extended_cpu_model ();
3104
+ if (ext_model == CPU_MODEL_NEHALEM_EP ||
3105
+ ext_model == CPU_MODEL_WESTMERE_EP ||
3106
+ ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3107
+ ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3108
+ // <= 2-socket invariant tsc support. EX versions are usually used
3109
+ // in > 2-socket systems and likely don't synchronize tscs at
3110
+ // initialization.
3111
+ // Code that uses tsc values must be prepared for them to arbitrarily
3112
+ // jump forward or backward.
3113
+ return true ;
3114
+ }
3115
+ }
3116
+ return false ;
3117
+ }
3118
+
3119
+ intx VM_Version::allocate_prefetch_distance (bool use_watermark_prefetch) {
3120
+ // Hardware prefetching (distance/size in bytes):
3121
+ // Pentium 3 - 64 / 32
3122
+ // Pentium 4 - 256 / 128
3123
+ // Athlon - 64 / 32 ????
3124
+ // Opteron - 128 / 64 only when 2 sequential cache lines accessed
3125
+ // Core - 128 / 64
3126
+ //
3127
+ // Software prefetching (distance in bytes / instruction with best score):
3128
+ // Pentium 3 - 128 / prefetchnta
3129
+ // Pentium 4 - 512 / prefetchnta
3130
+ // Athlon - 128 / prefetchnta
3131
+ // Opteron - 256 / prefetchnta
3132
+ // Core - 256 / prefetchnta
3133
+ // It will be used only when AllocatePrefetchStyle > 0
3134
+
3135
+ if (is_amd_family ()) { // AMD | Hygon
3136
+ if (supports_sse2 ()) {
3137
+ return 256 ; // Opteron
3138
+ } else {
3139
+ return 128 ; // Athlon
3140
+ }
3141
+ } else { // Intel
3142
+ if (supports_sse3 () && cpu_family () == 6 ) {
3143
+ if (supports_sse4_2 () && supports_ht ()) { // Nehalem based cpus
3144
+ return 192 ;
3145
+ } else if (use_watermark_prefetch) { // watermark prefetching on Core
3146
+ #ifdef _LP64
3147
+ return 384 ;
3148
+ #else
3149
+ return 320 ;
3150
+ #endif
3151
+ }
3152
+ }
3153
+ if (supports_sse2 ()) {
3154
+ if (cpu_family () == 6 ) {
3155
+ return 256 ; // Pentium M, Core, Core2
3156
+ } else {
3157
+ return 512 ; // Pentium 4
3158
+ }
3159
+ } else {
3160
+ return 128 ; // Pentium 3 (and all other old CPUs)
3161
+ }
3162
+ }
3163
+ }
0 commit comments