From ce5966292720140f6aadc2ae48cd29dccbb1e0ed Mon Sep 17 00:00:00 2001 From: Jiaxi Chen Date: Fri, 25 Nov 2022 20:58:43 +0800 Subject: [PATCH 01/18] KVM: x86: Advertise AVX-VNNI-INT8 CPUID to user space commit 24d74b9f5f2a972ac9228372adeac62b2dc10ea2 upstream. AVX-VNNI-INT8 is a new set of instructions in the latest Intel platform Sierra Forest, aims for the platform to have superior AI capabilities. This instruction multiplies the individual bytes of two unsigned or unsigned source operands, then adds and accumulates the results into the destination dword element size operand. The bit definition: CPUID.(EAX=7,ECX=1):EDX[bit 4] AVX-VNNI-INT8 is on a new and sparse CPUID leaf and all bits on this leaf have no truly kernel use case for now. Given that and to save space for kernel feature bits, move this new leaf to KVM-only subleaf and plus an x86_FEATURE definition for AVX-VNNI-INT8 to direct it to the KVM entry. Advertise AVX-VNNI-INT8 to KVM userspace. This is safe because there are no new VMX controls or additional host enabling required for guests to use this feature. Intel-SIG: commit 24d74b9f5f2a KVM: x86: Advertise AVX-VNNI-INT8 CPUID to user space. ClearWater support including CPU model and new ISAs and its dependency Signed-off-by: Jiaxi Chen Message-Id: <20221125125845.1182922-7-jiaxi.chen@linux.intel.com> Signed-off-by: Paolo Bonzini [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kvm/cpuid.c | 6 +++++- arch/x86/kvm/reverse_cpuid.h | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9e6225bab6d9a..cd3ce2e288f87 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -528,6 +528,10 @@ void kvm_set_cpu_caps(void) F(FZRM) | F(FSRS) | F(FSRC) ); + kvm_cpu_cap_init_scattered(CPUID_7_1_EDX, + F(AVX_VNNI_INT8) + ); + kvm_cpu_cap_mask(CPUID_D_1_EAX, F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd ); @@ -783,9 +787,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) goto out; cpuid_entry_override(entry, CPUID_7_1_EAX); + cpuid_entry_override(entry, CPUID_7_1_EDX); entry->ebx = 0; entry->ecx = 0; - entry->edx = 0; } break; case 0xa: { /* Architectural Performance Monitoring */ diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 94c0eb4dc8e66..f16c4d5e04f0b 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -13,6 +13,7 @@ */ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, + CPUID_7_1_EDX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, @@ -24,6 +25,9 @@ enum kvm_only_cpuid_leafs { #define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0) #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1) +/* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */ +#define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4) + struct cpuid_reg { u32 function; u32 index; @@ -49,6 +53,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX}, [CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX}, + [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX}, }; /* From 0f64779f1d36d4d706a08fd1162bdeb542b3655a Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 17 Jan 2024 11:18:44 -0800 Subject: [PATCH 02/18] x86/cpu: Add model number for Intel Clearwater Forest processor commit 090e3bec01763e415bccae445f5bfe3d0c61b629 upstream. Server product based on the Atom Darkmont core. Intel-SIG: commit 090e3bec0176 x86/cpu: Add model number for Intel Clearwater Forest processor. ClearWater support including CPU model and new ISAs and its dependency Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240117191844.56180-1-tony.luck@intel.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/intel-family.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 4fdeaf2730e8c..9a854273be930 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -152,8 +152,11 @@ #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ #define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */ -#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */ -#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */ +#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */ +#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */ + +#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */ + /* Xeon Phi */ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ From eeb8a6b3c204b8446ec7c69a81b6fa37480c495c Mon Sep 17 00:00:00 2001 From: Tao Su Date: Tue, 5 Nov 2024 13:48:25 +0800 Subject: [PATCH 03/18] x86: KVM: Advertise CPUIDs for new instructions in Clearwater Forest commit a0423af92cb31e6fc4f53ef9b6e19fdf08ad4395 upstream. Latest Intel platform Clearwater Forest has introduced new instructions enumerated by CPUIDs of SHA512, SM3, SM4 and AVX-VNNI-INT16. Advertise these CPUIDs to userspace so that guests can query them directly. SHA512, SM3 and SM4 are on an expected-dense CPUID leaf and some other bits on this leaf have kernel usages. Considering they have not truly kernel usages, hide them in /proc/cpuinfo. These new instructions only operate in xmm, ymm registers and have no new VMX controls, so there is no additional host enabling required for guests to use these instructions, i.e. advertising these CPUIDs to userspace is safe. Intel-SIG: commit a0423af92cb3 x86: KVM: Advertise CPUIDs for new instructions in Clearwater Forest. ClearWater support including CPU model and new ISAs and its dependency Tested-by: Jiaan Lu Tested-by: Xuelian Guo Signed-off-by: Tao Su Message-ID: <20241105054825.870939-1-tao1.su@linux.intel.com> Signed-off-by: Paolo Bonzini [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/cpufeatures.h | 3 +++ arch/x86/kvm/cpuid.c | 4 ++-- arch/x86/kvm/reverse_cpuid.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index bef9b899727e1..c538fc5b3fad4 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -317,6 +317,9 @@ #define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_SHA512 (12*32+ 0) /* SHA512 instructions */ +#define X86_FEATURE_SM3 (12*32+ 1) /* SM3 instructions */ +#define X86_FEATURE_SM4 (12*32+ 2) /* SM4 instructions */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ #define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index cd3ce2e288f87..d1bef842649e4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -524,12 +524,12 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD); kvm_cpu_cap_mask(CPUID_7_1_EAX, - F(AVX_VNNI) | F(AVX512_BF16) | + F(SHA512) | F(SM3) | F(SM4) | F(AVX_VNNI) | F(AVX512_BF16) | F(FZRM) | F(FSRS) | F(FSRC) ); kvm_cpu_cap_init_scattered(CPUID_7_1_EDX, - F(AVX_VNNI_INT8) + F(AVX_VNNI_INT8) | F(AVX_VNNI_INT16) ); kvm_cpu_cap_mask(CPUID_D_1_EAX, diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index f16c4d5e04f0b..a7cdec160a31f 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -27,6 +27,7 @@ enum kvm_only_cpuid_leafs { /* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */ #define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4) +#define X86_FEATURE_AVX_VNNI_INT16 KVM_X86_FEATURE(CPUID_7_1_EDX, 10) struct cpuid_reg { u32 function; From dddc91956ba01c36ec9ac41cf85f16a7fcb21d2e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 22 Mar 2024 09:17:25 -0700 Subject: [PATCH 04/18] x86/cpu: Add model number for another Intel Arrow Lake mobile processor commit 8a8a9c9047d1089598bdb010ec44d7f14b4f9203 upstream. This one is the regular laptop CPU. Intel-SIG: commit 8a8a9c9047d1 x86/cpu: Add model number for another Intel Arrow Lake mobile processor. New Intel X86 CPU Family definition Signed-off-by: Tony Luck Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20240322161725.195614-1-tony.luck@intel.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/intel-family.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 9a854273be930..941db18d3526e 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -121,6 +121,7 @@ #define INTEL_FAM6_METEORLAKE_L 0xAA #define INTEL_FAM6_ARROWLAKE 0xC6 +#define INTEL_FAM6_ARROWLAKE_U 0xB5 #define INTEL_FAM6_LUNARLAKE_M 0xBD From 00221986a9e7f116542606c2e322d0123599a337 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 16 Apr 2024 14:19:03 -0700 Subject: [PATCH 05/18] x86/cpu/vfm: Add/initialize x86_vfm field to struct cpuinfo_x86 commit a9d0adce69075192961f3be466c4810a21b7bc9e upstream. Refactor struct cpuinfo_x86 so that the vendor, family, and model fields are overlaid in a union with a 32-bit field that combines all three (together with a one byte reserved field in the upper byte). This will make it easy, cheap, and reliable to check all three values at once. See https://lore.kernel.org/r/Zgr6kT8oULbnmEXx@agluck-desk3 for why the ordering is (low-to-high bits): (vendor, family, model) [ bp: Move comments over the line, add the backstory about the particular order of the fields. ] Intel-SIG: commit a9d0adce6907 x86/cpu/vfm: Add/initialize x86_vfm field to struct cpuinfo_x86. New Intel X86 CPU Family definition Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240416211941.9369-2-tony.luck@intel.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/processor.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1c819649fd3b6..2e028526038a4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -79,9 +79,23 @@ extern u16 __read_mostly tlb_lld_1g[NR_INFO]; */ struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; + union { + /* + * The particular ordering (low-to-high) of (vendor, + * family, model) is done in case range of models, like + * it is usually done on AMD, need to be compared. + */ + struct { + __u8 x86_model; + /* CPU family */ + __u8 x86; + /* CPU vendor */ + __u8 x86_vendor; + __u8 x86_reserved; + }; + /* combined vendor, family, model */ + __u32 x86_vfm; + }; __u8 x86_stepping; #ifdef CONFIG_X86_64 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ From 3f3718b847ed4bd578f0dce2b21436a6f2639edd Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 16 Apr 2024 14:19:04 -0700 Subject: [PATCH 06/18] x86/cpu/vfm: Add new macros to work with (vendor/family/model) values commit e6dfdc2e89a0adedf455814c91b977d6a584cc88 upstream. To avoid adding a slew of new macros for each new Intel CPU family switch over from providing CPU model number #defines to a new scheme that encodes vendor, family, and model in a single number. [ bp: s/casted/cast/g ] Intel-SIG: commit e6dfdc2e89a0 x86/cpu/vfm: Add new macros to work with (vendor/family/model) values. New Intel X86 CPU Family definition Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240416211941.9369-3-tony.luck@intel.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/cpu_device_id.h | 93 ++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index eb8fcede9e3bf..dd7b9463696f5 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -2,6 +2,39 @@ #ifndef _ASM_X86_CPU_DEVICE_ID #define _ASM_X86_CPU_DEVICE_ID +/* + * Can't use because it generates expressions that + * cannot be used in structure initializers. Bitfield construction + * here must match the union in struct cpuinfo_86: + * union { + * struct { + * __u8 x86_model; + * __u8 x86; + * __u8 x86_vendor; + * __u8 x86_reserved; + * }; + * __u32 x86_vfm; + * }; + */ +#define VFM_MODEL_BIT 0 +#define VFM_FAMILY_BIT 8 +#define VFM_VENDOR_BIT 16 +#define VFM_RSVD_BIT 24 + +#define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT) +#define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT) +#define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT) + +#define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT) +#define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT) +#define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT) + +#define VFM_MAKE(_vendor, _family, _model) ( \ + ((_model) << VFM_MODEL_BIT) | \ + ((_family) << VFM_FAMILY_BIT) | \ + ((_vendor) << VFM_VENDOR_BIT) \ +) + /* * Declare drivers belonging to specific x86 CPUs * Similar in spirit to pci_device_id and related PCI functions @@ -49,6 +82,16 @@ .driver_data = (unsigned long) _data \ } +#define X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \ + _steppings, _feature, _data) { \ + .vendor = _vendor, \ + .family = _family, \ + .model = _model, \ + .steppings = _steppings, \ + .feature = _feature, \ + .driver_data = (unsigned long) _data \ +} + /** * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY @@ -164,6 +207,56 @@ X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, INTEL_FAM6_##model, \ steppings, X86_FEATURE_ANY, data) +/** + * X86_MATCH_VFM - Match encoded vendor/family/model + * @vfm: Encoded 8-bits each for vendor, family, model + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. + * + * Stepping and feature are set to wildcards + */ +#define X86_MATCH_VFM(vfm, data) \ + X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \ + VFM_VENDOR(vfm), \ + VFM_FAMILY(vfm), \ + VFM_MODEL(vfm), \ + X86_STEPPING_ANY, X86_FEATURE_ANY, data) + +/** + * X86_MATCH_VFM_STEPPINGS - Match encoded vendor/family/model/stepping + * @vfm: Encoded 8-bits each for vendor, family, model + * @steppings: Bitmask of steppings to match + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. + * + * feature is set to wildcard + */ +#define X86_MATCH_VFM_STEPPINGS(vfm, steppings, data) \ + X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \ + VFM_VENDOR(vfm), \ + VFM_FAMILY(vfm), \ + VFM_MODEL(vfm), \ + steppings, X86_FEATURE_ANY, data) + +/** + * X86_MATCH_VFM_FEATURE - Match encoded vendor/family/model/feature + * @vfm: Encoded 8-bits each for vendor, family, model + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is cast to unsigned long internally. + * + * Steppings is set to wildcard + */ +#define X86_MATCH_VFM_FEATURE(vfm, feature, data) \ + X86_MATCH_VENDORID_FAM_MODEL_STEPPINGS_FEATURE( \ + VFM_VENDOR(vfm), \ + VFM_FAMILY(vfm), \ + VFM_MODEL(vfm), \ + X86_STEPPING_ANY, feature, data) + /* * Match specific microcode revisions. * From 15d8cfdc63e32de1bb9fb6b3fa3c0d9bd9058341 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 16 Apr 2024 14:19:05 -0700 Subject: [PATCH 07/18] x86/cpu/vfm: Update arch/x86/include/asm/intel-family.h commit f055b6260eb3ef20a6e310d1e555a5d5a0a28ca0 upstream. New CPU #defines encode vendor and family as well as model. Update the example usage comment in arch/x86/kernel/cpu/match.c Intel-SIG: commit f055b6260eb3 x86/cpu/vfm: Update arch/x86/include/asm/intel-family.h. New Intel X86 CPU Family definition Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240416211941.9369-4-tony.luck@intel.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/intel-family.h | 93 ++++++++++++++++++++++++++++- arch/x86/kernel/cpu/match.c | 3 +- 2 files changed, 92 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 941db18d3526e..5d5380756445f 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -36,134 +36,223 @@ * names. An exception is made for skylake/kabylake where steppings seem to have gotten * their own names :-( */ + +#define IFM(_fam, _model) VFM_MAKE(X86_VENDOR_INTEL, _fam, _model) /* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ #define INTEL_FAM6_ANY X86_MODEL_ANY +/* Wildcard match for FAM6 so X86_MATCH_VFM(ANY) works */ +#define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY) #define INTEL_FAM6_CORE_YONAH 0x0E +#define INTEL_CORE_YONAH IFM(6, 0x0E) #define INTEL_FAM6_CORE2_MEROM 0x0F +#define INTEL_CORE2_MEROM IFM(6, 0x0F) #define INTEL_FAM6_CORE2_MEROM_L 0x16 +#define INTEL_CORE2_MEROM_L IFM(6, 0x16) #define INTEL_FAM6_CORE2_PENRYN 0x17 +#define INTEL_CORE2_PENRYN IFM(6, 0x17) #define INTEL_FAM6_CORE2_DUNNINGTON 0x1D +#define INTEL_CORE2_DUNNINGTON IFM(6, 0x1D) #define INTEL_FAM6_NEHALEM 0x1E +#define INTEL_NEHALEM IFM(6, 0x1E) #define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ +#define INTEL_NEHALEM_G IFM(6, 0x1F) /* Auburndale / Havendale */ #define INTEL_FAM6_NEHALEM_EP 0x1A +#define INTEL_NEHALEM_EP IFM(6, 0x1A) #define INTEL_FAM6_NEHALEM_EX 0x2E +#define INTEL_NEHALEM_EX IFM(6, 0x2E) #define INTEL_FAM6_WESTMERE 0x25 +#define INTEL_WESTMERE IFM(6, 0x25) #define INTEL_FAM6_WESTMERE_EP 0x2C +#define INTEL_WESTMERE_EP IFM(6, 0x2C) #define INTEL_FAM6_WESTMERE_EX 0x2F +#define INTEL_WESTMERE_EX IFM(6, 0x2F) #define INTEL_FAM6_SANDYBRIDGE 0x2A +#define INTEL_SANDYBRIDGE IFM(6, 0x2A) #define INTEL_FAM6_SANDYBRIDGE_X 0x2D +#define INTEL_SANDYBRIDGE_X IFM(6, 0x2D) #define INTEL_FAM6_IVYBRIDGE 0x3A +#define INTEL_IVYBRIDGE IFM(6, 0x3A) #define INTEL_FAM6_IVYBRIDGE_X 0x3E +#define INTEL_IVYBRIDGE_X IFM(6, 0x3E) #define INTEL_FAM6_HASWELL 0x3C +#define INTEL_HASWELL IFM(6, 0x3C) #define INTEL_FAM6_HASWELL_X 0x3F +#define INTEL_HASWELL_X IFM(6, 0x3F) #define INTEL_FAM6_HASWELL_L 0x45 +#define INTEL_HASWELL_L IFM(6, 0x45) #define INTEL_FAM6_HASWELL_G 0x46 +#define INTEL_HASWELL_G IFM(6, 0x46) #define INTEL_FAM6_BROADWELL 0x3D +#define INTEL_BROADWELL IFM(6, 0x3D) #define INTEL_FAM6_BROADWELL_G 0x47 +#define INTEL_BROADWELL_G IFM(6, 0x47) #define INTEL_FAM6_BROADWELL_X 0x4F +#define INTEL_BROADWELL_X IFM(6, 0x4F) #define INTEL_FAM6_BROADWELL_D 0x56 +#define INTEL_BROADWELL_D IFM(6, 0x56) #define INTEL_FAM6_SKYLAKE_L 0x4E /* Sky Lake */ +#define INTEL_SKYLAKE_L IFM(6, 0x4E) /* Sky Lake */ #define INTEL_FAM6_SKYLAKE 0x5E /* Sky Lake */ +#define INTEL_SKYLAKE IFM(6, 0x5E) /* Sky Lake */ #define INTEL_FAM6_SKYLAKE_X 0x55 /* Sky Lake */ +#define INTEL_SKYLAKE_X IFM(6, 0x55) /* Sky Lake */ /* CASCADELAKE_X 0x55 Sky Lake -- s: 7 */ /* COOPERLAKE_X 0x55 Sky Lake -- s: 11 */ #define INTEL_FAM6_KABYLAKE_L 0x8E /* Sky Lake */ +#define INTEL_KABYLAKE_L IFM(6, 0x8E) /* Sky Lake */ /* AMBERLAKE_L 0x8E Sky Lake -- s: 9 */ /* COFFEELAKE_L 0x8E Sky Lake -- s: 10 */ /* WHISKEYLAKE_L 0x8E Sky Lake -- s: 11,12 */ #define INTEL_FAM6_KABYLAKE 0x9E /* Sky Lake */ +#define INTEL_KABYLAKE IFM(6, 0x9E) /* Sky Lake */ /* COFFEELAKE 0x9E Sky Lake -- s: 10-13 */ #define INTEL_FAM6_COMETLAKE 0xA5 /* Sky Lake */ +#define INTEL_COMETLAKE IFM(6, 0xA5) /* Sky Lake */ #define INTEL_FAM6_COMETLAKE_L 0xA6 /* Sky Lake */ +#define INTEL_COMETLAKE_L IFM(6, 0xA6) /* Sky Lake */ #define INTEL_FAM6_CANNONLAKE_L 0x66 /* Palm Cove */ +#define INTEL_CANNONLAKE_L IFM(6, 0x66) /* Palm Cove */ #define INTEL_FAM6_ICELAKE_X 0x6A /* Sunny Cove */ +#define INTEL_ICELAKE_X IFM(6, 0x6A) /* Sunny Cove */ #define INTEL_FAM6_ICELAKE_D 0x6C /* Sunny Cove */ +#define INTEL_ICELAKE_D IFM(6, 0x6C) /* Sunny Cove */ #define INTEL_FAM6_ICELAKE 0x7D /* Sunny Cove */ +#define INTEL_ICELAKE IFM(6, 0x7D) /* Sunny Cove */ #define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */ +#define INTEL_ICELAKE_L IFM(6, 0x7E) /* Sunny Cove */ #define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */ - -#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ +#define INTEL_ICELAKE_NNPI IFM(6, 0x9D) /* Sunny Cove */ #define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */ +#define INTEL_ROCKETLAKE IFM(6, 0xA7) /* Cypress Cove */ #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ +#define INTEL_TIGERLAKE_L IFM(6, 0x8C) /* Willow Cove */ #define INTEL_FAM6_TIGERLAKE 0x8D /* Willow Cove */ +#define INTEL_TIGERLAKE IFM(6, 0x8D) /* Willow Cove */ #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ +#define INTEL_SAPPHIRERAPIDS_X IFM(6, 0x8F) /* Golden Cove */ #define INTEL_FAM6_EMERALDRAPIDS_X 0xCF +#define INTEL_EMERALDRAPIDS_X IFM(6, 0xCF) #define INTEL_FAM6_GRANITERAPIDS_X 0xAD +#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD) #define INTEL_FAM6_GRANITERAPIDS_D 0xAE +#define INTEL_GRANITERAPIDS_D IFM(6, 0xAE) + +/* "Hybrid" Processors (P-Core/E-Core) */ + +#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ +#define INTEL_LAKEFIELD IFM(6, 0x8A) /* Sunny Cove / Tremont */ #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ +#define INTEL_ALDERLAKE IFM(6, 0x97) /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ +#define INTEL_ALDERLAKE_L IFM(6, 0x9A) /* Golden Cove / Gracemont */ #define INTEL_FAM6_RAPTORLAKE 0xB7 +#define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */ +#define INTEL_RAPTORLAKE IFM(6, 0xB7) /* Raptor Cove / Enhanced Gracemont */ #define INTEL_FAM6_RAPTORLAKE_P 0xBA +#define INTEL_RAPTORLAKE_P IFM(6, 0xBA) #define INTEL_FAM6_RAPTORLAKE_S 0xBF +#define INTEL_RAPTORLAKE_S IFM(6, 0xBF) #define INTEL_FAM6_METEORLAKE 0xAC +#define INTEL_METEORLAKE IFM(6, 0xAC) #define INTEL_FAM6_METEORLAKE_L 0xAA +#define INTEL_METEORLAKE_L IFM(6, 0xAA) +#define INTEL_FAM6_ARROWLAKE_H 0xC5 +#define INTEL_ARROWLAKE_H IFM(6, 0xC5) #define INTEL_FAM6_ARROWLAKE 0xC6 +#define INTEL_ARROWLAKE IFM(6, 0xC6) #define INTEL_FAM6_ARROWLAKE_U 0xB5 +#define INTEL_ARROWLAKE_U IFM(6, 0xB5) #define INTEL_FAM6_LUNARLAKE_M 0xBD +#define INTEL_LUNARLAKE_M IFM(6, 0xBD) /* "Small Core" Processors (Atom/E-Core) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ +#define INTEL_ATOM_BONNELL IFM(6, 0x1C) /* Diamondville, Pineview */ #define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ +#define INTEL_ATOM_BONNELL_MID IFM(6, 0x26) /* Silverthorne, Lincroft */ #define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */ +#define INTEL_ATOM_SALTWELL IFM(6, 0x36) /* Cedarview */ #define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */ +#define INTEL_ATOM_SALTWELL_MID IFM(6, 0x27) /* Penwell */ #define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */ +#define INTEL_ATOM_SALTWELL_TABLET IFM(6, 0x35) /* Cloverview */ #define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */ +#define INTEL_ATOM_SILVERMONT IFM(6, 0x37) /* Bay Trail, Valleyview */ #define INTEL_FAM6_ATOM_SILVERMONT_D 0x4D /* Avaton, Rangely */ +#define INTEL_ATOM_SILVERMONT_D IFM(6, 0x4D) /* Avaton, Rangely */ #define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */ +#define INTEL_ATOM_SILVERMONT_MID IFM(6, 0x4A) /* Merriefield */ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */ +#define INTEL_ATOM_AIRMONT IFM(6, 0x4C) /* Cherry Trail, Braswell */ #define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */ +#define INTEL_ATOM_AIRMONT_MID IFM(6, 0x5A) /* Moorefield */ #define INTEL_FAM6_ATOM_AIRMONT_NP 0x75 /* Lightning Mountain */ +#define INTEL_ATOM_AIRMONT_NP IFM(6, 0x75) /* Lightning Mountain */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ +#define INTEL_ATOM_GOLDMONT IFM(6, 0x5C) /* Apollo Lake */ #define INTEL_FAM6_ATOM_GOLDMONT_D 0x5F /* Denverton */ +#define INTEL_ATOM_GOLDMONT_D IFM(6, 0x5F) /* Denverton */ /* Note: the micro-architecture is "Goldmont Plus" */ #define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ +#define INTEL_ATOM_GOLDMONT_PLUS IFM(6, 0x7A) /* Gemini Lake */ #define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */ +#define INTEL_ATOM_TREMONT_D IFM(6, 0x86) /* Jacobsville */ #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ +#define INTEL_ATOM_TREMONT IFM(6, 0x96) /* Elkhart Lake */ #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ +#define INTEL_ATOM_TREMONT_L IFM(6, 0x9C) /* Jasper Lake */ #define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */ +#define INTEL_ATOM_GRACEMONT IFM(6, 0xBE) /* Alderlake N */ + #define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */ +#define INTEL_ATOM_CRESTMONT_X IFM(6, 0xAF) /* Sierra Forest */ #define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */ +#define INTEL_ATOM_CRESTMONT IFM(6, 0xB6) /* Grand Ridge */ #define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */ +#define INTEL_ATOM_DARKMONT_X IFM(6, 0xDD) /* Clearwater Forest */ /* Xeon Phi */ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ +#define INTEL_XEON_PHI_KNL IFM(6, 0x57) /* Knights Landing */ #define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ +#define INTEL_XEON_PHI_KNM IFM(6, 0x85) /* Knights Mill */ /* Family 5 */ #define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */ +#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */ #endif /* _ASM_X86_INTEL_FAMILY_H */ diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index ad6776081e60d..2243083f0bc22 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -17,8 +17,7 @@ * * A typical table entry would be to match a specific CPU * - * X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_BROADWELL, - * X86_FEATURE_ANY, NULL); + * X86_MATCH_VFM_FEATURE(INTEL_BROADWELL, X86_FEATURE_ANY, NULL); * * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY, * %X86_MODEL_ANY, %X86_FEATURE_ANY (except for vendor) From 1e155a428044ee1dbd8ce1900635cc3dd384bde5 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 20 May 2024 15:46:04 -0700 Subject: [PATCH 08/18] x86/cpu: Switch to new Intel CPU model defines commit 744866f5c0e2e13dccde754ade8c89924a29e04d upstream. New CPU #defines encode vendor and family as well as model. Update INTEL_CPU_DESC() to work with vendor/family/model. Intel-SIG: commit 744866f5c0e2 x86/cpu: Switch to new Intel CPU model defines. New Intel X86 CPU Family definition Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240520224620.9480-34-tony.luck%40intel.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 64 ++++++++++++++-------------- arch/x86/include/asm/cpu_device_id.h | 8 ++-- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index ddd5817042f7a..9d866fb5fec8c 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4912,35 +4912,35 @@ static __init void intel_clovertown_quirk(void) } static const struct x86_cpu_desc isolation_ucodes[] = { - INTEL_CPU_DESC(INTEL_FAM6_HASWELL, 3, 0x0000001f), - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_L, 1, 0x0000001e), - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_G, 1, 0x00000015), - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 2, 0x00000037), - INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X, 4, 0x0000000a), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL, 4, 0x00000023), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_G, 1, 0x00000014), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 2, 0x00000010), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 3, 0x07000009), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 4, 0x0f000009), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 5, 0x0e000002), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 1, 0x0b000014), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 11, 0x00000000), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c), - INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 9, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 10, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 11, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_L, 12, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 10, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 11, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 12, 0x0000004e), - INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 13, 0x0000004e), + INTEL_CPU_DESC(INTEL_HASWELL, 3, 0x0000001f), + INTEL_CPU_DESC(INTEL_HASWELL_L, 1, 0x0000001e), + INTEL_CPU_DESC(INTEL_HASWELL_G, 1, 0x00000015), + INTEL_CPU_DESC(INTEL_HASWELL_X, 2, 0x00000037), + INTEL_CPU_DESC(INTEL_HASWELL_X, 4, 0x0000000a), + INTEL_CPU_DESC(INTEL_BROADWELL, 4, 0x00000023), + INTEL_CPU_DESC(INTEL_BROADWELL_G, 1, 0x00000014), + INTEL_CPU_DESC(INTEL_BROADWELL_D, 2, 0x00000010), + INTEL_CPU_DESC(INTEL_BROADWELL_D, 3, 0x07000009), + INTEL_CPU_DESC(INTEL_BROADWELL_D, 4, 0x0f000009), + INTEL_CPU_DESC(INTEL_BROADWELL_D, 5, 0x0e000002), + INTEL_CPU_DESC(INTEL_BROADWELL_X, 1, 0x0b000014), + INTEL_CPU_DESC(INTEL_SKYLAKE_X, 3, 0x00000021), + INTEL_CPU_DESC(INTEL_SKYLAKE_X, 4, 0x00000000), + INTEL_CPU_DESC(INTEL_SKYLAKE_X, 5, 0x00000000), + INTEL_CPU_DESC(INTEL_SKYLAKE_X, 6, 0x00000000), + INTEL_CPU_DESC(INTEL_SKYLAKE_X, 7, 0x00000000), + INTEL_CPU_DESC(INTEL_SKYLAKE_X, 11, 0x00000000), + INTEL_CPU_DESC(INTEL_SKYLAKE_L, 3, 0x0000007c), + INTEL_CPU_DESC(INTEL_SKYLAKE, 3, 0x0000007c), + INTEL_CPU_DESC(INTEL_KABYLAKE, 9, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE_L, 9, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE_L, 10, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE_L, 11, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE_L, 12, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE, 10, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE, 11, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE, 12, 0x0000004e), + INTEL_CPU_DESC(INTEL_KABYLAKE, 13, 0x0000004e), {} }; @@ -4957,9 +4957,9 @@ static __init void intel_pebs_isolation_quirk(void) } static const struct x86_cpu_desc pebs_ucodes[] = { - INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE, 7, 0x00000028), - INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 6, 0x00000618), - INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X, 7, 0x0000070c), + INTEL_CPU_DESC(INTEL_SANDYBRIDGE, 7, 0x00000028), + INTEL_CPU_DESC(INTEL_SANDYBRIDGE_X, 6, 0x00000618), + INTEL_CPU_DESC(INTEL_SANDYBRIDGE_X, 7, 0x0000070c), {} }; diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index dd7b9463696f5..ba32d5655bec3 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -275,10 +275,10 @@ struct x86_cpu_desc { u32 x86_microcode_rev; }; -#define INTEL_CPU_DESC(model, stepping, revision) { \ - .x86_family = 6, \ - .x86_vendor = X86_VENDOR_INTEL, \ - .x86_model = (model), \ +#define INTEL_CPU_DESC(vfm, stepping, revision) { \ + .x86_family = VFM_FAMILY(vfm), \ + .x86_vendor = VFM_VENDOR(vfm), \ + .x86_model = VFM_MODEL(vfm), \ .x86_stepping = (stepping), \ .x86_microcode_rev = (revision), \ } From eff62422fe81eccc6daf38e578c86b602bcf244d Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 20 May 2024 15:45:59 -0700 Subject: [PATCH 09/18] x86/cpu/intel: Switch to new Intel CPU model defines commit 6568fc18c2f62b4f35092e9680fe39f3500f4767 upstream. New CPU #defines encode vendor and family as well as model. Intel-SIG: commit 6568fc18c2f6 x86/cpu/intel: Switch to new Intel CPU model defines. New Intel X86 CPU Family definition Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240520224620.9480-29-tony.luck%40intel.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/intel.c | 108 ++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 55 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f4df401050a9e..df06502cc6bad 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -69,19 +69,19 @@ static bool cpu_model_supports_sld __ro_after_init; */ static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c) { - switch (c->x86_model) { - case INTEL_FAM6_CORE_YONAH: - case INTEL_FAM6_CORE2_MEROM: - case INTEL_FAM6_CORE2_MEROM_L: - case INTEL_FAM6_CORE2_PENRYN: - case INTEL_FAM6_CORE2_DUNNINGTON: - case INTEL_FAM6_NEHALEM: - case INTEL_FAM6_NEHALEM_G: - case INTEL_FAM6_NEHALEM_EP: - case INTEL_FAM6_NEHALEM_EX: - case INTEL_FAM6_WESTMERE: - case INTEL_FAM6_WESTMERE_EP: - case INTEL_FAM6_SANDYBRIDGE: + switch (c->x86_vfm) { + case INTEL_CORE_YONAH: + case INTEL_CORE2_MEROM: + case INTEL_CORE2_MEROM_L: + case INTEL_CORE2_PENRYN: + case INTEL_CORE2_DUNNINGTON: + case INTEL_NEHALEM: + case INTEL_NEHALEM_G: + case INTEL_NEHALEM_EP: + case INTEL_NEHALEM_EX: + case INTEL_WESTMERE: + case INTEL_WESTMERE_EP: + case INTEL_SANDYBRIDGE: setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP); } } @@ -103,9 +103,9 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) */ if (c->x86 != 6) return; - switch (c->x86_model) { - case INTEL_FAM6_XEON_PHI_KNL: - case INTEL_FAM6_XEON_PHI_KNM: + switch (c->x86_vfm) { + case INTEL_XEON_PHI_KNL: + case INTEL_XEON_PHI_KNM: break; default: return; @@ -131,32 +131,32 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) * - Release note from 20180108 microcode release */ struct sku_microcode { - u8 model; + u32 vfm; u8 stepping; u32 microcode; }; static const struct sku_microcode spectre_bad_microcodes[] = { - { INTEL_FAM6_KABYLAKE, 0x0B, 0x80 }, - { INTEL_FAM6_KABYLAKE, 0x0A, 0x80 }, - { INTEL_FAM6_KABYLAKE, 0x09, 0x80 }, - { INTEL_FAM6_KABYLAKE_L, 0x0A, 0x80 }, - { INTEL_FAM6_KABYLAKE_L, 0x09, 0x80 }, - { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, - { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, - { INTEL_FAM6_BROADWELL, 0x04, 0x28 }, - { INTEL_FAM6_BROADWELL_G, 0x01, 0x1b }, - { INTEL_FAM6_BROADWELL_D, 0x02, 0x14 }, - { INTEL_FAM6_BROADWELL_D, 0x03, 0x07000011 }, - { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 }, - { INTEL_FAM6_HASWELL_L, 0x01, 0x21 }, - { INTEL_FAM6_HASWELL_G, 0x01, 0x18 }, - { INTEL_FAM6_HASWELL, 0x03, 0x23 }, - { INTEL_FAM6_HASWELL_X, 0x02, 0x3b }, - { INTEL_FAM6_HASWELL_X, 0x04, 0x10 }, - { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a }, + { INTEL_KABYLAKE, 0x0B, 0x80 }, + { INTEL_KABYLAKE, 0x0A, 0x80 }, + { INTEL_KABYLAKE, 0x09, 0x80 }, + { INTEL_KABYLAKE_L, 0x0A, 0x80 }, + { INTEL_KABYLAKE_L, 0x09, 0x80 }, + { INTEL_SKYLAKE_X, 0x03, 0x0100013e }, + { INTEL_SKYLAKE_X, 0x04, 0x0200003c }, + { INTEL_BROADWELL, 0x04, 0x28 }, + { INTEL_BROADWELL_G, 0x01, 0x1b }, + { INTEL_BROADWELL_D, 0x02, 0x14 }, + { INTEL_BROADWELL_D, 0x03, 0x07000011 }, + { INTEL_BROADWELL_X, 0x01, 0x0b000025 }, + { INTEL_HASWELL_L, 0x01, 0x21 }, + { INTEL_HASWELL_G, 0x01, 0x18 }, + { INTEL_HASWELL, 0x03, 0x23 }, + { INTEL_HASWELL_X, 0x02, 0x3b }, + { INTEL_HASWELL_X, 0x04, 0x10 }, + { INTEL_IVYBRIDGE_X, 0x04, 0x42a }, /* Observed in the wild */ - { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b }, - { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 }, + { INTEL_SANDYBRIDGE_X, 0x06, 0x61b }, + { INTEL_SANDYBRIDGE_X, 0x07, 0x712 }, }; static bool bad_spectre_microcode(struct cpuinfo_x86 *c) @@ -170,11 +170,8 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_HYPERVISOR)) return false; - if (c->x86 != 6) - return false; - for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { - if (c->x86_model == spectre_bad_microcodes[i].model && + if (c->x86_vfm == spectre_bad_microcodes[i].vfm && c->x86_stepping == spectre_bad_microcodes[i].stepping) return (c->microcode <= spectre_bad_microcodes[i].microcode); } @@ -483,7 +480,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) * need the microcode to have already been loaded... so if it is * not, recommend a BIOS update and disable large pages. */ - if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 && + if (c->x86_vfm == INTEL_ATOM_BONNELL && c->x86_stepping <= 2 && c->microcode < 0x20e) { pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); clear_cpu_cap(c, X86_FEATURE_PSE); @@ -516,11 +513,11 @@ static void early_init_intel(struct cpuinfo_x86 *c) /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ if (c->x86 == 6) { - switch (c->x86_model) { - case INTEL_FAM6_ATOM_SALTWELL_MID: - case INTEL_FAM6_ATOM_SALTWELL_TABLET: - case INTEL_FAM6_ATOM_SILVERMONT_MID: - case INTEL_FAM6_ATOM_AIRMONT_NP: + switch (c->x86_vfm) { + case INTEL_ATOM_SALTWELL_MID: + case INTEL_ATOM_SALTWELL_TABLET: + case INTEL_ATOM_SILVERMONT_MID: + case INTEL_ATOM_AIRMONT_NP: set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); break; default: @@ -564,7 +561,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE * to be modified. */ - if (c->x86 == 5 && c->x86_model == 9) { + if (c->x86_vfm == INTEL_QUARK_X1000) { pr_info("Disabling PGE capability bit\n"); setup_clear_cpu_cap(X86_FEATURE_PGE); } @@ -834,12 +831,13 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_PEBS); } - if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) && - (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) + if (boot_cpu_has(X86_FEATURE_CLFLUSH) && + (c->x86_vfm == INTEL_CORE2_DUNNINGTON || + c->x86_vfm == INTEL_NEHALEM_EX || + c->x86_vfm == INTEL_WESTMERE_EX)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); - if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) && - ((c->x86_model == INTEL_FAM6_ATOM_GOLDMONT))) + if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT) set_cpu_bug(c, X86_BUG_MONITOR); #ifdef CONFIG_X86_64 @@ -1389,9 +1387,9 @@ void switch_to_sld(unsigned long tifn) * feature even though they do not enumerate IA32_CORE_CAPABILITIES. */ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0), + X86_MATCH_VFM(INTEL_ICELAKE_X, 0), + X86_MATCH_VFM(INTEL_ICELAKE_L, 0), + X86_MATCH_VFM(INTEL_ICELAKE_D, 0), {} }; From e4240120629011acd2ce60b9ae059e2146605b0a Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Wed, 29 May 2024 19:36:05 +0100 Subject: [PATCH 10/18] x86/cpu/intel: Drop stray FAM6 check with new Intel CPU model defines commit 34b3fc558b537bdf99644dcde539e151716f6331 upstream. The outer if () should have been dropped when switching to c->x86_vfm. Fixes: 6568fc18c2f6 ("x86/cpu/intel: Switch to new Intel CPU model defines") Intel-SIG: commit 34b3fc558b53 x86/cpu/intel: Drop stray FAM6 check with new Intel CPU model defines. New Intel X86 CPU Family definition Signed-off-by: Andrew Cooper Signed-off-by: Borislav Petkov (AMD) Acked-by: Tony Luck Link: https://lore.kernel.org/r/20240529183605.17520-1-andrew.cooper3@citrix.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/intel.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index df06502cc6bad..72a75adba7f26 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -512,17 +512,13 @@ static void early_init_intel(struct cpuinfo_x86 *c) } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ - if (c->x86 == 6) { - switch (c->x86_vfm) { - case INTEL_ATOM_SALTWELL_MID: - case INTEL_ATOM_SALTWELL_TABLET: - case INTEL_ATOM_SILVERMONT_MID: - case INTEL_ATOM_AIRMONT_NP: - set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); - break; - default: - break; - } + switch (c->x86_vfm) { + case INTEL_ATOM_SALTWELL_MID: + case INTEL_ATOM_SALTWELL_TABLET: + case INTEL_ATOM_SILVERMONT_MID: + case INTEL_ATOM_AIRMONT_NP: + set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); + break; } /* From 200184b54348b85aad0e9a077214ab881e4ac0f6 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 20 May 2024 15:46:02 -0700 Subject: [PATCH 11/18] perf/x86/intel: Switch to new Intel CPU model defines commit d142df13f3574237688c7a20e0019cccc7ae39eb upstream. New CPU #defines encode vendor and family as well as model. Intel-SIG: commit d142df13f357 perf/x86/intel: Switch to new Intel CPU model defines. New Intel X86 CPU Family definition Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20240520224620.9480-32-tony.luck%40intel.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/events/intel/core.c | 146 +++++++++++++++++------------------ 1 file changed, 73 insertions(+), 73 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 9d866fb5fec8c..4825a2b88bb3c 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5814,8 +5814,8 @@ static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) static __always_inline bool is_mtl(u8 x86_model) { - return (x86_model == INTEL_FAM6_METEORLAKE) || - (x86_model == INTEL_FAM6_METEORLAKE_L); + return (x86_model == INTEL_METEORLAKE) || + (x86_model == INTEL_METEORLAKE_L); } __init int intel_pmu_init(void) @@ -5913,19 +5913,19 @@ __init int intel_pmu_init(void) /* * Install the hw-cache-events table: */ - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_CORE_YONAH: + switch (boot_cpu_data.x86_vfm) { + case INTEL_CORE_YONAH: pr_cont("Core events, "); name = "core"; break; - case INTEL_FAM6_CORE2_MEROM: + case INTEL_CORE2_MEROM: x86_add_quirk(intel_clovertown_quirk); fallthrough; - case INTEL_FAM6_CORE2_MEROM_L: - case INTEL_FAM6_CORE2_PENRYN: - case INTEL_FAM6_CORE2_DUNNINGTON: + case INTEL_CORE2_MEROM_L: + case INTEL_CORE2_PENRYN: + case INTEL_CORE2_DUNNINGTON: memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -5937,9 +5937,9 @@ __init int intel_pmu_init(void) name = "core2"; break; - case INTEL_FAM6_NEHALEM: - case INTEL_FAM6_NEHALEM_EP: - case INTEL_FAM6_NEHALEM_EX: + case INTEL_NEHALEM: + case INTEL_NEHALEM_EP: + case INTEL_NEHALEM_EX: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -5971,11 +5971,11 @@ __init int intel_pmu_init(void) name = "nehalem"; break; - case INTEL_FAM6_ATOM_BONNELL: - case INTEL_FAM6_ATOM_BONNELL_MID: - case INTEL_FAM6_ATOM_SALTWELL: - case INTEL_FAM6_ATOM_SALTWELL_MID: - case INTEL_FAM6_ATOM_SALTWELL_TABLET: + case INTEL_ATOM_BONNELL: + case INTEL_ATOM_BONNELL_MID: + case INTEL_ATOM_SALTWELL: + case INTEL_ATOM_SALTWELL_MID: + case INTEL_ATOM_SALTWELL_TABLET: memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -5988,11 +5988,11 @@ __init int intel_pmu_init(void) name = "bonnell"; break; - case INTEL_FAM6_ATOM_SILVERMONT: - case INTEL_FAM6_ATOM_SILVERMONT_D: - case INTEL_FAM6_ATOM_SILVERMONT_MID: - case INTEL_FAM6_ATOM_AIRMONT: - case INTEL_FAM6_ATOM_AIRMONT_MID: + case INTEL_ATOM_SILVERMONT: + case INTEL_ATOM_SILVERMONT_D: + case INTEL_ATOM_SILVERMONT_MID: + case INTEL_ATOM_AIRMONT: + case INTEL_ATOM_AIRMONT_MID: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, @@ -6010,8 +6010,8 @@ __init int intel_pmu_init(void) name = "silvermont"; break; - case INTEL_FAM6_ATOM_GOLDMONT: - case INTEL_FAM6_ATOM_GOLDMONT_D: + case INTEL_ATOM_GOLDMONT: + case INTEL_ATOM_GOLDMONT_D: memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, @@ -6037,7 +6037,7 @@ __init int intel_pmu_init(void) name = "goldmont"; break; - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + case INTEL_ATOM_GOLDMONT_PLUS: memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs, @@ -6066,9 +6066,9 @@ __init int intel_pmu_init(void) name = "goldmont_plus"; break; - case INTEL_FAM6_ATOM_TREMONT_D: - case INTEL_FAM6_ATOM_TREMONT: - case INTEL_FAM6_ATOM_TREMONT_L: + case INTEL_ATOM_TREMONT_D: + case INTEL_ATOM_TREMONT: + case INTEL_ATOM_TREMONT_L: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6095,7 +6095,7 @@ __init int intel_pmu_init(void) name = "Tremont"; break; - case INTEL_FAM6_ATOM_GRACEMONT: + case INTEL_ATOM_GRACEMONT: x86_pmu.mid_ack = true; memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6125,8 +6125,8 @@ __init int intel_pmu_init(void) name = "gracemont"; break; - case INTEL_FAM6_ATOM_CRESTMONT: - case INTEL_FAM6_ATOM_CRESTMONT_X: + case INTEL_ATOM_CRESTMONT: + case INTEL_ATOM_CRESTMONT_X: x86_pmu.mid_ack = true; memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6156,9 +6156,9 @@ __init int intel_pmu_init(void) name = "crestmont"; break; - case INTEL_FAM6_WESTMERE: - case INTEL_FAM6_WESTMERE_EP: - case INTEL_FAM6_WESTMERE_EX: + case INTEL_WESTMERE: + case INTEL_WESTMERE_EP: + case INTEL_WESTMERE_EX: memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, @@ -6187,8 +6187,8 @@ __init int intel_pmu_init(void) name = "westmere"; break; - case INTEL_FAM6_SANDYBRIDGE: - case INTEL_FAM6_SANDYBRIDGE_X: + case INTEL_SANDYBRIDGE: + case INTEL_SANDYBRIDGE_X: x86_add_quirk(intel_sandybridge_quirk); x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, @@ -6201,7 +6201,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; - if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X) + if (boot_cpu_data.x86_vfm == INTEL_SANDYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -6227,8 +6227,8 @@ __init int intel_pmu_init(void) name = "sandybridge"; break; - case INTEL_FAM6_IVYBRIDGE: - case INTEL_FAM6_IVYBRIDGE_X: + case INTEL_IVYBRIDGE: + case INTEL_IVYBRIDGE_X: x86_add_quirk(intel_ht_bug); memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6244,7 +6244,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; x86_pmu.pebs_prec_dist = true; - if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X) + if (boot_cpu_data.x86_vfm == INTEL_IVYBRIDGE_X) x86_pmu.extra_regs = intel_snbep_extra_regs; else x86_pmu.extra_regs = intel_snb_extra_regs; @@ -6266,10 +6266,10 @@ __init int intel_pmu_init(void) break; - case INTEL_FAM6_HASWELL: - case INTEL_FAM6_HASWELL_X: - case INTEL_FAM6_HASWELL_L: - case INTEL_FAM6_HASWELL_G: + case INTEL_HASWELL: + case INTEL_HASWELL_X: + case INTEL_HASWELL_L: + case INTEL_HASWELL_G: x86_add_quirk(intel_ht_bug); x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; @@ -6299,10 +6299,10 @@ __init int intel_pmu_init(void) name = "haswell"; break; - case INTEL_FAM6_BROADWELL: - case INTEL_FAM6_BROADWELL_D: - case INTEL_FAM6_BROADWELL_G: - case INTEL_FAM6_BROADWELL_X: + case INTEL_BROADWELL: + case INTEL_BROADWELL_D: + case INTEL_BROADWELL_G: + case INTEL_BROADWELL_X: x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6341,8 +6341,8 @@ __init int intel_pmu_init(void) name = "broadwell"; break; - case INTEL_FAM6_XEON_PHI_KNL: - case INTEL_FAM6_XEON_PHI_KNM: + case INTEL_XEON_PHI_KNL: + case INTEL_XEON_PHI_KNM: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, @@ -6361,15 +6361,15 @@ __init int intel_pmu_init(void) name = "knights-landing"; break; - case INTEL_FAM6_SKYLAKE_X: + case INTEL_SKYLAKE_X: pmem = true; fallthrough; - case INTEL_FAM6_SKYLAKE_L: - case INTEL_FAM6_SKYLAKE: - case INTEL_FAM6_KABYLAKE_L: - case INTEL_FAM6_KABYLAKE: - case INTEL_FAM6_COMETLAKE_L: - case INTEL_FAM6_COMETLAKE: + case INTEL_SKYLAKE_L: + case INTEL_SKYLAKE: + case INTEL_KABYLAKE_L: + case INTEL_KABYLAKE: + case INTEL_COMETLAKE_L: + case INTEL_COMETLAKE: x86_add_quirk(intel_pebs_isolation_quirk); x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6418,16 +6418,16 @@ __init int intel_pmu_init(void) name = "skylake"; break; - case INTEL_FAM6_ICELAKE_X: - case INTEL_FAM6_ICELAKE_D: + case INTEL_ICELAKE_X: + case INTEL_ICELAKE_D: x86_pmu.pebs_ept = 1; pmem = true; fallthrough; - case INTEL_FAM6_ICELAKE_L: - case INTEL_FAM6_ICELAKE: - case INTEL_FAM6_TIGERLAKE_L: - case INTEL_FAM6_TIGERLAKE: - case INTEL_FAM6_ROCKETLAKE: + case INTEL_ICELAKE_L: + case INTEL_ICELAKE: + case INTEL_TIGERLAKE_L: + case INTEL_TIGERLAKE: + case INTEL_ROCKETLAKE: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -6460,16 +6460,16 @@ __init int intel_pmu_init(void) name = "icelake"; break; - case INTEL_FAM6_SAPPHIRERAPIDS_X: - case INTEL_FAM6_EMERALDRAPIDS_X: + case INTEL_SAPPHIRERAPIDS_X: + case INTEL_EMERALDRAPIDS_X: x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; x86_pmu.extra_regs = intel_spr_extra_regs; pr_cont("Sapphire Rapids events, "); name = "sapphire_rapids"; goto glc_common; - case INTEL_FAM6_GRANITERAPIDS_X: - case INTEL_FAM6_GRANITERAPIDS_D: + case INTEL_GRANITERAPIDS_X: + case INTEL_GRANITERAPIDS_D: x86_pmu.extra_regs = intel_gnr_extra_regs; pr_cont("Granite Rapids events, "); name = "granite_rapids"; @@ -6508,12 +6508,12 @@ __init int intel_pmu_init(void) x86_pmu.set_topdown_event_period = icl_set_topdown_event_period; break; - case INTEL_FAM6_ALDERLAKE: - case INTEL_FAM6_ALDERLAKE_L: - case INTEL_FAM6_RAPTORLAKE: - case INTEL_FAM6_RAPTORLAKE_P: - case INTEL_FAM6_METEORLAKE: - case INTEL_FAM6_METEORLAKE_L: + case INTEL_ALDERLAKE: + case INTEL_ALDERLAKE_L: + case INTEL_RAPTORLAKE: + case INTEL_RAPTORLAKE_P: + case INTEL_METEORLAKE: + case INTEL_METEORLAKE_L: /* * Alder Lake has 2 types of CPU, core and atom. * From fd8983e8114ee0d0446caf655439e0a8c99396d7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 8 Jan 2025 06:30:16 -0800 Subject: [PATCH 12/18] perf/x86/intel/uncore: Clean up func_id commit 3f710be02ea648001ba18fb2c9fa7765e743dec2 upstream. The below warning may be triggered on GNR when the PCIE uncore units are exposed. WARNING: CPU: 4 PID: 1 at arch/x86/events/intel/uncore.c:1169 uncore_pci_pmu_register+0x158/0x190 The current uncore driver assumes that all the devices in the same PMU have the exact same devfn. It's true for the previous platforms. But it doesn't work for the new PCIE uncore units on GNR. The assumption doesn't make sense. There is no reason to limit the devices from the same PMU to the same devfn. Also, the current code just throws the warning, but still registers the device. The WARN_ON_ONCE() should be removed. The func_id is used by the later event_init() to check if a event->pmu has valid devices. For cpu and mmio uncore PMUs, they are always valid. For pci uncore PMUs, it's set when the PMU is registered. It can be replaced by the pmu->registered. Clean up the func_id. Intel-SIG: commit 3f710be02ea6 perf/x86/intel/uncore: Clean up func_id. PMU GNR support Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Eric Hu Link: https://lkml.kernel.org/r/20250108143017.1793781-1-kan.liang@linux.intel.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/events/intel/uncore.c | 20 +++++++------------- arch/x86/events/intel/uncore.h | 1 - arch/x86/events/intel/uncore_snb.c | 2 +- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 727ed19aeed17..5901925c09773 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -744,7 +744,7 @@ static int uncore_pmu_event_init(struct perf_event *event) pmu = uncore_event_to_pmu(event); /* no device found for this pmu */ - if (pmu->func_id < 0) + if (!pmu->registered) return -ENOENT; /* Sampling not supported yet */ @@ -991,7 +991,7 @@ static void uncore_types_exit(struct intel_uncore_type **types) uncore_type_exit(*types); } -static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) +static int __init uncore_type_init(struct intel_uncore_type *type) { struct intel_uncore_pmu *pmus; size_t size; @@ -1004,7 +1004,6 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) size = uncore_max_dies() * sizeof(struct intel_uncore_box *); for (i = 0; i < type->num_boxes; i++) { - pmus[i].func_id = setid ? i : -1; pmus[i].pmu_idx = i; pmus[i].type = type; pmus[i].boxes = kzalloc(size, GFP_KERNEL); @@ -1054,12 +1053,12 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) } static int __init -uncore_types_init(struct intel_uncore_type **types, bool setid) +uncore_types_init(struct intel_uncore_type **types) { int ret; for (; *types; types++) { - ret = uncore_type_init(*types, setid); + ret = uncore_type_init(*types); if (ret) return ret; } @@ -1159,11 +1158,6 @@ static int uncore_pci_pmu_register(struct pci_dev *pdev, if (!box) return -ENOMEM; - if (pmu->func_id < 0) - pmu->func_id = pdev->devfn; - else - WARN_ON_ONCE(pmu->func_id != pdev->devfn); - atomic_inc(&box->refcnt); box->dieid = die; box->pci_dev = pdev; @@ -1409,7 +1403,7 @@ static int __init uncore_pci_init(void) goto err; } - ret = uncore_types_init(uncore_pci_uncores, false); + ret = uncore_types_init(uncore_pci_uncores); if (ret) goto errtype; @@ -1677,7 +1671,7 @@ static int __init uncore_cpu_init(void) { int ret; - ret = uncore_types_init(uncore_msr_uncores, true); + ret = uncore_types_init(uncore_msr_uncores); if (ret) goto err; @@ -1696,7 +1690,7 @@ static int __init uncore_mmio_init(void) struct intel_uncore_type **types = uncore_mmio_uncores; int ret; - ret = uncore_types_init(types, true); + ret = uncore_types_init(types); if (ret) goto err; diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index b2524f8eba79e..6c79bf0125125 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -125,7 +125,6 @@ struct intel_uncore_pmu { struct pmu pmu; char name[UNCORE_PMU_NAME_LEN]; int pmu_idx; - int func_id; bool registered; atomic_t activeboxes; cpumask_t cpu_mask; diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 1f4869227efb9..5eb8a64c2850e 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -733,7 +733,7 @@ static int snb_uncore_imc_event_init(struct perf_event *event) pmu = uncore_event_to_pmu(event); /* no device found for this pmu */ - if (pmu->func_id < 0) + if (!pmu->registered) return -ENOENT; /* Sampling not supported yet */ From 075088c72e0a254ef1343752f5edbaf076574d71 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 8 Jan 2025 06:30:17 -0800 Subject: [PATCH 13/18] perf/x86/intel/uncore: Support more units on Granite Rapids commit 6d642735cdb6cdb814d2b6c81652caa53ce04842 upstream. The same CXL PMONs support is also avaiable on GNR. Apply spr_uncore_cxlcm and spr_uncore_cxldp to GNR as well. The other units were broken on early HW samples, so they were ignored in the early enabling patch. The issue has been fixed and verified on the later production HW. Add UPI, B2UPI, B2HOT, PCIEX16 and PCIEX8 for GNR. Intel-SIG: commit 6d642735cdb6 perf/x86/intel/uncore: Support more units on Granite Rapids. GNR PMU backporting Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Eric Hu Link: https://lkml.kernel.org/r/20250108143017.1793781-2-kan.liang@linux.intel.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/events/intel/uncore_snbep.c | 48 ++++++++++++++++++---------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 6dc8685860e1b..8d69b7756de16 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -6310,17 +6310,8 @@ void spr_uncore_mmio_init(void) /* GNR uncore support */ #define UNCORE_GNR_NUM_UNCORE_TYPES 23 -#define UNCORE_GNR_TYPE_15 15 -#define UNCORE_GNR_B2UPI 18 -#define UNCORE_GNR_TYPE_21 21 -#define UNCORE_GNR_TYPE_22 22 int gnr_uncore_units_ignore[] = { - UNCORE_SPR_UPI, - UNCORE_GNR_TYPE_15, - UNCORE_GNR_B2UPI, - UNCORE_GNR_TYPE_21, - UNCORE_GNR_TYPE_22, UNCORE_IGNORE_END }; @@ -6329,6 +6320,31 @@ static struct intel_uncore_type gnr_uncore_ubox = { .attr_update = uncore_alias_groups, }; +static struct intel_uncore_type gnr_uncore_pciex8 = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "pciex8", +}; + +static struct intel_uncore_type gnr_uncore_pciex16 = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "pciex16", +}; + +static struct intel_uncore_type gnr_uncore_upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "upi", +}; + +static struct intel_uncore_type gnr_uncore_b2upi = { + SPR_UNCORE_PCI_COMMON_FORMAT(), + .name = "b2upi", +}; + +static struct intel_uncore_type gnr_uncore_b2hot = { + .name = "b2hot", + .attr_update = uncore_alias_groups, +}; + static struct intel_uncore_type gnr_uncore_b2cmi = { SPR_UNCORE_PCI_COMMON_FORMAT(), .name = "b2cmi", @@ -6353,21 +6369,21 @@ static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = { &gnr_uncore_ubox, &spr_uncore_imc, NULL, + &gnr_uncore_upi, NULL, NULL, NULL, + &spr_uncore_cxlcm, + &spr_uncore_cxldp, NULL, - NULL, - NULL, - NULL, - NULL, + &gnr_uncore_b2hot, &gnr_uncore_b2cmi, &gnr_uncore_b2cxl, - NULL, + &gnr_uncore_b2upi, NULL, &gnr_uncore_mdf_sbo, - NULL, - NULL, + &gnr_uncore_pciex16, + &gnr_uncore_pciex8, }; void gnr_uncore_cpu_init(void) From 976635aee3aeb3cb37030b32c5b190e5e45211f5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 11 Feb 2025 13:30:15 -0800 Subject: [PATCH 14/18] perf vendor events: Add Clearwaterforest events commit e415c1493fa1e93afaec697385b8952d932c41bc upstream. Add events v1.00. Bring in the events from: https://github.com/intel/perfmon/tree/main/CWF/events Co-developed-by: Caleb Biggers Intel-SIG: commit e415c1493fa1 perf vendor events: Add Clearwaterforest events. ClearWater Forrest PMU backporting Signed-off-by: Caleb Biggers Acked-by: Kan Liang Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Link: https://lore.kernel.org/r/20250211213031.114209-9-irogers@google.com Signed-off-by: Namhyung Kim [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- .../arch/x86/clearwaterforest/cache.json | 144 ++++++++++++++++++ .../arch/x86/clearwaterforest/counter.json | 7 + .../arch/x86/clearwaterforest/frontend.json | 18 +++ .../arch/x86/clearwaterforest/memory.json | 22 +++ .../arch/x86/clearwaterforest/other.json | 22 +++ .../arch/x86/clearwaterforest/pipeline.json | 113 ++++++++++++++ .../x86/clearwaterforest/virtual-memory.json | 29 ++++ tools/perf/pmu-events/arch/x86/mapfile.csv | 1 + 8 files changed, 356 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/clearwaterforest/cache.json create mode 100644 tools/perf/pmu-events/arch/x86/clearwaterforest/counter.json create mode 100644 tools/perf/pmu-events/arch/x86/clearwaterforest/frontend.json create mode 100644 tools/perf/pmu-events/arch/x86/clearwaterforest/memory.json create mode 100644 tools/perf/pmu-events/arch/x86/clearwaterforest/other.json create mode 100644 tools/perf/pmu-events/arch/x86/clearwaterforest/pipeline.json create mode 100644 tools/perf/pmu-events/arch/x86/clearwaterforest/virtual-memory.json diff --git a/tools/perf/pmu-events/arch/x86/clearwaterforest/cache.json b/tools/perf/pmu-events/arch/x86/clearwaterforest/cache.json new file mode 100644 index 0000000000000..875361b30f1df --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/clearwaterforest/cache.json @@ -0,0 +1,144 @@ +[ + { + "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "SampleAfterValue": "1000003", + "UMask": "0x41" + }, + { + "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "SampleAfterValue": "1000003", + "UMask": "0x4f" + }, + { + "BriefDescription": "Counts the number of load ops retired.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", + "SampleAfterValue": "1000003", + "UMask": "0x81" + }, + { + "BriefDescription": "Counts the number of store ops retired.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_STORES", + "SampleAfterValue": "1000003", + "UMask": "0x82" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_1024", + "MSRIndex": "0x3F6", + "MSRValue": "0x400", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_2048", + "MSRIndex": "0x3F6", + "MSRValue": "0x800", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.", + "Counter": "0,1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of stores uops retired same as MEM_UOPS_RETIRED.ALL_STORES", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY", + "SampleAfterValue": "1000003", + "UMask": "0x6" + } +] diff --git a/tools/perf/pmu-events/arch/x86/clearwaterforest/counter.json b/tools/perf/pmu-events/arch/x86/clearwaterforest/counter.json new file mode 100644 index 0000000000000..a0eaf5b6f2bc8 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/clearwaterforest/counter.json @@ -0,0 +1,7 @@ +[ + { + "Unit": "core", + "CountersNumFixed": "3", + "CountersNumGeneric": "39" + } +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/clearwaterforest/frontend.json b/tools/perf/pmu-events/arch/x86/clearwaterforest/frontend.json new file mode 100644 index 0000000000000..7a9250e5c8f29 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/clearwaterforest/frontend.json @@ -0,0 +1,18 @@ +[ + { + "BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x80", + "EventName": "ICACHE.ACCESSES", + "SampleAfterValue": "1000003", + "UMask": "0x3" + }, + { + "BriefDescription": "Counts every time the code stream enters into a new cache line by walking sequential from the previous line or being redirected by a jump and the instruction cache registers bytes are not present. -", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x80", + "EventName": "ICACHE.MISSES", + "SampleAfterValue": "1000003", + "UMask": "0x2" + } +] diff --git a/tools/perf/pmu-events/arch/x86/clearwaterforest/memory.json b/tools/perf/pmu-events/arch/x86/clearwaterforest/memory.json new file mode 100644 index 0000000000000..f5007e56f39bb --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/clearwaterforest/memory.json @@ -0,0 +1,22 @@ +[ + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x33FBFC00001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x33FBFC00002", + "SampleAfterValue": "100003", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/clearwaterforest/other.json b/tools/perf/pmu-events/arch/x86/clearwaterforest/other.json new file mode 100644 index 0000000000000..80454e497f835 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/clearwaterforest/other.json @@ -0,0 +1,22 @@ +[ + { + "BriefDescription": "Counts demand data reads that have any type of response.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10002", + "SampleAfterValue": "100003", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/clearwaterforest/pipeline.json b/tools/perf/pmu-events/arch/x86/clearwaterforest/pipeline.json new file mode 100644 index 0000000000000..6a5faa704b852 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/clearwaterforest/pipeline.json @@ -0,0 +1,113 @@ +[ + { + "BriefDescription": "Counts the total number of branch instructions retired for all branch types.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.CORE", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. [This event is alias to CPU_CLK_UNHALTED.THREAD_P]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.CORE_P", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of unhalted reference clock cycles.", + "Counter": "Fixed counter 2", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "SampleAfterValue": "1000003", + "UMask": "0x3" + }, + { + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. [This event is alias to CPU_CLK_UNHALTED.CORE_P]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of instructions retired.", + "Counter": "Fixed counter 0", + "EventName": "INST_RETIRED.ANY", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of instructions retired.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.ANY_P", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", + "Counter": "36", + "EventName": "TOPDOWN_BAD_SPECULATION.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls. [This event is alias to TOPDOWN_BE_BOUND.ALL_P]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xa4", + "EventName": "TOPDOWN_BE_BOUND.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls. [This event is alias to TOPDOWN_BE_BOUND.ALL]", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0xa4", + "EventName": "TOPDOWN_BE_BOUND.ALL_P", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of retirement slots not consumed due to front end stalls.", + "Counter": "37", + "EventName": "TOPDOWN_FE_BOUND.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x6" + }, + { + "BriefDescription": "Fixed Counter: Counts the number of consumed retirement slots.", + "Counter": "38", + "EventName": "TOPDOWN_RETIRING.ALL", + "SampleAfterValue": "1000003", + "UMask": "0x7" + } +] diff --git a/tools/perf/pmu-events/arch/x86/clearwaterforest/virtual-memory.json b/tools/perf/pmu-events/arch/x86/clearwaterforest/virtual-memory.json new file mode 100644 index 0000000000000..78f2b835c1fa4 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/clearwaterforest/virtual-memory.json @@ -0,0 +1,29 @@ +[ + { + "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "1000003", + "UMask": "0xe" + }, + { + "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "1000003", + "UMask": "0xe" + }, + { + "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.", + "Counter": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "1000003", + "UMask": "0xe" + } +] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 05b115b5fcc2d..ce689ec4852a2 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -34,6 +34,7 @@ GenuineIntel-6-25,v2,westmereep-sp,core GenuineIntel-6-2F,v2,westmereex,core GenuineIntel-6-55-[01234],v1,skylakex,core GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core +GenuineIntel-6-DD,v1.00,clearwaterforest,core GenuineIntel-6-7D,v1,icelake,core GenuineIntel-6-7E,v1,icelake,core GenuineIntel-6-8[CD],v1,tigerlake,core From f0c5f5464b313bda2bdddf369e4a5adf7865f722 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 24 Apr 2024 11:15:01 -0700 Subject: [PATCH 15/18] perf/x86/intel/uncore: Switch to new Intel CPU model defines commit 9828a1cff456e0d6f55f9e148585313c2dd59c00 upstream. New CPU #defines encode vendor and family as well as model. [ bp: Squash *three* uncore patches into one. ] Intel-SIG: commit 9828a1cff456 perf/x86/intel/uncore: Switch to new Intel CPU model defines. ClearWater Forrest PMU backporting Signed-off-by: Tony Luck Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/all/20240424181501.41557-1-tony.luck%40intel.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/events/intel/uncore.c | 94 ++++++++++++++-------------- arch/x86/events/intel/uncore_nhmex.c | 3 +- arch/x86/events/intel/uncore_snbep.c | 5 +- 3 files changed, 52 insertions(+), 50 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 5901925c09773..32dc17cf501b9 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1839,53 +1839,53 @@ static const struct intel_uncore_init_fun generic_uncore_init __initconst = { }; static const struct x86_cpu_id intel_uncore_match[] __initconst = { - X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &nhm_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &nhm_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &nhm_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &snb_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &ivb_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &hsw_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &hsw_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &hsw_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &bdw_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &bdw_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &snbep_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &nhmex_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &nhmex_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ivbep_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &hswep_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &bdx_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &bdx_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &knl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &knl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, &skl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &skl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &skx_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, &skl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &skl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &skl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &skl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &icl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &tgl_l_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &tgl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, &gnr_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D, &gnr_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &adl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, &gnr_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT, &gnr_uncore_init), + X86_MATCH_VFM(INTEL_NEHALEM_EP, &nhm_uncore_init), + X86_MATCH_VFM(INTEL_NEHALEM, &nhm_uncore_init), + X86_MATCH_VFM(INTEL_WESTMERE, &nhm_uncore_init), + X86_MATCH_VFM(INTEL_WESTMERE_EP, &nhm_uncore_init), + X86_MATCH_VFM(INTEL_SANDYBRIDGE, &snb_uncore_init), + X86_MATCH_VFM(INTEL_IVYBRIDGE, &ivb_uncore_init), + X86_MATCH_VFM(INTEL_HASWELL, &hsw_uncore_init), + X86_MATCH_VFM(INTEL_HASWELL_L, &hsw_uncore_init), + X86_MATCH_VFM(INTEL_HASWELL_G, &hsw_uncore_init), + X86_MATCH_VFM(INTEL_BROADWELL, &bdw_uncore_init), + X86_MATCH_VFM(INTEL_BROADWELL_G, &bdw_uncore_init), + X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &snbep_uncore_init), + X86_MATCH_VFM(INTEL_NEHALEM_EX, &nhmex_uncore_init), + X86_MATCH_VFM(INTEL_WESTMERE_EX, &nhmex_uncore_init), + X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &ivbep_uncore_init), + X86_MATCH_VFM(INTEL_HASWELL_X, &hswep_uncore_init), + X86_MATCH_VFM(INTEL_BROADWELL_X, &bdx_uncore_init), + X86_MATCH_VFM(INTEL_BROADWELL_D, &bdx_uncore_init), + X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &knl_uncore_init), + X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &knl_uncore_init), + X86_MATCH_VFM(INTEL_SKYLAKE, &skl_uncore_init), + X86_MATCH_VFM(INTEL_SKYLAKE_L, &skl_uncore_init), + X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_uncore_init), + X86_MATCH_VFM(INTEL_KABYLAKE_L, &skl_uncore_init), + X86_MATCH_VFM(INTEL_KABYLAKE, &skl_uncore_init), + X86_MATCH_VFM(INTEL_COMETLAKE_L, &skl_uncore_init), + X86_MATCH_VFM(INTEL_COMETLAKE, &skl_uncore_init), + X86_MATCH_VFM(INTEL_ICELAKE_L, &icl_uncore_init), + X86_MATCH_VFM(INTEL_ICELAKE_NNPI, &icl_uncore_init), + X86_MATCH_VFM(INTEL_ICELAKE, &icl_uncore_init), + X86_MATCH_VFM(INTEL_ICELAKE_D, &icx_uncore_init), + X86_MATCH_VFM(INTEL_ICELAKE_X, &icx_uncore_init), + X86_MATCH_VFM(INTEL_TIGERLAKE_L, &tgl_l_uncore_init), + X86_MATCH_VFM(INTEL_TIGERLAKE, &tgl_uncore_init), + X86_MATCH_VFM(INTEL_ROCKETLAKE, &rkl_uncore_init), + X86_MATCH_VFM(INTEL_ALDERLAKE, &adl_uncore_init), + X86_MATCH_VFM(INTEL_ALDERLAKE_L, &adl_uncore_init), + X86_MATCH_VFM(INTEL_RAPTORLAKE, &adl_uncore_init), + X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &adl_uncore_init), + X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_uncore_init), + X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_uncore_init), + X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_uncore_init), + X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, &gnr_uncore_init), + X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &snr_uncore_init), + X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_uncore_init), + X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_uncore_init), + X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_uncore_init), {}, }; MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index 56eea2c66cfb8..158c24823d901 100644 --- a/arch/x86/events/intel/uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Nehalem-EX/Westmere-EX uncore support */ +#include #include "uncore.h" /* NHM-EX event control */ @@ -1217,7 +1218,7 @@ static struct intel_uncore_type *nhmex_msr_uncores[] = { void nhmex_uncore_cpu_init(void) { - if (boot_cpu_data.x86_model == 46) + if (boot_cpu_data.x86_vfm == INTEL_NEHALEM_EX) uncore_nhmex = true; else nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 8d69b7756de16..654cfe0000369 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* SandyBridge-EP/IvyTown uncore support */ +#include #include "uncore.h" #include "uncore_discovery.h" @@ -3258,7 +3259,7 @@ void bdx_uncore_cpu_init(void) uncore_msr_uncores = bdx_msr_uncores; /* Detect systems with no SBOXes */ - if ((boot_cpu_data.x86_model == 86) || hswep_has_limit_sbox(BDX_PCU_DID)) + if (boot_cpu_data.x86_vfm == INTEL_BROADWELL_D || hswep_has_limit_sbox(BDX_PCU_DID)) uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints; @@ -5150,7 +5151,7 @@ static int icx_iio_get_topology(struct intel_uncore_type *type) static int icx_iio_set_mapping(struct intel_uncore_type *type) { /* Detect ICX-D system. This case is not supported */ - if (boot_cpu_data.x86_model == INTEL_FAM6_ICELAKE_D) { + if (boot_cpu_data.x86_vfm == INTEL_ICELAKE_D) { pmu_clear_mapping_attr(type->attr_update, &icx_iio_mapping_group); return -EPERM; } From 8983e40a04bbb6c60481d90226a0f60e77889ae9 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 11 Dec 2024 08:11:46 -0800 Subject: [PATCH 16/18] perf/x86/intel/uncore: Add Clearwater Forest support commit the upstream. same as the previous Sierra Forest. The only difference is the event list, which will be supported in the perf tool later. Intel-SIG: commit the perf/x86/intel/uncore: Add Clearwater Forest support. ClearWater Forrest PMU backporting Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20241211161146.235253-1-kan.liang@linux.intel.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/events/intel/uncore.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 32dc17cf501b9..a29a9a99b4b65 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1886,6 +1886,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_uncore_init), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &gnr_uncore_init), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &gnr_uncore_init), + X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &gnr_uncore_init), {}, }; MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); From c21979f9d8fbd81713631e8ab4c2abcb56b19e41 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 7 Jul 2025 13:17:47 -0700 Subject: [PATCH 17/18] perf/x86/intel/uncore: Support MSR portal for discovery tables commit cf002dafedd06241175e4dbce39ba90a4b75822c upstream. Starting from the Panther Lake, the discovery table mechanism is also supported in client platforms. The difference is that the portal of the global discovery table is retrieved from an MSR. The layout of discovery tables are the same as the server platforms. Factor out __parse_discovery_table() to parse discover tables. The uncore PMON is Die scope. Need to parse the discovery tables for each die. Intel-SIG: commit cf002dafedd0 perf/x86/intel/uncore: Support MSR portal for discovery tables. ClearWater Forrest PMU backporting Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dapeng Mi Link: https://lore.kernel.org/r/20250707201750.616527-2-kan.liang@linux.intel.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/events/intel/uncore_discovery.c | 79 ++++++++++++++++++++---- arch/x86/events/intel/uncore_discovery.h | 3 + 2 files changed, 70 insertions(+), 12 deletions(-) diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 9da52386e91d5..79656d4d46df2 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -273,24 +273,15 @@ uncore_ignore_unit(struct uncore_unit_discovery *unit, int *ignore) return false; } -static int parse_discovery_table(struct pci_dev *dev, int die, - u32 bar_offset, bool *parsed, - int *ignore) +static int __parse_discovery_table(resource_size_t addr, int die, + bool *parsed, int *ignore) { struct uncore_global_discovery global; struct uncore_unit_discovery unit; void __iomem *io_addr; - resource_size_t addr; unsigned long size; - u32 val; int i; - pci_read_config_dword(dev, bar_offset, &val); - - if (val & UNCORE_DISCOVERY_MASK) - return -EINVAL; - - addr = (resource_size_t)(val & ~UNCORE_DISCOVERY_MASK); size = UNCORE_DISCOVERY_GLOBAL_MAP_SIZE; io_addr = ioremap(addr, size); if (!io_addr) @@ -333,7 +324,32 @@ static int parse_discovery_table(struct pci_dev *dev, int die, return 0; } -bool intel_uncore_has_discovery_tables(int *ignore) +static int parse_discovery_table(struct pci_dev *dev, int die, + u32 bar_offset, bool *parsed, + int *ignore) +{ + resource_size_t addr; + u32 val; + + pci_read_config_dword(dev, bar_offset, &val); + + if (val & ~PCI_BASE_ADDRESS_MEM_MASK & ~PCI_BASE_ADDRESS_MEM_TYPE_64) + return -EINVAL; + + addr = (resource_size_t)(val & PCI_BASE_ADDRESS_MEM_MASK); +#ifdef CONFIG_PHYS_ADDR_T_64BIT + if ((val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) { + u32 val2; + + pci_read_config_dword(dev, bar_offset + 4, &val2); + addr |= ((resource_size_t)val2) << 32; + } +#endif + + return __parse_discovery_table(addr, die, parsed, ignore); +} + +static bool intel_uncore_has_discovery_tables_pci(int *ignore) { u32 device, val, entry_id, bar_offset; int die, dvsec = 0, ret = true; @@ -382,6 +398,45 @@ bool intel_uncore_has_discovery_tables(int *ignore) return ret; } +static bool intel_uncore_has_discovery_tables_msr(int *ignore) +{ + unsigned long *die_mask; + bool parsed = false; + int cpu, die; + u64 base; + + die_mask = kcalloc(BITS_TO_LONGS(uncore_max_dies()), + sizeof(unsigned long), GFP_KERNEL); + if (!die_mask) + return false; + + cpus_read_lock(); + for_each_online_cpu(cpu) { + die = topology_logical_die_id(cpu); + if (__test_and_set_bit(die, die_mask)) + continue; + + if (rdmsrl_safe_on_cpu(cpu, UNCORE_DISCOVERY_MSR, &base)) + continue; + + if (!base) + continue; + + __parse_discovery_table(base, die, &parsed, ignore); + } + + cpus_read_unlock(); + + kfree(die_mask); + return parsed; +} + +bool intel_uncore_has_discovery_tables(int *ignore) +{ + return intel_uncore_has_discovery_tables_msr(ignore) || + intel_uncore_has_discovery_tables_pci(ignore); +} + void intel_uncore_clear_discovery_tables(void) { struct intel_uncore_discovery_type *type, *next; diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index 17d017059b797..e08c0d07d8ad8 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ b/arch/x86/events/intel/uncore_discovery.h @@ -1,5 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +/* Store the full address of the global discovery table */ +#define UNCORE_DISCOVERY_MSR 0x201e + /* Generic device ID of a discovery table device */ #define UNCORE_DISCOVERY_TABLE_DEVICE 0x09a7 /* Capability ID for a discovery table device */ From 875632dce9e7a0b2bc688cd22f309405ad7dbf99 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 7 Jul 2025 13:17:48 -0700 Subject: [PATCH 18/18] perf/x86/intel/uncore: Support customized MMIO map size commit fca24bf2b6b619770d7f1222c0284791d7766239 upstream. For a server platform, the MMIO map size is always 0x4000. However, a client platform may have a smaller map size. Make the map size customizable. Intel-SIG: commit fca24bf2b6b6 perf/x86/intel/uncore: Support customized MMIO map size. ClearWater Forrest PMU backporting Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dapeng Mi Link: https://lore.kernel.org/r/20250707201750.616527-3-kan.liang@linux.intel.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/events/intel/uncore_discovery.c | 2 +- arch/x86/events/intel/uncore_snbep.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 79656d4d46df2..a543a54fd7140 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -650,7 +650,7 @@ void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box) } addr = unit->addr; - box->io_addr = ioremap(addr, UNCORE_GENERIC_MMIO_SIZE); + box->io_addr = ioremap(addr, type->mmio_map_size); if (!box->io_addr) { pr_warn("Uncore type %d box %d: ioremap error for 0x%llx.\n", type->type_id, unit->id, (unsigned long long)addr); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 654cfe0000369..bf73f14d7efc7 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -6122,6 +6122,8 @@ static void uncore_type_customized_copy(struct intel_uncore_type *to_type, to_type->format_group = from_type->format_group; if (from_type->attr_update) to_type->attr_update = from_type->attr_update; + if (from_type->mmio_map_size) + to_type->mmio_map_size = from_type->mmio_map_size; } static struct intel_uncore_type **