Skip to content

Commit

Permalink
x86/cpu: Move cpu_l[l2]c_id into topology info
Browse files Browse the repository at this point in the history
The topology IDs which identify the LLC and L2 domains clearly belong to
the per CPU topology information.

Move them into cpuinfo_x86::topo (struct cpuinfo_topology) and get rid of the
extra per CPU data and the related exports.

This also paves the way to do proper topology evaluation during early boot
because it removes the only per CPU dependency for that.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Tested-by: Sohil Mehta <sohil.mehta@intel.com>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230814085112.803864641@linutronix.de
  • Loading branch information
Thomas Gleixner committed Oct 10, 2023
1 parent 22dc963 commit 6e29032
Show file tree
Hide file tree
Showing 13 changed files with 48 additions and 67 deletions.
4 changes: 1 addition & 3 deletions Documentation/arch/x86/topology.rst
Expand Up @@ -79,9 +79,7 @@ Package-related topology information in the kernel:
The maximum possible number of packages in the system. Helpful for per
package facilities to preallocate per package information.

- cpu_llc_id:

A per-CPU variable containing:
- cpuinfo_x86.topo.llc_id:

- On Intel, the first APIC ID of the list of CPUs sharing the Last Level
Cache
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/events/amd/uncore.c
Expand Up @@ -537,7 +537,7 @@ static int amd_uncore_cpu_starting(unsigned int cpu)

if (amd_uncore_llc) {
uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
uncore->id = get_llc_id(cpu);
uncore->id = per_cpu_llc_id(cpu);

uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
Expand Down
3 changes: 0 additions & 3 deletions arch/x86/include/asm/cacheinfo.h
Expand Up @@ -7,9 +7,6 @@ extern unsigned int memory_caching_control;
#define CACHE_MTRR 0x01
#define CACHE_PAT 0x02

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu);
void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu);

void cache_disable(void);
void cache_enable(void);
void set_cache_aps_delayed_init(bool val);
Expand Down
14 changes: 13 additions & 1 deletion arch/x86/include/asm/processor.h
Expand Up @@ -99,6 +99,10 @@ struct cpuinfo_topology {
// Logical ID mappings
u32 logical_pkg_id;
u32 logical_die_id;

// Cache level topology IDs
u32 llc_id;
u32 l2c_id;
};

struct cpuinfo_x86 {
Expand Down Expand Up @@ -689,7 +693,15 @@ extern int set_tsc_mode(unsigned int val);

DECLARE_PER_CPU(u64, msr_misc_features_shadow);

extern u16 get_llc_id(unsigned int cpu);
/*
 * Fetch the last level cache (LLC) topology ID stored in the per-CPU
 * cpu_info of @cpu (cpu_info.topo.llc_id).
 *
 * NOTE(review): topo.llc_id is declared as u32 in struct cpuinfo_topology,
 * while this accessor hands back a u16, so an ID above 0xFFFF would be
 * truncated here - confirm that this narrowing is intended.
 */
static inline u16 per_cpu_llc_id(unsigned int cpu)
{
	u16 llc = per_cpu(cpu_info.topo.llc_id, cpu);

	return llc;
}

/*
 * Fetch the L2 cache topology ID stored in the per-CPU cpu_info of @cpu
 * (cpu_info.topo.l2c_id).
 *
 * NOTE(review): topo.l2c_id is declared as u32 in struct cpuinfo_topology,
 * while this accessor hands back a u16, so an ID above 0xFFFF would be
 * truncated here - confirm that this narrowing is intended.
 */
static inline u16 per_cpu_l2c_id(unsigned int cpu)
{
	u16 l2c = per_cpu(cpu_info.topo.l2c_id, cpu);

	return l2c;
}

#ifdef CONFIG_CPU_SUP_AMD
extern u32 amd_get_nodes_per_socket(void);
Expand Down
2 changes: 0 additions & 2 deletions arch/x86/include/asm/smp.h
Expand Up @@ -17,8 +17,6 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map);
/* cpus sharing the last level cache: */
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id);

DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid);
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/include/asm/topology.h
Expand Up @@ -115,7 +115,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu);
extern unsigned int __max_die_per_package;

#ifdef CONFIG_SMP
#define topology_cluster_id(cpu) (per_cpu(cpu_l2c_id, cpu))
#define topology_cluster_id(cpu) (cpu_data(cpu).topo.l2c_id)
#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu))
#define topology_cluster_cpumask(cpu) (cpu_clustergroup_mask(cpu))
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kernel/apic/apic_numachip.c
Expand Up @@ -161,7 +161,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
u64 val;
u32 nodes = 1;

this_cpu_write(cpu_llc_id, node);
c->topo.llc_id = node;

/* Account for nodes per socket in multi-core-module processors */
if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
Expand Down
12 changes: 4 additions & 8 deletions arch/x86/kernel/cpu/amd.c
Expand Up @@ -401,8 +401,6 @@ static void legacy_fixup_core_id(struct cpuinfo_x86 *c)
*/
static void amd_get_topology(struct cpuinfo_x86 *c)
{
int cpu = smp_processor_id();

/* get information required for multi-node processors */
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
int err;
Expand Down Expand Up @@ -430,15 +428,14 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
if (!err)
c->x86_coreid_bits = get_count_order(c->x86_max_cores);

cacheinfo_amd_init_llc_id(c, cpu);
cacheinfo_amd_init_llc_id(c);

} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;

rdmsrl(MSR_FAM10H_NODE_ID, value);
c->topo.die_id = value & 7;

per_cpu(cpu_llc_id, cpu) = c->topo.die_id;
c->topo.llc_id = c->topo.die_id;
} else
return;

Expand All @@ -455,15 +452,14 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
/*
 * Split the initial APIC ID of @c into a core ID and a package ID using
 * c->x86_coreid_bits, then reuse the package ID as die ID and as last
 * level cache ID.
 *
 * NOTE(review): both the per_cpu(cpu_llc_id, ...) store and the
 * c->topo.llc_id store are present below, and 'cpu' is only needed by the
 * former. This looks like the before/after lines of a diff shown without
 * +/- markers - confirm which store is meant to remain.
 */
static void amd_detect_cmp(struct cpuinfo_x86 *c)
{
unsigned bits;
int cpu = smp_processor_id();

bits = c->x86_coreid_bits;
/* Low order bits define the core id (index of core in socket) */
c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1);
/* Convert the initial APIC ID into the socket ID */
c->topo.pkg_id = c->topo.initial_apicid >> bits;
/* use socket ID also for last level cache */
per_cpu(cpu_llc_id, cpu) = c->topo.die_id = c->topo.pkg_id;
c->topo.llc_id = c->topo.die_id = c->topo.pkg_id;
}

u32 amd_get_nodes_per_socket(void)
Expand All @@ -481,7 +477,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c)

node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE)
node = get_llc_id(cpu);
node = per_cpu_llc_id(cpu);

/*
* On multi-fabric platform (e.g. Numascale NumaChip) a
Expand Down
33 changes: 12 additions & 21 deletions arch/x86/kernel/cpu/cacheinfo.c
Expand Up @@ -661,7 +661,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
return i;
}

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu)
void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c)
{
/*
* We may have multiple LLCs if L3 caches exist, so check if we
Expand All @@ -672,13 +672,13 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu)

if (c->x86 < 0x17) {
/* LLC is at the node level. */
per_cpu(cpu_llc_id, cpu) = c->topo.die_id;
c->topo.llc_id = c->topo.die_id;
} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
/*
* LLC is at the core complex level.
* Core complex ID is ApicId[3] for these processors.
*/
per_cpu(cpu_llc_id, cpu) = c->topo.apicid >> 3;
c->topo.llc_id = c->topo.apicid >> 3;
} else {
/*
* LLC ID is calculated from the number of threads sharing the
Expand All @@ -694,12 +694,12 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu)
if (num_sharing_cache) {
int bits = get_count_order(num_sharing_cache);

per_cpu(cpu_llc_id, cpu) = c->topo.apicid >> bits;
c->topo.llc_id = c->topo.apicid >> bits;
}
}
}

void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
{
/*
* We may have multiple LLCs if L3 caches exist, so check if we
Expand All @@ -712,7 +712,7 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
* LLC is at the core complex level.
* Core complex ID is ApicId[3] for these processors.
*/
per_cpu(cpu_llc_id, cpu) = c->topo.apicid >> 3;
c->topo.llc_id = c->topo.apicid >> 3;
}

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
Expand Down Expand Up @@ -740,9 +740,6 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_SMP
unsigned int cpu = c->cpu_index;
#endif

if (c->cpuid_level > 3) {
static int is_initialized;
Expand Down Expand Up @@ -856,30 +853,24 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)

if (new_l2) {
l2 = new_l2;
#ifdef CONFIG_SMP
per_cpu(cpu_llc_id, cpu) = l2_id;
per_cpu(cpu_l2c_id, cpu) = l2_id;
#endif
c->topo.llc_id = l2_id;
c->topo.l2c_id = l2_id;
}

if (new_l3) {
l3 = new_l3;
#ifdef CONFIG_SMP
per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
c->topo.llc_id = l3_id;
}

#ifdef CONFIG_SMP
/*
* If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
* If llc_id is not yet set, this means cpuid_level < 4 which in
* turn means that the only possibility is SMT (as indicated in
* cpuid1). Since cpuid2 doesn't specify shared caches, and we know
* that SMT shares all caches, we can unconditionally set llc_id to
* c->topo.pkg_id.
*/
if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
per_cpu(cpu_llc_id, cpu) = c->topo.pkg_id;
#endif
if (c->topo.llc_id == BAD_APICID)
c->topo.llc_id = c->topo.pkg_id;

c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

Expand Down
14 changes: 2 additions & 12 deletions arch/x86/kernel/cpu/common.c
Expand Up @@ -74,18 +74,6 @@ u32 elf_hwcap2 __read_mostly;
int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);

/* Last level cache ID of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;

u16 get_llc_id(unsigned int cpu)
{
return per_cpu(cpu_llc_id, cpu);
}
EXPORT_SYMBOL_GPL(get_llc_id);

/* L2 cache ID of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID;

static struct ppin_info {
int feature;
int msr_ppin_ctl;
Expand Down Expand Up @@ -1830,6 +1818,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
c->x86_max_cores = 1;
c->x86_coreid_bits = 0;
c->topo.cu_id = 0xff;
c->topo.llc_id = BAD_APICID;
c->topo.l2c_id = BAD_APICID;
#ifdef CONFIG_X86_64
c->x86_clflush_size = 64;
c->x86_phys_bits = 36;
Expand Down
3 changes: 3 additions & 0 deletions arch/x86/kernel/cpu/cpu.h
Expand Up @@ -78,6 +78,9 @@ extern int detect_ht_early(struct cpuinfo_x86 *c);
extern void detect_ht(struct cpuinfo_x86 *c);
extern void check_null_seg_clears_base(struct cpuinfo_x86 *c);

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c);
void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c);

unsigned int aperfmperf_get_khz(int cpu);
void cpu_select_mitigations(void);

Expand Down
14 changes: 5 additions & 9 deletions arch/x86/kernel/cpu/hygon.c
Expand Up @@ -63,8 +63,6 @@ static void hygon_get_topology_early(struct cpuinfo_x86 *c)
*/
static void hygon_get_topology(struct cpuinfo_x86 *c)
{
int cpu = smp_processor_id();

/* get information required for multi-node processors */
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
int err;
Expand Down Expand Up @@ -94,14 +92,13 @@ static void hygon_get_topology(struct cpuinfo_x86 *c)
if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) && c->x86_model <= 0x3)
c->topo.pkg_id = c->topo.apicid >> APICID_SOCKET_ID_BIT;

cacheinfo_hygon_init_llc_id(c, cpu);
cacheinfo_hygon_init_llc_id(c);
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;

rdmsrl(MSR_FAM10H_NODE_ID, value);
c->topo.die_id = value & 7;

per_cpu(cpu_llc_id, cpu) = c->topo.die_id;
c->topo.llc_id = c->topo.die_id;
} else
return;

Expand All @@ -116,15 +113,14 @@ static void hygon_get_topology(struct cpuinfo_x86 *c)
/*
 * Split the initial APIC ID of @c into a core ID and a package ID using
 * c->x86_coreid_bits, then reuse the package ID as die ID and as last
 * level cache ID.
 *
 * NOTE(review): both the per_cpu(cpu_llc_id, ...) store and the
 * c->topo.llc_id store are present below, and 'cpu' is only needed by the
 * former. This looks like the before/after lines of a diff shown without
 * +/- markers - confirm which store is meant to remain.
 */
static void hygon_detect_cmp(struct cpuinfo_x86 *c)
{
unsigned int bits;
int cpu = smp_processor_id();

bits = c->x86_coreid_bits;
/* Low order bits define the core id (index of core in socket) */
c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1);
/* Convert the initial APIC ID into the socket ID */
c->topo.pkg_id = c->topo.initial_apicid >> bits;
/* use socket ID also for last level cache */
per_cpu(cpu_llc_id, cpu) = c->topo.die_id = c->topo.pkg_id;
/* Use package ID also for last level cache */
c->topo.llc_id = c->topo.die_id = c->topo.pkg_id;
}

static void srat_detect_node(struct cpuinfo_x86 *c)
Expand All @@ -136,7 +132,7 @@ static void srat_detect_node(struct cpuinfo_x86 *c)

node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE)
node = per_cpu(cpu_llc_id, cpu);
node = c->topo.llc_id;

/*
* On multi-fabric platform (e.g. Numascale NumaChip) a
Expand Down
10 changes: 5 additions & 5 deletions arch/x86/kernel/smpboot.c
Expand Up @@ -478,7 +478,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)

if (c->topo.pkg_id == o->topo.pkg_id &&
c->topo.die_id == o->topo.die_id &&
per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
per_cpu_llc_id(cpu1) == per_cpu_llc_id(cpu2)) {
if (c->topo.core_id == o->topo.core_id)
return topology_sane(c, o, "smt");

Expand Down Expand Up @@ -510,11 +510,11 @@ static bool match_l2c(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;

/* If the arch didn't set up l2c_id, fall back to SMT */
if (per_cpu(cpu_l2c_id, cpu1) == BAD_APICID)
if (per_cpu_l2c_id(cpu1) == BAD_APICID)
return match_smt(c, o);

/* Do not match if L2 cache id does not match: */
if (per_cpu(cpu_l2c_id, cpu1) != per_cpu(cpu_l2c_id, cpu2))
if (per_cpu_l2c_id(cpu1) != per_cpu_l2c_id(cpu2))
return false;

return topology_sane(c, o, "l2c");
Expand Down Expand Up @@ -560,11 +560,11 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
bool intel_snc = id && id->driver_data;

/* Do not match if we do not have a valid APICID for cpu: */
if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID)
if (per_cpu_llc_id(cpu1) == BAD_APICID)
return false;

/* Do not match if LLC id does not match: */
if (per_cpu(cpu_llc_id, cpu1) != per_cpu(cpu_llc_id, cpu2))
if (per_cpu_llc_id(cpu1) != per_cpu_llc_id(cpu2))
return false;

/*
Expand Down

0 comments on commit 6e29032

Please sign in to comment.