Skip to content

Commit

Permalink
sched/numa: Rewrite the CONFIG_NUMA sched domain support
Browse files Browse the repository at this point in the history
The current code groups up to 16 nodes in a level and then puts an
ALLNODES domain spanning the entire tree on top of that. This doesn't
reflect the numa topology and esp for the smaller not-fully-connected
machines out there today this might make a difference.

Therefore, build a proper numa topology based on node_distance().

Since there's no fixed numa layers anymore, the static SD_NODE_INIT
and SD_ALLNODES_INIT aren't usable anymore, the new code tries to
construct something similar and scales some values either on the
number of cpus in the domain and/or the node_distance() ratio.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Anton Blanchard <anton@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: David Howells <dhowells@redhat.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: linux-alpha@vger.kernel.org
Cc: linux-ia64@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-sh@vger.kernel.org
Cc: Matt Turner <mattst88@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: sparclinux@vger.kernel.org
Cc: Tony Luck <tony.luck@intel.com>
Cc: x86@kernel.org
Cc: Dimitri Sivanich <sivanich@sgi.com>
Cc: Greg Pearson <greg.pearson@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: bob.picco@oracle.com
Cc: chris.mason@oracle.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-r74n3n8hhuc2ynbrnp3vt954@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Peter Zijlstra authored and Ingo Molnar committed May 9, 2012
1 parent bd939f4 commit cb83b62
Show file tree
Hide file tree
Showing 9 changed files with 185 additions and 318 deletions.
25 changes: 0 additions & 25 deletions arch/ia64/include/asm/topology.h
Expand Up @@ -70,31 +70,6 @@ void build_cpu_to_node_map(void);
.nr_balance_failed = 0, \
}

/* sched_domains SD_NODE_INIT for IA64 NUMA machines */
#define SD_NODE_INIT (struct sched_domain) { \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
.min_interval = 8, \
.max_interval = 8*(min(num_online_cpus(), 32U)), \
.busy_factor = 64, \
.imbalance_pct = 125, \
.cache_nice_tries = 2, \
.busy_idx = 3, \
.idle_idx = 2, \
.newidle_idx = 0, \
.wake_idx = 0, \
.forkexec_idx = 0, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_NEWIDLE \
| SD_BALANCE_EXEC \
| SD_BALANCE_FORK \
| SD_SERIALIZE, \
.last_balance = jiffies, \
.balance_interval = 64, \
.nr_balance_failed = 0, \
}

#endif /* CONFIG_NUMA */

#ifdef CONFIG_SMP
Expand Down
17 changes: 0 additions & 17 deletions arch/mips/include/asm/mach-ip27/topology.h
Expand Up @@ -36,23 +36,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];

#define node_distance(from, to) (__node_distances[(from)][(to)])

/* sched_domains SD_NODE_INIT for SGI IP27 machines */
#define SD_NODE_INIT (struct sched_domain) { \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
.min_interval = 8, \
.max_interval = 32, \
.busy_factor = 32, \
.imbalance_pct = 125, \
.cache_nice_tries = 1, \
.flags = SD_LOAD_BALANCE | \
SD_BALANCE_EXEC, \
.last_balance = jiffies, \
.balance_interval = 1, \
.nr_balance_failed = 0, \
}

#include <asm-generic/topology.h>

#endif /* _ASM_MACH_TOPOLOGY_H */
36 changes: 0 additions & 36 deletions arch/powerpc/include/asm/topology.h
Expand Up @@ -18,12 +18,6 @@ struct device_node;
*/
#define RECLAIM_DISTANCE 10

/*
* Avoid creating an extra level of balancing (SD_ALLNODES) on the largest
* POWER7 boxes which have a maximum of 32 nodes.
*/
#define SD_NODES_PER_DOMAIN 32

#include <asm/mmzone.h>

static inline int cpu_to_node(int cpu)
Expand Down Expand Up @@ -51,36 +45,6 @@ static inline int pcibus_to_node(struct pci_bus *bus)
cpu_all_mask : \
cpumask_of_node(pcibus_to_node(bus)))

/* sched_domains SD_NODE_INIT for PPC64 machines */
#define SD_NODE_INIT (struct sched_domain) { \
.min_interval = 8, \
.max_interval = 32, \
.busy_factor = 32, \
.imbalance_pct = 125, \
.cache_nice_tries = 1, \
.busy_idx = 3, \
.idle_idx = 1, \
.newidle_idx = 0, \
.wake_idx = 0, \
.forkexec_idx = 0, \
\
.flags = 1*SD_LOAD_BALANCE \
| 0*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
| 0*SD_BALANCE_WAKE \
| 1*SD_WAKE_AFFINE \
| 0*SD_PREFER_LOCAL \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_POWERSAVINGS_BALANCE \
| 0*SD_SHARE_PKG_RESOURCES \
| 1*SD_SERIALIZE \
| 0*SD_PREFER_SIBLING \
, \
.last_balance = jiffies, \
.balance_interval = 1, \
}

extern int __node_distance(int, int);
#define node_distance(a, b) __node_distance(a, b)

Expand Down
25 changes: 0 additions & 25 deletions arch/sh/include/asm/topology.h
Expand Up @@ -3,31 +3,6 @@

#ifdef CONFIG_NUMA

/* sched_domains SD_NODE_INIT for sh machines */
#define SD_NODE_INIT (struct sched_domain) { \
.parent = NULL, \
.child = NULL, \
.groups = NULL, \
.min_interval = 8, \
.max_interval = 32, \
.busy_factor = 32, \
.imbalance_pct = 125, \
.cache_nice_tries = 2, \
.busy_idx = 3, \
.idle_idx = 2, \
.newidle_idx = 0, \
.wake_idx = 0, \
.forkexec_idx = 0, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_FORK \
| SD_BALANCE_EXEC \
| SD_BALANCE_NEWIDLE \
| SD_SERIALIZE, \
.last_balance = jiffies, \
.balance_interval = 1, \
.nr_balance_failed = 0, \
}

#define cpu_to_node(cpu) ((void)(cpu),0)
#define parent_node(node) ((void)(node),0)

Expand Down
19 changes: 0 additions & 19 deletions arch/sparc/include/asm/topology_64.h
Expand Up @@ -31,25 +31,6 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
cpu_all_mask : \
cpumask_of_node(pcibus_to_node(bus)))

#define SD_NODE_INIT (struct sched_domain) { \
.min_interval = 8, \
.max_interval = 32, \
.busy_factor = 32, \
.imbalance_pct = 125, \
.cache_nice_tries = 2, \
.busy_idx = 3, \
.idle_idx = 2, \
.newidle_idx = 0, \
.wake_idx = 0, \
.forkexec_idx = 0, \
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_FORK \
| SD_BALANCE_EXEC \
| SD_SERIALIZE, \
.last_balance = jiffies, \
.balance_interval = 1, \
}

#else /* CONFIG_NUMA */

#include <asm-generic/topology.h>
Expand Down
26 changes: 0 additions & 26 deletions arch/tile/include/asm/topology.h
Expand Up @@ -78,32 +78,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
.balance_interval = 32, \
}

/* sched_domains SD_NODE_INIT for TILE architecture */
#define SD_NODE_INIT (struct sched_domain) { \
.min_interval = 16, \
.max_interval = 512, \
.busy_factor = 32, \
.imbalance_pct = 125, \
.cache_nice_tries = 1, \
.busy_idx = 3, \
.idle_idx = 1, \
.newidle_idx = 2, \
.wake_idx = 1, \
.flags = 1*SD_LOAD_BALANCE \
| 1*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
| 0*SD_BALANCE_WAKE \
| 0*SD_WAKE_AFFINE \
| 0*SD_PREFER_LOCAL \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_SHARE_PKG_RESOURCES \
| 1*SD_SERIALIZE \
, \
.last_balance = jiffies, \
.balance_interval = 128, \
}

/* By definition, we create nodes based on online memory. */
#define node_has_online_mem(nid) 1

Expand Down
38 changes: 0 additions & 38 deletions arch/x86/include/asm/topology.h
Expand Up @@ -92,44 +92,6 @@ extern void setup_node_to_cpumask_map(void);

#define pcibus_to_node(bus) __pcibus_to_node(bus)

#ifdef CONFIG_X86_32
# define SD_CACHE_NICE_TRIES 1
# define SD_IDLE_IDX 1
#else
# define SD_CACHE_NICE_TRIES 2
# define SD_IDLE_IDX 2
#endif

/* sched_domains SD_NODE_INIT for NUMA machines */
#define SD_NODE_INIT (struct sched_domain) { \
.min_interval = 8, \
.max_interval = 32, \
.busy_factor = 32, \
.imbalance_pct = 125, \
.cache_nice_tries = SD_CACHE_NICE_TRIES, \
.busy_idx = 3, \
.idle_idx = SD_IDLE_IDX, \
.newidle_idx = 0, \
.wake_idx = 0, \
.forkexec_idx = 0, \
\
.flags = 1*SD_LOAD_BALANCE \
| 1*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
| 0*SD_BALANCE_WAKE \
| 1*SD_WAKE_AFFINE \
| 0*SD_PREFER_LOCAL \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_POWERSAVINGS_BALANCE \
| 0*SD_SHARE_PKG_RESOURCES \
| 1*SD_SERIALIZE \
| 0*SD_PREFER_SIBLING \
, \
.last_balance = jiffies, \
.balance_interval = 1, \
}

extern int __node_distance(int, int);
#define node_distance(a, b) __node_distance(a, b)

Expand Down
37 changes: 0 additions & 37 deletions include/linux/topology.h
Expand Up @@ -70,7 +70,6 @@ int arch_update_cpu_topology(void);
* Below are the 3 major initializers used in building sched_domains:
* SD_SIBLING_INIT, for SMT domains
* SD_CPU_INIT, for SMP domains
* SD_NODE_INIT, for NUMA domains
*
* Any architecture that cares to do any tuning to these values should do so
* by defining their own arch-specific initializer in include/asm/topology.h.
Expand Down Expand Up @@ -176,48 +175,12 @@ int arch_update_cpu_topology(void);
}
#endif

/* sched_domains SD_ALLNODES_INIT for NUMA machines */
#define SD_ALLNODES_INIT (struct sched_domain) { \
.min_interval = 64, \
.max_interval = 64*num_online_cpus(), \
.busy_factor = 128, \
.imbalance_pct = 133, \
.cache_nice_tries = 1, \
.busy_idx = 3, \
.idle_idx = 3, \
.flags = 1*SD_LOAD_BALANCE \
| 1*SD_BALANCE_NEWIDLE \
| 0*SD_BALANCE_EXEC \
| 0*SD_BALANCE_FORK \
| 0*SD_BALANCE_WAKE \
| 0*SD_WAKE_AFFINE \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_POWERSAVINGS_BALANCE \
| 0*SD_SHARE_PKG_RESOURCES \
| 1*SD_SERIALIZE \
| 0*SD_PREFER_SIBLING \
, \
.last_balance = jiffies, \
.balance_interval = 64, \
}

#ifndef SD_NODES_PER_DOMAIN
#define SD_NODES_PER_DOMAIN 16
#endif

#ifdef CONFIG_SCHED_BOOK
#ifndef SD_BOOK_INIT
#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!!
#endif
#endif /* CONFIG_SCHED_BOOK */

#ifdef CONFIG_NUMA
#ifndef SD_NODE_INIT
#error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!!
#endif

#endif /* CONFIG_NUMA */

#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
DECLARE_PER_CPU(int, numa_node);

Expand Down

0 comments on commit cb83b62

Please sign in to comment.