From 26dbff1a2f8bf3ad147790dac13a2df520092878 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 23 Jun 2015 01:26:52 -0500 Subject: [PATCH 01/40] i8042: decrease debug message level to info Author: Arjan van de Ven Signed-off-by: Miguel Bernal Marin Signed-off-by: Jose Carlos Venegas Munoz --- drivers/input/serio/i8042.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 6dac7c1853a541..fab04cd8a7a095 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -621,7 +621,7 @@ static int i8042_enable_kbd_port(void) if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { i8042_ctr &= ~I8042_CTR_KBDINT; i8042_ctr |= I8042_CTR_KBDDIS; - pr_err("Failed to enable KBD port\n"); + pr_info("Failed to enable KBD port\n"); return -EIO; } @@ -640,7 +640,7 @@ static int i8042_enable_aux_port(void) if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { i8042_ctr &= ~I8042_CTR_AUXINT; i8042_ctr |= I8042_CTR_AUXDIS; - pr_err("Failed to enable AUX port\n"); + pr_info("Failed to enable AUX port\n"); return -EIO; } @@ -732,7 +732,7 @@ static int i8042_check_mux(void) i8042_ctr &= ~I8042_CTR_AUXINT; if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { - pr_err("Failed to disable AUX port, can't use MUX\n"); + pr_info("Failed to disable AUX port, can't use MUX\n"); return -EIO; } @@ -955,7 +955,7 @@ static int i8042_controller_selftest(void) do { if (i8042_command(¶m, I8042_CMD_CTL_TEST)) { - pr_err("i8042 controller selftest timeout\n"); + pr_info("i8042 controller selftest timeout\n"); return -ENODEV; } @@ -977,7 +977,7 @@ static int i8042_controller_selftest(void) pr_info("giving up on controller selftest, continuing anyway...\n"); return 0; #else - pr_err("i8042 controller selftest failed\n"); + pr_info("i8042 controller selftest failed\n"); return -EIO; #endif } From 3fa8d5021861aaf04cd4c8399ed00875423e798b Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 11 
Jan 2016 10:01:44 -0600 Subject: [PATCH 02/40] increase the ext4 default commit age Both the VM and EXT4 have a "commit to disk after X seconds" time. Currently the EXT4 time is shorter than our VM time, which is a bit suboptimal, it's better for performance to let the VM do the writeouts in bulk rather than something deep in the journalling layer. (DISTRO TWEAK -- NOT FOR UPSTREAM) Signed-off-by: Arjan van de Ven Signed-off-by: Jose Carlos Venegas Munoz --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 52772c826c8682..062adf94afa499 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -45,7 +45,7 @@ /* * The default maximum commit age, in seconds. */ -#define JBD2_DEFAULT_MAX_COMMIT_AGE 5 +#define JBD2_DEFAULT_MAX_COMMIT_AGE 30 #ifdef CONFIG_JBD2_DEBUG /* From 1f229181ba192b4411029399c181fbf262a1356e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 14 Mar 2016 11:10:58 -0600 Subject: [PATCH 03/40] pci pme wakeups Reduce wakeups for PME checks, which are a workaround for miswired boards (sadly, too many of them) in laptops. 
--- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 59c01d68c6d5ed..bfdcc03c57b7ed 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -62,7 +62,7 @@ struct pci_pme_device { struct pci_dev *dev; }; -#define PME_TIMEOUT 1000 /* How long between PME checks */ +#define PME_TIMEOUT 4000 /* How long between PME checks */ /* * Following exit from Conventional Reset, devices must be ready within 1 sec From dd3a5d0c78a3fbedb74219c5034bbc9049933af3 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 19 Mar 2016 21:32:19 -0400 Subject: [PATCH 04/40] intel_idle: tweak cpuidle cstates Increase target_residency in cpuidle cstate Tune intel_idle to be a bit less agressive; Clear linux is cleaner in hygiene (wakupes) than the average linux, so we can afford changing these in a way that increases performance while keeping power efficiency --- drivers/idle/intel_idle.c | 50 +++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index ea5a6a14c5537a..4d64eff6234abf 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -573,7 +573,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -581,7 +581,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 33, - .target_residency = 100, + .target_residency = 900, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -589,7 +589,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, - .target_residency = 
400, + .target_residency = 1000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -597,7 +597,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, - .target_residency = 500, + .target_residency = 1500, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -605,7 +605,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, - .target_residency = 900, + .target_residency = 2000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -613,7 +613,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, - .target_residency = 1800, + .target_residency = 5000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -621,7 +621,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, - .target_residency = 7700, + .target_residency = 9000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -641,7 +641,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -649,7 +649,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 40, - .target_residency = 100, + .target_residency = 1000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -657,7 +657,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 
.exit_latency = 133, - .target_residency = 400, + .target_residency = 1000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -665,7 +665,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, - .target_residency = 500, + .target_residency = 2000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -673,7 +673,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, - .target_residency = 900, + .target_residency = 4000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -681,7 +681,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, - .target_residency = 1800, + .target_residency = 7000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -689,7 +689,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, - .target_residency = 7700, + .target_residency = 9000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -710,7 +710,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -718,7 +718,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 70, - .target_residency = 100, + .target_residency = 1000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -726,7 +726,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = 
MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 85, - .target_residency = 200, + .target_residency = 600, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -734,7 +734,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x33", .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 124, - .target_residency = 800, + .target_residency = 3000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -742,7 +742,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 200, - .target_residency = 800, + .target_residency = 3200, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -750,7 +750,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 480, - .target_residency = 5000, + .target_residency = 9000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -758,7 +758,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 890, - .target_residency = 5000, + .target_residency = 9000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -779,7 +779,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 300, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -808,7 +808,7 @@ static struct cpuidle_state icx_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 4, - .target_residency = 4, + .target_residency = 40, .enter = &intel_idle, .enter_s2idle = 
intel_idle_s2idle, }, { @@ -816,7 +816,7 @@ static struct cpuidle_state icx_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 170, - .target_residency = 600, + .target_residency = 900, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -937,7 +937,7 @@ static struct cpuidle_state gmt_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 2, - .target_residency = 4, + .target_residency = 40, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { From afa4ce7faf2b82f84baa9f47fc96845b3736c8c0 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 11 Feb 2015 16:05:23 -0600 Subject: [PATCH 05/40] bootstats: add printk's to measure boot time in more detail Few distro-tweaks to add printk's to visualize boot time better Author: Arjan van de Ven Signed-off-by: Miguel Bernal Marin --- arch/x86/kernel/alternative.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 73be3931e4f060..959a3a6279cc1a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1622,7 +1622,9 @@ void __init alternative_instructions(void) * Then patch alternatives, such that those paravirt calls that are in * alternatives can be overwritten by their immediate fragments. */ + printk("clr: Applying alternatives\n"); apply_alternatives(__alt_instructions, __alt_instructions_end); + printk("clr: Applying alternatives done\n"); /* * Now all calls are established. Apply the call thunks if From 827929e5cc9b57cfb60ba325b4efb1f76d639605 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 11 Feb 2015 17:28:14 -0600 Subject: [PATCH 06/40] smpboot: reuse timer calibration NO point recalibrating for known-constant tsc ... saves 200ms+ of boot time. 
--- arch/x86/kernel/tsc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 15f97c0abc9d09..ec0f9951abc6d1 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1644,6 +1644,9 @@ unsigned long calibrate_delay_is_known(void) if (!constant_tsc || !mask) return 0; + if (cpu != 0) + return cpu_data(0).loops_per_jiffy; + sibling = cpumask_any_but(mask, cpu); if (sibling < nr_cpu_ids) return cpu_data(sibling).loops_per_jiffy; From 62f756228019eb671e8980df2ef55234d1574201 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 2 Jun 2016 23:36:32 -0500 Subject: [PATCH 07/40] initialize ata before graphics ATA init is the long pole in the boot process, and its asynchronous. move the graphics init after it so that ata and graphics initialize in parallel --- drivers/Makefile | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/Makefile b/drivers/Makefile index 1bec7819a837ab..dcdb0ddb7b66d8 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -66,15 +66,8 @@ obj-y += char/ # iommu/ comes before gpu as gpu are using iommu controllers obj-y += iommu/ -# gpu/ comes after char for AGP vs DRM startup and after iommu -obj-y += gpu/ - obj-$(CONFIG_CONNECTOR) += connector/ -# i810fb and intelfb depend on char/agp/ -obj-$(CONFIG_FB_I810) += video/fbdev/i810/ -obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ - obj-$(CONFIG_PARPORT) += parport/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ @@ -86,6 +79,14 @@ obj-y += macintosh/ obj-y += scsi/ obj-y += nvme/ obj-$(CONFIG_ATA) += ata/ + +# gpu/ comes after char for AGP vs DRM startup and after iommu +obj-y += gpu/ + +# i810fb and intelfb depend on char/agp/ +obj-$(CONFIG_FB_I810) += video/fbdev/i810/ +obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ + obj-$(CONFIG_TARGET_CORE) += target/ obj-$(CONFIG_MTD) += mtd/ obj-$(CONFIG_SPI) += spi/ From 87caba39b270b7e686808953403eed97eb2e840b Mon Sep 17 00:00:00 
2001 From: Arjan van de Ven Date: Fri, 6 Jan 2017 15:34:09 +0000 Subject: [PATCH 08/40] ipv4/tcp: allow the memory tuning for tcp to go a little bigger than default --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3d3a24f795734e..9d9b8d1c3ac9cc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4689,8 +4689,8 @@ void __init tcp_init(void) tcp_init_mem(); /* Set per-socket limits to no more than 1/128 the pressure threshold */ limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); - max_wshare = min(4UL*1024*1024, limit); - max_rshare = min(6UL*1024*1024, limit); + max_wshare = min(16UL*1024*1024, limit); + max_rshare = min(16UL*1024*1024, limit); init_net.ipv4.sysctl_tcp_wmem[0] = PAGE_SIZE; init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; From 2e4e6fd20cca598ef3f7b6fd5326376b812bd742 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 17 May 2017 01:52:11 +0000 Subject: [PATCH 09/40] init: wait for partition and retry scan As Clear Linux boots fast the device is not ready when the mounting code is reached, so a retry device scan will be performed every 0.5 sec for at least 40 sec and synchronize the async task. 
Signed-off-by: Miguel Bernal Marin --- block/early-lookup.c | 15 +++++++++++++-- init/do_mounts.c | 2 ++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/block/early-lookup.c b/block/early-lookup.c index 3effbd0d35e9fa..503a8c65ee9863 100644 --- a/block/early-lookup.c +++ b/block/early-lookup.c @@ -5,6 +5,7 @@ */ #include #include +#include struct uuidcmp { const char *uuid; @@ -243,8 +244,18 @@ static int __init devt_from_devnum(const char *name, dev_t *devt) */ int __init early_lookup_bdev(const char *name, dev_t *devt) { - if (strncmp(name, "PARTUUID=", 9) == 0) - return devt_from_partuuid(name + 9, devt); + if (strncmp(name, "PARTUUID=", 9) == 0) { + int res; + int needtowait = 40<<1; + res = devt_from_partuuid(name + 9, devt); + if (!res) return res; + while (res && needtowait) { + msleep(500); + res = devt_from_partuuid(name + 9, devt); + needtowait--; + } + return res; + } if (strncmp(name, "PARTLABEL=", 10) == 0) return devt_from_partlabel(name + 10, devt); if (strncmp(name, "/dev/", 5) == 0) diff --git a/init/do_mounts.c b/init/do_mounts.c index 5dfd30b13f4857..bc2835722b7ea9 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -474,7 +474,9 @@ void __init prepare_namespace(void) * For example, it is not atypical to wait 5 seconds here * for the touchpad of a laptop to initialize. */ + async_synchronize_full(); wait_for_device_probe(); + async_synchronize_full(); md_run_setup(); From 5b8693115505869528ab06a503734bc29bcc38b4 Mon Sep 17 00:00:00 2001 From: "Brett T. Warden" Date: Mon, 13 Aug 2018 04:01:21 -0500 Subject: [PATCH 10/40] add boot option to allow unsigned modules Add module.sig_unenforce boot parameter to allow loading unsigned kernel modules. Parameter is only effective if CONFIG_MODULE_SIG_FORCE is enabled and system is *not* SecureBooted. Signed-off-by: Brett T. 
Warden Signed-off-by: Miguel Bernal Marin --- kernel/module/signing.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/module/signing.c b/kernel/module/signing.c index a2ff4242e623d5..8cd7c6f5d8bbfe 100644 --- a/kernel/module/signing.c +++ b/kernel/module/signing.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include "internal.h" #undef MODULE_PARAM_PREFIX @@ -21,6 +23,11 @@ static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); module_param(sig_enforce, bool_enable_only, 0644); +/* Allow disabling module signature requirement by adding boot param */ +static bool sig_unenforce = false; +module_param(sig_unenforce, bool_enable_only, 0644); + +extern struct boot_params boot_params; /* * Export sig_enforce kernel cmdline parameter to allow other subsystems rely @@ -28,6 +35,8 @@ module_param(sig_enforce, bool_enable_only, 0644); */ bool is_module_sig_enforced(void) { + if (sig_unenforce) + return false; return sig_enforce; } EXPORT_SYMBOL(is_module_sig_enforced); From 3103ba76dd2745da5e712c22ea3b51f8e55be54d Mon Sep 17 00:00:00 2001 From: William Douglas Date: Wed, 20 Jun 2018 17:23:21 +0000 Subject: [PATCH 11/40] enable stateless firmware loading Prefer the order of specific version before generic and /etc before /lib to enable the user to give specific overrides for generic firmware and distribution firmware. 
--- drivers/base/firmware_loader/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index b58c42f1b1ce65..817c81bc9107ab 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -470,6 +470,8 @@ static int fw_decompress_xz(struct device *dev, struct fw_priv *fw_priv, static char fw_path_para[256]; static const char * const fw_path[] = { fw_path_para, + "/etc/firmware/" UTS_RELEASE, + "/etc/firmware", "/lib/firmware/updates/" UTS_RELEASE, "/lib/firmware/updates", "/lib/firmware/" UTS_RELEASE, From 746019cf1cc3e2faf55147a7961ef15f245f0ccc Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Thu, 2 Aug 2018 12:03:22 -0700 Subject: [PATCH 12/40] migrate some systemd defaults to the kernel defaults. These settings are needed to prevent networking issues when the networking modules come up by default without explicit settings, which breaks some cases. We don't want the modprobe settings to be read at boot time if we're not going to do anything else ever. 
--- drivers/net/dummy.c | 2 +- include/uapi/linux/if_bonding.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index c4b1b0aa438ac5..06b00f7a8eab9d 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -43,7 +43,7 @@ #define DRV_NAME "dummy" -static int numdummies = 1; +static int numdummies = 0; /* fake multicast ability */ static void set_multicast_list(struct net_device *dev) diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h index d174914a837dbf..bf8e2af101a3c8 100644 --- a/include/uapi/linux/if_bonding.h +++ b/include/uapi/linux/if_bonding.h @@ -82,7 +82,7 @@ #define BOND_STATE_ACTIVE 0 /* link is active */ #define BOND_STATE_BACKUP 1 /* link is backup */ -#define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */ +#define BOND_DEFAULT_MAX_BONDS 0 /* Default maximum number of devices to support */ #define BOND_DEFAULT_TX_QUEUES 16 /* Default number of tx queues per device */ From d1dac022453c2c8076fa4350129c3536dd9ac5f7 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 10 Mar 2016 15:11:28 +0000 Subject: [PATCH 13/40] xattr: allow setting user.* attributes on symlinks by owner Kvmtool and clear containers support using user attributes to label host files with the virtual uid/guid of the file in the container. This allows an end user to manage their files and a complete uid space without all the ugly namespace stuff. The one gap in the support is symlinks because an end user can change the ownership of a symbolic link. We support attributes on these files as you can already (as root) set security attributes on them. The current rules seem slightly over-paranoid and as we have a use case this patch enables updating the attributes on a symbolic link IFF you are the owner of the symlink (as permissions are not usually meaningful on the link itself). 
Signed-off-by: Alan Cox --- fs/xattr.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index efd4736bc94b09..663befc923ca68 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -140,16 +140,17 @@ xattr_permission(struct mnt_idmap *idmap, struct inode *inode, } /* - * In the user.* namespace, only regular files and directories can have - * extended attributes. For sticky directories, only the owner and - * privileged users can write attributes. + * In the user.* namespace, only regular files, symbolic links, and + * directories can have extended attributes. For symbolic links and + * sticky directories, only the owner and privileged users can write + * attributes. */ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) + if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && !S_ISLNK(inode->i_mode)) return (mask & MAY_WRITE) ? -EPERM : -ENODATA; - if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && - (mask & MAY_WRITE) && - !inode_owner_or_capable(idmap, inode)) + if (((S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX)) + || S_ISLNK(inode->i_mode)) && (mask & MAY_WRITE) + && !inode_owner_or_capable(idmap, inode)) return -EPERM; } From bebb6ff01e8904de8feae43ae1bf243eaf35b377 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 13 Dec 2018 01:00:49 +0000 Subject: [PATCH 14/40] do accept() in LIFO order for cache efficiency --- include/linux/wait.h | 2 ++ kernel/sched/wait.c | 24 ++++++++++++++++++++++++ net/ipv4/inet_connection_sock.c | 2 +- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/linux/wait.h b/include/linux/wait.h index 5ec7739400f4d7..d1df939187fbf1 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -165,6 +165,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void 
add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); @@ -1195,6 +1196,7 @@ do { \ */ void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 802d98cf2de317..9e5a15389c30ef 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -47,6 +47,17 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_ } EXPORT_SYMBOL_GPL(add_wait_queue_priority); +void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) +{ + unsigned long flags; + + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + spin_lock_irqsave(&wq_head->lock, flags); + __add_wait_queue(wq_head, wq_entry); + spin_unlock_irqrestore(&wq_head->lock, flags); +} +EXPORT_SYMBOL(add_wait_queue_exclusive_lifo); + void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { unsigned long flags; @@ -298,6 +309,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent } EXPORT_SYMBOL(prepare_to_wait_exclusive); +void 
prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) +{ + unsigned long flags; + + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + spin_lock_irqsave(&wq_head->lock, flags); + if (list_empty(&wq_entry->entry)) + __add_wait_queue(wq_head, wq_entry); + set_current_state(state); + spin_unlock_irqrestore(&wq_head->lock, flags); +} +EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo); + void init_wait_entry(struct wait_queue_entry *wq_entry, int flags) { wq_entry->flags = flags; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 394a498c282322..c7ecb4b596a0b7 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -627,7 +627,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) * having to remove and re-insert us on the wait queue. */ for (;;) { - prepare_to_wait_exclusive(sk_sleep(sk), &wait, + prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); release_sock(sk); if (reqsk_queue_empty(&icsk->icsk_accept_queue)) From 2fe24fa19944fdc856aeb418c1c22c8b70d28415 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 18 Feb 2018 23:35:41 +0000 Subject: [PATCH 15/40] locking: rwsem: spin faster tweak rwsem owner spinning a bit --- kernel/locking/rwsem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 9eabd585ce7afa..56229464d3d152 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -749,6 +749,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) struct task_struct *new, *owner; unsigned long flags, new_flags; enum owner_state state; + int i = 0; lockdep_assert_preemption_disabled(); @@ -785,7 +786,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) break; } - cpu_relax(); + if (i++ > 1000) + cpu_relax(); } return state; From b647ae079842850cac597557c413b279994224c7 Mon Sep 17 00:00:00 2001 From: Joe Konno Date: Tue, 25 Jun 2019 10:35:54 -0700 
Subject: [PATCH 16/40] ata: libahci: ignore staggered spin-up Change libahci to ignore firmware's staggered spin-up flag. End-users who wish to honor firmware's SSS flag can add the following kernel parameter to a new file at /etc/kernel/cmdline.d/ignore_sss.conf: libahci.ignore_sss=0 And then run sudo clr-boot-manager update Signed-off-by: Joe Konno --- drivers/ata/libahci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index f1263364fa97fa..efcae19412e596 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -34,14 +34,14 @@ #include "libata.h" static int ahci_skip_host_reset; -int ahci_ignore_sss; +int ahci_ignore_sss=1; EXPORT_SYMBOL_GPL(ahci_ignore_sss); module_param_named(skip_host_reset, ahci_skip_host_reset, int, 0444); MODULE_PARM_DESC(skip_host_reset, "skip global host reset (0=don't skip, 1=skip)"); module_param_named(ignore_sss, ahci_ignore_sss, int, 0444); -MODULE_PARM_DESC(ignore_sss, "Ignore staggered spinup flag (0=don't ignore, 1=ignore)"); +MODULE_PARM_DESC(ignore_sss, "Ignore staggered spinup flag (0=don't ignore, 1=ignore [default])"); static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, unsigned hints); From 6bf4fab24b881c109adef4456fdcf02070078148 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 10 Aug 2019 03:19:04 +0000 Subject: [PATCH 17/40] print CPU that faults print cpu number when we print a crash --- arch/x86/mm/fault.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ab778eac195205..a702a8f9716823 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -779,9 +779,9 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, if (!printk_ratelimit()) return; - printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx", + printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx cpu %i", loglvl, tsk->comm, task_pid_nr(tsk), address, - (void 
*)regs->ip, (void *)regs->sp, error_code); + (void *)regs->ip, (void *)regs->sp, error_code, raw_smp_processor_id()); print_vma_addr(KERN_CONT " in ", regs->ip); From 5568e7641ad2aec54664cbe3712932eb4425190a Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 11 Nov 2019 23:12:11 +0000 Subject: [PATCH 18/40] nvme workaround --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 21783aa2ee8e18..a140e737357bce 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -59,7 +59,7 @@ static u8 nvme_max_retries = 5; module_param_named(max_retries, nvme_max_retries, byte, 0644); MODULE_PARM_DESC(max_retries, "max number of retries a command may have"); -static unsigned long default_ps_max_latency_us = 100000; +static unsigned long default_ps_max_latency_us = 200; module_param(default_ps_max_latency_us, ulong, 0644); MODULE_PARM_DESC(default_ps_max_latency_us, "max power saving latency for new devices; use PM QOS to change per device"); From 9c3b74e68355605d3bdc4d921b04116f1f1d5e95 Mon Sep 17 00:00:00 2001 From: Alexander Koskovich Date: Wed, 12 Feb 2020 22:47:12 +0000 Subject: [PATCH 19/40] don't report an error if PowerClamp run on other CPU --- drivers/thermal/intel/intel_powerclamp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 36243a3972fd7f..4419ad0a3d4ada 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -739,6 +739,11 @@ static const struct thermal_cooling_device_ops powerclamp_cooling_ops = { .set_cur_state = powerclamp_set_cur_state, }; +static const struct x86_cpu_id amd_cpu[] = { + { X86_VENDOR_AMD }, + {}, +}; + static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = { X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL), {} @@ -748,6 +753,11 @@ 
MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); static int __init powerclamp_probe(void) { + if (x86_match_cpu(amd_cpu)){ + pr_info("Intel PowerClamp does not support AMD CPUs\n"); + return -ENODEV; + } + if (!x86_match_cpu(intel_powerclamp_ids)) { pr_err("CPU does not support MWAIT\n"); return -ENODEV; From 38a2b516a060e0d4e8708f1c3a6df3b5854fb468 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 27 Sep 2021 17:43:01 +0000 Subject: [PATCH 20/40] lib/raid6: add patch --- lib/raid6/algos.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 0ec534faf019bc..86762ba454a1d5 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -142,8 +142,10 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) for (best = NULL, algo = raid6_recov_algos; *algo; algo++) if (!best || (*algo)->priority > best->priority) - if (!(*algo)->valid || (*algo)->valid()) + if (!(*algo)->valid || (*algo)->valid()) { best = *algo; + break; + } if (best) { raid6_2data_recov = best->data2; From 5f7da08233cbefbff8fa1e789c1eaeb45264b009 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 16 Nov 2021 17:39:25 +0000 Subject: [PATCH 21/40] itmt_epb: use epb to scale itmt --- arch/x86/include/asm/topology.h | 1 + arch/x86/kernel/cpu/intel_epb.c | 4 ++++ arch/x86/kernel/itmt.c | 29 ++++++++++++++++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 3235ba1e5b06d7..80babac99d46a8 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -183,6 +183,7 @@ extern unsigned int __read_mostly sysctl_sched_itmt_enabled; /* Interface to set priority of a cpu */ void sched_set_itmt_core_prio(int prio, int core_cpu); +void sched_set_itmt_power_ratio(int power_ratio, int core_cpu); /* Interface to notify scheduler that system supports ITMT */ int sched_set_itmt_support(void); diff --git 
a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c index e4c3ba91321c95..fcac72edede5b8 100644 --- a/arch/x86/kernel/cpu/intel_epb.c +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -166,6 +166,10 @@ static ssize_t energy_perf_bias_store(struct device *dev, if (ret < 0) return ret; + /* update the ITMT scheduler logic to use the power policy data */ + /* scale the val up by 2 so the range is 224 - 256 */ + sched_set_itmt_power_ratio(256 - val * 2, cpu); + return count; } diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index ee4fe8cdb85767..476928553e52ed 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -25,6 +25,7 @@ static DEFINE_MUTEX(itmt_update_mutex); DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); +DEFINE_PER_CPU_READ_MOSTLY(int, sched_power_ratio); /* Boolean to track if system has ITMT capabilities */ static bool __read_mostly sched_itmt_capable; @@ -160,7 +161,12 @@ void sched_clear_itmt_support(void) int arch_asym_cpu_priority(int cpu) { - return per_cpu(sched_core_priority, cpu); + int power_ratio = per_cpu(sched_power_ratio, cpu); + + /* a power ratio of 0 (uninitialized) is assumed to be maximum */ + if (power_ratio == 0) + power_ratio = 256 - 2 * 6; + return per_cpu(sched_core_priority, cpu) * power_ratio / 256; } /** @@ -181,3 +187,24 @@ void sched_set_itmt_core_prio(int prio, int cpu) { per_cpu(sched_core_priority, cpu) = prio; } + +/** + * sched_set_itmt_power_ratio() - Set CPU priority based on ITMT + * @power_ratio: The power scaling ratio [1..256] for the core + * @core_cpu: The cpu number associated with the core + * + * Set a scaling to the cpu performance based on long term power + * settings (like EPB). + * + * Note this is for the policy not for the actual dynamic frequency; + * the frequency will increase itself as workloads run on a core. 
+ */ + +void sched_set_itmt_power_ratio(int power_ratio, int core_cpu) +{ + int cpu; + + for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { + per_cpu(sched_power_ratio, cpu) = power_ratio; + } +} From acc06ef6c98c07f0076079116ab75697c6d7e077 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 18 Nov 2021 16:09:47 +0000 Subject: [PATCH 22/40] itmt2 ADL fixes On systems with overclocking enabled, CPPC Highest Performance can be hard coded to 0xff. In this case even if we have cores with different highest performance, ITMT can't be enabled as the current implementation depends on CPPC Highest Performance. On such systems we can use MSR_HWP_CAPABILITIES maximum performance field when CPPC.Highest Performance is 0xff. Due to legacy reasons, we can't solely depend on MSR_HWP_CAPABILITIES as in some older systems CPPC Highest Performance is the only way to identify different performing cores. Signed-off-by: Srinivas Pandruvada --- drivers/cpufreq/intel_pstate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index dc50c9fb488dfc..c54e6455ec7b46 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -373,6 +373,13 @@ static void intel_pstate_set_itmt_prio(int cpu) * update them at any time after it has been called. */ sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu); + /* + * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff. + * In this case we can't use CPPC.highest_perf to enable ITMT. + * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide. 
+ */ + if (cppc_perf.highest_perf == 0xff) + cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached)); if (max_highest_perf <= min_highest_perf) { if (cppc_perf.highest_perf > max_highest_perf) From 5c6765ca8c84b803a023a0c09898eea9a8ab143a Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 23 Nov 2021 17:38:50 +0000 Subject: [PATCH 23/40] add a per cpu minimum high watermark and tune batch size make sure there's at least 1024 per cpu pages... a reasonably small amount for today's system --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 85741403948f55..efd9bf746ac057 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5213,11 +5213,11 @@ static int zone_batchsize(struct zone *zone) /* * The number of pages to batch allocate is either ~0.1% - * of the zone or 1MB, whichever is smaller. The batch + * of the zone or 4MB, whichever is smaller. The batch * size is striking a balance between allocation latency * and zone lock contention. */ - batch = min(zone_managed_pages(zone) >> 10, SZ_1M / PAGE_SIZE); + batch = min(zone_managed_pages(zone) >> 10, 4 * SZ_1M / PAGE_SIZE); batch /= 4; /* We effectively *= 4 below */ if (batch < 1) batch = 1; From 0f9c0d109aa3fb41755aa7ba91c0f713ec04f26e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 28 Apr 2023 17:01:35 +0100 Subject: [PATCH 24/40] md/raid6 algorithms: scale test duration for speedier boots Instead of using jiffies and waiting for jiffies to wrap before measuring use the higher precision local_clock for benchmarking. Measure 2500 loops, which works out to be accurate enough for benchmarking the raid algo data rates. Also add division by zero checking in case timing measurements are bogus. Speeds up raid benchmarking from 48,000 usecs to 4000 usecs, saving 0.044 seconds on boot.
Signed-off-by: Colin Ian King --- lib/raid6/algos.c | 53 ++++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 86762ba454a1d5..84fb8ac3a36691 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -18,6 +18,8 @@ #else #include #include +#include + /* In .bss so it's zeroed */ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); EXPORT_SYMBOL(raid6_empty_zero_page); @@ -161,12 +163,15 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) static inline const struct raid6_calls *raid6_choose_gen( void *(*const dptrs)[RAID6_TEST_DISKS], const int disks) { - unsigned long perf, bestgenperf, j0, j1; + unsigned long perf; + const unsigned long max_perf = 2500; int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */ const struct raid6_calls *const *algo; const struct raid6_calls *best; + const u64 ns_per_mb = 1000000000 >> 20; + u64 n, ns, t, ns_best = ~0ULL; - for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { + for (best = NULL, algo = raid6_algos; *algo; algo++) { if (!best || (*algo)->priority >= best->priority) { if ((*algo)->valid && !(*algo)->valid()) continue; @@ -176,26 +181,20 @@ static inline const struct raid6_calls *raid6_choose_gen( break; } - perf = 0; - preempt_disable(); - j0 = jiffies; - while ((j1 = jiffies) == j0) - cpu_relax(); - while (time_before(jiffies, - j1 + (1<gen_syndrome(disks, PAGE_SIZE, *dptrs); - perf++; } + ns = local_clock() - t; preempt_enable(); - if (perf > bestgenperf) { - bestgenperf = perf; + if (ns < ns_best) { + ns_best = ns; best = *algo; } - pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, - (perf * HZ * (disks-2)) >> - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); + n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2); + pr_info("raid6: %-8s gen() %5llu MB/s (%llu ns)\n", (*algo)->name, (ns > 0) ? 
n / ns : 0, ns); } } @@ -212,31 +211,23 @@ static inline const struct raid6_calls *raid6_choose_gen( goto out; } - pr_info("raid6: using algorithm %s gen() %ld MB/s\n", - best->name, - (bestgenperf * HZ * (disks - 2)) >> - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2)); + n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2); + pr_info("raid6: using algorithm %s gen() %llu MB/s (%llu ns)\n", + best->name, (ns_best > 0) ? n / ns_best : 0, ns_best); if (best->xor_syndrome) { - perf = 0; - preempt_disable(); - j0 = jiffies; - while ((j1 = jiffies) == j0) - cpu_relax(); - while (time_before(jiffies, - j1 + (1 << RAID6_TIME_JIFFIES_LG2))) { + t = local_clock(); + for (perf = 0; perf < max_perf; perf++) { best->xor_syndrome(disks, start, stop, PAGE_SIZE, *dptrs); - perf++; } + ns = local_clock() - t; preempt_enable(); - pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", - (perf * HZ * (disks - 2)) >> - (20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1)); + n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2); + pr_info("raid6: .... xor() %llu MB/s, rmw enabled (%llu ns)\n", (ns > 0) ? n / ns : 0, ns); } - out: return best; } From 9235ae68bbb9ef894ec8d69f77f348e399010030 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 20 Jan 2023 11:16:42 +0000 Subject: [PATCH 25/40] initcall: only print non-zero initcall debug to speed up boot Printing initcall timings that successfully return after 0 usecs provides not much useful information and takes a small amount of time to do so. Disable the initcall timings for these specific cases. On an Alderlake i9-12900 this reduces kernel boot time by 0.67% (timed up to the invocation of systemd starting) based on 10 boot measurements. 
Signed-off-by: Colin Ian King --- init/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index 436d73261810bd..4ace622f2e9025 100644 --- a/init/main.c +++ b/init/main.c @@ -1183,10 +1183,13 @@ static __init_or_module void trace_initcall_finish_cb(void *data, initcall_t fn, int ret) { ktime_t rettime, *calltime = data; + long long delta; rettime = ktime_get(); - printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", - fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); + delta = ktime_us_delta(rettime, *calltime); + if (ret || delta) + printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", + fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); } static ktime_t initcall_calltime; From 43c73f07dc98dea0b7cf3ae8ef7051a5dc5ade78 Mon Sep 17 00:00:00 2001 From: "Brett T. Warden" Date: Mon, 19 Sep 2022 08:52:45 -0700 Subject: [PATCH 26/40] scale --- kernel/sched/fair.c | 13 ++++++++++++- kernel/sched/sched.h | 1 + mm/memcontrol.c | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index df348aa55d3c7e..cf94f181ffe249 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3942,7 +3942,8 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq) */ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq) { - long delta = cfs_rq->avg.load_avg - cfs_rq->tg_load_avg_contrib; + long delta; + u64 now; /* * No need to update load_avg for root_task_group as it is not used. @@ -3950,9 +3951,19 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq) if (cfs_rq->tg == &root_task_group) return; + /* + * For migration heavy workloads, access to tg->load_avg can be + * unbound. Limit the update rate to at most once per ms. 
+ */ + now = sched_clock_cpu(cpu_of(rq_of(cfs_rq))); + if (now - cfs_rq->last_update_tg_load_avg < NSEC_PER_MSEC) + return; + + delta = cfs_rq->avg.load_avg - cfs_rq->tg_load_avg_contrib; if (abs(delta) > cfs_rq->tg_load_avg_contrib / 64) { atomic_long_add(delta, &cfs_rq->tg->load_avg); cfs_rq->tg_load_avg_contrib = cfs_rq->avg.load_avg; + cfs_rq->last_update_tg_load_avg = now; } } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 04846272409cc0..2ff809fc35f764 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -594,6 +594,7 @@ struct cfs_rq { } removed; #ifdef CONFIG_FAIR_GROUP_SCHED + u64 last_update_tg_load_avg; unsigned long tg_load_avg_contrib; long propagate; long prop_runnable_sum; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5b009b233ab892..54210a110665bd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -626,7 +626,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val) cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id()); x = __this_cpu_add_return(stats_updates, abs(val)); - if (x > MEMCG_CHARGE_BATCH) { + if (x > MEMCG_CHARGE_BATCH * 128) { /* * If stats_flush_threshold exceeds the threshold * (>num_online_cpus()), cgroup stats update will be triggered From 0dfd8d25eda3b75ea351df1fdbc86afc1b1a42b8 Mon Sep 17 00:00:00 2001 From: Intel ClearLinux Date: Sun, 19 Nov 2023 21:48:29 +0100 Subject: [PATCH 27/40] [PATCH] Place libraries right below the binary for PIE binaries Place libraries right below the binary for PIE binaries, this helps code locality (and thus performance). 
Signed-off-by: Kai Krakow --- fs/binfmt_elf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7b3d2d4914073f..e1b8673960e062 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1296,6 +1296,8 @@ static int load_elf_binary(struct linux_binprm *bprm) mm = current->mm; mm->end_code = end_code; mm->start_code = start_code; + if (start_code >= ELF_ET_DYN_BASE) + mm->mmap_base = start_code; mm->start_data = start_data; mm->end_data = end_data; mm->start_stack = bprm->p; From c71acce11ff8b49e989849fc25b0cb512fcb703f Mon Sep 17 00:00:00 2001 From: Intel ClearLinux Date: Sun, 19 Nov 2023 21:19:46 +0100 Subject: [PATCH 28/40] [PATCH] epp retune Signed-off-by: Kai Krakow --- arch/x86/include/asm/msr-index.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b37abb55e948b7..23c7dfa24cda91 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -490,7 +490,7 @@ #define HWP_MAX_PERF(x) ((x & 0xff) << 8) #define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) #define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) -#define HWP_EPP_PERFORMANCE 0x00 +#define HWP_EPP_PERFORMANCE 0x01 #define HWP_EPP_BALANCE_PERFORMANCE 0x80 #define HWP_EPP_BALANCE_POWERSAVE 0xC0 #define HWP_EPP_POWERSAVE 0xFF From 62886ee78a98adf5b58f7cae59d5a9dbc7014088 Mon Sep 17 00:00:00 2001 From: Intel ClearLinux Date: Sun, 19 Nov 2023 21:16:15 +0100 Subject: [PATCH 29/40] [PATCH] tcp tuning Signed-off-by: Kai Krakow --- include/net/tcp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 4b03ca7cb8a5eb..d48a96e2d23332 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -130,10 +130,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */ -#define TCP_DELACK_MAX 
((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */ +#define TCP_DELACK_MAX ((unsigned)(HZ/10)) /* maximal time to delay before sending an ACK */ #if HZ >= 100 -#define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */ -#define TCP_ATO_MIN ((unsigned)(HZ/25)) +#define TCP_DELACK_MIN ((unsigned)(HZ/100)) /* minimal time to delay before sending an ACK */ +#define TCP_ATO_MIN ((unsigned)(HZ/100)) #else #define TCP_DELACK_MIN 4U #define TCP_ATO_MIN 4U From 9a029372cb03df9a910d362535f3f2e8c575c570 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Feb 2023 10:40:24 +0000 Subject: [PATCH 30/40] mm/memcontrol: add some branch hints based on gcov analysis Signed-off-by: Colin Ian King --- mm/memcontrol.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 54210a110665bd..d2f2b67dff5c9f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -626,7 +626,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val) cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id()); x = __this_cpu_add_return(stats_updates, abs(val)); - if (x > MEMCG_CHARGE_BATCH * 128) { + if (unlikely(x > MEMCG_CHARGE_BATCH * 128)) { /* * If stats_flush_threshold exceeds the threshold * (>num_online_cpus()), cgroup stats update will be triggered @@ -845,7 +845,7 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); /* Update memcg and lruvec */ - if (!mem_cgroup_disabled()) + if (likely(!mem_cgroup_disabled())) __mod_memcg_lruvec_state(lruvec, idx, val); } @@ -2151,7 +2151,7 @@ void folio_memcg_lock(struct folio *folio) static void __folio_memcg_unlock(struct mem_cgroup *memcg) { - if (memcg && memcg->move_lock_task == current) { + if (likely(memcg && memcg->move_lock_task == current)) { unsigned long flags = memcg->move_lock_flags; memcg->move_lock_task = NULL; From 
36f7cd39f82c9849cc0d4408d5d633a3fec21417 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Feb 2023 11:53:51 +0000 Subject: [PATCH 31/40] sched/core: add some branch hints based on gcov analysis Signed-off-by: Colin Ian King --- kernel/sched/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 802551e0009bf1..25c45b1dff003c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -554,7 +554,7 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass) /* Matches synchronize_rcu() in __sched_core_enable() */ preempt_disable(); - if (sched_core_disabled()) { + if (likely(sched_core_disabled())) { raw_spin_lock_nested(&rq->__lock, subclass); /* preempt_count *MUST* be > 1 */ preempt_enable_no_resched(); @@ -763,7 +763,7 @@ void update_rq_clock(struct rq *rq) #endif delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; - if (delta < 0) + if (unlikely(delta < 0)) return; rq->clock += delta; update_rq_clock_task(rq, delta); @@ -6075,7 +6075,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) struct rq *rq_i; bool need_sync; - if (!sched_core_enabled(rq)) + if (likely(!sched_core_enabled(rq))) return __pick_next_task(rq, prev, rf); cpu = cpu_of(rq); @@ -8559,7 +8559,7 @@ SYSCALL_DEFINE0(sched_yield) #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) int __sched __cond_resched(void) { - if (should_resched(0)) { + if (unlikely(should_resched(0))) { preempt_schedule_common(); return 1; } From 8bfead680ada799f6c3451ecc8ea6a685163c3a8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 6 Mar 2023 12:25:29 +0000 Subject: [PATCH 32/40] crypto: kdf: make the module init call a late init call Signed-off-by: Colin Ian King --- crypto/kdf_sp800108.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/kdf_sp800108.c b/crypto/kdf_sp800108.c index c3f9938e1ad27f..e77478e064d8f6 100644 --- a/crypto/kdf_sp800108.c +++ 
b/crypto/kdf_sp800108.c @@ -149,7 +149,7 @@ static int __init crypto_kdf108_init(void) static void __exit crypto_kdf108_exit(void) { } -module_init(crypto_kdf108_init); +late_initcall(crypto_kdf108_init); module_exit(crypto_kdf108_exit); MODULE_LICENSE("GPL v2"); From 5a3d58a5aef397aa6f9a57eaa70134112449b213 Mon Sep 17 00:00:00 2001 From: Intel ClearLinux Date: Sun, 19 Nov 2023 21:16:46 +0100 Subject: [PATCH 33/40] [PATCH] ratelimit sched yield Some misguided apps hammer sched_yield() in a tight loop (they should be using futexes instead) which causes massive lock contention even if there is little work to do or to yield to. rate limit yielding since the base scheduler does a pretty good job already about just running the right things Signed-off-by: Kai Krakow --- kernel/sched/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 25c45b1dff003c..1868e28d7ad204 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8525,10 +8525,22 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, return ret; } +static DEFINE_PER_CPU(unsigned long, last_yield); + static void do_sched_yield(void) { struct rq_flags rf; struct rq *rq; + int cpu = raw_smp_processor_id(); + + cond_resched(); + + /* rate limit yielding to something sensible */ + + if (!time_after(jiffies, per_cpu(last_yield, cpu))) + return; + + per_cpu(last_yield, cpu) = jiffies; rq = this_rq_lock_irq(&rf); From 7a929e3d6915ba0d0307d4c803b8bce785b48358 Mon Sep 17 00:00:00 2001 From: Intel ClearLinux Date: Sun, 19 Nov 2023 21:17:16 +0100 Subject: [PATCH 34/40] [PATCH] scale net alloc Signed-off-by: Kai Krakow --- include/net/sock.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index 92f7ea62a9159c..54bdc875fda9fc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1682,10 +1682,17 @@ static inline void sk_mem_charge(struct sock *sk, int size) static inline
void sk_mem_uncharge(struct sock *sk, int size) { + int reclaimable, reclaim_threshold; + + reclaim_threshold = 64 * 1024; if (!sk_has_account(sk)) return; sk_forward_alloc_add(sk, size); - sk_mem_reclaim(sk); + reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); + if (reclaimable > reclaim_threshold) { + reclaimable -= reclaim_threshold; + __sk_mem_reclaim(sk, reclaimable); + } } /* From 1a6775207b8fc58867daf04b069709946c2b6ca7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 3 May 2023 17:31:05 +0100 Subject: [PATCH 35/40] clocksource: only perform extended clocksource checks for AMD systems Signed-off-by: Colin Ian King --- drivers/clocksource/acpi_pm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index 82338773602cae..d84f0e29452ecf 100644 --- a/drivers/clocksource/acpi_pm.c +++ b/drivers/clocksource/acpi_pm.c @@ -176,13 +176,16 @@ static int verify_pmtmr_rate(void) static int __init init_acpi_pm_clocksource(void) { u64 value1, value2; - unsigned int i, j = 0; + unsigned int i, j = 0, checks = 1; if (!pmtmr_ioport) return -ENODEV; + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + checks = ACPI_PM_MONOTONICITY_CHECKS; + /* "verify" this timing source: */ - for (j = 0; j < ACPI_PM_MONOTONICITY_CHECKS; j++) { + for (j = 0; j < checks; j++) { udelay(100 * j); value1 = clocksource_acpi_pm.read(&clocksource_acpi_pm); for (i = 0; i < ACPI_PM_READ_CHECKS; i++) { From 295e384c4b8092ba750eb5b9844e10eeb2a4aa89 Mon Sep 17 00:00:00 2001 From: Intel ClearLinux Date: Sun, 19 Nov 2023 21:17:41 +0100 Subject: [PATCH 36/40] [PATCH] better idle balance Signed-off-by: Kai Krakow --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index cf94f181ffe249..dcde40bf6ac6cb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -12139,7 +12139,7 @@ static int newidle_balance(struct rq 
*this_rq, struct rq_flags *rf) update_next_balance(sd, &next_balance); - if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) + if (this_rq->avg_idle/2 < curr_cost + sd->max_newidle_lb_cost) break; if (sd->flags & SD_BALANCE_NEWIDLE) { From 29c3df5aee492f0be818eb5e3f82738f96ee142c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 27 Jun 2023 14:12:27 +0100 Subject: [PATCH 37/40] ACPI: align slab for improved memory performance Enabling SLAB_HWCACHE_ALIGN for the ACPI object caches improves boot speed in the ACPICA core for object allocation and free'ing especially in the AML parsing and execution phases in boot. Testing with 100 boots shows an average boot saving in acpi_init of ~35000 usecs compared to the unaligned version. Most of the ACPI objects being allocated and free'd are of very short life times in the critical paths for parsing and execution, so the extra memory used for alignment isn't too onerous. Signed-off-by: Colin Ian King --- drivers/acpi/osl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index f725813d0cce6a..dc1eb49db49e51 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -1556,7 +1556,7 @@ void acpi_os_release_lock(acpi_spinlock lockp, acpi_cpu_flags flags) acpi_status acpi_os_create_cache(char *name, u16 size, u16 depth, acpi_cache_t ** cache) { - *cache = kmem_cache_create(name, size, 0, 0, NULL); + *cache = kmem_cache_create(name, size, 0, SLAB_HWCACHE_ALIGN, NULL); if (*cache == NULL) return AE_ERROR; else From 21394ab2b177c3f436daae7ab9dcf4d7be8f9f9d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Sep 2023 14:16:21 +0100 Subject: [PATCH 38/40] thermal: intel: powerclamp: check MWAIT first, use pr_warn instead of pr_err For x86 targets it's more pertinent to check for lack of MWAIT than AMD specific cpus, so swap the order of tests. Also make the pr_err a pr_warn to align with other ENODEV warning messages.
Signed-off-by: Colin Ian King --- drivers/thermal/intel/intel_powerclamp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 4419ad0a3d4ada..c787c1d2390f34 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -752,14 +752,13 @@ MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); static int __init powerclamp_probe(void) { - - if (x86_match_cpu(amd_cpu)){ - pr_info("Intel PowerClamp does not support AMD CPUs\n"); + if (!x86_match_cpu(intel_powerclamp_ids)) { + pr_info("CPU does not support MWAIT\n"); return -ENODEV; } - if (!x86_match_cpu(intel_powerclamp_ids)) { - pr_err("CPU does not support MWAIT\n"); + if (x86_match_cpu(amd_cpu)){ + pr_info("Intel PowerClamp does not support AMD CPUs\n"); return -ENODEV; } From 001f5d28c58d645338fc03fe1d9d1f44e46a64a8 Mon Sep 17 00:00:00 2001 From: Intel ClearLinux Date: Sun, 19 Nov 2023 21:18:07 +0100 Subject: [PATCH 39/40] [PATCH] timer slack Signed-off-by: Kai Krakow --- init/init_task.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/init_task.c b/init/init_task.c index ff6c4b9bfe6b1c..ba451478532da1 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -130,7 +130,7 @@ struct task_struct init_task .journal_info = NULL, INIT_CPU_TIMERS(init_task) .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), - .timer_slack_ns = 50000, /* 50 usec default slack */ + .timer_slack_ns = 50, /* 50 nsec default slack */ .thread_pid = &init_struct_pid, .thread_group = LIST_HEAD_INIT(init_task.thread_group), .thread_node = LIST_HEAD_INIT(init_signals.thread_head), From 5edc2983c6717da74e2a289245cb54c085ea792f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 14 Nov 2023 13:29:45 +0000 Subject: [PATCH 40/40] sched/fair: remove upper limit on cpu number Signed-off-by: Colin Ian King --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index dcde40bf6ac6cb..210b5d65276de0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -212,7 +212,7 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) */ static unsigned int get_update_sysctl_factor(void) { - unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8); + unsigned int cpus = num_online_cpus(); unsigned int factor; switch (sysctl_sched_tunable_scaling) {