Skip to content

Commit c006fac

Browse files
paulturnerPeter Zijlstra
authored andcommitted
sched: Warn on long periods of pending need_resched
CPU scheduler marks need_resched flag to signal a schedule() on a particular CPU. But, schedule() may not happen immediately in cases where the current task is executing in the kernel mode (no preemption state) for extended periods of time. This patch adds a warn_on if need_resched is pending for more than the time specified in sysctl resched_latency_warn_ms. If it goes off, it is likely that there is a missing cond_resched() somewhere. Monitoring is done via the tick and the accuracy is hence limited to jiffy scale. This also means that we won't trigger the warning if the tick is disabled. This feature (LATENCY_WARN) is default disabled. Signed-off-by: Paul Turner <pjt@google.com> Signed-off-by: Josh Don <joshdon@google.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20210416212936.390566-1-joshdon@google.com
1 parent 3f5ad91 commit c006fac

File tree

5 files changed

+97
-1
lines changed

5 files changed

+97
-1
lines changed

include/linux/sched/sysctl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ extern unsigned int sysctl_numa_balancing_scan_size;
4848
#ifdef CONFIG_SCHED_DEBUG
4949
extern __read_mostly unsigned int sysctl_sched_migration_cost;
5050
extern __read_mostly unsigned int sysctl_sched_nr_migrate;
51+
52+
extern int sysctl_resched_latency_warn_ms;
53+
extern int sysctl_resched_latency_warn_once;
5154
#endif
5255

5356
/*

kernel/sched/core.c

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,17 @@ const_debug unsigned int sysctl_sched_features =
5858
#include "features.h"
5959
0;
6060
#undef SCHED_FEAT
61-
#endif
61+
62+
/*
63+
* Print a warning if need_resched is set for the given duration (if
64+
* LATENCY_WARN is enabled).
65+
*
66+
* If sysctl_resched_latency_warn_once is set, only one warning will be shown
67+
* per boot.
68+
*/
69+
__read_mostly int sysctl_resched_latency_warn_ms = 100;
70+
__read_mostly int sysctl_resched_latency_warn_once = 1;
71+
#endif /* CONFIG_SCHED_DEBUG */
6272

6373
/*
6474
* Number of tasks to iterate in a single balance run.
@@ -4527,6 +4537,55 @@ unsigned long long task_sched_runtime(struct task_struct *p)
45274537
return ns;
45284538
}
45294539

4540+
#ifdef CONFIG_SCHED_DEBUG
4541+
static u64 cpu_resched_latency(struct rq *rq)
4542+
{
4543+
int latency_warn_ms = READ_ONCE(sysctl_resched_latency_warn_ms);
4544+
u64 resched_latency, now = rq_clock(rq);
4545+
static bool warned_once;
4546+
4547+
if (sysctl_resched_latency_warn_once && warned_once)
4548+
return 0;
4549+
4550+
if (!need_resched() || !latency_warn_ms)
4551+
return 0;
4552+
4553+
if (system_state == SYSTEM_BOOTING)
4554+
return 0;
4555+
4556+
if (!rq->last_seen_need_resched_ns) {
4557+
rq->last_seen_need_resched_ns = now;
4558+
rq->ticks_without_resched = 0;
4559+
return 0;
4560+
}
4561+
4562+
rq->ticks_without_resched++;
4563+
resched_latency = now - rq->last_seen_need_resched_ns;
4564+
if (resched_latency <= latency_warn_ms * NSEC_PER_MSEC)
4565+
return 0;
4566+
4567+
warned_once = true;
4568+
4569+
return resched_latency;
4570+
}
4571+
4572+
static int __init setup_resched_latency_warn_ms(char *str)
4573+
{
4574+
long val;
4575+
4576+
if ((kstrtol(str, 0, &val))) {
4577+
pr_warn("Unable to set resched_latency_warn_ms\n");
4578+
return 1;
4579+
}
4580+
4581+
sysctl_resched_latency_warn_ms = val;
4582+
return 1;
4583+
}
4584+
__setup("resched_latency_warn_ms=", setup_resched_latency_warn_ms);
4585+
#else
4586+
static inline u64 cpu_resched_latency(struct rq *rq) { return 0; }
4587+
#endif /* CONFIG_SCHED_DEBUG */
4588+
45304589
/*
45314590
* This function gets called by the timer code, with HZ frequency.
45324591
* We call it with interrupts disabled.
@@ -4538,6 +4597,7 @@ void scheduler_tick(void)
45384597
struct task_struct *curr = rq->curr;
45394598
struct rq_flags rf;
45404599
unsigned long thermal_pressure;
4600+
u64 resched_latency;
45414601

45424602
arch_scale_freq_tick();
45434603
sched_clock_tick();
@@ -4548,10 +4608,15 @@ void scheduler_tick(void)
45484608
thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
45494609
update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure);
45504610
curr->sched_class->task_tick(rq, curr, 0);
4611+
if (sched_feat(LATENCY_WARN))
4612+
resched_latency = cpu_resched_latency(rq);
45514613
calc_global_load_tick(rq);
45524614

45534615
rq_unlock(rq, &rf);
45544616

4617+
if (sched_feat(LATENCY_WARN) && resched_latency)
4618+
resched_latency_warn(cpu, resched_latency);
4619+
45554620
perf_event_task_tick();
45564621

45574622
#ifdef CONFIG_SMP
@@ -5046,6 +5111,9 @@ static void __sched notrace __schedule(bool preempt)
50465111
next = pick_next_task(rq, prev, &rf);
50475112
clear_tsk_need_resched(prev);
50485113
clear_preempt_need_resched();
5114+
#ifdef CONFIG_SCHED_DEBUG
5115+
rq->last_seen_need_resched_ns = 0;
5116+
#endif
50495117

50505118
if (likely(prev != next)) {
50515119
rq->nr_switches++;

kernel/sched/debug.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,9 @@ static __init int sched_init_debug(void)
309309
debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity);
310310
debugfs_create_u32("wakeup_granularity_ns", 0644, debugfs_sched, &sysctl_sched_wakeup_granularity);
311311

312+
debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
313+
debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
314+
312315
#ifdef CONFIG_SMP
313316
debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
314317
debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
@@ -1027,3 +1030,13 @@ void proc_sched_set_task(struct task_struct *p)
10271030
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
10281031
#endif
10291032
}
1033+
1034+
void resched_latency_warn(int cpu, u64 latency)
1035+
{
1036+
static DEFINE_RATELIMIT_STATE(latency_check_ratelimit, 60 * 60 * HZ, 1);
1037+
1038+
WARN(__ratelimit(&latency_check_ratelimit),
1039+
"sched: CPU %d need_resched set for > %llu ns (%d ticks) "
1040+
"without schedule\n",
1041+
cpu, latency, cpu_rq(cpu)->ticks_without_resched);
1042+
}

kernel/sched/features.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,5 +91,7 @@ SCHED_FEAT(WA_BIAS, true)
9191
SCHED_FEAT(UTIL_EST, true)
9292
SCHED_FEAT(UTIL_EST_FASTUP, true)
9393

94+
SCHED_FEAT(LATENCY_WARN, false)
95+
9496
SCHED_FEAT(ALT_PERIOD, true)
9597
SCHED_FEAT(BASE_SLICE, true)

kernel/sched/sched.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include <linux/prefetch.h>
5959
#include <linux/profile.h>
6060
#include <linux/psi.h>
61+
#include <linux/ratelimit.h>
6162
#include <linux/rcupdate_wait.h>
6263
#include <linux/security.h>
6364
#include <linux/stop_machine.h>
@@ -971,6 +972,11 @@ struct rq {
971972

972973
atomic_t nr_iowait;
973974

975+
#ifdef CONFIG_SCHED_DEBUG
976+
u64 last_seen_need_resched_ns;
977+
int ticks_without_resched;
978+
#endif
979+
974980
#ifdef CONFIG_MEMBARRIER
975981
int membarrier_state;
976982
#endif
@@ -2371,13 +2377,17 @@ extern void print_dl_stats(struct seq_file *m, int cpu);
23712377
extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
23722378
extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
23732379
extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
2380+
2381+
extern void resched_latency_warn(int cpu, u64 latency);
23742382
#ifdef CONFIG_NUMA_BALANCING
23752383
extern void
23762384
show_numa_stats(struct task_struct *p, struct seq_file *m);
23772385
extern void
23782386
print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
23792387
unsigned long tpf, unsigned long gsf, unsigned long gpf);
23802388
#endif /* CONFIG_NUMA_BALANCING */
2389+
#else
2390+
static inline void resched_latency_warn(int cpu, u64 latency) {}
23812391
#endif /* CONFIG_SCHED_DEBUG */
23822392

23832393
extern void init_cfs_rq(struct cfs_rq *cfs_rq);

0 commit comments

Comments
 (0)