Skip to content

Commit ceeadb8

Browse files
laoar authored and Peter Zijlstra committed
sched: Make struct sched_statistics independent of fair sched class
If we want to use the schedstats facility to trace other sched classes, we should make it independent of the fair sched class. The struct sched_statistics holds the scheduler statistics of a task_struct or a task_group, so we can move it into struct task_struct and struct task_group to achieve the goal. After the patch, schedstats are organized as follows, struct task_struct { ... struct sched_entity se; struct sched_rt_entity rt; struct sched_dl_entity dl; ... struct sched_statistics stats; ... }; Regarding the task group, schedstats is only supported for fair group sched, and a new struct sched_entity_stats is introduced, as suggested by Peter - struct sched_entity_stats { struct sched_entity se; struct sched_statistics stats; } __no_randomize_layout; Then with the se in a task_group, we can easily get the stats. The sched_statistics members may be frequently modified when schedstats is enabled; in order to avoid impacting random data which may be in the same cacheline with them, the struct sched_statistics is defined as cacheline aligned. As this patch changes the core struct of the scheduler, I verified the performance impact it may have on the scheduler with 'perf bench sched pipe', as suggested by Mel. Below is the result, in which all the values are in usecs/op. Before After kernel.sched_schedstats=0 5.2~5.4 5.2~5.4 kernel.sched_schedstats=1 5.3~5.5 5.3~5.5 [These data are a little different from the earlier version because my old test machine was destroyed, so I had to use a new, different test machine.] Almost no impact on the sched performance. No functional change. [lkp@intel.com: reported build failure in earlier version] Signed-off-by: Yafang Shao <laoar.shao@gmail.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Mel Gorman <mgorman@suse.de> Link: https://lore.kernel.org/r/20210905143547.4668-3-laoar.shao@gmail.com
1 parent a2dcb27 commit ceeadb8

File tree

8 files changed

+143
-100
lines changed

8 files changed

+143
-100
lines changed

include/linux/sched.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ struct sched_statistics {
522522
u64 nr_wakeups_passive;
523523
u64 nr_wakeups_idle;
524524
#endif
525-
};
525+
} ____cacheline_aligned;
526526

527527
struct sched_entity {
528528
/* For load-balancing: */
@@ -538,8 +538,6 @@ struct sched_entity {
538538

539539
u64 nr_migrations;
540540

541-
struct sched_statistics statistics;
542-
543541
#ifdef CONFIG_FAIR_GROUP_SCHED
544542
int depth;
545543
struct sched_entity *parent;
@@ -803,6 +801,8 @@ struct task_struct {
803801
struct uclamp_se uclamp[UCLAMP_CNT];
804802
#endif
805803

804+
struct sched_statistics stats;
805+
806806
#ifdef CONFIG_PREEMPT_NOTIFIERS
807807
/* List of struct preempt_notifier: */
808808
struct hlist_head preempt_notifiers;

kernel/sched/core.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3489,11 +3489,11 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
34893489
#ifdef CONFIG_SMP
34903490
if (cpu == rq->cpu) {
34913491
__schedstat_inc(rq->ttwu_local);
3492-
__schedstat_inc(p->se.statistics.nr_wakeups_local);
3492+
__schedstat_inc(p->stats.nr_wakeups_local);
34933493
} else {
34943494
struct sched_domain *sd;
34953495

3496-
__schedstat_inc(p->se.statistics.nr_wakeups_remote);
3496+
__schedstat_inc(p->stats.nr_wakeups_remote);
34973497
rcu_read_lock();
34983498
for_each_domain(rq->cpu, sd) {
34993499
if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
@@ -3505,14 +3505,14 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
35053505
}
35063506

35073507
if (wake_flags & WF_MIGRATED)
3508-
__schedstat_inc(p->se.statistics.nr_wakeups_migrate);
3508+
__schedstat_inc(p->stats.nr_wakeups_migrate);
35093509
#endif /* CONFIG_SMP */
35103510

35113511
__schedstat_inc(rq->ttwu_count);
3512-
__schedstat_inc(p->se.statistics.nr_wakeups);
3512+
__schedstat_inc(p->stats.nr_wakeups);
35133513

35143514
if (wake_flags & WF_SYNC)
3515-
__schedstat_inc(p->se.statistics.nr_wakeups_sync);
3515+
__schedstat_inc(p->stats.nr_wakeups_sync);
35163516
}
35173517

35183518
/*
@@ -4196,7 +4196,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
41964196

41974197
#ifdef CONFIG_SCHEDSTATS
41984198
/* Even if schedstat is disabled, there should not be garbage */
4199-
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
4199+
memset(&p->stats, 0, sizeof(p->stats));
42004200
#endif
42014201

42024202
RB_CLEAR_NODE(&p->dl.rb_node);
@@ -9553,9 +9553,9 @@ void normalize_rt_tasks(void)
95539553
continue;
95549554

95559555
p->se.exec_start = 0;
9556-
schedstat_set(p->se.statistics.wait_start, 0);
9557-
schedstat_set(p->se.statistics.sleep_start, 0);
9558-
schedstat_set(p->se.statistics.block_start, 0);
9556+
schedstat_set(p->stats.wait_start, 0);
9557+
schedstat_set(p->stats.sleep_start, 0);
9558+
schedstat_set(p->stats.block_start, 0);
95599559

95609560
if (!dl_task(p) && !rt_task(p)) {
95619561
/*
@@ -10397,11 +10397,14 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
1039710397
seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time);
1039810398

1039910399
if (schedstat_enabled() && tg != &root_task_group) {
10400+
struct sched_statistics *stats;
1040010401
u64 ws = 0;
1040110402
int i;
1040210403

10403-
for_each_possible_cpu(i)
10404-
ws += schedstat_val(tg->se[i]->statistics.wait_sum);
10404+
for_each_possible_cpu(i) {
10405+
stats = __schedstats_from_se(tg->se[i]);
10406+
ws += schedstat_val(stats->wait_sum);
10407+
}
1040510408

1040610409
seq_printf(sf, "wait_sum %llu\n", ws);
1040710410
}

kernel/sched/deadline.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1265,8 +1265,8 @@ static void update_curr_dl(struct rq *rq)
12651265
return;
12661266
}
12671267

1268-
schedstat_set(curr->se.statistics.exec_max,
1269-
max(curr->se.statistics.exec_max, delta_exec));
1268+
schedstat_set(curr->stats.exec_max,
1269+
max(curr->stats.exec_max, delta_exec));
12701270

12711271
curr->se.sum_exec_runtime += delta_exec;
12721272
account_group_exec_runtime(curr, delta_exec);

kernel/sched/debug.c

Lines changed: 48 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -449,9 +449,11 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
449449
struct sched_entity *se = tg->se[cpu];
450450

451451
#define P(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F)
452-
#define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)schedstat_val(F))
452+
#define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", \
453+
#F, (long long)schedstat_val(stats->F))
453454
#define PN(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
454-
#define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
455+
#define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", \
456+
#F, SPLIT_NS((long long)schedstat_val(stats->F)))
455457

456458
if (!se)
457459
return;
@@ -461,16 +463,18 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
461463
PN(se->sum_exec_runtime);
462464

463465
if (schedstat_enabled()) {
464-
PN_SCHEDSTAT(se->statistics.wait_start);
465-
PN_SCHEDSTAT(se->statistics.sleep_start);
466-
PN_SCHEDSTAT(se->statistics.block_start);
467-
PN_SCHEDSTAT(se->statistics.sleep_max);
468-
PN_SCHEDSTAT(se->statistics.block_max);
469-
PN_SCHEDSTAT(se->statistics.exec_max);
470-
PN_SCHEDSTAT(se->statistics.slice_max);
471-
PN_SCHEDSTAT(se->statistics.wait_max);
472-
PN_SCHEDSTAT(se->statistics.wait_sum);
473-
P_SCHEDSTAT(se->statistics.wait_count);
466+
struct sched_statistics *stats = __schedstats_from_se(se);
467+
468+
PN_SCHEDSTAT(wait_start);
469+
PN_SCHEDSTAT(sleep_start);
470+
PN_SCHEDSTAT(block_start);
471+
PN_SCHEDSTAT(sleep_max);
472+
PN_SCHEDSTAT(block_max);
473+
PN_SCHEDSTAT(exec_max);
474+
PN_SCHEDSTAT(slice_max);
475+
PN_SCHEDSTAT(wait_max);
476+
PN_SCHEDSTAT(wait_sum);
477+
P_SCHEDSTAT(wait_count);
474478
}
475479

476480
P(se->load.weight);
@@ -537,9 +541,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
537541
p->prio);
538542

539543
SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
540-
SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
544+
SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)),
541545
SPLIT_NS(p->se.sum_exec_runtime),
542-
SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
546+
SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)));
543547

544548
#ifdef CONFIG_NUMA_BALANCING
545549
SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
@@ -958,8 +962,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
958962
"---------------------------------------------------------"
959963
"----------\n");
960964

961-
#define P_SCHEDSTAT(F) __PS(#F, schedstat_val(p->F))
962-
#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F))
965+
#define P_SCHEDSTAT(F) __PS(#F, schedstat_val(p->stats.F))
966+
#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->stats.F))
963967

964968
PN(se.exec_start);
965969
PN(se.vruntime);
@@ -972,33 +976,33 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
972976
if (schedstat_enabled()) {
973977
u64 avg_atom, avg_per_cpu;
974978

975-
PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
976-
PN_SCHEDSTAT(se.statistics.wait_start);
977-
PN_SCHEDSTAT(se.statistics.sleep_start);
978-
PN_SCHEDSTAT(se.statistics.block_start);
979-
PN_SCHEDSTAT(se.statistics.sleep_max);
980-
PN_SCHEDSTAT(se.statistics.block_max);
981-
PN_SCHEDSTAT(se.statistics.exec_max);
982-
PN_SCHEDSTAT(se.statistics.slice_max);
983-
PN_SCHEDSTAT(se.statistics.wait_max);
984-
PN_SCHEDSTAT(se.statistics.wait_sum);
985-
P_SCHEDSTAT(se.statistics.wait_count);
986-
PN_SCHEDSTAT(se.statistics.iowait_sum);
987-
P_SCHEDSTAT(se.statistics.iowait_count);
988-
P_SCHEDSTAT(se.statistics.nr_migrations_cold);
989-
P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
990-
P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
991-
P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
992-
P_SCHEDSTAT(se.statistics.nr_forced_migrations);
993-
P_SCHEDSTAT(se.statistics.nr_wakeups);
994-
P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
995-
P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
996-
P_SCHEDSTAT(se.statistics.nr_wakeups_local);
997-
P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
998-
P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
999-
P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
1000-
P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
1001-
P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
979+
PN_SCHEDSTAT(sum_sleep_runtime);
980+
PN_SCHEDSTAT(wait_start);
981+
PN_SCHEDSTAT(sleep_start);
982+
PN_SCHEDSTAT(block_start);
983+
PN_SCHEDSTAT(sleep_max);
984+
PN_SCHEDSTAT(block_max);
985+
PN_SCHEDSTAT(exec_max);
986+
PN_SCHEDSTAT(slice_max);
987+
PN_SCHEDSTAT(wait_max);
988+
PN_SCHEDSTAT(wait_sum);
989+
P_SCHEDSTAT(wait_count);
990+
PN_SCHEDSTAT(iowait_sum);
991+
P_SCHEDSTAT(iowait_count);
992+
P_SCHEDSTAT(nr_migrations_cold);
993+
P_SCHEDSTAT(nr_failed_migrations_affine);
994+
P_SCHEDSTAT(nr_failed_migrations_running);
995+
P_SCHEDSTAT(nr_failed_migrations_hot);
996+
P_SCHEDSTAT(nr_forced_migrations);
997+
P_SCHEDSTAT(nr_wakeups);
998+
P_SCHEDSTAT(nr_wakeups_sync);
999+
P_SCHEDSTAT(nr_wakeups_migrate);
1000+
P_SCHEDSTAT(nr_wakeups_local);
1001+
P_SCHEDSTAT(nr_wakeups_remote);
1002+
P_SCHEDSTAT(nr_wakeups_affine);
1003+
P_SCHEDSTAT(nr_wakeups_affine_attempts);
1004+
P_SCHEDSTAT(nr_wakeups_passive);
1005+
P_SCHEDSTAT(nr_wakeups_idle);
10021006

10031007
avg_atom = p->se.sum_exec_runtime;
10041008
if (nr_switches)
@@ -1064,7 +1068,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
10641068
void proc_sched_set_task(struct task_struct *p)
10651069
{
10661070
#ifdef CONFIG_SCHEDSTATS
1067-
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
1071+
memset(&p->stats, 0, sizeof(p->stats));
10681072
#endif
10691073
}
10701074

0 commit comments

Comments
 (0)