Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

FIX: from CodeAurora - cherry picked from intersactRaven

  • Loading branch information...
commit 9874422de594edc7f0246cf6e384c1f55fb13f7a 1 parent f4313a7
Marcus Stojcevich authored
Showing with 94 additions and 37 deletions.
  1. +94 −37 Kernel/kernel/sched.c
131 Kernel/kernel/sched.c
View
@@ -659,21 +659,17 @@ static inline struct task_group *task_group(struct task_struct *p)
#endif /* CONFIG_CGROUP_SCHED */
-static u64 irq_time_cpu(int cpu);
-static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time);
+static void update_rq_clock_task(struct rq *rq, s64 delta);
-inline void update_rq_clock(struct rq *rq)
+static void update_rq_clock(struct rq *rq)
{
- int cpu = cpu_of(rq);
- u64 irq_time;
-
- if (!rq->skip_clock_update)
- rq->clock = sched_clock_cpu(cpu_of(rq));
- irq_time = irq_time_cpu(cpu);
- if (rq->clock - irq_time > rq->clock_task)
- rq->clock_task = rq->clock - irq_time;
+ s64 delta;
+ if (rq->skip_clock_update)
+ return;
- sched_irq_time_avg_update(rq, irq_time);
+ delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
+ rq->clock += delta;
+ update_rq_clock_task(rq, delta);
}
/*
@@ -1858,10 +1854,9 @@ static const struct sched_class rt_sched_class;
* They are read and saved off onto struct rq in update_rq_clock().
* This may result in other CPU reading this CPU's irq time and can
* race with irq/account_system_vtime on this CPU. We would either get old
- * or new value (or semi updated value on 32 bit) with a side effect of
- * accounting a slice of irq time to wrong task when irq is in progress
- * while we read rq->clock. That is a worthy compromise in place of having
- * locks on each irq in account_system_time.
+ * or new value with a side effect of accounting a slice of irq time to wrong
+ * task when irq is in progress while we read rq->clock. That is a worthy
+ * compromise in place of having locks on each irq in account_system_time.
*/
static DEFINE_PER_CPU(u64, cpu_hardirq_time);
static DEFINE_PER_CPU(u64, cpu_softirq_time);
@@ -1879,19 +1874,58 @@ void disable_sched_clock_irqtime(void)
sched_clock_irqtime = 0;
}
-static u64 irq_time_cpu(int cpu)
+#ifndef CONFIG_64BIT
+static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
+
+static inline void irq_time_write_begin(void)
{
- if (!sched_clock_irqtime)
- return 0;
+ __this_cpu_inc(irq_time_seq.sequence);
+ smp_wmb();
+}
+
+static inline void irq_time_write_end(void)
+{
+ smp_wmb();
+ __this_cpu_inc(irq_time_seq.sequence);
+}
+
+static inline u64 irq_time_read(int cpu)
+{
+ u64 irq_time;
+ unsigned seq;
+
+ do {
+ seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
+ irq_time = per_cpu(cpu_softirq_time, cpu) +
+ per_cpu(cpu_hardirq_time, cpu);
+ } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
+
+ return irq_time;
+}
+#else /* CONFIG_64BIT */
+static inline void irq_time_write_begin(void)
+{
+}
+
+static inline void irq_time_write_end(void)
+{
+}
+static inline u64 irq_time_read(int cpu)
+{
return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
}
+#endif /* CONFIG_64BIT */
+/*
+ * Called before incrementing preempt_count on {soft,}irq_enter
+ * and before decrementing preempt_count on {soft,}irq_exit.
+ */
void account_system_vtime(struct task_struct *curr)
{
unsigned long flags;
+ s64 delta;
int cpu;
- u64 now, delta;
if (!sched_clock_irqtime)
return;
@@ -1899,9 +1933,10 @@ void account_system_vtime(struct task_struct *curr)
local_irq_save(flags);
cpu = smp_processor_id();
- now = sched_clock_cpu(cpu);
- delta = now - per_cpu(irq_start_time, cpu);
- per_cpu(irq_start_time, cpu) = now;
+ delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
+ __this_cpu_add(irq_start_time, delta);
+
+ irq_time_write_begin();
/*
* We do not account for softirq time from ksoftirqd here.
* We want to continue accounting softirq time to ksoftirqd thread
@@ -1909,33 +1944,55 @@ void account_system_vtime(struct task_struct *curr)
* that do not consume any time, but still wants to run.
*/
if (hardirq_count())
- per_cpu(cpu_hardirq_time, cpu) += delta;
+ __this_cpu_add(cpu_hardirq_time, delta);
else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
- per_cpu(cpu_softirq_time, cpu) += delta;
+ __this_cpu_add(cpu_softirq_time, delta);
+ irq_time_write_end();
local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(account_system_vtime);
-static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time)
+static void update_rq_clock_task(struct rq *rq, s64 delta)
{
- if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) {
- u64 delta_irq = curr_irq_time - rq->prev_irq_time;
- rq->prev_irq_time = curr_irq_time;
- sched_rt_avg_update(rq, delta_irq);
- }
+ s64 irq_delta;
+
+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
+
+ /*
+ * Since irq_time is only updated on {soft,}irq_exit, we might run into
+ * this case when a previous update_rq_clock() happened inside a
+ * {soft,}irq region.
+ *
+ * When this happens, we stop ->clock_task and only update the
+ * prev_irq_time stamp to account for the part that fit, so that a next
+ * update will consume the rest. This ensures ->clock_task is
+ * monotonic.
+ *
+ * It does however cause some slight miss-attribution of {soft,}irq
+ * time, a more accurate solution would be to update the irq_time using
+ * the current rq->clock timestamp, except that would require using
+ * atomic ops.
+ */
+ if (irq_delta > delta)
+ irq_delta = delta;
+
+ rq->prev_irq_time += irq_delta;
+ delta -= irq_delta;
+ rq->clock_task += delta;
+
+ if (irq_delta && sched_feat(NONIRQ_POWER))
+ sched_rt_avg_update(rq, irq_delta);
}
-#else
+#else /* CONFIG_IRQ_TIME_ACCOUNTING */
-static u64 irq_time_cpu(int cpu)
+static void update_rq_clock_task(struct rq *rq, s64 delta)
{
- return 0;
+ rq->clock_task += delta;
}
-static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { }
-
-#endif
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
#include "sched_stats.h"
Please sign in to comment.
Something went wrong with that request. Please try again.