ARM: 7205/2: sched_clock: allow sched_clock to be selected at runtime
sched_clock() is yet another blocker on the road to a single
kernel image. This patch implements an idea by Russell King:

http://www.spinics.net/lists/linux-omap/msg49561.html

Instead of asking each platform to implement both sched_clock()
itself and the rollover callback, simply register a read() function
and let the common ARM code take care of sched_clock() itself, the
conversion to ns and the rollover. sched_clock() uses this read()
function as an indirection to the platform code. If the platform
doesn't provide a read(), the code falls back to the jiffy counter
(just like the default sched_clock()).

This allows some simplifications and possibly some footprint gain
when multiple platforms are compiled in. Among the drawbacks is the
removal of the *_fixed_sched_clock optimization, which could
negatively impact some platforms (sa1100, tegra, versatile
and omap).

Tested on 11MPCore, OMAP4 and Tegra.

Cc: Imre Kaloz <kaloz@openwrt.org>
Cc: Eric Miao <eric.y.miao@gmail.com>
Cc: Colin Cross <ccross@android.com>
Cc: Erik Gilling <konkers@android.com>
Cc: Olof Johansson <olof@lixom.net>
Cc: Sascha Hauer <kernel@pengutronix.de>
Cc: Alessandro Rubini <rubini@unipv.it>
Cc: STEricsson <STEricsson_nomadik_linux@list.st.com>
Cc: Lennert Buytenhek <kernel@wantstofly.org>
Cc: Ben Dooks <ben-linux@fluff.org>
Tested-by: Jamie Iles <jamie@jamieiles.com>
Tested-by: Tony Lindgren <tony@atomide.com>
Tested-by: Kyungmin Park <kyungmin.park@samsung.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Krzysztof Halasa <khc@pm.waw.pl>
Acked-by: Kukjin Kim <kgene.kim@samsung.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Marc Zyngier authored and Russell King committed Dec 18, 2011
1 parent 3bdc348 commit 2f0778a
Showing 19 changed files with 161 additions and 435 deletions.
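
As an illustration of the interface described in the commit message (not part of the commit itself), here is a minimal, hypothetical platform hook. The counter base, rate and all names below are made up for the sketch; the real conversions are in the per-platform hunks further down.

#include <linux/init.h>
#include <linux/io.h>
#include <linux/kernel.h>

#include <asm/sched_clock.h>

/* Hypothetical free-running 32-bit counter, assumed to have been
 * ioremap()ed by earlier platform code. */
static void __iomem *myplat_counter;

static u32 notrace myplat_read_sched_clock(void)
{
	/* Just return the raw counter value; the core code handles the
	 * ns conversion and the wrap/rollover bookkeeping. */
	return readl_relaxed(myplat_counter);
}

static void __init myplat_timer_init(void)
{
	/* 32 counter bits at an assumed 1 MHz, registered early while
	 * IRQs are still disabled (setup_sched_clock() warns otherwise). */
	setup_sched_clock(myplat_read_sched_clock, 32, 1000000);
}

The core code derives the mult/shift pair from the rate and arms a timer to refresh the epoch before the counter wraps, so the platform only has to supply the raw counter read.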
108 changes: 1 addition & 107 deletions arch/arm/include/asm/sched_clock.h
@@ -8,113 +8,7 @@
#ifndef ASM_SCHED_CLOCK
#define ASM_SCHED_CLOCK

#include <linux/kernel.h>
#include <linux/types.h>

struct clock_data {
u64 epoch_ns;
u32 epoch_cyc;
u32 epoch_cyc_copy;
u32 mult;
u32 shift;
};

#define DEFINE_CLOCK_DATA(name) struct clock_data name

static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift)
{
return (cyc * mult) >> shift;
}

/*
* Atomically update the sched_clock epoch. Your update callback will
* be called from a timer before the counter wraps - read the current
* counter value, and call this function to safely move the epochs
* forward. Only use this from the update callback.
*/
static inline void update_sched_clock(struct clock_data *cd, u32 cyc, u32 mask)
{
unsigned long flags;
u64 ns = cd->epoch_ns +
cyc_to_ns((cyc - cd->epoch_cyc) & mask, cd->mult, cd->shift);

/*
* Write epoch_cyc and epoch_ns in a way that the update is
* detectable in cyc_to_fixed_sched_clock().
*/
raw_local_irq_save(flags);
cd->epoch_cyc = cyc;
smp_wmb();
cd->epoch_ns = ns;
smp_wmb();
cd->epoch_cyc_copy = cyc;
raw_local_irq_restore(flags);
}

/*
* If your clock rate is known at compile time, using this will allow
* you to optimize the mult/shift loads away. This is paired with
* init_fixed_sched_clock() to ensure that your mult/shift are correct.
*/
static inline unsigned long long cyc_to_fixed_sched_clock(struct clock_data *cd,
u32 cyc, u32 mask, u32 mult, u32 shift)
{
u64 epoch_ns;
u32 epoch_cyc;

/*
* Load the epoch_cyc and epoch_ns atomically. We do this by
* ensuring that we always write epoch_cyc, epoch_ns and
* epoch_cyc_copy in strict order, and read them in strict order.
* If epoch_cyc and epoch_cyc_copy are not equal, then we're in
* the middle of an update, and we should repeat the load.
*/
do {
epoch_cyc = cd->epoch_cyc;
smp_rmb();
epoch_ns = cd->epoch_ns;
smp_rmb();
} while (epoch_cyc != cd->epoch_cyc_copy);

return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, mult, shift);
}

/*
* Otherwise, you need to use this, which will obtain the mult/shift
* from the clock_data structure. Use init_sched_clock() with this.
*/
static inline unsigned long long cyc_to_sched_clock(struct clock_data *cd,
u32 cyc, u32 mask)
{
return cyc_to_fixed_sched_clock(cd, cyc, mask, cd->mult, cd->shift);
}

/*
* Initialize the clock data - calculate the appropriate multiplier
* and shift. Also setup a timer to ensure that the epoch is refreshed
* at the appropriate time interval, which will call your update
* handler.
*/
void init_sched_clock(struct clock_data *, void (*)(void),
unsigned int, unsigned long);

/*
* Use this initialization function rather than init_sched_clock() if
* you're using cyc_to_fixed_sched_clock, which will warn if your
* constants are incorrect.
*/
static inline void init_fixed_sched_clock(struct clock_data *cd,
void (*update)(void), unsigned int bits, unsigned long rate,
u32 mult, u32 shift)
{
init_sched_clock(cd, update, bits, rate);
if (cd->mult != mult || cd->shift != shift) {
pr_crit("sched_clock: wrong multiply/shift: %u>>%u vs calculated %u>>%u\n"
"sched_clock: fix multiply/shift to avoid scheduler hiccups\n",
mult, shift, cd->mult, cd->shift);
}
}

extern void sched_clock_postinit(void);
extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);

#endif
118 changes: 105 additions & 13 deletions arch/arm/kernel/sched_clock.c
@@ -14,61 +14,153 @@

#include <asm/sched_clock.h>

struct clock_data {
u64 epoch_ns;
u32 epoch_cyc;
u32 epoch_cyc_copy;
u32 mult;
u32 shift;
};

static void sched_clock_poll(unsigned long wrap_ticks);
static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0);
static void (*sched_clock_update_fn)(void);

static struct clock_data cd = {
.mult = NSEC_PER_SEC / HZ,
};

static u32 __read_mostly sched_clock_mask = 0xffffffff;

static u32 notrace jiffy_sched_clock_read(void)
{
return (u32)(jiffies - INITIAL_JIFFIES);
}

static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;

static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift)
{
return (cyc * mult) >> shift;
}

static unsigned long long cyc_to_sched_clock(u32 cyc, u32 mask)
{
u64 epoch_ns;
u32 epoch_cyc;

/*
* Load the epoch_cyc and epoch_ns atomically. We do this by
* ensuring that we always write epoch_cyc, epoch_ns and
* epoch_cyc_copy in strict order, and read them in strict order.
* If epoch_cyc and epoch_cyc_copy are not equal, then we're in
* the middle of an update, and we should repeat the load.
*/
do {
epoch_cyc = cd.epoch_cyc;
smp_rmb();
epoch_ns = cd.epoch_ns;
smp_rmb();
} while (epoch_cyc != cd.epoch_cyc_copy);

return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, cd.mult, cd.shift);
}

/*
* Atomically update the sched_clock epoch.
*/
static void notrace update_sched_clock(void)
{
unsigned long flags;
u32 cyc;
u64 ns;

cyc = read_sched_clock();
ns = cd.epoch_ns +
cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
cd.mult, cd.shift);
/*
* Write epoch_cyc and epoch_ns in a way that the update is
* detectable in cyc_to_sched_clock().
*/
raw_local_irq_save(flags);
cd.epoch_cyc = cyc;
smp_wmb();
cd.epoch_ns = ns;
smp_wmb();
cd.epoch_cyc_copy = cyc;
raw_local_irq_restore(flags);
}

static void sched_clock_poll(unsigned long wrap_ticks)
{
mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));
sched_clock_update_fn();
update_sched_clock();
}

void __init init_sched_clock(struct clock_data *cd, void (*update)(void),
unsigned int clock_bits, unsigned long rate)
void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
{
unsigned long r, w;
u64 res, wrap;
char r_unit;

sched_clock_update_fn = update;
BUG_ON(bits > 32);
WARN_ON(!irqs_disabled());
WARN_ON(read_sched_clock != jiffy_sched_clock_read);
read_sched_clock = read;
sched_clock_mask = (1 << bits) - 1;

/* calculate the mult/shift to convert counter ticks to ns. */
clocks_calc_mult_shift(&cd->mult, &cd->shift, rate, NSEC_PER_SEC, 0);
clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0);

r = rate;
if (r >= 4000000) {
r /= 1000000;
r_unit = 'M';
} else {
} else if (r >= 1000) {
r /= 1000;
r_unit = 'k';
}
} else
r_unit = ' ';

/* calculate how many ns until we wrap */
wrap = cyc_to_ns((1ULL << clock_bits) - 1, cd->mult, cd->shift);
wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift);
do_div(wrap, NSEC_PER_MSEC);
w = wrap;

/* calculate the ns resolution of this counter */
res = cyc_to_ns(1ULL, cd->mult, cd->shift);
res = cyc_to_ns(1ULL, cd.mult, cd.shift);
pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n",
clock_bits, r, r_unit, res, w);
bits, r, r_unit, res, w);

/*
* Start the timer to keep sched_clock() properly updated and
* sets the initial epoch.
*/
sched_clock_timer.data = msecs_to_jiffies(w - (w / 10));
update();
update_sched_clock();

/*
* Ensure that sched_clock() starts off at 0ns
*/
cd->epoch_ns = 0;
cd.epoch_ns = 0;

pr_debug("Registered %pF as sched_clock source\n", read);
}

unsigned long long notrace sched_clock(void)
{
u32 cyc = read_sched_clock();
return cyc_to_sched_clock(cyc, sched_clock_mask);
}

void __init sched_clock_postinit(void)
{
/*
* If no sched_clock function has been provided at that point,
* make it the final one.
*/
if (read_sched_clock == jiffy_sched_clock_read)
setup_sched_clock(jiffy_sched_clock_read, 32, HZ);

sched_clock_poll(sched_clock_timer.data);
}
16 changes: 3 additions & 13 deletions arch/arm/mach-ixp4xx/common.c
@@ -17,7 +17,6 @@
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/serial.h>
#include <linux/sched.h>
#include <linux/tty.h>
#include <linux/platform_device.h>
#include <linux/serial_core.h>
@@ -403,18 +402,9 @@ void __init ixp4xx_sys_init(void)
/*
* sched_clock()
*/
static DEFINE_CLOCK_DATA(cd);

unsigned long long notrace sched_clock(void)
static u32 notrace ixp4xx_read_sched_clock(void)
{
u32 cyc = *IXP4XX_OSTS;
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
}

static void notrace ixp4xx_update_sched_clock(void)
{
u32 cyc = *IXP4XX_OSTS;
update_sched_clock(&cd, cyc, (u32)~0);
return *IXP4XX_OSTS;
}

/*
@@ -430,7 +420,7 @@ unsigned long ixp4xx_timer_freq = IXP4XX_TIMER_FREQ;
EXPORT_SYMBOL(ixp4xx_timer_freq);
static void __init ixp4xx_clocksource_init(void)
{
init_sched_clock(&cd, ixp4xx_update_sched_clock, 32, ixp4xx_timer_freq);
setup_sched_clock(ixp4xx_read_sched_clock, 32, ixp4xx_timer_freq);

clocksource_mmio_init(NULL, "OSTS", ixp4xx_timer_freq, 200, 32,
ixp4xx_clocksource_read);
16 changes: 3 additions & 13 deletions arch/arm/mach-mmp/time.c
@@ -25,7 +25,6 @@

#include <linux/io.h>
#include <linux/irq.h>
#include <linux/sched.h>

#include <asm/sched_clock.h>
#include <mach/addr-map.h>
@@ -42,8 +41,6 @@
#define MAX_DELTA (0xfffffffe)
#define MIN_DELTA (16)

static DEFINE_CLOCK_DATA(cd);

/*
* FIXME: the timer needs some delay to stabilize the counter capture
*/
@@ -59,16 +56,9 @@ static inline uint32_t timer_read(void)
return __raw_readl(TIMERS_VIRT_BASE + TMR_CVWR(1));
}

unsigned long long notrace sched_clock(void)
static u32 notrace mmp_read_sched_clock(void)
{
u32 cyc = timer_read();
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
}

static void notrace mmp_update_sched_clock(void)
{
u32 cyc = timer_read();
update_sched_clock(&cd, cyc, (u32)~0);
return timer_read();
}

static irqreturn_t timer_interrupt(int irq, void *dev_id)
@@ -201,7 +191,7 @@ void __init timer_init(int irq)
{
timer_config();

init_sched_clock(&cd, mmp_update_sched_clock, 32, CLOCK_TICK_RATE);
setup_sched_clock(mmp_read_sched_clock, 32, CLOCK_TICK_RATE);

ckevt.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, ckevt.shift);
ckevt.max_delta_ns = clockevent_delta2ns(MAX_DELTA, &ckevt);
[diffs for the remaining 15 changed files not shown]
