Skip to content

Commit 6b43ae8

Browse files
ntp: Fix leap-second hrtimer livelock
Since commit 7dffa3c the ntp subsystem has used an hrtimer for triggering the leapsecond adjustment. However, this can cause a potential livelock.

Thomas diagnosed this as the following pattern:

CPU 0                                         CPU 1
do_adjtimex()
  spin_lock_irq(&ntp_lock);
    process_adjtimex_modes();                 timer_interrupt()
      process_adj_status();                     do_timer()
        ntp_start_leap_timer();                   write_lock(&xtime_lock);
          hrtimer_start();                        update_wall_time();
            hrtimer_reprogram();                    ntp_tick_length()
              tick_program_event()                    spin_lock(&ntp_lock);
                clockevents_program_event()
                  ktime_get()
                    seq = read_seqbegin(xtime_lock);

This patch tries to avoid the problem by reverting back to not using an hrtimer to inject leapseconds, and instead we handle the leapsecond processing in the second_overflow() function.

The downside to this change is that on systems that support highres timers, the leap second processing will occur on a HZ tick boundary (i.e. ~1-10ms, depending on HZ) after the leap second instead of possibly sooner (~34us in my tests w/ x86_64 lapic).

This patch applies on top of tip/timers/core.

CC: Sasha Levin <levinsasha928@gmail.com>
CC: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Sasha Levin <levinsasha928@gmail.com>
Diagnosed-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
1 parent 57779dc commit 6b43ae8

File tree

3 files changed

+48
-102
lines changed

3 files changed

+48
-102
lines changed

include/linux/timex.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ extern void ntp_clear(void);
252252
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
253253
extern u64 ntp_tick_length(void);
254254

255-
extern void second_overflow(void);
255+
extern int second_overflow(unsigned long secs);
256256
extern int do_adjtimex(struct timex *);
257257
extern void hardpps(const struct timespec *, const struct timespec *);
258258

kernel/time/ntp.c

Lines changed: 41 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@ unsigned long tick_nsec;
3434
static u64 tick_length;
3535
static u64 tick_length_base;
3636

37-
static struct hrtimer leap_timer;
38-
3937
#define MAX_TICKADJ 500LL /* usecs */
4038
#define MAX_TICKADJ_SCALED \
4139
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
@@ -381,70 +379,63 @@ u64 ntp_tick_length(void)
381379

382380

383381
/*
384-
* Leap second processing. If in leap-insert state at the end of the
385-
* day, the system clock is set back one second; if in leap-delete
386-
* state, the system clock is set ahead one second.
382+
* this routine handles the overflow of the microsecond field
383+
*
384+
* The tricky bits of code to handle the accurate clock support
385+
* were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
386+
* They were originally developed for SUN and DEC kernels.
387+
* All the kudos should go to Dave for this stuff.
388+
*
389+
* Also handles leap second processing, and returns leap offset
387390
*/
388-
static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
391+
int second_overflow(unsigned long secs)
389392
{
390-
enum hrtimer_restart res = HRTIMER_NORESTART;
391-
unsigned long flags;
393+
s64 delta;
392394
int leap = 0;
395+
unsigned long flags;
393396

394397
spin_lock_irqsave(&ntp_lock, flags);
398+
399+
/*
400+
* Leap second processing. If in leap-insert state at the end of the
401+
* day, the system clock is set back one second; if in leap-delete
402+
* state, the system clock is set ahead one second.
403+
*/
395404
switch (time_state) {
396405
case TIME_OK:
406+
if (time_status & STA_INS)
407+
time_state = TIME_INS;
408+
else if (time_status & STA_DEL)
409+
time_state = TIME_DEL;
397410
break;
398411
case TIME_INS:
399-
leap = -1;
400-
time_state = TIME_OOP;
401-
printk(KERN_NOTICE
402-
"Clock: inserting leap second 23:59:60 UTC\n");
403-
hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
404-
res = HRTIMER_RESTART;
412+
if (secs % 86400 == 0) {
413+
leap = -1;
414+
time_state = TIME_OOP;
415+
printk(KERN_NOTICE
416+
"Clock: inserting leap second 23:59:60 UTC\n");
417+
}
405418
break;
406419
case TIME_DEL:
407-
leap = 1;
408-
time_tai--;
409-
time_state = TIME_WAIT;
410-
printk(KERN_NOTICE
411-
"Clock: deleting leap second 23:59:59 UTC\n");
420+
if ((secs + 1) % 86400 == 0) {
421+
leap = 1;
422+
time_tai--;
423+
time_state = TIME_WAIT;
424+
printk(KERN_NOTICE
425+
"Clock: deleting leap second 23:59:59 UTC\n");
426+
}
412427
break;
413428
case TIME_OOP:
414429
time_tai++;
415430
time_state = TIME_WAIT;
416-
/* fall through */
431+
break;
432+
417433
case TIME_WAIT:
418434
if (!(time_status & (STA_INS | STA_DEL)))
419435
time_state = TIME_OK;
420436
break;
421437
}
422-
spin_unlock_irqrestore(&ntp_lock, flags);
423-
424-
/*
425-
* We have to call this outside of the ntp_lock to keep
426-
* the proper locking hierarchy
427-
*/
428-
if (leap)
429-
timekeeping_leap_insert(leap);
430-
431-
return res;
432-
}
433-
434-
/*
435-
* this routine handles the overflow of the microsecond field
436-
*
437-
* The tricky bits of code to handle the accurate clock support
438-
* were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
439-
* They were originally developed for SUN and DEC kernels.
440-
* All the kudos should go to Dave for this stuff.
441-
*/
442-
void second_overflow(void)
443-
{
444-
s64 delta;
445-
unsigned long flags;
446438

447-
spin_lock_irqsave(&ntp_lock, flags);
448439

449440
/* Bump the maxerror field */
450441
time_maxerror += MAXFREQ / NSEC_PER_USEC;
@@ -481,8 +472,13 @@ void second_overflow(void)
481472
tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
482473
<< NTP_SCALE_SHIFT;
483474
time_adjust = 0;
475+
476+
477+
484478
out:
485479
spin_unlock_irqrestore(&ntp_lock, flags);
480+
481+
return leap;
486482
}
487483

488484
#ifdef CONFIG_GENERIC_CMOS_UPDATE
@@ -544,27 +540,6 @@ static void notify_cmos_timer(void)
544540
static inline void notify_cmos_timer(void) { }
545541
#endif
546542

547-
/*
548-
* Start the leap seconds timer:
549-
*/
550-
static inline void ntp_start_leap_timer(struct timespec *ts)
551-
{
552-
long now = ts->tv_sec;
553-
554-
if (time_status & STA_INS) {
555-
time_state = TIME_INS;
556-
now += 86400 - now % 86400;
557-
hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
558-
559-
return;
560-
}
561-
562-
if (time_status & STA_DEL) {
563-
time_state = TIME_DEL;
564-
now += 86400 - (now + 1) % 86400;
565-
hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
566-
}
567-
}
568543

569544
/*
570545
* Propagate a new txc->status value into the NTP state:
@@ -589,22 +564,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
589564
time_status &= STA_RONLY;
590565
time_status |= txc->status & ~STA_RONLY;
591566

592-
switch (time_state) {
593-
case TIME_OK:
594-
ntp_start_leap_timer(ts);
595-
break;
596-
case TIME_INS:
597-
case TIME_DEL:
598-
time_state = TIME_OK;
599-
ntp_start_leap_timer(ts);
600-
case TIME_WAIT:
601-
if (!(time_status & (STA_INS | STA_DEL)))
602-
time_state = TIME_OK;
603-
break;
604-
case TIME_OOP:
605-
hrtimer_restart(&leap_timer);
606-
break;
607-
}
608567
}
609568
/*
610569
* Called with the xtime lock held, so we can access and modify
@@ -686,9 +645,6 @@ int do_adjtimex(struct timex *txc)
686645
(txc->tick < 900000/USER_HZ ||
687646
txc->tick > 1100000/USER_HZ))
688647
return -EINVAL;
689-
690-
if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
691-
hrtimer_cancel(&leap_timer);
692648
}
693649

694650
if (txc->modes & ADJ_SETOFFSET) {
@@ -1010,6 +966,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
1010966
void __init ntp_init(void)
1011967
{
1012968
ntp_clear();
1013-
hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
1014-
leap_timer.function = ntp_leap_second;
1015969
}

kernel/time/timekeeping.c

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -184,18 +184,6 @@ static void timekeeping_update(bool clearntp)
184184
}
185185

186186

187-
void timekeeping_leap_insert(int leapsecond)
188-
{
189-
unsigned long flags;
190-
191-
write_seqlock_irqsave(&timekeeper.lock, flags);
192-
timekeeper.xtime.tv_sec += leapsecond;
193-
timekeeper.wall_to_monotonic.tv_sec -= leapsecond;
194-
timekeeping_update(false);
195-
write_sequnlock_irqrestore(&timekeeper.lock, flags);
196-
197-
}
198-
199187
/**
200188
* timekeeping_forward_now - update clock to the current time
201189
*
@@ -969,9 +957,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
969957

970958
timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
971959
while (timekeeper.xtime_nsec >= nsecps) {
960+
int leap;
972961
timekeeper.xtime_nsec -= nsecps;
973962
timekeeper.xtime.tv_sec++;
974-
second_overflow();
963+
leap = second_overflow(timekeeper.xtime.tv_sec);
964+
timekeeper.xtime.tv_sec += leap;
975965
}
976966

977967
/* Accumulate raw time */
@@ -1082,9 +1072,11 @@ static void update_wall_time(void)
10821072
* xtime.tv_nsec isn't larger then NSEC_PER_SEC
10831073
*/
10841074
if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) {
1075+
int leap;
10851076
timekeeper.xtime.tv_nsec -= NSEC_PER_SEC;
10861077
timekeeper.xtime.tv_sec++;
1087-
second_overflow();
1078+
leap = second_overflow(timekeeper.xtime.tv_sec);
1079+
timekeeper.xtime.tv_sec += leap;
10881080
}
10891081

10901082
timekeeping_update(false);

0 commit comments

Comments
 (0)