Skip to content

Commit

Permalink
core/lock: fix timeout warning causing a deadlock false positive
Browse files Browse the repository at this point in the history
If a lock waiter exceeds the warning timeout, it prints a message
while still registered as requesting the lock. Printing the message
can take locks, so if one is held when the owner of the original
lock tries to print a message, it will get a false positive deadlock
detection, which brings down the system.

This can easily be hit when there is a lot of HMI activity from a
KVM guest, where the timebase was not returned to host timebase
before calling the HMI handler.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
  • Loading branch information
npiggin authored and stewartsmith committed Oct 10, 2018
1 parent 7dbf80d commit 2aa227c
Showing 1 changed file with 15 additions and 6 deletions.
21 changes: 15 additions & 6 deletions core/lock.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
*/
bool bust_locks = true;

#define LOCK_TIMEOUT_MS 5000

#ifdef DEBUG_LOCKS

static void __nomcount lock_error(struct lock *l, const char *reason, uint16_t err)
Expand Down Expand Up @@ -78,7 +80,6 @@ static inline bool __nomcount __try_lock(struct cpu_thread *cpu, struct lock *l)
return false;
}

#define LOCK_TIMEOUT_MS 5000
static inline bool lock_timeout(unsigned long start)
{
/* Print warning if lock has been spinning for more than TIMEOUT_MS */
Expand All @@ -92,9 +93,6 @@ static inline bool lock_timeout(unsigned long start)
*/
if( !(mfspr(SPR_TFMR) & SPR_TFMR_TB_VALID))
return false;
prlog(PR_WARNING, "WARNING: Lock has been "\
"spinning for %lums\n", wait - start);
backtrace();
return true;
}

Expand Down Expand Up @@ -252,8 +250,19 @@ void lock_caller(struct lock *l, const char *owner)
barrier();
smt_medium();

if (start && !timeout_warn)
timeout_warn = lock_timeout(start);
if (start && !timeout_warn && lock_timeout(start)) {
/*
* Holding the lock request while printing a
* timeout and taking console locks can result
* in deadlock fals positive if the lock owner
* tries to take the console lock. So drop it.
*/
remove_lock_request();
prlog(PR_WARNING, "WARNING: Lock has been spinning for over %dms\n", LOCK_TIMEOUT_MS);
backtrace();
add_lock_request(l);
timeout_warn = true;
}
}

remove_lock_request();
Expand Down

0 comments on commit 2aa227c

Please sign in to comment.