rwsem: steal writing sem for better performance
Commit 5a50508 changed a mutex to an rwsem, which caused aim7 fork_test
performance to drop by 50%. Yuanhan Liu analyzed the regression and found
it was caused by strictly sequential write-lock granting. Ingo suggested
stealing the write lock from the task at the front of the wait queue:
https://lkml.org/lkml/2013/1/29/84

This patch implements that suggestion: write-lock stealing is allowed only
when the first waiter in the queue is also a writer. With this change the
performance is now fully recovered.

Reported-by: lkp@linux.intel.com
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Alex Shi <alex.shi@intel.com>
Alex Shi authored and mnm9994u committed Jan 19, 2014
1 parent 341b6f7 commit 0972938
Showing 1 changed file with 46 additions and 29 deletions.
75 changes: 46 additions & 29 deletions lib/rwsem.c
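Before the diff, a rough stand-alone sketch of the stealing idea described in the commit message: a queued writer claims the lock directly, but only while the waiter at the head of the queue is also a writer, so queued readers are never bypassed. This is an illustration using C11 atomics, not the kernel code; the names simple_rwsem, head_waiter_is_writer and try_steal_write are hypothetical.

#include <stdatomic.h>
#include <stdbool.h>

struct simple_rwsem {
	atomic_long count;             /* 0 = free, >0 = readers, -1 = one writer */
	bool head_waiter_is_writer;    /* assumed protected by a wait-queue lock */
};

/* Try to steal the write lock; the caller is assumed to hold the wait-queue lock. */
static bool try_steal_write(struct simple_rwsem *sem)
{
	long expected = 0;

	/* Only steal when the first queued waiter is also a writer. */
	if (!sem->head_waiter_is_writer)
		return false;

	/* Succeed only on a free -> writer (0 -> -1) transition of the count. */
	return atomic_compare_exchange_strong(&sem->count, &expected, -1L);
}

In the actual patch below, the analogous 0 -> 1 transition of the active count is attempted with rwsem_atomic_update() inside try_get_writer_sem(), while holding sem->wait_lock.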
@@ -2,6 +2,8 @@
*
* Written by David Howells (dhowells@redhat.com).
* Derived from arch/i386/kernel/semaphore.c
*
* Steal writing sem. Alex Shi <alex.shi@intel.com>
*/
#include <linux/rwsem.h>
#include <linux/sched.h>
@@ -60,7 +62,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
struct rwsem_waiter *waiter;
struct task_struct *tsk;
struct list_head *next;
signed long oldcount, woken, loop, adjustment;
signed long woken, loop, adjustment;

waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
@@ -72,30 +74,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
*/
goto out;

/* There's a writer at the front of the queue - try to grant it the
* write lock. However, we only wake this writer if we can transition
* the active part of the count from 0 -> 1
*/
adjustment = RWSEM_ACTIVE_WRITE_BIAS;
if (waiter->list.next == &sem->wait_list)
adjustment -= RWSEM_WAITING_BIAS;

try_again_write:
oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
if (oldcount & RWSEM_ACTIVE_MASK)
/* Someone grabbed the sem already */
goto undo_write;

/* We must be careful not to touch 'waiter' after we set ->task = NULL.
* It is allocated on the waiter's stack and may become invalid at
* any time after that point (due to a wakeup from another source).
*/
list_del(&waiter->list);
tsk = waiter->task;
smp_mb();
waiter->task = NULL;
wake_up_process(tsk);
put_task_struct(tsk);
/* wake up the writing waiter and let the task grab the sem */
wake_up_process(waiter->task);
goto out;

readers_only:
@@ -157,12 +137,40 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wake_type)

out:
return sem;
}

/* try to get write sem, caller holds sem->wait_lock */
static int try_get_writer_sem(struct rw_semaphore *sem,
struct rwsem_waiter *waiter)
{
struct rwsem_waiter *fwaiter;
long oldcount, adjustment;

/* undo the change to the active count, but check for a transition
* 1->0 */
undo_write:
/* only steal when first waiter is writing */
fwaiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
if (!(fwaiter->flags & RWSEM_WAITING_FOR_WRITE))
return 0;

adjustment = RWSEM_ACTIVE_WRITE_BIAS;
/* only one waiter in queue */
if (fwaiter == waiter && waiter->list.next == &sem->wait_list)
adjustment -= RWSEM_WAITING_BIAS;

try_again_write:
oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
if (!(oldcount & RWSEM_ACTIVE_MASK)) {
/* no active lock */
struct task_struct *tsk = waiter->task;

list_del(&waiter->list);
smp_mb();
put_task_struct(tsk);
tsk->state = TASK_RUNNING;
return 1;
}
/* someone grabbed the sem already */
if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
goto out;
return 0;
goto try_again_write;
}

@@ -210,6 +218,15 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
for (;;) {
if (!waiter.task)
break;

raw_spin_lock_irq(&sem->wait_lock);
/* try to get the writer sem, may steal from the head writer */
if (flags == RWSEM_WAITING_FOR_WRITE)
if (try_get_writer_sem(sem, &waiter)) {
raw_spin_unlock_irq(&sem->wait_lock);
return sem;
}
raw_spin_unlock_irq(&sem->wait_lock);
schedule();
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
}
