mm/slab.c: 105 additions & 49 deletions (154 changes)
@@ -117,6 +117,7 @@
#include <linux/kmemcheck.h>
#include <linux/memory.h>
#include <linux/prefetch.h>
#include <linux/locallock.h>

#include <net/sock.h>

@@ -688,12 +689,40 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
#endif

static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
static DEFINE_LOCAL_IRQ_LOCK(slab_lock);

#ifndef CONFIG_PREEMPT_RT_BASE
# define slab_on_each_cpu(func, cp) on_each_cpu(func, cp, 1)
#else
/*
* execute func() for all CPUs. On PREEMPT_RT we don't actually have
* to run on the remote CPUs - we only have to take their CPU-locks.
* (This is a rare operation, so cacheline bouncing is not an issue.)
*/
static void
slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg)
{
unsigned int i;

for_each_online_cpu(i) {
spin_lock_irq(&per_cpu(slab_lock, i).lock);
func(arg, i);
spin_unlock_irq(&per_cpu(slab_lock, i).lock);
}
}
#endif
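
For orientation while reviewing: the local-lock primitives used throughout this patch come from the RT tree's <linux/locallock.h>. On !PREEMPT_RT_BASE they are expected to collapse to the plain IRQ-disable/spinlock calls they replace, while on RT the per-CPU slab_lock is backed by a preemptible spinlock, which is what slab_on_each_cpu() above takes for remote CPUs instead of sending IPIs. The mapping sketched below is an assumption for review purposes, not the authoritative header:

/*
 * Assumed !PREEMPT_RT_BASE mapping of the locallock calls used in this
 * patch (sketch only; the real <linux/locallock.h> also handles lockdep,
 * nesting and the RT case, where 'lvar' is a per-CPU local_irq_lock).
 */
#ifndef CONFIG_PREEMPT_RT_BASE
# define local_lock_irq(lvar)			local_irq_disable()
# define local_unlock_irq(lvar)			local_irq_enable()
# define local_lock_irqsave(lvar, flags)	local_irq_save(flags)
# define local_unlock_irqrestore(lvar, flags)	local_irq_restore(flags)
# define local_spin_lock_irq(lvar, lock)	spin_lock_irq(lock)
# define local_spin_unlock_irq(lvar, lock)	spin_unlock_irq(lock)
# define local_spin_trylock_irq(lvar, lock)	spin_trylock_irq(lock)
# define local_spin_lock_irqsave(lvar, lock, flags)	spin_lock_irqsave(lock, flags)
# define local_spin_unlock_irqrestore(lvar, lock, flags) spin_unlock_irqrestore(lock, flags)
#endif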

static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
return cachep->array[smp_processor_id()];
}

static inline struct array_cache *cpu_cache_get_on_cpu(struct kmem_cache *cachep,
int cpu)
{
return cachep->array[cpu];
}

static inline struct kmem_cache *__find_general_cachep(size_t size,
gfp_t gfpflags)
{
@@ -1163,9 +1192,10 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
if (l3->alien) {
struct array_cache *ac = l3->alien[node];

if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
if (ac && ac->avail &&
local_spin_trylock_irq(slab_lock, &ac->lock)) {
__drain_alien_cache(cachep, ac, node);
spin_unlock_irq(&ac->lock);
local_spin_unlock_irq(slab_lock, &ac->lock);
}
}
}
@@ -1180,9 +1210,9 @@ static void drain_alien_cache(struct kmem_cache *cachep,
for_each_online_node(i) {
ac = alien[i];
if (ac) {
spin_lock_irqsave(&ac->lock, flags);
local_spin_lock_irqsave(slab_lock, &ac->lock, flags);
__drain_alien_cache(cachep, ac, i);
spin_unlock_irqrestore(&ac->lock, flags);
local_spin_unlock_irqrestore(slab_lock, &ac->lock, flags);
}
}
}
@@ -1261,11 +1291,11 @@ static int init_cache_nodelists_node(int node)
cachep->nodelists[node] = l3;
}

spin_lock_irq(&cachep->nodelists[node]->list_lock);
local_spin_lock_irq(slab_lock, &cachep->nodelists[node]->list_lock);
cachep->nodelists[node]->free_limit =
(1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
spin_unlock_irq(&cachep->nodelists[node]->list_lock);
local_spin_unlock_irq(slab_lock, &cachep->nodelists[node]->list_lock);
}
return 0;
}
@@ -1290,15 +1320,15 @@ static void __cpuinit cpuup_canceled(long cpu)
if (!l3)
goto free_array_cache;

spin_lock_irq(&l3->list_lock);
local_spin_lock_irq(slab_lock, &l3->list_lock);

/* Free limit for this kmem_list3 */
l3->free_limit -= cachep->batchcount;
if (nc)
free_block(cachep, nc->entry, nc->avail, node);

if (!cpumask_empty(mask)) {
spin_unlock_irq(&l3->list_lock);
local_spin_unlock_irq(slab_lock, &l3->list_lock);
goto free_array_cache;
}

@@ -1312,7 +1342,7 @@ static void __cpuinit cpuup_canceled(long cpu)
alien = l3->alien;
l3->alien = NULL;

spin_unlock_irq(&l3->list_lock);
local_spin_unlock_irq(slab_lock, &l3->list_lock);

kfree(shared);
if (alien) {
@@ -1386,7 +1416,7 @@ static int __cpuinit cpuup_prepare(long cpu)
l3 = cachep->nodelists[node];
BUG_ON(!l3);

spin_lock_irq(&l3->list_lock);
local_spin_lock_irq(slab_lock, &l3->list_lock);
if (!l3->shared) {
/*
* We are serialised from CPU_DEAD or
Expand All @@ -1401,7 +1431,7 @@ static int __cpuinit cpuup_prepare(long cpu)
alien = NULL;
}
#endif
spin_unlock_irq(&l3->list_lock);
local_spin_unlock_irq(slab_lock, &l3->list_lock);
kfree(shared);
free_alien_cache(alien);
if (cachep->flags & SLAB_DEBUG_OBJECTS)
@@ -1594,6 +1624,8 @@ void __init kmem_cache_init(void)
if (num_possible_nodes() == 1)
use_alien_caches = 0;

local_irq_lock_init(slab_lock);

for (i = 0; i < NUM_INIT_LISTS; i++) {
kmem_list3_init(&initkmem_list3[i]);
if (i < MAX_NUMNODES)
@@ -2574,7 +2606,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
#if DEBUG
static void check_irq_off(void)
{
BUG_ON(!irqs_disabled());
BUG_ON_NONRT(!irqs_disabled());
}
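
check_irq_off() is relaxed to BUG_ON_NONRT() because, with slab_lock being a preemptible lock on RT, the slab paths no longer run with hard interrupts disabled there. The macro is provided elsewhere in the RT series; a plausible shape, stated as an assumption:

/* Assumed semantics of BUG_ON_NONRT(); not defined by this hunk. */
#ifdef CONFIG_PREEMPT_RT_BASE
# define BUG_ON_NONRT(condition)	do { } while (0)
#else
# define BUG_ON_NONRT(condition)	BUG_ON(condition)
#endif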

static void check_irq_on(void)
@@ -2609,26 +2641,37 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
struct array_cache *ac,
int force, int node);

static void do_drain(void *arg)
static void __do_drain(void *arg, unsigned int cpu)
{
struct kmem_cache *cachep = arg;
struct array_cache *ac;
int node = numa_mem_id();
int node = cpu_to_mem(cpu);

check_irq_off();
ac = cpu_cache_get(cachep);
ac = cpu_cache_get_on_cpu(cachep, cpu);
spin_lock(&cachep->nodelists[node]->list_lock);
free_block(cachep, ac->entry, ac->avail, node);
spin_unlock(&cachep->nodelists[node]->list_lock);
ac->avail = 0;
}

#ifndef CONFIG_PREEMPT_RT_BASE
static void do_drain(void *arg)
{
__do_drain(arg, smp_processor_id());
}
#else
static void do_drain(void *arg, int this_cpu)
{
__do_drain(arg, this_cpu);
}
#endif

static void drain_cpu_caches(struct kmem_cache *cachep)
{
struct kmem_list3 *l3;
int node;

on_each_cpu(do_drain, cachep, 1);
slab_on_each_cpu(do_drain, cachep);
check_irq_on();
for_each_online_node(node) {
l3 = cachep->nodelists[node];
@@ -2659,10 +2702,10 @@ static int drain_freelist(struct kmem_cache *cache,
nr_freed = 0;
while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {

spin_lock_irq(&l3->list_lock);
local_spin_lock_irq(slab_lock, &l3->list_lock);
p = l3->slabs_free.prev;
if (p == &l3->slabs_free) {
spin_unlock_irq(&l3->list_lock);
local_spin_unlock_irq(slab_lock, &l3->list_lock);
goto out;
}

@@ -2676,7 +2719,7 @@
* to the cache.
*/
l3->free_objects -= cache->num;
spin_unlock_irq(&l3->list_lock);
local_spin_unlock_irq(slab_lock, &l3->list_lock);
slab_destroy(cache, slabp);
nr_freed++;
}
@@ -2951,7 +2994,7 @@ static int cache_grow(struct kmem_cache *cachep,
offset *= cachep->colour_off;

if (local_flags & __GFP_WAIT)
local_irq_enable();
local_unlock_irq(slab_lock);

/*
* The test for missing atomic flag is performed here, rather than
@@ -2981,7 +3024,7 @@ static int cache_grow(struct kmem_cache *cachep,
cache_init_objs(cachep, slabp);

if (local_flags & __GFP_WAIT)
local_irq_disable();
local_lock_irq(slab_lock);
check_irq_off();
spin_lock(&l3->list_lock);

@@ -2995,7 +3038,7 @@ static int cache_grow(struct kmem_cache *cachep,
kmem_freepages(cachep, objp);
failed:
if (local_flags & __GFP_WAIT)
local_irq_disable();
local_lock_irq(slab_lock);
return 0;
}
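
cache_grow() above, and fallback_alloc() further down, keep the existing convention of dropping slab protection across a page allocation that may sleep for __GFP_WAIT requests; only the mechanism changes from local_irq_enable()/local_irq_disable() to releasing and retaking slab_lock. A minimal illustration of the pattern, with alloc_pages_for_slab() as a made-up placeholder rather than a real slab helper:

/* Illustration only: drop the local lock around a possibly sleeping
 * page allocation, then retake it before touching slab state again. */
static struct page *alloc_pages_for_slab(gfp_t local_flags, int order)
{
	struct page *page;

	if (local_flags & __GFP_WAIT)
		local_unlock_irq(slab_lock);	/* allocation may sleep */

	page = alloc_pages(local_flags, order);

	if (local_flags & __GFP_WAIT)
		local_lock_irq(slab_lock);	/* back under slab protection */

	return page;
}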

@@ -3409,11 +3452,11 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
* set and go into memory reserves if necessary.
*/
if (local_flags & __GFP_WAIT)
local_irq_enable();
local_unlock_irq(slab_lock);
kmem_flagcheck(cache, flags);
obj = kmem_getpages(cache, local_flags, numa_mem_id());
if (local_flags & __GFP_WAIT)
local_irq_disable();
local_lock_irq(slab_lock);
if (obj) {
/*
* Insert into the appropriate per node queues
@@ -3531,7 +3574,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
return NULL;

cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
local_lock_irqsave(slab_lock, save_flags);

if (nodeid == NUMA_NO_NODE)
nodeid = slab_node;
@@ -3556,7 +3599,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
/* ___cache_alloc_node can fall back to other nodes */
ptr = ____cache_alloc_node(cachep, flags, nodeid);
out:
local_irq_restore(save_flags);
local_unlock_irqrestore(slab_lock, save_flags);
ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
flags);
@@ -3616,9 +3659,9 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
return NULL;

cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
local_lock_irqsave(slab_lock, save_flags);
objp = __do_cache_alloc(cachep, flags);
local_irq_restore(save_flags);
local_unlock_irqrestore(slab_lock, save_flags);
objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags,
flags);
@@ -3932,9 +3975,9 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
debug_check_no_locks_freed(objp, cachep->object_size);
if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(objp, cachep->object_size);
local_irq_save(flags);
local_lock_irqsave(slab_lock, flags);
__cache_free(cachep, objp, _RET_IP_);
local_irq_restore(flags);
local_unlock_irqrestore(slab_lock, flags);

trace_kmem_cache_free(_RET_IP_, objp);
}
@@ -3963,9 +4006,9 @@ void kfree(const void *objp)
debug_check_no_locks_freed(objp, c->object_size);

debug_check_no_obj_freed(objp, c->object_size);
local_irq_save(flags);
local_lock_irqsave(slab_lock, flags);
__cache_free(c, (void *)objp, _RET_IP_);
local_irq_restore(flags);
local_unlock_irqrestore(slab_lock, flags);
}
EXPORT_SYMBOL(kfree);

@@ -4008,7 +4051,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
if (l3) {
struct array_cache *shared = l3->shared;

spin_lock_irq(&l3->list_lock);
local_spin_lock_irq(slab_lock, &l3->list_lock);

if (shared)
free_block(cachep, shared->entry,
@@ -4021,7 +4064,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
}
l3->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
spin_unlock_irq(&l3->list_lock);
local_spin_unlock_irq(slab_lock, &l3->list_lock);
kfree(shared);
free_alien_cache(new_alien);
continue;
@@ -4068,17 +4111,28 @@ struct ccupdate_struct {
struct array_cache *new[0];
};

static void do_ccupdate_local(void *info)
static void __do_ccupdate_local(void *info, int cpu)
{
struct ccupdate_struct *new = info;
struct array_cache *old;

check_irq_off();
old = cpu_cache_get(new->cachep);
old = cpu_cache_get_on_cpu(new->cachep, cpu);

new->cachep->array[cpu] = new->new[cpu];
new->new[cpu] = old;
}

new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
new->new[smp_processor_id()] = old;
#ifndef CONFIG_PREEMPT_RT_BASE
static void do_ccupdate_local(void *info)
{
__do_ccupdate_local(info, smp_processor_id());
}
#else
static void do_ccupdate_local(void *info, int cpu)
{
__do_ccupdate_local(info, cpu);
}
#endif
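
do_ccupdate_local() gets the same treatment as do_drain() earlier: the worker takes an explicit cpu argument because on RT slab_on_each_cpu() runs it from the calling CPU while only holding the target CPU's slab_lock, so smp_processor_id() (or numa_mem_id()) would name the wrong CPU. The generic shape of that split, with frob_cpu()/__frob_cpu() as illustrative names only:

/* Generic form of the do_drain()/do_ccupdate_local() split; names are
 * made up for illustration. */
static void __frob_cpu(void *arg, int cpu)
{
	/* Index per-CPU state by 'cpu', not smp_processor_id(): on RT
	 * this can run on a different CPU than the one it operates on. */
}

#ifndef CONFIG_PREEMPT_RT_BASE
static void frob_cpu(void *arg)			/* on_each_cpu() callback */
{
	__frob_cpu(arg, smp_processor_id());
}
#else
static void frob_cpu(void *arg, int this_cpu)	/* slab_on_each_cpu() callback */
{
	__frob_cpu(arg, this_cpu);
}
#endif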

/* Always called with the slab_mutex held */
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
Expand All @@ -4104,7 +4158,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
}
new->cachep = cachep;

on_each_cpu(do_ccupdate_local, (void *)new, 1);
slab_on_each_cpu(do_ccupdate_local, (void *)new);

check_irq_on();
cachep->batchcount = batchcount;
@@ -4115,9 +4169,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
struct array_cache *ccold = new->new[i];
if (!ccold)
continue;
spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
local_spin_lock_irq(slab_lock,
&cachep->nodelists[cpu_to_mem(i)]->list_lock);
free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
local_spin_unlock_irq(slab_lock,
&cachep->nodelists[cpu_to_mem(i)]->list_lock);
kfree(ccold);
}
kfree(new);
@@ -4193,7 +4249,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
if (ac->touched && !force) {
ac->touched = 0;
} else {
spin_lock_irq(&l3->list_lock);
local_spin_lock_irq(slab_lock, &l3->list_lock);
if (ac->avail) {
tofree = force ? ac->avail : (ac->limit + 4) / 5;
if (tofree > ac->avail)
@@ -4203,7 +4259,7 @@
memmove(ac->entry, &(ac->entry[tofree]),
sizeof(void *) * ac->avail);
}
spin_unlock_irq(&l3->list_lock);
local_spin_unlock_irq(slab_lock, &l3->list_lock);
}
}

@@ -4342,7 +4398,7 @@ static int s_show(struct seq_file *m, void *p)
continue;

check_irq_on();
spin_lock_irq(&l3->list_lock);
local_spin_lock_irq(slab_lock, &l3->list_lock);

list_for_each_entry(slabp, &l3->slabs_full, list) {
if (slabp->inuse != cachep->num && !error)
@@ -4367,7 +4423,7 @@ static int s_show(struct seq_file *m, void *p)
if (l3->shared)
shared_avail += l3->shared->avail;

spin_unlock_irq(&l3->list_lock);
local_spin_unlock_irq(slab_lock, &l3->list_lock);
}
num_slabs += active_slabs;
num_objs = num_slabs * cachep->num;
@@ -4596,13 +4652,13 @@ static int leaks_show(struct seq_file *m, void *p)
continue;

check_irq_on();
spin_lock_irq(&l3->list_lock);
local_spin_lock_irq(slab_lock, &l3->list_lock);

list_for_each_entry(slabp, &l3->slabs_full, list)
handle_slab(n, cachep, slabp);
list_for_each_entry(slabp, &l3->slabs_partial, list)
handle_slab(n, cachep, slabp);
spin_unlock_irq(&l3->list_lock);
local_spin_unlock_irq(slab_lock, &l3->list_lock);
}
name = cachep->name;
if (n[0] == n[1]) {