Skip to content

Commit

Permalink
neigh: RCU conversion of struct neighbour
Browse files Browse the repository at this point in the history
This is the second step for neighbour RCU conversion.

(first was commit d6bf781 : RCU conversion of neigh hash table)

neigh_lookup() becomes lockless, but still takes a reference on the found
neighbour. (no more read_lock()/read_unlock() on tbl->lock)

struct neighbour gets an additional rcu_head field and is freed after an
RCU grace period.

Future work should eventually avoid taking a reference on the neighbour
for temporary dst entries (DST_NOCACHE), but this would require
dst->_neighbour to use a noref bit, as was done for skb->_dst.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Eric Dumazet authored and davem330 committed Oct 7, 2010
1 parent 546add7 commit 767e97e
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 54 deletions.
5 changes: 3 additions & 2 deletions include/net/neighbour.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ struct neigh_statistics {
#define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field)

struct neighbour {
struct neighbour *next;
struct neighbour __rcu *next;
struct neigh_table *tbl;
struct neigh_parms *parms;
struct net_device *dev;
Expand All @@ -111,6 +111,7 @@ struct neighbour {
struct sk_buff_head arp_queue;
struct timer_list timer;
const struct neigh_ops *ops;
struct rcu_head rcu;
u8 primary_key[0];
};

Expand Down Expand Up @@ -139,7 +140,7 @@ struct pneigh_entry {
*/

struct neigh_hash_table {
struct neighbour **hash_buckets;
struct neighbour __rcu **hash_buckets;
unsigned int hash_mask;
__u32 hash_rnd;
struct rcu_head rcu;
Expand Down
137 changes: 85 additions & 52 deletions net/core/neighbour.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,18 +139,22 @@ static int neigh_forced_gc(struct neigh_table *tbl)
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
for (i = 0; i <= nht->hash_mask; i++) {
struct neighbour *n, **np;
struct neighbour *n;
struct neighbour __rcu **np;

np = &nht->hash_buckets[i];
while ((n = *np) != NULL) {
while ((n = rcu_dereference_protected(*np,
lockdep_is_held(&tbl->lock))) != NULL) {
/* Neighbour record may be discarded if:
* - nobody refers to it.
* - it is not permanent
*/
write_lock(&n->lock);
if (atomic_read(&n->refcnt) == 1 &&
!(n->nud_state & NUD_PERMANENT)) {
*np = n->next;
rcu_assign_pointer(*np,
rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock)));
n->dead = 1;
shrunk = 1;
write_unlock(&n->lock);
Expand Down Expand Up @@ -208,14 +212,18 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
lockdep_is_held(&tbl->lock));

for (i = 0; i <= nht->hash_mask; i++) {
struct neighbour *n, **np = &nht->hash_buckets[i];
struct neighbour *n;
struct neighbour __rcu **np = &nht->hash_buckets[i];

while ((n = *np) != NULL) {
while ((n = rcu_dereference_protected(*np,
lockdep_is_held(&tbl->lock))) != NULL) {
if (dev && n->dev != dev) {
np = &n->next;
continue;
}
*np = n->next;
rcu_assign_pointer(*np,
rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock)));
write_lock(&n->lock);
neigh_del_timer(n);
n->dead = 1;
Expand Down Expand Up @@ -323,7 +331,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
kfree(ret);
return NULL;
}
ret->hash_buckets = buckets;
rcu_assign_pointer(ret->hash_buckets, buckets);
ret->hash_mask = entries - 1;
get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
return ret;
Expand Down Expand Up @@ -362,17 +370,22 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
for (i = 0; i <= old_nht->hash_mask; i++) {
struct neighbour *n, *next;

for (n = old_nht->hash_buckets[i];
for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
lockdep_is_held(&tbl->lock));
n != NULL;
n = next) {
hash = tbl->hash(n->primary_key, n->dev,
new_nht->hash_rnd);

hash &= new_nht->hash_mask;
next = n->next;

n->next = new_nht->hash_buckets[hash];
new_nht->hash_buckets[hash] = n;
next = rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock));

rcu_assign_pointer(n->next,
rcu_dereference_protected(
new_nht->hash_buckets[hash],
lockdep_is_held(&tbl->lock)));
rcu_assign_pointer(new_nht->hash_buckets[hash], n);
}
}

Expand All @@ -394,15 +407,18 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
read_lock(&tbl->lock);
for (n = nht->hash_buckets[hash_val]; n; n = n->next) {

for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
n != NULL;
n = rcu_dereference_bh(n->next)) {
if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
neigh_hold(n);
if (!atomic_inc_not_zero(&n->refcnt))
n = NULL;
NEIGH_CACHE_STAT_INC(tbl, hits);
break;
}
}
read_unlock(&tbl->lock);

rcu_read_unlock_bh();
return n;
}
Expand All @@ -421,16 +437,19 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;
read_lock(&tbl->lock);
for (n = nht->hash_buckets[hash_val]; n; n = n->next) {

for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
n != NULL;
n = rcu_dereference_bh(n->next)) {
if (!memcmp(n->primary_key, pkey, key_len) &&
net_eq(dev_net(n->dev), net)) {
neigh_hold(n);
if (!atomic_inc_not_zero(&n->refcnt))
n = NULL;
NEIGH_CACHE_STAT_INC(tbl, hits);
break;
}
}
read_unlock(&tbl->lock);

rcu_read_unlock_bh();
return n;
}
Expand Down Expand Up @@ -483,18 +502,24 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
goto out_tbl_unlock;
}

for (n1 = nht->hash_buckets[hash_val]; n1; n1 = n1->next) {
for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
lockdep_is_held(&tbl->lock));
n1 != NULL;
n1 = rcu_dereference_protected(n1->next,
lockdep_is_held(&tbl->lock))) {
if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
neigh_hold(n1);
rc = n1;
goto out_tbl_unlock;
}
}

n->next = nht->hash_buckets[hash_val];
nht->hash_buckets[hash_val] = n;
n->dead = 0;
neigh_hold(n);
rcu_assign_pointer(n->next,
rcu_dereference_protected(nht->hash_buckets[hash_val],
lockdep_is_held(&tbl->lock)));
rcu_assign_pointer(nht->hash_buckets[hash_val], n);
write_unlock_bh(&tbl->lock);
NEIGH_PRINTK2("neigh %p is created.\n", n);
rc = n;
Expand Down Expand Up @@ -651,6 +676,12 @@ static inline void neigh_parms_put(struct neigh_parms *parms)
neigh_parms_destroy(parms);
}

/*
 * RCU callback queued by neigh_destroy() via call_rcu().
 *
 * @head: the rcu_head embedded in the struct neighbour being released.
 *
 * Recovers the enclosing neighbour from @head and returns it to the
 * kmem cache of the table it belongs to. Deferring the free to this
 * callback keeps the entry's memory valid until the RCU grace period
 * has elapsed.
 */
static void neigh_destroy_rcu(struct rcu_head *head)
{
struct neighbour *neigh = container_of(head, struct neighbour, rcu);

kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}
/*
* neighbour must already be out of the table;
*
Expand Down Expand Up @@ -690,7 +721,7 @@ void neigh_destroy(struct neighbour *neigh)
NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

atomic_dec(&neigh->tbl->entries);
kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
call_rcu(&neigh->rcu, neigh_destroy_rcu);
}
EXPORT_SYMBOL(neigh_destroy);

Expand Down Expand Up @@ -731,7 +762,8 @@ static void neigh_connect(struct neighbour *neigh)
static void neigh_periodic_work(struct work_struct *work)
{
struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
struct neighbour *n, **np;
struct neighbour *n;
struct neighbour __rcu **np;
unsigned int i;
struct neigh_hash_table *nht;

Expand All @@ -756,7 +788,8 @@ static void neigh_periodic_work(struct work_struct *work)
for (i = 0 ; i <= nht->hash_mask; i++) {
np = &nht->hash_buckets[i];

while ((n = *np) != NULL) {
while ((n = rcu_dereference_protected(*np,
lockdep_is_held(&tbl->lock))) != NULL) {
unsigned int state;

write_lock(&n->lock);
Expand Down Expand Up @@ -1213,8 +1246,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
}

/* This function can be used in contexts, where only old dev_queue_xmit
worked, f.e. if you want to override normal output path (eql, shaper),
but resolution is not made yet.
* worked, f.e. if you want to override normal output path (eql, shaper),
* but resolution is not made yet.
*/

int neigh_compat_output(struct sk_buff *skb)
Expand Down Expand Up @@ -2123,7 +2156,7 @@ static void neigh_update_notify(struct neighbour *neigh)
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
struct netlink_callback *cb)
{
struct net * net = sock_net(skb->sk);
struct net *net = sock_net(skb->sk);
struct neighbour *n;
int rc, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
Expand All @@ -2132,13 +2165,14 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);

read_lock(&tbl->lock);
for (h = 0; h <= nht->hash_mask; h++) {
if (h < s_h)
continue;
if (h > s_h)
s_idx = 0;
for (n = nht->hash_buckets[h], idx = 0; n; n = n->next) {
for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
n != NULL;
n = rcu_dereference_bh(n->next)) {
if (!net_eq(dev_net(n->dev), net))
continue;
if (idx < s_idx)
Expand All @@ -2150,13 +2184,12 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
rc = -1;
goto out;
}
next:
next:
idx++;
}
}
rc = skb->len;
out:
read_unlock(&tbl->lock);
rcu_read_unlock_bh();
cb->args[1] = h;
cb->args[2] = idx;
Expand Down Expand Up @@ -2195,11 +2228,13 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);

read_lock(&tbl->lock);
read_lock(&tbl->lock); /* avoid resizes */
for (chain = 0; chain <= nht->hash_mask; chain++) {
struct neighbour *n;

for (n = nht->hash_buckets[chain]; n; n = n->next)
for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
n != NULL;
n = rcu_dereference_bh(n->next))
cb(n, cookie);
}
read_unlock(&tbl->lock);
Expand All @@ -2217,16 +2252,20 @@ void __neigh_for_each_release(struct neigh_table *tbl,
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
for (chain = 0; chain <= nht->hash_mask; chain++) {
struct neighbour *n, **np;
struct neighbour *n;
struct neighbour __rcu **np;

np = &nht->hash_buckets[chain];
while ((n = *np) != NULL) {
while ((n = rcu_dereference_protected(*np,
lockdep_is_held(&tbl->lock))) != NULL) {
int release;

write_lock(&n->lock);
release = cb(n);
if (release) {
*np = n->next;
rcu_assign_pointer(*np,
rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock)));
n->dead = 1;
} else
np = &n->next;
Expand All @@ -2250,7 +2289,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)

state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
for (bucket = 0; bucket <= nht->hash_mask; bucket++) {
n = nht->hash_buckets[bucket];
n = rcu_dereference_bh(nht->hash_buckets[bucket]);

while (n) {
if (!net_eq(dev_net(n->dev), net))
Expand All @@ -2267,8 +2306,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
break;
if (n->nud_state & ~NUD_NOARP)
break;
next:
n = n->next;
next:
n = rcu_dereference_bh(n->next);
}

if (n)
Expand All @@ -2292,7 +2331,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
if (v)
return n;
}
n = n->next;
n = rcu_dereference_bh(n->next);

while (1) {
while (n) {
Expand All @@ -2309,8 +2348,8 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,

if (n->nud_state & ~NUD_NOARP)
break;
next:
n = n->next;
next:
n = rcu_dereference_bh(n->next);
}

if (n)
Expand All @@ -2319,7 +2358,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
if (++state->bucket > nht->hash_mask)
break;

n = nht->hash_buckets[state->bucket];
n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
}

if (n && pos)
Expand Down Expand Up @@ -2417,7 +2456,6 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
}

void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
__acquires(tbl->lock)
__acquires(rcu_bh)
{
struct neigh_seq_state *state = seq->private;
Expand All @@ -2428,7 +2466,7 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl

rcu_read_lock_bh();
state->nht = rcu_dereference_bh(tbl->nht);
read_lock(&tbl->lock);

return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
Expand Down Expand Up @@ -2461,13 +2499,8 @@ void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
__releases(tbl->lock)
__releases(rcu_bh)
{
struct neigh_seq_state *state = seq->private;
struct neigh_table *tbl = state->tbl;

read_unlock(&tbl->lock);
rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
Expand Down

0 comments on commit 767e97e

Please sign in to comment.