Skip to content

Commit

Permalink
Merge branch 'inet-frag-fixes'
Browse files Browse the repository at this point in the history
Florian Westphal says:

====================
inet: ip defrag bug fixes

Johan Schuijt and Frank Schreuder reported a crash and a softlockup after the
inet workqueue eviction change:

general protection fault: 0000 [#1] SMP
CPU: 0 PID: 4 Comm: kworker/0:0 Not tainted 3.18.18-transip-1.5 #1
Workqueue: events inet_frag_worker
task: ffff880224935130 ti: ffff880224938000 task.ti: ffff880224938000
RIP: 0010:[<ffffffff8149288c>] [<ffffffff8149288c>] inet_evict_bucket+0xfc/0x160
RSP: 0018:ffff88022493bd58  EFLAGS: 00010286
RAX: ffff88021f4f3e80 RBX: dead000000100100 RCX: 000000000000006b
RDX: 000000000000006c RSI: ffff88021f4f3e80 RDI: dead0000001000a8
RBP: 0000000000000002 R08: ffff880222273900 R09: ffff880036e49200
R10: ffff8800c6e86500 R11: ffff880036f45500 R12: ffffffff81a87100
R13: ffff88022493bd70 R14: 0000000000000000 R15: ffff8800c9b26280
[..]
Call Trace:
 [<ffffffff814929e0>] ? inet_frag_worker+0x60/0x210
 [<ffffffff8107e3a2>] ? process_one_work+0x142/0x3b0
 [<ffffffff8107eb94>] ? worker_thread+0x114/0x440
[..]

A second issue results in a softlockup, since the evictor may restart the
eviction loop a (potentially) unlimited number of times while local
softirqs are disabled.

Frank reports that the test system remained stable for 14 hours of testing
(before, the crash occurred within half an hour in their setup).
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
davem330 committed Jul 27, 2015
2 parents 81296fc + caaecdd commit 64b892a
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 47 deletions.
17 changes: 11 additions & 6 deletions include/net/inet_frag.h
Expand Up @@ -21,13 +21,11 @@ struct netns_frags {
* @INET_FRAG_FIRST_IN: first fragment has arrived
* @INET_FRAG_LAST_IN: final fragment has arrived
* @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction
* @INET_FRAG_EVICTED: frag queue is being evicted
*/
enum {
INET_FRAG_FIRST_IN = BIT(0),
INET_FRAG_LAST_IN = BIT(1),
INET_FRAG_COMPLETE = BIT(2),
INET_FRAG_EVICTED = BIT(3)
};

/**
Expand All @@ -45,6 +43,7 @@ enum {
* @flags: fragment queue flags
* @max_size: maximum received fragment size
* @net: namespace that this frag belongs to
* @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
*/
struct inet_frag_queue {
spinlock_t lock;
Expand All @@ -59,6 +58,7 @@ struct inet_frag_queue {
__u8 flags;
u16 max_size;
struct netns_frags *net;
struct hlist_node list_evictor;
};

#define INETFRAGS_HASHSZ 1024
Expand Down Expand Up @@ -125,6 +125,11 @@ static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f
inet_frag_destroy(q, f);
}

/* Report whether frag queue @q is currently linked through its
 * list_evictor node.
 *
 * Returns true when q->list_evictor is hashed (hlist_unhashed() reports
 * false for it) and false otherwise.
 *
 * NOTE(review): this relies on list_evictor starting out unhashed for a
 * freshly allocated queue; that initialization is not visible here, confirm
 * against the allocation path.
 */
static inline bool inet_frag_evicting(struct inet_frag_queue *q)
{
return !hlist_unhashed(&q->list_evictor);
}

/* Memory Tracking Functions. */

/* The default percpu_counter batch size is not big enough to scale to
Expand All @@ -139,14 +144,14 @@ static inline int frag_mem_limit(struct netns_frags *nf)
return percpu_counter_read(&nf->mem);
}

static inline void sub_frag_mem_limit(struct inet_frag_queue *q, int i)
static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
{
__percpu_counter_add(&q->net->mem, -i, frag_percpu_counter_batch);
__percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch);
}

static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i)
static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
{
__percpu_counter_add(&q->net->mem, i, frag_percpu_counter_batch);
__percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch);
}

static inline void init_frag_mem_limit(struct netns_frags *nf)
Expand Down
6 changes: 3 additions & 3 deletions net/ieee802154/6lowpan/reassembly.c
Expand Up @@ -207,7 +207,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
} else {
fq->q.meat += skb->len;
}
add_frag_mem_limit(&fq->q, skb->truesize);
add_frag_mem_limit(fq->q.net, skb->truesize);

if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
Expand Down Expand Up @@ -287,7 +287,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
clone->data_len = clone->len;
head->data_len -= clone->len;
head->len -= clone->len;
add_frag_mem_limit(&fq->q, clone->truesize);
add_frag_mem_limit(fq->q.net, clone->truesize);
}

WARN_ON(head == NULL);
Expand All @@ -310,7 +310,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
}
fp = next;
}
sub_frag_mem_limit(&fq->q, sum_truesize);
sub_frag_mem_limit(fq->q.net, sum_truesize);

head->next = NULL;
head->dev = dev;
Expand Down
40 changes: 15 additions & 25 deletions net/ipv4/inet_fragment.c
Expand Up @@ -131,34 +131,22 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
unsigned int evicted = 0;
HLIST_HEAD(expired);

evict_again:
spin_lock(&hb->chain_lock);

hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
if (!inet_fragq_should_evict(fq))
continue;

if (!del_timer(&fq->timer)) {
/* q expiring right now thus increment its refcount so
* it won't be freed under us and wait until the timer
* has finished executing then destroy it
*/
atomic_inc(&fq->refcnt);
spin_unlock(&hb->chain_lock);
del_timer_sync(&fq->timer);
inet_frag_put(fq, f);
goto evict_again;
}
if (!del_timer(&fq->timer))
continue;

fq->flags |= INET_FRAG_EVICTED;
hlist_del(&fq->list);
hlist_add_head(&fq->list, &expired);
hlist_add_head(&fq->list_evictor, &expired);
++evicted;
}

spin_unlock(&hb->chain_lock);

hlist_for_each_entry_safe(fq, n, &expired, list)
hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
f->frag_expire((unsigned long) fq);

return evicted;
Expand Down Expand Up @@ -240,18 +228,20 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
int i;

nf->low_thresh = 0;
local_bh_disable();

evict_again:
local_bh_disable();
seq = read_seqbegin(&f->rnd_seqlock);

for (i = 0; i < INETFRAGS_HASHSZ ; i++)
inet_evict_bucket(f, &f->hash[i]);

if (read_seqretry(&f->rnd_seqlock, seq))
goto evict_again;

local_bh_enable();
cond_resched();

if (read_seqretry(&f->rnd_seqlock, seq) ||
percpu_counter_sum(&nf->mem))
goto evict_again;

percpu_counter_destroy(&nf->mem);
}
Expand Down Expand Up @@ -284,8 +274,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
struct inet_frag_bucket *hb;

hb = get_frag_bucket_locked(fq, f);
if (!(fq->flags & INET_FRAG_EVICTED))
hlist_del(&fq->list);
hlist_del(&fq->list);
fq->flags |= INET_FRAG_COMPLETE;
spin_unlock(&hb->chain_lock);
}

Expand All @@ -297,7 +287,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
if (!(fq->flags & INET_FRAG_COMPLETE)) {
fq_unlink(fq, f);
atomic_dec(&fq->refcnt);
fq->flags |= INET_FRAG_COMPLETE;
}
}
EXPORT_SYMBOL(inet_frag_kill);
Expand Down Expand Up @@ -330,11 +319,12 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
fp = xp;
}
sum = sum_truesize + f->qsize;
sub_frag_mem_limit(q, sum);

if (f->destructor)
f->destructor(q);
kmem_cache_free(f->frags_cachep, q);

sub_frag_mem_limit(nf, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);

Expand Down Expand Up @@ -390,7 +380,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,

q->net = nf;
f->constructor(q, arg);
add_frag_mem_limit(q, f->qsize);
add_frag_mem_limit(nf, f->qsize);

setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);
Expand Down
12 changes: 6 additions & 6 deletions net/ipv4/ip_fragment.c
Expand Up @@ -202,7 +202,7 @@ static void ip_expire(unsigned long arg)
ipq_kill(qp);
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);

if (!(qp->q.flags & INET_FRAG_EVICTED)) {
if (!inet_frag_evicting(&qp->q)) {
struct sk_buff *head = qp->q.fragments;
const struct iphdr *iph;
int err;
Expand Down Expand Up @@ -309,7 +309,7 @@ static int ip_frag_reinit(struct ipq *qp)
kfree_skb(fp);
fp = xp;
} while (fp);
sub_frag_mem_limit(&qp->q, sum_truesize);
sub_frag_mem_limit(qp->q.net, sum_truesize);

qp->q.flags = 0;
qp->q.len = 0;
Expand Down Expand Up @@ -455,7 +455,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
qp->q.fragments = next;

qp->q.meat -= free_it->len;
sub_frag_mem_limit(&qp->q, free_it->truesize);
sub_frag_mem_limit(qp->q.net, free_it->truesize);
kfree_skb(free_it);
}
}
Expand All @@ -479,7 +479,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
add_frag_mem_limit(&qp->q, skb->truesize);
add_frag_mem_limit(qp->q.net, skb->truesize);
if (offset == 0)
qp->q.flags |= INET_FRAG_FIRST_IN;

Expand Down Expand Up @@ -587,7 +587,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
add_frag_mem_limit(&qp->q, clone->truesize);
add_frag_mem_limit(qp->q.net, clone->truesize);
}

skb_push(head, head->data - skb_network_header(head));
Expand Down Expand Up @@ -615,7 +615,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
}
fp = next;
}
sub_frag_mem_limit(&qp->q, sum_truesize);
sub_frag_mem_limit(qp->q.net, sum_truesize);

head->next = NULL;
head->dev = dev;
Expand Down
6 changes: 3 additions & 3 deletions net/ipv6/netfilter/nf_conntrack_reasm.c
Expand Up @@ -348,7 +348,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
add_frag_mem_limit(&fq->q, skb->truesize);
add_frag_mem_limit(fq->q.net, skb->truesize);

/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
Expand Down Expand Up @@ -430,7 +430,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
clone->ip_summed = head->ip_summed;

NFCT_FRAG6_CB(clone)->orig = NULL;
add_frag_mem_limit(&fq->q, clone->truesize);
add_frag_mem_limit(fq->q.net, clone->truesize);
}

/* We have to remove fragment header from datagram and to relocate
Expand All @@ -454,7 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
}
sub_frag_mem_limit(&fq->q, head->truesize);
sub_frag_mem_limit(fq->q.net, head->truesize);

head->ignore_df = 1;
head->next = NULL;
Expand Down
8 changes: 4 additions & 4 deletions net/ipv6/reassembly.c
Expand Up @@ -144,7 +144,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,

IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);

if (fq->q.flags & INET_FRAG_EVICTED)
if (inet_frag_evicting(&fq->q))
goto out_rcu_unlock;

IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
Expand Down Expand Up @@ -330,7 +330,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
add_frag_mem_limit(&fq->q, skb->truesize);
add_frag_mem_limit(fq->q.net, skb->truesize);

/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
Expand Down Expand Up @@ -443,7 +443,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
add_frag_mem_limit(&fq->q, clone->truesize);
add_frag_mem_limit(fq->q.net, clone->truesize);
}

/* We have to remove fragment header from datagram and to relocate
Expand Down Expand Up @@ -481,7 +481,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
}
fp = next;
}
sub_frag_mem_limit(&fq->q, sum_truesize);
sub_frag_mem_limit(fq->q.net, sum_truesize);

head->next = NULL;
head->dev = dev;
Expand Down

0 comments on commit 64b892a

Please sign in to comment.