Skip to content

Commit

Permalink
ipv6: Fix sporadic sendmsg -EINVAL when sending to multicast groups.
Browse files Browse the repository at this point in the history
Thanks to excellent diagnosis by Eduard Guzovsky.

The core problem is that on a network with lots of active
multicast traffic, the neighbour cache can fill up.  If
we try to allocate a new route and thus neighbour cache
entry, the bog-standard GC attempt the neighbour layer does
is ineffective because route entries hold a reference
to the existing neighbour entries and GC can only liberate
entries with no references.

IPv4 already has a way to handle this, by doing a route cache
GC in such situations (when neigh attach returns -ENOBUFS).

So simply mimic this on the ipv6 side.

Tested-by: Eduard Guzovsky <eguzovsky@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
davem330 committed Jan 5, 2009
1 parent eb4dea5 commit 14deae4
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 7 deletions.
4 changes: 2 additions & 2 deletions include/net/ndisc.h
Expand Up @@ -155,9 +155,9 @@ static inline struct neighbour * ndisc_get_neigh(struct net_device *dev, const s
{

if (dev)
return __neigh_lookup(&nd_tbl, addr, dev, 1);
return __neigh_lookup_errno(&nd_tbl, addr, dev);

return NULL;
return ERR_PTR(-ENODEV);
}


Expand Down
52 changes: 47 additions & 5 deletions net/ipv6/route.c
Expand Up @@ -627,6 +627,9 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
rt = ip6_rt_copy(ort);

if (rt) {
struct neighbour *neigh;
int attempts = !in_softirq();

if (!(rt->rt6i_flags&RTF_GATEWAY)) {
if (rt->rt6i_dst.plen != 128 &&
ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
Expand All @@ -646,7 +649,35 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
}
#endif

rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
retry:
neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
if (IS_ERR(neigh)) {
struct net *net = dev_net(rt->rt6i_dev);
int saved_rt_min_interval =
net->ipv6.sysctl.ip6_rt_gc_min_interval;
int saved_rt_elasticity =
net->ipv6.sysctl.ip6_rt_gc_elasticity;

if (attempts-- > 0) {
net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

ip6_dst_gc(net->ipv6.ip6_dst_ops);

net->ipv6.sysctl.ip6_rt_gc_elasticity =
saved_rt_elasticity;
net->ipv6.sysctl.ip6_rt_gc_min_interval =
saved_rt_min_interval;
goto retry;
}

if (net_ratelimit())
printk(KERN_WARNING
"Neighbour table overflow.\n");
dst_free(&rt->u.dst);
return NULL;
}
rt->rt6i_nexthop = neigh;

}

Expand Down Expand Up @@ -945,8 +976,11 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
dev_hold(dev);
if (neigh)
neigh_hold(neigh);
else
else {
neigh = ndisc_get_neigh(dev, addr);
if (IS_ERR(neigh))
neigh = NULL;
}

rt->rt6i_dev = dev;
rt->rt6i_idev = idev;
Expand Down Expand Up @@ -1887,6 +1921,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
{
struct net *net = dev_net(idev->dev);
struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
struct neighbour *neigh;

if (rt == NULL)
return ERR_PTR(-ENOMEM);
Expand All @@ -1909,11 +1944,18 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->rt6i_flags |= RTF_ANYCAST;
else
rt->rt6i_flags |= RTF_LOCAL;
rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
if (rt->rt6i_nexthop == NULL) {
neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
if (IS_ERR(neigh)) {
dst_free(&rt->u.dst);
return ERR_PTR(-ENOMEM);

/* We are casting this because that is the return
* value type. But an errno encoded pointer is the
* same regardless of the underlying pointer type,
* and that's what we are returning. So this is OK.
*/
return (struct rt6_info *) neigh;
}
rt->rt6i_nexthop = neigh;

ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
rt->rt6i_dst.plen = 128;
Expand Down

0 comments on commit 14deae4

Please sign in to comment.