Skip to content
Permalink
Browse files

inet: add RCU protection to inet->opt

We lack proper synchronization to manipulate inet->opt ip_options

Problem is ip_make_skb() calls ip_setup_cork() and
ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options),
without any protection against another thread manipulating inet->opt.

Another thread can change inet->opt pointer and free old one under us.

Use RCU to protect inet->opt (changed to inet->inet_opt).

Instead of handling atomic refcounts, just copy ip_options when
necessary, to avoid cache line dirtying.

We cant insert an rcu_head in struct ip_options since its included in
skb->cb[], so this patch is large because I had to introduce a new
ip_options_rcu structure.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information...
Eric Dumazet authored and davem330 committed Apr 21, 2011
1 parent 0a14842 commit f6d8bd051c391c1c0458a30b2a7abcd939329259
@@ -57,7 +57,15 @@ struct ip_options {
unsigned char __data[0];
};

#define optlength(opt) (sizeof(struct ip_options) + opt->optlen)
struct ip_options_rcu {
struct rcu_head rcu;
struct ip_options opt;
};

struct ip_options_data {
struct ip_options_rcu opt;
char data[40];
};

struct inet_request_sock {
struct request_sock req;
@@ -78,7 +86,7 @@ struct inet_request_sock {
acked : 1,
no_srccheck: 1;
kmemcheck_bitfield_end(flags);
struct ip_options *opt;
struct ip_options_rcu *opt;
};

static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
@@ -140,7 +148,7 @@ struct inet_sock {
__be16 inet_sport;
__u16 inet_id;

struct ip_options *opt;
struct ip_options_rcu __rcu *inet_opt;
__u8 tos;
__u8 min_ttl;
__u8 mc_ttl;
@@ -52,7 +52,7 @@ static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
struct ipcm_cookie {
__be32 addr;
int oif;
struct ip_options *opt;
struct ip_options_rcu *opt;
__u8 tx_flags;
};

@@ -92,7 +92,7 @@ extern int igmp_mc_proc_init(void);

extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
__be32 saddr, __be32 daddr,
struct ip_options *opt);
struct ip_options_rcu *opt);
extern int ip_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev);
extern int ip_local_deliver(struct sk_buff *skb);
@@ -416,14 +416,15 @@ extern int ip_forward(struct sk_buff *skb);
* Functions provided by ip_options.c
*/

extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag);
extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt,
__be32 daddr, struct rtable *rt, int is_frag);
extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb);
extern void ip_options_fragment(struct sk_buff *skb);
extern int ip_options_compile(struct net *net,
struct ip_options *opt, struct sk_buff *skb);
extern int ip_options_get(struct net *net, struct ip_options **optp,
extern int ip_options_get(struct net *net, struct ip_options_rcu **optp,
unsigned char *data, int optlen);
extern int ip_options_get_from_user(struct net *net, struct ip_options **optp,
extern int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
unsigned char __user *data, int optlen);
extern void ip_options_undo(struct ip_options * opt);
extern void ip_forward_options(struct sk_buff *skb);
@@ -48,6 +48,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct flowi4 fl4;
struct rtable *rt;
int err;
struct ip_options_rcu *inet_opt;

dp->dccps_role = DCCP_ROLE_CLIENT;

@@ -58,10 +59,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return -EAFNOSUPPORT;

nexthop = daddr = usin->sin_addr.s_addr;
if (inet->opt != NULL && inet->opt->srr) {

inet_opt = rcu_dereference_protected(inet->inet_opt,
sock_owned_by_user(sk));
if (inet_opt != NULL && inet_opt->opt.srr) {
if (daddr == 0)
return -EINVAL;
nexthop = inet->opt->faddr;
nexthop = inet_opt->opt.faddr;
}

orig_sport = inet->inet_sport;
@@ -78,7 +82,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return -ENETUNREACH;
}

if (inet->opt == NULL || !inet->opt->srr)
if (inet_opt == NULL || !inet_opt->opt.srr)
daddr = rt->rt_dst;

if (inet->inet_saddr == 0)
@@ -89,8 +93,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr = daddr;

inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet->opt != NULL)
inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
if (inet_opt)
inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
/*
* Socket identity is still unknown (sport may be zero).
* However we set state to DCCP_REQUESTING and not releasing socket
@@ -405,7 +409,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->inet_daddr = ireq->rmt_addr;
newinet->inet_rcv_saddr = ireq->loc_addr;
newinet->inet_saddr = ireq->loc_addr;
newinet->opt = ireq->opt;
newinet->inet_opt = ireq->opt;
ireq->opt = NULL;
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
@@ -573,7 +573,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
First: no IPv4 options.
*/
newinet->opt = NULL;
newinet->inet_opt = NULL;

/* Clone RX bits */
newnp->rxopt.all = np->rxopt.all;
@@ -153,7 +153,7 @@ void inet_sock_destruct(struct sock *sk)
WARN_ON(sk->sk_wmem_queued);
WARN_ON(sk->sk_forward_alloc);

kfree(inet->opt);
kfree(rcu_dereference_protected(inet->inet_opt, 1));
dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
sk_refcnt_debug_dec(sk);
}
@@ -1106,9 +1106,12 @@ static int inet_sk_reselect_saddr(struct sock *sk)
struct flowi4 fl4;
struct rtable *rt;
__be32 new_saddr;
struct ip_options_rcu *inet_opt;

if (inet->opt && inet->opt->srr)
daddr = inet->opt->faddr;
inet_opt = rcu_dereference_protected(inet->inet_opt,
sock_owned_by_user(sk));
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;

/* Query new route. */
rt = ip_route_connect(&fl4, daddr, 0, RT_CONN_FLAGS(sk),
@@ -1148,16 +1151,20 @@ int inet_sk_rebuild_header(struct sock *sk)
struct inet_sock *inet = inet_sk(sk);
struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
__be32 daddr;
struct ip_options_rcu *inet_opt;
int err;

/* Route is OK, nothing to do. */
if (rt)
return 0;

/* Reroute. */
rcu_read_lock();
inet_opt = rcu_dereference(inet->inet_opt);
daddr = inet->inet_daddr;
if (inet->opt && inet->opt->srr)
daddr = inet->opt->faddr;
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
rcu_read_unlock();
rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr,
inet->inet_dport, inet->inet_sport,
sk->sk_protocol, RT_CONN_FLAGS(sk),

0 comments on commit f6d8bd0

Please sign in to comment.
You can’t perform that action at this time.