Skip to content

Commit 7d26727

Browse files
Rainer Weikusatdavem330
Rainer Weikusat
authored andcommitted
unix: avoid use-after-free in ep_remove_wait_queue
Rainer Weikusat <rweikusat@mobileactivedefense.com> writes: An AF_UNIX datagram socket being the client in an n:1 association with some server socket is only allowed to send messages to the server if the receive queue of this socket contains at most sk_max_ack_backlog datagrams. This implies that prospective writers might be forced to go to sleep despite none of the message presently enqueued on the server receive queue were sent by them. In order to ensure that these will be woken up once space becomes again available, the present unix_dgram_poll routine does a second sock_poll_wait call with the peer_wait wait queue of the server socket as queue argument (unix_dgram_recvmsg does a wake up on this queue after a datagram was received). This is inherently problematic because the server socket is only guaranteed to remain alive for as long as the client still holds a reference to it. In case the connection is dissolved via connect or by the dead peer detection logic in unix_dgram_sendmsg, the server socket may be freed despite "the polling mechanism" (in particular, epoll) still has a pointer to the corresponding peer_wait queue. There's no way to forcibly deregister a wait queue with epoll. Based on an idea by Jason Baron, the patch below changes the code such that a wait_queue_t belonging to the client socket is enqueued on the peer_wait queue of the server whenever the peer receive queue full condition is detected by either a sendmsg or a poll. A wake up on the peer queue is then relayed to the ordinary wait queue of the client socket via wake function. The connection to the peer wait queue is again dissolved if either a wake up is about to be relayed or the client socket reconnects or a dead peer is detected or the client socket is itself closed. This enables removing the second sock_poll_wait from unix_dgram_poll, thus avoiding the use-after-free, while still ensuring that no blocked writer sleeps forever. Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com> Fixes: ec0d215 ("af_unix: fix 'poll for write'/connected DGRAM sockets") Reviewed-by: Jason Baron <jbaron@akamai.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 3b13758 commit 7d26727

File tree

2 files changed

+165
-19
lines changed

2 files changed

+165
-19
lines changed

Diff for: include/net/af_unix.h

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ struct unix_sock {
6262
#define UNIX_GC_CANDIDATE 0
6363
#define UNIX_GC_MAYBE_CYCLE 1
6464
struct socket_wq peer_wq;
65+
wait_queue_t peer_wake;
6566
};
6667

6768
static inline struct unix_sock *unix_sk(const struct sock *sk)

Diff for: net/unix/af_unix.c

+164-19
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,118 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
326326
return s;
327327
}
328328

329+
/* Support code for asymmetrically connected dgram sockets
330+
*
331+
* If a datagram socket is connected to a socket not itself connected
332+
* to the first socket (eg, /dev/log), clients may only enqueue more
333+
* messages if the present receive queue of the server socket is not
334+
* "too large". This means there's a second writeability condition
335+
* poll and sendmsg need to test. The dgram recv code will do a wake
336+
* up on the peer_wait wait queue of a socket upon reception of a
337+
* datagram which needs to be propagated to sleeping would-be writers
338+
* since these might not have sent anything so far. This can't be
339+
* accomplished via poll_wait because the lifetime of the server
340+
* socket might be less than that of its clients if these break their
341+
* association with it or if the server socket is closed while clients
342+
* are still connected to it and there's no way to inform "a polling
343+
* implementation" that it should let go of a certain wait queue
344+
*
345+
* In order to propagate a wake up, a wait_queue_t of the client
346+
* socket is enqueued on the peer_wait queue of the server socket
347+
* whose wake function does a wake_up on the ordinary client socket
348+
* wait queue. This connection is established whenever a write (or
349+
* poll for write) hit the flow control condition and broken when the
350+
* association to the server socket is dissolved or after a wake up
351+
* was relayed.
352+
*/
353+
354+
static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
355+
void *key)
356+
{
357+
struct unix_sock *u;
358+
wait_queue_head_t *u_sleep;
359+
360+
u = container_of(q, struct unix_sock, peer_wake);
361+
362+
__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
363+
q);
364+
u->peer_wake.private = NULL;
365+
366+
/* relaying can only happen while the wq still exists */
367+
u_sleep = sk_sleep(&u->sk);
368+
if (u_sleep)
369+
wake_up_interruptible_poll(u_sleep, key);
370+
371+
return 0;
372+
}
373+
374+
static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
375+
{
376+
struct unix_sock *u, *u_other;
377+
int rc;
378+
379+
u = unix_sk(sk);
380+
u_other = unix_sk(other);
381+
rc = 0;
382+
spin_lock(&u_other->peer_wait.lock);
383+
384+
if (!u->peer_wake.private) {
385+
u->peer_wake.private = other;
386+
__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
387+
388+
rc = 1;
389+
}
390+
391+
spin_unlock(&u_other->peer_wait.lock);
392+
return rc;
393+
}
394+
395+
static void unix_dgram_peer_wake_disconnect(struct sock *sk,
396+
struct sock *other)
397+
{
398+
struct unix_sock *u, *u_other;
399+
400+
u = unix_sk(sk);
401+
u_other = unix_sk(other);
402+
spin_lock(&u_other->peer_wait.lock);
403+
404+
if (u->peer_wake.private == other) {
405+
__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
406+
u->peer_wake.private = NULL;
407+
}
408+
409+
spin_unlock(&u_other->peer_wait.lock);
410+
}
411+
412+
static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
413+
struct sock *other)
414+
{
415+
unix_dgram_peer_wake_disconnect(sk, other);
416+
wake_up_interruptible_poll(sk_sleep(sk),
417+
POLLOUT |
418+
POLLWRNORM |
419+
POLLWRBAND);
420+
}
421+
422+
/* preconditions:
423+
* - unix_peer(sk) == other
424+
* - association is stable
425+
*/
426+
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
427+
{
428+
int connected;
429+
430+
connected = unix_dgram_peer_wake_connect(sk, other);
431+
432+
if (unix_recvq_full(other))
433+
return 1;
434+
435+
if (connected)
436+
unix_dgram_peer_wake_disconnect(sk, other);
437+
438+
return 0;
439+
}
440+
329441
static int unix_writable(const struct sock *sk)
330442
{
331443
return sk->sk_state != TCP_LISTEN &&
@@ -431,6 +543,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
431543
skpair->sk_state_change(skpair);
432544
sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
433545
}
546+
547+
unix_dgram_peer_wake_disconnect(sk, skpair);
434548
sock_put(skpair); /* It may now die */
435549
unix_peer(sk) = NULL;
436550
}
@@ -666,6 +780,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
666780
INIT_LIST_HEAD(&u->link);
667781
mutex_init(&u->readlock); /* single task reading lock */
668782
init_waitqueue_head(&u->peer_wait);
783+
init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
669784
unix_insert_socket(unix_sockets_unbound(sk), sk);
670785
out:
671786
if (sk == NULL)
@@ -1033,6 +1148,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
10331148
if (unix_peer(sk)) {
10341149
struct sock *old_peer = unix_peer(sk);
10351150
unix_peer(sk) = other;
1151+
unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1152+
10361153
unix_state_double_unlock(sk, other);
10371154

10381155
if (other != old_peer)
@@ -1472,6 +1589,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
14721589
struct scm_cookie scm;
14731590
int max_level;
14741591
int data_len = 0;
1592+
int sk_locked;
14751593

14761594
wait_for_unix_gc();
14771595
err = scm_send(sock, msg, &scm, false);
@@ -1550,23 +1668,29 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
15501668
goto out_free;
15511669
}
15521670

1671+
sk_locked = 0;
15531672
unix_state_lock(other);
1673+
restart_locked:
15541674
err = -EPERM;
15551675
if (!unix_may_send(sk, other))
15561676
goto out_unlock;
15571677

1558-
if (sock_flag(other, SOCK_DEAD)) {
1678+
if (unlikely(sock_flag(other, SOCK_DEAD))) {
15591679
/*
15601680
* Check with 1003.1g - what should
15611681
* datagram error
15621682
*/
15631683
unix_state_unlock(other);
15641684
sock_put(other);
15651685

1686+
if (!sk_locked)
1687+
unix_state_lock(sk);
1688+
15661689
err = 0;
1567-
unix_state_lock(sk);
15681690
if (unix_peer(sk) == other) {
15691691
unix_peer(sk) = NULL;
1692+
unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1693+
15701694
unix_state_unlock(sk);
15711695

15721696
unix_dgram_disconnected(sk, other);
@@ -1592,21 +1716,38 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
15921716
goto out_unlock;
15931717
}
15941718

1595-
if (unix_peer(other) != sk && unix_recvq_full(other)) {
1596-
if (!timeo) {
1597-
err = -EAGAIN;
1598-
goto out_unlock;
1719+
if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1720+
if (timeo) {
1721+
timeo = unix_wait_for_peer(other, timeo);
1722+
1723+
err = sock_intr_errno(timeo);
1724+
if (signal_pending(current))
1725+
goto out_free;
1726+
1727+
goto restart;
15991728
}
16001729

1601-
timeo = unix_wait_for_peer(other, timeo);
1730+
if (!sk_locked) {
1731+
unix_state_unlock(other);
1732+
unix_state_double_lock(sk, other);
1733+
}
16021734

1603-
err = sock_intr_errno(timeo);
1604-
if (signal_pending(current))
1605-
goto out_free;
1735+
if (unix_peer(sk) != other ||
1736+
unix_dgram_peer_wake_me(sk, other)) {
1737+
err = -EAGAIN;
1738+
sk_locked = 1;
1739+
goto out_unlock;
1740+
}
16061741

1607-
goto restart;
1742+
if (!sk_locked) {
1743+
sk_locked = 1;
1744+
goto restart_locked;
1745+
}
16081746
}
16091747

1748+
if (unlikely(sk_locked))
1749+
unix_state_unlock(sk);
1750+
16101751
if (sock_flag(other, SOCK_RCVTSTAMP))
16111752
__net_timestamp(skb);
16121753
maybe_add_creds(skb, sock, other);
@@ -1620,6 +1761,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
16201761
return len;
16211762

16221763
out_unlock:
1764+
if (sk_locked)
1765+
unix_state_unlock(sk);
16231766
unix_state_unlock(other);
16241767
out_free:
16251768
kfree_skb(skb);
@@ -2476,14 +2619,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
24762619
return mask;
24772620

24782621
writable = unix_writable(sk);
2479-
other = unix_peer_get(sk);
2480-
if (other) {
2481-
if (unix_peer(other) != sk) {
2482-
sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2483-
if (unix_recvq_full(other))
2484-
writable = 0;
2485-
}
2486-
sock_put(other);
2622+
if (writable) {
2623+
unix_state_lock(sk);
2624+
2625+
other = unix_peer(sk);
2626+
if (other && unix_peer(other) != sk &&
2627+
unix_recvq_full(other) &&
2628+
unix_dgram_peer_wake_me(sk, other))
2629+
writable = 0;
2630+
2631+
unix_state_unlock(sk);
24872632
}
24882633

24892634
if (writable)

0 commit comments

Comments
 (0)