diff --git a/block/genhd.c b/block/genhd.c index 6542b9b84a0..f8da21edc4b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -828,6 +828,7 @@ static void disk_seqf_stop(struct seq_file *seqf, void *v) if (iter) { class_dev_iter_exit(iter); kfree(iter); + seqf->private = NULL; } } diff --git a/drivers/crypto/msm/qcedev.c b/drivers/crypto/msm/qcedev.c index f27cdd2f670..73fba391330 100644 --- a/drivers/crypto/msm/qcedev.c +++ b/drivers/crypto/msm/qcedev.c @@ -1610,44 +1610,6 @@ static int qcedev_vbuf_ablk_cipher(struct qcedev_async_req *areq, struct qcedev_cipher_op_req *saved_req; struct qcedev_cipher_op_req *creq = &areq->cipher_op_req; - /* Verify Source Address's */ - for (i = 0; i < areq->cipher_op_req.entries; i++) - if (!access_ok(VERIFY_READ, - (void __user *)areq->cipher_op_req.vbuf.src[i].vaddr, - areq->cipher_op_req.vbuf.src[i].len)) - return -EFAULT; - - /* Verify Destination Address's */ - if (creq->in_place_op != 1) { - for (i = 0, total = 0; i < QCEDEV_MAX_BUFFERS; i++) { - if ((areq->cipher_op_req.vbuf.dst[i].vaddr != 0) && - (total < creq->data_len)) { - if (!access_ok(VERIFY_WRITE, - (void __user *)creq->vbuf.dst[i].vaddr, - creq->vbuf.dst[i].len)) { - pr_err("%s:DST WR_VERIFY err %d=0x%x\n", - __func__, i, - (u32)creq->vbuf.dst[i].vaddr); - return -EFAULT; - } - total += creq->vbuf.dst[i].len; - } - } - } else { - for (i = 0, total = 0; i < creq->entries; i++) { - if (total < creq->data_len) { - if (!access_ok(VERIFY_WRITE, - (void __user *)creq->vbuf.src[i].vaddr, - creq->vbuf.src[i].len)) { - pr_err("%s:SRC WR_VERIFY err %d=0x%x\n", - __func__, i, - (u32)creq->vbuf.src[i].vaddr); - return -EFAULT; - } - total += creq->vbuf.src[i].len; - } - } - } total = 0; if (areq->cipher_op_req.mode == QCEDEV_AES_MODE_CTR) @@ -1893,6 +1855,36 @@ static int qcedev_check_cipher_params(struct qcedev_cipher_op_req *req, __func__, total, req->data_len); goto error; } + /* Verify Source Address's */ + for (i = 0, total = 0; i < req->entries; i++) { + if (total < req->data_len) { + if (!access_ok(VERIFY_READ, + (void __user *)req->vbuf.src[i].vaddr, + req->vbuf.src[i].len)) { + pr_err("%s:SRC RD_VERIFY err %d=0x%lx\n", + __func__, i, (uintptr_t) + req->vbuf.src[i].vaddr); + goto error; + } + total += req->vbuf.src[i].len; + } + } + + /* Verify Destination Address's */ + for (i = 0, total = 0; i < QCEDEV_MAX_BUFFERS; i++) { + if ((req->vbuf.dst[i].vaddr != 0) && + (total < req->data_len)) { + if (!access_ok(VERIFY_WRITE, + (void __user *)req->vbuf.dst[i].vaddr, + req->vbuf.dst[i].len)) { + pr_err("%s:DST WR_VERIFY err %d=0x%lx\n", + __func__, i, (uintptr_t) + req->vbuf.dst[i].vaddr); + goto error; + } + total += req->vbuf.dst[i].len; + } + } return 0; error: return -EINVAL; diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 75fa2e7b87b..bf966258fe1 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -994,6 +994,7 @@ static void hid_input_field(struct hid_device *hid, struct hid_field *field, /* Ignore report if ErrorRollOver */ if (!(field->flags & HID_MAIN_ITEM_VARIABLE) && value[n] >= min && value[n] <= max && + value[n] - min < field->maxusage && field->usage[value[n] - min].hid == HID_UP_KEYBOARD + 1) goto exit; } @@ -1006,11 +1007,13 @@ static void hid_input_field(struct hid_device *hid, struct hid_field *field, } if (field->value[n] >= min && field->value[n] <= max + && field->value[n] - min < field->maxusage && field->usage[field->value[n] - min].hid && search(value, field->value[n], count)) hid_process_event(hid, field, &field->usage[field->value[n] - 
min], 0, interrupt); if (value[n] >= min && value[n] <= max + && value[n] - min < field->maxusage && field->usage[value[n] - min].hid && search(field->value, value[n], count)) hid_process_event(hid, field, &field->usage[value[n] - min], 1, interrupt); diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index fb119ce06ae..f9d12ba2709 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -741,8 +741,13 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, return k; /* probably out of space --> ENOMEM */ } if (sdp->detached) { - if (srp->bio) + if (srp->bio) { + if (srp->rq->cmd != srp->rq->__cmd) + kfree(srp->rq->cmd); blk_end_request_all(srp->rq, -EIO); + srp->rq = NULL; + } + sg_finish_rem_req(srp); return -ENODEV; } diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 25f68803e2d..56b9d236549 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -1085,7 +1085,7 @@ static int binder_dec_node(struct binder_node *node, int strong, int internal) static struct binder_ref *binder_get_ref(struct binder_proc *proc, - uint32_t desc) + uint32_t desc, bool need_strong_ref) { struct rb_node *n = proc->refs_by_desc.rb_node; struct binder_ref *ref; @@ -1093,12 +1093,16 @@ static struct binder_ref *binder_get_ref(struct binder_proc *proc, while (n) { ref = rb_entry(n, struct binder_ref, rb_node_desc); - if (desc < ref->desc) + if (desc < ref->desc) { n = n->rb_left; - else if (desc > ref->desc) + } else if (desc > ref->desc) { n = n->rb_right; - else + } else if (need_strong_ref && !ref->strong) { + binder_user_error("tried to use weak ref as strong ref\n"); + return NULL; + } else { return ref; + } } return NULL; } @@ -1370,7 +1374,8 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { - struct binder_ref *ref = binder_get_ref(proc, fp->handle); + struct binder_ref *ref = binder_get_ref(proc, fp->handle, + fp->type == BINDER_TYPE_HANDLE); if (ref == NULL) { printk(KERN_ERR "binder: transaction release %d" " bad handle %ld\n", debug_id, @@ -1469,7 +1474,7 @@ static void binder_transaction(struct binder_proc *proc, } else { if (tr->target.handle) { struct binder_ref *ref; - ref = binder_get_ref(proc, tr->target.handle); + ref = binder_get_ref(proc, tr->target.handle, true); if (ref == NULL) { binder_user_error("binder: %d:%d got " "transaction to invalid handle\n", @@ -1674,7 +1679,8 @@ static void binder_transaction(struct binder_proc *proc, } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { - struct binder_ref *ref = binder_get_ref(proc, fp->handle); + struct binder_ref *ref = binder_get_ref(proc, fp->handle, + fp->type == BINDER_TYPE_HANDLE); if (ref == NULL) { binder_user_error("binder: %d:%d got " "transaction with invalid " @@ -1881,7 +1887,9 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, ref->desc); } } else - ref = binder_get_ref(proc, target); + ref = binder_get_ref(proc, target, + cmd == BC_ACQUIRE || + cmd == BC_RELEASE); if (ref == NULL) { binder_user_error("binder: %d:%d refcou" "nt change on invalid ref %d\n", @@ -2092,7 +2100,7 @@ int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, if (get_user(cookie, (void __user * __user *)ptr)) return -EFAULT; ptr += sizeof(void *); - ref = binder_get_ref(proc, target); + ref = binder_get_ref(proc, target, false); if (ref == NULL) { binder_user_error("binder: %d:%d %s " "invalid ref %d\n", @@ -3363,7 +3371,7 @@ static void 
print_binder_node(struct seq_file *m, struct binder_node *node) static void print_binder_ref(struct seq_file *m, struct binder_ref *ref) { - seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %p\n", + seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %pK\n", ref->debug_id, ref->desc, ref->node->proc ? "" : "dead ", ref->node->debug_id, ref->strong, ref->weak, ref->death); } diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 44789f2eddb..fdcf5ddfcc1 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -395,6 +395,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) global_page_state(NR_FILE_PAGES)) other_file = global_page_state(NR_FILE_PAGES) - global_page_state(NR_SHMEM) - + global_page_state(NR_UNEVICTABLE) - total_swapcache_pages; else other_file = 0; diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 24b95db75d8..90ee4170c92 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -406,6 +406,10 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush); * they are not on hot paths so a little discipline won't do * any harm. * + * The line discipline-related tty_struct fields are reset to + * prevent the ldisc driver from re-using stale information for + * the new ldisc instance. + * * Locking: takes termios_mutex */ @@ -414,6 +418,9 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num) mutex_lock(&tty->termios_mutex); tty->termios->c_line = num; mutex_unlock(&tty->termios_mutex); + + tty->disc_data = NULL; + tty->receive_room = 0; } /** diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9e9db425c61..d545c5cfd09 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3376,6 +3376,7 @@ int ext4_can_truncate(struct inode *inode) int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) { +#if 0 struct inode *inode = file->f_path.dentry->d_inode; if (!S_ISREG(inode->i_mode)) return -EOPNOTSUPP; @@ -3391,6 +3392,12 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) } return ext4_ext_punch_hole(file, offset, length); +#else + /* + * Disabled as per b/28760453 + */ + return -EOPNOTSUPP; +#endif } /* diff --git a/fs/ioprio.c b/fs/ioprio.c index 0dd6a2a7ae8..6b0777867c6 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -144,9 +144,11 @@ static int get_task_ioprio(struct task_struct *p) ret = security_task_getioprio(p); if (ret) goto out; + task_lock(p); ret = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, IOPRIO_NORM); if (p->io_context) ret = p->io_context->ioprio; + task_unlock(p); out: return ret; } diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 08dfd6ad8f3..155bdf703bc 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -23,9 +23,6 @@ static const struct proc_ns_operations *ns_entries[] = { #endif #ifdef CONFIG_IPC_NS &ipcns_operations, -#endif -#ifdef CONFIG_PID_NS - &pidns_operations, #endif &mntns_operations, }; diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index b4cc97c47bc..2b1824d5443 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -150,7 +150,7 @@ void fixup_perms_recursive(struct dentry *dentry, const char* name, size_t len) } } else if (descendant_may_need_fixup(info->perm)) { mutex_lock(&dentry->d_inode->i_mutex); - list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &dentry->d_subdirs, d_child) { fixup_perms_recursive(child, name, len); } mutex_unlock(&dentry->d_inode->i_mutex); @@ 
-164,7 +164,7 @@ void fixup_top_recursive(struct dentry *parent) { return; info = SDCARDFS_I(parent->d_inode); spin_lock(&parent->d_lock); - list_for_each_entry(dentry, &parent->d_subdirs, d_u.d_child) { + list_for_each_entry(dentry, &parent->d_subdirs, d_child) { if (dentry->d_inode) { if (SDCARDFS_I(parent->d_inode)->top != SDCARDFS_I(dentry->d_inode)->top) { get_derived_permission(parent, dentry); diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c index 6bccd07616b..a9580751e6e 100644 --- a/fs/sdcardfs/file.c +++ b/fs/sdcardfs/file.c @@ -228,6 +228,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file) /* save current_cred and override it */ OVERRIDE_CRED(sbi, saved_cred); + file->f_mode |= FMODE_NONMAPPABLE; file->private_data = kzalloc(sizeof(struct sdcardfs_file_info), GFP_KERNEL); if (!SDCARDFS_F(file)) { @@ -323,6 +324,11 @@ static int sdcardfs_fasync(int fd, struct file *file, int flag) return err; } +static struct file *sdcardfs_get_lower_file(struct file *f) +{ + return sdcardfs_lower_file(f); +} + const struct file_operations sdcardfs_main_fops = { .llseek = generic_file_llseek, .read = sdcardfs_read, @@ -337,6 +343,7 @@ const struct file_operations sdcardfs_main_fops = { .release = sdcardfs_file_release, .fsync = sdcardfs_fsync, .fasync = sdcardfs_fasync, + .get_lower_file = sdcardfs_get_lower_file, }; /* trimmed directory options */ @@ -353,4 +360,5 @@ const struct file_operations sdcardfs_dir_fops = { .flush = sdcardfs_flush, .fsync = sdcardfs_fsync, .fasync = sdcardfs_fasync, + .get_lower_file = sdcardfs_get_lower_file, }; diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index 652d0118df9..eb4b0d7f117 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -111,8 +111,13 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, u return ERR_PTR(err); } /* if found a cached inode, then just return it */ - if (!(inode->i_state & I_NEW)) + if (!(inode->i_state & I_NEW)) { + /* There can only be one alias, as we don't permit hard links + * This ensures we do not keep stale dentries that would later + * cause confusion. 
*/ + d_prune_aliases(inode); return inode; + } /* initialize new inode */ info = SDCARDFS_I(inode); @@ -244,7 +249,7 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, struct dentry *child; struct dentry *match = NULL; spin_lock(&lower_dir_dentry->d_lock); - list_for_each_entry(child, &lower_dir_dentry->d_subdirs, d_u.d_child) { + list_for_each_entry(child, &lower_dir_dentry->d_subdirs, d_child) { if (child && child->d_inode) { if (strcasecmp(child->d_name.name, name)==0) { match = dget(child); diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 55cd3509c58..6017118818d 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -276,7 +276,7 @@ static int sdcardfs_read_super(struct super_block *sb, const char *dev_name, sb_info->options.fs_low_uid, sb_info->options.fs_low_gid, 00755);*/ } else { - setup_derived_state(sb->s_root->d_inode, PERM_ROOT, sb_info->options.fs_low_uid, AID_ROOT, false, sb->s_root->d_inode); + setup_derived_state(sb->s_root->d_inode, PERM_ROOT, sb_info->options.fs_user_id, AID_ROOT, false, sb->s_root->d_inode); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); } fix_derived_permission(sb->s_root->d_inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index ca73eb8ecdb..705fbb0171a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -114,6 +114,9 @@ struct inodes_stat_t { /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)0x4000) +/* File hasn't page cache and can't be mmaped, for stackable filesystem */ +#define FMODE_NONMAPPABLE ((__force fmode_t)0x400000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) @@ -1645,6 +1648,7 @@ struct file_operations { int (*setlease)(struct file *, long, struct file_lock **); long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len); + struct file* (*get_lower_file)(struct file *f); }; struct inode_operations { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 6c890170af8..d9625050b0c 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -272,7 +272,6 @@ struct proc_ns_operations { extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; extern const struct proc_ns_operations ipcns_operations; -extern const struct proc_ns_operations pidns_operations; extern const struct proc_ns_operations mntns_operations; union proc_op { diff --git a/include/net/af_unix.h b/include/net/af_unix.h index ca68e2cef23..9760e7160dd 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -14,10 +14,11 @@ extern struct sock *unix_get_socket(struct file *filp); extern struct sock *unix_peer_get(struct sock *); #define UNIX_HASH_SIZE 256 +#define UNIX_HASH_BITS 8 extern unsigned int unix_tot_inflight; extern spinlock_t unix_table_lock; -extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; +extern struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; struct unix_address { atomic_t refcnt; @@ -60,6 +61,7 @@ struct unix_sock { unsigned int gc_maybe_cycle : 1; unsigned char recursion_level; struct socket_wq peer_wq; + wait_queue_t peer_wake; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) diff --git a/include/net/tcp.h b/include/net/tcp.h index d98b8de0b16..e65f30e5693 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1309,6 +1309,8 @@ static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unli { if 
(sk->sk_send_head == skb_unlinked) sk->sk_send_head = NULL; + if (tcp_sk(sk)->highest_sack == skb_unlinked) + tcp_sk(sk)->highest_sack = NULL; } static inline void tcp_init_send_head(struct sock *sk) diff --git a/kernel/events/core.c b/kernel/events/core.c index 33ad70ac76d..4d315c63bf5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5266,7 +5266,6 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) int err = 0; mutex_lock(&swhash->hlist_mutex); - if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) { struct swevent_hlist *hlist; @@ -7281,14 +7280,7 @@ static void perf_event_exit_cpu_context(int cpu) static void perf_event_exit_cpu(int cpu) { - struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); - perf_event_exit_cpu_context(cpu); - - mutex_lock(&swhash->hlist_mutex); - swhash->online = false; - swevent_hlist_release(swhash); - mutex_unlock(&swhash->hlist_mutex); } #else static inline void perf_event_exit_cpu(int cpu) { } diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index c14b7b9fe41..0e5685f5e6e 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -11,6 +11,7 @@ #include #include #include +#include static struct kmem_cache *user_ns_cachep __read_mostly; diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c index fc2eeb7cb2e..1ef4cc34497 100644 --- a/lib/int_sqrt.c +++ b/lib/int_sqrt.c @@ -1,3 +1,9 @@ +/* + * Copyright (C) 2013 Davidlohr Bueso + * + * Based on the shift-and-subtract algorithm for computing integer + * square root from Guy L. Steele. + */ #include #include @@ -10,23 +16,23 @@ */ unsigned long int_sqrt(unsigned long x) { - unsigned long op, res, one; + unsigned long b, m, y = 0; - op = x; - res = 0; + if (x <= 1) + return x; - one = 1UL << (BITS_PER_LONG - 2); - while (one > op) - one >>= 2; + m = 1UL << (BITS_PER_LONG - 2); + while (m != 0) { + b = y + m; + y >>= 1; - while (one != 0) { - if (op >= res + one) { - op = op - (res + one); - res = res + 2 * one; + if (x >= b) { + x -= b; + y += m; } - res /= 2; - one /= 4; + m >>= 2; } - return res; + + return y; } EXPORT_SYMBOL(int_sqrt); diff --git a/mm/mmap.c b/mm/mmap.c index 9eca7d5c57c..2620dc3e40f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -995,6 +995,9 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, int error; unsigned long reqprot = prot; + while (file && (file->f_mode & FMODE_NONMAPPABLE)) + file = file->f_op->get_lower_file(file); + /* * Does the application expect PROT_READ to imply PROT_EXEC? 
* diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 43c6dd8da60..183e9a2d4b0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -922,14 +922,14 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias); if (1) { - struct rtnl_link_ifmap map = { - .mem_start = dev->mem_start, - .mem_end = dev->mem_end, - .base_addr = dev->base_addr, - .irq = dev->irq, - .dma = dev->dma, - .port = dev->if_port, - }; + struct rtnl_link_ifmap map; + memset(&map, 0, sizeof(map)); + map.mem_start = dev->mem_start; + map.mem_end = dev->mem_end; + map.base_addr = dev->base_addr; + map.irq = dev->irq; + map.dma = dev->dma; + map.port = dev->if_port; NLA_PUT(skb, IFLA_MAP, sizeof(map), &map); } diff --git a/net/socket.c b/net/socket.c index 025f7f4d2d8..4b66575b909 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2336,31 +2336,31 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, break; } -out_put: - fput_light(sock->file, fput_needed); - if (err == 0) - return datagrams; + goto out_put; - if (datagrams != 0) { + if (datagrams == 0) { + datagrams = err; + goto out_put; + } + + /* + * We may return less entries than requested (vlen) if the + * sock is non block and there aren't enough datagrams... + */ + if (err != -EAGAIN) { /* - * We may return less entries than requested (vlen) if the - * sock is non block and there aren't enough datagrams... + * ... or if recvmsg returns an error after we + * received some datagrams, where we record the + * error to return on the next call or if the + * app asks about it using getsockopt(SO_ERROR). */ - if (err != -EAGAIN) { - /* - * ... or if recvmsg returns an error after we - * received some datagrams, where we record the - * error to return on the next call or if the - * app asks about it using getsockopt(SO_ERROR). - */ - sock->sk->sk_err = -err; - } - - return datagrams; + sock->sk->sk_err = -err; } +out_put: + fput_light(sock->file, fput_needed); - return err; + return datagrams; } SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 09a84c9f2f5..a73c67a43fb 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -115,15 +115,24 @@ #include #include -struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; +struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE]; EXPORT_SYMBOL_GPL(unix_socket_table); DEFINE_SPINLOCK(unix_table_lock); EXPORT_SYMBOL_GPL(unix_table_lock); static atomic_long_t unix_nr_socks; -#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) -#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) +static struct hlist_head *unix_sockets_unbound(void *addr) +{ + unsigned long hash = (unsigned long)addr; + + hash ^= hash >> 16; + hash ^= hash >> 8; + hash %= UNIX_HASH_SIZE; + return &unix_socket_table[UNIX_HASH_SIZE + hash]; +} + +#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE) #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -306,6 +315,118 @@ static struct sock *unix_find_socket_byinode(struct inode *i) return s; } +/* Support code for asymmetrically connected dgram sockets + * + * If a datagram socket is connected to a socket not itself connected + * to the first socket (eg, /dev/log), clients may only enqueue more + * messages if the present receive queue of the server socket is not + * "too large". 
This means there's a second writeability condition + * poll and sendmsg need to test. The dgram recv code will do a wake + * up on the peer_wait wait queue of a socket upon reception of a + * datagram which needs to be propagated to sleeping would-be writers + * since these might not have sent anything so far. This can't be + * accomplished via poll_wait because the lifetime of the server + * socket might be less than that of its clients if these break their + * association with it or if the server socket is closed while clients + * are still connected to it and there's no way to inform "a polling + * implementation" that it should let go of a certain wait queue + * + * In order to propagate a wake up, a wait_queue_t of the client + * socket is enqueued on the peer_wait queue of the server socket + * whose wake function does a wake_up on the ordinary client socket + * wait queue. This connection is established whenever a write (or + * poll for write) hit the flow control condition and broken when the + * association to the server socket is dissolved or after a wake up + * was relayed. + */ + +static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags, + void *key) +{ + struct unix_sock *u; + wait_queue_head_t *u_sleep; + + u = container_of(q, struct unix_sock, peer_wake); + + __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, + q); + u->peer_wake.private = NULL; + + /* relaying can only happen while the wq still exists */ + u_sleep = sk_sleep(&u->sk); + if (u_sleep) + wake_up_interruptible_poll(u_sleep, key); + + return 0; +} + +static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) +{ + struct unix_sock *u, *u_other; + int rc; + + u = unix_sk(sk); + u_other = unix_sk(other); + rc = 0; + spin_lock(&u_other->peer_wait.lock); + + if (!u->peer_wake.private) { + u->peer_wake.private = other; + __add_wait_queue(&u_other->peer_wait, &u->peer_wake); + + rc = 1; + } + + spin_unlock(&u_other->peer_wait.lock); + return rc; +} + +static void unix_dgram_peer_wake_disconnect(struct sock *sk, + struct sock *other) +{ + struct unix_sock *u, *u_other; + + u = unix_sk(sk); + u_other = unix_sk(other); + spin_lock(&u_other->peer_wait.lock); + + if (u->peer_wake.private == other) { + __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); + u->peer_wake.private = NULL; + } + + spin_unlock(&u_other->peer_wait.lock); +} + +static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, + struct sock *other) +{ + unix_dgram_peer_wake_disconnect(sk, other); + wake_up_interruptible_poll(sk_sleep(sk), + POLLOUT | + POLLWRNORM | + POLLWRBAND); +} + +/* preconditions: + * - unix_peer(sk) == other + * - association is stable + */ +static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) +{ + int connected; + + connected = unix_dgram_peer_wake_connect(sk, other); + + if (unix_recvq_full(other)) + return 1; + + if (connected) + unix_dgram_peer_wake_disconnect(sk, other); + + return 0; +} + static inline int unix_writable(struct sock *sk) { return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; @@ -410,6 +531,8 @@ static void unix_release_sock(struct sock *sk, int embrion) skpair->sk_state_change(skpair); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); } + + unix_dgram_peer_wake_disconnect(sk, skpair); sock_put(skpair); /* It may now die */ unix_peer(sk) = NULL; } @@ -646,7 +769,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ 
init_waitqueue_head(&u->peer_wait); - unix_insert_socket(unix_sockets_unbound, sk); + init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); + unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) atomic_long_dec(&unix_nr_socks); @@ -1020,6 +1144,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (unix_peer(sk)) { struct sock *old_peer = unix_peer(sk); unix_peer(sk) = other; + unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); + unix_state_double_unlock(sk, other); if (other != old_peer) @@ -1459,6 +1585,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, long timeo; struct scm_cookie tmp_scm; int max_level; + int sk_locked; if (NULL == siocb->scm) siocb->scm = &tmp_scm; @@ -1527,12 +1654,14 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out_free; } + sk_locked = 0; unix_state_lock(other); +restart_locked: err = -EPERM; if (!unix_may_send(sk, other)) goto out_unlock; - if (sock_flag(other, SOCK_DEAD)) { + if (unlikely(sock_flag(other, SOCK_DEAD))) { /* * Check with 1003.1g - what should * datagram error @@ -1540,10 +1669,14 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, unix_state_unlock(other); sock_put(other); + if (!sk_locked) + unix_state_lock(sk); + err = 0; - unix_state_lock(sk); if (unix_peer(sk) == other) { unix_peer(sk) = NULL; + unix_dgram_peer_wake_disconnect_wakeup(sk, other); + unix_state_unlock(sk); unix_dgram_disconnected(sk, other); @@ -1569,21 +1702,43 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out_unlock; } - if (unix_peer(other) != sk && unix_recvq_full(other)) { - if (!timeo) { - err = -EAGAIN; - goto out_unlock; + /* other == sk && unix_peer(other) != sk if + * - unix_peer(sk) == NULL, destination address bound to sk + * - unix_peer(sk) == sk by time of get but disconnected before lock + */ + if (other != sk && + unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { + if (timeo) { + timeo = unix_wait_for_peer(other, timeo); + + err = sock_intr_errno(timeo); + if (signal_pending(current)) + goto out_free; + + goto restart; } - timeo = unix_wait_for_peer(other, timeo); + if (!sk_locked) { + unix_state_unlock(other); + unix_state_double_lock(sk, other); + } - err = sock_intr_errno(timeo); - if (signal_pending(current)) - goto out_free; + if (unix_peer(sk) != other || + unix_dgram_peer_wake_me(sk, other)) { + err = -EAGAIN; + sk_locked = 1; + goto out_unlock; + } - goto restart; + if (!sk_locked) { + sk_locked = 1; + goto restart_locked; + } } + if (unlikely(sk_locked)) + unix_state_unlock(sk); + if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); maybe_add_creds(skb, sock, other); @@ -1597,6 +1752,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, return len; out_unlock: + if (sk_locked) + unix_state_unlock(sk); unix_state_unlock(other); out_free: kfree_skb(skb); @@ -2229,14 +2386,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, return mask; writable = unix_writable(sk); - other = unix_peer_get(sk); - if (other) { - if (unix_peer(other) != sk) { - sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); - if (unix_recvq_full(other)) - writable = 0; - } - sock_put(other); + if (writable) { + unix_state_lock(sk); + + other = unix_peer(sk); + if (other && unix_peer(other) != sk && + unix_recvq_full(other) && + unix_dgram_peer_wake_me(sk, other)) + writable = 0; + + unix_state_unlock(sk); } if (writable) @@ -2248,47 
+2407,58 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, } #ifdef CONFIG_PROC_FS -static struct sock *first_unix_socket(int *i) -{ - for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); - } - return NULL; -} -static struct sock *next_unix_socket(int *i, struct sock *s) -{ - struct sock *next = sk_next(s); - /* More in this chain? */ - if (next) - return next; - /* Look for next non-empty chain. */ - for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { - if (!hlist_empty(&unix_socket_table[*i])) - return __sk_head(&unix_socket_table[*i]); - } - return NULL; -} +#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1) + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) struct unix_iter_state { struct seq_net_private p; - int i; }; -static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos) +static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos) { - struct unix_iter_state *iter = seq->private; - loff_t off = 0; - struct sock *s; + unsigned long offset = get_offset(*pos); + unsigned long bucket = get_bucket(*pos); + struct sock *sk; + unsigned long count = 0; - for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { - if (sock_net(s) != seq_file_net(seq)) + for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) { + if (sock_net(sk) != seq_file_net(seq)) continue; - if (off == pos) - return s; - ++off; + if (++count == offset) + break; } + + return sk; +} + +static struct sock *unix_next_socket(struct seq_file *seq, + struct sock *sk, + loff_t *pos) +{ + unsigned long bucket; + + while (sk > (struct sock *)SEQ_START_TOKEN) { + sk = sk_next(sk); + if (!sk) + goto next_bucket; + if (sock_net(sk) == seq_file_net(seq)) + return sk; + } + + do { + sk = unix_from_bucket(seq, pos); + if (sk) + return sk; + +next_bucket: + bucket = get_bucket(*pos) + 1; + *pos = set_bucket_offset(bucket, 1); + } while (bucket < ARRAY_SIZE(unix_socket_table)); + return NULL; } @@ -2296,22 +2466,20 @@ static void *unix_seq_start(struct seq_file *seq, loff_t *pos) __acquires(unix_table_lock) { spin_lock(&unix_table_lock); - return *pos ? 
unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN; + + if (!*pos) + return SEQ_START_TOKEN; + + if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) + return NULL; + + return unix_next_socket(seq, NULL, pos); } static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct unix_iter_state *iter = seq->private; - struct sock *sk = v; ++*pos; - - if (v == SEQ_START_TOKEN) - sk = first_unix_socket(&iter->i); - else - sk = next_unix_socket(&iter->i, sk); - while (sk && (sock_net(sk) != seq_file_net(seq))) - sk = next_unix_socket(&iter->i, sk); - return sk; + return unix_next_socket(seq, v, pos); } static void unix_seq_stop(struct seq_file *seq, void *v) diff --git a/net/unix/diag.c b/net/unix/diag.c index 2656840cf20..3c0e5569fd3 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -196,7 +196,9 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) num = s_num = cb->args[1]; spin_lock(&unix_table_lock); - for (slot = s_slot; slot <= UNIX_HASH_SIZE; s_num = 0, slot++) { + for (slot = s_slot; + slot < ARRAY_SIZE(unix_socket_table); + s_num = 0, slot++) { struct sock *sk; struct hlist_node *node; @@ -229,7 +231,7 @@ static struct sock *unix_lookup_by_ino(int ino) struct sock *sk; spin_lock(&unix_table_lock); - for (i = 0; i <= UNIX_HASH_SIZE; i++) { + for (i = 0; i < ARRAY_SIZE(unix_socket_table); i++) { struct hlist_node *node; sk_for_each(sk, node, &unix_socket_table[i]) diff --git a/security/keys/proc.c b/security/keys/proc.c index 49bbc97943a..3f7b4102a35 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -188,7 +188,7 @@ static int proc_keys_show(struct seq_file *m, void *v) struct timespec now; unsigned long timo; key_ref_t key_ref, skey_ref; - char xbuf[12]; + char xbuf[16]; int rc; key_ref = make_key_ref(key, 0);
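
Note on the lib/int_sqrt.c hunk above: the rewrite replaces the old loop with the shift-and-subtract integer square root. Below is a minimal userspace sketch of that same algorithm with a small self-check; it is not part of the patch, and the isqrt/main names are illustrative only.

#include <assert.h>
#include <limits.h>

/* Userspace copy of the shift-and-subtract integer square root used in the
 * lib/int_sqrt.c change above; BITS_PER_LONG is derived from CHAR_BIT here. */
static unsigned long isqrt(unsigned long x)
{
	unsigned long b, m, y = 0;

	if (x <= 1)
		return x;

	m = 1UL << (sizeof(unsigned long) * CHAR_BIT - 2);
	while (m != 0) {
		b = y + m;
		y >>= 1;
		if (x >= b) {
			x -= b;
			y += m;
		}
		m >>= 2;
	}
	return y;
}

int main(void)
{
	unsigned long n;

	/* isqrt(n*n) == n and isqrt(n*n + 2n) == n for every n in range,
	 * since (n+1)^2 = n^2 + 2n + 1; n <= 65535 keeps n*n + 2n within a
	 * 32-bit unsigned long as well. */
	for (n = 0; n <= 65535; n++) {
		assert(isqrt(n * n) == n);
		assert(isqrt(n * n + 2 * n) == n);
	}
	return 0;
}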