Skip to content

Commit

Permalink
xen/evtchn: dynamically grow pending event channel ring
Browse files Browse the repository at this point in the history
If more than 1024 event channels are bound to an evtchn device then it
is possible (even with well-behaved applications) for the ring to
overflow and events to be lost (reported as an -EFBIG error).

Dynamically increase the size of the ring so there is always enough
space for all bound events.  Well-behaved applications that only unmask
events after draining them from the ring can thus no longer lose
events.

However, an application could unmask an event before draining it,
allowing multiple entries per port to accumulate in the ring, and an
overflow could still occur.  So the overflow detection and reporting
is retained.

The ring size is initially only 64 entries so the common use case of
an application only binding a few events will use less memory than
before.  The ring size may grow to 512 KiB (enough for all 2^17
possible channels).  This order 7 kmalloc() may fail due to memory
fragmentation, so we fall back to trying vmalloc().

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
  • Loading branch information
David Vrabel committed Nov 26, 2015
1 parent b4ff838 commit 8620015
Showing 1 changed file with 107 additions and 16 deletions.
123 changes: 107 additions & 16 deletions drivers/xen/evtchn.c
Expand Up @@ -49,6 +49,8 @@
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

#include <xen/xen.h>
#include <xen/events.h>
Expand All @@ -58,10 +60,10 @@
struct per_user_data {
struct mutex bind_mutex; /* serialize bind/unbind operations */
struct rb_root evtchns;
unsigned int nr_evtchns;

/* Notification ring, accessed via /dev/xen/evtchn. */
#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t))
#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
unsigned int ring_size;
evtchn_port_t *ring;
unsigned int ring_cons, ring_prod, ring_overflow;
struct mutex ring_cons_mutex; /* protect against concurrent readers */
Expand All @@ -80,10 +82,41 @@ struct user_evtchn {
bool enabled;
};

static evtchn_port_t *evtchn_alloc_ring(unsigned int size)
{
evtchn_port_t *ring;
size_t s = size * sizeof(*ring);

ring = kmalloc(s, GFP_KERNEL);
if (!ring)
ring = vmalloc(s);

return ring;
}

/*
 * Release a ring obtained from evtchn_alloc_ring().  kvfree() copes
 * with both kmalloc()ed and vmalloc()ed memory, and is a no-op for
 * NULL.
 */
static void evtchn_free_ring(evtchn_port_t *ring)
{
	kvfree(ring);
}

/*
 * Reduce a free-running ring index to a slot offset within the ring.
 * Relies on u->ring_size being a power of two.
 */
static unsigned int evtchn_ring_offset(struct per_user_data *u,
				       unsigned int idx)
{
	unsigned int mask = u->ring_size - 1;

	return idx & mask;
}

/* Return a pointer to the ring slot for free-running index @idx. */
static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u,
					unsigned int idx)
{
	return &u->ring[evtchn_ring_offset(u, idx)];
}

static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;

u->nr_evtchns++;

while (*new) {
struct user_evtchn *this;

Expand All @@ -107,6 +140,7 @@ static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)

/*
 * Unlink @evtchn from @u's rbtree of bound event channels, drop the
 * bound-channel count, and free it.
 */
static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
{
	rb_erase(&evtchn->node, &u->evtchns);
	u->nr_evtchns--;
	kfree(evtchn);
}
Expand Down Expand Up @@ -144,8 +178,8 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)

spin_lock(&u->ring_prod_lock);

if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
if ((u->ring_prod - u->ring_cons) < u->ring_size) {
*evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
wmb(); /* Ensure ring contents visible */
if (u->ring_cons == u->ring_prod++) {
wake_up_interruptible(&u->evtchn_wait);
Expand Down Expand Up @@ -200,10 +234,10 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
}

/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
if (((c ^ p) & u->ring_size) != 0) {
bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) *
sizeof(evtchn_port_t);
bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t);
} else {
bytes1 = (p - c) * sizeof(evtchn_port_t);
bytes2 = 0;
Expand All @@ -219,7 +253,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,

rc = -EFAULT;
rmb(); /* Ensure that we see the port before we copy it. */
if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
((bytes2 != 0) &&
copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
goto unlock_out;
Expand Down Expand Up @@ -278,6 +312,66 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
return rc;
}

/*
 * Grow the notification ring so that it always has one slot per bound
 * event channel.  The first allocation is 64 entries; thereafter the
 * size doubles.  Called from the bind path with no ring locks held;
 * both the consumer mutex and the producer lock are taken while the
 * rings are swapped.
 *
 * Returns 0 on success (including when no resize is needed) or
 * -ENOMEM if a larger ring cannot be allocated.
 */
static int evtchn_resize_ring(struct per_user_data *u)
{
	unsigned int new_size;
	evtchn_port_t *new_ring, *old_ring;

	/*
	 * Ensure the ring is large enough to capture all possible
	 * events. i.e., one free slot for each bound event.
	 */
	if (u->nr_evtchns <= u->ring_size)
		return 0;

	if (u->ring_size == 0)
		new_size = 64;
	else
		new_size = 2 * u->ring_size;

	new_ring = evtchn_alloc_ring(new_size);
	if (!new_ring)
		return -ENOMEM;

	old_ring = u->ring;

	/*
	 * Access to the ring contents is serialized by either the
	 * prod /or/ cons lock so take both when resizing.
	 */
	mutex_lock(&u->ring_cons_mutex);
	spin_lock_irq(&u->ring_prod_lock);

	/*
	 * Copy the old ring contents to the new ring.
	 *
	 * ring_cons and ring_prod are free-running indices masked by
	 * the ring size at the time of each access.  Since the new
	 * ring is exactly twice the old size, an index that masked to
	 * offset i in the old ring masks to either i or i + old_size
	 * in the new one.  Copying the old contents to *both* halves
	 * makes every candidate slot correct, and also handles a
	 * completely full ring (ring_prod - ring_cons == ring_size,
	 * i.e. masked prod == masked cons), which a chunk-wise copy
	 * based on masked offsets would wrongly treat as empty.
	 *
	 * +---------+    +------------------+
	 * |34567  12| -> |34567  1234567  12|
	 * +-----p-c-+    +-------c------p---+
	 *
	 * On the very first resize u->ring_size is 0, so both copies
	 * are zero-length no-ops.
	 */
	memcpy(new_ring, old_ring, u->ring_size * sizeof(*u->ring));
	memcpy(new_ring + u->ring_size, old_ring,
	       u->ring_size * sizeof(*u->ring));

	u->ring = new_ring;
	u->ring_size = new_size;

	spin_unlock_irq(&u->ring_prod_lock);
	mutex_unlock(&u->ring_cons_mutex);

	evtchn_free_ring(old_ring);

	return 0;
}

static int evtchn_bind_to_user(struct per_user_data *u, int port)
{
struct user_evtchn *evtchn;
Expand Down Expand Up @@ -305,6 +399,10 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
if (rc < 0)
goto err;

rc = evtchn_resize_ring(u);
if (rc < 0)
goto err;

rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
u->name, evtchn);
if (rc < 0)
Expand Down Expand Up @@ -503,13 +601,6 @@ static int evtchn_open(struct inode *inode, struct file *filp)

init_waitqueue_head(&u->evtchn_wait);

u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
if (u->ring == NULL) {
kfree(u->name);
kfree(u);
return -ENOMEM;
}

mutex_init(&u->bind_mutex);
mutex_init(&u->ring_cons_mutex);
spin_lock_init(&u->ring_prod_lock);
Expand All @@ -532,7 +623,7 @@ static int evtchn_release(struct inode *inode, struct file *filp)
evtchn_unbind_from_user(u, evtchn);
}

free_page((unsigned long)u->ring);
evtchn_free_ring(u->ring);
kfree(u->name);
kfree(u);

Expand Down

0 comments on commit 8620015

Please sign in to comment.