Convert ram_list to RCU
Allow "unlocked" reads of the ram_list by using an RCU-enabled QLIST.

The ramlist mutex is kept.  call_rcu callbacks are run with the iothread
lock taken, but that may change in the future.  Writers still take the
ramlist mutex, but they no longer need to assume that the iothread lock
is taken.
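
To make the writer-side rules concrete, here is a sketch of block
removal under the new scheme; the helper remove_ram_block() and the
reclaim function name are illustrative assumptions, not code from this
commit:

    /* Illustrative sketch only: remove_ram_block() and
     * reclaim_ramblock() are assumed names, not from this commit.
     */
    static void remove_ram_block(RAMBlock *block)
    {
        qemu_mutex_lock_ramlist();       /* writers still serialize here */
        QLIST_REMOVE_RCU(block, next);
        /* Write list before version, pairing with the reader's smp_rmb() */
        smp_wmb();
        ram_list.version++;
        qemu_mutex_unlock_ramlist();
        /* Defer freeing until all current RCU readers are done */
        call_rcu(block, reclaim_ramblock, rcu);
    }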

Readers of the list, instead, no longer require either the iothread
or ramlist mutex, but they need to use rcu_read_lock() and
rcu_read_unlock().
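
For instance, a minimal reader now looks like this (a hypothetical
helper for illustration; only the RCU primitives come from this
commit):

    /* Hypothetical reader: walks the block list with no mutex held. */
    static uint64_t ram_block_count(void)
    {
        RAMBlock *block;
        uint64_t count = 0;

        rcu_read_lock();
        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
            count++;
        }
        rcu_read_unlock();
        return count;
    }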

One place in arch_init.c was downgrading from write side to read side
like this:

    qemu_mutex_lock_iothread()
    qemu_mutex_lock_ramlist()
    ...
    qemu_mutex_unlock_iothread()
    ...
    qemu_mutex_unlock_ramlist()

and the equivalent idiom is:

    qemu_mutex_lock_ramlist()
    rcu_read_lock()
    ...
    qemu_mutex_unlock_ramlist()
    ...
    rcu_read_unlock()

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Mike Day <ncmike@ncultra.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Mike Day authored and bonzini committed Feb 16, 2015
1 parent 0d53d9f commit 0dc3f44
Showing 3 changed files with 121 additions and 52 deletions.
arch_init.c: 65 changes (47 additions, 18 deletions)
@@ -52,6 +52,7 @@
 #include "exec/ram_addr.h"
 #include "hw/acpi/acpi.h"
 #include "qemu/host-utils.h"
+#include "qemu/rcu_queue.h"
 
 #ifdef DEBUG_ARCH_INIT
 #define DPRINTF(fmt, ...) \
@@ -523,9 +524,12 @@ static void migration_bitmap_sync(void)
     trace_migration_bitmap_sync_start();
     address_space_sync_dirty_bitmap(&address_space_memory);
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
     }
+    rcu_read_unlock();
+
     trace_migration_bitmap_sync_end(migration_dirty_pages
                                     - num_dirty_pages_init);
     num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
@@ -648,6 +652,8 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
 /*
  * ram_find_and_save_block: Finds a page to send and sends it to f
  *
+ * Called within an RCU critical section.
+ *
  * Returns:  The number of bytes written.
  *           0 means no dirty pages
  */
@@ -661,7 +667,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
     MemoryRegion *mr;
 
     if (!block)
-        block = QLIST_FIRST(&ram_list.blocks);
+        block = QLIST_FIRST_RCU(&ram_list.blocks);
 
     while (true) {
         mr = block->mr;
@@ -672,9 +678,9 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
         }
         if (offset >= block->used_length) {
             offset = 0;
-            block = QLIST_NEXT(block, next);
+            block = QLIST_NEXT_RCU(block, next);
             if (!block) {
-                block = QLIST_FIRST(&ram_list.blocks);
+                block = QLIST_FIRST_RCU(&ram_list.blocks);
                 complete_round = true;
                 ram_bulk_stage = false;
             }
@@ -728,10 +734,10 @@ uint64_t ram_bytes_total(void)
     RAMBlock *block;
     uint64_t total = 0;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
         total += block->used_length;
-    }
-
+    rcu_read_unlock();
     return total;
 }
 
@@ -777,6 +783,13 @@ static void reset_ram_globals(void)
 
 #define MAX_WAIT 50 /* ms, half buffered_file limit */
 
+
+/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
+ * long-running RCU critical section.  When rcu-reclaims in the code
+ * start to become numerous it will be necessary to reduce the
+ * granularity of these critical sections.
+ */
+
 static int ram_save_setup(QEMUFile *f, void *opaque)
 {
     RAMBlock *block;
@@ -820,6 +833,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     /* iothread lock needed for ram_list.dirty_memory[] */
     qemu_mutex_lock_iothread();
     qemu_mutex_lock_ramlist();
+    rcu_read_lock();
     bytes_transferred = 0;
     reset_ram_globals();
 
@@ -832,7 +846,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
      * gaps due to alignment or unplugs.
      */
     migration_dirty_pages = 0;
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         uint64_t block_pages;
 
         block_pages = block->used_length >> TARGET_PAGE_BITS;
@@ -841,17 +855,18 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 
     memory_global_dirty_log_start();
     migration_bitmap_sync();
+    qemu_mutex_unlock_ramlist();
     qemu_mutex_unlock_iothread();
 
     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         qemu_put_byte(f, strlen(block->idstr));
         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
         qemu_put_be64(f, block->used_length);
     }
 
-    qemu_mutex_unlock_ramlist();
+    rcu_read_unlock();
 
     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
@@ -868,12 +883,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     int64_t t0;
     int total_sent = 0;
 
-    qemu_mutex_lock_ramlist();
-
+    rcu_read_lock();
     if (ram_list.version != last_version) {
         reset_ram_globals();
     }
 
+    /* Read version before ram_list.blocks */
+    smp_rmb();
+
     ram_control_before_iterate(f, RAM_CONTROL_ROUND);
 
     t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -904,8 +921,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
         }
         i++;
     }
-
-    qemu_mutex_unlock_ramlist();
+    rcu_read_unlock();
 
     /*
      * Must occur before EOS (or any QEMUFile operation)
@@ -933,7 +949,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
 /* Called with iothread lock */
 static int ram_save_complete(QEMUFile *f, void *opaque)
 {
-    qemu_mutex_lock_ramlist();
+    rcu_read_lock();
+
     migration_bitmap_sync();
 
     ram_control_before_iterate(f, RAM_CONTROL_FINISH);
@@ -955,7 +972,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
     ram_control_after_iterate(f, RAM_CONTROL_FINISH);
     migration_end();
 
-    qemu_mutex_unlock_ramlist();
+    rcu_read_unlock();
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     return 0;
@@ -969,7 +986,9 @@ static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
 
     if (remaining_size < max_size) {
         qemu_mutex_lock_iothread();
+        rcu_read_lock();
         migration_bitmap_sync();
+        rcu_read_unlock();
         qemu_mutex_unlock_iothread();
         remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
     }
@@ -1011,6 +1030,9 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
     return 0;
 }
 
+/* Must be called from within a rcu critical section.
+ * Returns a pointer from within the RCU-protected ram_list.
+ */
 static inline void *host_from_stream_offset(QEMUFile *f,
                                             ram_addr_t offset,
                                             int flags)
@@ -1032,7 +1054,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
     qemu_get_buffer(f, (uint8_t *)id, len);
     id[len] = 0;
 
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         if (!strncmp(id, block->idstr, sizeof(id)) &&
             block->max_length > offset) {
             return memory_region_get_ram_ptr(block->mr) + offset;
@@ -1065,6 +1087,12 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
         ret = -EINVAL;
     }
 
+    /* This RCU critical section can be very long running.
+     * When RCU reclaims in the code start to become numerous,
+     * it will be necessary to reduce the granularity of this
+     * critical section.
+     */
+    rcu_read_lock();
     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
         ram_addr_t addr, total_ram_bytes;
         void *host;
@@ -1089,7 +1117,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
             id[len] = 0;
             length = qemu_get_be64(f);
 
-            QLIST_FOREACH(block, &ram_list.blocks, next) {
+            QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
                 if (!strncmp(id, block->idstr, sizeof(id))) {
                     if (length != block->used_length) {
                         Error *local_err = NULL;
@@ -1163,6 +1191,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
         }
     }
 
+    rcu_read_unlock();
     DPRINTF("Completed load of VM with exit code %d seq iteration "
             "%" PRIu64 "\n", ret, seq_iter);
     return ret;
