From f0ab6f109630940146cbaf47d0cd99993ddba824 Mon Sep 17 00:00:00 2001
From: Max Reitz <mreitz@redhat.com>
Date: Fri, 27 Feb 2015 14:54:39 -0500
Subject: [PATCH] block/vdi: Add locking for parallel requests

When allocating a new cluster, the first write to it must be the one
doing the allocation, because that one pads its write request to the
cluster size; if another write to that cluster is executed before it,
that write will be overwritten due to the padding.

See https://bugs.launchpad.net/qemu/+bug/1422307 for what can go wrong
without this patch.

Cc: qemu-stable <qemu-stable@nongnu.org>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/vdi.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/block/vdi.c b/block/vdi.c
index 74030c6e30e6..53bd02fe227e 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -53,6 +53,7 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "migration/migration.h"
+#include "block/coroutine.h"
 
 #if defined(CONFIG_UUID)
 #include <uuid/uuid.h>
@@ -196,6 +197,8 @@ typedef struct {
     /* VDI header (converted to host endianness). */
     VdiHeader header;
 
+    CoMutex write_lock;
+
     Error *migration_blocker;
 } BDRVVdiState;
 
@@ -504,6 +507,8 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
               "vdi", bdrv_get_device_name(bs), "live migration");
     migrate_add_blocker(s->migration_blocker);
 
+    qemu_co_mutex_init(&s->write_lock);
+
     return 0;
 
  fail_free_bmap:
@@ -639,11 +644,31 @@ static int vdi_co_write(BlockDriverState *bs,
                    buf, n_sectors * SECTOR_SIZE);
             memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
                    (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
+
+            /* Note that this coroutine does not yield anywhere from reading the
+             * bmap entry until here, so in regards to all the coroutines trying
+             * to write to this cluster, the one doing the allocation will
+             * always be the first to try to acquire the lock.
+             * Therefore, it is also the first that will actually be able to
+             * acquire the lock and thus the padded cluster is written before
+             * the other coroutines can write to the affected area. */
+            qemu_co_mutex_lock(&s->write_lock);
             ret = bdrv_write(bs->file, offset, block, s->block_sectors);
+            qemu_co_mutex_unlock(&s->write_lock);
         } else {
             uint64_t offset = s->header.offset_data / SECTOR_SIZE +
                               (uint64_t)bmap_entry * s->block_sectors +
                               sector_in_block;
+            qemu_co_mutex_lock(&s->write_lock);
+            /* This lock is only used to make sure the following write operation
+             * is executed after the write issued by the coroutine allocating
+             * this cluster, therefore we do not need to keep it locked.
+             * As stated above, the allocating coroutine will always try to lock
+             * the mutex before all the other concurrent accesses to that
+             * cluster, therefore at this point we can be absolutely certain
+             * that that write operation has returned (there may be other writes
+             * in flight, but they do not concern this very operation). */
+            qemu_co_mutex_unlock(&s->write_lock);
             ret = bdrv_write(bs->file, offset, buf, n_sectors);
         }