-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Hyper-V Dynamic Memory Protocol driver (hv-balloon) hot-add support
One of the advantages of using this protocol over ACPI-based PC DIMM hotplug is that it allows hot-adding memory at a much smaller granularity, because the ACPI DIMM slot limit does not apply. In order to enable this functionality, a new memory backend needs to be created and provided to the driver via the "memdev" parameter. This can be achieved by, for example, adding "-object memory-backend-ram,id=mem1,size=32G" to the QEMU command line and then instantiating the driver with the "memdev=mem1" parameter. The device will try to use multiple memslots to cover the memory backend in order to reduce the size of metadata for the not-yet-hot-added part of the memory backend. Co-developed-by: David Hildenbrand <david@redhat.com> Acked-by: David Hildenbrand <david@redhat.com> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
- Loading branch information
1 parent
0d9e8c0
commit 99a4706
Showing
5 changed files
with
878 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
/* | ||
* QEMU Hyper-V Dynamic Memory Protocol driver | ||
* | ||
* Copyright (C) 2020-2023 Oracle and/or its affiliates. | ||
* | ||
* This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
* See the COPYING file in the top-level directory. | ||
*/ | ||
|
||
#include "hv-balloon-internal.h" | ||
#include "hv-balloon-our_range_memslots.h" | ||
#include "trace.h" | ||
|
||
/* OurRange */ | ||
/*
 * Set up @our_range to track the page span [@start, @start + @count):
 * nothing hot-added yet, no unusable tail, empty removal trees.
 */
static void our_range_init(OurRange *our_range, uint64_t start, uint64_t count)
{
    /* Reject spans that would wrap around the 64-bit page-number space. */
    assert(count <= UINT64_MAX - start);

    our_range->range.start = start;
    our_range->range.count = count;

    /* mark the whole range as unused but for potential use */
    our_range->added = 0;
    our_range->unusable_tail = 0;

    hvb_page_range_tree_init(&our_range->removed_guest);
    hvb_page_range_tree_init(&our_range->removed_both);
}
|
||
/* Release both removal-tracking trees owned by @our_range. */
static void our_range_destroy(OurRange *our_range)
{
    hvb_page_range_tree_destroy(&our_range->removed_both);
    hvb_page_range_tree_destroy(&our_range->removed_guest);
}
|
||
/*
 * Forget all recorded removed-page information by tearing each tree
 * down and re-initializing it empty.
 */
void hvb_our_range_clear_removed_trees(OurRange *our_range)
{
    hvb_page_range_tree_destroy(&our_range->removed_guest);
    hvb_page_range_tree_init(&our_range->removed_guest);

    hvb_page_range_tree_destroy(&our_range->removed_both);
    hvb_page_range_tree_init(&our_range->removed_both);
}
|
||
/*
 * Account for @additional_size more pages having been hot-added to the
 * guest.  The new total must neither overflow nor (together with the
 * unusable tail) exceed the size of the tracked range.
 */
void hvb_our_range_mark_added(OurRange *our_range, uint64_t additional_size)
{
    /* Same condition as additional_size <= UINT64_MAX - added. */
    assert(our_range->added <= UINT64_MAX - additional_size);

    our_range->added += additional_size;

    assert(our_range->added <= UINT64_MAX - our_range->unusable_tail);
    assert(our_range->added + our_range->unusable_tail <=
           our_range->range.count);
}
|
||
/* OurRangeMemslots */ | ||
static void our_range_memslots_init_slots(OurRangeMemslots *our_range, | ||
MemoryRegion *backing_mr, | ||
Object *memslot_owner) | ||
{ | ||
OurRangeMemslotsSlots *memslots = &our_range->slots; | ||
unsigned int idx; | ||
uint64_t memslot_offset; | ||
|
||
assert(memslots->count > 0); | ||
memslots->slots = g_new0(MemoryRegion, memslots->count); | ||
|
||
/* Initialize our memslots, but don't map them yet. */ | ||
assert(memslots->size_each > 0); | ||
for (idx = 0, memslot_offset = 0; idx < memslots->count; | ||
idx++, memslot_offset += memslots->size_each) { | ||
uint64_t memslot_size; | ||
g_autofree char *name = NULL; | ||
|
||
/* The size of the last memslot might be smaller. */ | ||
if (idx == memslots->count - 1) { | ||
uint64_t region_size; | ||
|
||
assert(our_range->mr); | ||
region_size = memory_region_size(our_range->mr); | ||
memslot_size = region_size - memslot_offset; | ||
} else { | ||
memslot_size = memslots->size_each; | ||
} | ||
|
||
name = g_strdup_printf("memslot-%u", idx); | ||
memory_region_init_alias(&memslots->slots[idx], memslot_owner, name, | ||
backing_mr, memslot_offset, memslot_size); | ||
/* | ||
* We want to be able to atomically and efficiently activate/deactivate | ||
* individual memslots without affecting adjacent memslots in memory | ||
* notifiers. | ||
*/ | ||
memory_region_set_unmergeable(&memslots->slots[idx], true); | ||
} | ||
|
||
memslots->mapped_count = 0; | ||
} | ||
|
||
OurRangeMemslots *hvb_our_range_memslots_new(uint64_t addr, | ||
MemoryRegion *parent_mr, | ||
MemoryRegion *backing_mr, | ||
Object *memslot_owner, | ||
unsigned int memslot_count, | ||
uint64_t memslot_size) | ||
{ | ||
OurRangeMemslots *our_range; | ||
|
||
our_range = g_malloc(sizeof(*our_range)); | ||
our_range_init(&our_range->range, | ||
addr / HV_BALLOON_PAGE_SIZE, | ||
memory_region_size(parent_mr) / HV_BALLOON_PAGE_SIZE); | ||
our_range->slots.size_each = memslot_size; | ||
our_range->slots.count = memslot_count; | ||
our_range->mr = parent_mr; | ||
our_range_memslots_init_slots(our_range, backing_mr, memslot_owner); | ||
|
||
return our_range; | ||
} | ||
|
||
/*
 * Unmap every currently mapped memslot in one transaction, then destroy
 * all slot MemoryRegions and release the slots array.
 */
static void our_range_memslots_free_memslots(OurRangeMemslots *our_range)
{
    OurRangeMemslotsSlots *memslots = &our_range->slots;
    uint64_t offset = 0;
    unsigned int i;

    /* Batch the unmaps so listeners see a single address space update. */
    memory_region_transaction_begin();
    for (i = 0; i < memslots->mapped_count; i++) {
        trace_hv_balloon_unmap_slot(i, memslots->count, offset);
        assert(memory_region_is_mapped(&memslots->slots[i]));
        memory_region_del_subregion(our_range->mr, &memslots->slots[i]);
        offset += memslots->size_each;
    }
    memory_region_transaction_commit();

    /* Every slot was parented at init time, whether mapped or not. */
    for (i = 0; i < memslots->count; i++) {
        object_unparent(OBJECT(&memslots->slots[i]));
    }

    g_free(memslots->slots);
    memslots->slots = NULL;
}
|
||
/*
 * Tear down @our_range completely: discard the backing RAM its slots
 * aliased, unmap and destroy the memslots, and free all tracking state.
 */
void hvb_our_range_memslots_free(OurRangeMemslots *our_range)
{
    OurRangeMemslotsSlots *memslots = &our_range->slots;
    MemoryRegion *hostmem_mr;
    RAMBlock *rb;

    assert(memslots->count > 0);
    assert(memslots->slots);

    /*
     * All slots alias the same backing MR (see init_slots), so slot 0 is
     * enough to reach the RAMBlock whose pages we return to the host.
     */
    hostmem_mr = memslots->slots[0].alias;
    rb = hostmem_mr->ram_block;
    ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));

    our_range_memslots_free_memslots(our_range);
    our_range_destroy(&our_range->range);
    g_free(our_range);
}
|
||
/*
 * Make sure the first (range.added + @additional_map_size) pages of the
 * range are covered by mapped memslots.  Slots already mapped are left
 * untouched; any newly needed ones are activated together.
 */
void hvb_our_range_memslots_ensure_mapped_additional(OurRangeMemslots *our_range,
                                                     uint64_t additional_map_size)
{
    OurRangeMemslotsSlots *memslots = &our_range->slots;
    uint64_t total_map_size;
    unsigned int idx;
    uint64_t offset;

    /* Target coverage in bytes. */
    total_map_size = (our_range->range.added + additional_map_size) *
        HV_BALLOON_PAGE_SIZE;

    /* Resume right after the last slot that is already mapped. */
    idx = memslots->mapped_count;
    assert(memslots->size_each > 0);
    offset = idx * memslots->size_each;

    /*
     * Activate all memslots covered by the newly added region in a single
     * transaction.
     *
     * Slots start at consecutive offsets, so once one begins at or past
     * the end of the region to map, every following one does too.
     */
    memory_region_transaction_begin();
    while (idx < memslots->count && offset < total_map_size) {
        /*
         * Instead of enabling/disabling memslot, we add/remove them. This
         * should make address space updates faster, because we don't have to
         * loop over many disabled subregions.
         */
        trace_hv_balloon_map_slot(idx, memslots->count, offset);
        assert(!memory_region_is_mapped(&memslots->slots[idx]));
        memory_region_add_subregion(our_range->mr, offset,
                                    &memslots->slots[idx]);

        memslots->mapped_count++;
        idx++;
        offset += memslots->size_each;
    }
    memory_region_transaction_commit();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
/* | ||
* QEMU Hyper-V Dynamic Memory Protocol driver | ||
* | ||
* Copyright (C) 2020-2023 Oracle and/or its affiliates. | ||
* | ||
* This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
* See the COPYING file in the top-level directory. | ||
*/ | ||
|
||
#ifndef HW_HYPERV_HV_BALLOON_OUR_RANGE_MEMSLOTS_H | ||
#define HW_HYPERV_HV_BALLOON_OUR_RANGE_MEMSLOTS_H | ||
|
||
#include "qemu/osdep.h" | ||
|
||
#include "exec/memory.h" | ||
#include "qom/object.h" | ||
#include "hv-balloon-page_range_tree.h" | ||
|
||
/* OurRange */
/*
 * Plain cast helper; valid for any object that embeds OurRange as its
 * first member (OurRangeMemslots does).
 */
#define OUR_RANGE(ptr) ((OurRange *)(ptr))

/* "our range" means the memory range owned by this driver (for hot-adding) */
typedef struct OurRange {
    /* The whole page span (start page number + page count) we control */
    PageRange range;

    /* How many pages were hot-added to the guest */
    uint64_t added;

    /* Pages at the end not currently usable */
    uint64_t unusable_tail;

    /* Memory removed from the guest */
    PageRangeTree removed_guest, removed_both;
} OurRange;
|
||
/* First page number in the range that has not been hot-added yet. */
static inline uint64_t our_range_get_remaining_start(OurRange *our_range)
{
    return our_range->added + our_range->range.start;
}
|
||
/* Pages still available for hot-adding: total minus added and unusable tail. */
static inline uint64_t our_range_get_remaining_size(OurRange *our_range)
{
    return our_range->range.count -
        (our_range->added + our_range->unusable_tail);
}
|
||
void hvb_our_range_mark_added(OurRange *our_range, uint64_t additional_size); | ||
|
||
/* Declare every not-yet-added page at the end of the range unusable. */
static inline void our_range_mark_remaining_unusable(OurRange *our_range)
{
    uint64_t remaining = our_range->range.count - our_range->added;

    our_range->unusable_tail = remaining;
}
|
||
/*
 * Select a removal-tracking tree: removed_both when @both is set,
 * removed_guest otherwise.
 */
static inline PageRangeTree our_range_get_removed_tree(OurRange *our_range,
                                                       bool both)
{
    return both ? our_range->removed_both : our_range->removed_guest;
}
|
||
/* True when the selected removal-tracking tree contains no page ranges. */
static inline bool our_range_is_removed_tree_empty(OurRange *our_range,
                                                   bool both)
{
    PageRangeTree tree = our_range_get_removed_tree(our_range, both);

    return page_range_tree_is_empty(tree);
}
|
||
void hvb_our_range_clear_removed_trees(OurRange *our_range); | ||
|
||
/* OurRangeMemslots */
/* Bookkeeping for the alias MemoryRegions that tile an OurRangeMemslots */
typedef struct OurRangeMemslotsSlots {
    /* Nominal size of each memslot (the last one might be smaller) */
    uint64_t size_each;

    /* Slots array and its element count */
    MemoryRegion *slots;
    unsigned int count;

    /* How many slots are currently mapped (always a prefix of the array) */
    unsigned int mapped_count;
} OurRangeMemslotsSlots;
|
||
typedef struct OurRangeMemslots {
    /*
     * The tracked hot-add page range.  Kept as the first member so that
     * casting a pointer to this struct via OUR_RANGE() is valid.
     */
    OurRange range;

    /* Memslots covering our range */
    OurRangeMemslotsSlots slots;

    /* The parent MemoryRegion the memslot aliases are mapped into */
    MemoryRegion *mr;
} OurRangeMemslots;
|
||
OurRangeMemslots *hvb_our_range_memslots_new(uint64_t addr, | ||
MemoryRegion *parent_mr, | ||
MemoryRegion *backing_mr, | ||
Object *memslot_owner, | ||
unsigned int memslot_count, | ||
uint64_t memslot_size); | ||
void hvb_our_range_memslots_free(OurRangeMemslots *our_range); | ||
|
||
G_DEFINE_AUTOPTR_CLEANUP_FUNC(OurRangeMemslots, hvb_our_range_memslots_free) | ||
|
||
void hvb_our_range_memslots_ensure_mapped_additional(OurRangeMemslots *our_range, | ||
uint64_t additional_map_size); | ||
|
||
#endif |
Oops, something went wrong.