Skip to content

Commit

Permalink
Merge tag 'mem-2023-10-12' of https://github.com/davidhildenbrand/qemu
Browse files Browse the repository at this point in the history
…into staging

Hi,

"Host Memory Backends" and "Memory devices" queue ("mem"):
- Support memory devices with multiple memslots
- Support memory devices that dynamically consume memslots
- Support memory devices that can automatically decide on the number of
  memslots to use
- virtio-mem support for exposing memory dynamically via multiple
  memslots
- Some required cleanups/refactorings

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmUn+XMRHGRhdmlkQHJl
# ZGhhdC5jb20ACgkQTd4Q9wD/g1qDHA//T01suTa+uzrcoJHoMWN11S47WnAmbuTo
# vVakucLBPMJAa9xZeCy3OavXaVGpHkw+t6g3OFknof0LfQ5/j9iE3Q1PxURN7g5j
# SJ2WJXCoceM6T4TMhPvVvgEaYjFmESqZB5FZgedMT0QRyhAxMuF9pCkWhk1O3OAV
# JqQKqLFiGcv60AEuBYGZGzgiOUv8EJ5gKwRF4VOdyHIxqZDw1aZXzlcd4TzFZBQ7
# rwW/3ef+sFmUJdmfrSrqcIlQSRrqZ2w95xATDzLTIEEUT3SWqh/E95EZWIz1M0oQ
# NgWgFiLCR1KOj7bWFhLXT7IfyLh0mEysD+P/hY6QwQ4RewWG7EW5UK+JFswssdcZ
# rEj5XpHZzev/wx7hM4bWsoQ+VIvrH7j3uYGyWkcgYRbdDEkWDv2rsT23lwGYNhht
# oBsrdEBELRw6v4C8doq/+sCmHmuxUMqTGwbArCQVnB1XnLxOEkuqlnfq5MORkzNF
# fxbIRx+LRluOllC0HVaDQd8qxRq1+UC5WIpAcDcrouy4HGgi1onWKrXpgjIAbVyH
# M6cENkK7rnRk96gpeXdmrf0h9HqRciAOY8oUsFsvLyKBOCPBWDrLyOQEY5UoSdtD
# m4QpEVgywCy2z1uU/UObeT/UxJy/9EL/Zb+DHoEK06iEhwONoUJjEBYMJD38RMkk
# mwPTB4UAk9g=
# =s69t
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 12 Oct 2023 09:49:39 EDT
# gpg:                using RSA key 1BD9CAAD735C4C3A460DFCCA4DDE10F700FF835A
# gpg:                issuer "david@redhat.com"
# gpg: Good signature from "David Hildenbrand <david@redhat.com>" [unknown]
# gpg:                 aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full]
# gpg:                 aka "David Hildenbrand <hildenbr@in.tum.de>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 1BD9 CAAD 735C 4C3A 460D  FCCA 4DDE 10F7 00FF 835A

* tag 'mem-2023-10-12' of https://github.com/davidhildenbrand/qemu:
  virtio-mem: Mark memslot alias memory regions unmergeable
  memory,vhost: Allow for marking memory device memory regions unmergeable
  virtio-mem: Expose device memory dynamically via multiple memslots if enabled
  virtio-mem: Update state to match bitmap as soon as it's been migrated
  virtio-mem: Pass non-const VirtIOMEM via virtio_mem_range_cb
  memory: Clarify mapping requirements for RamDiscardManager
  memory-device,vhost: Support automatic decision on the number of memslots
  vhost: Add vhost_get_max_memslots()
  kvm: Add stub for kvm_get_max_memslots()
  memory-device,vhost: Support memory devices that dynamically consume memslots
  memory-device: Track required and actually used memslots in DeviceMemoryState
  stubs: Rename qmp_memory_device.c to memory_device.c
  memory-device: Support memory devices with multiple memslots
  vhost: Return number of free memslots
  kvm: Return number of free memslots
  softmmu/physmem: Fixup qemu_ram_block_from_host() documentation
  vhost: Remove vhost_backend_can_merge() callback
  vhost: Rework memslot filtering and fix "used_memslot" tracking

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
  • Loading branch information
stefanhaRH committed Oct 16, 2023
2 parents 6301137 + ee6398d commit 48ba748
Show file tree
Hide file tree
Showing 23 changed files with 839 additions and 113 deletions.
1 change: 1 addition & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -2891,6 +2891,7 @@ F: hw/mem/pc-dimm.c
F: include/hw/mem/memory-device.h
F: include/hw/mem/nvdimm.h
F: include/hw/mem/pc-dimm.h
F: stubs/memory_device.c
F: docs/nvdimm.txt

SPICE
Expand Down
35 changes: 21 additions & 14 deletions accel/kvm/kvm-all.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,13 +174,31 @@ void kvm_resample_fd_notify(int gsi)
}
}

int kvm_get_max_memslots(void)
/*
 * Report the number of memslots (nr_slots) recorded in the KVM state of the
 * currently active accelerator.
 */
unsigned int kvm_get_max_memslots(void)
{
    KVMState *state = KVM_STATE(current_accel());
    const unsigned int total_slots = state->nr_slots;

    return total_slots;
}

/*
 * Report how many KVM memslots are still free.
 *
 * Every address space tracked in the KVM state is inspected while holding the
 * slots lock; entries without a memory listener (ml) are ignored. The largest
 * per-listener nr_used_slots value is subtracted from the total nr_slots.
 */
unsigned int kvm_get_free_memslots(void)
{
    KVMState *state = kvm_state;
    unsigned int max_used = 0;
    int idx;

    kvm_slots_lock();
    for (idx = 0; idx < state->nr_as; idx++) {
        if (state->as[idx].ml) {
            max_used = MAX(max_used, state->as[idx].ml->nr_used_slots);
        }
    }
    kvm_slots_unlock();

    return state->nr_slots - max_used;
}

/* Called with KVMMemoryListener.slots_lock held */
static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
{
Expand All @@ -196,19 +214,6 @@ static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
return NULL;
}

/*
 * Tell whether the memory listener of the machine's KVM accelerator still has
 * a free slot. The lookup is performed with the slots lock held.
 */
bool kvm_has_free_slot(MachineState *ms)
{
    KVMState *state = KVM_STATE(ms->accelerator);
    KVMMemoryListener *listener = &state->memory_listener;
    bool found;

    kvm_slots_lock();
    found = kvm_get_free_slot(listener) != NULL;
    kvm_slots_unlock();

    return found;
}

/* Called with KVMMemoryListener.slots_lock held */
static KVMSlot *kvm_alloc_slot(KVMMemoryListener *kml)
{
Expand Down Expand Up @@ -1387,6 +1392,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
}
start_addr += slot_size;
size -= slot_size;
kml->nr_used_slots--;
} while (size);
return;
}
Expand All @@ -1412,6 +1418,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
ram_start_offset += slot_size;
ram += slot_size;
size -= slot_size;
kml->nr_used_slots++;
} while (size);
}

Expand Down
9 changes: 7 additions & 2 deletions accel/stubs/kvm-stub.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,14 @@ int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
return -ENOSYS;
}

bool kvm_has_free_slot(MachineState *ms)
unsigned int kvm_get_max_memslots(void)
{
return false;
return 0;
}

/* Stub variant (kvm-stub.c): always reports zero free KVM memslots. */
unsigned int kvm_get_free_memslots(void)
{
    const unsigned int free_memslots = 0;

    return free_memslots;
}

void kvm_init_cpu_signals(CPUState *cpu)
Expand Down
196 changes: 188 additions & 8 deletions hw/mem/memory-device.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,19 +52,135 @@ static int memory_device_build_list(Object *obj, void *opaque)
return 0;
}

static void memory_device_check_addable(MachineState *ms, MemoryRegion *mr,
Error **errp)
/*
 * Number of memslots a memory device requires: the value reported by the
 * class's optional get_memslots() callback, or a single memslot when the
 * class does not implement that callback.
 */
static unsigned int memory_device_get_memslots(MemoryDeviceState *md)
{
    const MemoryDeviceClass *klass = MEMORY_DEVICE_GET_CLASS(md);

    if (!klass->get_memslots) {
        return 1;
    }
    return klass->get_memslots(md);
}

/*
 * Memslots held in reserve for memory devices: slots the devices require but
 * do not use yet, and which KVM / vhost therefore still report as free.
 */
static unsigned int get_reserved_memslots(MachineState *ms)
{
    const DeviceMemoryState *dms = ms->device_memory;

    if (dms->used_memslots <= dms->required_memslots) {
        return dms->required_memslots - dms->used_memslots;
    }

    /* Using more than required is unexpected; the memory notifier warned. */
    return 0;
}

/*
 * Reserved memslots of the current machine, or 0 when the machine has no
 * device-memory state at all.
 */
unsigned int memory_devices_get_reserved_memslots(void)
{
    if (current_machine->device_memory) {
        return get_reserved_memslots(current_machine);
    }
    return 0;
}

/*
 * Whether the current machine's device-memory state has its
 * memslot_auto_decision_active value set (non-zero). Machines without
 * device-memory state always report false.
 */
bool memory_devices_memslot_auto_decision_active(void)
{
    if (current_machine->device_memory) {
        return current_machine->device_memory->memslot_auto_decision_active;
    }
    return false;
}

/*
 * Compute the upper limit of memslots handed to a memory device that decides
 * on its own how many memslots to use.
 *
 * @ms: machine whose device-memory accounting is consulted
 * @mr: memory region of the device for which the limit is computed
 *
 * The limit is derived from the vhost (and, if enabled, KVM) memslot limits,
 * the soft limit across all memory devices, the memslots already reserved by
 * other memory devices, and the share of the remaining device-memory space
 * that this region occupies.
 *
 * Returns: the limit, which is always at least 1.
 */
static unsigned int memory_device_memslot_decision_limit(MachineState *ms,
                                                         MemoryRegion *mr)
{
    const unsigned int reserved = get_reserved_memslots(ms);
    const uint64_t size = memory_region_size(mr);
    unsigned int max = vhost_get_max_memslots();
    unsigned int free = vhost_get_free_memslots();
    uint64_t available_space;
    unsigned int memslots;

    if (kvm_enabled()) {
        max = MIN(max, kvm_get_max_memslots());
        free = MIN(free, kvm_get_free_memslots());
    }

    /*
     * If we have fewer overall memslots than what we consider reasonable,
     * just keep it to a minimum.
     */
    if (max < MEMORY_DEVICES_SAFE_MAX_MEMSLOTS) {
        return 1;
    }

    /*
     * Consider our soft-limit across all memory devices. We don't really
     * expect to exceed this limit in reasonable configurations.
     */
    if (MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT <=
        ms->device_memory->required_memslots) {
        return 1;
    }
    memslots = MEMORY_DEVICES_SOFT_MEMSLOT_LIMIT -
               ms->device_memory->required_memslots;

    /*
     * Consider the actually still free memslots. This is only relevant if
     * other memslot consumers would consume *significantly* more memslots than
     * what we prepared for (> 253). Unlikely, but let's just handle it
     * cleanly. Check before subtracting so the unsigned difference can never
     * wrap around.
     */
    if (unlikely(free <= reserved)) {
        return 1;
    }
    memslots = MIN(memslots, free - reserved);

    /* We cannot have any other memory devices? So give all to this device. */
    if (size == ms->maxram_size - ms->ram_size) {
        return memslots;
    }

    /*
     * Simple heuristic: equally distribute the memslots over the space
     * still available for memory devices.
     */
    available_space = ms->maxram_size - ms->ram_size -
                      ms->device_memory->used_region_size;

    /*
     * Nothing in this function guarantees that the region fits into the
     * remaining space. If it covers all of it (or more, or if no space is
     * left at all), the ratio below would be >= 1 or a division by zero;
     * converting such a result back to an unsigned int could exceed the limit
     * computed above or be out of range. Cap at the limit instead.
     */
    if (size >= available_space) {
        return memslots;
    }
    memslots = (double)memslots * size / available_space;
    return memslots < 1 ? 1 : memslots;
}

static void memory_device_check_addable(MachineState *ms, MemoryDeviceState *md,
MemoryRegion *mr, Error **errp)
{
const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
const uint64_t used_region_size = ms->device_memory->used_region_size;
const uint64_t size = memory_region_size(mr);
const unsigned int reserved_memslots = get_reserved_memslots(ms);
unsigned int required_memslots, memslot_limit;

/*
* Instruct the device to decide how many memslots to use, if applicable,
* before we query the number of required memslots the first time.
*/
if (mdc->decide_memslots) {
memslot_limit = memory_device_memslot_decision_limit(ms, mr);
mdc->decide_memslots(md, memslot_limit);
}
required_memslots = memory_device_get_memslots(md);

/* we will need a new memory slot for kvm and vhost */
if (kvm_enabled() && !kvm_has_free_slot(ms)) {
error_setg(errp, "hypervisor has no free memory slots left");
/* we will need memory slots for kvm and vhost */
if (kvm_enabled() &&
kvm_get_free_memslots() < required_memslots + reserved_memslots) {
error_setg(errp, "hypervisor has not enough free memory slots left");
return;
}
if (!vhost_has_free_slot()) {
error_setg(errp, "a used vhost backend has no free memory slots left");
if (vhost_get_free_memslots() < required_memslots + reserved_memslots) {
error_setg(errp, "a used vhost backend has not enough free memory slots left");
return;
}

Expand Down Expand Up @@ -233,7 +349,7 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
goto out;
}

memory_device_check_addable(ms, mr, &local_err);
memory_device_check_addable(ms, md, mr, &local_err);
if (local_err) {
goto out;
}
Expand Down Expand Up @@ -264,6 +380,7 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
{
const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
const unsigned int memslots = memory_device_get_memslots(md);
const uint64_t addr = mdc->get_addr(md);
MemoryRegion *mr;

Expand All @@ -275,6 +392,11 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
g_assert(ms->device_memory);

ms->device_memory->used_region_size += memory_region_size(mr);
ms->device_memory->required_memslots += memslots;
if (mdc->decide_memslots && memslots > 1) {
ms->device_memory->memslot_auto_decision_active++;
}

memory_region_add_subregion(&ms->device_memory->mr,
addr - ms->device_memory->base, mr);
trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr);
Expand All @@ -283,6 +405,7 @@ void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
{
const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
const unsigned int memslots = memory_device_get_memslots(md);
MemoryRegion *mr;

/*
Expand All @@ -293,7 +416,12 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
g_assert(ms->device_memory);

memory_region_del_subregion(&ms->device_memory->mr, mr);

if (mdc->decide_memslots && memslots > 1) {
ms->device_memory->memslot_auto_decision_active--;
}
ms->device_memory->used_region_size -= memory_region_size(mr);
ms->device_memory->required_memslots -= memslots;
trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "",
mdc->get_addr(md));
}
Expand All @@ -313,6 +441,50 @@ uint64_t memory_device_get_region_size(const MemoryDeviceState *md,
return memory_region_size(mr);
}

/*
 * Shared body of the region_add / region_del listener hooks: keep the count
 * of memslots used by memory devices up to date.
 *
 * @listener: listener embedded in the DeviceMemoryState being updated
 * @mrs: section that was added or removed
 * @add: true when the section was added, false when it was removed
 */
static void memory_devices_region_mod(MemoryListener *listener,
                                      MemoryRegionSection *mrs, bool add)
{
    DeviceMemoryState *state = container_of(listener, DeviceMemoryState,
                                            listener);

    /* Only RAM regions are expected here; anything else is not counted. */
    if (!memory_region_is_ram(mrs->mr)) {
        warn_report("Unexpected memory region mapped into device memory region.");
        return;
    }

    /*
     * Each distinct RAM memory region section in the region for memory
     * devices is expected to consume exactly one memslot in KVM and in
     * vhost. For vhost there are two exceptions:
     * * ROM memory regions don't consume a memslot. They are used very
     *   rarely for memory devices (R/O NVDIMMs).
     * * Memslots without a fd (memory-backend-ram) don't necessarily
     *   consume a memslot. Such setups are quite rare and possibly bogus:
     *   the memory would be inaccessible by such vhost devices.
     *
     * Hence, in corner cases, the number of memslots currently used or
     * still reserved (required - used) might be over-estimated for vhost.
     */
    if (add) {
        state->used_memslots++;
    } else {
        state->used_memslots--;
    }

    if (state->used_memslots > state->required_memslots) {
        warn_report("Memory devices use more memory slots than indicated as required.");
    }
}

/*
 * region_add hook of the device-memory listener: account for a section that
 * got mapped into the device memory region.
 */
static void memory_devices_region_add(MemoryListener *listener,
                                      MemoryRegionSection *mrs)
{
    /* A void function must not return an expression in ISO C; just call. */
    memory_devices_region_mod(listener, mrs, true);
}

/*
 * region_del hook of the device-memory listener: account for a section that
 * got removed from the device memory region.
 */
static void memory_devices_region_del(MemoryListener *listener,
                                      MemoryRegionSection *mrs)
{
    /* A void function must not return an expression in ISO C; just call. */
    memory_devices_region_mod(listener, mrs, false);
}

void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size)
{
g_assert(size);
Expand All @@ -322,8 +494,16 @@ void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size)

memory_region_init(&ms->device_memory->mr, OBJECT(ms), "device-memory",
size);
address_space_init(&ms->device_memory->as, &ms->device_memory->mr,
"device-memory");
memory_region_add_subregion(get_system_memory(), ms->device_memory->base,
&ms->device_memory->mr);

/* Track the number of memslots used by memory devices. */
ms->device_memory->listener.region_add = memory_devices_region_add;
ms->device_memory->listener.region_del = memory_devices_region_del;
memory_listener_register(&ms->device_memory->listener,
&ms->device_memory->as);
}

static const TypeInfo memory_device_info = {
Expand Down
9 changes: 7 additions & 2 deletions hw/virtio/vhost-stub.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,14 @@
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-user.h"

bool vhost_has_free_slot(void)
unsigned int vhost_get_max_memslots(void)
{
return true;
return UINT_MAX;
}

/*
 * Stub variant (vhost-stub.c): reports UINT_MAX free memslots, i.e. the
 * largest value the return type can hold.
 */
unsigned int vhost_get_free_memslots(void)
{
    const unsigned int free_memslots = UINT_MAX;

    return free_memslots;
}

bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
Expand Down

0 comments on commit 48ba748

Please sign in to comment.