Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge tag 'mem-2023-09-19' of https://github.com/davidhildenbrand/qemu
…into staging

Hi,

"Host Memory Backends" and "Memory devices" queue ("mem"):
- Support and document VM templating with R/O files using a new "rom"
  parameter for memory-backend-file
- Some cleanups and fixes around NVDIMMs and R/O file handling for guest
  RAM
- Optimize ioeventfd updates by skipping address spaces that are not
  applicable

# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEEG9nKrXNcTDpGDfzKTd4Q9wD/g1oFAmUJdykRHGRhdmlkQHJl
# ZGhhdC5jb20ACgkQTd4Q9wD/g1pf2w//akOUoYMuamySGjXtKLVyMKZkjIys+Ama
# k2C0xzsWAHBP572ezwHi8uxf5j9kzAjsw6GxDZ7FAamD9MhiohkEvkecloBx6f/c
# q3fVHblBNkG7v2urtf4+6PJtJvhzOST2SFXfWeYhO/vaA04AYCDgexv82JN3gA6B
# OS8WyOX62b8wILPSY2GLZ8IqpE9XnOYZwzVBn6YB1yo7ZkYEfXO6cA8nykNuNcOE
# vppqDo7uVIX6317FWj8ygxmzFfOaj0WT2MT2XFzEIDfg8BInQN8HC4mTn0hcVKMa
# N1y+eZH733CQKT+uNBRZ5YOeljOi4d6gEEyvkkA/L7e5D3Qg9hIdvHb4uryCFSWX
# Vt07OP1XLBwCZFobOC6sg+2gtTZJxxYK89e6ZzEd0454S24w5bnEteRAaCGOP0XL
# ww9xYULqhtZs55UC4rvZHJwdUAk1fIY4VqynwkeQXegvz6BxedNeEkJiiEU0Tizx
# N2VpsxAJ7H/LLSFeZoCRESo4azrH6U4n7S/eS1tkCniFqibfe2yIQCDoJVfb42ec
# gfg/vThCrDwHkIHzkMmoV8NndA7Q7SIkyMfYeEEBeZMeg8JzYll4DJEw/jQCacxh
# KRUa+AZvGlTJUq0mkvyOVfLki+iaehoIUuY1yvMrmdWijPO8n3YybmP9Ljhr8VdR
# 9MSYZe+I2v8=
# =iraT
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 19 Sep 2023 06:25:45 EDT
# gpg:                using RSA key 1BD9CAAD735C4C3A460DFCCA4DDE10F700FF835A
# gpg:                issuer "david@redhat.com"
# gpg: Good signature from "David Hildenbrand <david@redhat.com>" [unknown]
# gpg:                 aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full]
# gpg:                 aka "David Hildenbrand <hildenbr@in.tum.de>" [unknown]
# gpg: WARNING: The key's User ID is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 1BD9 CAAD 735C 4C3A 460D  FCCA 4DDE 10F7 00FF 835A

* tag 'mem-2023-09-19' of https://github.com/davidhildenbrand/qemu:
  memory: avoid updating ioeventfds for some address_space
  machine: Improve error message when using default RAM backend id
  softmmu/physmem: Hint that "readonly=on,rom=off" exists when opening file R/W for private mapping fails
  docs: Start documenting VM templating
  docs: Don't mention "-mem-path" in multi-process.rst
  softmmu/physmem: Never return directories from file_ram_open()
  softmmu/physmem: Fail creation of new files in file_ram_open() with readonly=true
  softmmu/physmem: Bail out early in ram_block_discard_range() with readonly files
  softmmu/physmem: Remap with proper protection in qemu_ram_remap()
  backends/hostmem-file: Add "rom" property to support VM templating with R/O files
  softmmu/physmem: Distinguish between file access mode and mmap protection
  nvdimm: Reject writing label data to ROM instead of crashing QEMU

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
  • Loading branch information
Stefan Hajnoczi committed Sep 19, 2023
2 parents 526940f + 544cff4 commit 25ab9a9
Show file tree
Hide file tree
Showing 16 changed files with 354 additions and 49 deletions.
1 change: 1 addition & 0 deletions MAINTAINERS
Expand Up @@ -2961,6 +2961,7 @@ M: Igor Mammedov <imammedo@redhat.com>
S: Maintained
F: backends/hostmem*.c
F: include/sysemu/hostmem.h
F: docs/system/vm-templating.rst
T: git https://gitlab.com/ehabkost/qemu.git machine-next

Cryptodev Backends
Expand Down
61 changes: 59 additions & 2 deletions backends/hostmem-file.c
Expand Up @@ -18,6 +18,8 @@
#include "sysemu/hostmem.h"
#include "qom/object_interfaces.h"
#include "qom/object.h"
#include "qapi/visitor.h"
#include "qapi/qapi-visit-common.h"

OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendFile, MEMORY_BACKEND_FILE)

Expand All @@ -31,6 +33,7 @@ struct HostMemoryBackendFile {
bool discard_data;
bool is_pmem;
bool readonly;
OnOffAuto rom;
};

static void
Expand All @@ -53,15 +56,39 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
return;
}

switch (fb->rom) {
case ON_OFF_AUTO_AUTO:
/* Traditionally, opening the file readonly always resulted in ROM. */
fb->rom = fb->readonly ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
break;
case ON_OFF_AUTO_ON:
if (!fb->readonly) {
error_setg(errp, "property 'rom' = 'on' is not supported with"
" 'readonly' = 'off'");
return;
}
break;
case ON_OFF_AUTO_OFF:
if (fb->readonly && backend->share) {
error_setg(errp, "property 'rom' = 'off' is incompatible with"
" 'readonly' = 'on' and 'share' = 'on'");
return;
}
break;
default:
assert(false);
}

name = host_memory_backend_get_name(backend);
ram_flags = backend->share ? RAM_SHARED : 0;
ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0;
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
ram_flags |= RAM_NAMED_FILE;
memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
backend->size, fb->align, ram_flags,
fb->mem_path, fb->offset, fb->readonly,
errp);
fb->mem_path, fb->offset, errp);
g_free(name);
#endif
}
Expand Down Expand Up @@ -201,6 +228,32 @@ static void file_memory_backend_set_readonly(Object *obj, bool value,
fb->readonly = value;
}

/*
 * Property getter for "rom": reports the backend's current OnOffAuto
 * value through the visitor @v under the property name @name.
 */
static void file_memory_backend_get_rom(Object *obj, Visitor *v,
                                        const char *name, void *opaque,
                                        Error **errp)
{
    /* The visitor is handed a local copy, not the address of the field. */
    OnOffAuto current = MEMORY_BACKEND_FILE(obj)->rom;

    visit_type_OnOffAuto(v, name, &current, errp);
}

/*
 * Property setter for "rom".
 *
 * The new OnOffAuto value is read from the visitor @v directly into the
 * backend. Once host_memory_backend_mr_inited() reports true for this
 * backend, the property is treated as frozen: nothing is visited and an
 * error is stored in @errp instead.
 */
static void file_memory_backend_set_rom(Object *obj, Visitor *v,
                                        const char *name, void *opaque,
                                        Error **errp)
{
    HostMemoryBackend *hostmem = MEMORY_BACKEND(obj);
    HostMemoryBackendFile *file_backend = MEMORY_BACKEND_FILE(obj);

    if (!host_memory_backend_mr_inited(hostmem)) {
        visit_type_OnOffAuto(v, name, &file_backend->rom, errp);
        return;
    }

    error_setg(errp, "cannot change property '%s' of %s.", name,
               object_get_typename(obj));
}

static void file_backend_unparent(Object *obj)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
Expand Down Expand Up @@ -243,6 +296,10 @@ file_backend_class_init(ObjectClass *oc, void *data)
object_class_property_add_bool(oc, "readonly",
file_memory_backend_get_readonly,
file_memory_backend_set_readonly);
object_class_property_add(oc, "rom", "OnOffAuto",
file_memory_backend_get_rom, file_memory_backend_set_rom, NULL, NULL);
object_class_property_set_description(oc, "rom",
"Whether to create Read Only Memory (ROM)");
}

static void file_backend_instance_finalize(Object *o)
Expand Down
5 changes: 3 additions & 2 deletions docs/devel/multi-process.rst
Expand Up @@ -409,8 +409,9 @@ the initial messages sent to the emulation process is a guest memory
table. Each entry in this table consists of a file descriptor and size
that the emulation process can ``mmap()`` to directly access guest
memory, similar to ``vhost_user_set_mem_table()``. Note guest memory
must be backed by file descriptors, such as when QEMU is given the
*-mem-path* command line option.
must be backed by shared file-backed memory, for example, using
*-object memory-backend-file,share=on* and setting that memory backend
as RAM for the machine.

IOMMU operations
^^^^^^^^^^^^^^^^
Expand Down
1 change: 1 addition & 0 deletions docs/system/index.rst
Expand Up @@ -38,3 +38,4 @@ or Hypervisor.Framework.
security
multi-process
confidential-guest-support
vm-templating
125 changes: 125 additions & 0 deletions docs/system/vm-templating.rst
@@ -0,0 +1,125 @@
QEMU VM templating
==================

This document explains how to use VM templating in QEMU.

For now, the focus is on VM memory aspects, and not on how to save and
restore other VM state (i.e., migrate-to-file with ``x-ignore-shared``).

Overview
--------

With VM templating, a single template VM serves as the starting point for
new VMs. This allows for fast and efficient replication of VMs, resulting
in fast startup times and reduced memory consumption.

Conceptually, the VM state is frozen, to then be used as a basis for new
VMs. The Copy-On-Write mechanism in the operating systems makes sure that
new VMs are able to read template VM memory; however, any modifications
stay private and don't modify the original template VM or any other
created VM.

!!! Security Alert !!!
----------------------

When effectively cloning VMs by VM templating, hardware identifiers
(such as UUIDs and NIC MAC addresses), and similar data in the guest OS
(such as machine IDs, SSH keys, certificates) that are supposed to be
*unique* are no longer unique, which can be a security concern.

Please be aware of these implications and how to mitigate them for your
use case, which might involve vmgenid, hot(un)plug of NIC, etc.

Memory configuration
--------------------

In order to create the template VM, we have to make sure that VM memory
ends up in a file, from where it can be reused for the new VMs:

Supply VM RAM via memory-backend-file, with ``share=on`` (modifications go
to the file) and ``readonly=off`` (open the file writable). Note that
``readonly=off`` is implicit.

In the following command-line example, a 2GB VM is created, whereby VM RAM
is to be stored in the ``template`` file.

.. parsed-literal::

    |qemu_system| [...] -m 2g \\
        -object memory-backend-file,id=pc.ram,mem-path=template,size=2g,share=on,... \\
        -machine q35,memory-backend=pc.ram

If multiple memory backends are used (vNUMA, DIMMs), configure all
memory backends accordingly.

Once the VM is in the desired state, stop the VM and save other VM state,
leaving the current state of VM RAM residing in the file.

In order to have a new VM be based on a template VM, we have to
configure VM RAM to be based on a template VM RAM file; however, the VM
should not be able to modify file content.

Supply VM RAM via memory-backend-file, with ``share=off`` (modifications
stay private), ``readonly=on`` (open the file readonly) and ``rom=off``
(don't make the memory readonly for the VM). Note that ``share=off`` is
implicit and that other VM state has to be restored separately.

In the following command-line example, a 2GB VM is created based on the
existing 2GB file ``template``.

.. parsed-literal::

    |qemu_system| [...] -m 2g \\
        -object memory-backend-file,id=pc.ram,mem-path=template,size=2g,readonly=on,rom=off,... \\
        -machine q35,memory-backend=pc.ram

If multiple memory backends are used (vNUMA, DIMMs), configure all
memory backends accordingly.

Note that ``-mem-path`` cannot be used for VM templating when creating the
template VM or when starting new VMs based on a template VM.

Incompatible features
---------------------

Some features are incompatible with VM templating, as the underlying file
cannot be modified to discard VM RAM, or to actually share memory with
another process.

vhost-user and multi-process QEMU
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

vhost-user and multi-process QEMU are incompatible with VM templating.
These technologies rely on shared memory; however, the template VMs
don't actually share memory (``share=off``), even though they are
file-based.

virtio-balloon
~~~~~~~~~~~~~~

virtio-balloon inflation and "free page reporting" cannot discard VM RAM
and will repeatedly report errors. While virtio-balloon can be used
for template VMs (e.g., report VM RAM stats), "free page reporting"
should be disabled and the balloon should not be inflated.

virtio-mem
~~~~~~~~~~

virtio-mem cannot discard VM RAM that is managed by the virtio-mem
device. virtio-mem will fail early when realizing the device. To use
VM templating with virtio-mem, either hotplug virtio-mem devices to the
new VM, or don't supply any memory to the template VM using virtio-mem
(requested-size=0); in both cases, don't use a template VM file as the
memory backend for the virtio-mem device.

VM migration
~~~~~~~~~~~~

For VM migration, "x-release-ram" similarly relies on discarding of VM
RAM on the migration source to free up migrated RAM, and will
repeatedly report errors.

Postcopy live migration fails early when trying to discard VM RAM on the
migration destination, and therefore refuses to be activated. Note
that postcopy live migration usually only works on selected filesystems
(shmem/tmpfs, hugetlbfs) either way.
11 changes: 8 additions & 3 deletions hw/acpi/nvdimm.c
Expand Up @@ -670,7 +670,8 @@ static void nvdimm_dsm_label_size(NVDIMMDevice *nvdimm, hwaddr dsm_mem_addr)
}

static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm,
uint32_t offset, uint32_t length)
uint32_t offset, uint32_t length,
bool is_write)
{
uint32_t ret = NVDIMM_DSM_RET_STATUS_INVALID;

Expand All @@ -690,6 +691,10 @@ static uint32_t nvdimm_rw_label_data_check(NVDIMMDevice *nvdimm,
return ret;
}

if (is_write && nvdimm->readonly) {
return NVDIMM_DSM_RET_STATUS_UNSUPPORT;
}

return NVDIMM_DSM_RET_STATUS_SUCCESS;
}

Expand All @@ -713,7 +718,7 @@ static void nvdimm_dsm_get_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
get_label_data->length);

status = nvdimm_rw_label_data_check(nvdimm, get_label_data->offset,
get_label_data->length);
get_label_data->length, false);
if (status != NVDIMM_DSM_RET_STATUS_SUCCESS) {
nvdimm_dsm_no_payload(status, dsm_mem_addr);
return;
Expand Down Expand Up @@ -752,7 +757,7 @@ static void nvdimm_dsm_set_label_data(NVDIMMDevice *nvdimm, NvdimmDsmIn *in,
set_label_data->length);

status = nvdimm_rw_label_data_check(nvdimm, set_label_data->offset,
set_label_data->length);
set_label_data->length, true);
if (status != NVDIMM_DSM_RET_STATUS_SUCCESS) {
nvdimm_dsm_no_payload(status, dsm_mem_addr);
return;
Expand Down
11 changes: 8 additions & 3 deletions hw/core/machine.c
Expand Up @@ -1359,6 +1359,7 @@ static bool create_default_memdev(MachineState *ms, const char *path, Error **er

void machine_run_board_init(MachineState *machine, const char *mem_path, Error **errp)
{
ERRP_GUARD();
MachineClass *machine_class = MACHINE_GET_CLASS(machine);
ObjectClass *oc = object_class_by_name(machine->cpu_type);
CPUClass *cc;
Expand Down Expand Up @@ -1387,9 +1388,13 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error *
numa_uses_legacy_mem()) {
if (object_property_find(object_get_objects_root(),
machine_class->default_ram_id)) {
error_setg(errp, "object name '%s' is reserved for the default"
" RAM backend, it can't be used for any other purposes."
" Change the object's 'id' to something else",
error_setg(errp, "object's id '%s' is reserved for the default"
" RAM backend, it can't be used for any other purposes",
machine_class->default_ram_id);
error_append_hint(errp,
"Change the object's 'id' to something else or disable"
" automatic creation of the default RAM backend by setting"
" 'memory-backend=%s' with '-machine'.\n",
machine_class->default_ram_id);
return;
}
Expand Down
10 changes: 7 additions & 3 deletions hw/mem/nvdimm.c
Expand Up @@ -154,6 +154,9 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
object_get_canonical_path_component(OBJECT(hostmem)));
return;
}
if (memory_region_is_rom(mr)) {
nvdimm->readonly = true;
}

nvdimm->nvdimm_mr = g_new(MemoryRegion, 1);
memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm),
Expand Down Expand Up @@ -207,15 +210,16 @@ static void nvdimm_unrealize(PCDIMMDevice *dimm)
* label read/write functions.
*/
static void nvdimm_validate_rw_label_data(NVDIMMDevice *nvdimm, uint64_t size,
uint64_t offset)
uint64_t offset, bool is_write)
{
assert((nvdimm->label_size >= size + offset) && (offset + size > offset));
assert(!is_write || !nvdimm->readonly);
}

static void nvdimm_read_label_data(NVDIMMDevice *nvdimm, void *buf,
uint64_t size, uint64_t offset)
{
nvdimm_validate_rw_label_data(nvdimm, size, offset);
nvdimm_validate_rw_label_data(nvdimm, size, offset, false);

memcpy(buf, nvdimm->label_data + offset, size);
}
Expand All @@ -229,7 +233,7 @@ static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
"pmem", NULL);
uint64_t backend_offset;

nvdimm_validate_rw_label_data(nvdimm, size, offset);
nvdimm_validate_rw_label_data(nvdimm, size, offset, true);

if (!is_pmem) {
memcpy(nvdimm->label_data + offset, buf, size);
Expand Down
3 changes: 2 additions & 1 deletion hw/ppc/spapr_nvdimm.c
Expand Up @@ -320,7 +320,8 @@ static target_ulong h_scm_write_metadata(PowerPCCPU *cpu,

nvdimm = NVDIMM(drc->dev);
if ((offset + len < offset) ||
(nvdimm->label_size < len + offset)) {
(nvdimm->label_size < len + offset) ||
nvdimm->readonly) {
return H_P2;
}

Expand Down

0 comments on commit 25ab9a9

Please sign in to comment.