diff --git a/docs/bypass-iommu.txt b/docs/bypass-iommu.txt new file mode 100644 index 0000000000000..e6677bddd3243 --- /dev/null +++ b/docs/bypass-iommu.txt @@ -0,0 +1,89 @@ +BYPASS IOMMU PROPERTY +===================== + +Description +=========== +Traditionally, there is a global switch to enable/disable vIOMMU. All +devices in the system can only support go through vIOMMU or not, which +is not flexible. We introduce this bypass iommu property to support +coexist of devices go through vIOMMU and devices not. This is useful to +passthrough devices with no-iommu mode and devices go through vIOMMU in +the same virtual machine. + +PCI host bridges have a bypass_iommu property. This property is used to +determine whether the devices attached on the PCI host bridge will bypass +virtual iommu. The bypass_iommu property is valid only when there is a +virtual iommu in the system, it is implemented to allow some devices to +bypass vIOMMU. When bypass_iommu property is not set for a host bridge, +the attached devices will go through vIOMMU by default. + +Usage +===== +The bypass iommu feature support PXB host bridge and default main host +bridge, we add a bypass_iommu property for PXB and default_bus_bypass_iommu +for machine. Note that default_bus_bypass_iommu is available only when +the 'q35' machine type on x86 architecture and the 'virt' machine type +on AArch64. Other machine types do not support bypass iommu for default +root bus. + +1. The following is the bypass iommu options: + (1) PCI expander bridge + qemu -device pxb-pcie,bus_nr=0x10,addr=0x1,bypass_iommu=true + (2) Arm default host bridge + qemu -machine virt,iommu=smmuv3,default_bus_bypass_iommu=true + (3) X86 default root bus bypass iommu: + qemu -machine q35,default_bus_bypass_iommu=true + +2. Here is the detailed qemu command line for 'virt' machine with PXB on +AArch64: + +qemu-system-aarch64 \ + -machine virt,kernel_irqchip=on,iommu=smmuv3,default_bus_bypass_iommu=true \ + -device pxb-pcie,bus_nr=0x10,id=pci.10,bus=pcie.0,addr=0x3.0x1 \ + -device pxb-pcie,bus_nr=0x20,id=pci.20,bus=pcie.0,addr=0x3.0x2,bypass_iommu=true \ + +And we got: + - a default host bridge which bypass SMMUv3 + - a pxb host bridge which go through SMMUv3 + - a pxb host bridge which bypass SMMUv3 + +3. Here is the detailed qemu command line for 'q35' machine with PXB on +x86 architecture: + +qemu-system-x86_64 \ + -machine q35,accel=kvm,default_bus_bypass_iommu=true \ + -device pxb-pcie,bus_nr=0x10,id=pci.10,bus=pcie.0,addr=0x3 \ + -device pxb-pcie,bus_nr=0x20,id=pci.20,bus=pcie.0,addr=0x4,bypass_iommu=true \ + -device intel-iommu \ + +And we got: + - a default host bridge which bypass iommu + - a pxb host bridge which go through iommu + - a pxb host bridge which bypass iommu + +Limitations +=========== +There might be potential security risk when devices bypass iommu, because +devices might send malicious dma request to virtual machine if there is no +iommu isolation. So it would be necessary to only bypass iommu for trusted +device. + +Implementation +============== +The bypass iommu feature includes: + - Address space + Add bypass iommu property check of PCI Host and do not get iommu address + space for devices bypass iommu. + - Arm SMMUv3 support + We traverse all PCI root bus and get bus number ranges, then build explicit + RID mapping for devices which do not bypass iommu. + - X86 IOMMU support + To support Intel iommu, we traverse all PCI host bridge and get information + of devices which do not bypass iommu, then fill the DMAR drhd struct with + explicit device scope info. To support AMD iommu, add check of bypass iommu + when traverse the PCI hsot bridge. + - Machine and PXB options + We add bypass iommu options in machine option for default root bus, and add + option for PXB also. Note that the default value of bypass iommu is false, + so that the devices will by default go through iommu if there exist one. + diff --git a/docs/tools/index.rst b/docs/tools/index.rst index d923834a73985..9d80fa89eceb2 100644 --- a/docs/tools/index.rst +++ b/docs/tools/index.rst @@ -15,5 +15,6 @@ Contents: qemu-nbd qemu-pr-helper qemu-trace-stap + vhost-user-rng virtfs-proxy-helper virtiofsd diff --git a/docs/tools/vhost-user-rng.rst b/docs/tools/vhost-user-rng.rst new file mode 100644 index 0000000000000..7f69d7bb3c588 --- /dev/null +++ b/docs/tools/vhost-user-rng.rst @@ -0,0 +1,74 @@ +QEMU vhost-user-rng - RNG emulation backend +=========================================== + +Synopsis +-------- + +**vhost-user-rng** [*OPTIONS*] + +Description +----------- + +This program is a vhost-user backend that emulates a VirtIO random number +generator (RNG). It uses the host's random number generator pool, +/dev/urandom by default but configurable at will, to satisfy requests from +guests. + +This program is designed to work with QEMU's ``-device +vhost-user-rng-pci`` but should work with any virtual machine monitor +(VMM) that supports vhost-user. See the Examples section below. + +Options +------- + +.. program:: vhost-user-rng + +.. option:: -h, --help + + Print help. + +.. option:: -v, --verbose + + Increase verbosity of output + +.. option:: -s, --socket-path=PATH + + Listen on vhost-user UNIX domain socket at PATH. Incompatible with --fd. + +.. option:: -f, --fd=FDNUM + + Accept connections from vhost-user UNIX domain socket file descriptor FDNUM. + The file descriptor must already be listening for connections. + Incompatible with --socket-path. + +.. option:: -p, --period + + Rate, in milliseconds, at which the RNG hardware can generate random data. + Used in conjunction with the --max-bytes option. + +.. option:: -m, --max-bytes + + In conjuction with the --period parameter, provides the maximum number of byte + per milliseconds a RNG device can generate. + +Examples +-------- + +The daemon should be started first: + +:: + + host# vhost-user-rng --socket-path=rng.sock --period=1000 --max-bytes=4096 + +The QEMU invocation needs to create a chardev socket the device can +use to communicate as well as share the guests memory over a memfd. + +:: + + host# qemu-system \ + -chardev socket,path=$(PATH)/rng.sock,id=rng0 \ + -device vhost-user-rng-pci,chardev=rng0 \ + -m 4096 \ + -object memory-backend-file,id=mem,size=4G,mem-path=/dev/shm,share=on \ + -numa node,memdev=mem \ + ... diff --git a/hw/acpi/acpi-x86-stub.c b/hw/acpi/acpi-x86-stub.c index f88d6a090b3d6..e9e46c5c5fb55 100644 --- a/hw/acpi/acpi-x86-stub.c +++ b/hw/acpi/acpi-x86-stub.c @@ -1,7 +1,13 @@ #include "qemu/osdep.h" #include "hw/i386/pc.h" +#include "hw/i386/acpi-build.h" void pc_madt_cpu_entry(AcpiDeviceIf *adev, int uid, const CPUArchIdList *apic_ids, GArray *entry) { } + +Object *acpi_get_i386_pci_host(void) +{ + return NULL; +} diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 4daa79ec8d609..778e27b659853 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -217,6 +217,26 @@ static const VMStateDescription vmstate_cpuhp_state = { } }; +static bool vmstate_test_use_pcihp(void *opaque) +{ + ICH9LPCPMRegs *s = opaque; + + return s->use_acpi_hotplug_bridge; +} + +static const VMStateDescription vmstate_pcihp_state = { + .name = "ich9_pm/pcihp", + .version_id = 1, + .minimum_version_id = 1, + .needed = vmstate_test_use_pcihp, + .fields = (VMStateField[]) { + VMSTATE_PCI_HOTPLUG(acpi_pci_hotplug, + ICH9LPCPMRegs, + NULL, NULL), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_ich9_pm = { .name = "ich9_pm", .version_id = 1, @@ -238,6 +258,7 @@ const VMStateDescription vmstate_ich9_pm = { &vmstate_memhp_state, &vmstate_tco_io_state, &vmstate_cpuhp_state, + &vmstate_pcihp_state, NULL } }; @@ -259,6 +280,10 @@ static void pm_reset(void *opaque) } pm->smi_en_wmask = ~0; + if (pm->use_acpi_hotplug_bridge) { + acpi_pcihp_reset(&pm->acpi_pci_hotplug, true); + } + acpi_update_sci(&pm->acpi_regs, pm->irq); } @@ -297,6 +322,18 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, pm->enable_tco = true; acpi_pm_tco_init(&pm->tco_regs, &pm->io); + if (pm->use_acpi_hotplug_bridge) { + acpi_pcihp_init(OBJECT(lpc_pci), + &pm->acpi_pci_hotplug, + pci_get_bus(lpc_pci), + pci_address_space_io(lpc_pci), + true, + ACPI_PCIHP_ADDR_ICH9); + + qbus_set_hotplug_handler(BUS(pci_get_bus(lpc_pci)), + OBJECT(lpc_pci)); + } + pm->irq = sci_irq; qemu_register_reset(pm_reset, pm); pm->powerdown_notifier.notify = pm_powerdown_req; @@ -368,6 +405,20 @@ static void ich9_pm_set_enable_tco(Object *obj, bool value, Error **errp) s->pm.enable_tco = value; } +static bool ich9_pm_get_acpi_pci_hotplug(Object *obj, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + return s->pm.use_acpi_hotplug_bridge; +} + +static void ich9_pm_set_acpi_pci_hotplug(Object *obj, bool value, Error **errp) +{ + ICH9LPCState *s = ICH9_LPC_DEVICE(obj); + + s->pm.use_acpi_hotplug_bridge = value; +} + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) { static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; @@ -376,6 +427,7 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) pm->disable_s3 = 0; pm->disable_s4 = 0; pm->s4_val = 2; + pm->use_acpi_hotplug_bridge = true; object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE, &pm->pm_io_base, OBJ_PROP_FLAG_READ); @@ -399,6 +451,9 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) object_property_add_bool(obj, ACPI_PM_PROP_TCO_ENABLED, ich9_pm_get_enable_tco, ich9_pm_set_enable_tco); + object_property_add_bool(obj, "acpi-pci-hotplug-with-bridge-support", + ich9_pm_get_acpi_pci_hotplug, + ich9_pm_set_acpi_pci_hotplug); } void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, @@ -406,6 +461,11 @@ void ich9_pm_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, { ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev); + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_pre_plug_cb(hotplug_dev, dev, errp); + return; + } + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) && !lpc->pm.acpi_memory_hotplug.is_enabled) { error_setg(errp, @@ -441,6 +501,9 @@ void ich9_pm_device_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, } else { acpi_cpu_plug_cb(hotplug_dev, &lpc->pm.cpuhp_state, dev, errp); } + } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_plug_cb(hotplug_dev, &lpc->pm.acpi_pci_hotplug, + dev, errp); } else { error_setg(errp, "acpi: device plug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -473,6 +536,10 @@ void ich9_pm_device_unplug_request_cb(HotplugHandler *hotplug_dev, acpi_cpu_unplug_request_cb(hotplug_dev, &lpc->pm.cpuhp_state, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_unplug_request_cb(hotplug_dev, + &lpc->pm.acpi_pci_hotplug, + dev, errp); } else { error_setg(errp, "acpi: device unplug request for not supported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -490,6 +557,9 @@ void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU) && !lpc->pm.cpu_hotplug_legacy) { acpi_cpu_unplug_cb(&lpc->pm.cpuhp_state, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + acpi_pcihp_device_unplug_cb(hotplug_dev, &lpc->pm.acpi_pci_hotplug, + dev, errp); } else { error_setg(errp, "acpi: device unplug for not supported device" " type: %s", object_get_typename(OBJECT(dev))); diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index 4999277d57b80..f4d706e47dcbd 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -30,6 +30,9 @@ #include "hw/pci-host/i440fx.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_host.h" +#include "hw/pci/pcie_port.h" +#include "hw/i386/acpi-build.h" #include "hw/acpi/acpi.h" #include "hw/pci/pci_bus.h" #include "migration/vmstate.h" @@ -37,7 +40,6 @@ #include "qom/qom-qobject.h" #include "trace.h" -#define ACPI_PCIHP_ADDR 0xae00 #define ACPI_PCIHP_SIZE 0x0018 #define PCI_UP_BASE 0x0000 #define PCI_DOWN_BASE 0x0004 @@ -104,6 +106,7 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque) static void acpi_set_pci_info(void) { static bool bsel_is_set; + Object *host = acpi_get_i386_pci_host(); PCIBus *bus; unsigned bsel_alloc = ACPI_PCIHP_BSEL_DEFAULT; @@ -112,7 +115,11 @@ static void acpi_set_pci_info(void) } bsel_is_set = true; - bus = find_i440fx(); /* TODO: Q35 support */ + if (!host) { + return; + } + + bus = PCI_HOST_BRIDGE(host)->bus; if (bus) { /* Scan all PCI buses. Set property to enable acpi based hotplug. */ pci_for_each_bus_depth_first(bus, acpi_set_bsel, NULL, &bsel_alloc); @@ -122,13 +129,14 @@ static void acpi_set_pci_info(void) static void acpi_pcihp_disable_root_bus(void) { static bool root_hp_disabled; + Object *host = acpi_get_i386_pci_host(); PCIBus *bus; if (root_hp_disabled) { return; } - bus = find_i440fx(); + bus = PCI_HOST_BRIDGE(host)->bus; if (bus) { /* setting the hotplug handler to NULL makes the bus non-hotpluggable */ qbus_set_hotplug_handler(BUS(bus), NULL); @@ -329,6 +337,13 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { PCIBus *sec = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + /* Remove all hot-plug handlers if hot-plug is disabled on slot */ + if (object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT) && + !PCIE_SLOT(pdev)->hotplug) { + qbus_set_hotplug_handler(BUS(sec), NULL); + return; + } + qbus_set_hotplug_handler(BUS(sec), OBJECT(hotplug_dev)); /* We don't have to overwrite any other hotplug handler yet */ assert(QLIST_EMPTY(&sec->child)); @@ -488,10 +503,11 @@ static const MemoryRegionOps acpi_pcihp_io_ops = { }; void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus, - MemoryRegion *address_space_io, bool bridges_enabled) + MemoryRegion *address_space_io, bool bridges_enabled, + uint16_t io_base) { s->io_len = ACPI_PCIHP_SIZE; - s->io_base = ACPI_PCIHP_ADDR; + s->io_base = io_base; s->root = root_bus; s->legacy_piix = !bridges_enabled; diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 0bd23d74e2427..48f7a1edbcbc0 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -49,6 +49,8 @@ #define GPE_BASE 0xafe0 #define GPE_LEN 4 +#define ACPI_PCIHP_ADDR_PIIX4 0xae00 + struct pci_status { uint32_t up; /* deprecated, maintained for migration compatibility */ uint32_t down; @@ -607,7 +609,7 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent, if (s->use_acpi_hotplug_bridge || s->use_acpi_root_pci_hotplug) { acpi_pcihp_init(OBJECT(s), &s->acpi_pci_hotplug, bus, parent, - s->use_acpi_hotplug_bridge); + s->use_acpi_hotplug_bridge, ACPI_PCIHP_ADDR_PIIX4); } s->cpu_hotplug_legacy = true; diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index f1024843ddbd4..037cc1fd82c34 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -44,6 +44,7 @@ #include "hw/acpi/tpm.h" #include "hw/pci/pcie_host.h" #include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" #include "hw/pci-host/gpex.h" #include "hw/arm/virt.h" #include "hw/mem/nvdimm.h" @@ -239,23 +240,89 @@ static void acpi_dsdt_add_tpm(Aml *scope, VirtMachineState *vms) } #endif +/* Build the iort ID mapping to SMMUv3 for a given PCI host bridge */ +static int +iort_host_bridges(Object *obj, void *opaque) +{ + GArray *idmap_blob = opaque; + + if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { + PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; + + if (bus && !pci_bus_bypass_iommu(bus)) { + int min_bus, max_bus; + + pci_bus_range(bus, &min_bus, &max_bus); + + AcpiIortIdMapping idmap = { + .input_base = min_bus << 8, + .id_count = (max_bus - min_bus + 1) << 8, + }; + g_array_append_val(idmap_blob, idmap); + } + } + + return 0; +} + +static int iort_idmap_compare(gconstpointer a, gconstpointer b) +{ + AcpiIortIdMapping *idmap_a = (AcpiIortIdMapping *)a; + AcpiIortIdMapping *idmap_b = (AcpiIortIdMapping *)b; + + return idmap_a->input_base - idmap_b->input_base; +} + static void build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) { - int nb_nodes, iort_start = table_data->len; + int i, nb_nodes, rc_mapping_count, iort_start = table_data->len; AcpiIortIdMapping *idmap; AcpiIortItsGroup *its; AcpiIortTable *iort; AcpiIortSmmu3 *smmu; size_t node_size, iort_node_offset, iort_length, smmu_offset = 0; AcpiIortRC *rc; + GArray *smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); + GArray *its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); iort = acpi_data_push(table_data, sizeof(*iort)); if (vms->iommu == VIRT_IOMMU_SMMUV3) { + AcpiIortIdMapping next_range = {0}; + + object_child_foreach_recursive(object_get_root(), + iort_host_bridges, smmu_idmaps); + + /* Sort the smmu idmap by input_base */ + g_array_sort(smmu_idmaps, iort_idmap_compare); + + /* + * Split the whole RIDs by mapping from RC to SMMU, + * build the ID mapping from RC to ITS directly. + */ + for (i = 0; i < smmu_idmaps->len; i++) { + idmap = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i); + + if (next_range.input_base < idmap->input_base) { + next_range.id_count = idmap->input_base - next_range.input_base; + g_array_append_val(its_idmaps, next_range); + } + + next_range.input_base = idmap->input_base + idmap->id_count; + } + + /* Append the last RC -> ITS ID mapping */ + if (next_range.input_base < 0xFFFF) { + next_range.id_count = 0xFFFF - next_range.input_base; + g_array_append_val(its_idmaps, next_range); + } + nb_nodes = 3; /* RC, ITS, SMMUv3 */ + rc_mapping_count = smmu_idmaps->len + its_idmaps->len; } else { nb_nodes = 2; /* RC, ITS */ + rc_mapping_count = 1; } iort_length = sizeof(*iort); @@ -307,13 +374,13 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) } /* Root Complex Node */ - node_size = sizeof(*rc) + sizeof(*idmap); + node_size = sizeof(*rc) + sizeof(*idmap) * rc_mapping_count; iort_length += node_size; rc = acpi_data_push(table_data, node_size); rc->type = ACPI_IORT_NODE_PCI_ROOT_COMPLEX; rc->length = cpu_to_le16(node_size); - rc->mapping_count = cpu_to_le32(1); + rc->mapping_count = cpu_to_le32(rc_mapping_count); rc->mapping_offset = cpu_to_le32(sizeof(*rc)); /* fully coherent device */ @@ -321,20 +388,45 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) rc->memory_properties.memory_flags = 0x3; /* CCA = CPM = DCAS = 1 */ rc->pci_segment_number = 0; /* MCFG pci_segment */ - /* Identity RID mapping covering the whole input RID range */ - idmap = &rc->id_mapping_array[0]; - idmap->input_base = 0; - idmap->id_count = cpu_to_le32(0xFFFF); - idmap->output_base = 0; - if (vms->iommu == VIRT_IOMMU_SMMUV3) { - /* output IORT node is the smmuv3 node */ - idmap->output_reference = cpu_to_le32(smmu_offset); + AcpiIortIdMapping *range; + + /* translated RIDs connect to SMMUv3 node: RC -> SMMUv3 -> ITS */ + for (i = 0; i < smmu_idmaps->len; i++) { + idmap = &rc->id_mapping_array[i]; + range = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i); + + idmap->input_base = cpu_to_le32(range->input_base); + idmap->id_count = cpu_to_le32(range->id_count); + idmap->output_base = cpu_to_le32(range->input_base); + /* output IORT node is the smmuv3 node */ + idmap->output_reference = cpu_to_le32(smmu_offset); + } + + /* bypassed RIDs connect to ITS group node directly: RC -> ITS */ + for (i = 0; i < its_idmaps->len; i++) { + idmap = &rc->id_mapping_array[smmu_idmaps->len + i]; + range = &g_array_index(its_idmaps, AcpiIortIdMapping, i); + + idmap->input_base = cpu_to_le32(range->input_base); + idmap->id_count = cpu_to_le32(range->id_count); + idmap->output_base = cpu_to_le32(range->input_base); + /* output IORT node is the ITS group node (the first node) */ + idmap->output_reference = cpu_to_le32(iort_node_offset); + } } else { + /* Identity RID mapping covering the whole input RID range */ + idmap = &rc->id_mapping_array[0]; + idmap->input_base = cpu_to_le32(0); + idmap->id_count = cpu_to_le32(0xFFFF); + idmap->output_base = cpu_to_le32(0); /* output IORT node is the ITS group node (the first node) */ idmap->output_reference = cpu_to_le32(iort_node_offset); } + g_array_free(smmu_idmaps, true); + g_array_free(its_idmaps, true); + /* * Update the pointer address in case table_data->data moves during above * acpi_data_push operations. diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 93ab9d21ea0ae..81eda46b0bb94 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1367,6 +1367,7 @@ static void create_pcie(VirtMachineState *vms) } pci = PCI_HOST_BRIDGE(dev); + pci->bypass_iommu = vms->default_bus_bypass_iommu; vms->bus = pci->bus; if (vms->bus) { for (i = 0; i < nb_nics; i++) { @@ -2322,6 +2323,21 @@ static void virt_set_iommu(Object *obj, const char *value, Error **errp) } } +static bool virt_get_default_bus_bypass_iommu(Object *obj, Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + return vms->default_bus_bypass_iommu; +} + +static void virt_set_default_bus_bypass_iommu(Object *obj, bool value, + Error **errp) +{ + VirtMachineState *vms = VIRT_MACHINE(obj); + + vms->default_bus_bypass_iommu = value; +} + static CpuInstanceProperties virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) { @@ -2661,6 +2677,13 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) "Set the IOMMU type. " "Valid values are none and smmuv3"); + object_class_property_add_bool(oc, "default_bus_bypass_iommu", + virt_get_default_bus_bypass_iommu, + virt_set_default_bus_bypass_iommu); + object_class_property_set_description(oc, "default_bus_bypass_iommu", + "Set on/off to enable/disable " + "bypass_iommu for default root bus"); + object_class_property_add_bool(oc, "ras", virt_get_ras, virt_set_ras); object_class_property_set_description(oc, "ras", @@ -2728,6 +2751,9 @@ static void virt_instance_init(Object *obj) /* Default disallows iommu instantiation */ vms->iommu = VIRT_IOMMU_NONE; + /* The default root bus is attached to iommu by default */ + vms->default_bus_bypass_iommu = false; + /* Default disallows RAS instantiation */ vms->ras = false; diff --git a/hw/core/machine.c b/hw/core/machine.c index 6f59fb0b7f2c0..775add0795769 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -584,7 +584,6 @@ static void machine_set_memdev(Object *obj, const char *value, Error **errp) ms->ram_memdev_id = g_strdup(value); } - static void machine_init_notify(Notifier *notifier, void *data) { MachineState *machine = MACHINE(qdev_get_machine()); diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 357437ff1d7de..17836149fe543 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -219,10 +219,6 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) /* w2k requires FADT(rev1) or it won't boot, keep PC compatible */ pm->fadt.rev = 1; pm->cpu_hp_io_base = PIIX4_CPU_HOTPLUG_IO_BASE; - pm->pcihp_io_base = - object_property_get_uint(obj, ACPI_PCIHP_IO_BASE_PROP, NULL); - pm->pcihp_io_len = - object_property_get_uint(obj, ACPI_PCIHP_IO_LEN_PROP, NULL); } if (lpc) { uint64_t smi_features = object_property_get_uint(lpc, @@ -238,6 +234,10 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) pm->smi_on_cpu_unplug = !!(smi_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOT_UNPLUG_BIT)); } + pm->pcihp_io_base = + object_property_get_uint(obj, ACPI_PCIHP_IO_BASE_PROP, NULL); + pm->pcihp_io_len = + object_property_get_uint(obj, ACPI_PCIHP_IO_LEN_PROP, NULL); /* The above need not be conditional on machine type because the reset port * happens to be the same on PIIX (pc) and ICH9 (q35). */ @@ -299,7 +299,7 @@ static void acpi_get_misc_info(AcpiMiscInfo *info) * Because of the PXB hosts we cannot simply query TYPE_PCI_HOST_BRIDGE. * On i386 arch we only have two pci hosts, so we can look only for them. */ -static Object *acpi_get_i386_pci_host(void) +Object *acpi_get_i386_pci_host(void) { PCIHostState *host; @@ -320,7 +320,10 @@ static void acpi_get_pci_holes(Range *hole, Range *hole64) Object *pci_host; pci_host = acpi_get_i386_pci_host(); - g_assert(pci_host); + + if (!pci_host) { + return; + } range_set_bounds1(hole, object_property_get_uint(pci_host, @@ -392,6 +395,9 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, if (!pdev) { if (bsel) { /* add hotplug slots for non present devices */ + if (pci_bus_is_express(bus) && slot > 0) { + break; + } dev = aml_device("S%.02X", PCI_DEVFN(slot, 0)); aml_append(dev, aml_name_decl("_SUN", aml_int(slot))); aml_append(dev, aml_name_decl("_ADR", aml_int(slot << 16))); @@ -521,7 +527,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, QLIST_FOREACH(sec, &bus->child, sibling) { int32_t devfn = sec->parent_dev->devfn; - if (pci_bus_is_root(sec) || pci_bus_is_express(sec)) { + if (pci_bus_is_root(sec)) { continue; } @@ -1251,7 +1257,7 @@ static void build_piix4_isa_bridge(Aml *table) aml_append(table, scope); } -static void build_piix4_pci_hotplug(Aml *table) +static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr) { Aml *scope; Aml *field; @@ -1260,20 +1266,22 @@ static void build_piix4_pci_hotplug(Aml *table) scope = aml_scope("_SB.PCI0"); aml_append(scope, - aml_operation_region("PCST", AML_SYSTEM_IO, aml_int(0xae00), 0x08)); + aml_operation_region("PCST", AML_SYSTEM_IO, aml_int(pcihp_addr), 0x08)); field = aml_field("PCST", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("PCIU", 32)); aml_append(field, aml_named_field("PCID", 32)); aml_append(scope, field); aml_append(scope, - aml_operation_region("SEJ", AML_SYSTEM_IO, aml_int(0xae08), 0x04)); + aml_operation_region("SEJ", AML_SYSTEM_IO, + aml_int(pcihp_addr + ACPI_PCIHP_SEJ_BASE), 0x04)); field = aml_field("SEJ", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("B0EJ", 32)); aml_append(scope, field); aml_append(scope, - aml_operation_region("BNMR", AML_SYSTEM_IO, aml_int(0xae10), 0x08)); + aml_operation_region("BNMR", AML_SYSTEM_IO, + aml_int(pcihp_addr + ACPI_PCIHP_BNMR_BASE), 0x08)); field = aml_field("BNMR", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS); aml_append(field, aml_named_field("BNUM", 32)); aml_append(field, aml_named_field("PIDX", 32)); @@ -1407,7 +1415,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, build_piix4_isa_bridge(dsdt); build_isa_devices_aml(dsdt); if (pm->pcihp_bridge_en || pm->pcihp_root_en) { - build_piix4_pci_hotplug(dsdt); + build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); } build_piix4_pci0_int(dsdt); } else { @@ -1455,6 +1463,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } build_q35_isa_bridge(dsdt); build_isa_devices_aml(dsdt); + if (pm->pcihp_bridge_en) { + build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); + } build_q35_pci0_int(dsdt); if (pcms->smbus && !pcmc->do_not_add_smb_acpi) { build_smb0(dsdt, pcms->smbus, ICH9_SMB_DEV, ICH9_SMB_FUNC); @@ -1489,7 +1500,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, { aml_append(scope, aml_name_decl("_HID", aml_string("ACPI0006"))); - if (misc->is_piix4 && (pm->pcihp_bridge_en || pm->pcihp_root_en)) { + if (pm->pcihp_bridge_en || pm->pcihp_root_en) { method = aml_method("_E01", 0, AML_NOTSERIALIZED); aml_append(method, aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF)); @@ -1757,6 +1768,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, PCIBus *bus = NULL; pci_host = acpi_get_i386_pci_host(); + if (pci_host) { bus = PCI_HOST_BRIDGE(pci_host)->bus; } @@ -2010,6 +2022,56 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) x86ms->oem_table_id); } +/* + * Insert DMAR scope for PCI bridges and endpoint devcie + */ +static void +insert_scope(PCIBus *bus, PCIDevice *dev, void *opaque) +{ + GArray *scope_blob = opaque; + AcpiDmarDeviceScope *scope = NULL; + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { + /* Dmar Scope Type: 0x02 for PCI Bridge */ + build_append_int_noprefix(scope_blob, 0x02, 1); + } else { + /* Dmar Scope Type: 0x01 for PCI Endpoint Device */ + build_append_int_noprefix(scope_blob, 0x01, 1); + } + + /* length */ + build_append_int_noprefix(scope_blob, + sizeof(*scope) + sizeof(scope->path[0]), 1); + /* reserved */ + build_append_int_noprefix(scope_blob, 0, 2); + /* enumeration_id */ + build_append_int_noprefix(scope_blob, 0, 1); + /* bus */ + build_append_int_noprefix(scope_blob, pci_bus_num(bus), 1); + /* device */ + build_append_int_noprefix(scope_blob, PCI_SLOT(dev->devfn), 1); + /* function */ + build_append_int_noprefix(scope_blob, PCI_FUNC(dev->devfn), 1); +} + +/* For a given PCI host bridge, walk and insert DMAR scope */ +static int +dmar_host_bridges(Object *obj, void *opaque) +{ + GArray *scope_blob = opaque; + + if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { + PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; + + if (bus && !pci_bus_bypass_iommu(bus)) { + pci_for_each_device(bus, pci_bus_num(bus), insert_scope, + scope_blob); + } + } + + return 0; +} + /* * VT-d spec 8.1 DMA Remapping Reporting Structure * (version Oct. 2014 or later) @@ -2029,6 +2091,15 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker, const char *oem_id, /* Root complex IOAPIC use one path[0] only */ size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]); IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu); + GArray *scope_blob = g_array_new(false, true, 1); + + /* + * A PCI bus walk, for each PCI host bridge. + * Insert scope for each PCI bridge and endpoint device which + * is attached to a bus with iommu enabled. + */ + object_child_foreach_recursive(object_get_root(), + dmar_host_bridges, scope_blob); assert(iommu); if (x86_iommu_ir_supported(iommu)) { @@ -2042,8 +2113,9 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker, const char *oem_id, /* DMAR Remapping Hardware Unit Definition structure */ drhd = acpi_data_push(table_data, sizeof(*drhd) + ioapic_scope_size); drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT); - drhd->length = cpu_to_le16(sizeof(*drhd) + ioapic_scope_size); - drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL; + drhd->length = + cpu_to_le16(sizeof(*drhd) + ioapic_scope_size + scope_blob->len); + drhd->flags = 0; /* Don't include all pci device */ drhd->pci_segment = cpu_to_le16(0); drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR); @@ -2057,6 +2129,10 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker, const char *oem_id, scope->path[0].device = PCI_SLOT(Q35_PSEUDO_DEVFN_IOAPIC); scope->path[0].function = PCI_FUNC(Q35_PSEUDO_DEVFN_IOAPIC); + /* Add scope found above */ + g_array_append_vals(table_data, scope_blob->data, scope_blob->len); + g_array_free(scope_blob, true); + if (iommu->dt_supported) { atsr = acpi_data_push(table_data, sizeof(*atsr)); atsr->type = cpu_to_le16(ACPI_DMAR_TYPE_ATSR); @@ -2187,7 +2263,7 @@ ivrs_host_bridges(Object *obj, void *opaque) if (object_dynamic_cast(obj, TYPE_PCI_HOST_BRIDGE)) { PCIBus *bus = PCI_HOST_BRIDGE(obj)->bus; - if (bus) { + if (bus && !pci_bus_bypass_iommu(bus)) { pci_for_each_device(bus, pci_bus_num(bus), insert_ivhd, ivhd_blob); } } @@ -2313,7 +2389,9 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) QObject *o; pci_host = acpi_get_i386_pci_host(); - g_assert(pci_host); + if (!pci_host) { + return false; + } o = object_property_get_qobject(pci_host, PCIE_HOST_MCFG_BASE, NULL); if (!o) { @@ -2343,7 +2421,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) AcpiPmInfo pm; AcpiMiscInfo misc; AcpiMcfgInfo mcfg; - Range pci_hole, pci_hole64; + Range pci_hole = {}, pci_hole64 = {}; uint8_t *u; size_t aml_len = 0; GArray *tables_blob = tables->table_data; diff --git a/hw/i386/acpi-build.h b/hw/i386/acpi-build.h index 74df5fc612889..0dce155c8cc8b 100644 --- a/hw/i386/acpi-build.h +++ b/hw/i386/acpi-build.h @@ -5,6 +5,11 @@ extern const struct AcpiGenericAddress x86_nvdimm_acpi_dsmio; +/* PCI Hot-plug registers bases. See docs/spec/acpi_pci_hotplug.txt */ +#define ACPI_PCIHP_SEJ_BASE 0x8 +#define ACPI_PCIHP_BNMR_BASE 0x10 + void acpi_setup(void); +Object *acpi_get_i386_pci_host(void); #endif diff --git a/hw/i386/pc.c b/hw/i386/pc.c index aa79c5e0e6ff2..c2b9d62a358f7 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -99,6 +99,7 @@ GlobalProperty pc_compat_6_0[] = { { "qemu64" "-" TYPE_X86_CPU, "model", "6" }, { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, + { "ICH9-LPC", "acpi-pci-hotplug-with-bridge-support", "off" }, }; const size_t pc_compat_6_0_len = G_N_ELEMENTS(pc_compat_6_0); @@ -1523,6 +1524,21 @@ static void pc_machine_set_hpet(Object *obj, bool value, Error **errp) pcms->hpet_enabled = value; } +static bool pc_machine_get_default_bus_bypass_iommu(Object *obj, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + return pcms->default_bus_bypass_iommu; +} + +static void pc_machine_set_default_bus_bypass_iommu(Object *obj, bool value, + Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + pcms->default_bus_bypass_iommu = value; +} + static void pc_machine_get_max_ram_below_4g(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) @@ -1622,6 +1638,7 @@ static void pc_machine_initfn(Object *obj) #ifdef CONFIG_HPET pcms->hpet_enabled = true; #endif + pcms->default_bus_bypass_iommu = false; pc_system_flash_create(pcms); pcms->pcspk = isa_new(TYPE_PC_SPEAKER); @@ -1746,6 +1763,10 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) object_class_property_add_bool(oc, "hpet", pc_machine_get_hpet, pc_machine_set_hpet); + object_class_property_add_bool(oc, "default_bus_bypass_iommu", + pc_machine_get_default_bus_bypass_iommu, + pc_machine_set_default_bus_bypass_iommu); + object_class_property_add(oc, PC_MACHINE_MAX_FW_SIZE, "size", pc_machine_get_max_fw_size, pc_machine_set_max_fw_size, NULL, NULL); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 46a0f196f413d..04b4a4788d75b 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -37,6 +37,7 @@ #include "sysemu/kvm.h" #include "hw/kvm/clock.h" #include "hw/pci-host/q35.h" +#include "hw/pci/pcie_port.h" #include "hw/qdev-properties.h" #include "hw/i386/x86.h" #include "hw/i386/pc.h" @@ -136,6 +137,7 @@ static void pc_q35_init(MachineState *machine) ram_addr_t lowmem; DriveInfo *hd[MAX_SATA_PORTS]; MachineClass *mc = MACHINE_GET_CLASS(machine); + bool acpi_pcihp; /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping @@ -236,6 +238,15 @@ static void pc_q35_init(MachineState *machine) object_property_set_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, OBJECT(lpc), &error_abort); + acpi_pcihp = object_property_get_bool(OBJECT(lpc), + "acpi-pci-hotplug-with-bridge-support", + NULL); + + if (acpi_pcihp) { + object_register_sugar_prop(TYPE_PCIE_SLOT, "native-hotplug", + "false", true); + } + /* irq lines */ gsi_state = pc_gsi_create(&x86ms->gsi, pcmc->pci_enabled); diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index aedded106422c..7112dc30623e2 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -57,6 +57,7 @@ struct PXBDev { uint8_t bus_nr; uint16_t numa_node; + bool bypass_iommu; }; static PXBDev *convert_to_pxb(PCIDevice *dev) @@ -255,6 +256,7 @@ static void pxb_dev_realize_common(PCIDevice *dev, bool pcie, Error **errp) bus->map_irq = pxb_map_irq_fn; PCI_HOST_BRIDGE(ds)->bus = bus; + PCI_HOST_BRIDGE(ds)->bypass_iommu = pxb->bypass_iommu; pxb_register_bus(dev, bus, &local_err); if (local_err) { @@ -301,6 +303,7 @@ static Property pxb_dev_properties[] = { /* Note: 0 is not a legal PXB bus number. */ DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0), DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node, NUMA_NODE_UNASSIGNED), + DEFINE_PROP_BOOL("bypass_iommu", PXBDev, bypass_iommu, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 0f37cf056a9af..ab5a47aff560b 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -65,6 +65,8 @@ static void q35_host_realize(DeviceState *dev, Error **errp) s->mch.address_space_io, 0, TYPE_PCIE_BUS); PC_MACHINE(qdev_get_machine())->bus = pci->bus; + pci->bypass_iommu = + PC_MACHINE(qdev_get_machine())->default_bus_bypass_iommu; qdev_realize(DEVICE(&s->mch), BUS(pci->bus), &error_fatal); } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 377084f1a8d30..23d2ae2ab232f 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -416,6 +416,22 @@ const char *pci_root_bus_path(PCIDevice *dev) return rootbus->qbus.name; } +bool pci_bus_bypass_iommu(PCIBus *bus) +{ + PCIBus *rootbus = bus; + PCIHostState *host_bridge; + + if (!pci_bus_is_root(bus)) { + rootbus = pci_device_root_bus(bus->parent_dev); + } + + host_bridge = PCI_HOST_BRIDGE(rootbus->qbus.parent); + + assert(host_bridge->bus == rootbus); + + return host_bridge->bypass_iommu; +} + static void pci_root_bus_init(PCIBus *bus, DeviceState *parent, MemoryRegion *address_space_mem, MemoryRegion *address_space_io, @@ -521,6 +537,22 @@ int pci_bus_num(PCIBus *s) return PCI_BUS_GET_CLASS(s)->bus_num(s); } +/* Returns the min and max bus numbers of a PCI bus hierarchy */ +void pci_bus_range(PCIBus *bus, int *min_bus, int *max_bus) +{ + int i; + *min_bus = *max_bus = pci_bus_num(bus); + + for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) { + PCIDevice *dev = bus->devices[i]; + + if (dev && PCI_DEVICE_GET_CLASS(dev)->is_bridge) { + *min_bus = MIN(*min_bus, dev->config[PCI_SECONDARY_BUS]); + *max_bus = MAX(*max_bus, dev->config[PCI_SUBORDINATE_BUS]); + } + } +} + int pci_bus_numa_node(PCIBus *bus) { return PCI_BUS_GET_CLASS(bus)->numa_node(bus); @@ -2718,7 +2750,7 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) iommu_bus = parent_bus; } - if (iommu_bus && iommu_bus->iommu_fn) { + if (!pci_bus_bypass_iommu(bus) && iommu_bus && iommu_bus->iommu_fn) { return iommu_bus->iommu_fn(bus, iommu_bus->iommu_opaque, devfn); } return &address_space_memory; diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index 8ca5fadcbd4c4..cf02f0d6a5018 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -222,6 +222,7 @@ const VMStateDescription vmstate_pcihost = { static Property pci_host_properties_common[] = { DEFINE_PROP_BOOL("x-config-reg-migration-enabled", PCIHostState, mig_enabled, true), + DEFINE_PROP_BOOL("bypass-iommu", PCIHostState, bypass_iommu, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index fd0fa157e813a..6e95d829037ab 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -529,7 +529,13 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) PCI_EXP_SLTCAP_PIP | PCI_EXP_SLTCAP_AIP | PCI_EXP_SLTCAP_ABP); - if (s->hotplug) { + + /* + * Enable native hot-plug on all hot-plugged bridges unless + * hot-plug is disabled on the slot. + */ + if (s->hotplug && + (s->native_hotplug || DEVICE(dev)->hotplugged)) { pci_long_test_and_set_mask(dev->config + pos + PCI_EXP_SLTCAP, PCI_EXP_SLTCAP_HPS | PCI_EXP_SLTCAP_HPC); diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c index eb563ad4354f0..da850e8dde7c8 100644 --- a/hw/pci/pcie_port.c +++ b/hw/pci/pcie_port.c @@ -148,6 +148,7 @@ static Property pcie_slot_props[] = { DEFINE_PROP_UINT8("chassis", PCIESlot, chassis, 0), DEFINE_PROP_UINT16("slot", PCIESlot, slot, 0), DEFINE_PROP_BOOL("hotplug", PCIESlot, hotplug, true), + DEFINE_PROP_BOOL("native-hotplug", PCIESlot, native_hotplug, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index 0eda25c4e1bff..6c917d7f1f0e0 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -58,3 +58,13 @@ config VIRTIO_MEM depends on LINUX depends on VIRTIO_MEM_SUPPORTED select MEM_DEVICE + +config VHOST_USER_RNG + bool + default y + depends on VIRTIO && VHOST_USER + +config VHOST_USER_I2C + bool + default y + depends on VIRTIO && VHOST_USER diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index fbff9bc9d4de4..08629c8f2c29f 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -25,6 +25,10 @@ virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock. virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) +virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) +virtio_ss.add(when: ['CONFIG_VHOST_USER_RNG', 'CONFIG_VIRTIO_PCI'], if_true: files('vhost-user-rng-pci.c')) +virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c')) +virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'], if_true: files('vhost-user-i2c-pci.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/vhost-user-i2c-pci.c b/hw/virtio/vhost-user-i2c-pci.c new file mode 100644 index 0000000000000..70b7b65fd9700 --- /dev/null +++ b/hw/virtio/vhost-user-i2c-pci.c @@ -0,0 +1,69 @@ +/* + * Vhost-user i2c virtio device PCI glue + * + * Copyright (c) 2021 Viresh Kumar + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-i2c.h" +#include "virtio-pci.h" + +struct VHostUserI2CPCI { + VirtIOPCIProxy parent_obj; + VHostUserI2C vdev; +}; + +typedef struct VHostUserI2CPCI VHostUserI2CPCI; + +#define TYPE_VHOST_USER_I2C_PCI "vhost-user-i2c-pci-base" + +DECLARE_INSTANCE_CHECKER(VHostUserI2CPCI, VHOST_USER_I2C_PCI, + TYPE_VHOST_USER_I2C_PCI) + +static void vhost_user_i2c_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserI2CPCI *dev = VHOST_USER_I2C_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + vpci_dev->nvectors = 1; + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); +} + +static void vhost_user_i2c_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_i2c_pci_realize; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */ + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER; +} + +static void vhost_user_i2c_pci_instance_init(Object *obj) +{ + VHostUserI2CPCI *dev = VHOST_USER_I2C_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_I2C); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_i2c_pci_info = { + .base_name = TYPE_VHOST_USER_I2C_PCI, + .non_transitional_name = "vhost-user-i2c-pci", + .instance_size = sizeof(VHostUserI2CPCI), + .instance_init = vhost_user_i2c_pci_instance_init, + .class_init = vhost_user_i2c_pci_class_init, +}; + +static void vhost_user_i2c_pci_register(void) +{ + virtio_pci_types_register(&vhost_user_i2c_pci_info); +} + +type_init(vhost_user_i2c_pci_register); diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c new file mode 100644 index 0000000000000..d172632bb0cd3 --- /dev/null +++ b/hw/virtio/vhost-user-i2c.c @@ -0,0 +1,288 @@ +/* + * Vhost-user i2c virtio device + * + * Copyright (c) 2021 Viresh Kumar + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/vhost-user-i2c.h" +#include "qemu/error-report.h" +#include "standard-headers/linux/virtio_ids.h" + +/* Remove this once the header is updated in Linux kernel */ +#ifndef VIRTIO_ID_I2C_ADAPTER +#define VIRTIO_ID_I2C_ADAPTER 34 +#endif + +static void vu_i2c_start(VirtIODevice *vdev) +{ + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + int ret, i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&i2c->vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return; + } + + ret = k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + i2c->vhost_dev.acked_features = vdev->guest_features; + + ret = vhost_dev_start(&i2c->vhost_dev, vdev); + if (ret < 0) { + error_report("Error starting vhost-user-i2c: %d", -ret); + goto err_guest_notifiers; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < i2c->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&i2c->vhost_dev, vdev, i, false); + } + + return; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&i2c->vhost_dev, vdev); +} + +static void vu_i2c_stop(VirtIODevice *vdev) +{ + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&i2c->vhost_dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&i2c->vhost_dev, vdev); +} + +static void vu_i2c_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + + if (!vdev->vm_running) { + should_start = false; + } + + if (i2c->vhost_dev.started == should_start) { + return; + } + + if (should_start) { + vu_i2c_start(vdev); + } else { + vu_i2c_stop(vdev); + } +} + +static uint64_t vu_i2c_get_features(VirtIODevice *vdev, + uint64_t requested_features, Error **errp) +{ + /* No feature bits used yet */ + return requested_features; +} + +static void vu_i2c_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* + * Not normally called; it's the daemon that handles the queue; + * however virtio's cleanup path can call this. + */ +} + +static void vu_i2c_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) +{ + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + vhost_virtqueue_mask(&i2c->vhost_dev, vdev, idx, mask); +} + +static bool vu_i2c_guest_notifier_pending(VirtIODevice *vdev, int idx) +{ + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + return vhost_virtqueue_pending(&i2c->vhost_dev, idx); +} + +static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserI2C *i2c) +{ + vhost_user_cleanup(&i2c->vhost_user); + virtio_delete_queue(i2c->vq); + virtio_cleanup(vdev); + g_free(i2c->vhost_dev.vqs); + i2c->vhost_dev.vqs = NULL; +} + +static int vu_i2c_connect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + if (i2c->connected) { + return 0; + } + i2c->connected = true; + + /* restore vhost state */ + if (virtio_device_started(vdev, vdev->status)) { + vu_i2c_start(vdev); + } + + return 0; +} + +static void vu_i2c_disconnect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + if (!i2c->connected) { + return; + } + i2c->connected = false; + + if (i2c->vhost_dev.started) { + vu_i2c_stop(vdev); + } +} + +static void vu_i2c_event(void *opaque, QEMUChrEvent event) +{ + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + + switch (event) { + case CHR_EVENT_OPENED: + if (vu_i2c_connect(dev) < 0) { + qemu_chr_fe_disconnect(&i2c->chardev); + return; + } + break; + case CHR_EVENT_CLOSED: + vu_i2c_disconnect(dev); + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static void vu_i2c_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(dev); + int ret; + + if (!i2c->chardev.chr) { + error_setg(errp, "vhost-user-i2c: missing chardev"); + return; + } + + if (!vhost_user_init(&i2c->vhost_user, &i2c->chardev, errp)) { + return; + } + + virtio_init(vdev, "vhost-user-i2c", VIRTIO_ID_I2C_ADAPTER, 0); + + i2c->vhost_dev.nvqs = 1; + i2c->vq = virtio_add_queue(vdev, 4, vu_i2c_handle_output); + i2c->vhost_dev.vqs = g_new0(struct vhost_virtqueue, i2c->vhost_dev.nvqs); + + ret = vhost_dev_init(&i2c->vhost_dev, &i2c->vhost_user, + VHOST_BACKEND_TYPE_USER, 0, errp); + if (ret < 0) { + do_vhost_user_cleanup(vdev, i2c); + } + + qemu_chr_fe_set_handlers(&i2c->chardev, NULL, NULL, vu_i2c_event, NULL, + dev, NULL, true); +} + +static void vu_i2c_device_unrealize(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserI2C *i2c = VHOST_USER_I2C(dev); + + /* This will stop vhost backend if appropriate. */ + vu_i2c_set_status(vdev, 0); + vhost_dev_cleanup(&i2c->vhost_dev); + do_vhost_user_cleanup(vdev, i2c); +} + +static const VMStateDescription vu_i2c_vmstate = { + .name = "vhost-user-i2c", + .unmigratable = 1, +}; + +static Property vu_i2c_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserI2C, chardev), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vu_i2c_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + device_class_set_props(dc, vu_i2c_properties); + dc->vmsd = &vu_i2c_vmstate; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + vdc->realize = vu_i2c_device_realize; + vdc->unrealize = vu_i2c_device_unrealize; + vdc->get_features = vu_i2c_get_features; + vdc->set_status = vu_i2c_set_status; + vdc->guest_notifier_mask = vu_i2c_guest_notifier_mask; + vdc->guest_notifier_pending = vu_i2c_guest_notifier_pending; +} + +static const TypeInfo vu_i2c_info = { + .name = TYPE_VHOST_USER_I2C, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostUserI2C), + .class_init = vu_i2c_class_init, +}; + +static void vu_i2c_register_types(void) +{ + type_register_static(&vu_i2c_info); +} + +type_init(vu_i2c_register_types) diff --git a/hw/virtio/vhost-user-rng-pci.c b/hw/virtio/vhost-user-rng-pci.c new file mode 100644 index 0000000000000..ffff2de39fd44 --- /dev/null +++ b/hw/virtio/vhost-user-rng-pci.c @@ -0,0 +1,79 @@ +/* + * Vhost-user RNG virtio device PCI glue + * + * Copyright (c) 2021 Mathieu Poirier + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-rng.h" +#include "virtio-pci.h" + +struct VHostUserRNGPCI { + VirtIOPCIProxy parent_obj; + VHostUserRNG vdev; +}; + +typedef struct VHostUserRNGPCI VHostUserRNGPCI; + +#define TYPE_VHOST_USER_RNG_PCI "vhost-user-rng-pci-base" + +DECLARE_INSTANCE_CHECKER(VHostUserRNGPCI, VHOST_USER_RNG_PCI, + TYPE_VHOST_USER_RNG_PCI) + +static Property vhost_user_rng_pci_properties[] = { + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, + DEV_NVECTORS_UNSPECIFIED), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_rng_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserRNGPCI *dev = VHOST_USER_RNG_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { + vpci_dev->nvectors = 1; + } + + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); +} + +static void vhost_user_rng_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_rng_pci_realize; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + device_class_set_props(dc, vhost_user_rng_pci_properties); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */ + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER; +} + +static void vhost_user_rng_pci_instance_init(Object *obj) +{ + VHostUserRNGPCI *dev = VHOST_USER_RNG_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_RNG); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_rng_pci_info = { + .base_name = TYPE_VHOST_USER_RNG_PCI, + .non_transitional_name = "vhost-user-rng-pci", + .instance_size = sizeof(VHostUserRNGPCI), + .instance_init = vhost_user_rng_pci_instance_init, + .class_init = vhost_user_rng_pci_class_init, +}; + +static void vhost_user_rng_pci_register(void) +{ + virtio_pci_types_register(&vhost_user_rng_pci_info); +} + +type_init(vhost_user_rng_pci_register); diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c new file mode 100644 index 0000000000000..3825266bdf46f --- /dev/null +++ b/hw/virtio/vhost-user-rng.c @@ -0,0 +1,294 @@ +/* + * Vhost-user RNG virtio device + * + * Copyright (c) 2021 Mathieu Poirier + * + * Implementation seriously tailored on vhost-user-i2c.c + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/vhost-user-rng.h" +#include "qemu/error-report.h" +#include "standard-headers/linux/virtio_ids.h" + +static void vu_rng_start(VirtIODevice *vdev) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + int i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&rng->vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return; + } + + ret = k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + rng->vhost_dev.acked_features = vdev->guest_features; + ret = vhost_dev_start(&rng->vhost_dev, vdev); + if (ret < 0) { + error_report("Error starting vhost-user-rng: %d", -ret); + goto err_guest_notifiers; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < rng->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&rng->vhost_dev, vdev, i, false); + } + + return; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&rng->vhost_dev, vdev); +} + +static void vu_rng_stop(VirtIODevice *vdev) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&rng->vhost_dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&rng->vhost_dev, vdev); +} + +static void vu_rng_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + + if (!vdev->vm_running) { + should_start = false; + } + + if (rng->vhost_dev.started == should_start) { + return; + } + + if (should_start) { + vu_rng_start(vdev); + } else { + vu_rng_stop(vdev); + } +} + +static uint64_t vu_rng_get_features(VirtIODevice *vdev, + uint64_t requested_features, Error **errp) +{ + /* No feature bits used yet */ + return requested_features; +} + +static void vu_rng_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* + * Not normally called; it's the daemon that handles the queue; + * however virtio's cleanup path can call this. + */ +} + +static void vu_rng_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + + vhost_virtqueue_mask(&rng->vhost_dev, vdev, idx, mask); +} + +static bool vu_rng_guest_notifier_pending(VirtIODevice *vdev, int idx) +{ + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + + return vhost_virtqueue_pending(&rng->vhost_dev, idx); +} + +static void vu_rng_connect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + + if (rng->connected) { + return; + } + + rng->connected = true; + + /* restore vhost state */ + if (virtio_device_started(vdev, vdev->status)) { + vu_rng_start(vdev); + } +} + +static void vu_rng_disconnect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserRNG *rng = VHOST_USER_RNG(vdev); + + if (!rng->connected) { + return; + } + + rng->connected = false; + + if (rng->vhost_dev.started) { + vu_rng_stop(vdev); + } +} + +static void vu_rng_event(void *opaque, QEMUChrEvent event) +{ + DeviceState *dev = opaque; + + switch (event) { + case CHR_EVENT_OPENED: + vu_rng_connect(dev); + break; + case CHR_EVENT_CLOSED: + vu_rng_disconnect(dev); + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static void vu_rng_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserRNG *rng = VHOST_USER_RNG(dev); + int ret; + + if (!rng->chardev.chr) { + error_setg(errp, "missing chardev"); + return; + } + + if (!vhost_user_init(&rng->vhost_user, &rng->chardev, errp)) { + return; + } + + virtio_init(vdev, "vhost-user-rng", VIRTIO_ID_RNG, 0); + + rng->req_vq = virtio_add_queue(vdev, 4, vu_rng_handle_output); + if (!rng->req_vq) { + error_setg_errno(errp, -1, "virtio_add_queue() failed"); + goto virtio_add_queue_failed; + } + + rng->vhost_dev.nvqs = 1; + rng->vhost_dev.vqs = g_new0(struct vhost_virtqueue, rng->vhost_dev.nvqs); + if (!rng->vhost_dev.vqs) { + error_setg_errno(errp, -1, "memory allocation failed"); + goto vhost_dev_init_failed; + } + + ret = vhost_dev_init(&rng->vhost_dev, &rng->vhost_user, + VHOST_BACKEND_TYPE_USER, 0, errp); + if (ret < 0) { + error_setg_errno(errp, -ret, "vhost_dev_init() failed"); + goto vhost_dev_init_failed; + } + + qemu_chr_fe_set_handlers(&rng->chardev, NULL, NULL, vu_rng_event, NULL, + dev, NULL, true); + + return; + +vhost_dev_init_failed: + virtio_delete_queue(rng->req_vq); +virtio_add_queue_failed: + virtio_cleanup(vdev); + vhost_user_cleanup(&rng->vhost_user); +} + +static void vu_rng_device_unrealize(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserRNG *rng = VHOST_USER_RNG(dev); + + vu_rng_set_status(vdev, 0); + + vhost_dev_cleanup(&rng->vhost_dev); + g_free(rng->vhost_dev.vqs); + rng->vhost_dev.vqs = NULL; + virtio_delete_queue(rng->req_vq); + virtio_cleanup(vdev); + vhost_user_cleanup(&rng->vhost_user); +} + +static const VMStateDescription vu_rng_vmstate = { + .name = "vhost-user-rng", + .unmigratable = 1, +}; + +static Property vu_rng_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserRNG, chardev), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vu_rng_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + device_class_set_props(dc, vu_rng_properties); + dc->vmsd = &vu_rng_vmstate; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + + vdc->realize = vu_rng_device_realize; + vdc->unrealize = vu_rng_device_unrealize; + vdc->get_features = vu_rng_get_features; + vdc->set_status = vu_rng_set_status; + vdc->guest_notifier_mask = vu_rng_guest_notifier_mask; + vdc->guest_notifier_pending = vu_rng_guest_notifier_pending; +} + +static const TypeInfo vu_rng_info = { + .name = TYPE_VHOST_USER_RNG, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostUserRNG), + .class_init = vu_rng_class_init, +}; + +static void vu_rng_register_types(void) +{ + type_register_static(&vu_rng_info); +} + +type_init(vu_rng_register_types) diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index 777cafe70db29..1b1a5c70eded0 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -21,6 +21,11 @@ #include "hw/virtio/vhost-vsock.h" #include "monitor/monitor.h" +const int feature_bits[] = { + VIRTIO_VSOCK_F_SEQPACKET, + VHOST_INVALID_FEATURE_BIT +}; + static void vhost_vsock_get_config(VirtIODevice *vdev, uint8_t *config) { VHostVSock *vsock = VHOST_VSOCK(vdev); @@ -108,8 +113,11 @@ static uint64_t vhost_vsock_get_features(VirtIODevice *vdev, uint64_t requested_features, Error **errp) { - /* No feature bits used yet */ - return requested_features; + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + + virtio_add_feature(&requested_features, VIRTIO_VSOCK_F_SEQPACKET); + return vhost_get_features(&vvc->vhost_dev, feature_bits, + requested_features); } static const VMStateDescription vmstate_virtio_vhost_vsock = { diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index df519e40b5773..a329ce43abb27 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -24,10 +24,13 @@ #include "hw/acpi/acpi.h" #include "hw/acpi/cpu_hotplug.h" #include "hw/acpi/cpu.h" +#include "hw/acpi/pcihp.h" #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/acpi_dev_interface.h" #include "hw/acpi/tco.h" +#define ACPI_PCIHP_ADDR_ICH9 0x0cc4 + typedef struct ICH9LPCPMRegs { /* * In ich9 spec says that pm1_cnt register is 32bit width and @@ -53,6 +56,8 @@ typedef struct ICH9LPCPMRegs { AcpiCpuHotplug gpe_cpu; CPUHotplugState cpuhp_state; + bool use_acpi_hotplug_bridge; + AcpiPciHpState acpi_pci_hotplug; MemHotplugState acpi_memory_hotplug; uint8_t disable_s3; diff --git a/include/hw/acpi/pcihp.h b/include/hw/acpi/pcihp.h index 2dd90aea30914..af1a169fc32db 100644 --- a/include/hw/acpi/pcihp.h +++ b/include/hw/acpi/pcihp.h @@ -55,7 +55,8 @@ typedef struct AcpiPciHpState { } AcpiPciHpState; void acpi_pcihp_init(Object *owner, AcpiPciHpState *, PCIBus *root, - MemoryRegion *address_space_io, bool bridges_enabled); + MemoryRegion *address_space_io, bool bridges_enabled, + uint16_t io_base); void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index 921416f918bfe..9661c46699607 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -147,6 +147,7 @@ struct VirtMachineState { OnOffAuto acpi; VirtGICType gic_version; VirtIOMMUType iommu; + bool default_bus_bypass_iommu; VirtMSIControllerType msi_controller; uint16_t virtio_iommu_bdf; struct arm_boot_info bootinfo; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 87294f2632e11..fd741119fab7f 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -44,6 +44,7 @@ typedef struct PCMachineState { bool sata_enabled; bool pit_enabled; bool hpet_enabled; + bool default_bus_bypass_iommu; uint64_t max_fw_size; /* NUMA information: */ diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 6be4e0c460c5a..d0f4266e37252 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -450,6 +450,7 @@ static inline PCIBus *pci_get_bus(const PCIDevice *dev) return PCI_BUS(qdev_get_parent_bus(DEVICE(dev))); } int pci_bus_num(PCIBus *s); +void pci_bus_range(PCIBus *bus, int *min_bus, int *max_bus); static inline int pci_dev_bus_num(const PCIDevice *dev) { return pci_bus_num(pci_get_bus(dev)); @@ -480,6 +481,7 @@ void pci_for_each_bus(PCIBus *bus, PCIBus *pci_device_root_bus(const PCIDevice *d); const char *pci_root_bus_path(PCIDevice *dev); +bool pci_bus_bypass_iommu(PCIBus *bus); PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn); int pci_qdev_find_device(const char *id, PCIDevice **pdev); void pci_bus_get_w64_range(PCIBus *bus, Range *range); diff --git a/include/hw/pci/pci_host.h b/include/hw/pci/pci_host.h index 52e038c0196fd..c6f4eb45851d2 100644 --- a/include/hw/pci/pci_host.h +++ b/include/hw/pci/pci_host.h @@ -43,6 +43,7 @@ struct PCIHostState { uint32_t config_reg; bool mig_enabled; PCIBus *bus; + bool bypass_iommu; QLIST_ENTRY(PCIHostState) next; }; diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h index bea8ecad0fdb0..e25b289ce84c7 100644 --- a/include/hw/pci/pcie_port.h +++ b/include/hw/pci/pcie_port.h @@ -57,8 +57,11 @@ struct PCIESlot { /* Disable ACS (really for a pcie_root_port) */ bool disable_acs; - /* Indicates whether hot-plug is enabled on the slot */ + /* Indicates whether any type of hot-plug is allowed on the slot */ bool hotplug; + + bool native_hotplug; + QLIST_ENTRY(PCIESlot) next; }; diff --git a/include/hw/virtio/vhost-user-i2c.h b/include/hw/virtio/vhost-user-i2c.h new file mode 100644 index 0000000000000..deae47a76d557 --- /dev/null +++ b/include/hw/virtio/vhost-user-i2c.h @@ -0,0 +1,28 @@ +/* + * Vhost-user i2c virtio device + * + * Copyright (c) 2021 Viresh Kumar + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _QEMU_VHOST_USER_I2C_H +#define _QEMU_VHOST_USER_I2C_H + +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" + +#define TYPE_VHOST_USER_I2C "vhost-user-i2c-device" +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserI2C, VHOST_USER_I2C) + +struct VHostUserI2C { + VirtIODevice parent; + CharBackend chardev; + struct vhost_virtqueue *vhost_vq; + struct vhost_dev vhost_dev; + VhostUserState vhost_user; + VirtQueue *vq; + bool connected; +}; + +#endif /* _QEMU_VHOST_USER_I2C_H */ diff --git a/include/hw/virtio/vhost-user-rng.h b/include/hw/virtio/vhost-user-rng.h new file mode 100644 index 0000000000000..071539996d1dd --- /dev/null +++ b/include/hw/virtio/vhost-user-rng.h @@ -0,0 +1,33 @@ +/* + * Vhost-user RNG virtio device + * + * Copyright (c) 2021 Mathieu Poirier + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _QEMU_VHOST_USER_RNG_H +#define _QEMU_VHOST_USER_RNG_H + +#include "hw/virtio/virtio.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" +#include "chardev/char-fe.h" + +#define TYPE_VHOST_USER_RNG "vhost-user-rng" +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserRNG, VHOST_USER_RNG) + +struct VHostUserRNG { + /*< private >*/ + VirtIODevice parent; + CharBackend chardev; + struct vhost_virtqueue *vhost_vq; + struct vhost_dev vhost_dev; + VhostUserState vhost_user; + VirtQueue *req_vq; + bool connected; + + /*< public >*/ +}; + +#endif /* _QEMU_VHOST_USER_RNG_H */ diff --git a/qemu-options.hx b/qemu-options.hx index 8965dabc83e39..0fcc8973dd443 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -926,6 +926,39 @@ SRST ``-device pci-ipmi-bt,bmc=id`` Like the KCS interface, but defines a BT interface on the PCI bus. + +``-device intel-iommu[,option=...]`` + This is only supported by ``-machine q35``, which will enable Intel VT-d + emulation within the guest. It supports below options: + + ``intremap=on|off`` (default: auto) + This enables interrupt remapping feature. It's required to enable + complete x2apic. Currently it only supports kvm kernel-irqchip modes + ``off`` or ``split``, while full kernel-irqchip is not yet supported. + The default value is "auto", which will be decided by the mode of + kernel-irqchip. + + ``caching-mode=on|off`` (default: off) + This enables caching mode for the VT-d emulated device. When + caching-mode is enabled, each guest DMA buffer mapping will generate an + IOTLB invalidation from the guest IOMMU driver to the vIOMMU device in + a synchronous way. It is required for ``-device vfio-pci`` to work + with the VT-d device, because host assigned devices requires to setup + the DMA mapping on the host before guest DMA starts. + + ``device-iotlb=on|off`` (default: off) + This enables device-iotlb capability for the emulated VT-d device. So + far virtio/vhost should be the only real user for this parameter, + paired with ats=on configured for the device. + + ``aw-bits=39|48`` (default: 39) + This decides the address width of IOVA address space. The address + space has 39 bits width for 3-level IOMMU page tables, and 48 bits for + 4-level IOMMU page tables. + + Please also refer to the wiki page for general scenarios of VT-d + emulation in QEMU: https://wiki.qemu.org/Features/VT-d. + ERST DEF("name", HAS_ARG, QEMU_OPTION_name, diff --git a/tests/data/acpi/q35/DSDT b/tests/data/acpi/q35/DSDT index cccf92f0466fa..842533f53e6db 100644 Binary files a/tests/data/acpi/q35/DSDT and b/tests/data/acpi/q35/DSDT differ diff --git a/tests/data/acpi/q35/DSDT.acpihmat b/tests/data/acpi/q35/DSDT.acpihmat index b3c1dd6bc4021..8d00f2ea0dd78 100644 Binary files a/tests/data/acpi/q35/DSDT.acpihmat and b/tests/data/acpi/q35/DSDT.acpihmat differ diff --git a/tests/data/acpi/q35/DSDT.bridge b/tests/data/acpi/q35/DSDT.bridge index eb5d27d95b2cd..55ad4bd7ab475 100644 Binary files a/tests/data/acpi/q35/DSDT.bridge and b/tests/data/acpi/q35/DSDT.bridge differ diff --git a/tests/data/acpi/q35/DSDT.cphp b/tests/data/acpi/q35/DSDT.cphp index e55d12990c98d..ccde2add9f87d 100644 Binary files a/tests/data/acpi/q35/DSDT.cphp and b/tests/data/acpi/q35/DSDT.cphp differ diff --git a/tests/data/acpi/q35/DSDT.dimmpxm b/tests/data/acpi/q35/DSDT.dimmpxm index 95901f94c0d44..b062e30117f95 100644 Binary files a/tests/data/acpi/q35/DSDT.dimmpxm and b/tests/data/acpi/q35/DSDT.dimmpxm differ diff --git a/tests/data/acpi/q35/DSDT.ipmibt b/tests/data/acpi/q35/DSDT.ipmibt index ce07e9f152def..1c5737692f56f 100644 Binary files a/tests/data/acpi/q35/DSDT.ipmibt and b/tests/data/acpi/q35/DSDT.ipmibt differ diff --git a/tests/data/acpi/q35/DSDT.memhp b/tests/data/acpi/q35/DSDT.memhp index 7acf6243f08cd..7b6f6487b229c 100644 Binary files a/tests/data/acpi/q35/DSDT.memhp and b/tests/data/acpi/q35/DSDT.memhp differ diff --git a/tests/data/acpi/q35/DSDT.mmio64 b/tests/data/acpi/q35/DSDT.mmio64 index 77d46369e48ef..2e0a772a85275 100644 Binary files a/tests/data/acpi/q35/DSDT.mmio64 and b/tests/data/acpi/q35/DSDT.mmio64 differ diff --git a/tests/data/acpi/q35/DSDT.nohpet b/tests/data/acpi/q35/DSDT.nohpet index 0b10128e42af0..ceb61f4115c2c 100644 Binary files a/tests/data/acpi/q35/DSDT.nohpet and b/tests/data/acpi/q35/DSDT.nohpet differ diff --git a/tests/data/acpi/q35/DSDT.numamem b/tests/data/acpi/q35/DSDT.numamem index e4c4582e7f76b..a3f846df541a7 100644 Binary files a/tests/data/acpi/q35/DSDT.numamem and b/tests/data/acpi/q35/DSDT.numamem differ diff --git a/tests/data/acpi/q35/DSDT.tis b/tests/data/acpi/q35/DSDT.tis index 15a26a14e4be5..d1433e3c14570 100644 Binary files a/tests/data/acpi/q35/DSDT.tis and b/tests/data/acpi/q35/DSDT.tis differ diff --git a/tools/meson.build b/tools/meson.build index 3e5a0abfa29f7..66b0a11fbb45b 100644 --- a/tools/meson.build +++ b/tools/meson.build @@ -24,3 +24,11 @@ endif if have_virtiofsd subdir('virtiofsd') endif + +have_virtiorng = (have_system and + have_tools and + 'CONFIG_LINUX' in config_host) + +if have_virtiorng + subdir('vhost-user-rng') +endif diff --git a/tools/vhost-user-rng/50-qemu-rng.json.in b/tools/vhost-user-rng/50-qemu-rng.json.in new file mode 100644 index 0000000000000..9186c3c6fe1d0 --- /dev/null +++ b/tools/vhost-user-rng/50-qemu-rng.json.in @@ -0,0 +1,5 @@ +{ + "description": "QEMU vhost-user-rng", + "type": "bridge", + "binary": "@libexecdir@/vhost-user-rng" +} diff --git a/tools/vhost-user-rng/main.c b/tools/vhost-user-rng/main.c new file mode 100644 index 0000000000000..c3b8f69227578 --- /dev/null +++ b/tools/vhost-user-rng/main.c @@ -0,0 +1,403 @@ +/* + * VIRTIO RNG Emulation via vhost-user + * + * Copyright (c) 2021 Mathieu Poirier + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#define G_LOG_DOMAIN "vhost-user-rng" +#define G_LOG_USE_STRUCTURED 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "qemu/cutils.h" +#include "subprojects/libvhost-user/libvhost-user-glib.h" +#include "subprojects/libvhost-user/libvhost-user.h" + +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + const typeof(((type *) 0)->member) * __mptr = (ptr); \ + (type *) ((char *) __mptr - offsetof(type, member)); }) +#endif + +typedef struct { + VugDev dev; + struct itimerspec ts; + timer_t rate_limit_timer; + pthread_mutex_t rng_mutex; + pthread_cond_t rng_cond; + int64_t quota_remaining; + bool activate_timer; + GMainLoop *loop; +} VuRNG; + +static gboolean print_cap, verbose; +static gchar *source_path, *socket_path; +static gint source_fd, socket_fd = -1; + +/* Defaults tailored on virtio-rng.c */ +static uint32_t period_ms = 1 << 16; +static uint64_t max_bytes = INT64_MAX; + +static void check_rate_limit(union sigval sv) +{ + VuRNG *rng = sv.sival_ptr; + bool wakeup = false; + + pthread_mutex_lock(&rng->rng_mutex); + /* + * The timer has expired and the guest has used all available + * entropy, which means function vu_rng_handle_request() is waiting + * on us. As such wake it up once we're done here. + */ + if (rng->quota_remaining == 0) { + wakeup = true; + } + + /* + * Reset the entropy available to the guest and tell function + * vu_rng_handle_requests() to start the timer before using it. + */ + rng->quota_remaining = max_bytes; + rng->activate_timer = true; + pthread_mutex_unlock(&rng->rng_mutex); + + if (wakeup) { + pthread_cond_signal(&rng->rng_cond); + } +} + +static void setup_timer(VuRNG *rng) +{ + struct sigevent sev; + int ret; + + memset(&rng->ts, 0, sizeof(struct itimerspec)); + rng->ts.it_value.tv_sec = period_ms / 1000; + rng->ts.it_value.tv_nsec = (period_ms % 1000) * 1000000; + + /* + * Call function check_rate_limit() as if it was the start of + * a new thread when the timer expires. + */ + sev.sigev_notify = SIGEV_THREAD; + sev.sigev_notify_function = check_rate_limit; + sev.sigev_value.sival_ptr = rng; + /* Needs to be NULL if defaults attributes are to be used. */ + sev.sigev_notify_attributes = NULL; + ret = timer_create(CLOCK_MONOTONIC, &sev, &rng->rate_limit_timer); + if (ret < 0) { + fprintf(stderr, "timer_create() failed\n"); + } + +} + + +/* Virtio helpers */ +static uint64_t rng_get_features(VuDev *dev) +{ + if (verbose) { + g_info("%s: replying", __func__); + } + return 0; +} + +static void rng_set_features(VuDev *dev, uint64_t features) +{ + if (verbose && features) { + g_autoptr(GString) s = g_string_new("Requested un-handled feature"); + g_string_append_printf(s, " 0x%" PRIx64 "", features); + g_info("%s: %s", __func__, s->str); + } +} + +static void vu_rng_handle_requests(VuDev *dev, int qidx) +{ + VuRNG *rng = container_of(dev, VuRNG, dev.parent); + VuVirtq *vq = vu_get_queue(dev, qidx); + VuVirtqElement *elem; + size_t to_read; + int len, ret; + + for (;;) { + /* Get element in the vhost virtqueue */ + elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement)); + if (!elem) { + break; + } + + /* Get the amount of entropy to read from the vhost server */ + to_read = elem->in_sg[0].iov_len; + + pthread_mutex_lock(&rng->rng_mutex); + + /* + * We have consumed all entropy available for this time slice. + * Wait for the timer (check_rate_limit()) to tell us about the + * start of a new time slice. + */ + if (rng->quota_remaining == 0) { + pthread_cond_wait(&rng->rng_cond, &rng->rng_mutex); + } + + /* Start the timer if the last time slice has expired */ + if (rng->activate_timer == true) { + rng->activate_timer = false; + ret = timer_settime(rng->rate_limit_timer, 0, &rng->ts, NULL); + if (ret < 0) { + fprintf(stderr, "timer_settime() failed\n"); + } + } + + /* Make sure we don't read more than it's available */ + if (rng->quota_remaining < to_read) { + to_read = rng->quota_remaining; + } + + len = read(source_fd, elem->in_sg[0].iov_base, to_read); + + /* Simply return 0 if an error occurs */ + if (len < 0) { + len = 0; + } + + rng->quota_remaining -= len; + + pthread_mutex_unlock(&rng->rng_mutex); + + vu_queue_push(dev, vq, elem, len); + free(elem); + } + + vu_queue_notify(dev, vq); +} + +static void +vu_rng_queue_set_started(VuDev *dev, int qidx, bool started) +{ + VuVirtq *vq = vu_get_queue(dev, qidx); + + g_debug("queue started %d:%d\n", qidx, started); + + if (!qidx) { + vu_set_queue_handler(dev, vq, started ? vu_rng_handle_requests : NULL); + } +} + +/* + * Any messages not handled here are processed by the libvhost library + * itself. + */ +static int rng_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply) +{ + VuRNG *rng = container_of(dev, VuRNG, dev.parent); + + if (msg->request == VHOST_USER_NONE) { + g_main_loop_quit(rng->loop); + return 1; + } + + return 0; +} + +static const VuDevIface vuiface = { + .set_features = rng_set_features, + .get_features = rng_get_features, + .queue_set_started = vu_rng_queue_set_started, + .process_msg = rng_process_msg, +}; + +static gboolean hangup(gpointer user_data) +{ + GMainLoop *loop = (GMainLoop *) user_data; + + g_printerr("%s: caught hangup/quit signal, quitting", __func__); + g_main_loop_quit(loop); + return true; +} + +static void panic(VuDev *dev, const char *msg) +{ + g_critical("%s\n", msg); + exit(EXIT_FAILURE); +} + +/* Print vhost-user.json backend program capabilities */ +static void print_capabilities(void) +{ + printf("{\n"); + printf(" \"type\": \"RNG\"\n"); + printf(" \"filename\": [ RNG source ]\n"); + printf("}\n"); +} + +static GOptionEntry options[] = { + { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &socket_path, + "Location of vhost-user Unix domain socket, incompatible with --fd", + "PATH" }, + { "fd", 'f', 0, G_OPTION_ARG_INT, &socket_fd, + "Specify the backend file-descriptor, incompatible with --socket-path", + "FD" }, + { "period", 'p', 0, G_OPTION_ARG_INT, &period_ms, + "Time needed (in ms) to transfer a maximum amount of byte", NULL }, + { "max-bytes", 'm', 0, G_OPTION_ARG_INT64, &max_bytes, + "Maximum amount of byte that can be transferred in a period", NULL }, + { "filename", 'n', 0, G_OPTION_ARG_FILENAME, &source_path, + "RNG source, defaults to /dev/urandom", "PATH" }, + { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &print_cap, + "Output to stdout the backend capabilities in JSON format and exit", + NULL}, + { "verbose", 'v', 0, G_OPTION_ARG_NONE, &verbose, + "Be more verbose in output", NULL}, + { NULL } +}; + +int main(int argc, char *argv[]) +{ + GError *error = NULL; + GOptionContext *context; + g_autoptr(GSocket) socket = NULL; + char default_source[] = "/dev/urandom"; + char *source = default_source; + VuRNG rng; + + context = g_option_context_new("vhost-user emulation of RNG device"); + g_option_context_add_main_entries(context, options, "vhost-user-rng"); + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_printerr("option parsing failed: %s\n", error->message); + exit(1); + } + + if (print_cap) { + print_capabilities(); + exit(0); + } + + if (!socket_path && socket_fd < 0) { + g_printerr("Please specify either --fd or --socket-path\n"); + exit(EXIT_FAILURE); + } + + if (socket_path && socket_fd > 0) { + g_printerr("Either --fd or --socket-path, not both\n"); + exit(EXIT_FAILURE); + } + + if (max_bytes > INT64_MAX) { + g_printerr("'max-bytes' parameter must be non-negative, " + "and less than 2^63\n"); + exit(EXIT_FAILURE); + } + + if (period_ms <= 0) { + g_printerr("'period' parameter expects a positive integer\n"); + exit(EXIT_FAILURE); + } + + /* + * Now create a vhost-user socket that we will receive messages + * on. Once we have our handler set up we can enter the glib main + * loop. + */ + if (socket_path) { + g_autoptr(GSocketAddress) addr = g_unix_socket_address_new(socket_path); + g_autoptr(GSocket) bind_socket = g_socket_new(G_SOCKET_FAMILY_UNIX, + G_SOCKET_TYPE_STREAM, + G_SOCKET_PROTOCOL_DEFAULT, + &error); + + if (!g_socket_bind(bind_socket, addr, false, &error)) { + g_printerr("Failed to bind to socket at %s (%s).\n", + socket_path, error->message); + exit(EXIT_FAILURE); + } + if (!g_socket_listen(bind_socket, &error)) { + g_printerr("Failed to listen on socket %s (%s).\n", + socket_path, error->message); + } + g_message("awaiting connection to %s", socket_path); + socket = g_socket_accept(bind_socket, NULL, &error); + if (!socket) { + g_printerr("Failed to accept on socket %s (%s).\n", + socket_path, error->message); + } + } else { + socket = g_socket_new_from_fd(socket_fd, &error); + if (!socket) { + g_printerr("Failed to connect to FD %d (%s).\n", + socket_fd, error->message); + exit(EXIT_FAILURE); + } + } + + /* Overwrite default RNG source with what user provided, if any */ + if (source_path) { + source = source_path; + } + + source_fd = open(source, O_RDWR); + if (source_fd < 0) { + g_printerr("Failed to open RNG source %s\n", source); + g_socket_close(socket, &error); + unlink(socket_path); + exit(EXIT_FAILURE); + } + + /* catch exit signals */ + g_unix_signal_add(SIGHUP, hangup, rng.loop); + g_unix_signal_add(SIGINT, hangup, rng.loop); + + /* + * Create the main loop first so all the various sources can be + * added. As well as catching signals we need to ensure vug_init + * can add it's GSource watches. + */ + rng.loop = g_main_loop_new(NULL, FALSE); + + if (!vug_init(&rng.dev, 1, g_socket_get_fd(socket), + panic, &vuiface)) { + g_printerr("Failed to initialize libvhost-user-glib.\n"); + exit(EXIT_FAILURE); + } + + rng.quota_remaining = max_bytes; + rng.activate_timer = true; + pthread_mutex_init(&rng.rng_mutex, NULL); + pthread_cond_init(&rng.rng_cond, NULL); + setup_timer(&rng); + + if (verbose) { + g_info("period_ms: %d tv_sec: %ld tv_nsec: %lu\n", + period_ms, rng.ts.it_value.tv_sec, rng.ts.it_value.tv_nsec); + } + + g_message("entering main loop, awaiting messages"); + g_main_loop_run(rng.loop); + g_message("finished main loop, cleaning up"); + + g_main_loop_unref(rng.loop); + vug_deinit(&rng.dev); + timer_delete(rng.rate_limit_timer); + close(source_fd); + unlink(socket_path); +} diff --git a/tools/vhost-user-rng/meson.build b/tools/vhost-user-rng/meson.build new file mode 100644 index 0000000000000..4dc386daf335f --- /dev/null +++ b/tools/vhost-user-rng/meson.build @@ -0,0 +1,10 @@ +executable('vhost-user-rng', files( + 'main.c'), + dependencies: [qemuutil, glib, gio, rt], + install: true, + install_dir: get_option('libexecdir')) + +configure_file(input: '50-qemu-rng.json.in', + output: '50-qemu-rng.json', + configuration: config_host, + install_dir: qemu_datadir / 'vhost-user')