433 changes: 406 additions & 27 deletions backends/cryptodev.c

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions backends/meson.build
@@ -1,5 +1,6 @@
softmmu_ss.add([files(
'cryptodev-builtin.c',
'cryptodev-hmp-cmds.c',
'cryptodev.c',
'hostmem-ram.c',
'hostmem.c',
14 changes: 14 additions & 0 deletions hmp-commands-info.hx
@@ -993,3 +993,17 @@ SRST
``info virtio-queue-element`` *path* *queue* [*index*]
Display element of a given virtio queue
ERST

{
.name = "cryptodev",
.args_type = "",
.params = "",
.help = "show the crypto devices",
.cmd = hmp_info_cryptodev,
.flags = "p",
},

SRST
``info cryptodev``
Show the crypto devices.
ERST
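The handler registered above lives in the new backends/cryptodev-hmp-cmds.c, whose hunk is collapsed in this diff. A minimal sketch of the shape such a handler usually takes, assuming the query-cryptodev QAPI layer added by the unrendered backends/cryptodev.c changes; the type and function names below are assumptions, not confirmed by the diff:

/*
 * Sketch only: the real implementation is in backends/cryptodev-hmp-cmds.c,
 * which is not rendered above. QCryptodevInfoList and qmp_query_cryptodev()
 * are assumed from the QAPI schema this series introduces.
 */
#include "qemu/osdep.h"
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "qapi/qapi-commands-cryptodev.h"

void hmp_info_cryptodev(Monitor *mon, const QDict *qdict)
{
    QCryptodevInfoList *list = qmp_query_cryptodev(NULL);
    QCryptodevInfoList *el;

    for (el = list; el; el = el->next) {
        /* one line per backend; the real code also dumps services/queues */
        monitor_printf(mon, "%s\n", el->value->id);
    }
    qapi_free_QCryptodevInfoList(list);
}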
9 changes: 6 additions & 3 deletions hw/acpi/acpi-pci-hotplug-stub.c
@@ -5,8 +5,7 @@
const VMStateDescription vmstate_acpi_pcihp_pci_status;

void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
MemoryRegion *address_space_io, bool bridges_enabled,
uint16_t io_base)
MemoryRegion *address_space_io, uint16_t io_base)
{
return;
}
@@ -36,8 +35,12 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
return;
}

void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off)
void acpi_pcihp_reset(AcpiPciHpState *s)
{
return;
}

bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus)
{
return true;
}
21 changes: 13 additions & 8 deletions hw/acpi/ich9.c
@@ -218,7 +218,7 @@ static bool vmstate_test_use_pcihp(void *opaque)
{
ICH9LPCPMRegs *s = opaque;

return s->use_acpi_hotplug_bridge;
return s->acpi_pci_hotplug.use_acpi_hotplug_bridge;
}

static const VMStateDescription vmstate_pcihp_state = {
@@ -277,8 +277,8 @@ static void pm_reset(void *opaque)
}
pm->smi_en_wmask = ~0;

if (pm->use_acpi_hotplug_bridge) {
acpi_pcihp_reset(&pm->acpi_pci_hotplug, true);
if (pm->acpi_pci_hotplug.use_acpi_hotplug_bridge) {
acpi_pcihp_reset(&pm->acpi_pci_hotplug);
}

acpi_update_sci(&pm->acpi_regs, pm->irq);
@@ -316,12 +316,11 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, qemu_irq sci_irq)
acpi_pm_tco_init(&pm->tco_regs, &pm->io);
}

if (pm->use_acpi_hotplug_bridge) {
if (pm->acpi_pci_hotplug.use_acpi_hotplug_bridge) {
acpi_pcihp_init(OBJECT(lpc_pci),
&pm->acpi_pci_hotplug,
pci_get_bus(lpc_pci),
pci_address_space_io(lpc_pci),
true,
ACPI_PCIHP_ADDR_ICH9);

qbus_set_hotplug_handler(BUS(pci_get_bus(lpc_pci)),
@@ -403,14 +402,14 @@ static bool ich9_pm_get_acpi_pci_hotplug(Object *obj, Error **errp)
{
ICH9LPCState *s = ICH9_LPC_DEVICE(obj);

return s->pm.use_acpi_hotplug_bridge;
return s->pm.acpi_pci_hotplug.use_acpi_hotplug_bridge;
}

static void ich9_pm_set_acpi_pci_hotplug(Object *obj, bool value, Error **errp)
{
ICH9LPCState *s = ICH9_LPC_DEVICE(obj);

s->pm.use_acpi_hotplug_bridge = value;
s->pm.acpi_pci_hotplug.use_acpi_hotplug_bridge = value;
}

static bool ich9_pm_get_keep_pci_slot_hpc(Object *obj, Error **errp)
@@ -435,7 +434,7 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm)
pm->disable_s3 = 0;
pm->disable_s4 = 0;
pm->s4_val = 2;
pm->use_acpi_hotplug_bridge = true;
pm->acpi_pci_hotplug.use_acpi_hotplug_bridge = true;
pm->keep_pci_slot_hpc = true;
pm->enable_tco = true;

@@ -579,6 +578,12 @@ void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
}
}

bool ich9_pm_is_hotpluggable_bus(HotplugHandler *hotplug_dev, BusState *bus)
{
ICH9LPCState *lpc = ICH9_LPC_DEVICE(hotplug_dev);
return acpi_pcihp_is_hotpluggbale_bus(&lpc->pm.acpi_pci_hotplug, bus);
}

void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list)
{
ICH9LPCState *s = ICH9_LPC_DEVICE(adev);
14 changes: 12 additions & 2 deletions hw/acpi/pci-bridge.c
@@ -21,7 +21,17 @@ void build_pci_bridge_aml(AcpiDevAmlIf *adev, Aml *scope)
{
PCIBridge *br = PCI_BRIDGE(adev);

if (object_property_find(OBJECT(&br->sec_bus), ACPI_PCIHP_PROP_BSEL)) {
build_append_pci_bus_devices(scope, pci_bridge_get_sec_bus(br));
if (!DEVICE(br)->hotplugged) {
PCIBus *sec_bus = pci_bridge_get_sec_bus(br);

build_append_pci_bus_devices(scope, sec_bus);

/*
* generate hotplug slot descriptors if the
* bridge has ACPI PCI hotplug attached
*/
if (object_property_find(OBJECT(sec_bus), ACPI_PCIHP_PROP_BSEL)) {
build_append_pcihp_slots(scope, sec_bus);
}
}
}
112 changes: 25 additions & 87 deletions hw/acpi/pcihp.c
@@ -54,21 +54,6 @@ typedef struct AcpiPciHpFind {
PCIBus *bus;
} AcpiPciHpFind;

static gint g_cmp_uint32(gconstpointer a, gconstpointer b, gpointer user_data)
{
return a - b;
}

static GSequence *pci_acpi_index_list(void)
{
static GSequence *used_acpi_index_list;

if (!used_acpi_index_list) {
used_acpi_index_list = g_sequence_new(NULL);
}
return used_acpi_index_list;
}

static int acpi_pcihp_get_bsel(PCIBus *bus)
{
Error *local_err = NULL;
@@ -136,20 +121,6 @@ static void acpi_set_pci_info(bool has_bridge_hotplug)
}
}

static void acpi_pcihp_disable_root_bus(void)
{
Object *host = acpi_get_i386_pci_host();
PCIBus *bus;

bus = PCI_HOST_BRIDGE(host)->bus;
if (bus && qbus_is_hotpluggable(BUS(bus))) {
/* setting the hotplug handler to NULL makes the bus non-hotpluggable */
qbus_set_hotplug_handler(BUS(bus), NULL);
}

return;
}

static void acpi_pcihp_test_hotplug_bus(PCIBus *bus, void *opaque)
{
AcpiPciHpFind *find = opaque;
@@ -291,17 +262,12 @@ static void acpi_pcihp_update(AcpiPciHpState *s)
}
}

void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off)
void acpi_pcihp_reset(AcpiPciHpState *s)
{
if (acpihp_root_off) {
acpi_pcihp_disable_root_bus();
}
acpi_set_pci_info(!s->legacy_piix);
acpi_set_pci_info(s->use_acpi_hotplug_bridge);
acpi_pcihp_update(s);
}

#define ONBOARD_INDEX_MAX (16 * 1024 - 1)

void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
@@ -314,34 +280,6 @@ void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev,
ACPI_PCIHP_PROP_BSEL "' set");
return;
}

/*
* capped by systemd (see: udev-builtin-net_id.c)
* as it's the only known user honor it to avoid users
* misconfigure QEMU and then wonder why acpi-index doesn't work
*/
if (pdev->acpi_index > ONBOARD_INDEX_MAX) {
error_setg(errp, "acpi-index should be less or equal to %u",
ONBOARD_INDEX_MAX);
return;
}

/*
* make sure that acpi-index is unique across all present PCI devices
*/
if (pdev->acpi_index) {
GSequence *used_indexes = pci_acpi_index_list();

if (g_sequence_lookup(used_indexes, GINT_TO_POINTER(pdev->acpi_index),
g_cmp_uint32, NULL)) {
error_setg(errp, "a PCI device with acpi-index = %" PRIu32
" already exist", pdev->acpi_index);
return;
}
g_sequence_insert_sorted(used_indexes,
GINT_TO_POINTER(pdev->acpi_index),
g_cmp_uint32, NULL);
}
}

void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
@@ -361,17 +299,10 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
* Overwrite the default hotplug handler with the ACPI PCI one
* for cold plugged bridges only.
*/
if (!s->legacy_piix &&
if (s->use_acpi_hotplug_bridge &&
object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
PCIBus *sec = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));

/* Remove all hot-plug handlers if hot-plug is disabled on slot */
if (object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT) &&
!PCIE_SLOT(pdev)->hotplug) {
qbus_set_hotplug_handler(BUS(sec), NULL);
return;
}

qbus_set_hotplug_handler(BUS(sec), OBJECT(hotplug_dev));
/* We don't have to overwrite any other hotplug handler yet */
assert(QLIST_EMPTY(&sec->child));
@@ -401,17 +332,6 @@ void acpi_pcihp_device_unplug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
trace_acpi_pci_unplug(PCI_SLOT(pdev->devfn),
acpi_pcihp_get_bsel(pci_get_bus(pdev)));

/*
* clean up acpi-index so it could reused by another device
*/
if (pdev->acpi_index) {
GSequence *used_indexes = pci_acpi_index_list();

g_sequence_remove(g_sequence_lookup(used_indexes,
GINT_TO_POINTER(pdev->acpi_index),
g_cmp_uint32, NULL));
}

qdev_unrealize(dev);
}

@@ -441,6 +361,24 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS);
}

bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus)
{
Object *o = OBJECT(bus->parent);

if (s->use_acpi_hotplug_bridge &&
object_dynamic_cast(o, TYPE_PCI_BRIDGE)) {
if (object_dynamic_cast(o, TYPE_PCIE_SLOT) && !PCIE_SLOT(o)->hotplug) {
return false;
}
return true;
}

if (s->use_acpi_root_pci_hotplug) {
return true;
}
return false;
}
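acpi_pcihp_is_hotpluggbale_bus() is the ACPI implementation of the new HotplugHandlerClass::is_hotpluggable_bus hook: bridges answer according to use_acpi_hotplug_bridge (with hotplug=off PCIe slots opting out), the root bus according to use_acpi_root_pci_hotplug. The qdev-core change that consults the hook is outside this diff; a plausible sketch of it:

/* Sketch of the qdev-core counterpart (not part of this diff): */
static inline bool qbus_is_hotpluggable(BusState *bus)
{
    HotplugHandler *plug_handler = bus->hotplug_handler;
    bool ret = !!plug_handler;

    if (plug_handler) {
        HotplugHandlerClass *hdc = HOTPLUG_HANDLER_GET_CLASS(plug_handler);

        if (hdc->is_hotpluggable_bus) {
            ret = hdc->is_hotpluggable_bus(plug_handler, bus);
        }
    }
    return ret;
}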

static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size)
{
AcpiPciHpState *s = opaque;
@@ -454,7 +392,7 @@ static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size)
switch (addr) {
case PCI_UP_BASE:
val = s->acpi_pcihp_pci_status[bsel].up;
if (!s->legacy_piix) {
if (s->use_acpi_hotplug_bridge) {
s->acpi_pcihp_pci_status[bsel].up = 0;
}
trace_acpi_pci_up_read(val);
@@ -529,7 +467,8 @@ static void pci_write(void *opaque, hwaddr addr, uint64_t data,
trace_acpi_pci_ej_write(addr, data);
break;
case PCI_SEL_BASE:
s->hotplug_select = s->legacy_piix ? ACPI_PCIHP_BSEL_DEFAULT : data;
s->hotplug_select = s->use_acpi_hotplug_bridge ? data :
ACPI_PCIHP_BSEL_DEFAULT;
trace_acpi_pci_sel_write(addr, data);
default:
break;
@@ -547,14 +486,13 @@ static const MemoryRegionOps acpi_pcihp_io_ops = {
};

void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
MemoryRegion *address_space_io, bool bridges_enabled,
MemoryRegion *address_space_io,
uint16_t io_base)
{
s->io_len = ACPI_PCIHP_SIZE;
s->io_base = io_base;

s->root = root_bus;
s->legacy_piix = !bridges_enabled;

memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s,
"acpi-pci-hotplug", s->io_len);
33 changes: 22 additions & 11 deletions hw/acpi/piix4.c
@@ -170,14 +170,14 @@ static const VMStateDescription vmstate_pci_status = {
static bool vmstate_test_use_acpi_hotplug_bridge(void *opaque, int version_id)
{
PIIX4PMState *s = opaque;
return s->use_acpi_hotplug_bridge;
return s->acpi_pci_hotplug.use_acpi_hotplug_bridge;
}

static bool vmstate_test_no_use_acpi_hotplug_bridge(void *opaque,
int version_id)
{
PIIX4PMState *s = opaque;
return !s->use_acpi_hotplug_bridge;
return !s->acpi_pci_hotplug.use_acpi_hotplug_bridge;
}

static bool vmstate_test_use_memhp(void *opaque)
@@ -234,7 +234,8 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id)
static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id)
{
PIIX4PMState *s = PIIX4_PM(opaque);
return s->use_acpi_hotplug_bridge && !s->not_migrate_acpi_index;
return s->acpi_pci_hotplug.use_acpi_hotplug_bridge &&
!s->not_migrate_acpi_index;
}

/* qemu-kvm 1.2 uses version 3 but advertised as 2
@@ -303,8 +304,9 @@ static void piix4_pm_reset(DeviceState *dev)
acpi_update_sci(&s->ar, s->irq);

pm_io_space_update(s);
if (s->use_acpi_hotplug_bridge || s->use_acpi_root_pci_hotplug) {
acpi_pcihp_reset(&s->acpi_pci_hotplug, !s->use_acpi_root_pci_hotplug);
if (s->acpi_pci_hotplug.use_acpi_hotplug_bridge ||
s->acpi_pci_hotplug.use_acpi_root_pci_hotplug) {
acpi_pcihp_reset(&s->acpi_pci_hotplug);
}
}

@@ -402,6 +404,13 @@ static void piix4_device_unplug_cb(HotplugHandler *hotplug_dev,
}
}

static bool piix4_is_hotpluggable_bus(HotplugHandler *hotplug_dev,
BusState *bus)
{
PIIX4PMState *s = PIIX4_PM(hotplug_dev);
return acpi_pcihp_is_hotpluggbale_bus(&s->acpi_pci_hotplug, bus);
}

static void piix4_pm_machine_ready(Notifier *n, void *opaque)
{
PIIX4PMState *s = container_of(n, PIIX4PMState, machine_ready);
@@ -487,12 +496,11 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp)
qemu_add_machine_init_done_notifier(&s->machine_ready);

if (xen_enabled()) {
s->use_acpi_hotplug_bridge = false;
s->acpi_pci_hotplug.use_acpi_hotplug_bridge = false;
}

piix4_acpi_system_hot_add_init(pci_address_space_io(dev),
pci_get_bus(dev), s);
qbus_set_hotplug_handler(BUS(pci_get_bus(dev)), OBJECT(s));

piix4_pm_add_properties(s);
}
@@ -561,9 +569,11 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
"acpi-gpe0", GPE_LEN);
memory_region_add_subregion(parent, GPE_BASE, &s->io_gpe);

if (s->use_acpi_hotplug_bridge || s->use_acpi_root_pci_hotplug) {
if (s->acpi_pci_hotplug.use_acpi_hotplug_bridge ||
s->acpi_pci_hotplug.use_acpi_root_pci_hotplug) {
acpi_pcihp_init(OBJECT(s), &s->acpi_pci_hotplug, bus, parent,
s->use_acpi_hotplug_bridge, ACPI_PCIHP_ADDR_PIIX4);
ACPI_PCIHP_ADDR_PIIX4);
qbus_set_hotplug_handler(BUS(pci_get_bus(PCI_DEVICE(s))), OBJECT(s));
}

s->cpu_hotplug_legacy = true;
@@ -602,9 +612,9 @@ static Property piix4_pm_properties[] = {
DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0),
DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2),
DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState,
use_acpi_hotplug_bridge, true),
acpi_pci_hotplug.use_acpi_hotplug_bridge, true),
DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCI_ROOTHP, PIIX4PMState,
use_acpi_root_pci_hotplug, true),
acpi_pci_hotplug.use_acpi_root_pci_hotplug, true),
DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState,
acpi_memory_hotplug.is_enabled, true),
DEFINE_PROP_BOOL("smm-compat", PIIX4PMState, smm_compat, false),
@@ -641,6 +651,7 @@ static void piix4_pm_class_init(ObjectClass *klass, void *data)
hc->plug = piix4_device_plug_cb;
hc->unplug_request = piix4_device_unplug_request_cb;
hc->unplug = piix4_device_unplug_cb;
hc->is_hotpluggable_bus = piix4_is_hotpluggable_bus;
adevc->ospm_status = piix4_ospm_status;
adevc->send_event = piix4_send_gpe;
adevc->madt_cpu = pc_madt_cpu_entry;
20 changes: 11 additions & 9 deletions hw/cxl/cxl-component-utils.c
@@ -141,17 +141,19 @@ static void ras_init_common(uint32_t *reg_state, uint32_t *write_msk)
* Error status is RW1C, but since no bits are set yet, it can
* be handled as RO.
*/
reg_state[R_CXL_RAS_UNC_ERR_STATUS] = 0;
stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, 0);
stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_STATUS, 0x1cfff);
/* Bits 12-13 and 17-31 reserved in CXL 2.0 */
reg_state[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff;
write_msk[R_CXL_RAS_UNC_ERR_MASK] = 0x1cfff;
reg_state[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff;
write_msk[R_CXL_RAS_UNC_ERR_SEVERITY] = 0x1cfff;
reg_state[R_CXL_RAS_COR_ERR_STATUS] = 0;
reg_state[R_CXL_RAS_COR_ERR_MASK] = 0x7f;
write_msk[R_CXL_RAS_COR_ERR_MASK] = 0x7f;
stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK, 0x1cfff);
stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_MASK, 0x1cfff);
stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_SEVERITY, 0x1cfff);
stl_le_p(write_msk + R_CXL_RAS_UNC_ERR_SEVERITY, 0x1cfff);
stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, 0);
stl_le_p(write_msk + R_CXL_RAS_COR_ERR_STATUS, 0x7f);
stl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK, 0x7f);
stl_le_p(write_msk + R_CXL_RAS_COR_ERR_MASK, 0x7f);
/* CXL switches and devices must set */
reg_state[R_CXL_RAS_ERR_CAP_CTRL] = 0x00;
stl_le_p(reg_state + R_CXL_RAS_ERR_CAP_CTRL, 0x200);
}
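The switch from plain array stores to stl_le_p() matters because the cache_mem register file holds little-endian values irrespective of host byte order; a direct assignment stores host-endian data and breaks on big-endian hosts. A minimal illustration:

/* Illustration only: endian-safe store into the CXL register file. */
static inline void cxl_reg_set(uint32_t *reg_state, int r, uint32_t val)
{
    /* stl_le_p() always stores little-endian, whatever the host is. */
    stl_le_p(reg_state + r, val);
    /*
     * By contrast, reg_state[r] = val stores host-endian data, which a
     * later ldl_le_p() would read back byte-swapped on big-endian hosts.
     */
}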

static void hdm_init_common(uint32_t *reg_state, uint32_t *write_msk,
31 changes: 19 additions & 12 deletions hw/cxl/cxl-host.c
@@ -146,21 +146,28 @@ static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr)
return NULL;
}

hb_cstate = cxl_get_hb_cstate(hb);
if (!hb_cstate) {
return NULL;
}
if (cxl_get_hb_passthrough(hb)) {
rp = pcie_find_port_first(hb->bus);
if (!rp) {
return NULL;
}
} else {
hb_cstate = cxl_get_hb_cstate(hb);
if (!hb_cstate) {
return NULL;
}

cache_mem = hb_cstate->crb.cache_mem_registers;
cache_mem = hb_cstate->crb.cache_mem_registers;

target_found = cxl_hdm_find_target(cache_mem, addr, &target);
if (!target_found) {
return NULL;
}
target_found = cxl_hdm_find_target(cache_mem, addr, &target);
if (!target_found) {
return NULL;
}

rp = pcie_find_port_by_pn(hb->bus, target);
if (!rp) {
return NULL;
rp = pcie_find_port_by_pn(hb->bus, target);
if (!rp) {
return NULL;
}
}

d = pci_bridge_get_sec_bus(PCI_BRIDGE(rp))->devices[0];
179 changes: 129 additions & 50 deletions hw/i386/acpi-build.c
@@ -373,6 +373,104 @@ Aml *aml_pci_device_dsm(void)
return method;
}

static void build_append_pci_dsm_func0_common(Aml *ctx, Aml *retvar)
{
Aml *UUID, *ifctx1;
uint8_t byte_list[1] = { 0 }; /* nothing supported yet */

aml_append(ctx, aml_store(aml_buffer(1, byte_list), retvar));
/*
* PCI Firmware Specification 3.1
* 4.6. _DSM Definitions for PCI
*/
UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D");
ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(0), UUID)));
{
/* call is for unsupported UUID, bail out */
aml_append(ifctx1, aml_return(retvar));
}
aml_append(ctx, ifctx1);

ifctx1 = aml_if(aml_lless(aml_arg(1), aml_int(2)));
{
/* call is for unsupported REV, bail out */
aml_append(ifctx1, aml_return(retvar));
}
aml_append(ctx, ifctx1);
}

static Aml *aml_pci_edsm(void)
{
Aml *method, *ifctx;
Aml *zero = aml_int(0);
Aml *func = aml_arg(2);
Aml *ret = aml_local(0);
Aml *aidx = aml_local(1);
Aml *params = aml_arg(4);

method = aml_method("EDSM", 5, AML_SERIALIZED);

/* get supported functions */
ifctx = aml_if(aml_equal(func, zero));
{
/* 1: have supported functions */
/* 7: support for function 7 */
const uint8_t caps = 1 | BIT(7);
build_append_pci_dsm_func0_common(ifctx, ret);
aml_append(ifctx, aml_store(aml_int(caps), aml_index(ret, zero)));
aml_append(ifctx, aml_return(ret));
}
aml_append(method, ifctx);

/* handle specific functions requests */
/*
* PCI Firmware Specification 3.1
* 4.6.7. _DSM for Naming a PCI or PCI Express Device Under
* Operating Systems
*/
ifctx = aml_if(aml_equal(func, aml_int(7)));
{
Aml *pkg = aml_package(2);
aml_append(pkg, zero);
/* optional; if not implemented, should return a null string */
aml_append(pkg, aml_string("%s", ""));
aml_append(ifctx, aml_store(pkg, ret));

/*
* IASL is fine with initializing a Package with computational data,
* however it makes the guest unhappy (it fails to process such AML).
* So use a runtime assignment to set acpi-index after the initializer
* to make OSPM happy.
*/
aml_append(ifctx,
aml_store(aml_derefof(aml_index(params, aml_int(0))), aidx));
aml_append(ifctx, aml_store(aidx, aml_index(ret, zero)));
aml_append(ifctx, aml_return(ret));
}
aml_append(method, ifctx);

return method;
}

static Aml *aml_pci_static_endpoint_dsm(PCIDevice *pdev)
{
Aml *method;

g_assert(pdev->acpi_index != 0);
method = aml_method("_DSM", 4, AML_SERIALIZED);
{
Aml *params = aml_local(0);
Aml *pkg = aml_package(1);
aml_append(pkg, aml_int(pdev->acpi_index));
aml_append(method, aml_store(pkg, params));
aml_append(method,
aml_return(aml_call5("EDSM", aml_arg(0), aml_arg(1),
aml_arg(2), aml_arg(3), params))
);
}
return method;
}
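For reference, the AML emitted by aml_pci_edsm() together with a per-device aml_pci_static_endpoint_dsm() decompiles to roughly the following ASL for an endpoint with acpi-index=5 (an approximation for illustration, not actual iasl output):

/*
 * Method (EDSM, 5, Serialized) {
 *     If ((Arg2 == Zero)) {                    // function 0
 *         Local0 = Buffer (One) { 0x00 }
 *         If ((Arg0 != ToUUID ("e5c937d0-3553-4d7a-9117-ea4d19c3434d"))) {
 *             Return (Local0)
 *         }
 *         If ((Arg1 < 0x02)) { Return (Local0) }
 *         Local0 [Zero] = 0x81                 // functions 0 and 7
 *         Return (Local0)
 *     }
 *     If ((Arg2 == 0x07)) {                    // function 7: acpi-index
 *         Local0 = Package (0x02) { Zero, "" }
 *         Local0 [Zero] = DerefOf (Arg4 [Zero])
 *         Return (Local0)
 *     }
 * }
 * Method (_DSM, 4, Serialized) {
 *     Local0 = Package (One) { 0x05 }
 *     Return (EDSM (Arg0, Arg1, Arg2, Arg3, Local0))
 * }
 */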

static void build_append_pcihp_notify_entry(Aml *method, int slot)
{
Aml *if_ctx;
@@ -396,12 +494,6 @@ static bool is_devfn_ignored_generic(const int devfn, const PCIBus *bus)
if (DEVICE(pdev)->hotplugged) {
return true;
}
} else if (!get_dev_aml_func(DEVICE(pdev))) {
/*
* Ignore all other devices on !0 functions unless they
* have AML description (i.e have get_dev_aml_func() != 0)
*/
return true;
}
}
return false;
@@ -428,12 +520,14 @@ static bool is_devfn_ignored_hotplug(const int devfn, const PCIBus *bus)
return false;
}

static void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus,
QObject *bsel)
void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus)
{
int devfn;
Aml *dev, *notify_method = NULL, *method;
QObject *bsel = object_property_get_qobject(OBJECT(bus),
ACPI_PCIHP_PROP_BSEL, NULL);
uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel));
qobject_unref(bsel);

aml_append(parent_scope, aml_name_decl("BSEL", aml_int(bsel_val)));
notify_method = aml_method("DVNT", 2, AML_NOTSERIALIZED);
@@ -478,12 +572,9 @@

void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus)
{
QObject *bsel;
int devfn;
Aml *dev;

bsel = object_property_get_qobject(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, NULL);

for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
/* ACPI spec: 1.0b: Table 6-2 _ADR Object Bus Types, PCI type */
int adr = PCI_SLOT(devfn) << 16 | PCI_FUNC(devfn);
@@ -498,16 +589,16 @@ void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus)
aml_append(dev, aml_name_decl("_ADR", aml_int(adr)));

call_dev_aml_func(DEVICE(bus->devices[devfn]), dev);
/* add _DSM if device has acpi-index set */
if (pdev->acpi_index &&
!object_property_get_bool(OBJECT(pdev), "hotpluggable",
&error_abort)) {
aml_append(dev, aml_pci_static_endpoint_dsm(pdev));
}

/* device descriptor has been composed, add it into parent context */
aml_append(parent_scope, dev);
}

if (bsel) {
build_append_pcihp_slots(parent_scope, bus, bsel);
}

qobject_unref(bsel);
}

static bool build_append_notfication_callback(Aml *parent_scope,
@@ -517,16 +608,24 @@ static bool build_append_notfication_callback(Aml *parent_scope,
PCIBus *sec;
QObject *bsel;
int nr_notifiers = 0;
GQueue *pcnt_bus_list = g_queue_new();

QLIST_FOREACH(sec, &bus->child, sibling) {
Aml *br_scope = aml_scope("S%.02X", sec->parent_dev->devfn);
if (pci_bus_is_root(sec) ||
!object_property_find(OBJECT(sec), ACPI_PCIHP_PROP_BSEL)) {
if (pci_bus_is_root(sec)) {
continue;
}
nr_notifiers = nr_notifiers +
build_append_notfication_callback(br_scope, sec);
aml_append(parent_scope, br_scope);
/*
* add the new child scope to the parent
* and keep track of buses that have a PCNT;
* the bus list is used later to call child PCNTs from this level's PCNT
*/
if (nr_notifiers) {
g_queue_push_tail(pcnt_bus_list, sec);
aml_append(parent_scope, br_scope);
}
}

/*
@@ -550,30 +649,25 @@
}

/* Notify about child bus events in any case */
QLIST_FOREACH(sec, &bus->child, sibling) {
if (pci_bus_is_root(sec) ||
!object_property_find(OBJECT(sec), ACPI_PCIHP_PROP_BSEL)) {
continue;
}

while ((sec = g_queue_pop_head(pcnt_bus_list))) {
aml_append(method, aml_name("^S%.02X.PCNT", sec->parent_dev->devfn));
}

aml_append(parent_scope, method);
qobject_unref(bsel);
g_queue_free(pcnt_bus_list);
return !!nr_notifiers;
}

static Aml *aml_pci_pdsm(void)
{
Aml *method, *UUID, *ifctx, *ifctx1;
Aml *method, *ifctx, *ifctx1;
Aml *ret = aml_local(0);
Aml *caps = aml_local(1);
Aml *acpi_index = aml_local(2);
Aml *zero = aml_int(0);
Aml *one = aml_int(1);
Aml *func = aml_arg(2);
Aml *rev = aml_arg(1);
Aml *params = aml_arg(4);
Aml *bnum = aml_derefof(aml_index(params, aml_int(0)));
Aml *sunum = aml_derefof(aml_index(params, aml_int(1)));
@@ -583,29 +677,9 @@ static Aml *aml_pci_pdsm(void)
/* get supported functions */
ifctx = aml_if(aml_equal(func, zero));
{
uint8_t byte_list[1] = { 0 }; /* nothing supported yet */
aml_append(ifctx, aml_store(aml_buffer(1, byte_list), ret));
aml_append(ifctx, aml_store(zero, caps));

/*
* PCI Firmware Specification 3.1
* 4.6. _DSM Definitions for PCI
*/
UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D");
ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(0), UUID)));
{
/* call is for unsupported UUID, bail out */
aml_append(ifctx1, aml_return(ret));
}
aml_append(ifctx, ifctx1);

ifctx1 = aml_if(aml_lless(rev, aml_int(2)));
{
/* call is for unsupported REV, bail out */
aml_append(ifctx1, aml_return(ret));
}
aml_append(ifctx, ifctx1);
build_append_pci_dsm_func0_common(ifctx, ret);

aml_append(ifctx, aml_store(zero, caps));
aml_append(ifctx,
aml_store(aml_call2("AIDX", bnum, sunum), acpi_index));
/*
@@ -1388,6 +1462,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03")));
aml_append(dev, aml_name_decl("_ADR", aml_int(0)));
aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid)));
aml_append(dev, aml_pci_edsm());
aml_append(sb_scope, dev);
aml_append(dsdt, sb_scope);

@@ -1403,6 +1478,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(dev, aml_name_decl("_ADR", aml_int(0)));
aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid)));
aml_append(dev, build_q35_osc_method(!pm->pcihp_bridge_en));
aml_append(dev, aml_pci_edsm());
aml_append(sb_scope, dev);
if (mcfg_valid) {
aml_append(sb_scope, build_q35_dram_controller(&mcfg));
@@ -1710,6 +1786,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
Aml *scope = aml_scope("PCI0");
/* Scan all PCI buses. Generate tables to support hotplug. */
build_append_pci_bus_devices(scope, bus);
if (object_property_find(OBJECT(bus), ACPI_PCIHP_PROP_BSEL)) {
build_append_pcihp_slots(scope, bus);
}
aml_append(sb_scope, scope);
}
}
1 change: 1 addition & 0 deletions hw/isa/lpc_ich9.c
@@ -865,6 +865,7 @@ static void ich9_lpc_class_init(ObjectClass *klass, void *data)
hc->plug = ich9_pm_device_plug_cb;
hc->unplug_request = ich9_pm_device_unplug_request_cb;
hc->unplug = ich9_pm_device_unplug_cb;
hc->is_hotpluggable_bus = ich9_pm_is_hotpluggable_bus;
adevc->ospm_status = ich9_pm_ospm_status;
adevc->send_event = ich9_send_gpe;
adevc->madt_cpu = pc_madt_cpu_entry;
294 changes: 294 additions & 0 deletions hw/mem/cxl_type3.c
@@ -1,6 +1,7 @@
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/error-report.h"
#include "qapi/qapi-commands-cxl.h"
#include "hw/mem/memory-device.h"
#include "hw/mem/pc-dimm.h"
#include "hw/pci/pci.h"
@@ -250,6 +251,7 @@ static void ct3d_config_write(PCIDevice *pci_dev, uint32_t addr, uint32_t val,

pcie_doe_write_config(&ct3d->doe_cdat, addr, val, size);
pci_default_write_config(pci_dev, addr, val, size);
pcie_aer_write_config(pci_dev, addr, val, size);
}

/*
@@ -322,6 +324,66 @@ static void hdm_decoder_commit(CXLType3Dev *ct3d, int which)
ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
}

static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err)
{
switch (qmp_err) {
case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_PARITY:
return CXL_RAS_UNC_ERR_CACHE_DATA_PARITY;
case CXL_UNCOR_ERROR_TYPE_CACHE_ADDRESS_PARITY:
return CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY;
case CXL_UNCOR_ERROR_TYPE_CACHE_BE_PARITY:
return CXL_RAS_UNC_ERR_CACHE_BE_PARITY;
case CXL_UNCOR_ERROR_TYPE_CACHE_DATA_ECC:
return CXL_RAS_UNC_ERR_CACHE_DATA_ECC;
case CXL_UNCOR_ERROR_TYPE_MEM_DATA_PARITY:
return CXL_RAS_UNC_ERR_MEM_DATA_PARITY;
case CXL_UNCOR_ERROR_TYPE_MEM_ADDRESS_PARITY:
return CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY;
case CXL_UNCOR_ERROR_TYPE_MEM_BE_PARITY:
return CXL_RAS_UNC_ERR_MEM_BE_PARITY;
case CXL_UNCOR_ERROR_TYPE_MEM_DATA_ECC:
return CXL_RAS_UNC_ERR_MEM_DATA_ECC;
case CXL_UNCOR_ERROR_TYPE_REINIT_THRESHOLD:
return CXL_RAS_UNC_ERR_REINIT_THRESHOLD;
case CXL_UNCOR_ERROR_TYPE_RSVD_ENCODING:
return CXL_RAS_UNC_ERR_RSVD_ENCODING;
case CXL_UNCOR_ERROR_TYPE_POISON_RECEIVED:
return CXL_RAS_UNC_ERR_POISON_RECEIVED;
case CXL_UNCOR_ERROR_TYPE_RECEIVER_OVERFLOW:
return CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW;
case CXL_UNCOR_ERROR_TYPE_INTERNAL:
return CXL_RAS_UNC_ERR_INTERNAL;
case CXL_UNCOR_ERROR_TYPE_CXL_IDE_TX:
return CXL_RAS_UNC_ERR_CXL_IDE_TX;
case CXL_UNCOR_ERROR_TYPE_CXL_IDE_RX:
return CXL_RAS_UNC_ERR_CXL_IDE_RX;
default:
return -EINVAL;
}
}

static int ct3d_qmp_cor_err_to_cxl(CxlCorErrorType qmp_err)
{
switch (qmp_err) {
case CXL_COR_ERROR_TYPE_CACHE_DATA_ECC:
return CXL_RAS_COR_ERR_CACHE_DATA_ECC;
case CXL_COR_ERROR_TYPE_MEM_DATA_ECC:
return CXL_RAS_COR_ERR_MEM_DATA_ECC;
case CXL_COR_ERROR_TYPE_CRC_THRESHOLD:
return CXL_RAS_COR_ERR_CRC_THRESHOLD;
case CXL_COR_ERROR_TYPE_RETRY_THRESHOLD:
return CXL_RAS_COR_ERR_RETRY_THRESHOLD;
case CXL_COR_ERROR_TYPE_CACHE_POISON_RECEIVED:
return CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED;
case CXL_COR_ERROR_TYPE_MEM_POISON_RECEIVED:
return CXL_RAS_COR_ERR_MEM_POISON_RECEIVED;
case CXL_COR_ERROR_TYPE_PHYSICAL:
return CXL_RAS_COR_ERR_PHYSICAL;
default:
return -EINVAL;
}
}

static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
unsigned size)
{
@@ -340,6 +402,83 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT);
which_hdm = 0;
break;
case A_CXL_RAS_UNC_ERR_STATUS:
{
uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
uint32_t fe = FIELD_EX32(capctrl, CXL_RAS_ERR_CAP_CTRL, FIRST_ERROR_POINTER);
CXLError *cxl_err;
uint32_t unc_err;

/*
* If a single bit is written and it corresponds to the first error
* pointer being cleared, update the status and header log.
*/
if (!QTAILQ_EMPTY(&ct3d->error_list)) {
if ((1 << fe) ^ value) {
CXLError *cxl_next;
/*
* Software is using the wrong flow for multiple header recording.
* Follow the behavior in PCIe r6.0, assuming multiple header
* support. It is an implementation-defined choice to clear all
* matching records if more than one bit is set - which corresponds
* most closely to the behavior of hardware not capable of multiple
* header recording.
*/
QTAILQ_FOREACH_SAFE(cxl_err, &ct3d->error_list, node, cxl_next) {
if ((1 << cxl_err->type) & value) {
QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
g_free(cxl_err);
}
}
} else {
/* Done with previous FE, so drop from list */
cxl_err = QTAILQ_FIRST(&ct3d->error_list);
QTAILQ_REMOVE(&ct3d->error_list, cxl_err, node);
g_free(cxl_err);
}

/*
* If there is another FE, then put that in place and update
* the header log
*/
if (!QTAILQ_EMPTY(&ct3d->error_list)) {
uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
int i;

cxl_err = QTAILQ_FIRST(&ct3d->error_list);
for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
stl_le_p(header_log + i, cxl_err->header[i]);
}
capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
FIRST_ERROR_POINTER, cxl_err->type);
} else {
/*
* If there are no more errors, then follow the recommendation of PCI spec
* r6.0 6.2.4.2 to set the first error pointer to a status
* bit that will never be used.
*/
capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
FIRST_ERROR_POINTER,
CXL_RAS_UNC_ERR_CXL_UNUSED);
}
stl_le_p((uint8_t *)cache_mem + A_CXL_RAS_ERR_CAP_CTRL, capctrl);
}
unc_err = 0;
QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
unc_err |= 1 << cxl_err->type;
}
stl_le_p((uint8_t *)cache_mem + offset, unc_err);

return;
}
case A_CXL_RAS_COR_ERR_STATUS:
{
uint32_t rw1c = value;
uint32_t temp = ldl_le_p((uint8_t *)cache_mem + offset);
temp &= ~rw1c;
stl_le_p((uint8_t *)cache_mem + offset, temp);
return;
}
default:
break;
}
@@ -403,6 +542,8 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
unsigned short msix_num = 1;
int i, rc;

QTAILQ_INIT(&ct3d->error_list);

if (!cxl_setup_memory(ct3d, errp)) {
return;
}
@@ -452,8 +593,19 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp)
cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
cxl_cstate->cdat.private = ct3d;
cxl_doe_cdat_init(cxl_cstate, errp);

pcie_cap_deverr_init(pci_dev);
/* Leave a bit of room for expansion */
rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, NULL);
if (rc) {
goto err_release_cdat;
}

return;

err_release_cdat:
cxl_doe_cdat_release(cxl_cstate);
g_free(regs->special_ops);
err_address_space_free:
address_space_destroy(&ct3d->hostmem_as);
return;
@@ -465,6 +617,7 @@ static void ct3_exit(PCIDevice *pci_dev)
CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
ComponentRegisters *regs = &cxl_cstate->crb;

pcie_aer_exit(pci_dev);
cxl_doe_cdat_release(cxl_cstate);
g_free(regs->special_ops);
address_space_destroy(&ct3d->hostmem_as);
@@ -618,6 +771,147 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
*/
}

/* For uncorrectable errors, include support for multiple header recording */
void qmp_cxl_inject_uncorrectable_errors(const char *path,
CXLUncorErrorRecordList *errors,
Error **errp)
{
Object *obj = object_resolve_path(path, NULL);
static PCIEAERErr err = {};
CXLType3Dev *ct3d;
CXLError *cxl_err;
uint32_t *reg_state;
uint32_t unc_err;
bool first;

if (!obj) {
error_setg(errp, "Unable to resolve path");
return;
}

if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
error_setg(errp, "Path does not point to a CXL type 3 device");
return;
}

err.status = PCI_ERR_UNC_INTN;
err.source_id = pci_requester_id(PCI_DEVICE(obj));
err.flags = 0;

ct3d = CXL_TYPE3(obj);

first = QTAILQ_EMPTY(&ct3d->error_list);
reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
while (errors) {
uint32List *header = errors->value->header;
uint8_t header_count = 0;
int cxl_err_code;

cxl_err_code = ct3d_qmp_uncor_err_to_cxl(errors->value->type);
if (cxl_err_code < 0) {
error_setg(errp, "Unknown error code");
return;
}

/* If the error is masked, nothing to do here */
if (!((1 << cxl_err_code) &
~ldl_le_p(reg_state + R_CXL_RAS_UNC_ERR_MASK))) {
errors = errors->next;
continue;
}

cxl_err = g_malloc0(sizeof(*cxl_err));
if (!cxl_err) {
return;
}

cxl_err->type = cxl_err_code;
while (header && header_count < 32) {
cxl_err->header[header_count++] = header->value;
header = header->next;
}
if (header) {
/* the loop above caps the copy at 32 entries; more were supplied */
error_setg(errp, "Header must be 32 DWORD or less");
return;
}
QTAILQ_INSERT_TAIL(&ct3d->error_list, cxl_err, node);

errors = errors->next;
}

if (first && !QTAILQ_EMPTY(&ct3d->error_list)) {
uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers;
uint32_t capctrl = ldl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL);
uint32_t *header_log = &cache_mem[R_CXL_RAS_ERR_HEADER0];
int i;

cxl_err = QTAILQ_FIRST(&ct3d->error_list);
for (i = 0; i < CXL_RAS_ERR_HEADER_NUM; i++) {
stl_le_p(header_log + i, cxl_err->header[i]);
}

capctrl = FIELD_DP32(capctrl, CXL_RAS_ERR_CAP_CTRL,
FIRST_ERROR_POINTER, cxl_err->type);
stl_le_p(cache_mem + R_CXL_RAS_ERR_CAP_CTRL, capctrl);
}

unc_err = 0;
QTAILQ_FOREACH(cxl_err, &ct3d->error_list, node) {
unc_err |= (1 << cxl_err->type);
}
if (!unc_err) {
return;
}

stl_le_p(reg_state + R_CXL_RAS_UNC_ERR_STATUS, unc_err);
pcie_aer_inject_error(PCI_DEVICE(obj), &err);

return;
}

void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
Error **errp)
{
static PCIEAERErr err = {};
Object *obj = object_resolve_path(path, NULL);
CXLType3Dev *ct3d;
uint32_t *reg_state;
uint32_t cor_err;
int cxl_err_type;

if (!obj) {
error_setg(errp, "Unable to resolve path");
return;
}
if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
error_setg(errp, "Path does not point to a CXL type 3 device");
return;
}

err.status = PCI_ERR_COR_INTERNAL;
err.source_id = pci_requester_id(PCI_DEVICE(obj));
err.flags = PCIE_AER_ERR_IS_CORRECTABLE;

ct3d = CXL_TYPE3(obj);
reg_state = ct3d->cxl_cstate.crb.cache_mem_registers;
cor_err = ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS);

cxl_err_type = ct3d_qmp_cor_err_to_cxl(type);
if (cxl_err_type < 0) {
error_setg(errp, "Invalid COR error");
return;
}
/* If the error is masked, there is nothing to do here */
if (!((1 << cxl_err_type) & ~ldl_le_p(reg_state + R_CXL_RAS_COR_ERR_MASK))) {
return;
}

cor_err |= (1 << cxl_err_type);
stl_le_p(reg_state + R_CXL_RAS_COR_ERR_STATUS, cor_err);

pcie_aer_inject_error(PCI_DEVICE(obj), &err);
}
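Assuming the QAPI command names follow the C symbols above (qapi/cxl.json is not part of this diff), these handlers are reachable over QMP as cxl-inject-uncorrectable-errors and cxl-inject-correctable-error, e.g. {"execute": "cxl-inject-correctable-error", "arguments": {"path": "/machine/peripheral/cxl-mem0", "type": "mem-data-ecc"}}, where the device path is hypothetical.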

static void ct3_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
17 changes: 17 additions & 0 deletions hw/mem/cxl_type3_stubs.c
@@ -0,0 +1,17 @@

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-cxl.h"

void qmp_cxl_inject_uncorrectable_errors(const char *path,
CXLUncorErrorRecordList *errors,
Error **errp)
{
error_setg(errp, "CXL Type 3 support is not compiled in");
}

void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
Error **errp)
{
error_setg(errp, "CXL Type 3 support is not compiled in");
}
2 changes: 2 additions & 0 deletions hw/mem/meson.build
@@ -4,6 +4,8 @@ mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c'))
mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c'))
mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c'))
mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c'))
softmmu_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_false: files('cxl_type3_stubs.c'))
softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('cxl_type3_stubs.c'))

softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss)
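The stub pairing follows the usual QEMU pattern: the QAPI generator emits dispatch code for the new commands unconditionally, so a build without CONFIG_CXL_MEM_DEVICE still needs qmp_cxl_inject_*() symbols to link against, which cxl_type3_stubs.c provides; the CONFIG_ALL line additionally keeps the stubs compiled for the combined-configuration build.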

64 changes: 64 additions & 0 deletions hw/pci-bridge/cxl_root_port.c
@@ -22,13 +22,18 @@
#include "qemu/range.h"
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pcie_port.h"
#include "hw/pci/msi.h"
#include "hw/qdev-properties.h"
#include "hw/sysbus.h"
#include "qapi/error.h"
#include "hw/cxl/cxl.h"

#define CXL_ROOT_PORT_DID 0x7075

#define CXL_RP_MSI_OFFSET 0x60
#define CXL_RP_MSI_SUPPORTED_FLAGS PCI_MSI_FLAGS_MASKBIT
#define CXL_RP_MSI_NR_VECTOR 2

/* Copied from the generic root port from which we derive */
#define GEN_PCIE_ROOT_PORT_AER_OFFSET 0x100
#define GEN_PCIE_ROOT_PORT_ACS_OFFSET \
@@ -47,6 +52,49 @@ typedef struct CXLRootPort {
#define TYPE_CXL_ROOT_PORT "cxl-rp"
DECLARE_INSTANCE_CHECKER(CXLRootPort, CXL_ROOT_PORT, TYPE_CXL_ROOT_PORT)

/*
* If two MSI vectors are allocated, the Advanced Error Interrupt Message
* Number is 1; otherwise it is 0.
* 17.12.5.10 RPERRSTS, bits 31:27, Advanced Error Interrupt Message Number.
*/
static uint8_t cxl_rp_aer_vector(const PCIDevice *d)
{
switch (msi_nr_vectors_allocated(d)) {
case 1:
return 0;
case 2:
return 1;
case 4:
case 8:
case 16:
case 32:
default:
break;
}
abort();
return 0;
}
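Worked through: CXL_RP_MSI_NR_VECTOR is 2, so after a successful msi_init() this function reports Advanced Error Interrupt Message Number 1, which cxl_rp_aer_vector_update() below latches into the AER root registers; any other allocation count would indicate a programming error, hence the abort().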

static int cxl_rp_interrupts_init(PCIDevice *d, Error **errp)
{
int rc;

rc = msi_init(d, CXL_RP_MSI_OFFSET, CXL_RP_MSI_NR_VECTOR,
CXL_RP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_64BIT,
CXL_RP_MSI_SUPPORTED_FLAGS & PCI_MSI_FLAGS_MASKBIT,
errp);
if (rc < 0) {
assert(rc == -ENOTSUP);
}

return rc;
}

static void cxl_rp_interrupts_uninit(PCIDevice *d)
{
msi_uninit(d);
}

static void latch_registers(CXLRootPort *crp)
{
uint32_t *reg_state = crp->cxl_cstate.crb.cache_mem_registers;
@@ -183,16 +231,29 @@ static void cxl_rp_dvsec_write_config(PCIDevice *dev, uint32_t addr,
}
}

static void cxl_rp_aer_vector_update(PCIDevice *d)
{
PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(d);

if (rpc->aer_vector) {
pcie_aer_root_set_vector(d, rpc->aer_vector(d));
}
}

static void cxl_rp_write_config(PCIDevice *d, uint32_t address, uint32_t val,
int len)
{
uint16_t slt_ctl, slt_sta;
uint32_t root_cmd =
pci_get_long(d->config + d->exp.aer_cap + PCI_ERR_ROOT_COMMAND);

pcie_cap_slot_get(d, &slt_ctl, &slt_sta);
pci_bridge_write_config(d, address, val, len);
cxl_rp_aer_vector_update(d);
pcie_cap_flr_write_config(d, address, val, len);
pcie_cap_slot_write_config(d, slt_ctl, slt_sta, address, val, len);
pcie_aer_write_config(d, address, val, len);
pcie_aer_root_write_config(d, address, val, len, root_cmd);

cxl_rp_dvsec_write_config(d, address, val, len);
}
@@ -217,6 +278,9 @@ static void cxl_root_port_class_init(ObjectClass *oc, void *data)

rpc->aer_offset = GEN_PCIE_ROOT_PORT_AER_OFFSET;
rpc->acs_offset = GEN_PCIE_ROOT_PORT_ACS_OFFSET;
rpc->aer_vector = cxl_rp_aer_vector;
rpc->interrupts_init = cxl_rp_interrupts_init;
rpc->interrupts_uninit = cxl_rp_interrupts_uninit;

dc->hotpluggable = false;
}
44 changes: 39 additions & 5 deletions hw/pci-bridge/pci_expander_bridge.c
@@ -15,6 +15,7 @@
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_host.h"
#include "hw/pci/pcie_port.h"
#include "hw/qdev-properties.h"
#include "hw/pci/pci_bridge.h"
#include "hw/pci-bridge/pci_expander_bridge.h"
@@ -79,6 +80,13 @@ CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb)
return &host->cxl_cstate;
}

bool cxl_get_hb_passthrough(PCIHostState *hb)
{
CXLHost *host = PXB_CXL_HOST(hb);

return host->passthrough;
}

static int pxb_bus_num(PCIBus *bus)
{
PXBDev *pxb = convert_to_pxb(bus->parent_dev);
@@ -289,15 +297,32 @@ static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin)
return pin - PCI_SLOT(pxb->devfn);
}

static void pxb_dev_reset(DeviceState *dev)
static void pxb_cxl_dev_reset(DeviceState *dev)
{
CXLHost *cxl = PXB_CXL_DEV(dev)->cxl.cxl_host_bridge;
CXLComponentState *cxl_cstate = &cxl->cxl_cstate;
PCIHostState *hb = PCI_HOST_BRIDGE(cxl);
uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers;
uint32_t *write_msk = cxl_cstate->crb.cache_mem_regs_write_mask;
int dsp_count = 0;

cxl_component_register_init_common(reg_state, write_msk, CXL2_ROOT_PORT);
ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT, 8);
/*
* The CXL specification allows for host bridges with no HDM decoders
* if they only have a single root port.
*/
if (!PXB_DEV(dev)->hdm_for_passthrough) {
dsp_count = pcie_count_ds_ports(hb->bus);
}
/* The initial reset will see 0 DSPs, so wait until the count is > 0 */
if (dsp_count == 1) {
cxl->passthrough = true;
/* Set Capability ID in header to NONE */
ARRAY_FIELD_DP32(reg_state, CXL_HDM_CAPABILITY_HEADER, ID, 0);
} else {
ARRAY_FIELD_DP32(reg_state, CXL_HDM_DECODER_CAPABILITY, TARGET_COUNT,
8);
}
}
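The hdm_for_passthrough property added below opts out of this detection and keeps the HDM decoder even when a single downstream port is present, e.g. (hypothetical command line) -device pxb-cxl,id=cxl.1,bus=pcie.0,bus_nr=12,hdm_for_passthrough=true.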

static gint pxb_compare(gconstpointer a, gconstpointer b)
@@ -481,9 +506,18 @@ static void pxb_cxl_dev_realize(PCIDevice *dev, Error **errp)
}

pxb_dev_realize_common(dev, CXL, errp);
pxb_dev_reset(DEVICE(dev));
pxb_cxl_dev_reset(DEVICE(dev));
}

static Property pxb_cxl_dev_properties[] = {
/* Note: 0 is not a legal PXB bus number. */
DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0),
DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node, NUMA_NODE_UNASSIGNED),
DEFINE_PROP_BOOL("bypass_iommu", PXBDev, bypass_iommu, false),
DEFINE_PROP_BOOL("hdm_for_passthrough", PXBDev, hdm_for_passthrough, false),
DEFINE_PROP_END_OF_LIST(),
};

static void pxb_cxl_dev_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -497,12 +531,12 @@ static void pxb_cxl_dev_class_init(ObjectClass *klass, void *data)
*/

dc->desc = "CXL Host Bridge";
device_class_set_props(dc, pxb_dev_properties);
device_class_set_props(dc, pxb_cxl_dev_properties);
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);

/* Host bridges aren't hotpluggable. FIXME: spec reference */
dc->hotpluggable = false;
dc->reset = pxb_dev_reset;
dc->reset = pxb_cxl_dev_reset;
}

static const TypeInfo pxb_cxl_dev_info = {
1 change: 0 additions & 1 deletion hw/pci/pci-internal.h
@@ -20,6 +20,5 @@ void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent);

int pcie_aer_parse_error_string(const char *error_name,
uint32_t *status, bool *correctable);
int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);

#endif
57 changes: 57 additions & 0 deletions hw/pci/pci.c
@@ -95,6 +95,21 @@ static const VMStateDescription vmstate_pcibus = {
}
};

static gint g_cmp_uint32(gconstpointer a, gconstpointer b, gpointer user_data)
{
return a - b;
}

static GSequence *pci_acpi_index_list(void)
{
static GSequence *used_acpi_index_list;

if (!used_acpi_index_list) {
used_acpi_index_list = g_sequence_new(NULL);
}
return used_acpi_index_list;
}

static void pci_init_bus_master(PCIDevice *pci_dev)
{
AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
@@ -1246,6 +1261,17 @@ static void pci_qdev_unrealize(DeviceState *dev)
do_pci_unregister_device(pci_dev);

pci_dev->msi_trigger = NULL;

/*
* clean up acpi-index so it can be reused by another device
*/
if (pci_dev->acpi_index) {
GSequence *used_indexes = pci_acpi_index_list();

g_sequence_remove(g_sequence_lookup(used_indexes,
GINT_TO_POINTER(pci_dev->acpi_index),
g_cmp_uint32, NULL));
}
}

void pci_register_bar(PCIDevice *pci_dev, int region_num,
@@ -2005,6 +2031,8 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn)
return bus->devices[devfn];
}

#define ONBOARD_INDEX_MAX (16 * 1024 - 1)

static void pci_qdev_realize(DeviceState *qdev, Error **errp)
{
PCIDevice *pci_dev = (PCIDevice *)qdev;
@@ -2014,6 +2042,35 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
bool is_default_rom;
uint16_t class_id;

/*
* capped by systemd (see: udev-builtin-net_id.c);
* as it's the only known user, honor its limit to avoid users
* misconfiguring QEMU and then wondering why acpi-index doesn't work
*/
if (pci_dev->acpi_index > ONBOARD_INDEX_MAX) {
error_setg(errp, "acpi-index should be less or equal to %u",
ONBOARD_INDEX_MAX);
return;
}

/*
* make sure that acpi-index is unique across all present PCI devices
*/
if (pci_dev->acpi_index) {
GSequence *used_indexes = pci_acpi_index_list();

if (g_sequence_lookup(used_indexes,
GINT_TO_POINTER(pci_dev->acpi_index),
g_cmp_uint32, NULL)) {
error_setg(errp, "a PCI device with acpi-index = %" PRIu32
" already exist", pci_dev->acpi_index);
return;
}
g_sequence_insert_sorted(used_indexes,
GINT_TO_POINTER(pci_dev->acpi_index),
g_cmp_uint32, NULL);
}
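Concretely, ONBOARD_INDEX_MAX is 16 * 1024 - 1 = 16383, so (hypothetical command line) -device virtio-net-pci,acpi-index=16384 now fails at realize time, and a second device requesting an index already present in the GSequence is rejected by the lookup above. Moving these checks from the ACPI hotplug path into pci_qdev_realize() makes them apply to every PCI device, not only ACPI-hotplugged ones.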

if (pci_dev->romsize != -1 && !is_power_of_2(pci_dev->romsize)) {
error_setg(errp, "ROM size %u is not a power of two", pci_dev->romsize);
return;
14 changes: 13 additions & 1 deletion hw/pci/pcie_aer.c
@@ -112,6 +112,10 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,

pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
PCI_ERR_UNC_SUPPORTED);
pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK,
PCI_ERR_UNC_MASK_DEFAULT);
pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK,
PCI_ERR_UNC_SUPPORTED);

pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
PCI_ERR_UNC_SEVERITY_DEFAULT);
@@ -188,8 +192,16 @@ static void pcie_aer_update_uncor_status(PCIDevice *dev)
static bool
pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
{
uint16_t devctl = pci_get_word(dev->config + dev->exp.exp_cap +
PCI_EXP_DEVCTL);
if (!(pcie_aer_msg_is_uncor(msg) &&
(pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) {
(pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR)) &&
!((msg->severity == PCI_ERR_ROOT_CMD_NONFATAL_EN) &&
(devctl & PCI_EXP_DEVCTL_NFERE)) &&
!((msg->severity == PCI_ERR_ROOT_CMD_COR_EN) &&
(devctl & PCI_EXP_DEVCTL_CERE)) &&
!((msg->severity == PCI_ERR_ROOT_CMD_FATAL_EN) &&
(devctl & PCI_EXP_DEVCTL_FERE))) {
return false;
}

46 changes: 46 additions & 0 deletions hw/pci/pcie_port.c
@@ -161,6 +161,51 @@ PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn)
return NULL;
}

/* Find first port in devfn number order */
PCIDevice *pcie_find_port_first(PCIBus *bus)
{
int devfn;

for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
PCIDevice *d = bus->devices[devfn];

if (!d || !pci_is_express(d) || !d->exp.exp_cap) {
continue;
}

if (object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) {
return d;
}
}

return NULL;
}

int pcie_count_ds_ports(PCIBus *bus)
{
int dsp_count = 0;
int devfn;

for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
PCIDevice *d = bus->devices[devfn];

if (!d || !pci_is_express(d) || !d->exp.exp_cap) {
continue;
}
if (object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) {
dsp_count++;
}
}
return dsp_count;
}

static bool pcie_slot_is_hotpluggbale_bus(HotplugHandler *plug_handler,
BusState *bus)
{
PCIESlot *s = PCIE_SLOT(bus->parent);
return s->hotplug;
}
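This callback replaces the trick, deleted from acpi_pcihp_device_plug_cb() earlier in this series, of clearing a slot's hotplug handler when hotplug=off: instead of detaching the handler at cold-plug time, the slot now answers for itself whenever the core asks whether its bus is hotpluggable.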

static const TypeInfo pcie_port_type_info = {
.name = TYPE_PCIE_PORT,
.parent = TYPE_PCI_BRIDGE,
@@ -188,6 +233,7 @@ static void pcie_slot_class_init(ObjectClass *oc, void *data)
hc->plug = pcie_cap_slot_plug_cb;
hc->unplug = pcie_cap_slot_unplug_cb;
hc->unplug_request = pcie_cap_slot_unplug_request_cb;
hc->is_hotpluggable_bus = pcie_slot_is_hotpluggbale_bus;
}

static const TypeInfo pcie_slot_type_info = {
1 change: 1 addition & 0 deletions hw/virtio/trace-events
@@ -50,6 +50,7 @@ vhost_vdpa_set_vring_ready(void *dev) "dev: %p"
vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s"
vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32
vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32
vhost_vdpa_suspend(void *dev) "dev: %p"
vhost_vdpa_dev_start(void *dev, bool started) "dev: %p started: %d"
vhost_vdpa_set_log_base(void *dev, uint64_t base, unsigned long long size, int refcnt, int fd, void *log) "dev: %p base: 0x%"PRIx64" size: %llu refcnt: %d fd: %d log: %p"
vhost_vdpa_set_vring_addr(void *dev, unsigned int index, unsigned int flags, uint64_t desc_user_addr, uint64_t used_user_addr, uint64_t avail_user_addr, uint64_t log_guest_addr) "dev: %p index: %u flags: 0x%x desc_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" log_guest_addr: 0x%"PRIx64
8 changes: 6 additions & 2 deletions hw/virtio/vhost-shadow-virtqueue.c
@@ -694,13 +694,17 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
g_autofree VirtQueueElement *elem = NULL;
elem = g_steal_pointer(&svq->desc_state[i].elem);
if (elem) {
virtqueue_detach_element(svq->vq, elem, 0);
/*
* TODO: This is ok for networking, but other kinds of devices
* might have problems with just unpopping these.
*/
virtqueue_unpop(svq->vq, elem, 0);
}
}

next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
if (next_avail_elem) {
virtqueue_detach_element(svq->vq, next_avail_elem, 0);
virtqueue_unpop(svq->vq, next_avail_elem, 0);
}
svq->vq = NULL;
g_free(svq->desc_next);
4 changes: 2 additions & 2 deletions hw/virtio/vhost-user.c
@@ -2031,8 +2031,8 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
} else {
if (virtio_has_feature(protocol_features,
VHOST_USER_PROTOCOL_F_CONFIG)) {
warn_reportf_err(*errp, "vhost-user backend supports "
"VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
warn_report("vhost-user backend supports "
"VHOST_USER_PROTOCOL_F_CONFIG but QEMU does not.");
protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG);
}
}
126 changes: 89 additions & 37 deletions hw/virtio/vhost-vdpa.c
@@ -431,27 +431,43 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
trace_vhost_vdpa_init(dev, opaque);
int ret;

/*
* Similar to VFIO, we end up pinning all guest memory and have to
* disable discarding of RAM.
*/
ret = ram_block_discard_disable(true);
if (ret) {
error_report("Cannot set discarding of RAM broken");
return ret;
}

v = opaque;
v->dev = dev;
dev->opaque = opaque ;
v->listener = vhost_vdpa_memory_listener;
v->msg_type = VHOST_IOTLB_MSG_V2;
vhost_vdpa_init_svq(dev, v);

error_propagate(&dev->migration_blocker, v->migration_blocker);
if (!vhost_vdpa_first_dev(dev)) {
return 0;
}

/*
* If dev->shadow_vqs_enabled is set at initialization, the device has
* been started with x-svq=on, so don't block migration
*/
if (dev->migration_blocker == NULL && !v->shadow_vqs_enabled) {
/* We don't have dev->features yet */
uint64_t features;
ret = vhost_vdpa_get_dev_features(dev, &features);
if (unlikely(ret)) {
error_setg_errno(errp, -ret, "Could not get device features");
return ret;
}
vhost_svq_valid_features(features, &dev->migration_blocker);
}

/*
* Similar to VFIO, we end up pinning all guest memory and have to
* disable discarding of RAM.
*/
ret = ram_block_discard_disable(true);
if (ret) {
error_report("Cannot set discarding of RAM broken");
return ret;
}

vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER);

@@ -577,12 +593,15 @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
v = dev->opaque;
trace_vhost_vdpa_cleanup(dev, v);
if (vhost_vdpa_first_dev(dev)) {
ram_block_discard_disable(false);
}

vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
memory_listener_unregister(&v->listener);
vhost_vdpa_svq_cleanup(dev);

dev->opaque = NULL;
ram_block_discard_disable(false);

return 0;
}
@@ -659,7 +678,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
uint64_t features;
uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
0x1ULL << VHOST_BACKEND_F_SUSPEND;
int r;

if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
@@ -691,11 +711,13 @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev,

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
struct vhost_vdpa *v = dev->opaque;
int ret;
uint8_t status = 0;

ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
trace_vhost_vdpa_reset_device(dev, status);
v->suspended = false;
return ret;
}

@@ -1094,6 +1116,29 @@ static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
}
}

static void vhost_vdpa_suspend(struct vhost_dev *dev)
{
struct vhost_vdpa *v = dev->opaque;
int r;

if (!vhost_vdpa_first_dev(dev)) {
return;
}

if (dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) {
trace_vhost_vdpa_suspend(dev);
r = ioctl(v->device_fd, VHOST_VDPA_SUSPEND);
if (unlikely(r)) {
error_report("Cannot suspend: %s(%d)", g_strerror(errno), errno);
} else {
v->suspended = true;
return;
}
}

vhost_vdpa_reset_device(dev);
}
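If the backend does not advertise VHOST_BACKEND_F_SUSPEND, the device is reset instead and v->suspended stays false; vhost_vdpa_get_vring_base() below then returns -1 so that vhost recovers the used index from the guest ring rather than trusting a just-reset device.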

static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
struct vhost_vdpa *v = dev->opaque;
@@ -1108,6 +1153,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
}
vhost_vdpa_set_vring_ready(dev);
} else {
vhost_vdpa_suspend(dev);
vhost_vdpa_svqs_stop(dev);
vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
}
@@ -1119,14 +1165,23 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
if (started) {
memory_listener_register(&v->listener, &address_space_memory);
return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
} else {
vhost_vdpa_reset_device(dev);
vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER);
memory_listener_unregister(&v->listener);
}

return 0;
return 0;
}

static void vhost_vdpa_reset_status(struct vhost_dev *dev)
{
struct vhost_vdpa *v = dev->opaque;

if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
return;
}

vhost_vdpa_reset_device(dev);
vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER);
memory_listener_unregister(&v->listener);
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
@@ -1169,18 +1224,7 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
struct vhost_vring_state *ring)
{
struct vhost_vdpa *v = dev->opaque;
VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);

/*
* vhost-vdpa devices does not support in-flight requests. Set all of them
* as available.
*
* TODO: This is ok for networking, but other kinds of devices might
* have problems with these retransmissions.
*/
while (virtqueue_rewind(vq, 1)) {
continue;
}
if (v->shadow_vqs_enabled) {
/*
* Device vring base was set at device start. SVQ base is handled by
@@ -1203,6 +1247,14 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
return 0;
}

if (!v->suspended) {
/*
* Cannot trust the value returned by the device; let vhost recover the
* used idx from the guest.
*/
return -1;
}

ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
return ret;
@@ -1227,25 +1279,24 @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
struct vhost_vring_file *file)
{
struct vhost_vdpa *v = dev->opaque;
int vdpa_idx = file->index - dev->vq_index;
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);

/* Remember last call fd because we can switch to SVQ anytime. */
vhost_svq_set_svq_call_fd(svq, file->fd);
if (v->shadow_vqs_enabled) {
int vdpa_idx = file->index - dev->vq_index;
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);

vhost_svq_set_svq_call_fd(svq, file->fd);
return 0;
} else {
return vhost_vdpa_set_vring_dev_call(dev, file);
}

return vhost_vdpa_set_vring_dev_call(dev, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
uint64_t *features)
{
struct vhost_vdpa *v = dev->opaque;
int ret = vhost_vdpa_get_dev_features(dev, features);

if (ret == 0 && v->shadow_vqs_enabled) {
if (ret == 0) {
/* Add SVQ logging capabilities */
*features |= BIT_ULL(VHOST_F_LOG_ALL);
}
@@ -1313,4 +1364,5 @@ const VhostOps vdpa_ops = {
.vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
.vhost_force_iommu = vhost_vdpa_force_iommu,
.vhost_set_config_call = vhost_vdpa_set_config_call,
.vhost_reset_status = vhost_vdpa_reset_status,
};
3 changes: 3 additions & 0 deletions hw/virtio/vhost.c
@@ -2049,6 +2049,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
hdev->vqs + i,
hdev->vq_index + i);
}
if (hdev->vhost_ops->vhost_reset_status) {
hdev->vhost_ops->vhost_reset_status(hdev);
}

if (vhost_dev_has_iommu(hdev)) {
if (hdev->vhost_ops->vhost_set_iotlb_callback) {
48 changes: 36 additions & 12 deletions hw/virtio/virtio-crypto.c
@@ -462,7 +462,7 @@ static void virtio_crypto_init_request(VirtIOCrypto *vcrypto, VirtQueue *vq,
req->in_iov = NULL;
req->in_num = 0;
req->in_len = 0;
req->flags = CRYPTODEV_BACKEND_ALG__MAX;
req->flags = QCRYPTODEV_BACKEND_ALG__MAX;
memset(&req->op_info, 0x00, sizeof(req->op_info));
}

@@ -472,7 +472,7 @@ static void virtio_crypto_free_request(VirtIOCryptoReq *req)
return;
}

if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) {
if (req->flags == QCRYPTODEV_BACKEND_ALG_SYM) {
size_t max_len;
CryptoDevBackendSymOpInfo *op_info = req->op_info.u.sym_op_info;

@@ -485,7 +485,7 @@ static void virtio_crypto_free_request(VirtIOCryptoReq *req)
/* Zeroize and free request data structure */
memset(op_info, 0, sizeof(*op_info) + max_len);
g_free(op_info);
} else if (req->flags == CRYPTODEV_BACKEND_ALG_ASYM) {
} else if (req->flags == QCRYPTODEV_BACKEND_ALG_ASYM) {
CryptoDevBackendAsymOpInfo *op_info = req->op_info.u.asym_op_info;
if (op_info) {
g_free(op_info->src);
@@ -570,10 +570,10 @@ static void virtio_crypto_req_complete(void *opaque, int ret)
VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto);
uint8_t status = -ret;

if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) {
if (req->flags == QCRYPTODEV_BACKEND_ALG_SYM) {
virtio_crypto_sym_input_data_helper(vdev, req, status,
req->op_info.u.sym_op_info);
} else if (req->flags == CRYPTODEV_BACKEND_ALG_ASYM) {
} else if (req->flags == QCRYPTODEV_BACKEND_ALG_ASYM) {
virtio_crypto_akcipher_input_data_helper(vdev, req, status,
req->op_info.u.asym_op_info);
}
@@ -871,11 +871,14 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request)
opcode = ldl_le_p(&req.header.opcode);
op_info->session_id = ldq_le_p(&req.header.session_id);
op_info->op_code = opcode;
op_info->queue_index = queue_index;
op_info->cb = virtio_crypto_req_complete;
op_info->opaque = request;

switch (opcode) {
case VIRTIO_CRYPTO_CIPHER_ENCRYPT:
case VIRTIO_CRYPTO_CIPHER_DECRYPT:
op_info->algtype = request->flags = CRYPTODEV_BACKEND_ALG_SYM;
op_info->algtype = request->flags = QCRYPTODEV_BACKEND_ALG_SYM;
ret = virtio_crypto_handle_sym_req(vcrypto,
&req.u.sym_req, op_info,
out_iov, out_num);
@@ -885,7 +888,7 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request)
case VIRTIO_CRYPTO_AKCIPHER_DECRYPT:
case VIRTIO_CRYPTO_AKCIPHER_SIGN:
case VIRTIO_CRYPTO_AKCIPHER_VERIFY:
op_info->algtype = request->flags = CRYPTODEV_BACKEND_ALG_ASYM;
op_info->algtype = request->flags = QCRYPTODEV_BACKEND_ALG_ASYM;
ret = virtio_crypto_handle_asym_req(vcrypto,
&req.u.akcipher_req, op_info,
out_iov, out_num);
@@ -898,9 +901,7 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request)
virtio_crypto_req_complete(request, -VIRTIO_CRYPTO_NOTSUPP);
} else {
ret = cryptodev_backend_crypto_operation(vcrypto->cryptodev,
request, queue_index,
virtio_crypto_req_complete,
request);
op_info);
if (ret < 0) {
virtio_crypto_req_complete(request, ret);
}
@@ -997,12 +998,35 @@ static void virtio_crypto_reset(VirtIODevice *vdev)
}
}

static uint32_t virtio_crypto_init_services(uint32_t qservices)
{
uint32_t vservices = 0;

if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_CIPHER)) {
vservices |= (1 << VIRTIO_CRYPTO_SERVICE_CIPHER);
}
if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_HASH)) {
vservices |= (1 << VIRTIO_CRYPTO_SERVICE_HASH);
}
if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_MAC)) {
vservices |= (1 << VIRTIO_CRYPTO_SERVICE_MAC);
}
if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_AEAD)) {
vservices |= (1 << VIRTIO_CRYPTO_SERVICE_AEAD);
}
if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_AKCIPHER)) {
vservices |= (1 << VIRTIO_CRYPTO_SERVICE_AKCIPHER);
}

return vservices;
}

static void virtio_crypto_init_config(VirtIODevice *vdev)
{
VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev);

vcrypto->conf.crypto_services =
vcrypto->conf.cryptodev->conf.crypto_services;
vcrypto->conf.crypto_services = virtio_crypto_init_services(
vcrypto->conf.cryptodev->conf.crypto_services);
vcrypto->conf.cipher_algo_l =
vcrypto->conf.cryptodev->conf.cipher_algo_l;
vcrypto->conf.cipher_algo_h =
11 changes: 5 additions & 6 deletions hw/virtio/virtio.c
@@ -1069,7 +1069,7 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
VRingMemoryRegionCaches *caches)
{
VirtIODevice *vdev = vq->vdev;
unsigned int max, idx;
unsigned int idx;
unsigned int total_bufs, in_total, out_total;
MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
int64_t len = 0;
@@ -1078,13 +1078,12 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
idx = vq->last_avail_idx;
total_bufs = in_total = out_total = 0;

max = vq->vring.num;

while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
MemoryRegionCache *desc_cache = &caches->desc;
unsigned int num_bufs;
VRingDesc desc;
unsigned int i;
unsigned int max = vq->vring.num;

num_bufs = total_bufs;

@@ -1206,7 +1205,7 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
VRingMemoryRegionCaches *caches)
{
VirtIODevice *vdev = vq->vdev;
unsigned int max, idx;
unsigned int idx;
unsigned int total_bufs, in_total, out_total;
MemoryRegionCache *desc_cache;
MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
@@ -1218,14 +1217,14 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
wrap_counter = vq->last_avail_wrap_counter;
total_bufs = in_total = out_total = 0;

max = vq->vring.num;

for (;;) {
unsigned int num_bufs = total_bufs;
unsigned int i = idx;
int rc;
unsigned int max = vq->vring.num;

desc_cache = &caches->desc;

vring_packed_desc_read(vdev, &desc, desc_cache, idx, true);
if (!is_desc_avail(desc.flags, wrap_counter)) {
break;
1 change: 1 addition & 0 deletions include/hw/acpi/ich9.h
@@ -87,6 +87,7 @@ void ich9_pm_device_unplug_request_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp);
void ich9_pm_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp);
bool ich9_pm_is_hotpluggable_bus(HotplugHandler *hotplug_dev, BusState *bus);

void ich9_pm_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list);
#endif /* HW_ACPI_ICH9_H */
11 changes: 7 additions & 4 deletions include/hw/acpi/pcihp.h
@@ -49,15 +49,16 @@ typedef struct AcpiPciHpState {
uint32_t acpi_index;
PCIBus *root;
MemoryRegion io;
bool legacy_piix;
uint16_t io_base;
uint16_t io_len;
bool use_acpi_hotplug_bridge;
bool use_acpi_root_pci_hotplug;
} AcpiPciHpState;

void acpi_pcihp_init(Object *owner, AcpiPciHpState *, PCIBus *root,
MemoryRegion *address_space_io, bool bridges_enabled,
uint16_t io_base);
MemoryRegion *address_space_io, uint16_t io_base);

bool acpi_pcihp_is_hotpluggbale_bus(AcpiPciHpState *s, BusState *bus);
void acpi_pcihp_device_pre_plug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp);
void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s,
@@ -69,7 +70,9 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev,
Error **errp);

/* Called on reset */
void acpi_pcihp_reset(AcpiPciHpState *s, bool acpihp_root_off);
void acpi_pcihp_reset(AcpiPciHpState *s);

void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus);

extern const VMStateDescription vmstate_acpi_pcihp_pci_status;

2 changes: 0 additions & 2 deletions include/hw/acpi/piix4.h
@@ -57,8 +57,6 @@ struct PIIX4PMState {
Notifier powerdown_notifier;

AcpiPciHpState acpi_pci_hotplug;
bool use_acpi_hotplug_bridge;
bool use_acpi_root_pci_hotplug;
bool not_migrate_acpi_index;

uint8_t disable_s3;
1 change: 1 addition & 0 deletions include/hw/cxl/cxl.h
@@ -49,6 +49,7 @@ struct CXLHost {
PCIHostState parent_obj;

CXLComponentState cxl_cstate;
bool passthrough;
};

#define TYPE_PXB_CXL_HOST "pxb-cxl-host"
27 changes: 27 additions & 0 deletions include/hw/cxl/cxl_component.h
@@ -65,11 +65,37 @@ CXLx_CAPABILITY_HEADER(SNOOP, 0x14)
#define CXL_RAS_REGISTERS_OFFSET 0x80
#define CXL_RAS_REGISTERS_SIZE 0x58
REG32(CXL_RAS_UNC_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET)
#define CXL_RAS_UNC_ERR_CACHE_DATA_PARITY 0
#define CXL_RAS_UNC_ERR_CACHE_ADDRESS_PARITY 1
#define CXL_RAS_UNC_ERR_CACHE_BE_PARITY 2
#define CXL_RAS_UNC_ERR_CACHE_DATA_ECC 3
#define CXL_RAS_UNC_ERR_MEM_DATA_PARITY 4
#define CXL_RAS_UNC_ERR_MEM_ADDRESS_PARITY 5
#define CXL_RAS_UNC_ERR_MEM_BE_PARITY 6
#define CXL_RAS_UNC_ERR_MEM_DATA_ECC 7
#define CXL_RAS_UNC_ERR_REINIT_THRESHOLD 8
#define CXL_RAS_UNC_ERR_RSVD_ENCODING 9
#define CXL_RAS_UNC_ERR_POISON_RECEIVED 10
#define CXL_RAS_UNC_ERR_RECEIVER_OVERFLOW 11
#define CXL_RAS_UNC_ERR_INTERNAL 14
#define CXL_RAS_UNC_ERR_CXL_IDE_TX 15
#define CXL_RAS_UNC_ERR_CXL_IDE_RX 16
#define CXL_RAS_UNC_ERR_CXL_UNUSED 63 /* Magic value */
REG32(CXL_RAS_UNC_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x4)
REG32(CXL_RAS_UNC_ERR_SEVERITY, CXL_RAS_REGISTERS_OFFSET + 0x8)
REG32(CXL_RAS_COR_ERR_STATUS, CXL_RAS_REGISTERS_OFFSET + 0xc)
#define CXL_RAS_COR_ERR_CACHE_DATA_ECC 0
#define CXL_RAS_COR_ERR_MEM_DATA_ECC 1
#define CXL_RAS_COR_ERR_CRC_THRESHOLD 2
#define CXL_RAS_COR_ERR_RETRY_THRESHOLD 3
#define CXL_RAS_COR_ERR_CACHE_POISON_RECEIVED 4
#define CXL_RAS_COR_ERR_MEM_POISON_RECEIVED 5
#define CXL_RAS_COR_ERR_PHYSICAL 6
REG32(CXL_RAS_COR_ERR_MASK, CXL_RAS_REGISTERS_OFFSET + 0x10)
REG32(CXL_RAS_ERR_CAP_CTRL, CXL_RAS_REGISTERS_OFFSET + 0x14)
FIELD(CXL_RAS_ERR_CAP_CTRL, FIRST_ERROR_POINTER, 0, 6)
REG32(CXL_RAS_ERR_HEADER0, CXL_RAS_REGISTERS_OFFSET + 0x18)
#define CXL_RAS_ERR_HEADER_NUM 32
/* Offset 0x18 - 0x58 reserved for RAS logs */

/* 8.2.5.10 - CXL Security Capability Structure */
@@ -221,6 +247,7 @@ static inline hwaddr cxl_decode_ig(int ig)
}

CXLComponentState *cxl_get_hb_cstate(PCIHostState *hb);
bool cxl_get_hb_passthrough(PCIHostState *hb);

void cxl_doe_cdat_init(CXLComponentState *cxl_cstate, Error **errp);
void cxl_doe_cdat_release(CXLComponentState *cxl_cstate);
11 changes: 11 additions & 0 deletions include/hw/cxl/cxl_device.h
@@ -232,6 +232,14 @@ REG64(CXL_MEM_DEV_STS, 0)
FIELD(CXL_MEM_DEV_STS, MBOX_READY, 4, 1)
FIELD(CXL_MEM_DEV_STS, RESET_NEEDED, 5, 3)

typedef struct CXLError {
QTAILQ_ENTRY(CXLError) node;
int type; /* Error code as per FE definition */
uint32_t header[32];
} CXLError;

typedef QTAILQ_HEAD(, CXLError) CXLErrorList;

struct CXLType3Dev {
/* Private */
PCIDevice parent_obj;
@@ -248,6 +256,9 @@ struct CXLType3Dev {

/* DOE */
DOECap doe_cdat;

/* Error injection */
CXLErrorList error_list;
};

#define TYPE_CXL_TYPE3 "cxl-type3"
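The new CXLErrorList is a plain QTAILQ, so an injection path allocates a CXLError, fills in the type and header log, and queues it on the device. A minimal sketch, assuming a CXLType3Dev *ct3d is at hand; the helper name is hypothetical and not part of this series:

static void cxl_error_enqueue(CXLType3Dev *ct3d, int type,
                              const uint32_t *header, size_t n)
{
    CXLError *err = g_new0(CXLError, 1);

    err->type = type;
    /* Copy at most the 32 header-log dwords the structure can hold */
    memcpy(err->header, header,
           MIN(n, ARRAY_SIZE(err->header)) * sizeof(*header));
    QTAILQ_INSERT_TAIL(&ct3d->error_list, err, node);
}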
2 changes: 2 additions & 0 deletions include/hw/hotplug.h
@@ -48,6 +48,7 @@ typedef void (*hotplug_fn)(HotplugHandler *plug_handler,
* @unplug: unplug callback.
* Used for device removal with devices that implement
* asynchronous and synchronous (surprise) removal.
 * @is_hotpluggable_bus: called to check whether the bus or its parent allows hotplug on the bus
*/
struct HotplugHandlerClass {
/* <private> */
@@ -58,6 +59,7 @@ struct HotplugHandlerClass {
hotplug_fn plug;
hotplug_fn unplug_request;
hotplug_fn unplug;
bool (*is_hotpluggable_bus)(HotplugHandler *plug_handler, BusState *bus);
};

/**
1 change: 1 addition & 0 deletions include/hw/pci/pci_bridge.h
@@ -92,6 +92,7 @@ struct PXBDev {
uint8_t bus_nr;
uint16_t numa_node;
bool bypass_iommu;
bool hdm_for_passthrough;
struct cxl_dev {
CXLHost *cxl_host_bridge; /* Pointer to a CXLHost */
} cxl;
1 change: 1 addition & 0 deletions include/hw/pci/pcie_aer.h
@@ -100,4 +100,5 @@ void pcie_aer_root_write_config(PCIDevice *dev,
uint32_t addr, uint32_t val, int len,
uint32_t root_cmd_prev);

int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err);
#endif /* QEMU_PCIE_AER_H */
2 changes: 2 additions & 0 deletions include/hw/pci/pcie_port.h
@@ -41,6 +41,8 @@ struct PCIEPort {
void pcie_port_init_reg(PCIDevice *d);

PCIDevice *pcie_find_port_by_pn(PCIBus *bus, uint8_t pn);
PCIDevice *pcie_find_port_first(PCIBus *bus);
int pcie_count_ds_ports(PCIBus *bus);

#define TYPE_PCIE_SLOT "pcie-slot"
OBJECT_DECLARE_SIMPLE_TYPE(PCIESlot, PCIE_SLOT)
3 changes: 3 additions & 0 deletions include/hw/pci/pcie_regs.h
@@ -141,6 +141,9 @@ typedef enum PCIExpLinkWidth {
PCI_ERR_UNC_ATOP_EBLOCKED | \
PCI_ERR_UNC_TLP_PRF_BLOCKED)

#define PCI_ERR_UNC_MASK_DEFAULT (PCI_ERR_UNC_INTN | \
PCI_ERR_UNC_TLP_PRF_BLOCKED)

#define PCI_ERR_UNC_SEVERITY_DEFAULT (PCI_ERR_UNC_DLP | \
PCI_ERR_UNC_SDN | \
PCI_ERR_UNC_FCP | \
13 changes: 12 additions & 1 deletion include/hw/qdev-core.h
@@ -812,7 +812,18 @@ void qbus_set_bus_hotplug_handler(BusState *bus);

static inline bool qbus_is_hotpluggable(BusState *bus)
{
return bus->hotplug_handler;
HotplugHandler *plug_handler = bus->hotplug_handler;
bool ret = !!plug_handler;

if (plug_handler) {
HotplugHandlerClass *hdc;

hdc = HOTPLUG_HANDLER_GET_CLASS(plug_handler);
if (hdc->is_hotpluggable_bus) {
ret = hdc->is_hotpluggable_bus(plug_handler, bus);
}
}
return ret;
}

/**
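Since qbus_is_hotpluggable() now defers to the handler, a hotplug handler opts in from its class_init. A sketch of the wiring, modeled on the ICH9 callback declared earlier (the class_init function shown here is illustrative, not an exact hunk from this series):

static void ich9_lpc_class_init(ObjectClass *klass, void *data)
{
    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);

    /* Answer qbus_is_hotpluggable() per bus instead of always true */
    hc->is_hotpluggable_bus = ich9_pm_is_hotpluggable_bus;
}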
4 changes: 4 additions & 0 deletions include/hw/virtio/vhost-backend.h
@@ -130,6 +130,9 @@ typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);

typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev,
int fd);

typedef void (*vhost_reset_status_op)(struct vhost_dev *dev);

typedef struct VhostOps {
VhostBackendType backend_type;
vhost_backend_init vhost_backend_init;
@@ -177,6 +180,7 @@ typedef struct VhostOps {
vhost_get_device_id_op vhost_get_device_id;
vhost_force_iommu_op vhost_force_iommu;
vhost_set_config_call_op vhost_set_config_call;
vhost_reset_status_op vhost_reset_status;
} VhostOps;

int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
3 changes: 3 additions & 0 deletions include/hw/virtio/vhost-vdpa.h
@@ -42,12 +42,15 @@ typedef struct vhost_vdpa {
bool shadow_vqs_enabled;
/* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */
bool shadow_data;
/* Device suspended successfully */
bool suspended;
/* IOVA mapping used by the Shadow Virtqueue */
VhostIOVATree *iova_tree;
GPtrArray *shadow_vqs;
const VhostShadowVirtqueueOps *shadow_vq_ops;
void *shadow_vq_ops_opaque;
struct vhost_dev *dev;
Error *migration_blocker;
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
} VhostVDPA;

1 change: 1 addition & 0 deletions include/monitor/hmp.h
@@ -180,5 +180,6 @@ void hmp_ioport_read(Monitor *mon, const QDict *qdict);
void hmp_ioport_write(Monitor *mon, const QDict *qdict);
void hmp_boot_set(Monitor *mon, const QDict *qdict);
void hmp_info_mtree(Monitor *mon, const QDict *qdict);
void hmp_info_cryptodev(Monitor *mon, const QDict *qdict);

#endif
113 changes: 72 additions & 41 deletions include/sysemu/cryptodev.h
@@ -24,7 +24,9 @@
#define CRYPTODEV_H

#include "qemu/queue.h"
#include "qemu/throttle.h"
#include "qom/object.h"
#include "qapi/qapi-types-cryptodev.h"

/**
* CryptoDevBackend:
@@ -48,12 +50,6 @@ typedef struct CryptoDevBackendClient
typedef struct CryptoDevBackendClient
CryptoDevBackendClient;

enum CryptoDevBackendAlgType {
CRYPTODEV_BACKEND_ALG_SYM,
CRYPTODEV_BACKEND_ALG_ASYM,
CRYPTODEV_BACKEND_ALG__MAX,
};

/**
* CryptoDevBackendSymSessionInfo:
*
@@ -179,17 +175,22 @@ typedef struct CryptoDevBackendAsymOpInfo {
uint8_t *dst;
} CryptoDevBackendAsymOpInfo;

typedef void (*CryptoDevCompletionFunc) (void *opaque, int ret);

typedef struct CryptoDevBackendOpInfo {
enum CryptoDevBackendAlgType algtype;
QCryptodevBackendAlgType algtype;
uint32_t op_code;
uint32_t queue_index;
CryptoDevCompletionFunc cb;
void *opaque; /* argument for cb */
uint64_t session_id;
union {
CryptoDevBackendSymOpInfo *sym_op_info;
CryptoDevBackendAsymOpInfo *asym_op_info;
} u;
QTAILQ_ENTRY(CryptoDevBackendOpInfo) next;
} CryptoDevBackendOpInfo;

typedef void (*CryptoDevCompletionFunc) (void *opaque, int ret);
struct CryptoDevBackendClass {
ObjectClass parent_class;

@@ -209,24 +210,11 @@ struct CryptoDevBackendClass {
void *opaque);

int (*do_op)(CryptoDevBackend *backend,
CryptoDevBackendOpInfo *op_info,
uint32_t queue_index,
CryptoDevCompletionFunc cb,
void *opaque);
CryptoDevBackendOpInfo *op_info);
};

typedef enum CryptoDevBackendOptionsType {
CRYPTODEV_BACKEND_TYPE_NONE = 0,
CRYPTODEV_BACKEND_TYPE_BUILTIN = 1,
CRYPTODEV_BACKEND_TYPE_VHOST_USER = 2,
CRYPTODEV_BACKEND_TYPE_LKCF = 3,
CRYPTODEV_BACKEND_TYPE__MAX,
} CryptoDevBackendOptionsType;

struct CryptoDevBackendClient {
CryptoDevBackendOptionsType type;
char *model;
char *name;
QCryptodevBackendType type;
char *info_str;
unsigned int queue_index;
int vring_enable;
@@ -260,32 +248,82 @@ struct CryptoDevBackendConf {
uint64_t max_size;
};

typedef struct CryptodevBackendSymStat {
int64_t encrypt_ops;
int64_t decrypt_ops;
int64_t encrypt_bytes;
int64_t decrypt_bytes;
} CryptodevBackendSymStat;

typedef struct CryptodevBackendAsymStat {
int64_t encrypt_ops;
int64_t decrypt_ops;
int64_t sign_ops;
int64_t verify_ops;
int64_t encrypt_bytes;
int64_t decrypt_bytes;
int64_t sign_bytes;
int64_t verify_bytes;
} CryptodevBackendAsymStat;

struct CryptoDevBackend {
Object parent_obj;

bool ready;
    /* Whether the cryptodev backend is used by virtio-crypto */
bool is_used;
CryptoDevBackendConf conf;
CryptodevBackendSymStat *sym_stat;
CryptodevBackendAsymStat *asym_stat;

ThrottleState ts;
ThrottleTimers tt;
ThrottleConfig tc;
QTAILQ_HEAD(, CryptoDevBackendOpInfo) opinfos;
};

#define CryptodevSymStatInc(be, op, bytes) do { \
be->sym_stat->op##_bytes += (bytes); \
be->sym_stat->op##_ops += 1; \
} while (/*CONSTCOND*/0)

#define CryptodevSymStatIncEncrypt(be, bytes) \
CryptodevSymStatInc(be, encrypt, bytes)

#define CryptodevSymStatIncDecrypt(be, bytes) \
CryptodevSymStatInc(be, decrypt, bytes)

#define CryptodevAsymStatInc(be, op, bytes) do { \
be->asym_stat->op##_bytes += (bytes); \
be->asym_stat->op##_ops += 1; \
} while (/*CONSTCOND*/0)

#define CryptodevAsymStatIncEncrypt(be, bytes) \
CryptodevAsymStatInc(be, encrypt, bytes)

#define CryptodevAsymStatIncDecrypt(be, bytes) \
CryptodevAsymStatInc(be, decrypt, bytes)

#define CryptodevAsymStatIncSign(be, bytes) \
CryptodevAsymStatInc(be, sign, bytes)

#define CryptodevAsymStatIncVerify(be, bytes) \
CryptodevAsymStatInc(be, verify, bytes)


/**
* cryptodev_backend_new_client:
* @model: the cryptodev backend model
* @name: the cryptodev backend name, can be NULL
*
* Creates a new cryptodev backend client object
* with the @name in the model @model.
* Creates a new cryptodev backend client object.
*
* The returned object must be released with
* cryptodev_backend_free_client() when no
* longer required
*
* Returns: a new cryptodev backend client object
*/
CryptoDevBackendClient *
cryptodev_backend_new_client(const char *model,
const char *name);
CryptoDevBackendClient *cryptodev_backend_new_client(void);

/**
* cryptodev_backend_free_client:
* @cc: the cryptodev backend client object
@@ -354,24 +392,17 @@ int cryptodev_backend_close_session(
/**
* cryptodev_backend_crypto_operation:
* @backend: the cryptodev backend object
* @opaque1: pointer to a VirtIOCryptoReq object
* @queue_index: queue index of cryptodev backend client
* @errp: pointer to a NULL-initialized error object
* @cb: callbacks when operation is completed
* @opaque2: parameter passed to cb
* @op_info: pointer to a CryptoDevBackendOpInfo object
*
* Do crypto operation, such as encryption and
* decryption
* Do crypto operation, such as encryption, decryption, signature and
* verification
*
 * Returns: 0 for success and cb will be called when the operation completes,
* negative value for error, and cb will not be called.
*/
int cryptodev_backend_crypto_operation(
CryptoDevBackend *backend,
void *opaque1,
uint32_t queue_index,
CryptoDevCompletionFunc cb,
void *opaque2);
CryptoDevBackendOpInfo *op_info);

/**
* cryptodev_backend_set_used:
198 changes: 166 additions & 32 deletions net/vhost-vdpa.c
@@ -26,12 +26,15 @@
#include <err.h>
#include "standard-headers/linux/virtio_net.h"
#include "monitor/monitor.h"
#include "migration/migration.h"
#include "migration/misc.h"
#include "hw/virtio/vhost.h"

/* TODO: need to add the multiqueue support here */
typedef struct VhostVDPAState {
NetClientState nc;
struct vhost_vdpa vhost_vdpa;
Notifier migration_state;
VHostNetState *vhost_net;

/* Control commands shadow buffers */
@@ -98,6 +101,8 @@ static const uint64_t vdpa_svq_device_features =
BIT_ULL(VIRTIO_NET_F_MQ) |
BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
/* VHOST_F_LOG_ALL is exposed by SVQ */
BIT_ULL(VHOST_F_LOG_ALL) |
BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
BIT_ULL(VIRTIO_NET_F_STANDBY);

@@ -178,13 +183,9 @@ static int vhost_vdpa_add(NetClientState *ncs, void *be,
static void vhost_vdpa_cleanup(NetClientState *nc)
{
VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
struct vhost_dev *dev = &s->vhost_net->dev;

qemu_vfree(s->cvq_cmd_out_buffer);
qemu_vfree(s->status);
if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
}
if (s->vhost_net) {
vhost_net_cleanup(s->vhost_net);
g_free(s->vhost_net);
@@ -234,10 +235,126 @@ static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
return size;
}

/** From any vdpa net client, get the netclient of the first queue pair */
static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
{
NICState *nic = qemu_get_nic(s->nc.peer);
NetClientState *nc0 = qemu_get_peer(nic->ncs, 0);

return DO_UPCAST(VhostVDPAState, nc, nc0);
}

static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
{
struct vhost_vdpa *v = &s->vhost_vdpa;
VirtIONet *n;
VirtIODevice *vdev;
int data_queue_pairs, cvq, r;

    /* We are only called on the first data vq and only if x-svq is not set */
if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
return;
}

vdev = v->dev->vdev;
n = VIRTIO_NET(vdev);
if (!n->vhost_started) {
return;
}

data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
n->max_ncs - n->max_queue_pairs : 0;
/*
* TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
     * in the future and resume the device if read-only operations between
     * suspend and reset go wrong.
*/
vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);

    /* Start will check migration setup_or_active to decide whether to configure SVQ */
r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
if (unlikely(r < 0)) {
error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
}
}

static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
{
MigrationState *migration = data;
VhostVDPAState *s = container_of(notifier, VhostVDPAState,
migration_state);

if (migration_in_setup(migration)) {
vhost_vdpa_net_log_global_enable(s, true);
} else if (migration_has_failed(migration)) {
vhost_vdpa_net_log_global_enable(s, false);
}
}

static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
{
struct vhost_vdpa *v = &s->vhost_vdpa;

add_migration_state_change_notifier(&s->migration_state);
if (v->shadow_vqs_enabled) {
v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
v->iova_range.last);
}
}

static int vhost_vdpa_net_data_start(NetClientState *nc)
{
VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
struct vhost_vdpa *v = &s->vhost_vdpa;

assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

if (s->always_svq ||
migration_is_setup_or_active(migrate_get_current()->state)) {
v->shadow_vqs_enabled = true;
v->shadow_data = true;
} else {
v->shadow_vqs_enabled = false;
v->shadow_data = false;
}

if (v->index == 0) {
vhost_vdpa_net_data_start_first(s);
return 0;
}

if (v->shadow_vqs_enabled) {
VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s);
v->iova_tree = s0->vhost_vdpa.iova_tree;
}

return 0;
}

static void vhost_vdpa_net_client_stop(NetClientState *nc)
{
VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
struct vhost_dev *dev;

assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

if (s->vhost_vdpa.index == 0) {
remove_migration_state_change_notifier(&s->migration_state);
}

dev = s->vhost_vdpa.dev;
if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
}
}

static NetClientInfo net_vhost_vdpa_info = {
.type = NET_CLIENT_DRIVER_VHOST_VDPA,
.size = sizeof(VhostVDPAState),
.receive = vhost_vdpa_receive,
.start = vhost_vdpa_net_data_start,
.stop = vhost_vdpa_net_client_stop,
.cleanup = vhost_vdpa_cleanup,
.has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
.has_ufo = vhost_vdpa_has_ufo,
@@ -351,7 +468,7 @@ static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,

static int vhost_vdpa_net_cvq_start(NetClientState *nc)
{
VhostVDPAState *s;
VhostVDPAState *s, *s0;
struct vhost_vdpa *v;
uint64_t backend_features;
int64_t cvq_group;
@@ -362,11 +479,12 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
s = DO_UPCAST(VhostVDPAState, nc, nc);
v = &s->vhost_vdpa;

v->shadow_data = s->always_svq;
s0 = vhost_vdpa_net_first_nc_vdpa(s);
v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled;
v->shadow_vqs_enabled = s->always_svq;
s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;

if (s->always_svq) {
if (s->vhost_vdpa.shadow_data) {
/* SVQ is already configured for all virtqueues */
goto out;
}
@@ -415,8 +533,6 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
return r;
}

v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
v->iova_range.last);
v->shadow_vqs_enabled = true;
s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;

@@ -425,6 +541,26 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
return 0;
}

if (s0->vhost_vdpa.iova_tree) {
/*
* SVQ is already configured for all virtqueues. Reuse IOVA tree for
* simplicity, whether CVQ shares ASID with guest or not, because:
         * - The memory listener needs access to guest's memory addresses
         *   allocated in the IOVA tree.
         * - There should be plenty of IOVA address space for both ASIDs not to
         *   worry about collisions between them. Guest's translations are
         *   still validated with virtio virtqueue_pop so there is no risk for
         *   the guest to access memory that it shouldn't.
         *
         * Allocating an IOVA tree per ASID is doable but it complicates the
         * code and it is not worth it for the moment.
*/
v->iova_tree = s0->vhost_vdpa.iova_tree;
} else {
v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
v->iova_range.last);
}

r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
vhost_vdpa_net_cvq_cmd_page_len(), false);
if (unlikely(r < 0)) {
@@ -449,15 +585,9 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
if (s->vhost_vdpa.shadow_vqs_enabled) {
vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
if (!s->always_svq) {
/*
* If only the CVQ is shadowed we can delete this safely.
* If all the VQs are shadows this will be needed by the time the
* device is started again to register SVQ vrings and similar.
*/
g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
}
}

vhost_vdpa_net_client_stop(nc);
}

static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
@@ -668,7 +798,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
bool is_datapath,
bool svq,
struct vhost_vdpa_iova_range iova_range,
VhostIOVATree *iova_tree)
uint64_t features)
{
NetClientState *nc = NULL;
VhostVDPAState *s;
@@ -687,11 +817,14 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
s->vhost_vdpa.device_fd = vdpa_device_fd;
s->vhost_vdpa.index = queue_pair_index;
s->always_svq = svq;
s->migration_state.notify = vdpa_net_migration_state_notifier;
s->vhost_vdpa.shadow_vqs_enabled = svq;
s->vhost_vdpa.iova_range = iova_range;
s->vhost_vdpa.shadow_data = svq;
s->vhost_vdpa.iova_tree = iova_tree;
if (!is_datapath) {
if (queue_pair_index == 0) {
vhost_vdpa_net_valid_svq_features(features,
&s->vhost_vdpa.migration_blocker);
} else if (!is_datapath) {
s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
vhost_vdpa_net_cvq_cmd_page_len());
memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
@@ -701,6 +834,15 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,

s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
s->vhost_vdpa.shadow_vq_ops_opaque = s;

/*
* TODO: We cannot migrate devices with CVQ as there is no way to set
* the device state (MAC, MQ, etc) before starting the datapath.
*
* Migration blocker ownership now belongs to s->vhost_vdpa.
*/
error_setg(&s->vhost_vdpa.migration_blocker,
"net vdpa cannot migrate with CVQ feature");
}
ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
if (ret) {
@@ -760,7 +902,6 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
uint64_t features;
int vdpa_device_fd;
g_autofree NetClientState **ncs = NULL;
g_autoptr(VhostIOVATree) iova_tree = NULL;
struct vhost_vdpa_iova_range iova_range;
NetClientState *nc;
int queue_pairs, r, i = 0, has_cvq = 0;
@@ -812,34 +953,28 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
goto err;
}

if (opts->x_svq) {
if (!vhost_vdpa_net_valid_svq_features(features, errp)) {
goto err_svq;
}

iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) {
goto err;
}

ncs = g_malloc0(sizeof(*ncs) * queue_pairs);

for (i = 0; i < queue_pairs; i++) {
ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
vdpa_device_fd, i, 2, true, opts->x_svq,
iova_range, iova_tree);
iova_range, features);
if (!ncs[i])
goto err;
}

if (has_cvq) {
nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
vdpa_device_fd, i, 1, false,
opts->x_svq, iova_range, iova_tree);
opts->x_svq, iova_range, features);
if (!nc)
goto err;
}

/* iova_tree ownership belongs to last NetClientState */
g_steal_pointer(&iova_tree);
return 0;

err:
@@ -849,7 +984,6 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
}
}

err_svq:
qemu_close(vdpa_device_fd);

return -1;
89 changes: 89 additions & 0 deletions qapi/cryptodev.json
@@ -0,0 +1,89 @@
# -*- Mode: Python -*-
# vim: filetype=python
#
# This work is licensed under the terms of the GNU GPL, version 2 or later.
# See the COPYING file in the top-level directory.

##
# @QCryptodevBackendAlgType:
#
# The supported algorithm types of a crypto device.
#
# @sym: symmetric encryption
# @asym: asymmetric encryption
#
# Since: 8.0
##
{ 'enum': 'QCryptodevBackendAlgType',
'prefix': 'QCRYPTODEV_BACKEND_ALG',
'data': ['sym', 'asym']}

##
# @QCryptodevBackendServiceType:
#
# The supported service types of a crypto device.
#
# Since: 8.0
##
{ 'enum': 'QCryptodevBackendServiceType',
'prefix': 'QCRYPTODEV_BACKEND_SERVICE',
'data': ['cipher', 'hash', 'mac', 'aead', 'akcipher']}

##
# @QCryptodevBackendType:
#
# The crypto device backend type
#
# @builtin: the QEMU built-in cryptodev backend
# @vhost-user: vhost-user
# @lkcf: Linux kernel cryptographic framework
#
# Since: 8.0
##
{ 'enum': 'QCryptodevBackendType',
'prefix': 'QCRYPTODEV_BACKEND_TYPE',
'data': ['builtin', 'vhost-user', 'lkcf']}

##
# @QCryptodevBackendClient:
#
# Information about a queue of a crypto device.
#
# @queue: the queue index of the crypto device
#
# @type: the type of the crypto device
#
# Since: 8.0
##
{ 'struct': 'QCryptodevBackendClient',
'data': { 'queue': 'uint32',
'type': 'QCryptodevBackendType' } }

##
# @QCryptodevInfo:
#
# Information about a crypto device.
#
# @id: the id of the crypto device
#
# @service: supported service types of a crypto device
#
# @client: the additional information of the crypto device
#
# Since: 8.0
##
{ 'struct': 'QCryptodevInfo',
'data': { 'id': 'str',
'service': ['QCryptodevBackendServiceType'],
'client': ['QCryptodevBackendClient'] } }

##
# @query-cryptodev:
#
# Returns information about current crypto devices.
#
# Returns: a list of @QCryptodevInfo
#
# Since: 8.0
##
{ 'command': 'query-cryptodev', 'returns': ['QCryptodevInfo']}
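
By QAPI convention, the schema above generates a qmp_query_cryptodev() returning a QCryptodevInfoList, which a consumer such as the new HMP command can walk. A sketch of the generated-API usage, not code from this series:

static void dump_cryptodev(void)
{
    Error *err = NULL;
    QCryptodevInfoList *il = qmp_query_cryptodev(&err);

    if (err) {
        error_report_err(err);
        return;
    }
    for (QCryptodevInfoList *el = il; el; el = el->next) {
        QCryptodevInfo *info = el->value;
        QCryptodevBackendClientList *cl;

        printf("cryptodev: %s\n", info->id);
        for (cl = info->client; cl; cl = cl->next) {
            printf("  queue %u: %s\n", cl->value->queue,
                   QCryptodevBackendType_str(cl->value->type));
        }
    }
    qapi_free_QCryptodevInfoList(il);
}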