Skip to content

Commit

Permalink
nvme: introduce PMR support from NVMe 1.4 spec
Browse files Browse the repository at this point in the history
This patch introduces support for PMR that has been defined as part of NVMe 1.4
spec. User can now specify a pmrdev option that should point to HostMemoryBackend.
pmrdev memory region will subsequently be exposed as PCI BAR 2 in emulated NVMe
device. Guest OS can perform mmio read and writes to the PMR region that will stay
persistent across system reboot.

Signed-off-by: Andrzej Jakowski <andrzej.jakowski@linux.intel.com>
Reviewed-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20200330164656.9348-1-andrzej.jakowski@linux.intel.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
  • Loading branch information
Andrzej Jakowski authored and kevmw committed Apr 30, 2020
1 parent eb8a0cf commit 6cf9413
Show file tree
Hide file tree
Showing 5 changed files with 288 additions and 1 deletion.
2 changes: 1 addition & 1 deletion hw/block/Makefile.objs
Expand Up @@ -7,12 +7,12 @@ common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
common-obj-$(CONFIG_XEN) += xen-block.o
common-obj-$(CONFIG_ECC) += ecc.o
common-obj-$(CONFIG_ONENAND) += onenand.o
common-obj-$(CONFIG_NVME_PCI) += nvme.o
common-obj-$(CONFIG_SWIM) += swim.o

common-obj-$(CONFIG_SH4) += tc58128.o

obj-$(CONFIG_VIRTIO_BLK) += virtio-blk.o
obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o
obj-$(CONFIG_NVME_PCI) += nvme.o

obj-y += dataplane/
109 changes: 109 additions & 0 deletions hw/block/nvme.c
Expand Up @@ -19,10 +19,19 @@
* -drive file=<file>,if=none,id=<drive_id>
* -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
* cmb_size_mb=<cmb_size_mb[optional]>, \
* [pmrdev=<mem_backend_file_id>,] \
* num_queues=<N[optional]>
*
* Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
* offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
*
* cmb_size_mb= and pmrdev= options are mutually exclusive due to limitation
* in available BAR's. cmb_size_mb= will take precedence over pmrdev= when
* both provided.
* Enabling pmr emulation can be achieved by pointing to memory-backend-file.
* For example:
* -object memory-backend-file,id=<mem_id>,share=on,mem-path=<file_path>, \
* size=<size> .... -device nvme,...,pmrdev=<mem_id>
*/

#include "qemu/osdep.h"
Expand All @@ -35,7 +44,9 @@
#include "sysemu/sysemu.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "sysemu/hostmem.h"
#include "sysemu/block-backend.h"
#include "exec/ram_addr.h"

#include "qemu/log.h"
#include "qemu/module.h"
Expand Down Expand Up @@ -1141,6 +1152,26 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly,
"invalid write to read only CMBSZ, ignored");
return;
case 0xE00: /* PMRCAP */
NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly,
"invalid write to PMRCAP register, ignored");
return;
case 0xE04: /* TODO PMRCTL */
break;
case 0xE08: /* PMRSTS */
NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly,
"invalid write to PMRSTS register, ignored");
return;
case 0xE0C: /* PMREBS */
NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly,
"invalid write to PMREBS register, ignored");
return;
case 0xE10: /* PMRSWTP */
NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly,
"invalid write to PMRSWTP register, ignored");
return;
case 0xE14: /* TODO PMRMSC */
break;
default:
NVME_GUEST_ERR(nvme_ub_mmiowr_invalid,
"invalid MMIO write,"
Expand Down Expand Up @@ -1169,6 +1200,16 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
}

if (addr < sizeof(n->bar)) {
/*
* When PMRWBM bit 1 is set then read from
* from PMRSTS should ensure prior writes
* made it to persistent media
*/
if (addr == 0xE08 &&
(NVME_PMRCAP_PMRWBM(n->bar.pmrcap) & 0x02)) {
qemu_ram_writeback(n->pmrdev->mr.ram_block,
0, n->pmrdev->size);
}
memcpy(&val, ptr + addr, size);
} else {
NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs,
Expand Down Expand Up @@ -1332,6 +1373,23 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
error_setg(errp, "serial property not set");
return;
}

if (!n->cmb_size_mb && n->pmrdev) {
if (host_memory_backend_is_mapped(n->pmrdev)) {
char *path = object_get_canonical_path_component(OBJECT(n->pmrdev));
error_setg(errp, "can't use already busy memdev: %s", path);
g_free(path);
return;
}

if (!is_power_of_2(n->pmrdev->size)) {
error_setg(errp, "pmr backend size needs to be power of 2 in size");
return;
}

host_memory_backend_set_mapped(n->pmrdev, true);
}

blkconf_blocksizes(&n->conf);
if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
false, errp)) {
Expand Down Expand Up @@ -1415,6 +1473,51 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);

} else if (n->pmrdev) {
/* Controller Capabilities register */
NVME_CAP_SET_PMRS(n->bar.cap, 1);

/* PMR Capabities register */
n->bar.pmrcap = 0;
NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0);
NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0);
NVME_PMRCAP_SET_BIR(n->bar.pmrcap, 2);
NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0);
/* Turn on bit 1 support */
NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02);
NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0);
NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0);

/* PMR Control register */
n->bar.pmrctl = 0;
NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0);

/* PMR Status register */
n->bar.pmrsts = 0;
NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0);
NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0);
NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0);
NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0);

/* PMR Elasticity Buffer Size register */
n->bar.pmrebs = 0;
NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0);
NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0);
NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0);

/* PMR Sustained Write Throughput register */
n->bar.pmrswtp = 0;
NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0);
NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0);

/* PMR Memory Space Control register */
n->bar.pmrmsc = 0;
NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0);
NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0);

pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap),
PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr);
}

for (i = 0; i < n->num_namespaces; i++) {
Expand Down Expand Up @@ -1445,11 +1548,17 @@ static void nvme_exit(PCIDevice *pci_dev)
if (n->cmb_size_mb) {
g_free(n->cmbuf);
}

if (n->pmrdev) {
host_memory_backend_set_mapped(n->pmrdev, false);
}
msix_uninit_exclusive_bar(pci_dev);
}

static Property nvme_props[] = {
DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf),
DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmrdev, TYPE_MEMORY_BACKEND,
HostMemoryBackend *),
DEFINE_PROP_STRING("serial", NvmeCtrl, serial),
DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0),
DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64),
Expand Down
2 changes: 2 additions & 0 deletions hw/block/nvme.h
Expand Up @@ -83,6 +83,8 @@ typedef struct NvmeCtrl {
uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */

char *serial;
HostMemoryBackend *pmrdev;

NvmeNamespace *namespaces;
NvmeSQueue **sq;
NvmeCQueue **cq;
Expand Down
4 changes: 4 additions & 0 deletions hw/block/trace-events
Expand Up @@ -110,6 +110,10 @@ nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CA
nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)"
nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored"
nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored"
nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored"
nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored"
nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored"
nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored"
nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64""
nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64""
nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64""
Expand Down

0 comments on commit 6cf9413

Please sign in to comment.