Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/nvme/tags/nvme-next-pull-reques…
Browse files Browse the repository at this point in the history
…t' into staging

Emulated NVMe device updates

  * deallocated or unwritten logical block error feature (me)
  * dataset management command (me)
  * compare command (Gollu Appalanaidu)
  * namespace types (Niklas Cassel)
  * zoned namespaces (Dmitry Fomichev)
  * smart critical warning toggle (Zhenwei Pi)
  * allow cmb and pmr to coexist (me)
  * pmr rds/wds support (Naveen Nagar)
  * cmb v1.4 logic (Padmakar Kalghatgi)

And a lot of smaller fixes from Gollu Appalanaidu and Minwoo Im.

# gpg: Signature made Tue 09 Feb 2021 07:25:18 GMT
# gpg:                using RSA key 522833AA75E2DCE6A24766C04DE1AF316D4F0DE9
# gpg: Good signature from "Klaus Jensen <its@irrelevant.dk>" [unknown]
# gpg:                 aka "Klaus Jensen <k.jensen@samsung.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: DDCA 4D9C 9EF9 31CC 3468  4272 63D5 6FC5 E55D A838
#      Subkey fingerprint: 5228 33AA 75E2 DCE6 A247  66C0 4DE1 AF31 6D4F 0DE9

* remotes/nvme/tags/nvme-next-pull-request: (56 commits)
  hw/block/nvme: refactor the logic for zone write checks
  hw/block/nvme: fix zone boundary check for append
  hw/block/nvme: fix wrong parameter name 'cross_read'
  hw/block/nvme: align with existing style
  hw/block/nvme: fix set feature save field check
  hw/block/nvme: fix set feature for error recovery
  hw/block/nvme: error if drive less than a zone size
  hw/block/nvme: lift cmb restrictions
  hw/block/nvme: bump to v1.4
  hw/block/nvme: move cmb logic to v1.4
  hw/block/nvme: add PMR RDS/WDS support
  hw/block/nvme: disable PMR at boot up
  hw/block/nvme: remove redundant zeroing of PMR registers
  hw/block/nvme: rename PMR/CMB shift/mask fields
  hw/block/nvme: allow cmb and pmr to coexist
  hw/block/nvme: move msix table and pba to BAR 0
  hw/block/nvme: indicate CMB support through controller capabilities register
  hw/block/nvme: fix 64 bit register hi/lo split writes
  hw/block/nvme: add size to mmio read/write trace events
  hw/block/nvme: trigger async event during injecting smart warning
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
  • Loading branch information
pm215 committed Feb 9, 2021
2 parents 41d306e + 3e22762 commit 1214d55
Show file tree
Hide file tree
Showing 6 changed files with 2,879 additions and 389 deletions.
290 changes: 277 additions & 13 deletions hw/block/nvme-ns.c
Expand Up @@ -16,6 +16,7 @@
#include "qemu/units.h"
#include "qemu/cutils.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "hw/block/block.h"
#include "hw/pci/pci.h"
#include "sysemu/sysemu.h"
Expand All @@ -25,28 +26,47 @@
#include "hw/qdev-properties.h"
#include "hw/qdev-core.h"

#include "trace.h"
#include "nvme.h"
#include "nvme-ns.h"

static void nvme_ns_init(NvmeNamespace *ns)
#define MIN_DISCARD_GRANULARITY (4 * KiB)

static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
{
BlockDriverInfo bdi;
NvmeIdNs *id_ns = &ns->id_ns;
int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
int npdg;

if (blk_get_flags(ns->blkconf.blk) & BDRV_O_UNMAP) {
ns->id_ns.dlfeat = 0x9;
}
ns->id_ns.dlfeat = 0x9;

id_ns->lbaf[lba_index].ds = 31 - clz32(ns->blkconf.logical_block_size);

id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns));

ns->csi = NVME_CSI_NVM;

/* no thin provisioning */
id_ns->ncap = id_ns->nsze;
id_ns->nuse = id_ns->ncap;

/* support DULBE and I/O optimization fields */
id_ns->nsfeat |= (0x4 | 0x10);

npdg = ns->blkconf.discard_granularity / ns->blkconf.logical_block_size;

if (bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi) >= 0 &&
bdi.cluster_size > ns->blkconf.discard_granularity) {
npdg = bdi.cluster_size / ns->blkconf.logical_block_size;
}

id_ns->npda = id_ns->npdg = npdg - 1;

return 0;
}

static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
static int nvme_ns_init_blk(NvmeNamespace *ns, Error **errp)
{
bool read_only;

Expand All @@ -59,19 +79,225 @@ static int nvme_ns_init_blk(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
return -1;
}

if (ns->blkconf.discard_granularity == -1) {
ns->blkconf.discard_granularity =
MAX(ns->blkconf.logical_block_size, MIN_DISCARD_GRANULARITY);
}

ns->size = blk_getlength(ns->blkconf.blk);
if (ns->size < 0) {
error_setg_errno(errp, -ns->size, "could not get blockdev size");
return -1;
}

if (blk_enable_write_cache(ns->blkconf.blk)) {
n->features.vwc = 0x1;
return 0;
}

/*
 * Validate the zoned-namespace properties and derive the zone geometry.
 *
 * The zone size and zone capacity are taken from the namespace parameters
 * (in bytes); a zone size of 0 falls back to NVME_DEFAULT_ZONE_SIZE and a
 * zone capacity of 0 falls back to the zone size.  On success the geometry
 * is stored in logical blocks in ns->zone_size, ns->zone_capacity and
 * ns->num_zones.
 *
 * Returns 0 on success.  On failure, sets @errp and returns -1.
 */
static int nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp)
{
    uint32_t block_bytes = ns->blkconf.logical_block_size;
    uint64_t zsize = ns->params.zone_size_bs ? ns->params.zone_size_bs
                                             : NVME_DEFAULT_ZONE_SIZE;
    uint64_t zcap = ns->params.zone_cap_bs ? ns->params.zone_cap_bs : zsize;

    /* Byte-level sanity checks on the requested size and capacity */
    if (zcap > zsize) {
        error_setg(errp, "zone capacity %"PRIu64"B exceeds "
                   "zone size %"PRIu64"B", zcap, zsize);
        return -1;
    }
    if (zsize < block_bytes) {
        error_setg(errp, "zone size %"PRIu64"B too small, "
                   "must be at least %"PRIu32"B", zsize, block_bytes);
        return -1;
    }
    if (zcap < block_bytes) {
        error_setg(errp, "zone capacity %"PRIu64"B too small, "
                   "must be at least %"PRIu32"B", zcap, block_bytes);
        return -1;
    }

    /* Cache the geometry in logical blocks so it is computed only once */
    ns->zone_size = zsize / block_bytes;
    ns->zone_capacity = zcap / block_bytes;
    ns->num_zones = ns->size / block_bytes / ns->zone_size;

    /* The backing drive must hold at least one complete zone */
    if (ns->num_zones == 0) {
        error_setg(errp,
                   "insufficient drive capacity, must be at least the size "
                   "of one zone (%"PRIu64"B)", zsize);
        return -1;
    }

    /* Open/active limits cannot exceed the number of zones */
    if (ns->params.max_open_zones > ns->num_zones) {
        error_setg(errp,
                   "max_open_zones value %u exceeds the number of zones %u",
                   ns->params.max_open_zones, ns->num_zones);
        return -1;
    }
    if (ns->params.max_active_zones > ns->num_zones) {
        error_setg(errp,
                   "max_active_zones value %u exceeds the number of zones %u",
                   ns->params.max_active_zones, ns->num_zones);
        return -1;
    }

    /*
     * The descriptor extension size is expressed in units of 64 bytes and
     * that count must not exceed 0xff.  A size of 0 passes both tests.
     */
    if (ns->params.zd_extension_size & 0x3f) {
        error_setg(errp,
                   "zone descriptor extension size must be a multiple of 64B");
        return -1;
    }
    if ((ns->params.zd_extension_size >> 6) > 0xff) {
        error_setg(errp, "zone descriptor extension size is too large");
        return -1;
    }

    return 0;
}

/*
 * Allocate and initialise the run-time zone state of a zoned namespace.
 *
 * Allocates ns->zone_array and, when a zone descriptor extension size is
 * configured, ns->zd_extensions.  Initialises the four zone lists and puts
 * every zone into the Empty state with its write pointer at the zone start
 * LBA.  ns->zone_size_log2 is set to log2(zone size) when the zone size is
 * a power of two, and to 0 otherwise.
 *
 * Expects ns->num_zones, ns->zone_size and ns->zone_capacity to have been
 * computed already.
 */
static void nvme_ns_zoned_init_state(NvmeNamespace *ns)
{
    uint64_t start = 0, zone_size = ns->zone_size;
    uint64_t capacity = ns->num_zones * zone_size;
    NvmeZone *zone;
    uint32_t i;

    ns->zone_array = g_new0(NvmeZone, ns->num_zones);
    if (ns->params.zd_extension_size) {
        /*
         * Hand both factors to GLib instead of multiplying them here: the
         * product of two 32-bit values can wrap and lead to an undersized
         * buffer, whereas g_malloc0_n() detects the overflow.
         */
        ns->zd_extensions = g_malloc0_n(ns->num_zones,
                                        ns->params.zd_extension_size);
    }

    QTAILQ_INIT(&ns->exp_open_zones);
    QTAILQ_INIT(&ns->imp_open_zones);
    QTAILQ_INIT(&ns->closed_zones);
    QTAILQ_INIT(&ns->full_zones);

    zone = ns->zone_array;
    for (i = 0; i < ns->num_zones; i++, zone++) {
        /*
         * Defensive clamp for a short last zone.  With capacity computed as
         * num_zones * zone_size above it does not trigger.
         */
        if (start + zone_size > capacity) {
            zone_size = capacity - start;
        }
        zone->d.zt = NVME_ZONE_TYPE_SEQ_WRITE;
        nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
        zone->d.za = 0;
        zone->d.zcap = ns->zone_capacity;
        zone->d.zslba = start;
        zone->d.wp = start;
        zone->w_ptr = start;
        start += zone_size;
    }

    /* A value of 0 means the zone size is not a power of two */
    ns->zone_size_log2 = 0;
    if (is_power_of_2(ns->zone_size)) {
        ns->zone_size_log2 = 63 - clz64(ns->zone_size);
    }
}

/*
 * Complete the setup of a zoned namespace.
 *
 * Initialises the zone state, builds the zoned Identify Namespace data
 * structure (stored in ns->id_ns_zoned), switches the namespace to the
 * zoned command set and recomputes nsze/ncap/nuse from the zone geometry.
 *
 * @lba_index selects the LBA format extension entry that receives the zone
 * size and the zone descriptor extension size.
 */
static void nvme_ns_init_zoned(NvmeNamespace *ns, int lba_index)
{
    NvmeIdNsZoned *zid;
    int npdg_blocks;

    nvme_ns_zoned_init_state(ns);

    zid = g_new0(NvmeIdNsZoned, 1);

    /*
     * MAR/MOR are zero-based; a parameter value of 0 therefore yields
     * 0xffffffff, which means "no limit".
     */
    zid->mar = cpu_to_le32(ns->params.max_active_zones - 1);
    zid->mor = cpu_to_le32(ns->params.max_open_zones - 1);
    zid->zoc = 0;
    if (ns->params.cross_zone_read) {
        zid->ozcs = 0x01;
    }

    zid->lbafe[lba_index].zsze = cpu_to_le64(ns->zone_size);
    /* zdes is expressed in units of 64B */
    zid->lbafe[lba_index].zdes = ns->params.zd_extension_size >> 6;

    ns->csi = NVME_CSI_ZONED;
    ns->id_ns.nuse = ns->id_ns.ncap = ns->id_ns.nsze =
        cpu_to_le64(ns->num_zones * ns->zone_size);

    /*
     * The device uses the BDRV_BLOCK_ZERO flag to determine the "deallocated"
     * status of logical blocks. Since the spec defines that logical blocks
     * SHALL be deallocated when the zone is in the Empty or Offline states,
     * DULBE can only be supported if the zone size is a multiple of the
     * calculated NPDG.
     */
    npdg_blocks = ns->id_ns.npdg + 1;
    if (ns->zone_size % npdg_blocks) {
        warn_report("the zone size (%"PRIu64" blocks) is not a multiple of "
                    "the calculated deallocation granularity (%d blocks); "
                    "DULBE support disabled",
                    ns->zone_size, npdg_blocks);

        ns->id_ns.nsfeat &= ~0x4;
    }

    ns->id_ns_zoned = zid;
}

/*
 * Bring a zone that has been taken off its list into a quiescent state.
 *
 * The cached write pointer is resynchronised with the descriptor.  A zone
 * that holds data (write pointer past the start LBA) or has a valid zone
 * descriptor extension becomes Closed, is counted as active and is put at
 * the head of ns->closed_zones.  Any other zone is set to Empty.
 */
static void nvme_clear_zone(NvmeNamespace *ns, NvmeZone *zone)
{
    uint8_t prev_state;
    bool keep_active;

    zone->w_ptr = zone->d.wp;
    prev_state = nvme_get_zone_state(zone);

    keep_active = zone->d.wp != zone->d.zslba ||
                  (zone->d.za & NVME_ZA_ZD_EXT_VALID);

    if (!keep_active) {
        trace_pci_nvme_clear_ns_reset(prev_state, zone->d.zslba);
        nvme_set_zone_state(zone, NVME_ZONE_STATE_EMPTY);
        return;
    }

    if (prev_state != NVME_ZONE_STATE_CLOSED) {
        trace_pci_nvme_clear_ns_close(prev_state, zone->d.zslba);
        nvme_set_zone_state(zone, NVME_ZONE_STATE_CLOSED);
    }
    nvme_aor_inc_active(ns);
    QTAILQ_INSERT_HEAD(&ns->closed_zones, zone, entry);
}

/*
 * Close all the zones that are currently open.
 *
 * Walks the closed, implicitly open and explicitly open zone lists in that
 * order. Every zone is unlinked from its list, the matching
 * nvme_aor_dec_*() helpers are called for it, and it is then handed to
 * nvme_clear_zone(), which either sets it to Closed and puts it on
 * ns->closed_zones or sets it to Empty.
 *
 * On return ns->nr_open_zones is expected to be zero (asserted).
 */
static void nvme_zoned_ns_shutdown(NvmeNamespace *ns)
{
NvmeZone *zone, *next;

/*
 * The _SAFE iterator is needed because the body unlinks the current zone;
 * nvme_clear_zone() may also put it back at the head of closed_zones.
 */
QTAILQ_FOREACH_SAFE(zone, &ns->closed_zones, entry, next) {
QTAILQ_REMOVE(&ns->closed_zones, zone, entry);
nvme_aor_dec_active(ns);
nvme_clear_zone(ns, zone);
}
/* Implicitly open zones: both the open and the active helper are called */
QTAILQ_FOREACH_SAFE(zone, &ns->imp_open_zones, entry, next) {
QTAILQ_REMOVE(&ns->imp_open_zones, zone, entry);
nvme_aor_dec_open(ns);
nvme_aor_dec_active(ns);
nvme_clear_zone(ns, zone);
}
/* Explicitly open zones: handled the same way as implicitly open ones */
QTAILQ_FOREACH_SAFE(zone, &ns->exp_open_zones, entry, next) {
QTAILQ_REMOVE(&ns->exp_open_zones, zone, entry);
nvme_aor_dec_open(ns);
nvme_aor_dec_active(ns);
nvme_clear_zone(ns, zone);
}

/* Both open lists have been emptied, so no zone may still count as open */
assert(ns->nr_open_zones == 0);
}

static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
{
if (!ns->blkconf.blk) {
Expand All @@ -82,20 +308,25 @@ static int nvme_ns_check_constraints(NvmeNamespace *ns, Error **errp)
return 0;
}

int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
int nvme_ns_setup(NvmeNamespace *ns, Error **errp)
{
if (nvme_ns_check_constraints(ns, errp)) {
return -1;
}

if (nvme_ns_init_blk(n, ns, errp)) {
if (nvme_ns_init_blk(ns, errp)) {
return -1;
}

nvme_ns_init(ns);
if (nvme_register_namespace(n, ns, errp)) {
if (nvme_ns_init(ns, errp)) {
return -1;
}
if (ns->params.zoned) {
if (nvme_ns_zoned_check_calc_geometry(ns, errp) != 0) {
return -1;
}
nvme_ns_init_zoned(ns, 0);
}

return 0;
}
Expand All @@ -105,9 +336,21 @@ void nvme_ns_drain(NvmeNamespace *ns)
blk_drain(ns->blkconf.blk);
}

void nvme_ns_flush(NvmeNamespace *ns)
void nvme_ns_shutdown(NvmeNamespace *ns)
{
blk_flush(ns->blkconf.blk);
if (ns->params.zoned) {
nvme_zoned_ns_shutdown(ns);
}
}

/*
 * Release the memory that was allocated for a zoned namespace: the zoned
 * identify data, the zone array and the zone descriptor extensions.
 * Does nothing for a namespace that is not zoned.
 */
void nvme_ns_cleanup(NvmeNamespace *ns)
{
    if (!ns->params.zoned) {
        return;
    }

    g_free(ns->id_ns_zoned);
    g_free(ns->zone_array);
    g_free(ns->zd_extensions);
}

static void nvme_ns_realize(DeviceState *dev, Error **errp)
Expand All @@ -117,16 +360,37 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
NvmeCtrl *n = NVME(s->parent);
Error *local_err = NULL;

if (nvme_ns_setup(n, ns, &local_err)) {
if (nvme_ns_setup(ns, &local_err)) {
error_propagate_prepend(errp, local_err,
"could not setup namespace: ");
return;
}

if (nvme_register_namespace(n, ns, errp)) {
error_propagate_prepend(errp, local_err,
"could not register namespace: ");
return;
}

}

/*
 * User-configurable properties of the namespace device. The "zoned.*"
 * properties are only consumed when "zoned" is true.
 */
static Property nvme_ns_props[] = {
DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
DEFINE_PROP_UUID("uuid", NvmeNamespace, params.uuid),
/* Enable the zoned setup path for this namespace */
DEFINE_PROP_BOOL("zoned", NvmeNamespace, params.zoned, false),
/* Zone size in bytes */
DEFINE_PROP_SIZE("zoned.zone_size", NvmeNamespace, params.zone_size_bs,
NVME_DEFAULT_ZONE_SIZE),
/* Zone capacity in bytes; 0 means "same as the zone size" */
DEFINE_PROP_SIZE("zoned.zone_capacity", NvmeNamespace, params.zone_cap_bs,
0),
/* When true, bit 0 of OZCS is set in the zoned identify data */
DEFINE_PROP_BOOL("zoned.cross_read", NvmeNamespace,
params.cross_zone_read, false),
/* Limit on active zones; 0 is reported as MAR 0xffffffff (no limit) */
DEFINE_PROP_UINT32("zoned.max_active", NvmeNamespace,
params.max_active_zones, 0),
/* Limit on open zones; 0 is reported as MOR 0xffffffff (no limit) */
DEFINE_PROP_UINT32("zoned.max_open", NvmeNamespace,
params.max_open_zones, 0),
/* Zone descriptor extension size in bytes; must be a multiple of 64 */
DEFINE_PROP_UINT32("zoned.descr_ext_size", NvmeNamespace,
params.zd_extension_size, 0),
DEFINE_PROP_END_OF_LIST(),
};

Expand Down

0 comments on commit 1214d55

Please sign in to comment.