Expand Up
@@ -17,6 +17,7 @@
#include "qemu/module.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
#include "block/block_int.h"
#include "trace.h"
#include "hw/block/block.h"
#include "hw/qdev-properties.h"
Expand Down
Expand Up
@@ -601,6 +602,335 @@ static uint8_t virtio_blk_handle_discard_write_zeroes(VirtIOBlockReq *req,
return err_status ;
}
/*
 * Per-request context passed through the asynchronous zoned-command
 * completion callbacks.  Which union member is valid depends on the
 * command that allocated it: zone_report_data for VIRTIO_BLK_T_ZONE_REPORT,
 * zone_append_data for VIRTIO_BLK_T_ZONE_APPEND.
 */
typedef struct ZoneCmdData {
    VirtIOBlockReq *req;        /* request being served; completed in the cb */
    struct iovec *in_iov;       /* guest buffers the result is copied into */
    unsigned in_num;            /* number of elements in in_iov */
    union {
        struct {
            /* zones requested; updated by the block layer to zones found */
            unsigned int nr_zones;
            /* array of nr_zones descriptors filled by blk_aio_zone_report */
            BlockZoneDescriptor *zones;
        } zone_report_data;
        struct {
            /* byte offset where the appended data was actually written */
            int64_t offset;
        } zone_append_data;
    };
} ZoneCmdData;
/*
 * check_zoned_request: error checking before issuing requests. If all checks
 * passed, return true.
 * append: true if only zone append requests issued.
 */
static bool check_zoned_request(VirtIOBlock *s, int64_t offset, int64_t len,
                                bool append, uint8_t *status) {
    BlockDriverState *bs = blk_bs(s->blk);
    int64_t capacity = bs->total_sectors << BDRV_SECTOR_BITS;

    if (!virtio_has_feature(s->host_features, VIRTIO_BLK_F_ZONED)) {
        *status = VIRTIO_BLK_S_UNSUPP;
        return false;
    }

    /* The byte range must lie entirely within the device. */
    if (offset < 0 || len < 0 || len > capacity || offset > capacity - len) {
        *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        return false;
    }

    if (!append) {
        return true;
    }

    /* Zone append: the target offset must respect the write granularity. */
    if (bs->bl.write_granularity && (offset % bs->bl.write_granularity) != 0) {
        *status = VIRTIO_BLK_S_ZONE_UNALIGNED_WP;
        return false;
    }

    /* Appending to a conventional zone is not permitted. */
    if (BDRV_ZT_IS_CONV(bs->wps->wp[offset / bs->bl.zone_size])) {
        *status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        return false;
    }

    if (len / 512 > bs->bl.max_append_sectors) {
        /* A zero limit means zone append is not supported at all. */
        *status = bs->bl.max_append_sectors == 0 ? VIRTIO_BLK_S_UNSUPP
                                                 : VIRTIO_BLK_S_ZONE_INVALID_CMD;
        return false;
    }

    return true;
}
static void virtio_blk_zone_report_complete (void * opaque , int ret )
{
ZoneCmdData * data = opaque ;
VirtIOBlockReq * req = data -> req ;
VirtIOBlock * s = req -> dev ;
VirtIODevice * vdev = VIRTIO_DEVICE (req -> dev );
struct iovec * in_iov = data -> in_iov ;
unsigned in_num = data -> in_num ;
int64_t zrp_size , n , j = 0 ;
int64_t nz = data -> zone_report_data .nr_zones ;
int8_t err_status = VIRTIO_BLK_S_OK ;
if (ret ) {
err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD ;
goto out ;
}
struct virtio_blk_zone_report zrp_hdr = (struct virtio_blk_zone_report ) {
.nr_zones = cpu_to_le64 (nz ),
};
zrp_size = sizeof (struct virtio_blk_zone_report )
+ sizeof (struct virtio_blk_zone_descriptor ) * nz ;
n = iov_from_buf (in_iov , in_num , 0 , & zrp_hdr , sizeof (zrp_hdr ));
if (n != sizeof (zrp_hdr )) {
virtio_error (vdev , "Driver provided input buffer that is too small!" );
err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD ;
goto out ;
}
for (size_t i = sizeof (zrp_hdr ); i < zrp_size ;
i += sizeof (struct virtio_blk_zone_descriptor ), ++ j ) {
struct virtio_blk_zone_descriptor desc =
(struct virtio_blk_zone_descriptor ) {
.z_start = cpu_to_le64 (data -> zone_report_data .zones [j ].start
>> BDRV_SECTOR_BITS ),
.z_cap = cpu_to_le64 (data -> zone_report_data .zones [j ].cap
>> BDRV_SECTOR_BITS ),
.z_wp = cpu_to_le64 (data -> zone_report_data .zones [j ].wp
>> BDRV_SECTOR_BITS ),
};
switch (data -> zone_report_data .zones [j ].type ) {
case BLK_ZT_CONV :
desc .z_type = VIRTIO_BLK_ZT_CONV ;
break ;
case BLK_ZT_SWR :
desc .z_type = VIRTIO_BLK_ZT_SWR ;
break ;
case BLK_ZT_SWP :
desc .z_type = VIRTIO_BLK_ZT_SWP ;
break ;
default :
g_assert_not_reached ();
}
switch (data -> zone_report_data .zones [j ].state ) {
case BLK_ZS_RDONLY :
desc .z_state = VIRTIO_BLK_ZS_RDONLY ;
break ;
case BLK_ZS_OFFLINE :
desc .z_state = VIRTIO_BLK_ZS_OFFLINE ;
break ;
case BLK_ZS_EMPTY :
desc .z_state = VIRTIO_BLK_ZS_EMPTY ;
break ;
case BLK_ZS_CLOSED :
desc .z_state = VIRTIO_BLK_ZS_CLOSED ;
break ;
case BLK_ZS_FULL :
desc .z_state = VIRTIO_BLK_ZS_FULL ;
break ;
case BLK_ZS_EOPEN :
desc .z_state = VIRTIO_BLK_ZS_EOPEN ;
break ;
case BLK_ZS_IOPEN :
desc .z_state = VIRTIO_BLK_ZS_IOPEN ;
break ;
case BLK_ZS_NOT_WP :
desc .z_state = VIRTIO_BLK_ZS_NOT_WP ;
break ;
default :
g_assert_not_reached ();
}
/* TODO: it takes O(n^2) time complexity. Optimizations required. */
n = iov_from_buf (in_iov , in_num , i , & desc , sizeof (desc ));
if (n != sizeof (desc )) {
virtio_error (vdev , "Driver provided input buffer "
"for descriptors that is too small!" );
err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD ;
}
}
out :
aio_context_acquire (blk_get_aio_context (s -> conf .conf .blk ));
virtio_blk_req_complete (req , err_status );
virtio_blk_free_request (req );
aio_context_release (blk_get_aio_context (s -> conf .conf .blk ));
g_free (data -> zone_report_data .zones );
g_free (data );
}
/*
 * Handle VIRTIO_BLK_T_ZONE_REPORT: validate the request, size the zone
 * descriptor array from the guest-provided in buffer, and submit an
 * asynchronous zone report.  On success the request is completed (and the
 * ZoneCmdData freed) by virtio_blk_zone_report_complete().
 */
static void virtio_blk_handle_zone_report(VirtIOBlockReq *req,
                                          struct iovec *in_iov,
                                          unsigned in_num)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    unsigned int nr_zones;
    ZoneCmdData *data;
    int64_t zone_size, offset;
    uint8_t err_status;

    /* Room for the inhdr, the report header and at least one descriptor. */
    if (req->in_len < sizeof(struct virtio_blk_inhdr) +
            sizeof(struct virtio_blk_zone_report) +
            sizeof(struct virtio_blk_zone_descriptor)) {
        virtio_error(vdev, "in buffer too small for zone report");
        /*
         * Fix: a bare return here leaked the request (it was never
         * completed nor freed); fail it with an error status instead.
         */
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }

    /* start byte offset of the zone report */
    offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
    if (!check_zoned_request(s, offset, 0, false, &err_status)) {
        goto out;
    }

    /* As many descriptors as fit in the in buffer after the two headers. */
    nr_zones = (req->in_len - sizeof(struct virtio_blk_inhdr) -
                sizeof(struct virtio_blk_zone_report)) /
               sizeof(struct virtio_blk_zone_descriptor);
    zone_size = sizeof(BlockZoneDescriptor) * nr_zones;
    data = g_malloc(sizeof(ZoneCmdData));
    data->req = req;
    data->in_iov = in_iov;
    data->in_num = in_num;
    data->zone_report_data.nr_zones = nr_zones;
    data->zone_report_data.zones = g_malloc(zone_size); /* was ',' typo */

    blk_aio_zone_report(s->blk, offset, &data->zone_report_data.nr_zones,
                        data->zone_report_data.zones,
                        virtio_blk_zone_report_complete, data);
    return;

out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
}
/*
 * Completion callback for blk_aio_zone_mgmt(): map the block-layer result
 * onto a virtio status and complete the request under the AioContext lock.
 */
static void virtio_blk_zone_mgmt_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;
    VirtIOBlock *s = req->dev;
    int8_t err_status = ret ? VIRTIO_BLK_S_ZONE_INVALID_CMD : VIRTIO_BLK_S_OK;

    aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    aio_context_release(blk_get_aio_context(s->conf.conf.blk));
}
/*
 * Handle the zone management commands (open/close/finish/reset and
 * reset-all): derive the byte range the operation applies to, validate it,
 * and submit an asynchronous zone management request.  Returns 0 on
 * successful submission, or the virtio error status on validation failure.
 */
static int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op)
{
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    BlockDriverState *bs = blk_bs(s->blk);
    int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
    uint64_t capacity = bs->total_sectors << BDRV_SECTOR_BITS;
    uint32_t type = virtio_ldl_p(vdev, &req->out.type);
    uint8_t err_status = VIRTIO_BLK_S_OK;
    uint64_t len;

    if (type == VIRTIO_BLK_T_ZONE_RESET_ALL) {
        /* Entire drive capacity */
        offset = 0;
        len = capacity;
    } else if (bs->bl.zone_size > capacity - offset) {
        /* The zoned device allows the last smaller zone. */
        len = capacity - bs->bl.zone_size * (bs->bl.nr_zones - 1);
    } else {
        len = bs->bl.zone_size;
    }

    if (!check_zoned_request(s, offset, len, false, &err_status)) {
        goto out;
    }

    blk_aio_zone_mgmt(s->blk, op, offset, len,
                      virtio_blk_zone_mgmt_complete, req);
    return 0;

out:
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    return err_status;
}
/*
 * Completion callback for blk_aio_zone_append(): report the sector at
 * which the data was actually written back to the guest, then complete
 * the request and free the ZoneCmdData.
 */
static void virtio_blk_zone_append_complete(void *opaque, int ret)
{
    ZoneCmdData *data = opaque;
    VirtIOBlockReq *req = data->req;
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
    uint8_t err_status = VIRTIO_BLK_S_OK;
    int64_t append_sector;
    int64_t n;

    if (ret) {
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
        goto out;
    }

    /* The device returns the first sector of the appended data. */
    virtio_stq_p(vdev, &append_sector,
                 data->zone_append_data.offset >> BDRV_SECTOR_BITS);
    n = iov_from_buf(data->in_iov, data->in_num, 0, &append_sector,
                     sizeof(append_sector));
    if (n != sizeof(append_sector)) {
        virtio_error(vdev, "Driver provided input buffer less than size of "
                           "append_sector");
        err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
    }

out:
    aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    aio_context_release(blk_get_aio_context(s->conf.conf.blk));
    g_free(data);
}
/*
 * Handle VIRTIO_BLK_T_ZONE_APPEND: validate the append against the zone
 * limits, then submit an asynchronous zone append of the data in out_iov.
 * On success the request is completed (and the ZoneCmdData freed) by
 * virtio_blk_zone_append_complete(); on validation failure the request is
 * completed here and the error status returned.
 *
 * NOTE(review): the caller passes out_iov/out_num already advanced past the
 * request header — presumably, given len is taken as the full iov_size;
 * confirm against virtio_blk_handle_request().
 */
static int virtio_blk_handle_zone_append(VirtIOBlockReq *req,
                                         struct iovec *out_iov,
                                         struct iovec *in_iov,
                                         uint64_t out_num,
                                         unsigned in_num) {
    VirtIOBlock *s = req->dev;
    VirtIODevice *vdev = VIRTIO_DEVICE(s);
    uint8_t err_status = VIRTIO_BLK_S_OK;

    /* Requested byte offset; the device decides the final write position. */
    int64_t offset = virtio_ldq_p(vdev, &req->out.sector) << BDRV_SECTOR_BITS;
    int64_t len = iov_size(out_iov, out_num);

    if (!check_zoned_request(s, offset, len, true, &err_status)) {
        goto out;
    }

    /* Freed by virtio_blk_zone_append_complete(). */
    ZoneCmdData *data = g_malloc(sizeof(ZoneCmdData));
    data->req = req;
    data->in_iov = in_iov;
    data->in_num = in_num;
    /* Updated in place by the block layer to the actual write offset. */
    data->zone_append_data.offset = offset;
    qemu_iovec_init_external(&req->qiov, out_iov, out_num);
    blk_aio_zone_append(s->blk, &data->zone_append_data.offset, &req->qiov, 0,
                        virtio_blk_zone_append_complete, data);
    return 0;

out:
    /* Complete the failed request under the backend's AioContext lock. */
    aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
    virtio_blk_req_complete(req, err_status);
    virtio_blk_free_request(req);
    aio_context_release(blk_get_aio_context(s->conf.conf.blk));
    return err_status;
}
static int virtio_blk_handle_request (VirtIOBlockReq * req , MultiReqBuffer * mrb )
{
uint32_t type ;
Expand Down
Expand Up
@@ -687,6 +1017,24 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
case VIRTIO_BLK_T_FLUSH :
virtio_blk_handle_flush (req , mrb );
break ;
case VIRTIO_BLK_T_ZONE_REPORT :
virtio_blk_handle_zone_report (req , in_iov , in_num );
break ;
case VIRTIO_BLK_T_ZONE_OPEN :
virtio_blk_handle_zone_mgmt (req , BLK_ZO_OPEN );
break ;
case VIRTIO_BLK_T_ZONE_CLOSE :
virtio_blk_handle_zone_mgmt (req , BLK_ZO_CLOSE );
break ;
case VIRTIO_BLK_T_ZONE_FINISH :
virtio_blk_handle_zone_mgmt (req , BLK_ZO_FINISH );
break ;
case VIRTIO_BLK_T_ZONE_RESET :
virtio_blk_handle_zone_mgmt (req , BLK_ZO_RESET );
break ;
case VIRTIO_BLK_T_ZONE_RESET_ALL :
virtio_blk_handle_zone_mgmt (req , BLK_ZO_RESET );
break ;
case VIRTIO_BLK_T_SCSI_CMD :
virtio_blk_handle_scsi (req );
break ;
Expand All
@@ -705,6 +1053,14 @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
virtio_blk_free_request (req );
break ;
}
case VIRTIO_BLK_T_ZONE_APPEND & ~VIRTIO_BLK_T_OUT :
/*
* Passing out_iov/out_num and in_iov/in_num is not safe
* to access req->elem.out_sg directly because it may be
* modified by virtio_blk_handle_request().
*/
virtio_blk_handle_zone_append (req , out_iov , in_iov , out_num , in_num );
break ;
/*
* VIRTIO_BLK_T_DISCARD and VIRTIO_BLK_T_WRITE_ZEROES are defined with
* VIRTIO_BLK_T_OUT flag set. We masked this flag in the switch statement,
Expand Down
Expand Up
@@ -890,6 +1246,7 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
{
VirtIOBlock * s = VIRTIO_BLK (vdev );
BlockConf * conf = & s -> conf .conf ;
BlockDriverState * bs = blk_bs (s -> blk );
struct virtio_blk_config blkcfg ;
uint64_t capacity ;
int64_t length ;
Expand Down
Expand Up
@@ -954,6 +1311,30 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
blkcfg .write_zeroes_may_unmap = 1 ;
virtio_stl_p (vdev , & blkcfg .max_write_zeroes_seg , 1 );
}
if (bs -> bl .zoned != BLK_Z_NONE ) {
switch (bs -> bl .zoned ) {
case BLK_Z_HM :
blkcfg .zoned .model = VIRTIO_BLK_Z_HM ;
break ;
case BLK_Z_HA :
blkcfg .zoned .model = VIRTIO_BLK_Z_HA ;
break ;
default :
g_assert_not_reached ();
}
virtio_stl_p (vdev , & blkcfg .zoned .zone_sectors ,
bs -> bl .zone_size / 512 );
virtio_stl_p (vdev , & blkcfg .zoned .max_active_zones ,
bs -> bl .max_active_zones );
virtio_stl_p (vdev , & blkcfg .zoned .max_open_zones ,
bs -> bl .max_open_zones );
virtio_stl_p (vdev , & blkcfg .zoned .write_granularity , blk_size );
virtio_stl_p (vdev , & blkcfg .zoned .max_append_sectors ,
bs -> bl .max_append_sectors );
} else {
blkcfg .zoned .model = VIRTIO_BLK_Z_NONE ;
}
memcpy (config , & blkcfg , s -> config_size );
}
Expand Down
Expand Up
@@ -1118,6 +1499,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
VirtIODevice * vdev = VIRTIO_DEVICE (dev );
VirtIOBlock * s = VIRTIO_BLK (dev );
VirtIOBlkConf * conf = & s -> conf ;
BlockDriverState * bs = blk_bs (conf -> conf .blk );
Error * err = NULL ;
unsigned i ;
Expand Down
Expand Up
@@ -1163,6 +1545,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
return ;
}
if (bs -> bl .zoned != BLK_Z_NONE ) {
virtio_add_feature (& s -> host_features , VIRTIO_BLK_F_ZONED );
if (bs -> bl .zoned == BLK_Z_HM ) {
virtio_clear_feature (& s -> host_features , VIRTIO_BLK_F_DISCARD );
}
}
if (virtio_has_feature (s -> host_features , VIRTIO_BLK_F_DISCARD ) &&
(!conf -> max_discard_sectors ||
conf -> max_discard_sectors > BDRV_REQUEST_MAX_SECTORS )) {
Expand Down