Skip to content

Commit 2ddd24e

Browse files
conghuic23wenlingz
authored andcommitted
dm: storage: support discard command
Supporting the DISCARD command is meaningful when eMMC usage is high or when there are many remove operations. For example, when a Guest Android is running, many files are created and removed. However, the virtio-blk BE does not support the DISCARD command, so a data remove operation in the UOS does not trigger an erase in the eMMC. After a period of time, the free space of the eMMC is used up, and the eMMC firmware must perform an erase before any new data can be written. This degrades eMMC performance for the whole system (SOS and UOS). To solve the problem, DISCARD should be supported in the virtio-blk BE. Tracked-On: #2011 Signed-off-by: Conghui Chen <conghui.chen@intel.com> Reviewed-by: Shuo A Liu <shuo.a.liu@intel.com> Acked-by: Yu Wang <yu1.wang@intel.com>
1 parent f71370a commit 2ddd24e

File tree

3 files changed

+190
-22
lines changed

3 files changed

+190
-22
lines changed

devicemodel/hw/block_if.c

Lines changed: 159 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <sys/queue.h>
3131
#include <sys/stat.h>
3232
#include <sys/ioctl.h>
33+
#include <linux/falloc.h>
3334
#include <linux/fs.h>
3435
#include <errno.h>
3536
#include <assert.h>
@@ -63,6 +64,7 @@
6364

6465
#define BLOCKIF_NUMTHR 8
6566
#define BLOCKIF_MAXREQ (64 + BLOCKIF_NUMTHR)
67+
#define MAX_DISCARD_SEGMENT 256
6668

6769
/*
6870
* Debug printf
@@ -108,6 +110,9 @@ struct blockif_ctxt {
108110
int sectsz;
109111
int psectsz;
110112
int psectoff;
113+
int max_discard_sectors;
114+
int max_discard_seg;
115+
int discard_sector_alignment;
111116
int closing;
112117
pthread_t btid[BLOCKIF_NUMTHR];
113118
pthread_mutex_t mtx;
@@ -132,6 +137,12 @@ struct blockif_sig_elem {
132137
struct blockif_sig_elem *next;
133138
};
134139

140+
/*
 * One discard range as read from the request iov on the virtio-blk path.
 * Both sector fields are multiplied by DEV_BSIZE by the code that consumes
 * this structure to obtain byte values.
 */
struct discard_range {
141+
uint64_t sector;	/* first sector of the range */
142+
uint32_t num_sectors;	/* length of the range in sectors */
143+
uint32_t flags;		/* not read by the code visible here */
144+
};
145+
135146
static struct blockif_sig_elem *blockif_bse_head;
136147

137148
static int
@@ -232,11 +243,92 @@ blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
232243
TAILQ_INSERT_TAIL(&bc->freeq, be, link);
233244
}
234245

246+
static int
247+
discard_range_validate(struct blockif_ctxt *bc, off_t start, off_t size)
248+
{
249+
off_t start_sector = start / DEV_BSIZE;
250+
off_t size_sector = size / DEV_BSIZE;
251+
252+
if (!size || (start + size) > (bc->size + bc->sub_file_start_lba))
253+
return -1;
254+
255+
if ((size_sector > bc->max_discard_sectors) ||
256+
(bc->discard_sector_alignment &&
257+
start_sector % bc->discard_sector_alignment))
258+
return -1;
259+
return 0;
260+
}
261+
262+
static int
263+
blockif_process_discard(struct blockif_ctxt *bc, struct blockif_req *br)
264+
{
265+
int err;
266+
struct discard_range *range;
267+
int n_range, i, segment;
268+
off_t arg[MAX_DISCARD_SEGMENT][2];
269+
270+
err = 0;
271+
n_range = 0;
272+
segment = 0;
273+
if (!bc->candiscard)
274+
return EOPNOTSUPP;
275+
276+
if (bc->rdonly)
277+
return EROFS;
278+
279+
if (br->iovcnt == 1) {
280+
/* virtio-blk use iov to transfer discard range */
281+
n_range = br->iov[0].iov_len/sizeof(*range);
282+
range = br->iov[0].iov_base;
283+
for (i = 0; i < n_range; i++) {
284+
arg[i][0] = range[i].sector * DEV_BSIZE +
285+
bc->sub_file_start_lba;
286+
arg[i][1] = range[i].num_sectors * DEV_BSIZE;
287+
segment++;
288+
if (segment > bc->max_discard_seg) {
289+
WPRINTF(("segment > max_discard_seg\n"));
290+
return EINVAL;
291+
}
292+
if (discard_range_validate(bc, arg[i][0], arg[i][1])) {
293+
WPRINTF(("range [%ld: %ld] is invalid\n", arg[i][0], arg[i][1]));
294+
return EINVAL;
295+
}
296+
}
297+
} else {
298+
/* ahci parse discard range to br->offset and br->reside */
299+
arg[0][0] = br->offset + bc->sub_file_start_lba;
300+
arg[0][1] = br->resid;
301+
segment = 1;
302+
}
303+
for (i = 0; i < segment; i++) {
304+
if (bc->isblk) {
305+
err = ioctl(bc->fd, BLKDISCARD, arg[i]);
306+
} else {
307+
/* FALLOC_FL_PUNCH_HOLE:
308+
* Deallocates space in the byte range starting at offset and
309+
* continuing for length bytes. After a successful call,
310+
* subsequent reads from this range will return zeroes.
311+
* FALLOC_FL_KEEP_SIZE:
312+
* Do not modify the apparent length of the file.
313+
*/
314+
err = fallocate(bc->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
315+
arg[i][0], arg[i][1]);
316+
}
317+
if (err) {
318+
WPRINTF(("Failed to discard offset=%ld nbytes=%ld err code: %d\n",
319+
arg[i][0], arg[i][1], err));
320+
return err;
321+
}
322+
}
323+
br->resid = 0;
324+
325+
return 0;
326+
}
327+
235328
static void
236329
blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
237330
{
238331
struct blockif_req *br;
239-
off_t arg[2];
240332
ssize_t len;
241333
int err;
242334

@@ -271,21 +363,7 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
271363
err = errno;
272364
break;
273365
case BOP_DISCARD:
274-
/* only used by AHCI */
275-
if (!bc->candiscard)
276-
err = EOPNOTSUPP;
277-
else if (bc->rdonly)
278-
err = EROFS;
279-
else if (bc->isblk) {
280-
arg[0] = br->offset;
281-
arg[1] = br->resid;
282-
if (ioctl(bc->fd, BLKDISCARD, arg))
283-
err = errno;
284-
else
285-
br->resid = 0;
286-
}
287-
else
288-
err = EOPNOTSUPP;
366+
err = blockif_process_discard(bc, br);
289367
break;
290368
default:
291369
err = EINVAL;
@@ -425,6 +503,8 @@ blockif_open(const char *optstr, const char *ident)
425503
int err_code = -1;
426504
off_t sub_file_start_lba, sub_file_size;
427505
int sub_file_assign;
506+
int max_discard_sectors, max_discard_seg, discard_sector_alignment;
507+
off_t probe_arg[] = {0, 0};
428508

429509
pthread_once(&blockif_once, blockif_init);
430510

@@ -436,9 +516,15 @@ blockif_open(const char *optstr, const char *ident)
436516
sub_file_start_lba = 0;
437517
sub_file_size = 0;
438518

519+
max_discard_sectors = -1;
520+
max_discard_seg = -1;
521+
discard_sector_alignment = -1;
522+
439523
/* writethru is on by default */
440524
writeback = 0;
441525

526+
candiscard = 0;
527+
442528
/*
443529
* The first element in the optstring is always a pathname.
444530
* Optional elements follow
@@ -458,7 +544,18 @@ blockif_open(const char *optstr, const char *ident)
458544
writeback = 0;
459545
else if (!strcmp(cp, "ro"))
460546
ro = 1;
461-
else if (!strncmp(cp, "sectorsize", strlen("sectorsize"))) {
547+
else if (!strncmp(cp, "discard", strlen("discard"))) {
548+
strsep(&cp, "=");
549+
if (cp != NULL) {
550+
if (!(!dm_strtoi(cp, &cp, 10, &max_discard_sectors) &&
551+
*cp == ':' &&
552+
!dm_strtoi(cp + 1, &cp, 10, &max_discard_seg) &&
553+
*cp == ':' &&
554+
!dm_strtoi(cp + 1, &cp, 10, &discard_sector_alignment)))
555+
goto err;
556+
}
557+
candiscard = 1;
558+
} else if (!strncmp(cp, "sectorsize", strlen("sectorsize"))) {
462559
/*
463560
* sectorsize=<sector size>
464561
* or
@@ -517,7 +614,6 @@ blockif_open(const char *optstr, const char *ident)
517614
size = sbuf.st_size;
518615
sectsz = DEV_BSIZE;
519616
psectsz = psectoff = 0;
520-
candiscard = 0;
521617

522618
if (S_ISBLK(sbuf.st_mode)) {
523619
/* get size */
@@ -552,8 +648,22 @@ blockif_open(const char *optstr, const char *ident)
552648
DPRINTF(("block partition physical sector size is 0x%lx\n",
553649
psectsz));
554650

555-
} else
651+
if (candiscard) {
652+
err_code = ioctl(fd, BLKDISCARD, probe_arg);
653+
if (err_code) {
654+
WPRINTF(("not support DISCARD\n"));
655+
candiscard = 0;
656+
}
657+
}
658+
659+
} else {
660+
if (size < DEV_BSIZE || (size & (DEV_BSIZE - 1))) {
661+
WPRINTF(("%s size not corret, should be multiple of %d\n",
662+
nopt, DEV_BSIZE));
663+
return 0;
664+
}
556665
psectsz = sbuf.st_blksize;
666+
}
557667

558668
if (ssopt != 0) {
559669
if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 ||
@@ -613,6 +723,15 @@ blockif_open(const char *optstr, const char *ident)
613723
bc->fd = fd;
614724
bc->isblk = S_ISBLK(sbuf.st_mode);
615725
bc->candiscard = candiscard;
726+
if (candiscard) {
727+
bc->max_discard_sectors =
728+
(max_discard_sectors != -1) ?
729+
max_discard_sectors : (size / DEV_BSIZE);
730+
bc->max_discard_seg =
731+
(max_discard_seg != -1) ? max_discard_seg : 1;
732+
bc->discard_sector_alignment =
733+
(discard_sector_alignment != -1) ? discard_sector_alignment : 0;
734+
}
616735
bc->rdonly = ro;
617736
bc->size = size;
618737
bc->sectsz = sectsz;
@@ -906,6 +1025,27 @@ blockif_candiscard(struct blockif_ctxt *bc)
9061025
return bc->candiscard;
9071026
}
9081027

1028+
int
1029+
blockif_max_discard_sectors(struct blockif_ctxt *bc)
1030+
{
1031+
assert(bc->magic == BLOCKIF_SIG);
1032+
return bc->max_discard_sectors;
1033+
}
1034+
1035+
int
1036+
blockif_max_discard_seg(struct blockif_ctxt *bc)
1037+
{
1038+
assert(bc->magic == BLOCKIF_SIG);
1039+
return bc->max_discard_seg;
1040+
}
1041+
1042+
int
1043+
blockif_discard_sector_alignment(struct blockif_ctxt *bc)
1044+
{
1045+
assert(bc->magic == BLOCKIF_SIG);
1046+
return bc->discard_sector_alignment;
1047+
}
1048+
9091049
uint8_t
9101050
blockif_get_wce(struct blockif_ctxt *bc)
9111051
{

devicemodel/hw/pci/virtio/virtio_block.c

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@
5858
/* Device can toggle its cache between writeback and writethrough modes */
5959
#define VIRTIO_BLK_F_CONFIG_WCE (1 << 11)
6060

61+
#define VIRTIO_BLK_F_DISCARD (1 << 13)
62+
6163
/*
6264
* Basic device capabilities
6365
*/
@@ -94,6 +96,15 @@ struct virtio_blk_config {
9496
uint32_t opt_io_size;
9597
} topology;
9698
uint8_t writeback;
99+
uint8_t unused;
100+
/* Reserved for num_queues when VIRTIO_BLK_F_MQ is supported */
101+
uint16_t reserve;
102+
/* The maximum discard sectors (in 512-byte sectors) for one segment */
103+
uint32_t max_discard_sectors;
104+
/* The maximum number of discard segments */
105+
uint32_t max_discard_seg;
106+
/* Discard commands must be aligned to this number of sectors. */
107+
uint32_t discard_sector_alignment;
97108
} __attribute__((packed));
98109

99110
/*
@@ -105,6 +116,7 @@ struct virtio_blk_hdr {
105116
#define VBH_OP_FLUSH 4
106117
#define VBH_OP_FLUSH_OUT 5
107118
#define VBH_OP_IDENT 8
119+
#define VBH_OP_DISCARD 11
108120
#define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into type */
109121
uint32_t type;
110122
uint32_t ioprio;
@@ -234,12 +246,13 @@ virtio_blk_proc(struct virtio_blk *blk, struct virtio_vq_info *vq)
234246
* we don't advertise the capability.
235247
*/
236248
type = vbh->type & ~VBH_FLAG_BARRIER;
237-
writeop = (type == VBH_OP_WRITE);
249+
writeop = ((type == VBH_OP_WRITE) ||
250+
(type == VBH_OP_DISCARD));
238251

239252
iolen = 0;
240253
for (i = 1; i < n; i++) {
241254
/*
242-
* - write op implies read-only descriptor,
255+
* - write/discard op implies read-only descriptor,
243256
* - read/ident op implies write-only descriptor,
244257
* therefore test the inverse of the descriptor bit
245258
* to the op.
@@ -250,7 +263,7 @@ virtio_blk_proc(struct virtio_blk *blk, struct virtio_vq_info *vq)
250263
io->req.resid = iolen;
251264

252265
DPRINTF(("virtio_blk: %s op, %zd bytes, %d segs, offset %ld\n\r",
253-
writeop ? "write" : "read/ident", iolen, i - 1,
266+
writeop ? "write/discard" : "read/ident", iolen, i - 1,
254267
io->req.offset));
255268

256269
switch (type) {
@@ -279,6 +292,9 @@ virtio_blk_proc(struct virtio_blk *blk, struct virtio_vq_info *vq)
279292
err = ((type == VBH_OP_READ) ? blockif_read : blockif_write)
280293
(blk->bc, &io->req);
281294
break;
295+
case VBH_OP_DISCARD:
296+
err = blockif_discard(blk->bc, &io->req);
297+
break;
282298
case VBH_OP_FLUSH:
283299
case VBH_OP_FLUSH_OUT:
284300
err = blockif_flush(blk->bc, &io->req);
@@ -315,6 +331,10 @@ virtio_blk_get_caps(struct virtio_blk *blk, bool wb)
315331
caps = VIRTIO_BLK_S_HOSTCAPS;
316332
if (wb)
317333
caps |= VIRTIO_BLK_F_WB_BITS;
334+
335+
if (blockif_candiscard(blk->bc))
336+
caps |= VIRTIO_BLK_F_DISCARD;
337+
318338
return caps;
319339
}
320340

@@ -420,6 +440,11 @@ virtio_blk_init(struct vmctx *ctx, struct pci_vdev *dev, char *opts)
420440
blk->cfg.topology.opt_io_size = 0;
421441
blk->cfg.writeback = blockif_get_wce(blk->bc);
422442
blk->original_wce = blk->cfg.writeback; /* save for reset */
443+
if (blockif_candiscard(blk->bc)) {
444+
blk->cfg.max_discard_sectors = blockif_max_discard_sectors(blk->bc);
445+
blk->cfg.max_discard_seg = blockif_max_discard_seg(blk->bc);
446+
blk->cfg.discard_sector_alignment = blockif_discard_sector_alignment(blk->bc);
447+
}
423448
blk->base.device_caps =
424449
virtio_blk_get_caps(blk, !!blk->cfg.writeback);
425450

devicemodel/include/block_if.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,5 +69,8 @@ int blockif_close(struct blockif_ctxt *bc);
6969
uint8_t blockif_get_wce(struct blockif_ctxt *bc);
7070
void blockif_set_wce(struct blockif_ctxt *bc, uint8_t wce);
7171
int blockif_flush_all(struct blockif_ctxt *bc);
72+
int blockif_max_discard_sectors(struct blockif_ctxt *bc);
73+
int blockif_max_discard_seg(struct blockif_ctxt *bc);
74+
int blockif_discard_sector_alignment(struct blockif_ctxt *bc);
7275

7376
#endif /* _BLOCK_IF_H_ */

0 commit comments

Comments
 (0)