30
30
#include <sys/queue.h>
31
31
#include <sys/stat.h>
32
32
#include <sys/ioctl.h>
33
+ #include <linux/falloc.h>
33
34
#include <linux/fs.h>
34
35
#include <errno.h>
35
36
#include <assert.h>
63
64
64
65
#define BLOCKIF_NUMTHR 8
65
66
#define BLOCKIF_MAXREQ (64 + BLOCKIF_NUMTHR)
67
+ #define MAX_DISCARD_SEGMENT 256
66
68
67
69
/*
68
70
* Debug printf
@@ -108,6 +110,9 @@ struct blockif_ctxt {
108
110
int sectsz ;
109
111
int psectsz ;
110
112
int psectoff ;
113
+ int max_discard_sectors ;
114
+ int max_discard_seg ;
115
+ int discard_sector_alignment ;
111
116
int closing ;
112
117
pthread_t btid [BLOCKIF_NUMTHR ];
113
118
pthread_mutex_t mtx ;
@@ -132,6 +137,12 @@ struct blockif_sig_elem {
132
137
struct blockif_sig_elem * next ;
133
138
};
134
139
140
/*
 * One discard segment as handed over by the front end.
 *
 * blockif_process_discard() treats iov[0] of a single-iovec discard request
 * as an array of these records.  It multiplies both `sector` and
 * `num_sectors` by DEV_BSIZE to obtain byte values, so both are counted in
 * DEV_BSIZE units.  `flags` is not read by the code visible in this file.
 * NOTE(review): the layout presumably mirrors the virtio-blk discard segment
 * descriptor -- confirm against the virtio-blk front end.
 */
+ struct discard_range {
141
+ uint64_t sector ;
142
+ uint32_t num_sectors ;
143
+ uint32_t flags ;
144
+ };
145
+
135
146
static struct blockif_sig_elem * blockif_bse_head ;
136
147
137
148
static int
@@ -232,11 +243,92 @@ blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
232
243
TAILQ_INSERT_TAIL (& bc -> freeq , be , link );
233
244
}
234
245
246
+ static int
247
+ discard_range_validate (struct blockif_ctxt * bc , off_t start , off_t size )
248
+ {
249
+ off_t start_sector = start / DEV_BSIZE ;
250
+ off_t size_sector = size / DEV_BSIZE ;
251
+
252
+ if (!size || (start + size ) > (bc -> size + bc -> sub_file_start_lba ))
253
+ return -1 ;
254
+
255
+ if ((size_sector > bc -> max_discard_sectors ) ||
256
+ (bc -> discard_sector_alignment &&
257
+ start_sector % bc -> discard_sector_alignment ))
258
+ return -1 ;
259
+ return 0 ;
260
+ }
261
+
262
+ static int
263
+ blockif_process_discard (struct blockif_ctxt * bc , struct blockif_req * br )
264
+ {
265
+ int err ;
266
+ struct discard_range * range ;
267
+ int n_range , i , segment ;
268
+ off_t arg [MAX_DISCARD_SEGMENT ][2 ];
269
+
270
+ err = 0 ;
271
+ n_range = 0 ;
272
+ segment = 0 ;
273
+ if (!bc -> candiscard )
274
+ return EOPNOTSUPP ;
275
+
276
+ if (bc -> rdonly )
277
+ return EROFS ;
278
+
279
+ if (br -> iovcnt == 1 ) {
280
+ /* virtio-blk use iov to transfer discard range */
281
+ n_range = br -> iov [0 ].iov_len /sizeof (* range );
282
+ range = br -> iov [0 ].iov_base ;
283
+ for (i = 0 ; i < n_range ; i ++ ) {
284
+ arg [i ][0 ] = range [i ].sector * DEV_BSIZE +
285
+ bc -> sub_file_start_lba ;
286
+ arg [i ][1 ] = range [i ].num_sectors * DEV_BSIZE ;
287
+ segment ++ ;
288
+ if (segment > bc -> max_discard_seg ) {
289
+ WPRINTF (("segment > max_discard_seg\n" ));
290
+ return EINVAL ;
291
+ }
292
+ if (discard_range_validate (bc , arg [i ][0 ], arg [i ][1 ])) {
293
+ WPRINTF (("range [%ld: %ld] is invalid\n" , arg [i ][0 ], arg [i ][1 ]));
294
+ return EINVAL ;
295
+ }
296
+ }
297
+ } else {
298
+ /* ahci parse discard range to br->offset and br->reside */
299
+ arg [0 ][0 ] = br -> offset + bc -> sub_file_start_lba ;
300
+ arg [0 ][1 ] = br -> resid ;
301
+ segment = 1 ;
302
+ }
303
+ for (i = 0 ; i < segment ; i ++ ) {
304
+ if (bc -> isblk ) {
305
+ err = ioctl (bc -> fd , BLKDISCARD , arg [i ]);
306
+ } else {
307
+ /* FALLOC_FL_PUNCH_HOLE:
308
+ * Deallocates space in the byte range starting at offset and
309
+ * continuing for length bytes. After a successful call,
310
+ * subsequent reads from this range will return zeroes.
311
+ * FALLOC_FL_KEEP_SIZE:
312
+ * Do not modify the apparent length of the file.
313
+ */
314
+ err = fallocate (bc -> fd , FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE ,
315
+ arg [i ][0 ], arg [i ][1 ]);
316
+ }
317
+ if (err ) {
318
+ WPRINTF (("Failed to discard offset=%ld nbytes=%ld err code: %d\n" ,
319
+ arg [i ][0 ], arg [i ][1 ], err ));
320
+ return err ;
321
+ }
322
+ }
323
+ br -> resid = 0 ;
324
+
325
+ return 0 ;
326
+ }
327
+
235
328
static void
236
329
blockif_proc (struct blockif_ctxt * bc , struct blockif_elem * be )
237
330
{
238
331
struct blockif_req * br ;
239
- off_t arg [2 ];
240
332
ssize_t len ;
241
333
int err ;
242
334
@@ -271,21 +363,7 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
271
363
err = errno ;
272
364
break ;
273
365
case BOP_DISCARD :
274
- /* only used by AHCI */
275
- if (!bc -> candiscard )
276
- err = EOPNOTSUPP ;
277
- else if (bc -> rdonly )
278
- err = EROFS ;
279
- else if (bc -> isblk ) {
280
- arg [0 ] = br -> offset ;
281
- arg [1 ] = br -> resid ;
282
- if (ioctl (bc -> fd , BLKDISCARD , arg ))
283
- err = errno ;
284
- else
285
- br -> resid = 0 ;
286
- }
287
- else
288
- err = EOPNOTSUPP ;
366
+ err = blockif_process_discard (bc , br );
289
367
break ;
290
368
default :
291
369
err = EINVAL ;
@@ -425,6 +503,8 @@ blockif_open(const char *optstr, const char *ident)
425
503
int err_code = -1 ;
426
504
off_t sub_file_start_lba , sub_file_size ;
427
505
int sub_file_assign ;
506
+ int max_discard_sectors , max_discard_seg , discard_sector_alignment ;
507
+ off_t probe_arg [] = {0 , 0 };
428
508
429
509
pthread_once (& blockif_once , blockif_init );
430
510
@@ -436,9 +516,15 @@ blockif_open(const char *optstr, const char *ident)
436
516
sub_file_start_lba = 0 ;
437
517
sub_file_size = 0 ;
438
518
519
+ max_discard_sectors = -1 ;
520
+ max_discard_seg = -1 ;
521
+ discard_sector_alignment = -1 ;
522
+
439
523
/* writethru is on by default */
440
524
writeback = 0 ;
441
525
526
+ candiscard = 0 ;
527
+
442
528
/*
443
529
* The first element in the optstring is always a pathname.
444
530
* Optional elements follow
@@ -458,7 +544,18 @@ blockif_open(const char *optstr, const char *ident)
458
544
writeback = 0 ;
459
545
else if (!strcmp (cp , "ro" ))
460
546
ro = 1 ;
461
- else if (!strncmp (cp , "sectorsize" , strlen ("sectorsize" ))) {
547
+ else if (!strncmp (cp , "discard" , strlen ("discard" ))) {
548
+ strsep (& cp , "=" );
549
+ if (cp != NULL ) {
550
+ if (!(!dm_strtoi (cp , & cp , 10 , & max_discard_sectors ) &&
551
+ * cp == ':' &&
552
+ !dm_strtoi (cp + 1 , & cp , 10 , & max_discard_seg ) &&
553
+ * cp == ':' &&
554
+ !dm_strtoi (cp + 1 , & cp , 10 , & discard_sector_alignment )))
555
+ goto err ;
556
+ }
557
+ candiscard = 1 ;
558
+ } else if (!strncmp (cp , "sectorsize" , strlen ("sectorsize" ))) {
462
559
/*
463
560
* sectorsize=<sector size>
464
561
* or
@@ -517,7 +614,6 @@ blockif_open(const char *optstr, const char *ident)
517
614
size = sbuf .st_size ;
518
615
sectsz = DEV_BSIZE ;
519
616
psectsz = psectoff = 0 ;
520
- candiscard = 0 ;
521
617
522
618
if (S_ISBLK (sbuf .st_mode )) {
523
619
/* get size */
@@ -552,8 +648,22 @@ blockif_open(const char *optstr, const char *ident)
552
648
DPRINTF (("block partition physical sector size is 0x%lx\n" ,
553
649
psectsz ));
554
650
555
- } else
651
+ if (candiscard ) {
652
+ err_code = ioctl (fd , BLKDISCARD , probe_arg );
653
+ if (err_code ) {
654
+ WPRINTF (("not support DISCARD\n" ));
655
+ candiscard = 0 ;
656
+ }
657
+ }
658
+
659
+ } else {
660
+ if (size < DEV_BSIZE || (size & (DEV_BSIZE - 1 ))) {
661
+ WPRINTF (("%s size not corret, should be multiple of %d\n" ,
662
+ nopt , DEV_BSIZE ));
663
+ return 0 ;
664
+ }
556
665
psectsz = sbuf .st_blksize ;
666
+ }
557
667
558
668
if (ssopt != 0 ) {
559
669
if (!powerof2 (ssopt ) || !powerof2 (pssopt ) || ssopt < 512 ||
@@ -613,6 +723,15 @@ blockif_open(const char *optstr, const char *ident)
613
723
bc -> fd = fd ;
614
724
bc -> isblk = S_ISBLK (sbuf .st_mode );
615
725
bc -> candiscard = candiscard ;
726
+ if (candiscard ) {
727
+ bc -> max_discard_sectors =
728
+ (max_discard_sectors != -1 ) ?
729
+ max_discard_sectors : (size / DEV_BSIZE );
730
+ bc -> max_discard_seg =
731
+ (max_discard_seg != -1 ) ? max_discard_seg : 1 ;
732
+ bc -> discard_sector_alignment =
733
+ (discard_sector_alignment != -1 ) ? discard_sector_alignment : 0 ;
734
+ }
616
735
bc -> rdonly = ro ;
617
736
bc -> size = size ;
618
737
bc -> sectsz = sectsz ;
@@ -906,6 +1025,27 @@ blockif_candiscard(struct blockif_ctxt *bc)
906
1025
return bc -> candiscard ;
907
1026
}
908
1027
1028
+ int
1029
+ blockif_max_discard_sectors (struct blockif_ctxt * bc )
1030
+ {
1031
+ assert (bc -> magic == BLOCKIF_SIG );
1032
+ return bc -> max_discard_sectors ;
1033
+ }
1034
+
1035
+ int
1036
+ blockif_max_discard_seg (struct blockif_ctxt * bc )
1037
+ {
1038
+ assert (bc -> magic == BLOCKIF_SIG );
1039
+ return bc -> max_discard_seg ;
1040
+ }
1041
+
1042
+ int
1043
+ blockif_discard_sector_alignment (struct blockif_ctxt * bc )
1044
+ {
1045
+ assert (bc -> magic == BLOCKIF_SIG );
1046
+ return bc -> discard_sector_alignment ;
1047
+ }
1048
+
909
1049
uint8_t
910
1050
blockif_get_wce (struct blockif_ctxt * bc )
911
1051
{
0 commit comments