
add IO schedulers

1 parent dfffded commit 3376972a23857410f634ce90880e8e2da6294b44 @omegamoon committed Sep 17, 2012
Showing with 1,872 additions and 0 deletions.
  1. +37 −0 block/Kconfig.iosched
  2. +3 −0 block/Makefile
  3. +346 −0 block/sio-iosched.c
  4. +1,038 −0 block/test-iosched.c
  5. +448 −0 block/vr-iosched.c
37 block/Kconfig.iosched 100644 → 100755
@@ -12,6 +12,17 @@ config IOSCHED_NOOP
that do their own scheduling and require only minimal assistance from
the kernel.
+config IOSCHED_TEST
+ tristate "Test I/O scheduler"
+ depends on DEBUG_FS
+ default m
+ ---help---
+ The test I/O scheduler is a duplicate of the noop scheduler with the
+ addition of a test utility.
+ It allows testing a block device by dispatching specific requests
+ according to the test case and declares PASS/FAIL according to the
+ requests' completion error codes.
+
config IOSCHED_DEADLINE
tristate "Deadline I/O scheduler"
default y
@@ -43,6 +54,23 @@ config CFQ_GROUP_IOSCHED
---help---
Enable group IO scheduling in CFQ.
+config IOSCHED_VR
+ tristate "V(R) I/O scheduler"
+ default n
+ ---help---
+ Requests are chosen according to SSTF with a penalty of rev_penalty
+ for switching head direction.
+
+config IOSCHED_SIO
+ tristate "Simple I/O scheduler"
+ default y
+ ---help---
+ The Simple I/O scheduler is an extremely simple scheduler,
+ based on noop and deadline, that relies on deadlines to
+ ensure fairness. The algorithm does no sorting, only basic
+ merging, to keep overhead to a minimum. It is aimed mainly
+ at random (aleatory) access devices (e.g. flash devices).
+
choice
prompt "Default I/O scheduler"
default DEFAULT_CFQ
@@ -59,13 +87,22 @@ choice
config DEFAULT_NOOP
bool "No-op"
+ config DEFAULT_VR
+ bool "V(R)" if IOSCHED_VR=y
+
+ config DEFAULT_SIO
+ bool "SIO" if IOSCHED_SIO=y
+
endchoice
config DEFAULT_IOSCHED
string
default "deadline" if DEFAULT_DEADLINE
default "cfq" if DEFAULT_CFQ
+ default "bfq" if DEFAULT_BFQ
default "noop" if DEFAULT_NOOP
+ default "vr" if DEFAULT_VR
+ default "sio" if DEFAULT_SIO
endmenu
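
For reference, with these Kconfig entries a configuration that builds all three new schedulers and selects SIO as the default would end up with a .config fragment along these lines (illustrative only; the choice above still defaults to CFQ unless changed in menuconfig):

CONFIG_IOSCHED_TEST=m
CONFIG_IOSCHED_VR=y
CONFIG_IOSCHED_SIO=y
CONFIG_DEFAULT_SIO=y
CONFIG_DEFAULT_IOSCHED="sio"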
3 block/Makefile 100644 → 100755
@@ -13,6 +13,9 @@ obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o
obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
+obj-$(CONFIG_IOSCHED_VR) += vr-iosched.o
+obj-$(CONFIG_IOSCHED_SIO) += sio-iosched.o
+obj-$(CONFIG_IOSCHED_TEST) += test-iosched.o
obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
346 block/sio-iosched.c
@@ -0,0 +1,346 @@
+/*
+ * Simple IO scheduler
+ * Based on Noop, Deadline and V(R) IO schedulers.
+ *
+ * Copyright (C) 2010 Miguel Boton <mboton@gmail.com>
+ *
+ *
+ * This algorithm does not do any kind of sorting, as it is aimed at
+ * random (aleatory) access devices, but it does some basic merging. We
+ * try to keep overhead to a minimum to achieve low latency.
+ *
+ * Asynchronous and synchronous requests are not treated separately, but
+ * we rely on deadlines to ensure fairness.
+ *
+ */
+#include <linux/blkdev.h>
+#include <linux/elevator.h>
+#include <linux/bio.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+
+enum {
+ ASYNC,
+ SYNC,
+};
+
+/* Tunables */
+static const int sync_expire = HZ / 2; /* max time before a sync is submitted. */
+static const int async_expire = 5 * HZ; /* ditto for async, these limits are SOFT! */
+static const int fifo_batch = 16; /* # of sequential requests treated as one
+ by the above parameters. For throughput. */
+
+/* Elevator data */
+struct sio_data {
+ /* Request queues */
+ struct list_head fifo_list[2];
+
+ /* Attributes */
+ unsigned int batched;
+
+ /* Settings */
+ int fifo_expire[2];
+ int fifo_batch;
+};
+
+static void
+sio_merged_requests(struct request_queue *q, struct request *rq,
+ struct request *next)
+{
+ /*
+ * If next expires before rq, assign its expire time to rq
+ * and move into next position (next will be deleted) in fifo.
+ */
+ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) {
+ if (time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
+ list_move(&rq->queuelist, &next->queuelist);
+ rq_set_fifo_time(rq, rq_fifo_time(next));
+ }
+ }
+
+ /* Delete next request */
+ rq_fifo_clear(next);
+}
+
+static void
+sio_add_request(struct request_queue *q, struct request *rq)
+{
+ struct sio_data *sd = q->elevator->elevator_data;
+ const int sync = rq_is_sync(rq);
+
+ /*
+ * Add request to the proper fifo list and set its
+ * expire time.
+ */
+ rq_set_fifo_time(rq, jiffies + sd->fifo_expire[sync]);
+ list_add_tail(&rq->queuelist, &sd->fifo_list[sync]);
+}
+
+static int
+sio_queue_empty(struct request_queue *q)
+{
+ struct sio_data *sd = q->elevator->elevator_data;
+
+ /* Check if fifo lists are empty */
+ return list_empty(&sd->fifo_list[SYNC]) &&
+ list_empty(&sd->fifo_list[ASYNC]);
+}
+
+static struct request *
+sio_expired_request(struct sio_data *sd, int sync)
+{
+ struct request *rq;
+
+ if (list_empty(&sd->fifo_list[sync]))
+ return NULL;
+
+ /* Retrieve request */
+ rq = rq_entry_fifo(sd->fifo_list[sync].next);
+
+ /* Request has expired */
+ if (time_after(jiffies, rq_fifo_time(rq)))
+ return rq;
+
+ return NULL;
+}
+
+static struct request *
+sio_choose_expired_request(struct sio_data *sd)
+{
+ struct request *sync = sio_expired_request(sd, SYNC);
+ struct request *async = sio_expired_request(sd, ASYNC);
+
+ /*
+ * Check expired requests. Asynchronous requests have
+ * priority over synchronous.
+ */
+ if (sync && async)
+ return async;
+ if (sync)
+ return sync;
+
+ return async;
+
+}
+
+static struct request *
+sio_choose_request(struct sio_data *sd)
+{
+ /*
+ * Retrieve request from available fifo list.
+ * Synchronous requests have priority over asynchronous.
+ */
+ if (!list_empty(&sd->fifo_list[SYNC]))
+ return rq_entry_fifo(sd->fifo_list[SYNC].next);
+
+ if (!list_empty(&sd->fifo_list[ASYNC]))
+ return rq_entry_fifo(sd->fifo_list[ASYNC].next);
+
+ return NULL;
+}
+
+static inline void
+sio_dispatch_request(struct sio_data *sd, struct request *rq)
+{
+ /*
+ * Remove the request from the fifo list
+ * and dispatch it.
+ */
+ rq_fifo_clear(rq);
+ elv_dispatch_add_tail(rq->q, rq);
+
+ sd->batched++;
+}
+
+static int
+sio_dispatch_requests(struct request_queue *q, int force)
+{
+ struct sio_data *sd = q->elevator->elevator_data;
+ struct request *rq = NULL;
+
+ /*
+ * Retrieve any expired request after a batch of
+ * sequential requests.
+ */
+ if (sd->batched > sd->fifo_batch) {
+ sd->batched = 0;
+ rq = sio_choose_expired_request(sd);
+ }
+
+ /* Retrieve request */
+ if (!rq) {
+ rq = sio_choose_request(sd);
+ if (!rq)
+ return 0;
+ }
+
+ /* Dispatch request */
+ sio_dispatch_request(sd, rq);
+
+ return 1;
+}
+
+static struct request *
+sio_former_request(struct request_queue *q, struct request *rq)
+{
+ struct sio_data *sd = q->elevator->elevator_data;
+ const int sync = rq_is_sync(rq);
+
+ if (rq->queuelist.prev == &sd->fifo_list[sync])
+ return NULL;
+
+ /* Return former request */
+ return list_entry(rq->queuelist.prev, struct request, queuelist);
+}
+
+static struct request *
+sio_latter_request(struct request_queue *q, struct request *rq)
+{
+ struct sio_data *sd = q->elevator->elevator_data;
+ const int sync = rq_is_sync(rq);
+
+ if (rq->queuelist.next == &sd->fifo_list[sync])
+ return NULL;
+
+ /* Return latter request */
+ return list_entry(rq->queuelist.next, struct request, queuelist);
+}
+
+static void *
+sio_init_queue(struct request_queue *q)
+{
+ struct sio_data *sd;
+
+ /* Allocate structure */
+ sd = kmalloc_node(sizeof(*sd), GFP_KERNEL, q->node);
+ if (!sd)
+ return NULL;
+
+ /* Initialize fifo lists */
+ INIT_LIST_HEAD(&sd->fifo_list[SYNC]);
+ INIT_LIST_HEAD(&sd->fifo_list[ASYNC]);
+
+ /* Initialize data */
+ sd->batched = 0;
+ sd->fifo_expire[SYNC] = sync_expire;
+ sd->fifo_expire[ASYNC] = async_expire;
+ sd->fifo_batch = fifo_batch;
+
+ return sd;
+}
+
+static void
+sio_exit_queue(struct elevator_queue *e)
+{
+ struct sio_data *sd = e->elevator_data;
+
+ BUG_ON(!list_empty(&sd->fifo_list[SYNC]));
+ BUG_ON(!list_empty(&sd->fifo_list[ASYNC]));
+
+ /* Free structure */
+ kfree(sd);
+}
+
+/*
+ * sysfs code
+ */
+
+static ssize_t
+sio_var_show(int var, char *page)
+{
+ return sprintf(page, "%d\n", var);
+}
+
+static ssize_t
+sio_var_store(int *var, const char *page, size_t count)
+{
+ char *p = (char *) page;
+
+ *var = simple_strtol(p, &p, 10);
+ return count;
+}
+
+#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
+{ \
+ struct sio_data *sd = e->elevator_data; \
+ int __data = __VAR; \
+ if (__CONV) \
+ __data = jiffies_to_msecs(__data); \
+ return sio_var_show(__data, (page)); \
+}
+SHOW_FUNCTION(sio_sync_expire_show, sd->fifo_expire[SYNC], 1);
+SHOW_FUNCTION(sio_async_expire_show, sd->fifo_expire[ASYNC], 1);
+SHOW_FUNCTION(sio_fifo_batch_show, sd->fifo_batch, 0);
+#undef SHOW_FUNCTION
+
+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
+{ \
+ struct sio_data *sd = e->elevator_data; \
+ int __data; \
+ int ret = sio_var_store(&__data, (page), count); \
+ if (__data < (MIN)) \
+ __data = (MIN); \
+ else if (__data > (MAX)) \
+ __data = (MAX); \
+ if (__CONV) \
+ *(__PTR) = msecs_to_jiffies(__data); \
+ else \
+ *(__PTR) = __data; \
+ return ret; \
+}
+STORE_FUNCTION(sio_sync_expire_store, &sd->fifo_expire[SYNC], 0, INT_MAX, 1);
+STORE_FUNCTION(sio_async_expire_store, &sd->fifo_expire[ASYNC], 0, INT_MAX, 1);
+STORE_FUNCTION(sio_fifo_batch_store, &sd->fifo_batch, 0, INT_MAX, 0);
+#undef STORE_FUNCTION
+
+#define DD_ATTR(name) \
+ __ATTR(name, S_IRUGO|S_IWUSR, sio_##name##_show, \
+ sio_##name##_store)
+
+static struct elv_fs_entry sio_attrs[] = {
+ DD_ATTR(sync_expire),
+ DD_ATTR(async_expire),
+ DD_ATTR(fifo_batch),
+ __ATTR_NULL
+};
+
+static struct elevator_type iosched_sio = {
+ .ops = {
+ .elevator_merge_req_fn = sio_merged_requests,
+ .elevator_dispatch_fn = sio_dispatch_requests,
+ .elevator_add_req_fn = sio_add_request,
+// .elevator_queue_empty_fn = sio_queue_empty,
+ .elevator_former_req_fn = sio_former_request,
+ .elevator_latter_req_fn = sio_latter_request,
+ .elevator_init_fn = sio_init_queue,
+ .elevator_exit_fn = sio_exit_queue,
+ },
+
+ .elevator_attrs = sio_attrs,
+ .elevator_name = "sio",
+ .elevator_owner = THIS_MODULE,
+};
+
+static int __init sio_init(void)
+{
+ /* Register elevator */
+ elv_register(&iosched_sio);
+
+ return 0;
+}
+
+static void __exit sio_exit(void)
+{
+ /* Unregister elevator */
+ elv_unregister(&iosched_sio);
+}
+
+module_init(sio_init);
+module_exit(sio_exit);
+
+MODULE_AUTHOR("Miguel Boton");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Simple IO scheduler");
1,038 block/test-iosched.c
@@ -0,0 +1,1038 @@
+/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * The test scheduler allows testing the block device by dispatching
+ * specific requests according to the test case and declaring PASS/FAIL
+ * according to the requests' completion error codes.
+ * Each test is exposed via debugfs and can be triggered by writing to
+ * the debugfs file.
+ *
+ */
+
+/* elevator test iosched */
+#include <linux/blkdev.h>
+#include <linux/elevator.h>
+#include <linux/bio.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/test-iosched.h>
+#include <linux/delay.h>
+#include "blk.h"
+
+#define MODULE_NAME "test-iosched"
+#define WR_RD_START_REQ_ID 1234
+#define UNIQUE_START_REQ_ID 5678
+#define TIMEOUT_TIMER_MS 40000
+#define TEST_MAX_TESTCASE_ROUNDS 15
+
+#define test_pr_debug(fmt, args...) pr_debug("%s: "fmt"\n", MODULE_NAME, args)
+#define test_pr_info(fmt, args...) pr_info("%s: "fmt"\n", MODULE_NAME, args)
+#define test_pr_err(fmt, args...) pr_err("%s: "fmt"\n", MODULE_NAME, args)
+
+static DEFINE_SPINLOCK(blk_dev_test_list_lock);
+static LIST_HEAD(blk_dev_test_list);
+static struct test_data *ptd;
+
+/* Get the request after `test_rq' in the test requests list */
+static struct test_request *
+latter_test_request(struct request_queue *q,
+ struct test_request *test_rq)
+{
+ struct test_data *td = q->elevator->elevator_data;
+
+ if (test_rq->queuelist.next == &td->test_queue)
+ return NULL;
+ return list_entry(test_rq->queuelist.next, struct test_request,
+ queuelist);
+}
+
+/**
+ * test_iosched_get_req_queue() - returns the request queue
+ * served by the scheduler
+ */
+struct request_queue *test_iosched_get_req_queue(void)
+{
+ if (!ptd)
+ return NULL;
+
+ return ptd->req_q;
+}
+EXPORT_SYMBOL(test_iosched_get_req_queue);
+
+/**
+ * test_iosched_mark_test_completion() - Wake up the debugfs
+ * thread waiting for the test completion
+ */
+void test_iosched_mark_test_completion(void)
+{
+ if (!ptd)
+ return;
+
+ ptd->test_state = TEST_COMPLETED;
+ wake_up(&ptd->wait_q);
+}
+EXPORT_SYMBOL(test_iosched_mark_test_completion);
+
+/* Check if all the queued test requests were completed */
+static void check_test_completion(void)
+{
+ struct test_request *test_rq;
+ struct request *rq;
+
+ list_for_each_entry(test_rq, &ptd->test_queue, queuelist) {
+ rq = test_rq->rq;
+ if (!test_rq->req_completed)
+ return;
+ }
+
+ test_pr_info("%s: Test is completed", __func__);
+
+ test_iosched_mark_test_completion();
+}
+
+/*
+ * A callback to be called per bio completion.
+ * Frees the bio memory.
+ */
+static void end_test_bio(struct bio *bio, int err)
+{
+ if (err)
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+
+ bio_put(bio);
+}
+
+/*
+ * A callback to be called per request completion.
+ * The request memory is not freed here; it is freed later, after the test
+ * results have been checked.
+ */
+static void end_test_req(struct request *rq, int err)
+{
+ struct test_request *test_rq;
+
+ test_rq = (struct test_request *)rq->elevator_private[0];
+ BUG_ON(!test_rq);
+
+ test_pr_info("%s: request %d completed, err=%d",
+ __func__, test_rq->req_id, err);
+
+ test_rq->req_completed = true;
+ test_rq->req_result = err;
+
+ check_test_completion();
+}
+
+/**
+ * test_iosched_add_unique_test_req() - Create and queue a non
+ * read/write request (such as FLUSH/DISCARD/SANITIZE).
+ * @is_err_expcted: A flag to indicate if this request
+ * should succeed or not
+ * @req_unique: The type of request to add
+ * @start_sec: start address of the first bio
+ * @nr_sects: number of sectors in the request
+ * @end_req_io: specific completion callback. When not
+ * set, the default callback will be used
+ */
+int test_iosched_add_unique_test_req(int is_err_expcted,
+ enum req_unique_type req_unique,
+ int start_sec, int nr_sects, rq_end_io_fn *end_req_io)
+{
+ struct bio *bio;
+ struct request *rq;
+ int rw_flags;
+ struct test_request *test_rq;
+
+ if (!ptd)
+ return -ENODEV;
+
+ bio = bio_alloc(GFP_KERNEL, 0);
+ if (!bio) {
+ test_pr_err("%s: Failed to allocate a bio", __func__);
+ return -ENODEV;
+ }
+ bio_get(bio);
+ bio->bi_end_io = end_test_bio;
+
+ switch (req_unique) {
+ case REQ_UNIQUE_FLUSH:
+ bio->bi_rw = WRITE_FLUSH;
+ break;
+ case REQ_UNIQUE_DISCARD:
+ bio->bi_rw = REQ_WRITE | REQ_DISCARD;
+ bio->bi_size = nr_sects << 9;
+ bio->bi_sector = start_sec;
+ break;
+ case REQ_UNIQUE_SANITIZE:
+ bio->bi_rw = REQ_WRITE | REQ_SANITIZE;
+ break;
+ default:
+ test_pr_err("%s: Invalid request type %d", __func__,
+ req_unique);
+ bio_put(bio);
+ return -ENODEV;
+ }
+
+ rw_flags = bio_data_dir(bio);
+ if (bio->bi_rw & REQ_SYNC)
+ rw_flags |= REQ_SYNC;
+
+ rq = blk_get_request(ptd->req_q, rw_flags, GFP_KERNEL);
+ if (!rq) {
+ test_pr_err("%s: Failed to allocate a request", __func__);
+ bio_put(bio);
+ return -ENODEV;
+ }
+
+ init_request_from_bio(rq, bio);
+ if (end_req_io)
+ rq->end_io = end_req_io;
+ else
+ rq->end_io = end_test_req;
+
+ test_rq = kzalloc(sizeof(struct test_request), GFP_KERNEL);
+ if (!test_rq) {
+ test_pr_err("%s: Failed to allocate a test request", __func__);
+ bio_put(bio);
+ blk_put_request(rq);
+ return -ENODEV;
+ }
+ test_rq->req_completed = false;
+ test_rq->req_result = -EINVAL;
+ test_rq->rq = rq;
+ test_rq->is_err_expected = is_err_expcted;
+ rq->elevator_private[0] = (void *)test_rq;
+ test_rq->req_id = ptd->unique_next_req_id++;
+
+ test_pr_debug(
+ "%s: added request %d to the test requests list, type = %d",
+ __func__, test_rq->req_id, req_unique);
+
+ list_add_tail(&test_rq->queuelist, &ptd->test_queue);
+
+ return 0;
+}
+EXPORT_SYMBOL(test_iosched_add_unique_test_req);
+
+/*
+ * Fill the request data buffer with the given pattern.
+ * If the pattern used is (-1), the buffer is filled with sequential
+ * numbers.
+ */
+static void fill_buf_with_pattern(int *buf, int num_bytes, int pattern)
+{
+ int i = 0;
+ int num_of_dwords = num_bytes/sizeof(int);
+
+ if (pattern == TEST_NO_PATTERN)
+ return;
+
+ /* num_bytes should be aligned to sizeof(int) */
+ BUG_ON((num_bytes % sizeof(int)) != 0);
+
+ if (pattern == TEST_PATTERN_SEQUENTIAL) {
+ for (i = 0; i < num_of_dwords; i++)
+ buf[i] = i;
+ } else {
+ for (i = 0; i < num_of_dwords; i++)
+ buf[i] = pattern;
+ }
+}
+
+/**
+ * test_iosched_add_wr_rd_test_req() - Create and queue a
+ * read/write request.
+ * @is_err_expcted: A flag to indicate if this request
+ * should succeed or not
+ * @direction: READ/WRITE
+ * @start_sec: start address of the first bio
+ * @num_bios: number of BIOs to be allocated for the
+ * request
+ * @pattern: A pattern, to be written into the write
+ * requests data buffer. In case of READ
+ * request, the given pattern is kept as
+ * the expected pattern. The expected
+ * pattern will be compared in the test
+ * check result function. If no comparison
+ * is required, set pattern to
+ * TEST_NO_PATTERN.
+ * @end_req_io: specific completion callback. When not
+ * set, the default callback will be used
+ *
+ * This function allocates the test request and the block
+ * request and calls blk_rq_map_kern which allocates the
+ * required BIO. The allocated test request and the block
+ * request memory is freed at the end of the test and the
+ * allocated BIO memory is freed by end_test_bio.
+ */
+int test_iosched_add_wr_rd_test_req(int is_err_expcted,
+ int direction, int start_sec,
+ int num_bios, int pattern, rq_end_io_fn *end_req_io)
+{
+ struct request *rq = NULL;
+ struct test_request *test_rq = NULL;
+ int rw_flags = 0;
+ int buf_size = 0;
+ int ret = 0, i = 0;
+ unsigned int *bio_ptr = NULL;
+ struct bio *bio = NULL;
+
+ if (!ptd)
+ return -ENODEV;
+
+ rw_flags = direction;
+
+ rq = blk_get_request(ptd->req_q, rw_flags, GFP_KERNEL);
+ if (!rq) {
+ test_pr_err("%s: Failed to allocate a request", __func__);
+ return -ENODEV;
+ }
+
+ test_rq = kzalloc(sizeof(struct test_request), GFP_KERNEL);
+ if (!test_rq) {
+ test_pr_err("%s: Failed to allocate test request", __func__);
+ blk_put_request(rq);
+ return -ENODEV;
+ }
+
+ buf_size = sizeof(unsigned int) * BIO_U32_SIZE * num_bios;
+ test_rq->bios_buffer = kzalloc(buf_size, GFP_KERNEL);
+ if (!test_rq->bios_buffer) {
+ test_pr_err("%s: Failed to allocate the data buf", __func__);
+ goto err;
+ }
+ test_rq->buf_size = buf_size;
+
+ if (direction == WRITE)
+ fill_buf_with_pattern(test_rq->bios_buffer,
+ buf_size, pattern);
+ test_rq->wr_rd_data_pattern = pattern;
+
+ bio_ptr = test_rq->bios_buffer;
+ for (i = 0; i < num_bios; ++i) {
+ ret = blk_rq_map_kern(ptd->req_q, rq,
+ (void *)bio_ptr,
+ sizeof(unsigned int)*BIO_U32_SIZE,
+ GFP_KERNEL);
+ if (ret) {
+ test_pr_err("%s: blk_rq_map_kern returned error %d",
+ __func__, ret);
+ goto err;
+ }
+ bio_ptr += BIO_U32_SIZE;
+ }
+
+ if (end_req_io)
+ rq->end_io = end_req_io;
+ else
+ rq->end_io = end_test_req;
+ rq->__sector = start_sec;
+ rq->cmd_type |= REQ_TYPE_FS;
+
+ if (rq->bio) {
+ rq->bio->bi_sector = start_sec;
+ rq->bio->bi_end_io = end_test_bio;
+ bio = rq->bio;
+ while ((bio = bio->bi_next) != NULL)
+ bio->bi_end_io = end_test_bio;
+ }
+
+ ptd->num_of_write_bios += num_bios;
+ test_rq->req_id = ptd->wr_rd_next_req_id++;
+
+ test_rq->req_completed = false;
+ test_rq->req_result = -EINVAL;
+ test_rq->rq = rq;
+ test_rq->is_err_expected = is_err_expcted;
+ rq->elevator_private[0] = (void *)test_rq;
+
+ test_pr_debug(
+ "%s: added request %d to the test requests list, buf_size=%d",
+ __func__, test_rq->req_id, buf_size);
+
+ list_add_tail(&test_rq->queuelist, &ptd->test_queue);
+
+ return 0;
+err:
+ blk_put_request(rq);
+ kfree(test_rq->bios_buffer);
+ return -ENODEV;
+}
+EXPORT_SYMBOL(test_iosched_add_wr_rd_test_req);
+
+/* Converts the testcase number into a string */
+static char *get_test_case_str(struct test_data *td)
+{
+ if (td->test_info.get_test_case_str_fn)
+ return td->test_info.get_test_case_str_fn(td);
+
+ return "Unknown testcase";
+}
+
+/*
+ * Verify that the test request data buffer includes the expected
+ * pattern
+ */
+static int compare_buffer_to_pattern(struct test_request *test_rq)
+{
+ int i = 0;
+ int num_of_dwords = test_rq->buf_size/sizeof(int);
+
+ /* num_bytes should be aligned to sizeof(int) */
+ BUG_ON((test_rq->buf_size % sizeof(int)) != 0);
+ BUG_ON(test_rq->bios_buffer == NULL);
+
+ if (test_rq->wr_rd_data_pattern == TEST_NO_PATTERN)
+ return 0;
+
+ if (test_rq->wr_rd_data_pattern == TEST_PATTERN_SEQUENTIAL) {
+ for (i = 0; i < num_of_dwords; i++) {
+ if (test_rq->bios_buffer[i] != i) {
+ test_pr_err(
+ "%s: wrong pattern 0x%x in index %d",
+ __func__, test_rq->bios_buffer[i], i);
+ return -EINVAL;
+ }
+ }
+ } else {
+ for (i = 0; i < num_of_dwords; i++) {
+ if (test_rq->bios_buffer[i] !=
+ test_rq->wr_rd_data_pattern) {
+ test_pr_err(
+ "%s: wrong pattern 0x%x in index %d",
+ __func__, test_rq->bios_buffer[i], i);
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Determine if the test passed or failed.
+ * The function checks the completion value of each test request and calls
+ * the test-case-specific result check callback (check_test_result_fn)
+ * when one is provided.
+ */
+static int check_test_result(struct test_data *td)
+{
+ struct test_request *test_rq;
+ struct request *rq;
+ int res = 0;
+ static int run;
+
+ list_for_each_entry(test_rq, &ptd->test_queue, queuelist) {
+ rq = test_rq->rq;
+ if (!test_rq->req_completed) {
+ test_pr_err("%s: rq %d not completed", __func__,
+ test_rq->req_id);
+ res = -EINVAL;
+ goto err;
+ }
+
+ if ((test_rq->req_result < 0) && !test_rq->is_err_expected) {
+ test_pr_err(
+ "%s: rq %d completed with err, not as expected",
+ __func__, test_rq->req_id);
+ res = -EINVAL;
+ goto err;
+ }
+ if ((test_rq->req_result == 0) && test_rq->is_err_expected) {
+ test_pr_err("%s: rq %d succeeded, not as expected",
+ __func__, test_rq->req_id);
+ res = -EINVAL;
+ goto err;
+ }
+ if (rq_data_dir(test_rq->rq) == READ) {
+ res = compare_buffer_to_pattern(test_rq);
+ if (res) {
+ test_pr_err("%s: read pattern not as expected",
+ __func__);
+ res = -EINVAL;
+ goto err;
+ }
+ }
+ }
+
+ if (td->test_info.check_test_result_fn) {
+ res = td->test_info.check_test_result_fn(td);
+ if (res)
+ goto err;
+ }
+
+ test_pr_info("%s: %s, run# %03d, PASSED",
+ __func__, get_test_case_str(td), ++run);
+ td->test_result = TEST_PASSED;
+
+ return 0;
+err:
+ test_pr_err("%s: %s, run# %03d, FAILED",
+ __func__, get_test_case_str(td), ++run);
+ td->test_result = TEST_FAILED;
+ return res;
+}
+
+/* Create and queue the required requests according to the test case */
+static int prepare_test(struct test_data *td)
+{
+ int ret = 0;
+
+ if (td->test_info.prepare_test_fn) {
+ ret = td->test_info.prepare_test_fn(td);
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Run the test */
+static int run_test(struct test_data *td)
+{
+ int ret = 0;
+
+ if (td->test_info.run_test_fn) {
+ ret = td->test_info.run_test_fn(td);
+ return ret;
+ }
+
+ /*
+ * Set the next_req pointer to the first request in the test requests
+ * list
+ */
+ if (!list_empty(&td->test_queue))
+ td->next_req = list_entry(td->test_queue.next,
+ struct test_request, queuelist);
+ __blk_run_queue(td->req_q);
+
+ return 0;
+}
+
+/* Free the allocated test requests, their block requests and BIO buffers */
+static void free_test_requests(struct test_data *td)
+{
+ struct test_request *test_rq;
+ struct bio *bio;
+
+ while (!list_empty(&td->test_queue)) {
+ test_rq = list_entry(td->test_queue.next, struct test_request,
+ queuelist);
+ list_del_init(&test_rq->queuelist);
+ /*
+ * If the request was not completed we need to free its BIOs
+ * and remove it from the packed list
+ */
+ if (!test_rq->req_completed) {
+ test_pr_info(
+ "%s: Freeing memory of an uncompleted request",
+ __func__);
+ list_del_init(&test_rq->rq->queuelist);
+ while ((bio = test_rq->rq->bio) != NULL) {
+ test_rq->rq->bio = bio->bi_next;
+ bio_put(bio);
+ }
+ }
+ blk_put_request(test_rq->rq);
+ kfree(test_rq->bios_buffer);
+ kfree(test_rq);
+ }
+}
+
+/*
+ * Do post test operations.
+ * Free the allocated test requests, their block requests and BIO buffers.
+ */
+static int post_test(struct test_data *td)
+{
+ int ret = 0;
+
+ if (td->test_info.post_test_fn)
+ ret = td->test_info.post_test_fn(td);
+
+ ptd->test_info.testcase = 0;
+ ptd->test_state = TEST_IDLE;
+
+ free_test_requests(td);
+
+ return ret;
+}
+
+/*
+ * The timer verifies that the test will be completed even if we don't get
+ * the completion callback for all the requests.
+ */
+static void test_timeout_handler(unsigned long data)
+{
+ struct test_data *td = (struct test_data *)data;
+
+ test_pr_info("%s: TIMEOUT timer expired", __func__);
+ td->test_state = TEST_COMPLETED;
+ wake_up(&td->wait_q);
+ return;
+}
+
+static unsigned int get_timeout_msec(struct test_data *td)
+{
+ if (td->test_info.timeout_msec)
+ return td->test_info.timeout_msec;
+ else
+ return TIMEOUT_TIMER_MS;
+}
+
+/**
+ * test_iosched_start_test() - Prepares and runs the test.
+ * @t_info: the current testcase and its callback
+ * functions
+ *
+ * The function also checks the test result upon test completion
+ */
+int test_iosched_start_test(struct test_info *t_info)
+{
+ int ret = 0;
+ unsigned timeout_msec;
+ int counter = 0;
+ char *test_name = NULL;
+
+ if (!ptd)
+ return -ENODEV;
+
+ if (!t_info) {
+ ptd->test_result = TEST_FAILED;
+ return -EINVAL;
+ }
+
+ do {
+ if (ptd->ignore_round)
+ /*
+ * We ignored the last run due to FS write requests.
+ * Sleep to allow those requests to be issued
+ */
+ msleep(2000);
+
+ spin_lock(&ptd->lock);
+
+ if (ptd->test_state != TEST_IDLE) {
+ test_pr_info(
+ "%s: Another test is running, try again later",
+ __func__);
+ spin_unlock(&ptd->lock);
+ return -EBUSY;
+ }
+
+ if (ptd->start_sector == 0) {
+ test_pr_err("%s: Invalid start sector", __func__);
+ ptd->test_result = TEST_FAILED;
+ spin_unlock(&ptd->lock);
+ return -EINVAL;
+ }
+
+ memcpy(&ptd->test_info, t_info, sizeof(struct test_info));
+
+ ptd->next_req = NULL;
+ ptd->test_result = TEST_NO_RESULT;
+ ptd->num_of_write_bios = 0;
+
+ ptd->unique_next_req_id = UNIQUE_START_REQ_ID;
+ ptd->wr_rd_next_req_id = WR_RD_START_REQ_ID;
+
+ ptd->ignore_round = false;
+ ptd->fs_wr_reqs_during_test = false;
+
+ ptd->test_state = TEST_RUNNING;
+
+ spin_unlock(&ptd->lock);
+
+ timeout_msec = get_timeout_msec(ptd);
+ mod_timer(&ptd->timeout_timer, jiffies +
+ msecs_to_jiffies(timeout_msec));
+
+ if (ptd->test_info.get_test_case_str_fn)
+ test_name = ptd->test_info.get_test_case_str_fn(ptd);
+ else
+ test_name = "Unknown testcase";
+ test_pr_info("%s: Starting test %s\n", __func__, test_name);
+
+ ret = prepare_test(ptd);
+ if (ret) {
+ test_pr_err("%s: failed to prepare the test\n",
+ __func__);
+ goto error;
+ }
+
+ ret = run_test(ptd);
+ if (ret) {
+ test_pr_err("%s: failed to run the test\n", __func__);
+ goto error;
+ }
+
+ test_pr_info("%s: Waiting for the test completion", __func__);
+
+ wait_event(ptd->wait_q, ptd->test_state == TEST_COMPLETED);
+ del_timer_sync(&ptd->timeout_timer);
+
+ ret = check_test_result(ptd);
+ if (ret) {
+ test_pr_err("%s: check_test_result failed\n",
+ __func__);
+ goto error;
+ }
+
+ ret = post_test(ptd);
+ if (ret) {
+ test_pr_err("%s: post_test failed\n", __func__);
+ goto error;
+ }
+
+ /*
+ * Wake up the queue thread to fetch FS requests that might have been
+ * postponed due to the test
+ */
+ __blk_run_queue(ptd->req_q);
+
+ if (ptd->ignore_round)
+ test_pr_info(
+ "%s: Round canceled (Got wr reqs in the middle)",
+ __func__);
+
+ if (++counter == TEST_MAX_TESTCASE_ROUNDS) {
+ test_pr_info("%s: Too many rounds, did not succeed...",
+ __func__);
+ ptd->test_result = TEST_FAILED;
+ }
+
+ } while ((ptd->ignore_round) && (counter < TEST_MAX_TESTCASE_ROUNDS));
+
+ if (ptd->test_result == TEST_PASSED)
+ return 0;
+ else
+ return -EINVAL;
+
+error:
+ post_test(ptd);
+ ptd->test_result = TEST_FAILED;
+ return ret;
+}
+EXPORT_SYMBOL(test_iosched_start_test);
+
+/**
+ * test_iosched_register() - register a block device test
+ * utility.
+ * @bdt: the block device test type to register
+ */
+void test_iosched_register(struct blk_dev_test_type *bdt)
+{
+ spin_lock(&blk_dev_test_list_lock);
+ list_add_tail(&bdt->list, &blk_dev_test_list);
+ spin_unlock(&blk_dev_test_list_lock);
+}
+EXPORT_SYMBOL_GPL(test_iosched_register);
+
+/**
+ * test_iosched_unregister() - unregister a block device test
+ * utility.
+ * @bdt: the block device test type to unregister
+ */
+void test_iosched_unregister(struct blk_dev_test_type *bdt)
+{
+ spin_lock(&blk_dev_test_list_lock);
+ list_del_init(&bdt->list);
+ spin_unlock(&blk_dev_test_list_lock);
+}
+EXPORT_SYMBOL_GPL(test_iosched_unregister);
+
+/**
+ * test_iosched_set_test_result() - Set the test
+ * result (PASS/FAIL)
+ * @test_result: the test result
+ */
+void test_iosched_set_test_result(int test_result)
+{
+ if (!ptd)
+ return;
+
+ ptd->test_result = test_result;
+}
+EXPORT_SYMBOL(test_iosched_set_test_result);
+
+
+/**
+ * test_iosched_set_ignore_round() - Set the ignore_round flag
+ * @ignore_round: A flag to indicate if this test round
+ * should be ignored and re-run
+ */
+void test_iosched_set_ignore_round(bool ignore_round)
+{
+ if (!ptd)
+ return;
+
+ ptd->ignore_round = ignore_round;
+}
+EXPORT_SYMBOL(test_iosched_set_ignore_round);
+
+/**
+ * test_iosched_get_debugfs_tests_root() - returns the root
+ * debugfs directory for the test_iosched tests
+ */
+struct dentry *test_iosched_get_debugfs_tests_root(void)
+{
+ if (!ptd)
+ return NULL;
+
+ return ptd->debug.debug_tests_root;
+}
+EXPORT_SYMBOL(test_iosched_get_debugfs_tests_root);
+
+/**
+ * test_iosched_get_debugfs_utils_root() - returns the root
+ * debugfs directory for the test_iosched utils
+ */
+struct dentry *test_iosched_get_debugfs_utils_root(void)
+{
+ if (!ptd)
+ return NULL;
+
+ return ptd->debug.debug_utils_root;
+}
+EXPORT_SYMBOL(test_iosched_get_debugfs_utils_root);
+
+static int test_debugfs_init(struct test_data *td)
+{
+ td->debug.debug_root = debugfs_create_dir("test-iosched", NULL);
+ if (!td->debug.debug_root)
+ return -ENOENT;
+
+ td->debug.debug_tests_root = debugfs_create_dir("tests",
+ td->debug.debug_root);
+ if (!td->debug.debug_tests_root)
+ goto err;
+
+ td->debug.debug_utils_root = debugfs_create_dir("utils",
+ td->debug.debug_root);
+ if (!td->debug.debug_utils_root)
+ goto err;
+
+ td->debug.debug_test_result = debugfs_create_u32(
+ "test_result",
+ S_IRUGO | S_IWUGO,
+ td->debug.debug_utils_root,
+ &td->test_result);
+ if (!td->debug.debug_test_result)
+ goto err;
+
+ td->debug.start_sector = debugfs_create_u32(
+ "start_sector",
+ S_IRUGO | S_IWUGO,
+ td->debug.debug_utils_root,
+ &td->start_sector);
+ if (!td->debug.start_sector)
+ goto err;
+
+ return 0;
+
+err:
+ debugfs_remove_recursive(td->debug.debug_root);
+ return -ENOENT;
+}
+
+static void test_debugfs_cleanup(struct test_data *td)
+{
+ debugfs_remove_recursive(td->debug.debug_root);
+}
+
+static void print_req(struct request *req)
+{
+ struct bio *bio;
+ struct test_request *test_rq;
+
+ if (!req)
+ return;
+
+ test_rq = (struct test_request *)req->elevator_private[0];
+
+ if (test_rq) {
+ test_pr_debug("%s: Dispatch request %d: __sector=0x%lx",
+ __func__, test_rq->req_id, (unsigned long)req->__sector);
+ test_pr_debug("%s: nr_phys_segments=%d, num_of_sectors=%d",
+ __func__, req->nr_phys_segments, blk_rq_sectors(req));
+ bio = req->bio;
+ test_pr_debug("%s: bio: bi_size=%d, bi_sector=0x%lx",
+ __func__, bio->bi_size,
+ (unsigned long)bio->bi_sector);
+ while ((bio = bio->bi_next) != NULL) {
+ test_pr_debug("%s: bio: bi_size=%d, bi_sector=0x%lx",
+ __func__, bio->bi_size,
+ (unsigned long)bio->bi_sector);
+ }
+ }
+}
+
+static void test_merged_requests(struct request_queue *q,
+ struct request *rq, struct request *next)
+{
+ list_del_init(&next->queuelist);
+}
+
+/*
+ * Dispatch a test request if a test is running. Otherwise, dispatch
+ * a request that was queued by the FS to keep the card functional.
+ */
+static int test_dispatch_requests(struct request_queue *q, int force)
+{
+ struct test_data *td = q->elevator->elevator_data;
+ struct request *rq = NULL;
+
+ switch (td->test_state) {
+ case TEST_IDLE:
+ if (!list_empty(&td->queue)) {
+ rq = list_entry(td->queue.next, struct request,
+ queuelist);
+ list_del_init(&rq->queuelist);
+ elv_dispatch_sort(q, rq);
+ return 1;
+ }
+ break;
+ case TEST_RUNNING:
+ if (td->next_req) {
+ rq = td->next_req->rq;
+ td->next_req =
+ latter_test_request(td->req_q, td->next_req);
+ if (!rq)
+ return 0;
+ print_req(rq);
+ elv_dispatch_sort(q, rq);
+ return 1;
+ }
+ break;
+ case TEST_COMPLETED:
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
+static void test_add_request(struct request_queue *q, struct request *rq)
+{
+ struct test_data *td = q->elevator->elevator_data;
+
+ list_add_tail(&rq->queuelist, &td->queue);
+
+ /*
+ * Write requests can be followed by a FLUSH request, which might
+ * cause unexpected test results.
+ */
+ if ((rq_data_dir(rq) == WRITE) && (td->test_state == TEST_RUNNING)) {
+ test_pr_debug("%s: got WRITE req in the middle of the test",
+ __func__);
+ td->fs_wr_reqs_during_test = true;
+ }
+}
+
+static struct request *
+test_former_request(struct request_queue *q, struct request *rq)
+{
+ struct test_data *td = q->elevator->elevator_data;
+
+ if (rq->queuelist.prev == &td->queue)
+ return NULL;
+ return list_entry(rq->queuelist.prev, struct request, queuelist);
+}
+
+static struct request *
+test_latter_request(struct request_queue *q, struct request *rq)
+{
+ struct test_data *td = q->elevator->elevator_data;
+
+ if (rq->queuelist.next == &td->queue)
+ return NULL;
+ return list_entry(rq->queuelist.next, struct request, queuelist);
+}
+
+static void *test_init_queue(struct request_queue *q)
+{
+ struct blk_dev_test_type *__bdt;
+
+ ptd = kmalloc_node(sizeof(struct test_data), GFP_KERNEL,
+ q->node);
+ if (!ptd) {
+ test_pr_err("%s: failed to allocate test data", __func__);
+ return NULL;
+ }
+ memset((void *)ptd, 0, sizeof(struct test_data));
+ INIT_LIST_HEAD(&ptd->queue);
+ INIT_LIST_HEAD(&ptd->test_queue);
+ init_waitqueue_head(&ptd->wait_q);
+ ptd->req_q = q;
+
+ setup_timer(&ptd->timeout_timer, test_timeout_handler,
+ (unsigned long)ptd);
+
+ spin_lock_init(&ptd->lock);
+
+ if (test_debugfs_init(ptd)) {
+ test_pr_err("%s: Failed to create debugfs files", __func__);
+ return NULL;
+ }
+
+ list_for_each_entry(__bdt, &blk_dev_test_list, list)
+ __bdt->init_fn();
+
+ return ptd;
+}
+
+static void test_exit_queue(struct elevator_queue *e)
+{
+ struct test_data *td = e->elevator_data;
+ struct blk_dev_test_type *__bdt;
+
+ BUG_ON(!list_empty(&td->queue));
+
+ list_for_each_entry(__bdt, &blk_dev_test_list, list)
+ __bdt->exit_fn();
+
+ test_debugfs_cleanup(td);
+
+ kfree(td);
+}
+
+static struct elevator_type elevator_test_iosched = {
+ .ops = {
+ .elevator_merge_req_fn = test_merged_requests,
+ .elevator_dispatch_fn = test_dispatch_requests,
+ .elevator_add_req_fn = test_add_request,
+ .elevator_former_req_fn = test_former_request,
+ .elevator_latter_req_fn = test_latter_request,
+ .elevator_init_fn = test_init_queue,
+ .elevator_exit_fn = test_exit_queue,
+ },
+ .elevator_name = "test-iosched",
+ .elevator_owner = THIS_MODULE,
+};
+
+static int __init test_init(void)
+{
+ elv_register(&elevator_test_iosched);
+
+ return 0;
+}
+
+static void __exit test_exit(void)
+{
+ elv_unregister(&elevator_test_iosched);
+}
+
+module_init(test_init);
+module_exit(test_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Test IO scheduler");
448 block/vr-iosched.c
@@ -0,0 +1,448 @@
+/*
+* V(R) I/O Scheduler
+*
+* Copyright (C) 2007 Aaron Carroll <aaronc@gelato.unsw.edu.au>
+*
+*
+* The algorithm:
+*
+* The next request is decided based on its distance from the last
+* request, with a multiplicative penalty of `rev_penalty' applied
+* for reversing the head direction. A rev_penalty of 1 means SSTF
+* behaviour. As this variable is increased, the algorithm approaches
+* pure SCAN. Setting rev_penalty to 0 forces SCAN.
+*
+* Async and sync requests are not treated separately. Instead we
+* rely on deadlines to ensure fairness.
+*
+*/
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/elevator.h>
+#include <linux/bio.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/compiler.h>
+#include <linux/rbtree.h>
+
+#include <asm/div64.h>
+
+enum vr_data_dir {
+ASYNC,
+SYNC,
+};
+
+enum vr_head_dir {
+FORWARD,
+BACKWARD,
+};
+
+static const int sync_expire = HZ / 2; /* max time before a sync is submitted. */
+static const int async_expire = 5 * HZ; /* ditto for async, these limits are SOFT! */
+static const int fifo_batch = 16;
+static const int rev_penalty = 10; /* penalty for reversing head direction */
+
+struct vr_data {
+struct rb_root sort_list;
+struct list_head fifo_list[2];
+
+struct request *next_rq;
+struct request *prev_rq;
+
+unsigned int nbatched;
+sector_t last_sector; /* head position */
+int head_dir;
+
+/* tunables */
+int fifo_expire[2];
+int fifo_batch;
+int rev_penalty;
+};
+
+static void vr_move_request(struct vr_data *, struct request *);
+
+static inline struct vr_data *
+vr_get_data(struct request_queue *q)
+{
+return q->elevator->elevator_data;
+}
+
+static void
+vr_add_rq_rb(struct vr_data *vd, struct request *rq)
+{
+struct request *alias = elv_rb_add(&vd->sort_list, rq);
+
+if (unlikely(alias)) {
+vr_move_request(vd, alias);
+alias = elv_rb_add(&vd->sort_list, rq);
+BUG_ON(alias);
+}
+
+if (blk_rq_pos(rq) >= vd->last_sector) {
+if (!vd->next_rq || blk_rq_pos(vd->next_rq) > blk_rq_pos(rq))
+vd->next_rq = rq;
+}
+else {
+if (!vd->prev_rq || blk_rq_pos(vd->prev_rq) < blk_rq_pos(rq))
+vd->prev_rq = rq;
+}
+
+BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq);
+BUG_ON(vd->next_rq && vd->prev_rq && blk_rq_pos(vd->next_rq) < blk_rq_pos(vd->prev_rq));
+}
+
+static void
+vr_del_rq_rb(struct vr_data *vd, struct request *rq)
+{
+/*
+* We might be deleting our cached next request.
+* If so, find its successor.
+*/
+
+if (vd->next_rq == rq)
+vd->next_rq = elv_rb_latter_request(NULL, rq);
+else if (vd->prev_rq == rq)
+vd->prev_rq = elv_rb_former_request(NULL, rq);
+
+BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq);
+BUG_ON(vd->next_rq && vd->prev_rq && blk_rq_pos(vd->next_rq) < blk_rq_pos(vd->prev_rq));
+
+elv_rb_del(&vd->sort_list, rq);
+}
+
+/*
+* add rq to rbtree and fifo
+*/
+static void
+vr_add_request(struct request_queue *q, struct request *rq)
+{
+struct vr_data *vd = vr_get_data(q);
+const int dir = rq_is_sync(rq);
+
+vr_add_rq_rb(vd, rq);
+
+if (vd->fifo_expire[dir]) {
+rq_set_fifo_time(rq, jiffies + vd->fifo_expire[dir]);
+list_add_tail(&rq->queuelist, &vd->fifo_list[dir]);
+}
+}
+
+/*
+* remove rq from rbtree and fifo.
+*/
+static void
+vr_remove_request(struct request_queue *q, struct request *rq)
+{
+struct vr_data *vd = vr_get_data(q);
+
+rq_fifo_clear(rq);
+vr_del_rq_rb(vd, rq);
+}
+
+static int
+vr_merge(struct request_queue *q, struct request **rqp, struct bio *bio)
+{
+sector_t sector = bio->bi_sector + bio_sectors(bio);
+struct vr_data *vd = vr_get_data(q);
+struct request *rq = elv_rb_find(&vd->sort_list, sector);
+
+if (rq && elv_rq_merge_ok(rq, bio)) {
+*rqp = rq;
+return ELEVATOR_FRONT_MERGE;
+}
+return ELEVATOR_NO_MERGE;
+}
+
+static void
+vr_merged_request(struct request_queue *q, struct request *req, int type)
+{
+struct vr_data *vd = vr_get_data(q);
+
+/*
+* if the merge was a front merge, we need to reposition request
+*/
+if (type == ELEVATOR_FRONT_MERGE) {
+vr_del_rq_rb(vd, req);
+vr_add_rq_rb(vd, req);
+}
+}
+
+static void
+vr_merged_requests(struct request_queue *q, struct request *rq,
+struct request *next)
+{
+/*
+* if next expires before rq, assign its expire time to rq
+* and move into next position (next will be deleted) in fifo
+*/
+if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) {
+if (time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
+list_move(&rq->queuelist, &next->queuelist);
+rq_set_fifo_time(rq, rq_fifo_time(next));
+}
+}
+
+vr_remove_request(q, next);
+}
+
+/*
+* move an entry to dispatch queue
+*/
+static void
+vr_move_request(struct vr_data *vd, struct request *rq)
+{
+struct request_queue *q = rq->q;
+
+if (blk_rq_pos(rq) > vd->last_sector)
+vd->head_dir = FORWARD;
+else
+vd->head_dir = BACKWARD;
+
+vd->last_sector = blk_rq_pos(rq);
+vd->next_rq = elv_rb_latter_request(NULL, rq);
+vd->prev_rq = elv_rb_former_request(NULL, rq);
+
+BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq);
+
+vr_remove_request(q, rq);
+elv_dispatch_add_tail(q, rq);
+vd->nbatched++;
+}
+
+/*
+* get the first expired request in direction ddir
+*/
+static struct request *
+vr_expired_request(struct vr_data *vd, int ddir)
+{
+struct request *rq;
+
+if (list_empty(&vd->fifo_list[ddir]))
+return NULL;
+
+rq = rq_entry_fifo(vd->fifo_list[ddir].next);
+if (time_after(jiffies, rq_fifo_time(rq)))
+return rq;
+
+return NULL;
+}
+
+/*
+* Returns the oldest expired request
+*/
+static struct request *
+vr_check_fifo(struct vr_data *vd)
+{
+struct request *rq_sync = vr_expired_request(vd, SYNC);
+struct request *rq_async = vr_expired_request(vd, ASYNC);
+
+if (rq_async && rq_sync) {
+if (time_after(rq_fifo_time(rq_async), rq_fifo_time(rq_sync)))
+return rq_sync;
+}
+else if (rq_sync)
+return rq_sync;
+
+return rq_async;
+}
+
+/*
+* Return the request with the lowest penalty
+*/
+static struct request *
+vr_choose_request(struct vr_data *vd)
+{
+int penalty = (vd->rev_penalty) ? : INT_MAX;
+struct request *next = vd->next_rq;
+struct request *prev = vd->prev_rq;
+sector_t next_pen, prev_pen;
+
+BUG_ON(prev && prev == next);
+
+if (!prev)
+return next;
+else if (!next)
+return prev;
+
+/* At this point both prev and next are defined and distinct */
+
+next_pen = blk_rq_pos(next) - vd->last_sector;
+prev_pen = vd->last_sector - blk_rq_pos(prev);
+
+if (vd->head_dir == FORWARD)
+next_pen = do_div(next_pen, penalty);
+else
+prev_pen = do_div(prev_pen, penalty);
+
+if (next_pen <= prev_pen)
+return next;
+
+return prev;
+}
+
+static int
+vr_dispatch_requests(struct request_queue *q, int force)
+{
+struct vr_data *vd = vr_get_data(q);
+struct request *rq = NULL;
+
+/* Check for and issue expired requests */
+if (vd->nbatched > vd->fifo_batch) {
+vd->nbatched = 0;
+rq = vr_check_fifo(vd);
+}
+
+if (!rq) {
+rq = vr_choose_request(vd);
+if (!rq)
+return 0;
+}
+
+vr_move_request(vd, rq);
+
+return 1;
+}
+
+static int
+vr_queue_empty(struct request_queue *q)
+{
+struct vr_data *vd = vr_get_data(q);
+return RB_EMPTY_ROOT(&vd->sort_list);
+}
+
+static void
+vr_exit_queue(struct elevator_queue *e)
+{
+struct vr_data *vd = e->elevator_data;
+BUG_ON(!RB_EMPTY_ROOT(&vd->sort_list));
+kfree(vd);
+}
+
+/*
+* initialize elevator private data (vr_data).
+*/
+static void *vr_init_queue(struct request_queue *q)
+{
+struct vr_data *vd;
+
+vd = kmalloc_node(sizeof(*vd), GFP_KERNEL | __GFP_ZERO, q->node);
+if (!vd)
+return NULL;
+
+INIT_LIST_HEAD(&vd->fifo_list[SYNC]);
+INIT_LIST_HEAD(&vd->fifo_list[ASYNC]);
+vd->sort_list = RB_ROOT;
+vd->fifo_expire[SYNC] = sync_expire;
+vd->fifo_expire[ASYNC] = async_expire;
+vd->fifo_batch = fifo_batch;
+vd->rev_penalty = rev_penalty;
+return vd;
+}
+
+/*
+* sysfs parts below
+*/
+
+static ssize_t
+vr_var_show(int var, char *page)
+{
+return sprintf(page, "%d\n", var);
+}
+
+static ssize_t
+vr_var_store(int *var, const char *page, size_t count)
+{
+*var = simple_strtol(page, NULL, 10);
+return count;
+}
+
+#define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
+{ \
+struct vr_data *vd = e->elevator_data; \
+int __data = __VAR; \
+if (__CONV) \
+__data = jiffies_to_msecs(__data); \
+return vr_var_show(__data, (page)); \
+}
+SHOW_FUNCTION(vr_sync_expire_show, vd->fifo_expire[SYNC], 1);
+SHOW_FUNCTION(vr_async_expire_show, vd->fifo_expire[ASYNC], 1);
+SHOW_FUNCTION(vr_fifo_batch_show, vd->fifo_batch, 0);
+SHOW_FUNCTION(vr_rev_penalty_show, vd->rev_penalty, 0);
+#undef SHOW_FUNCTION
+
+#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
+{ \
+struct vr_data *vd = e->elevator_data; \
+int __data; \
+int ret = vr_var_store(&__data, (page), count); \
+if (__data < (MIN)) \
+__data = (MIN); \
+else if (__data > (MAX)) \
+__data = (MAX); \
+if (__CONV) \
+*(__PTR) = msecs_to_jiffies(__data); \
+else \
+*(__PTR) = __data; \
+return ret; \
+}
+STORE_FUNCTION(vr_sync_expire_store, &vd->fifo_expire[SYNC], 0, INT_MAX, 1);
+STORE_FUNCTION(vr_async_expire_store, &vd->fifo_expire[ASYNC], 0, INT_MAX, 1);
+STORE_FUNCTION(vr_fifo_batch_store, &vd->fifo_batch, 0, INT_MAX, 0);
+STORE_FUNCTION(vr_rev_penalty_store, &vd->rev_penalty, 0, INT_MAX, 0);
+#undef STORE_FUNCTION
+
+#define DD_ATTR(name) \
+__ATTR(name, S_IRUGO|S_IWUSR, vr_##name##_show, \
+vr_##name##_store)
+
+static struct elv_fs_entry vr_attrs[] = {
+DD_ATTR(sync_expire),
+DD_ATTR(async_expire),
+DD_ATTR(fifo_batch),
+DD_ATTR(rev_penalty),
+__ATTR_NULL
+};
+
+static struct elevator_type iosched_vr = {
+.ops = {
+.elevator_merge_fn = vr_merge,
+.elevator_merged_fn = vr_merged_request,
+.elevator_merge_req_fn = vr_merged_requests,
+.elevator_dispatch_fn = vr_dispatch_requests,
+.elevator_add_req_fn = vr_add_request,
+//.elevator_queue_empty_fn = vr_queue_empty,
+.elevator_former_req_fn = elv_rb_former_request,
+.elevator_latter_req_fn = elv_rb_latter_request,
+.elevator_init_fn = vr_init_queue,
+.elevator_exit_fn = vr_exit_queue,
+},
+
+.elevator_attrs = vr_attrs,
+.elevator_name = "vr",
+.elevator_owner = THIS_MODULE,
+};
+
+static int __init vr_init(void)
+{
+elv_register(&iosched_vr);
+
+return 0;
+}
+
+static void __exit vr_exit(void)
+{
+elv_unregister(&iosched_vr);
+}
+
+module_init(vr_init);
+module_exit(vr_exit);
+
+MODULE_AUTHOR("Aaron Carroll");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("V(R) IO scheduler");
+
+
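
A small illustration of the selection rule described in the header comment: the seek distance of the candidate that keeps the current head direction is divided by rev_penalty, so reversing direction is effectively penalised. Below is a minimal sketch of that rule, assuming distances in sectors (effective_distance is an illustrative helper, not a function in this patch):

#include <linux/types.h>
#include <linux/kernel.h>	/* sector_div() */

/* rev_penalty == 1 gives plain SSTF, larger values approach SCAN, and
 * rev_penalty == 0 forces SCAN (vr_choose_request() substitutes INT_MAX
 * for the divisor in that case). */
static sector_t effective_distance(sector_t dist, bool same_direction,
				   int rev_penalty)
{
	if (!same_direction)
		return dist;		/* reversing pays the full distance */
	if (!rev_penalty)
		return 0;		/* SCAN: always keep going */
	sector_div(dist, rev_penalty);	/* continuing is rev_penalty times cheaper */
	return dist;
}

/* vr_choose_request() then picks the forward candidate (next_rq) when
 * effective_distance(forward gap, head_dir == FORWARD, rev_penalty) <=
 * effective_distance(backward gap, head_dir == BACKWARD, rev_penalty),
 * and the backward candidate (prev_rq) otherwise. */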
