Skip to content

Commit

Permalink
btrfs: Add roundrobin read policy
Browse files Browse the repository at this point in the history
The new roundrobin policy selects raid1 mirrors based on the following
conditions:

- rotational (yes/no) - non-rotational devices are preferred and checked
  first
- number of inflight requests is lower than queue depth

Search for the mirror will consist of the following steps:

1) For each disk:
  a) Check if it's non-rotational. If it's rotational, stop step 1) and
     continue with step 2).
  b) If the number of inflight requests is lower than the queue depth,
     return the current disk and stop searching.
2) For each disk:
  a) Check if it's rotational.
  b) If number of inflight requests lower than queue depth, return the
     current disk and stop searching.
3) Return the next mirror ((the last used + 1) % number of mirrors).

Signed-off-by: Michal Rostecki <mrostecki@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
  • Loading branch information
vadorovsky authored and kdave committed Jan 28, 2021
1 parent e395397 commit 4535566
Show file tree
Hide file tree
Showing 4 changed files with 252 additions and 4 deletions.
2 changes: 1 addition & 1 deletion fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -976,7 +976,7 @@ struct btrfs_fs_info {
/* Max size to emit ZONE_APPEND write command */
u64 max_zone_append_size;

/* Data for scheduling made by the "load" read policy */
/* Data for scheduling made by the "load" and "roundrobin" read policies */
u64 __percpu *last_sched_time;
int __percpu *last_mirror;

Expand Down
72 changes: 71 additions & 1 deletion fs/btrfs/sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,66 @@ static int btrfs_sysfs_add_read_policy_load(struct btrfs_fs_devices *devices)
return ret;
}

/*
 * sysfs show handler for the "duration" attribute of the roundrobin read
 * policy.
 *
 * @kobj: the "roundrobin" policy kobject the attribute is attached to
 * @a:    the attribute being read (unused)
 * @buf:  output buffer provided by sysfs
 *
 * Writes the current roundrobin duration (in milliseconds) as an unsigned
 * decimal number followed by a newline.
 *
 * Return: number of bytes written to @buf.
 */
static ssize_t btrfs_read_policy_roundrobin_duration_show(struct kobject *kobj,
							   struct kobj_attribute *a,
							   char *buf)
{
	/*
	 * @kobj is registered below read_policies_kobj, so the kobject that
	 * to_fs_devs() resolves is two levels up.
	 */
	struct btrfs_fs_devices *fs_devs = to_fs_devs(kobj->parent->parent);

	/*
	 * The field is a u32: print it with %u so that values above INT_MAX
	 * are not shown as negative numbers. READ_ONCE() pairs with the
	 * WRITE_ONCE() in the store handler.
	 */
	return scnprintf(buf, PAGE_SIZE, "%u\n",
			 READ_ONCE(fs_devs->read_policy_roundrobin_duration));
}

/*
 * sysfs store handler for the "duration" attribute of the roundrobin read
 * policy.
 *
 * @kobj: the "roundrobin" policy kobject the attribute is attached to
 * @a:    the attribute being written (unused)
 * @buf:  user supplied text, expected to hold a base-10 unsigned 32-bit value
 * @len:  number of bytes in @buf
 *
 * Return: @len when the value was parsed and stored, -EINVAL when @buf could
 * not be parsed as a u32.
 */
static ssize_t btrfs_read_policy_roundrobin_duration_store(struct kobject *kobj,
							    struct kobj_attribute *a,
							    const char *buf, size_t len)
{
	struct btrfs_fs_devices *fs_devices;
	u32 new_duration;

	/* Any parse failure is reported to user space as -EINVAL. */
	if (kstrtou32(buf, 10, &new_duration) != 0)
		return -EINVAL;

	/* The policy kobject sits two levels below the one to_fs_devs() takes. */
	fs_devices = to_fs_devs(kobj->parent->parent);
	WRITE_ONCE(fs_devices->read_policy_roundrobin_duration, new_duration);

	return len;
}
/*
 * Declare the read/write "duration" attribute of the roundrobin read policy,
 * wired to the show/store handlers above.
 * NOTE(review): the exact expansion of BTRFS_ATTR_RW is not visible here;
 * presumably it defines a struct kobj_attribute - confirm against its
 * definition.
 */
BTRFS_ATTR_RW(read_policy_roundrobin, duration, btrfs_read_policy_roundrobin_duration_show,
btrfs_read_policy_roundrobin_duration_store);

/* NULL-terminated list of attributes exposed for the roundrobin policy. */
static struct attribute *read_policy_roundrobin_attrs[] = {
BTRFS_ATTR_PTR(read_policy_roundrobin, duration),
NULL
};
/* Provides read_policy_roundrobin_groups, referenced by the kobj_type below. */
ATTRIBUTE_GROUPS(read_policy_roundrobin);

/*
 * Release callback of the "roundrobin" read policy kobject.
 *
 * @kobj: the kobject being released; it is the read_policy_roundrobin_kobj
 *        member embedded in struct btrfs_fs_devices
 *
 * The kobject is not separately allocated, so nothing is freed here; the
 * embedded object is only zeroed.
 *
 * The kobject is cleared through @kobj itself instead of being looked up via
 * kobj->parent: the show/store handlers of this very kobject need
 * kobj->parent->parent to reach the btrfs_fs_devices, so resolving it from
 * kobj->parent would pick the wrong container and zero unrelated memory.
 * In addition the parent link is not guaranteed to still be set by the time
 * the release callback runs.
 */
static void btrfs_release_read_policy_roundrobin_kobj(struct kobject *kobj)
{
	memset(kobj, 0, sizeof(*kobj));
}

/*
 * kobject type of the "roundrobin" read policy directory: attribute accesses
 * are dispatched through kobj_sysfs_ops, the directory is populated from
 * read_policy_roundrobin_groups, and
 * btrfs_release_read_policy_roundrobin_kobj() is the release callback.
 */
static struct kobj_type read_policy_roundrobin_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.default_groups = read_policy_roundrobin_groups,
.release = btrfs_release_read_policy_roundrobin_kobj,
};

/*
 * Register the "roundrobin" directory below the read policies kobject.
 *
 * @devices: the fs_devices whose embedded read_policy_roundrobin_kobj is
 *           initialized and added under devices->read_policies_kobj
 *
 * Return: the result of kobject_init_and_add(); on a negative result the
 * reference taken by the initialization is dropped again before returning.
 */
static int btrfs_sysfs_add_read_policy_roundrobin(struct btrfs_fs_devices *devices)
{
	struct kobject *policy_kobj = &devices->read_policy_roundrobin_kobj;
	int err;

	err = kobject_init_and_add(policy_kobj, &read_policy_roundrobin_ktype,
				   devices->read_policies_kobj,
				   "%s", "roundrobin");
	if (err >= 0)
		return err;

	/* A failed kobject_init_and_add() still has to be paired with a put. */
	kobject_put(policy_kobj);
	return err;
}

/*
* Look for an exact string @string in @buffer with possible leading or
* trailing whitespace
Expand All @@ -1008,7 +1068,7 @@ static bool strmatch(const char *buffer, const char *string)

/* Must follow the order as in enum btrfs_read_policy */
static const char * const btrfs_read_policy_name[] = { "pid", "latency",
"device", "load" };
"device", "load", "roundrobin" };

static ssize_t btrfs_read_policy_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
Expand Down Expand Up @@ -1157,6 +1217,12 @@ static void __btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
fs_devs->read_policies_kobj = NULL;
}

if (fs_devs->read_policies_kobj) {
kobject_del(fs_devs->read_policies_kobj);
kobject_put(fs_devs->read_policies_kobj);
fs_devs->read_policies_kobj = NULL;
}

if (fs_devs->devinfo_kobj) {
kobject_del(fs_devs->devinfo_kobj);
kobject_put(fs_devs->devinfo_kobj);
Expand Down Expand Up @@ -1755,6 +1821,10 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs)
if (error)
return error;

error = btrfs_sysfs_add_read_policy_roundrobin(fs_devs);
if (error)
return error;

return 0;
}

Expand Down
174 changes: 172 additions & 2 deletions fs/btrfs/volumes.c
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,9 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
else if (fsid)
memcpy(fs_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE);

fs_devs->read_policy_roundrobin_duration =
BTRFS_DEFAULT_READ_POLICY_ROUNDROBIN_DURATION;

fs_devs->read_policy_load_duration =
BTRFS_DEFAULT_READ_POLICY_LOAD_DURATION;
fs_devs->read_policy_load_rotating_inc =
Expand Down Expand Up @@ -5608,6 +5611,169 @@ static int find_live_mirror_load(struct btrfs_fs_info *fs_info,
return preferred_mirror;
}

/*
 * __find_live_mirror_roundrobin_nonrot() searches for a non-rotational raid1
 * mirror which can process more requests.
 *
 * @fs_info:          the filesystem (currently unused)
 * @map:              extent mapping which contains stripes
 * @first:            number of the first mirror
 * @limit:            number of the first mirror + number of mirrors
 * @preferred_mirror: a pointer used to return the first result - the number of
 *                    the preferred mirror; if no suitable mirror found, the
 *                    value is -1
 * @rotational_first: a pointer used to return the second result - the number of
 *                    the first rotational device, on which the iteration
 *                    stopped; if no rotational mirrors found, the value is -1
 *
 * Stripes in [@first, @limit) are visited in order. The walk ends at the first
 * rotational device (reported through @rotational_first) or at the first
 * non-rotational device whose number of inflight requests is below its queue
 * depth (reported through @preferred_mirror). Stripes whose device has no
 * block device attached are skipped.
 *
 * The function returns nothing; both results are passed back through the
 * pointers and must be checked by find_live_mirror_roundrobin().
 */
static void __find_live_mirror_roundrobin_nonrot(struct btrfs_fs_info *fs_info,
						 struct map_lookup *map,
						 int first, int limit,
						 int *preferred_mirror,
						 int *rotational_first)
{
	struct btrfs_device *device;
	struct block_device *bdev;
	unsigned int queue_depth;
	int i;

	/* Initial return values */
	*preferred_mirror = -1;
	*rotational_first = -1;

	for (i = first; i < limit; i++) {
		device = map->stripes[i].dev;
		bdev = device->bdev;

		/*
		 * A device without a block device (e.g. a missing device)
		 * has no request queue to inspect and cannot serve the read.
		 */
		if (!bdev)
			continue;

		/* If the device is rotational, stop the search. */
		if (!blk_queue_nonrot(bdev->bd_disk->queue)) {
			*rotational_first = i;
			return;
		}

		queue_depth = blk_queue_depth(bdev->bd_disk->queue);

		/*
		 * If the mirror with suitable criteria found, return it and
		 * stop the search.
		 */
		if (percpu_counter_compare(&device->inflight, queue_depth) < 0) {
			*preferred_mirror = i;
			return;
		}
	}
}

/*
 * __find_live_mirror_roundrobin_rot() searches for a raid1 mirror which can
 * process more requests, starting at the first rotational mirror.
 *
 * @fs_info: the filesystem (currently unused)
 * @map:     extent mapping which contains stripes
 * @first:   number of the first rotational mirror
 * @limit:   number of the first mirror (in the whole array) + number
 *           of mirrors
 *
 * Stripes in [@first, @limit) are visited in order and the first one whose
 * number of inflight requests is below the queue depth of its device is
 * selected. The rotational flag itself is not tested here. Stripes whose
 * device has no block device attached are skipped.
 *
 * Return: the number of the selected mirror, or -1 if no visited mirror can
 * take more requests.
 */
static int __find_live_mirror_roundrobin_rot(struct btrfs_fs_info *fs_info,
					     struct map_lookup *map,
					     int first, int limit)
{
	struct btrfs_device *device;
	struct block_device *bdev;
	unsigned int queue_depth;
	int i;

	for (i = first; i < limit; i++) {
		device = map->stripes[i].dev;
		bdev = device->bdev;

		/*
		 * A device without a block device (e.g. a missing device)
		 * has no request queue to inspect and cannot serve the read.
		 */
		if (!bdev)
			continue;

		queue_depth = blk_queue_depth(bdev->bd_disk->queue);

		/*
		 * If the mirror with suitable criteria found, return it and
		 * stop the search.
		 */
		if (percpu_counter_compare(&device->inflight, queue_depth) < 0)
			return i;
	}

	return -1;
}

/*
* find_live_mirror_roundrobin() searches for a raid1 mirror which can process
* more requests.
*
* @fs_info: the filesystem
* @map: extent mapping which contains stripes
* @first: number of the first mirror
* @num_stripes: number of stripes in the array
*
* It calls __find_live_mirror_roundrobin() function to try to find a suitable
* mirror, firstly non-rotational, then rotational.
*
* If no suitable mirror found, it selects the next (last used + 1) mirror.
*/
static int find_live_mirror_roundrobin(struct btrfs_fs_info *fs_info,
struct map_lookup *map, int first,
int num_stripes)
{
int preferred_mirror;
int rotational_first;
u64 last_sched_time;
u64 duration;
int limit;
u64 now;

last_sched_time = this_cpu_read(*fs_info->last_sched_time);
if (last_sched_time != 0) {
now = ktime_get_ns();
duration = now - last_sched_time;

if (duration < (NSEC_PER_MSEC *
fs_info->fs_devices->read_policy_roundrobin_duration)) {
preferred_mirror = this_cpu_read(*fs_info->last_mirror);
goto out;
}
}

limit = first + num_stripes;

/* Try to find non-rotational mirror */
__find_live_mirror_roundrobin_nonrot(fs_info, map, first, limit,
&preferred_mirror,
&rotational_first);
if (preferred_mirror >= 0)
goto out;

/* Try to find rotational mirror if any available */
if (rotational_first >= 0) {
preferred_mirror = __find_live_mirror_roundrobin_rot(
fs_info, map, first, num_stripes);
if (preferred_mirror >= 0)
goto out;
}

/* If no suitable mirror found, return the next mirror */
preferred_mirror = (
this_cpu_read(*fs_info->last_mirror) + 1) % num_stripes;

out:
this_cpu_write(*fs_info->last_sched_time, now);
this_cpu_write(*fs_info->last_mirror, preferred_mirror);
return preferred_mirror;
}

static int find_live_mirror(struct btrfs_fs_info *fs_info,
struct map_lookup *map, int first,
int dev_replace_is_ongoing)
Expand All @@ -5630,13 +5796,17 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
default:
/* Shouldn't happen, just warn and use pid instead of failing */
btrfs_warn_rl(fs_info,
"unknown read_policy type %u, reset to pid",
"unknown read_policy type %u, reset to roundrobin",
fs_info->fs_devices->read_policy);
fs_info->fs_devices->read_policy = BTRFS_READ_POLICY_PID;
fs_info->fs_devices->read_policy = BTRFS_READ_POLICY_ROUNDROBIN;
fallthrough;
case BTRFS_READ_POLICY_PID:
preferred_mirror = first + (current->pid % num_stripes);
break;
case BTRFS_READ_POLICY_ROUNDROBIN:
preferred_mirror = find_live_mirror_roundrobin(
fs_info, map, first, num_stripes);
break;
case BTRFS_READ_POLICY_LOAD:
preferred_mirror = find_live_mirror_load(fs_info, map, first,
num_stripes);
Expand Down
8 changes: 8 additions & 0 deletions fs/btrfs/volumes.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,8 @@ enum btrfs_read_policy {
BTRFS_READ_POLICY_DEVICE,
/* Use the least loaded mirrors */
BTRFS_READ_POLICY_LOAD,
/* Round robin with priority based on class and queue length */
BTRFS_READ_POLICY_ROUNDROBIN,
BTRFS_NR_READ_POLICY,
};

Expand All @@ -244,6 +246,9 @@ enum btrfs_read_policy {
*/
#define BTRFS_DEFAULT_READ_POLICY_LOAD_ROTATING_INC 0

/* Default duration in the roundrobin read policy (100 ms) */
#define BTRFS_DEFAULT_READ_POLICY_ROUNDROBIN_DURATION 100

struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
u8 metadata_uuid[BTRFS_FSID_SIZE];
Expand Down Expand Up @@ -296,6 +301,7 @@ struct btrfs_fs_devices {
struct kobject *devinfo_kobj;
struct kobject *read_policies_kobj;
struct kobject read_policy_load_kobj;
struct kobject read_policy_roundrobin_kobj;
struct completion kobj_unregister;

enum btrfs_chunk_allocation_policy chunk_alloc_policy;
Expand All @@ -306,6 +312,8 @@ struct btrfs_fs_devices {
/* Configuration specific for the load read policy */
u32 read_policy_load_duration;
u32 read_policy_load_rotating_inc;
/* Configuration specific for the roundrobin read policy */
u32 read_policy_roundrobin_duration;
};

#define BTRFS_BIO_INLINE_CSUM_SIZE 64
Expand Down

0 comments on commit 4535566

Please sign in to comment.