
Fix several critical cgroups bugs

This commit contains the following important bug fixes related to the
cgroup support in BFQ (and, in particular, to the integration with the
blkio controller), plus an improvement. These changes also entail the
transition from v7r10 to v7r11.

BUGFIX Remove the group_list data structure, which ended up in an
inconsistent state if BFQ happened to be activated for some device
when some blkio groups already existed (those groups were never added
to the list). The blkg list of the request queue is now used wherever
the removed group_list was used, as sketched below.
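
For example, the new bfq_end_wr_async (shown in full in the
bfq-cgroup.c diff below) now walks the blkg list that blk-cgroup
already maintains on the request queue:

	struct blkcg_gq *blkg;

	/* every group attached to this queue, kept in sync by blk-cgroup */
	list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
		struct bfq_group *bfqg = blkg_to_bfqg(blkg);

		bfq_end_wr_async_queues(bfqd, bfqg);
	}
	bfq_end_wr_async_queues(bfqd, bfqd->root_group);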

BUGFIX Initialize and reset dead_stats as well (the paired init/exit
calls are sketched below).
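
Concretely, as in the bfq_pd_alloc and bfq_pd_free hunks below, both
stats sets are now set up together and torn down together:

	if (bfqg_stats_init(&bfqg->stats, gfp) ||
	    bfqg_stats_init(&bfqg->dead_stats, gfp)) {
		kfree(bfqg);
		return NULL;
	}

	/* ... and, symmetrically, on free: */
	bfqg_stats_exit(&bfqg->stats);
	bfqg_stats_exit(&bfqg->dead_stats);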

BUGFIX Add, in __bfq_deactivate_entity, correct handling of the case
where the entity to deactivate has never been activated at all (the
guard is sketched below).
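
The guard, from the bfq-sched.c hunk below: an entity that was never
activated may still have a NULL sched_data, so the function now bails
out before dereferencing sd:

	struct bfq_sched_data *sd = entity->sched_data;

	if (sd == NULL || !entity->on_st) /* never activated, or inactive */
		return 0;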

BUGFIX Add the missing free of the root group for the case where full
hierarchical support is not enabled (resulting code sketched below).
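
Without CONFIG_BFQ_GROUP_IOSCHED the root group is allocated by BFQ
itself rather than managed by blk-cgroup, so bfq_exit_queue must free
it explicitly (from the bfq-iosched.c hunk below):

	#ifdef CONFIG_BFQ_GROUP_IOSCHED
		blkcg_deactivate_policy(q, &blkcg_policy_bfq);
	#else
		kfree(bfqd->root_group);
	#endif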

IMPROVEMENT Remove the now-superfluous bfq_disconnect_groups
function. The same functionality is achieved through per-group
invocations of bfq_pd_offline, which the blk-cgroup code in turn
guarantees to execute when needed (roughly as sketched below).
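
Roughly, this works because blkcg_deactivate_policy (called from
bfq_exit_queue when group scheduling is enabled) offlines every
policy-data instance of the policy, which for BFQ means calling
bfq_pd_offline on each group. A simplified sketch of that dispatch,
not the literal blk-cgroup source (names as in Linux 4.4):

	/* for each group attached to the queue ... */
	list_for_each_entry(blkg, &q->blkg_list, q_node)
		if (pol->pd_offline_fn)	/* ... run the policy's offline hook */
			pol->pd_offline_fn(blkg->pd[pol->plid]);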
paolo-github committed Jan 20, 2016
1 parent 3adae5e commit c53f28ac108d738173b64dff1aede5c69070befd
Showing with 39 additions and 73 deletions.
  1. +29 −61 block/bfq-cgroup.c
  2. +3 −2 block/bfq-iosched.c
  3. +6 −3 block/bfq-sched.c
  4. +1 −7 block/bfq.h
@@ -398,14 +398,29 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
 	if (!bfqg)
 		return NULL;
 
-	if (bfqg_stats_init(&bfqg->stats, gfp)) {
+	if (bfqg_stats_init(&bfqg->stats, gfp) ||
+	    bfqg_stats_init(&bfqg->dead_stats, gfp)) {
 		kfree(bfqg);
 		return NULL;
 	}
 
 	return &bfqg->pd;
 }
 
+static void bfq_group_set_parent(struct bfq_group *bfqg,
+				 struct bfq_group *parent)
+{
+	struct bfq_entity *entity;
+
+	BUG_ON(!parent);
+	BUG_ON(!bfqg);
+	BUG_ON(bfqg == parent);
+
+	entity = &bfqg->entity;
+	entity->parent = parent->my_entity;
+	entity->sched_data = &parent->sched_data;
+}
+
 static void bfq_pd_init(struct blkg_policy_data *pd)
 {
 	struct blkcg_gq *blkg = pd_to_blkg(pd);
@@ -423,15 +438,16 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
 	bfqg->bfqd = bfqd;
 	bfqg->active_entities = 0;
 	bfqg->rq_pos_tree = RB_ROOT;
-
-	/* if the root_group does not exist, we are handling it right now */
-	if (bfqd->root_group && bfqg != bfqd->root_group)
-		hlist_add_head(&bfqg->bfqd_node, &bfqd->group_list);
 }
 
 static void bfq_pd_free(struct blkg_policy_data *pd)
 {
-	return kfree(pd_to_bfqg(pd));
+	struct bfq_group *bfqg = pd_to_bfqg(pd);
+
+	bfqg_stats_exit(&bfqg->stats);
+	bfqg_stats_exit(&bfqg->dead_stats);
+	return kfree(bfqg);
 }
 
 /* offset delta from bfqg->stats to bfqg->dead_stats */
@@ -470,20 +486,6 @@ static void bfq_pd_reset_stats(struct blkg_policy_data *pd)
 	bfqg_stats_reset(&bfqg->dead_stats);
 }
 
-static void bfq_group_set_parent(struct bfq_group *bfqg,
-				 struct bfq_group *parent)
-{
-	struct bfq_entity *entity;
-
-	BUG_ON(!parent);
-	BUG_ON(!bfqg);
-	BUG_ON(bfqg == parent);
-
-	entity = &bfqg->entity;
-	entity->parent = parent->my_entity;
-	entity->sched_data = &parent->sched_data;
-}
-
 static struct bfq_group *bfq_find_alloc_group(struct bfq_data *bfqd,
 					      struct blkcg *blkcg)
 {
@@ -732,8 +734,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
 	bfqg = pd_to_bfqg(pd);
 	BUG_ON(!bfqg);
 	bfqd = bfqg->bfqd;
-	BUG_ON(!bfqd);
-	BUG_ON(!bfqd->root_group);
+	BUG_ON(bfqd && !bfqd->root_group);
 
 	entity = bfqg->my_entity;
@@ -745,8 +746,8 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
 	 * deactivating the group itself.
 	 */
 	for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) {
-		st = bfqg->sched_data.service_tree + i;
-
+		BUG_ON(!bfqg->sched_data.service_tree);
+		st = bfqg->sched_data.service_tree + i;
 		/*
 		 * The idle tree may still contain bfq_queues belonging
 		 * to exited task because they never migrated to a different
@@ -774,7 +775,6 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
 	BUG_ON(bfqg->sched_data.next_in_service);
 	BUG_ON(bfqg->sched_data.in_service_entity);
 
-	hlist_del(&bfqg->bfqd_node);
 	__bfq_deactivate_entity(entity, 0);
 	bfq_put_async_queues(bfqd, bfqg);
 	BUG_ON(entity->tree);
@@ -784,46 +784,14 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
 static void bfq_end_wr_async(struct bfq_data *bfqd)
 {
-	struct hlist_node *tmp;
-	struct bfq_group *bfqg;
-
-	hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node)
-		bfq_end_wr_async_queues(bfqd, bfqg);
-	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
-}
-
-/**
- * bfq_disconnect_groups - disconnect @bfqd from all its groups.
- * @bfqd: the device descriptor being exited.
- *
- * When the device exits we just make sure that no lookup can return
- * the now unused group structures. They will be deallocated on cgroup
- * destruction.
- */
-static void bfq_disconnect_groups(struct bfq_data *bfqd)
-{
-	struct hlist_node *tmp;
-	struct bfq_group *bfqg;
-
-	bfq_log(bfqd, "disconnect_groups beginning");
-	hlist_for_each_entry_safe(bfqg, tmp, &bfqd->group_list, bfqd_node) {
-		hlist_del(&bfqg->bfqd_node);
-
-		__bfq_deactivate_entity(bfqg->my_entity, 0);
+	struct blkcg_gq *blkg;
 
-		/*
-		 * Don't remove from the group hash, just set an
-		 * invalid key. No lookups can race with the
-		 * assignment as bfqd is being destroyed; this
-		 * implies also that new elements cannot be added
-		 * to the list.
-		 */
-		rcu_assign_pointer(bfqg->bfqd, NULL);
+	list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
+		struct bfq_group *bfqg = blkg_to_bfqg(blkg);
 
-		bfq_log(bfqd, "disconnect_groups: put async for group %p",
-			bfqg);
-		bfq_put_async_queues(bfqd, bfqg);
+		bfq_end_wr_async_queues(bfqd, bfqg);
 	}
+	bfq_end_wr_async_queues(bfqd, bfqd->root_group);
 }
 
 static u64 bfqio_cgroup_weight_read(struct cgroup_subsys_state *css,
@@ -3919,7 +3919,6 @@ static void bfq_exit_queue(struct elevator_queue *e)
 	list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list)
 		bfq_deactivate_bfqq(bfqd, bfqq, 0);
 
-	bfq_disconnect_groups(bfqd);
 	spin_unlock_irq(q->queue_lock);
 
 	bfq_shutdown_timer_wq(bfqd);
@@ -3930,6 +3929,8 @@ static void bfq_exit_queue(struct elevator_queue *e)
 
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 	blkcg_deactivate_policy(q, &blkcg_policy_bfq);
+#else
+	kfree(bfqd->root_group);
 #endif
 
 	kfree(bfqd);
@@ -4385,7 +4386,7 @@ static int __init bfq_init(void)
 	if (ret)
 		goto err_pol_unreg;
 
-	pr_info("BFQ I/O-scheduler: v7r10");
+	pr_info("BFQ I/O-scheduler: v7r11");
 
 	return 0;
@@ -839,13 +839,16 @@ static void bfq_activate_entity(struct bfq_entity *entity)
 static int __bfq_deactivate_entity(struct bfq_entity *entity, int requeue)
 {
 	struct bfq_sched_data *sd = entity->sched_data;
-	struct bfq_service_tree *st = bfq_entity_service_tree(entity);
-	int was_in_service = entity == sd->in_service_entity;
+	struct bfq_service_tree *st;
+	int was_in_service;
 	int ret = 0;
 
-	if (!entity->on_st)
+	if (sd == NULL || !entity->on_st) /* never activated, or inactive */
 		return 0;
 
+	st = bfq_entity_service_tree(entity);
+	was_in_service = entity == sd->in_service_entity;
+
 	BUG_ON(was_in_service && entity->tree);
 
 	if (was_in_service) {
@@ -1,5 +1,5 @@
 /*
- * BFQ-v7r10 for 4.4.0: data structures and common functions prototypes.
+ * BFQ-v7r11 for 4.4.0: data structures and common functions prototypes.
  *
  * Based on ideas and code from CFQ:
  * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
@@ -421,7 +421,6 @@ enum bfq_device_speed {
  * @peak_rate_samples: number of samples used to calculate @peak_rate.
  * @bfq_max_budget: maximum budget allotted to a bfq_queue before
  *                  rescheduling.
- * @group_list: list of all the bfq_groups active on the device.
  * @active_list: list of all the bfq_queues active on the device.
  * @idle_list: list of all the bfq_queues idle on the device.
  * @bfq_fifo_expire: timeout for async/sync requests; when it expires
@@ -526,7 +525,6 @@ struct bfq_data {
 	u64 peak_rate;
 	int bfq_max_budget;
 
-	struct hlist_head group_list;
 	struct list_head active_list;
 	struct list_head idle_list;
@@ -702,8 +700,6 @@ struct bfq_group_data {
  * @entity: schedulable entity to insert into the parent group sched_data.
  * @sched_data: own sched_data, to contain child entities (they may be
  *              both bfq_queues and bfq_groups).
- * @bfqd_node: node to be inserted into the @bfqd->group_list list
- *             of the groups active on the same device; used for cleanup.
  * @bfqd: the bfq_data for the device this group acts upon.
  * @async_bfqq: array of async queues for all the tasks belonging to
  *              the group, one queue per ioprio value per ioprio_class,
@@ -737,8 +733,6 @@ struct bfq_group {
 	struct bfq_entity entity;
 	struct bfq_sched_data sched_data;
 
-	struct hlist_node bfqd_node;
-
 	void *bfqd;
 
 	struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR];
