From ae04ea86feaa262ab256f0820e59c2c838f6ac15 Mon Sep 17 00:00:00 2001
From: Hitoshi Mitake
Date: Wed, 28 Oct 2015 14:22:22 +0900
Subject: [PATCH] sheep: avoid diskfull caused by recovery process

sheep can corrupt its cluster by diskfull with recovery process. For
avoiding this problem, this patch adds a new option -F to sheep. If
this command is passed to the sheep process, every sheep process of
the cluster stops itself if there is a possibility of diskfull during
recovery.

Fixes https://github.com/sheepdog/sheepdog/issues/59

Signed-off-by: Hitoshi Mitake
---
 sheep/recovery.c   | 92 ++++++++++++++++++++++++++++++++++++++++++++++
 sheep/sheep.c      |  5 +++
 sheep/sheep_priv.h |  3 ++
 3 files changed, 100 insertions(+)

diff --git a/sheep/recovery.c b/sheep/recovery.c
index bb89a8484..c871ca3e2 100644
--- a/sheep/recovery.c
+++ b/sheep/recovery.c
@@ -1151,6 +1151,93 @@ static void screen_object_list(struct recovery_list_work *rlw,
 	xqsort(rlw->oids, rlw->count, obj_cmp);
 }
 
+/*
+ * Return the index of the entry of nodes[] (nr_nodes entries) whose node id
+ * matches the node that owns vnode.  Panics if no entry matches.
+ */
+static int vnode_to_node_idx(const struct sd_vnode *vnode, int nr_nodes,
+			     const struct sd_node *nodes)
+{
+	for (int i = 0; i < nr_nodes; i++) {
+		if (node_id_cmp(&vnode->node->nid, &nodes[i].nid) == 0)
+			return i;
+	}
+
+	panic("vnode couldn't found in the node array");
+	return -1; /* never executed */
+}
+
+/*
+ * Panic if the next recovery may fill up the disk of any node in nodes[].
+ *
+ * fetch_object_list() is called for every node with the given epoch.  For
+ * each returned oid, get_vdi_object_size() of its VDI is added to the
+ * required space of every node that oid_to_vnodes() selects under vinfo.
+ * A node whose space field is smaller than its total triggers the panic.
+ *
+ * NOTE(review): an oid that shows up in several fetched lists is accounted
+ * once per list, so the requirement may be overestimated -- confirm that
+ * this conservative result is intended.
+ */
+static void check_diskfull_possibility(uint32_t epoch, struct vnode_info *vinfo,
+				       int nr_nodes, struct sd_node *nodes)
+{
+	uint64_t **oids_per_node;
+	size_t *nr_oids;
+	uint64_t *required_space_per_node;
+	const struct sd_vnode *vnodes[SD_MAX_COPIES];
+
+	oids_per_node = xcalloc(nr_nodes, sizeof(uint64_t *));
+	nr_oids = xcalloc(nr_nodes, sizeof(size_t));
+	required_space_per_node = xcalloc(nr_nodes, sizeof(uint64_t));
+
+	for (int i = 0; i < nr_nodes; i++)
+		oids_per_node[i] = fetch_object_list(&nodes[i], epoch,
+						     &nr_oids[i]);
+
+	for (int i = 0; i < nr_nodes; i++) {
+		uint64_t *oids = oids_per_node[i];
+
+		for (size_t j = 0; j < nr_oids[i]; j++) {
+			int nr_objs = get_obj_copy_number(oids[j],
+						vinfo->nr_zones);
+
+			oid_to_vnodes(oids[j], &vinfo->vroot, nr_objs, vnodes);
+
+			for (int k = 0; k < nr_objs; k++) {
+				int node_idx = vnode_to_node_idx(vnodes[k],
+							nr_nodes, nodes);
+
+				/*
+				 * TODO: current calculation doesn't consider
+				 * the differences between object types
+				 * e.g. inode, ledger, and normal data objects
+				 */
+				required_space_per_node[node_idx] +=
+					get_vdi_object_size(
+						oid_to_vid(oids[j]));
+			}
+		}
+	}
+
+	for (int i = 0; i < nr_nodes; i++) {
+		if (nodes[i].space < required_space_per_node[i])
+			panic("node %s will cause disk full, stopping whole"
+			      " cluster", node_to_str(&nodes[i]));
+
+		sd_debug("node %s (space: %"PRIu64") can store required space"
+			 " during next recovery (%"PRIu64")",
+			 node_to_str(&nodes[i]),
+			 nodes[i].space, required_space_per_node[i]);
+
+		free(oids_per_node[i]);
+	}
+
+	free(oids_per_node);
+	free(nr_oids);
+	free(required_space_per_node);
+}
+
 /* Prepare the object list that belongs to this node */
 static void prepare_object_list(struct work *work)
 {
@@ -1172,6 +1259,11 @@ static void prepare_object_list(struct work *work)
 
 	nodes = xmalloc(sizeof(struct sd_node) * nr_nodes);
 	nodes_to_buffer(&rw->cur_vinfo->nroot, nodes);
+
+	if (sys->avoid_diskfull)
+		check_diskfull_possibility(rw->epoch, rw->cur_vinfo,
+					   nr_nodes, nodes);
+
 again:
 	/* We need to start at random node for better load balance */
 	for (i = start; i < end; i++) {
diff --git a/sheep/sheep.c b/sheep/sheep.c
index 502c51cd2..5ffb35153 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -135,6 +135,8 @@ static struct sd_option sheep_options[] = {
 	 cluster_help},
 	{'D', "directio", false, "use direct IO for backend store"},
 	{'f', "foreground", false, "make the program run in foreground"},
+	{'F', "avoid-diskfull", false, "stop sheep process if a recovery"
+	 " process can cause disk full"},
 	{'g', "gateway", false, "make the program run as a gateway mode"},
 	{'h', "help", false, "display this help and exit"},
 	{'i', "ioaddr", true, "use separate network card to handle IO requests"
@@ -726,6 +728,9 @@ int main(int argc, char **argv)
 		case 'f':
 			daemonize = false;
 			break;
+		case 'F':
+			sys->avoid_diskfull = true;
+			break;
 		case 'g':
 			if (nr_vnodes > 0) {
 				sd_err("Options '-g' and '-V' can not be both specified");
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index fed61e2d1..bf52431ed 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -173,6 +173,9 @@ struct system_info {
 	/* upgrade data layout before starting service if necessary*/
 	bool upgrade;
 	struct sd_stat stat;
+
+	/* set by -F: run the disk full check in prepare_object_list() */
+	bool avoid_diskfull;
 };
 
 struct disk {