Skip to content

Commit

Permalink
sheep: avoid diskfull caused by recovery process
Browse files Browse the repository at this point in the history
A sheep cluster can be corrupted when a recovery process fills up a
disk. To avoid this problem, this patch adds a new option -F to sheep.
If this option is passed to the sheep process, every sheep process of
the cluster stops itself if there is a possibility of a full disk
during recovery.

Fixes #59

Signed-off-by: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp>
  • Loading branch information
mitake committed Oct 28, 2015
1 parent 7e746e6 commit ae04ea8
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 0 deletions.
77 changes: 77 additions & 0 deletions sheep/recovery.c
Expand Up @@ -1151,6 +1151,78 @@ static void screen_object_list(struct recovery_list_work *rlw,
xqsort(rlw->oids, rlw->count, obj_cmp);
}

static int vnode_to_node_idx(struct sd_vnode *vnode, int nr_nodes,
struct sd_node *nodes)
{
for (int i = 0; i < nr_nodes; i++) {
if (node_id_cmp(&vnode->node->nid, &nodes[i].nid) == 0)
return i;
}

panic("vnode couldn't found in the node array");
return -1; /* never executed */
}

/*
 * Check whether any node could run out of space during the next recovery,
 * and stop the process with panic() if so.
 *
 * The object list of every node in 'nodes' is fetched for 'epoch'.  For each
 * listed object, the vnodes selected by oid_to_vnodes() on vinfo->vroot are
 * mapped back to indexes in 'nodes', and the value returned by
 * get_vdi_object_size() for the object's VDI is added to the requirement of
 * each of those nodes.  Finally nodes[i].space is compared with the
 * accumulated requirement of node i.
 *
 * epoch:    epoch handed to fetch_object_list()
 * vinfo:    vnode information (vroot, nr_zones) used to place each object
 * nr_nodes: number of entries in 'nodes'
 * nodes:    nodes whose object lists are fetched and whose space is checked
 *
 * NOTE(review): the requirement is accumulated once per entry of every
 * fetched list.  If the same oid is reported by more than one node (e.g. one
 * report per replica), its size is counted several times and the estimate is
 * pessimistic -- confirm against fetch_object_list() whether lists overlap.
 */
static void check_diskfull_possibility(uint32_t epoch, struct vnode_info *vinfo,
				       int nr_nodes, struct sd_node *nodes)
{
	uint64_t **oids_per_node;
	size_t *nr_oids;
	uint64_t *required_space_per_node;
	const struct sd_vnode *vnodes[SD_MAX_COPIES];

	/* xcalloc() zero-fills, so counts and requirements start at 0 */
	oids_per_node = xcalloc(nr_nodes, sizeof(uint64_t *));
	nr_oids = xcalloc(nr_nodes, sizeof(size_t));
	required_space_per_node = xcalloc(nr_nodes, sizeof(uint64_t));

	for (int i = 0; i < nr_nodes; i++)
		oids_per_node[i] = fetch_object_list(&nodes[i], epoch,
						     &nr_oids[i]);

	for (int i = 0; i < nr_nodes; i++) {
		uint64_t *oids = oids_per_node[i];

		/* defensive: never dereference a missing list */
		if (!oids)
			continue;

		/* size_t index: nr_oids[i] is a size_t and may exceed INT_MAX */
		for (size_t j = 0; j < nr_oids[i]; j++) {
			int nr_objs = get_obj_copy_number(oids[j],
							  vinfo->nr_zones);
			/*
			 * TODO: current calculation doesn't consider
			 * about the differences between object types
			 * e.g. inode, ledger, and normal data objects
			 */
			uint64_t obj_size =
				get_vdi_object_size(oid_to_vid(oids[j]));

			oid_to_vnodes(oids[j], &vinfo->vroot, nr_objs, vnodes);

			/* every selected node needs room for this object */
			for (int k = 0; k < nr_objs; k++) {
				int node_idx = vnode_to_node_idx(
					(struct sd_vnode *)vnodes[k],
					nr_nodes, nodes);

				required_space_per_node[node_idx] += obj_size;
			}
		}
	}

	for (int i = 0; i < nr_nodes; i++) {
		if (nodes[i].space < required_space_per_node[i])
			panic("node %s will cause disk full, stopping whole"
			      " cluster", node_to_str(&nodes[i]));

		sd_debug("node %s (space: %"PRIu64") can store required space"
			 " during next recovery (%"PRIu64")",
			 node_to_str(&nodes[i]),
			 nodes[i].space, required_space_per_node[i]);

		free(oids_per_node[i]);
	}

	free(oids_per_node);
	free(nr_oids);
	free(required_space_per_node);
}

/* Prepare the object list that belongs to this node */
static void prepare_object_list(struct work *work)
{
Expand All @@ -1172,6 +1244,11 @@ static void prepare_object_list(struct work *work)

nodes = xmalloc(sizeof(struct sd_node) * nr_nodes);
nodes_to_buffer(&rw->cur_vinfo->nroot, nodes);

if (sys->avoid_diskfull)
check_diskfull_possibility(rw->epoch, rw->cur_vinfo,
nr_nodes, nodes);

again:
/* We need to start at random node for better load balance */
for (i = start; i < end; i++) {
Expand Down
5 changes: 5 additions & 0 deletions sheep/sheep.c
Expand Up @@ -135,6 +135,8 @@ static struct sd_option sheep_options[] = {
cluster_help},
{'D', "directio", false, "use direct IO for backend store"},
{'f', "foreground", false, "make the program run in foreground"},
{'F', "avoid-diskfull", false, "stop sheep process if a recovery"
"process can cause disk full"},
{'g', "gateway", false, "make the program run as a gateway mode"},
{'h', "help", false, "display this help and exit"},
{'i', "ioaddr", true, "use separate network card to handle IO requests"
Expand Down Expand Up @@ -726,6 +728,9 @@ int main(int argc, char **argv)
case 'f':
daemonize = false;
break;
case 'F':
sys->avoid_diskfull = true;
break;
case 'g':
if (nr_vnodes > 0) {
sd_err("Options '-g' and '-V' can not be both specified");
Expand Down
2 changes: 2 additions & 0 deletions sheep/sheep_priv.h
Expand Up @@ -173,6 +173,8 @@ struct system_info {
/* upgrade data layout before starting service if necessary*/
bool upgrade;
struct sd_stat stat;

bool avoid_diskfull;
};

struct disk {
Expand Down

0 comments on commit ae04ea8

Please sign in to comment.