Replace the public traverse_btree() with sd_inode_index_walk().

traverse_btree() becomes static to lib/sd_inode.c and gains an @interest
bitmask (BTREE_HEAD | BTREE_INDEX | BTREE_INDIRECT_IDX) that selects which
node kinds the callback is invoked for.  Every caller touched by this patch
only acted on BTREE_INDEX entries, so they now use sd_inode_index_walk(),
whose callback receives a struct sd_index * directly and no longer needs to
test the node type.  A failed read of an indirect node is now logged with
sd_err() and the walk continues with the next indirect entry.

Review fix: fill_object_tree() passed &i (the address of the inode pointer)
as the callback argument, while fill_cb() dereferences that argument as a
struct sd_inode *.  It now passes the inode pointer itself.

Note: the hunks below were re-flowed from a whitespace-collapsed copy of the
patch; indentation is restored with tabs by hand.  If context does not match
the tree, apply with whitespace matching relaxed.

diff --git a/dog/cluster.c b/dog/cluster.c
index 169e837b2..dbdc92ea6 100644
--- a/dog/cluster.c
+++ b/dog/cluster.c
@@ -281,19 +281,15 @@ static int list_snapshot(int argc, char **argv)
 	return ret;
 }
 
-static void fill_cb(void *data, enum btree_node_type type, void *arg)
+static void fill_cb(struct sd_index *idx, void *arg, int ignore)
 {
-	struct sd_index *ext;
 	struct sd_inode *inode = (struct sd_inode *)arg;
 	uint64_t oid;
 
-	if (type == BTREE_INDEX) {
-		ext = (struct sd_index *)data;
-		if (ext->vdi_id) {
-			oid = vid_to_data_oid(ext->vdi_id, ext->idx);
-			object_tree_insert(oid, inode->nr_copies,
-					   inode->copy_policy);
-		}
+	if (idx->vdi_id) {
+		oid = vid_to_data_oid(idx->vdi_id, idx->idx);
+		object_tree_insert(oid, inode->nr_copies,
+				   inode->copy_policy);
 	}
 }
 
@@ -323,7 +319,7 @@ static void fill_object_tree(uint32_t vid, const char *name, const char *tag,
 			object_tree_insert(oid, i->nr_copies, i->copy_policy);
 		}
 	} else
-		traverse_btree(i, fill_cb, &i);
+		sd_inode_index_walk(i, fill_cb, (void *)i);
 
 	/* fill vmstate object id */
 	nr_vmstate_object = DIV_ROUND_UP(i->vm_state_size, SD_DATA_OBJ_SIZE);
diff --git a/dog/vdi.c b/dog/vdi.c
index 3cc623999..debed0a31 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -1786,21 +1786,17 @@ struct check_arg {
 	int nr_copies;
 };
 
-static void check_cb(void *data, enum btree_node_type type, void *arg)
+static void check_cb(struct sd_index *idx, void *arg, int ignore)
 {
-	struct sd_index *ext;
 	struct check_arg *carg = arg;
 	uint64_t oid;
 
-	if (type == BTREE_INDEX) {
-		ext = (struct sd_index *)data;
-		if (ext->vdi_id) {
-			oid = vid_to_data_oid(ext->vdi_id, ext->idx);
-			*(carg->done) = (uint64_t)ext->idx * SD_DATA_OBJ_SIZE;
-			vdi_show_progress(*(carg->done), carg->inode->vdi_size);
-			queue_vdi_check_work(carg->inode, oid, NULL, carg->wq,
-					     carg->nr_copies);
-		}
+	if (idx->vdi_id) {
+		oid = vid_to_data_oid(idx->vdi_id, idx->idx);
+		*(carg->done) = (uint64_t)idx->idx * SD_DATA_OBJ_SIZE;
+		vdi_show_progress(*(carg->done), carg->inode->vdi_size);
+		queue_vdi_check_work(carg->inode, oid, NULL, carg->wq,
+				     carg->nr_copies);
 	}
 }
 
@@ -1841,7 +1837,7 @@ int do_vdi_check(const struct sd_inode *inode)
 		}
 	} else {
 		struct check_arg arg = {inode, &done, wq, nr_copies};
-		traverse_btree(inode, check_cb, &arg);
+		sd_inode_index_walk(inode, check_cb, &arg);
 		vdi_show_progress(inode->vdi_size, inode->vdi_size);
 	}
 
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index 9746fccac..1d17e8fab 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -249,11 +249,9 @@ struct sd_index_header {
 	uint32_t entries;
 };
 
-enum btree_node_type {
-	BTREE_HEAD = 1,
-	BTREE_INDEX,
-	BTREE_INDIRECT_IDX,
-};
+#define BTREE_HEAD 1
+#define BTREE_INDEX 2
+#define BTREE_INDIRECT_IDX 4
 
 typedef int (*write_node_fn)(uint64_t id, void *mem, unsigned int len,
 				uint64_t offset, uint32_t flags, int copies,
@@ -287,9 +285,8 @@ extern void sd_inode_copy_vdis(write_node_fn writer, read_node_fn reader,
 			       uint8_t nr_copies, uint8_t copy_policy,
 			       struct sd_inode *newi);
 
-typedef void (*btree_cb_fn)(void *data, enum btree_node_type type, void *arg);
-extern void traverse_btree(const struct sd_inode *inode,
-			   btree_cb_fn fn, void *arg);
+typedef void (*index_cb_fn)(struct sd_index *, void *arg, int type);
+void sd_inode_index_walk(const struct sd_inode *inode, index_cb_fn, void *);
 
 /* 64 bit FNV-1a non-zero initial basis */
 #define FNV1A_64_INIT ((uint64_t) 0xcbf29ce484222325ULL)
diff --git a/lib/sd_inode.c b/lib/sd_inode.c
index 444e3d719..5056843ec 100644
--- a/lib/sd_inode.c
+++ b/lib/sd_inode.c
@@ -126,27 +126,36 @@ static int indirect_idx_compare(struct sd_indirect_idx *a,
 	return intcmp(a->idx, b->idx);
 }
 
+typedef void (*btree_cb_fn)(void *data, void *arg, int type);
+
 /*
- * traverse the whole btree that include all the inode->data_vdi_id, bnode,
- * data objects and call btree_cb_fn()
+ * Traverse the whole btree, including every header, indirect_idx and index.
+ * @interest specifies which objects the user wants to run @fn against.
+ *
+ * If an error happens while reading a btree internal node, we simply log it
+ * and continue with the next candidate.
  */
-void traverse_btree(const struct sd_inode *inode, btree_cb_fn fn, void *arg)
+static void traverse_btree(const struct sd_inode *inode, btree_cb_fn fn,
+			   void *arg, int interest)
 {
 	struct sd_index_header *header = INDEX_HEADER(inode->data_vdi_id);
 	struct sd_index_header *leaf_node = NULL;
 	struct sd_index *last, *iter;
 	struct sd_indirect_idx *last_idx, *iter_idx;
 	void *tmp;
+	int ret;
 
-	fn(header, BTREE_HEAD, arg);
+	if (interest & BTREE_HEAD)
+		fn(header, arg, BTREE_HEAD);
 	if (header->depth == 1) {
 		last = LAST_INDEX(inode->data_vdi_id);
 		iter = FIRST_INDEX(inode->data_vdi_id);
 
-		while (iter != last) {
-			fn(iter, BTREE_INDEX, arg);
-			iter++;
-		}
+		if (interest & BTREE_INDEX)
+			while (iter != last) {
+				fn(iter, arg, BTREE_INDEX);
+				iter++;
+			}
 	} else if (header->depth == 2) {
 		last_idx = LAST_INDRECT_IDX(inode->data_vdi_id);
 		iter_idx = FIRST_INDIRECT_IDX(inode->data_vdi_id);
@@ -154,17 +163,25 @@ void traverse_btree(const struct sd_inode *inode, btree_cb_fn fn, void *arg)
 		tmp = (void *)leaf_node;
 
 		while (iter_idx != last_idx) {
-			inode_actor.reader(iter_idx->oid, &tmp,
-					   SD_INODE_DATA_INDEX_SIZE, 0);
+			ret = inode_actor.reader(iter_idx->oid, &tmp,
+						 SD_INODE_DATA_INDEX_SIZE, 0);
+			if (ret != SD_RES_SUCCESS) {
+				sd_err("failed to read %"PRIx64, iter_idx->oid);
+				iter_idx++;
+				continue;
+			}
 
-			fn(iter_idx, BTREE_INDIRECT_IDX, arg);
-			fn(leaf_node, BTREE_HEAD, arg);
+			if (interest & BTREE_INDIRECT_IDX)
+				fn(iter_idx, arg, BTREE_INDIRECT_IDX);
+			if (interest & BTREE_HEAD)
+				fn(leaf_node, arg, BTREE_HEAD);
 			last = LAST_INDEX(leaf_node);
 			iter = FIRST_INDEX(leaf_node);
-			while (iter != last) {
-				fn(iter, BTREE_INDEX, arg);
-				iter++;
-			}
+			if (interest & BTREE_INDEX)
+				while (iter != last) {
+					fn(iter, arg, BTREE_INDEX);
+					iter++;
+				}
 			iter_idx++;
 		}
 
@@ -173,8 +190,15 @@ void traverse_btree(const struct sd_inode *inode, btree_cb_fn fn, void *arg)
 		panic("This B-tree not support depth %u", header->depth);
 }
 
+/* Walk the sd_inode's vdi index array and call func against each sd_index */
+void sd_inode_index_walk(const struct sd_inode *inode, index_cb_fn func,
+			 void *arg)
+{
+	traverse_btree(inode, (btree_cb_fn)func, arg, BTREE_INDEX);
+}
+
 #ifdef DEBUG
-static void dump_cb(void *data, enum btree_node_type type, void *arg)
+static void dump_cb(void *data, void *arg, int type)
 {
 	struct sd_index_header *header;
 	struct sd_index *ext;
@@ -203,7 +227,8 @@ static void dump_btree(struct sd_inode *inode)
 {
 #ifdef DEBUG
 	sd_info("btree> BEGIN");
-	traverse_btree(inode, dump_cb, NULL);
+	traverse_btree(inode, dump_cb, NULL,
+		       BTREE_INDEX | BTREE_HEAD | BTREE_INDIRECT_IDX);
 	sd_info("btree> END");
 #endif
 }
@@ -840,27 +865,24 @@ struct stat_arg {
 	uint32_t vid;
 };
 
-static void stat_cb(void *data, enum btree_node_type type, void *arg)
+static void stat_cb(struct sd_index *idx, void *arg,
+		    int ignore)
 {
-	struct sd_index *ext;
 	struct stat_arg *sarg = arg;
 	uint64_t *my = sarg->my;
 	uint64_t *cow = sarg->cow;
 
-	if (type == BTREE_INDEX) {
-		ext = (struct sd_index *)data;
-		if (ext->vdi_id == sarg->vid)
-			(*my)++;
-		else if (ext->vdi_id != 0)
-			(*cow)++;
-	}
+	if (idx->vdi_id == sarg->vid)
+		(*my)++;
+	else if (idx->vdi_id != 0)
+		(*cow)++;
 }
 
 static void hypver_volume_stat(const struct sd_inode *inode,
 			       uint64_t *my_objs, uint64_t *cow_objs)
 {
 	struct stat_arg arg = {my_objs, cow_objs, inode->vdi_id};
-	traverse_btree(inode, stat_cb, &arg);
+	sd_inode_index_walk(inode, stat_cb, &arg);
 }
 
 static void volume_stat(const struct sd_inode *inode, uint64_t *my_objs,
diff --git a/sheep/http/kv.c b/sheep/http/kv.c
index fa7d13adb..482174119 100644
--- a/sheep/http/kv.c
+++ b/sheep/http/kv.c
@@ -90,34 +90,30 @@ int kv_create_account(const char *account)
 	return sd_create_hyper_volume(account, &vdi_id);
 }
 
-static void bucket_iterater(void *data, enum btree_node_type type, void *arg)
+static void bucket_iterater(struct sd_index *idx, void *arg, int ignore)
 {
-	struct sd_index *ext;
 	struct bucket_iterater_arg *biarg = arg;
 	struct kv_bnode bnode;
 	uint64_t oid;
 	int ret;
 
-	if (type == BTREE_INDEX) {
-		ext = (struct sd_index *)data;
-		if (!ext->vdi_id)
-			return;
+	if (!idx->vdi_id)
+		return;
 
-		oid = vid_to_data_oid(ext->vdi_id, ext->idx);
-		ret = sd_read_object(oid, (char *)&bnode, sizeof(bnode), 0);
-		if (ret != SD_RES_SUCCESS) {
-			sd_err("Failed to read data object %"PRIx64, oid);
-			return;
-		}
-
-		if (bnode.name[0] == 0)
-			return;
-		if (biarg->cb)
-			biarg->cb(bnode.name, biarg->opaque);
-		biarg->bucket_count++;
-		biarg->object_count += bnode.object_count;
-		biarg->bytes_used += bnode.bytes_used;
+	oid = vid_to_data_oid(idx->vdi_id, idx->idx);
+	ret = sd_read_object(oid, (char *)&bnode, sizeof(bnode), 0);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("Failed to read data object %"PRIx64, oid);
+		return;
 	}
+
+	if (bnode.name[0] == 0)
+		return;
+	if (biarg->cb)
+		biarg->cb(bnode.name, biarg->opaque);
+	biarg->bucket_count++;
+	biarg->object_count += bnode.object_count;
+	biarg->bytes_used += bnode.bytes_used;
 }
 
 static int read_account_meta(const char *account, uint64_t *bucket_count,
@@ -141,7 +137,7 @@ static int read_account_meta(const char *account, uint64_t *bucket_count,
 		goto out;
 	}
 
-	traverse_btree(inode, bucket_iterater, &arg);
+	sd_inode_index_walk(inode, bucket_iterater, &arg);
 	*object_count = arg.object_count;
 	*bucket_count = arg.bucket_count;
 	*used = arg.bytes_used;
@@ -253,7 +249,7 @@ static int bnode_create(struct kv_bnode *bnode, uint32_t account_vid)
 	int ret;
 
 	ret = sd_read_object(vid_to_vdi_oid(account_vid), (char *)inode,
-			       sizeof(*inode), 0);
+			     sizeof(*inode), 0);
 	if (ret != SD_RES_SUCCESS) {
 		sd_err("failed to read %" PRIx32 " %s", account_vid,
 		       sd_strerror(ret));
@@ -430,33 +426,29 @@ struct object_iterater_arg {
 	uint32_t count;
 };
 
-static void object_iterater(void *data, enum btree_node_type type, void *arg)
+static void object_iterater(struct sd_index *idx, void *arg, int ignore)
 {
-	struct sd_index *ext;
 	struct object_iterater_arg *oiarg = arg;
 	struct kv_onode *onode = NULL;
 	uint64_t oid;
 	int ret;
 
-	if (type == BTREE_INDEX) {
-		ext = (struct sd_index *)data;
-		if (!ext->vdi_id)
-			goto out;
-
-		onode = xmalloc(SD_DATA_OBJ_SIZE);
-		oid = vid_to_data_oid(ext->vdi_id, ext->idx);
-		ret = sd_read_object(oid, (char *)onode, SD_DATA_OBJ_SIZE, 0);
-		if (ret != SD_RES_SUCCESS) {
-			sd_err("Failed to read data object %"PRIx64, oid);
-			goto out;
-		}
+	if (!idx->vdi_id)
+		goto out;
 
-		if (onode->name[0] == '\0')
-			goto out;
-		if (oiarg->cb)
-			oiarg->cb(onode->name, oiarg->opaque);
-		oiarg->count++;
+	onode = xmalloc(SD_DATA_OBJ_SIZE);
+	oid = vid_to_data_oid(idx->vdi_id, idx->idx);
+	ret = sd_read_object(oid, (char *)onode, SD_DATA_OBJ_SIZE, 0);
+	if (ret != SD_RES_SUCCESS) {
+		sd_err("Failed to read data object %"PRIx64, oid);
+		goto out;
 	}
+
+	if (onode->name[0] == '\0')
+		goto out;
+	if (oiarg->cb)
+		oiarg->cb(onode->name, oiarg->opaque);
+	oiarg->count++;
 out:
 	free(onode);
 }
@@ -476,7 +468,7 @@ static int bucket_iterate_object(uint32_t bucket_vid, object_iter_cb cb,
 		goto out;
 	}
 
-	traverse_btree(inode, object_iterater, &arg);
+	sd_inode_index_walk(inode, object_iterater, &arg);
 out:
 	free(inode);
 	return ret;
@@ -590,7 +582,7 @@ int kv_iterate_bucket(const char *account, bucket_iter_cb cb, void *opaque)
 		goto out;
 	}
 
-	traverse_btree(&account_inode, bucket_iterater, &arg);
+	sd_inode_index_walk(&account_inode, bucket_iterater, &arg);
 out:
 	sys->cdrv->unlock(account_vid);
 	return ret;
diff --git a/sheep/vdi.c b/sheep/vdi.c
index ac1fa905f..7af743e95 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -824,20 +824,15 @@ struct delete_arg {
 	uint32_t *nr_deleted;
 };
 
-static void delete_cb(void *data, enum btree_node_type type, void *arg)
+static void delete_cb(struct sd_index *idx, void *arg, int ignore)
 {
-	struct sd_index *ext;
 	struct delete_arg *darg = (struct delete_arg *)arg;
 	uint64_t oid;
 	int ret;
 
-	if (type != BTREE_INDEX)
-		return;
-
-	ext = (struct sd_index *)data;
-	if (ext->vdi_id) {
-		oid = vid_to_data_oid(ext->vdi_id, ext->idx);
-		if (ext->vdi_id != darg->inode->vdi_id)
+	if (idx->vdi_id) {
+		oid = vid_to_data_oid(idx->vdi_id, idx->idx);
+		if (idx->vdi_id != darg->inode->vdi_id)
 			sd_debug("object %" PRIx64 " is base's data, would"
 				 " not be deleted.", oid);
 		else {
@@ -900,7 +895,7 @@ static int delete_one_vdi(uint32_t vdi_id)
 		}
 	} else {
 		struct delete_arg arg = {inode, &nr_deleted};
-		traverse_btree(inode, delete_cb, &arg);
+		sd_inode_index_walk(inode, delete_cb, &arg);
 	}
 
 	if (vdi_is_deleted(inode))