From 4adab0c1915ddd16a1b52ff465b4da40454c46d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= Date: Tue, 26 Sep 2017 14:03:21 +0200 Subject: [PATCH 1/2] skip FREEOBJECTS for objects which can't exist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When sending an incremental stream based on a snapshot, the receiving side must have the same base snapshot. Thus we do not need to send FREEOBJECTS records for any objects past the maximum one which exists locally. This allows us to send incremental streams (again) to older ZFS implementations (e.g. ZoL < 0.7) which actually try to free all objects in a FREEOBJECTS record, instead of bailing out early. Signed-off-by: Fabian Grünbichler --- module/zfs/dmu_send.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index fc63b6e1a6a8..344ec68f2666 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -454,6 +454,22 @@ static int dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) { struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects); + uint64_t maxobj = DNODES_PER_BLOCK * + (DMU_META_DNODE(dsp->dsa_os)->dn_maxblkid + 1); + + /* + * ZoL < 0.7 does not handle large FREEOBJECTS records correctly, + * leading to zfs recv never completing. to avoid this issue, don't + * send FREEOBJECTS records for object IDs which cannot exist on the + * receiving side. 
+ */ + if (maxobj > 0) { + if (maxobj < firstobj) + return (0); + + if (maxobj < firstobj + numobjs) + numobjs = maxobj - firstobj; + } /* * If there is a pending op, but it's not PENDING_FREEOBJECTS, From 9c2f987547950624ac08294a69d250c5f4c4acd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= Date: Fri, 29 Sep 2017 12:00:29 +0200 Subject: [PATCH 2/2] free objects when receiving full stream as clone MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All objects after the last written or freed object are not supposed to exist after receiving the stream. Free them accordingly, as if a FREEOBJECTS record for them had been included in the stream. Signed-off-by: Fabian Grünbichler --- include/sys/dmu_send.h | 1 + module/zfs/dmu_send.c | 55 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/include/sys/dmu_send.h b/include/sys/dmu_send.h index 081d3dd78b11..19d9a2d44e8f 100644 --- a/include/sys/dmu_send.h +++ b/include/sys/dmu_send.h @@ -62,6 +62,7 @@ typedef struct dmu_recv_cookie { boolean_t drc_force; boolean_t drc_resumable; boolean_t drc_raw; + boolean_t drc_clone; struct avl_tree *drc_guid_to_ds_map; zio_cksum_t drc_cksum; uint64_t drc_newsnapobj; diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 344ec68f2666..c63ab43e1a02 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -2088,6 +2088,7 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, drc->drc_force = force; drc->drc_resumable = resumable; drc->drc_cred = CRED(); + drc->drc_clone = (origin != NULL); if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { drc->drc_byteswap = B_TRUE; @@ -2149,7 +2150,9 @@ struct receive_writer_arg { avl_tree_t *guid_to_ds_map; boolean_t resumable; boolean_t raw; - uint64_t last_object, last_offset; + uint64_t last_object; + uint64_t last_offset; + uint64_t max_object; /* highest object ID referenced in 
stream */ uint64_t bytes_read; /* bytes read when current record created */ }; @@ -2451,6 +2454,9 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro, return (SET_ERROR(EINVAL)); object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT; + if (drro->drr_object > rwa->max_object) + rwa->max_object = drro->drr_object; + /* * If we are losing blkptrs or changing the block size this must * be a new file instance. We must clear out the previous file @@ -2587,6 +2593,9 @@ receive_freeobjects(struct receive_writer_arg *rwa, err = dmu_free_long_object(rwa->os, obj); if (err != 0) return (err); + + if (obj > rwa->max_object) + rwa->max_object = obj; } if (next_err != ESRCH) return (next_err); @@ -2617,6 +2626,9 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw, rwa->last_object = drrw->drr_object; rwa->last_offset = drrw->drr_offset; + if (rwa->last_object > rwa->max_object) + rwa->max_object = rwa->last_object; + if (dmu_object_info(rwa->os, drrw->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); @@ -2698,6 +2710,9 @@ receive_write_byref(struct receive_writer_arg *rwa, ref_os = rwa->os; } + if (drrwbr->drr_object > rwa->max_object) + rwa->max_object = drrwbr->drr_object; + if (rwa->raw) flags |= DMU_READ_NO_DECRYPT; @@ -2751,6 +2766,9 @@ receive_write_embedded(struct receive_writer_arg *rwa, if (drrwe->drr_compression >= ZIO_COMPRESS_FUNCTIONS) return (SET_ERROR(EINVAL)); + if (drrwe->drr_object > rwa->max_object) + rwa->max_object = drrwe->drr_object; + tx = dmu_tx_create(rwa->os); dmu_tx_hold_write(tx, drrwe->drr_object, @@ -2794,6 +2812,9 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, if (dmu_object_info(rwa->os, drrs->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); + if (drrs->drr_object > rwa->max_object) + rwa->max_object = drrs->drr_object; + VERIFY0(dmu_bonus_hold(rwa->os, drrs->drr_object, FTAG, &db)); if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) { dmu_buf_rele(db, FTAG); @@ 
-2840,6 +2861,9 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf) if (dmu_object_info(rwa->os, drrf->drr_object, NULL) != 0) return (SET_ERROR(EINVAL)); + if (drrf->drr_object > rwa->max_object) + rwa->max_object = drrf->drr_object; + err = dmu_free_long_range(rwa->os, drrf->drr_object, drrf->drr_offset, drrf->drr_length); @@ -2882,6 +2906,9 @@ receive_object_range(struct receive_writer_arg *rwa, !rwa->raw) return (SET_ERROR(EINVAL)); + if (drror->drr_firstobj > rwa->max_object) + rwa->max_object = drror->drr_firstobj; + offset = drror->drr_firstobj * sizeof (dnode_phys_t); mdn = DMU_META_DNODE(rwa->os); @@ -3720,6 +3747,32 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp, } mutex_exit(&rwa->mutex); + /* + * If we are receiving a full stream as a clone, all object IDs which + * are greater than the maximum ID referenced in the stream are + * by definition unused and must be freed. + */ + if (drc->drc_clone && drc->drc_drrb->drr_fromguid == 0) { + uint64_t obj = rwa->max_object + 1; + int free_err = 0; + int next_err = 0; + + while (next_err == 0) { + free_err = dmu_free_long_object(rwa->os, obj); + if (free_err != 0 && free_err != ENOENT) + break; + + next_err = dmu_object_next(rwa->os, &obj, FALSE, 0); + } + + if (err == 0) { + if (free_err != 0 && free_err != ENOENT) + err = free_err; + else if (next_err != ESRCH) + err = next_err; + } + } + cv_destroy(&rwa->cv); mutex_destroy(&rwa->mutex); bqueue_destroy(&rwa->q);