Skip to content

Commit

Permalink
Detect BTX boot loader in the zfs reserved boot section
Browse files Browse the repository at this point in the history
Signed-off-by: Don Brady <dev.fs.zfs@gmail.com>
  • Loading branch information
don-brady committed Aug 29, 2023
1 parent ca80cdc commit 283c97b
Show file tree
Hide file tree
Showing 11 changed files with 219 additions and 6 deletions.
1 change: 1 addition & 0 deletions include/sys/vdev.h
Expand Up @@ -209,6 +209,7 @@ extern void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t
extern int vdev_label_read_bootenv(vdev_t *, nvlist_t *);
extern int vdev_label_write_bootenv(vdev_t *, nvlist_t *);
extern int vdev_uberblock_sync_list(vdev_t **, int, struct uberblock *, int);
extern int vdev_check_boot_reserve(spa_t *, vdev_t *);

typedef enum {
VDEV_LABEL_CREATE, /* create/add a new device */
Expand Down
20 changes: 17 additions & 3 deletions lib/libzfs/libzfs_pool.c
Expand Up @@ -3520,8 +3520,8 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
break;

case EBUSY:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"%s is busy"), new_disk);
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
new_disk);
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
break;

Expand Down Expand Up @@ -3561,9 +3561,23 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
"being replaced"));
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
break;
} else {
(void) zpool_standard_error(hdl, errno, errbuf);
}
/* fall through */
break;

case EADDRINUSE:
/*
* The boot reserved area is already being used (FreeBSD)
*/
if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"the reserved boot area needed for the expansion "
"is already being used by a boot loader"));
} else {
(void) zpool_standard_error(hdl, errno, errbuf);
}
break;
default:
(void) zpool_standard_error(hdl, errno, errbuf);
}
Expand Down
1 change: 1 addition & 0 deletions lib/libzpool/Makefile.am
Expand Up @@ -43,6 +43,7 @@ nodist_libzpool_la_SOURCES = \
module/os/linux/zfs/arc_os.c \
module/os/linux/zfs/trace.c \
module/os/linux/zfs/vdev_file.c \
module/os/linux/zfs/vdev_label_os.c \
module/os/linux/zfs/zfs_debug.c \
module/os/linux/zfs/zfs_racct.c \
module/os/linux/zfs/zfs_znode.c \
Expand Down
1 change: 1 addition & 0 deletions module/Kbuild.in
Expand Up @@ -445,6 +445,7 @@ ZFS_OBJS_OS := \
trace.o \
vdev_disk.o \
vdev_file.o \
vdev_label_os.o \
zfs_acl.o \
zfs_ctldir.o \
zfs_debug.o \
Expand Down
59 changes: 59 additions & 0 deletions module/os/freebsd/zfs/vdev_label_os.c
Expand Up @@ -72,3 +72,62 @@ vdev_label_write_pad2(vdev_t *vd, const char *buf, size_t size)
abd_free(pad2);
return (error);
}

static void
vdev_child_done(zio_t *zio)
{
zio_t *pio = zio->io_private;

mutex_enter(&pio->io_lock);
pio->io_error = zio_worst_error(pio->io_error, zio->io_error);
mutex_exit(&pio->io_lock);
}

/*
* Check if the reserved boot area is in-use.
*
* When booting FreeBSD with an MBR partition with ZFS, the zfsboot file
* (which understands the ZFS file system) is written to the ZFS BOOT
* reserve area (at offset 512K). We check for that here before attaching
* a disk to raidz which would then corrupt this boot data.
*/
int
vdev_check_boot_reserve(spa_t *spa, vdev_t *childvd)
{
ASSERT(childvd->vdev_ops->vdev_op_leaf);

size_t size = SPA_MINBLOCKSIZE;
abd_t *abd = abd_alloc_linear(size, B_FALSE);

zio_t *pio = zio_root(spa, NULL, NULL, 0);
/*
* Note: zio_vdev_child_io() adds VDEV_LABEL_START_SIZE to the offset
* to calculate the physical offset to write to. Passing in a negative
* offset lets us access the boot area.
*/
zio_nowait(zio_vdev_child_io(pio, NULL, childvd,
VDEV_BOOT_OFFSET - VDEV_LABEL_START_SIZE, abd, size, ZIO_TYPE_READ,
ZIO_PRIORITY_ASYNC_READ, 0, vdev_child_done, pio));
zio_wait(pio);

unsigned char *buf = abd_to_buf(abd);

/*
* The BTX server has a special header at the begining.
*
* btx_hdr: .byte 0xeb # Machine ID
* .byte 0xe # Header size
* .ascii "BTX" # Magic
* .byte 0x1 # Major version
* .byte 0x2 # Minor version
* .byte BTX_FLAGS # Flags
*/
if (buf[0] == 0xeb && buf[1] == 0x0e &&
buf[2] == 'B' && buf[3] == 'T' && buf[4] == 'X') {
abd_free(abd);
return (EBUSY);
}

abd_free(abd);
return (0);
}
44 changes: 44 additions & 0 deletions module/os/linux/zfs/vdev_label_os.c
@@ -0,0 +1,44 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/

/*
* Copyright (c) 2023 by iXsystems, Inc.
*/

#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev.h>
#include <sys/vdev_impl.h>

/*
* Check if the reserved boot area is in-use.
*
* Not aware of any external uses on Linux.
*/
int
vdev_check_boot_reserve(spa_t *spa, vdev_t *childvd)
{
(void) spa;
(void) childvd;

return (0);
}
6 changes: 6 additions & 0 deletions module/zfs/spa.c
Expand Up @@ -7059,6 +7059,12 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
return (spa_vdev_exit(spa, newrootvd, txg,
ENXIO));
}
/* Also fail if reserved boot area is in-use */
if (vdev_check_boot_reserve(spa, oldvd->vdev_child[i])
!= 0) {
return (spa_vdev_exit(spa, newrootvd, txg,
EADDRINUSE));
}
}
}

Expand Down
4 changes: 2 additions & 2 deletions module/zfs/vdev_raidz.c
Expand Up @@ -207,7 +207,7 @@
* context. The design also allows for fast discovery of what data to copy.
*
* The VDEV metaslabs are processed, one at a time, to copy the block data to
* have it flow across all the disks. The metasab is disabled for allocations
* have it flow across all the disks. The metaslab is disabled for allocations
* during the copy. As an optimization, we only copy the allocated data which
* can be determined by looking at the metaslab range tree. During the copy we
* must maintain the redundancy guarantees of the RAIDZ VDEV (e.g. parity count
Expand Down Expand Up @@ -267,7 +267,7 @@
*
* == Reflow Progress Updates ==
* After the initial scratch-based reflow, the expansion process works
* similarly to device removal. We create a new open context thread whichi
* similarly to device removal. We create a new open context thread which
* reflows the data, and periodically kicks off sync tasks to update logical
* state. In this case, state is the committed progress (offset of next data
* to copy). We need to persist the completed offset on disk, so that if we
Expand Down
2 changes: 1 addition & 1 deletion tests/runfiles/common.run
Expand Up @@ -772,7 +772,7 @@ tags = ['functional', 'redacted_send']
tests = ['raidz_001_neg', 'raidz_002_pos', 'raidz_003_pos', 'raidz_004_pos',
'raidz_expand_001_pos', 'raidz_expand_002_pos', 'raidz_expand_003_neg',
'raidz_expand_003_pos', 'raidz_expand_004_pos', 'raidz_expand_005_pos',
'raidz_expand_006_neg']
'raidz_expand_006_neg', 'raidz_expand_007_neg']
tags = ['functional', 'raidz']
timeout = 1200

Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/Makefile.am
Expand Up @@ -1669,6 +1669,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/raidz/raidz_expand_004_pos.ksh \
functional/raidz/raidz_expand_005_pos.ksh \
functional/raidz/raidz_expand_006_neg.ksh \
functional/raidz/raidz_expand_007_neg.ksh \
functional/raidz/setup.ksh \
functional/redacted_send/cleanup.ksh \
functional/redacted_send/redacted_compressed.ksh \
Expand Down
86 changes: 86 additions & 0 deletions tests/zfs-tests/tests/functional/raidz/raidz_expand_007_neg.ksh
@@ -0,0 +1,86 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2023 by iXsystems, Inc.
#

. $STF_SUITE/include/libtest.shlib

#
# DESCRIPTION:
# Negative for FreeBSD Only
#
# Attempting to expand a RAIDZ should fail if the scratch area on the
# existing disks contains BTX Server binary (used to boot FreeBSD when
# using MBR partitions with ZFS).
#
# STRATEGY:
# 1. Create raidz pool
# 2. Add a BTX header to the reserved boot area
# 3. Attempt to attach a device to the raidz vdev
# 4. Verify that device attached failed
# 5. Destroy the raidz pool

typeset -r devs=4
typeset -r dev_size_mb=128
typeset -a disks

function cleanup
{
log_pos zpool status "$TESTPOOL"

poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL"

for i in {0..$devs}; do
log_must rm -f "$TEST_BASE_DIR/dev-$i"
done
}

log_onexit cleanup

for i in {0..$devs}; do
device=$TEST_BASE_DIR/dev-$i
# simulate active BTX Server data by inserting a BTX header
printf "\xeb\x0e%s\x01\x02\x80" "BTX" | dd of="$device" \
bs=512 seek=1024 status=none
log_must truncate -s ${dev_size_mb}M "$device"
if [[ $i -ne $devs ]]; then
disks[${#disks[*]}+1]=$device
fi
done

log_must zpool create -f -o cachefile=none "$TESTPOOL" raidz1 "${disks[@]}"

if is_freebsd; then
# expecting attach to fail
log_mustnot_expect "the reserved boot area" zpool attach -f \
"$TESTPOOL" raidz1-0 "$TEST_BASE_DIR/dev-$devs"
log_must zpool destroy "$TESTPOOL"
log_pass "raidz attach failed with in-use reserved boot area"
else
# expecting attach to pass everywhere else
log_must zpool attach -f "$TESTPOOL" raidz1-0 "$TEST_BASE_DIR/dev-$devs"
log_must zpool destroy "$TESTPOOL"
log_pass "raidz attach passed with in-use reserved boot area"
fi

0 comments on commit 283c97b

Please sign in to comment.