Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #1913 - NETFS+tar backup exit code testing insufficient (may fail to catch real backup errors) #1914

Merged
merged 3 commits into from Sep 21, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
217 changes: 137 additions & 80 deletions usr/share/rear/backup/NETFS/default/500_make_backup.sh
Expand Up @@ -67,49 +67,99 @@ else
SPLIT_COMMAND="dd of=$backuparchive"
fi

# Used by "tar" method to record which pipe command failed
FAILING_BACKUP_PROG_FILE="$TMP_DIR/failing_backup_prog"
FAILING_BACKUP_PROG_RC_FILE="$TMP_DIR/failing_backup_prog_rc"

LogPrint "Creating $BACKUP_PROG archive '$backuparchive'"
ProgressStart "Preparing archive operation"
(
case "$(basename ${BACKUP_PROG})" in
# tar compatible programs here
(tar)
set_tar_features
Log $BACKUP_PROG $TAR_OPTIONS --sparse --block-number --totals --verbose \
--no-wildcards-match-slash --one-file-system \
--ignore-failed-read "${BACKUP_PROG_OPTIONS[@]}" \
$BACKUP_PROG_CREATE_NEWER_OPTIONS \
${BACKUP_PROG_BLOCKS:+-b $BACKUP_PROG_BLOCKS} "${BACKUP_PROG_COMPRESS_OPTIONS[@]}" \
-X $TMP_DIR/backup-exclude.txt -C / -c -f - \
$(cat $TMP_DIR/backup-include.txt) $RUNTIME_LOGFILE \| $BACKUP_PROG_CRYPT_OPTIONS BACKUP_PROG_CRYPT_KEY \| $SPLIT_COMMAND
$BACKUP_PROG $TAR_OPTIONS --sparse --block-number --totals --verbose \
--no-wildcards-match-slash --one-file-system \
--ignore-failed-read "${BACKUP_PROG_OPTIONS[@]}" \
$BACKUP_PROG_CREATE_NEWER_OPTIONS \
${BACKUP_PROG_BLOCKS:+-b $BACKUP_PROG_BLOCKS} "${BACKUP_PROG_COMPRESS_OPTIONS[@]}" \
-X $TMP_DIR/backup-exclude.txt -C / -c -f - \
$(cat $TMP_DIR/backup-include.txt) $RUNTIME_LOGFILE | $BACKUP_PROG_CRYPT_OPTIONS $BACKUP_PROG_CRYPT_KEY | $SPLIT_COMMAND
;;
(rsync)
# make sure that the target is a directory
mkdir -p $v "$backuparchive" >&2
Log $BACKUP_PROG --verbose "${BACKUP_RSYNC_OPTIONS[@]}" --one-file-system --delete \
--exclude-from=$TMP_DIR/backup-exclude.txt --delete-excluded \
$(cat $TMP_DIR/backup-include.txt) "$backuparchive"
$BACKUP_PROG --verbose "${BACKUP_RSYNC_OPTIONS[@]}" --one-file-system --delete \
--exclude-from=$TMP_DIR/backup-exclude.txt --delete-excluded \
$(cat $TMP_DIR/backup-include.txt) "$backuparchive" >&2
;;
(*)
Log "Using unsupported backup program '$BACKUP_PROG'"
Log $BACKUP_PROG "${BACKUP_PROG_COMPRESS_OPTIONS[@]}" \
$BACKUP_PROG_OPTIONS_CREATE_ARCHIVE $TMP_DIR/backup-exclude.txt \
"${BACKUP_PROG_OPTIONS[@]}" $backuparchive \
$(cat $TMP_DIR/backup-include.txt) $RUNTIME_LOGFILE > $backuparchive
$BACKUP_PROG "${BACKUP_PROG_COMPRESS_OPTIONS[@]}" \
$BACKUP_PROG_OPTIONS_CREATE_ARCHIVE $TMP_DIR/backup-exclude.txt \
"${BACKUP_PROG_OPTIONS[@]}" $backuparchive \
$(cat $TMP_DIR/backup-include.txt) $RUNTIME_LOGFILE > $backuparchive
;;
# tar compatible programs here
(tar)
set_tar_features
Log $BACKUP_PROG $TAR_OPTIONS --sparse --block-number --totals --verbose \
--no-wildcards-match-slash --one-file-system \
--ignore-failed-read "${BACKUP_PROG_OPTIONS[@]}" \
$BACKUP_PROG_CREATE_NEWER_OPTIONS \
${BACKUP_PROG_BLOCKS:+-b $BACKUP_PROG_BLOCKS} "${BACKUP_PROG_COMPRESS_OPTIONS[@]}" \
-X $TMP_DIR/backup-exclude.txt -C / -c -f - \
$(cat $TMP_DIR/backup-include.txt) $RUNTIME_LOGFILE \| $BACKUP_PROG_CRYPT_OPTIONS BACKUP_PROG_CRYPT_KEY \| $SPLIT_COMMAND

# Variable used to record the short name of piped commands in case of
# error, e.g. ( "tar" "cat" "dd" ) in case of unencrypted and unsplit backup.
backup_prog_shortnames=(
"$(basename $(echo "$BACKUP_PROG" | awk '{ print $1 }'))"
"$(basename $(echo "$BACKUP_PROG_CRYPT_OPTIONS" | awk '{ print $1 }'))"
"$(basename $(echo "$SPLIT_COMMAND" | awk '{ print $1 }'))"
)
for index in ${!backup_prog_shortnames[@]} ; do
jsmeix marked this conversation as resolved.
Show resolved Hide resolved
[ -n "${backup_prog_shortnames[$index]}" ] || BugError "No computed shortname for pipe component $index"
done

$BACKUP_PROG $TAR_OPTIONS --sparse --block-number --totals --verbose \
--no-wildcards-match-slash --one-file-system \
--ignore-failed-read "${BACKUP_PROG_OPTIONS[@]}" \
$BACKUP_PROG_CREATE_NEWER_OPTIONS \
${BACKUP_PROG_BLOCKS:+-b $BACKUP_PROG_BLOCKS} \
"${BACKUP_PROG_COMPRESS_OPTIONS[@]}" \
-X $TMP_DIR/backup-exclude.txt -C / -c -f - \
$(cat $TMP_DIR/backup-include.txt) $RUNTIME_LOGFILE | \
\
$BACKUP_PROG_CRYPT_OPTIONS $BACKUP_PROG_CRYPT_KEY | \
\
$SPLIT_COMMAND
pipes_rc=( ${PIPESTATUS[@]} )

# Exit code logic:
# - never return rc=1 (this is reserved for "tar" warning about modified files)
# - process exit code in pipe's reverse order
# - if last command failed (e.g. "dd"), return an error
# - otherwise if previous command failed (e.g. "encrypt"), return an error
# ...
# - otherwise return "tar" exit code
#
# When an error occurs, record the program name in $FAILING_BACKUP_PROG_FILE
# and real exit code in $FAILING_BACKUP_PROG_RC_FILE.

let index=${#pipes_rc[@]}-1
while [ $index -ge 0 ] ; do
rc=${pipes_rc[$index]}
if [ $rc -ne 0 ] ; then
echo "${backup_prog_shortnames[$index]}" > $FAILING_BACKUP_PROG_FILE
echo "$rc" > $FAILING_BACKUP_PROG_RC_FILE
if [ $rc -eq 1 ] && [ "${backup_prog_shortnames[$index]}" != "tar" ] ; then
rc=2
fi
exit $rc
fi
# This pipe command succeeded, check the previous one
let index--
done
# This was a success
exit 0
;;
(rsync)
# make sure that the target is a directory
mkdir -p $v "$backuparchive" >&2
Log $BACKUP_PROG --verbose "${BACKUP_RSYNC_OPTIONS[@]}" --one-file-system --delete \
--exclude-from=$TMP_DIR/backup-exclude.txt --delete-excluded \
$(cat $TMP_DIR/backup-include.txt) "$backuparchive"
$BACKUP_PROG --verbose "${BACKUP_RSYNC_OPTIONS[@]}" --one-file-system --delete \
--exclude-from=$TMP_DIR/backup-exclude.txt --delete-excluded \
$(cat $TMP_DIR/backup-include.txt) "$backuparchive" >&2
;;
(*)
Log "Using unsupported backup program '$BACKUP_PROG'"
Log $BACKUP_PROG "${BACKUP_PROG_COMPRESS_OPTIONS[@]}" \
$BACKUP_PROG_OPTIONS_CREATE_ARCHIVE $TMP_DIR/backup-exclude.txt \
"${BACKUP_PROG_OPTIONS[@]}" $backuparchive \
$(cat $TMP_DIR/backup-include.txt) $RUNTIME_LOGFILE > $backuparchive
$BACKUP_PROG "${BACKUP_PROG_COMPRESS_OPTIONS[@]}" \
$BACKUP_PROG_OPTIONS_CREATE_ARCHIVE $TMP_DIR/backup-exclude.txt \
"${BACKUP_PROG_OPTIONS[@]}" $backuparchive \
$(cat $TMP_DIR/backup-include.txt) $RUNTIME_LOGFILE > $backuparchive
;;
esac 2> "${TMP_DIR}/${BACKUP_PROG_ARCHIVE}.log"
rmetrich marked this conversation as resolved.
Show resolved Hide resolved
# important trick: the backup prog is the last in each case entry and the case .. esac is the last command
# in the (..) subshell. As a result the return code of the subshell is the return code of the backup prog!
Expand All @@ -121,44 +171,44 @@ sleep 1 # Give the backup software a good chance to start working

# return disk usage in bytes
function get_disk_used() {
let "$(stat -f -c 'used=(%b-%f)*%S' $1)"
echo $used
let "$(stat -f -c 'used=(%b-%f)*%S' $1)"
echo $used
}

# While the backup runs in a sub-process, display some progress information to the user.
# ProgressInfo texts have a space at the end to get the 'OK' from ProgressStop shown separated.
test "$PROGRESS_WAIT_SECONDS" || PROGRESS_WAIT_SECONDS=1
case "$( basename $BACKUP_PROG )" in
(tar)
while sleep $PROGRESS_WAIT_SECONDS ; kill -0 $BackupPID 2>/dev/null; do
#blocks="$(stat -c %b ${backuparchive})"
#size="$((blocks*512))"
size="$(stat -c %s ${backuparchive}* | awk '{s+=$1} END {print s}')"
ProgressInfo "Archived $((size/1024/1024)) MiB [avg $((size/1024/(SECONDS-starttime))) KiB/sec] "
done
;;
(rsync)
# since we do not want to do a $(du -s) run every second we count disk usage instead
# this obviously leads to wrong results in case something else is writing to the same
# disk at the same time as is very likely with a networked file system. For local disks
# this should be good enough and in any case this is only some eye candy.
# TODO: Find a fast way to count the actual transfer data, preferrable getting the info from rsync.
let old_disk_used="$(get_disk_used "$backuparchive")"
while sleep $PROGRESS_WAIT_SECONDS ; kill -0 $BackupPID 2>/dev/null; do
let disk_used="$(get_disk_used "$backuparchive")" size=disk_used-old_disk_used
ProgressInfo "Archived $((size/1024/1024)) MiB [avg $((size/1024/(SECONDS-starttime))) KiB/sec] "
done
;;
(*)
while sleep $PROGRESS_WAIT_SECONDS ; kill -0 $BackupPID 2>/dev/null; do
size="$(stat -c "%s" "$backuparchive")" || {
kill -9 $BackupPID
ProgressError
Error "$(basename $BACKUP_PROG) failed to create the archive file"
}
ProgressInfo "Archived $((size/1024/1024)) MiB [avg $((size/1024/(SECONDS-starttime))) KiB/sec] "
done
;;
(tar)
while sleep $PROGRESS_WAIT_SECONDS ; kill -0 $BackupPID 2>/dev/null; do
#blocks="$(stat -c %b ${backuparchive})"
#size="$((blocks*512))"
size="$(stat -c %s ${backuparchive}* | awk '{s+=$1} END {print s}')"
ProgressInfo "Archived $((size/1024/1024)) MiB [avg $((size/1024/(SECONDS-starttime))) KiB/sec] "
done
;;
(rsync)
# since we do not want to do a $(du -s) run every second we count disk usage instead
# this obviously leads to wrong results in case something else is writing to the same
# disk at the same time as is very likely with a networked file system. For local disks
# this should be good enough and in any case this is only some eye candy.
# TODO: Find a fast way to count the actual transfer data, preferrable getting the info from rsync.
let old_disk_used="$(get_disk_used "$backuparchive")"
while sleep $PROGRESS_WAIT_SECONDS ; kill -0 $BackupPID 2>/dev/null; do
let disk_used="$(get_disk_used "$backuparchive")" size=disk_used-old_disk_used
ProgressInfo "Archived $((size/1024/1024)) MiB [avg $((size/1024/(SECONDS-starttime))) KiB/sec] "
done
;;
(*)
while sleep $PROGRESS_WAIT_SECONDS ; kill -0 $BackupPID 2>/dev/null; do
size="$(stat -c "%s" "$backuparchive")" || {
kill -9 $BackupPID
ProgressError
Error "$(basename $BACKUP_PROG) failed to create the archive file"
}
ProgressInfo "Archived $((size/1024/1024)) MiB [avg $((size/1024/(SECONDS-starttime))) KiB/sec] "
done
;;
esac
ProgressStop
transfertime="$((SECONDS-starttime))"
Expand All @@ -177,27 +227,32 @@ sleep 1
# everyone should see this warning, even if not verbose
case "$(basename $BACKUP_PROG)" in
(tar)
if (( $backup_prog_rc == 1 )); then
LogPrint "WARNING: $(basename $BACKUP_PROG) ended with return code $backup_prog_rc and below output:
if (( $backup_prog_rc != 0 )); then
prog="$(cat $FAILING_BACKUP_PROG_FILE)"
if (( $backup_prog_rc == 1 )); then
LogUserOutput "WARNING: $prog ended with return code 1 and below output:
---snip---
$(grep '^tar: ' $RUNTIME_LOGFILE | sed -e 's/^/ /' | tail -n3)
$(grep '^tar: ' "${TMP_DIR}/${BACKUP_PROG_ARCHIVE}.log" | sed -e 's/^/ /' | tail -n3)
----------
This means that files have been modified during the archiving
process. As a result the backup may not be completely consistent
or may not be a perfect copy of the system. Relax-and-Recover
will continue, however it is highly advisable to verify the
backup in order to be sure to safely recover this system.
"
elif (( $backup_prog_rc > 1 )); then
Error "$(basename $BACKUP_PROG) failed with return code $backup_prog_rc and below output:
else
rc=$(cat $FAILING_BACKUP_PROG_RC_FILE)
Error "$prog failed with return code $rc and below output:
---snip---
$(grep '^tar: ' $RUNTIME_LOGFILE | sed -e 's/^/ /' | tail -n3)
$(grep "^$prog: " "${TMP_DIR}/${BACKUP_PROG_ARCHIVE}.log" | sed -e 's/^/ /' | tail -n3)
----------
This means that the archiving process ended prematurely, or did
not even start. As a result it is unlikely you can recover this
system properly. Relax-and-Recover is therefore aborting execution.
"
fi;;
fi
fi
;;
(*)
if (( $backup_prog_rc > 0 )) ; then
Error "$(basename $BACKUP_PROG) failed with return code $backup_prog_rc
Expand All @@ -212,10 +267,12 @@ esac

tar_message="$(tac $RUNTIME_LOGFILE | grep -m1 '^Total bytes written: ')"
if [ $backup_prog_rc -eq 0 -a "$tar_message" ] ; then
LogPrint "$tar_message in $transfertime seconds."
LogPrint "$tar_message in $transfertime seconds."
elif [ "$size" ]; then
LogPrint "Archived $((size/1024/1024)) MiB in $((transfertime)) seconds [avg $((size/1024/transfertime)) KiB/sec]"
LogPrint "Archived $((size/1024/1024)) MiB in $((transfertime)) seconds [avg $((size/1024/transfertime)) KiB/sec]"
fi

### Copy progress log to backup media
cp $v "${TMP_DIR}/${BACKUP_PROG_ARCHIVE}.log" "${opath}/${BACKUP_PROG_ARCHIVE}.log" >&2

# vim: set et ts=4 sw=4: