Skip to content

Commit

Permalink
Simplify the disaster recovery scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
retroflexer committed Mar 5, 2020
1 parent 80e5b29 commit 0531dc4
Show file tree
Hide file tree
Showing 12 changed files with 321 additions and 1,228 deletions.
22 changes: 22 additions & 0 deletions bindata/etcd/etcd-common-tools
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Common environment variables
# Shared path definitions for the etcd recovery scripts that source this file.

# Base directory for recovery working files; MANIFEST_STOPPED_DIR lives under it.
ASSET_DIR="/home/core/assets"
# Root of the kubernetes configuration tree; the paths below are derived from it.
CONFIG_FILE_DIR="/etc/kubernetes"
# Static pod manifest directory; the restore script moves manifests out of here
# and later places the restore pod manifest here as etcd-pod.yaml.
MANIFEST_DIR="${CONFIG_FILE_DIR}/manifests"
# etcd data directory; the restore script moves its "member" subdirectory aside.
ETCD_DATA_DIR="/var/lib/etcd"
# Receives the moved "member" directory and a copy of the snapshot (snapshot.db).
ETCD_DATA_DIR_BACKUP="/var/lib/etcd-backup"
# Holding area for static pod manifests moved out of MANIFEST_DIR during restore.
MANIFEST_STOPPED_DIR="${ASSET_DIR}/manifests-stopped"
# Pod manifest that the restore script copies to ${MANIFEST_DIR}/etcd-pod.yaml.
RESTORE_ETCD_POD_YAML="${CONFIG_FILE_DIR}/static-pod-resources/etcd-certs/configmaps/restore-etcd-pod/pod.yaml"
# Directory into which dl_etcdctl copies the etcdctl binary.
ETCDCTL_BIN_DIR="${CONFIG_FILE_DIR}/static-pod-resources/bin"
# Appended (not prepended) so the copied etcdctl is callable by name while
# directories already on PATH keep precedence.
PATH=${PATH}:${ETCDCTL_BIN_DIR}

#######################################
# Extract the etcdctl binary from the etcd container image into
# ETCDCTL_BIN_DIR and print its version.
# Globals:
#   ETCD_IMAGE       (read) image reference handed to `podman create`
#   ETCDCTL_BIN_DIR  (read) destination directory, created if missing
# Outputs:
#   Whatever `podman rm` and `etcdctl version` print on stdout.
# Returns:
#   0 on success; non-zero if the image is unset or any podman/copy/unmount
#   step fails (nothing is reported as success in that case).
#######################################
function dl_etcdctl {
  local etcdimg="${ETCD_IMAGE:-}"
  local etcdctr etcdmnt
  local rc=0

  if [ -z "${etcdimg}" ]; then
    echo "ETCD_IMAGE is not set; cannot extract etcdctl" >&2
    return 1
  fi

  # Declaration and assignment are kept separate: `local var=$(cmd)` would
  # report the status of `local` and hide a failing podman call.
  etcdctr=$(podman create "${etcdimg}") || return 1
  if ! etcdmnt=$(podman mount "${etcdctr}"); then
    podman rm "${etcdctr}" || true
    return 1
  fi

  if ! { mkdir -p "${ETCDCTL_BIN_DIR}" \
    && cp "${etcdmnt}/bin/etcdctl" "${ETCDCTL_BIN_DIR}/"; }; then
    rc=1
  fi

  # Always release the mount and the temporary container, even when the copy
  # failed, so that no container is left behind.
  umount "${etcdmnt}" || rc=1
  podman rm "${etcdctr}" || rc=1

  if [ "${rc}" -ne 0 ]; then
    return "${rc}"
  fi

  etcdctl version
}
34 changes: 19 additions & 15 deletions bindata/etcd/etcd-member-remove.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#!/usr/bin/env bash

set -o errexit
set -o pipefail
set -o errtrace

# example
# sudo ./etcd-member-remove.sh $etcd_name

Expand All @@ -8,30 +12,30 @@ if [[ $EUID -ne 0 ]]; then
exit 1
fi

usage () {
# Print the required-argument hint for etcd-member-remove.sh and terminate
# the script with exit status 1.
function usage {
  local -r hint='The name of the etcd member to remove is required: ./etcd-member-remove.sh $etcd_name'
  printf '%s\n' "${hint}"
  exit 1
}

### main
if [ "$1" == "" ]; then
usage
fi

ETCD_NAME=$1
ASSET_DIR=/home/core/assets
ASSET_DIR_TMP="$ASSET_DIR/tmp"
ETCDCTL=$ASSET_DIR/bin/etcdctl
ETCD_DATA_DIR=/var/lib/etcd
CONFIG_FILE_DIR=/etc/kubernetes
NAME="$1"

source "/usr/local/bin/openshift-recovery-tools"
source /etc/kubernetes/static-pod-resources/etcd-certs/configmaps/etcd-scripts/etcd.env
source /etc/kubernetes/static-pod-resources/etcd-certs/configmaps/etcd-scripts/etcd-common-tools

function run {
init
dl_etcdctl
backup_etcd_client_certs
etcd_member_remove $ETCD_NAME
}
# Download etcdctl binary
dl_etcdctl

run
# Find the ID of the member whose ID (1st field) or name (3rd field) exactly
# matches NAME. `etcdctl member list` separates fields with ", ", so the 3rd
# field carries one leading space.
# - NAME is passed with -v and compared as a string (the "" concatenation
#   forces string comparison), so regex metacharacters or numeric-looking
#   IDs in NAME cannot cause a false match.
# - The lookup runs inside the `if` condition: with errexit and pipefail a
#   bare assignment would abort the script before the message below is printed.
if ! ID=$(etcdctl member list \
  | awk -F, -v name="${NAME}" '($1 "") == (name "") || ($3 "") == (" " name) { print $1 }') \
  || [ -z "${ID}" ]; then
  echo "could not find etcd member $NAME to remove."
  exit 1
fi

# Remove the member using ID
etcdctl member remove "${ID}"
87 changes: 0 additions & 87 deletions bindata/etcd/etcd-restore-backup.sh

This file was deleted.

52 changes: 28 additions & 24 deletions bindata/etcd/etcd-snapshot-backup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,33 @@ if [[ $EUID -ne 0 ]]; then
exit 1
fi

usage () {
# Print the required-argument hint for etcd-snapshot-backup.sh and terminate
# the script with exit status 1.
function usage {
  local -r hint='Path to backup dir required: ./etcd-snapshot-backup.sh <path-to-backup-dir>'
  printf '%s\n' "${hint}"
  exit 1
}

ASSET_DIR=/home/core/assets
# Print the highest-versioned path matching
# ${CONFIG_FILE_DIR}/static-pod-resources/<prefix>[0-9]*.
# When nothing matches, the unexpanded pattern is printed; callers detect
# that with an existence test.
function _latest_static_pod_dir {
  local prefix="$1"
  printf '%s\n' "${CONFIG_FILE_DIR}/static-pod-resources/${prefix}"[0-9]* \
    | sort -V \
    | tail -1
}

#######################################
# Back up the latest static pod resources for kube-apiserver and etcd.
# Globals:
#   CONFIG_FILE_DIR             (read)    root containing static-pod-resources
#   BACKUP_TAR_FILE             (read)    path of the tar.gz archive to create
#   LATEST_STATIC_POD_DIR       (written) newest kube-apiserver-pod-N directory
#   LATEST_ETCD_STATIC_POD_DIR  (written) newest etcd-pod-N directory
# Outputs:
#   Progress and error messages on stdout.
# Exits:
#   With status 1 when either resource directory cannot be found.
#######################################
function backup_latest_kube_static_resources {
  echo "Trying to backup latest static pod resources.."

  LATEST_STATIC_POD_DIR=$(_latest_static_pod_dir "kube-apiserver-pod-")
  if [ ! -e "${LATEST_STATIC_POD_DIR}" ]; then
    echo "error finding static-pod-resources"
    exit 1
  fi

  LATEST_ETCD_STATIC_POD_DIR=$(_latest_static_pod_dir "etcd-pod-")
  if [ ! -e "${LATEST_ETCD_STATIC_POD_DIR}" ]; then
    echo "error finding static-pod-resources"
    exit 1
  fi

  # tar up the static kube resources, with the path relative to CONFIG_FILE_DIR.
  # Every argument is quoted so that paths containing spaces stay single words;
  # the quoted prefix in the #-expansion is stripped literally, not as a glob.
  tar -cpzf "${BACKUP_TAR_FILE}" -C "${CONFIG_FILE_DIR}" \
    "${LATEST_STATIC_POD_DIR#"${CONFIG_FILE_DIR}"/}" \
    "${LATEST_ETCD_STATIC_POD_DIR#"${CONFIG_FILE_DIR}"/}"
}


# main
# If the first argument is missing, or it is an existing file, then print usage and exit
if [ -z "$1" ] || [ -f "$1" ]; then
usage
fi
Expand All @@ -34,26 +54,10 @@ SNAPSHOT_FILE="${BACKUP_DIR}/snapshot_${DATESTRING}.db"

# On any error, remove partially written backup artifacts.
# Single quotes defer expansion until the trap fires, and the inner double
# quotes keep each path a single word even when it contains spaces.
trap 'rm -f -- "${BACKUP_TAR_FILE}" "${SNAPSHOT_FILE}"' ERR

CONFIG_FILE_DIR=/etc/kubernetes
MANIFEST_DIR="${CONFIG_FILE_DIR}/manifests"
MANIFEST_STOPPED_DIR="${ASSET_DIR}/manifests-stopped"
ETCDCTL="${ASSET_DIR}/bin/etcdctl"
ETCD_DATA_DIR=/var/lib/etcd
ETCD_MANIFEST="${MANIFEST_DIR}/etcd-pod.yaml"
ETCD_STATIC_RESOURCES="${CONFIG_FILE_DIR}/static-pod-resources/etcd-member"
STOPPED_STATIC_PODS="${ASSET_DIR}/tmp/stopped-static-pods"

source "/usr/local/bin/openshift-recovery-tools"

function run {
init
dl_etcdctl
backup_etcd_client_certs
backup_manifest
backup_latest_kube_static_resources
snapshot_data_dir
echo "snapshot db and kube resources are successfully saved to ${BACKUP_DIR}!"
}

run
# Load the etcd environment file and the shared helpers (common paths and
# dl_etcdctl) shipped alongside this script.
source /etc/kubernetes/static-pod-resources/etcd-certs/configmaps/etcd-scripts/etcd.env
source /etc/kubernetes/static-pod-resources/etcd-certs/configmaps/etcd-scripts/etcd-common-tools

# Make etcdctl available, archive the static pod resources, then take the
# etcd snapshot.
dl_etcdctl
backup_latest_kube_static_resources
# Quoted: SNAPSHOT_FILE is built from BACKUP_DIR, which may contain spaces.
etcdctl snapshot save "${SNAPSHOT_FILE}"
echo "snapshot db and kube resources are successfully saved to ${BACKUP_DIR}!"
72 changes: 72 additions & 0 deletions bindata/etcd/etcd-snapshot-restore.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env bash
# Restore etcd from a backup directory: stops the static pods, moves the
# current etcd member data aside, stages the snapshot and the restore pod
# manifest, and unpacks the saved static pod resources.

# errexit: abort on the first unhandled failing command.
# pipefail: a pipeline fails if any of its stages fails.
# errtrace: ERR traps are inherited by functions and subshells.
set -o errexit
set -o pipefail
set -o errtrace

# example
# ./etcd-snapshot-restore.sh $path-to-backup

# Refuse to run without root privileges.
if [[ $EUID -ne 0 ]]; then
echo "This script must be run as root"
exit 1
fi

# etcd.env: NOTE(review) presumably provides ETCD_IMAGE and the etcdctl
# connection settings — confirm against the generated file.
source /etc/kubernetes/static-pod-resources/etcd-certs/configmaps/etcd-scripts/etcd.env
# etcd-common-tools: shared path variables (MANIFEST_DIR, ETCD_DATA_DIR, ...)
# and the dl_etcdctl helper.
source /etc/kubernetes/static-pod-resources/etcd-certs/configmaps/etcd-scripts/etcd-common-tools

# Print the invocation help for etcd-snapshot-restore.sh, describing the
# expected contents of the backup directory, and terminate the script with
# exit status 1.
function usage {
  echo 'Path to the directory containing backup files is required: ./etcd-snapshot-restore.sh <path-to-backup>'
  # Wording fixed: the original read "is expected to be contain two files".
  echo 'The backup directory is expected to contain two files:'
  echo ' 1. etcd snapshot'
  echo ' 2. A copy of the Static POD resources at the time of backup'
  exit 1
}

# If the argument is not passed, or if it is not a directory, print usage and exit.
if [ -z "${1:-}" ] || [ ! -d "$1" ]; then
  usage
fi

BACKUP_DIR="$1"
# Pick the highest-versioned match of each artifact. If nothing matches, the
# unexpanded glob pattern is kept, the -f tests below fail, and the message
# shows the pattern that was searched for.
BACKUP_FILE=$(printf '%s\n' "${BACKUP_DIR}"/static_kuberesources*.tar.gz | sort -V | tail -1)
SNAPSHOT_FILE=$(printf '%s\n' "${BACKUP_DIR}"/snapshot*.db | sort -V | tail -1)

if [ ! -f "${SNAPSHOT_FILE}" ]; then
  echo "etcd snapshot ${SNAPSHOT_FILE} does not exist."
  exit 1
fi

# Validate the static pod resources archive up front as well: it is only
# consumed by the final tar step, after the static pods have been stopped and
# the data dir moved, so a missing archive must be caught before those steps.
if [ ! -f "${BACKUP_FILE}" ]; then
  echo "static pod resources backup ${BACKUP_FILE} does not exist."
  exit 1
fi

# Move manifests and stop static pods.
# -p creates missing parent directories (MANIFEST_STOPPED_DIR lives under
# ASSET_DIR) and is a no-op when the directory already exists.
mkdir -p "${MANIFEST_STOPPED_DIR}"

# Move static pod manifests out of MANIFEST_DIR
find "${MANIFEST_DIR}" \
  -maxdepth 1 \
  -type f \
  -printf '...stopping %P\n' \
  -exec mv {} "${MANIFEST_STOPPED_DIR}" \;

# Wait for pods to stop
sleep 30

# TODO: verify using crictl that etcd and other pods stopped.

# Move the current data dir aside (it is kept as a backup, not deleted).
# NOTE(review): if ${ETCD_DATA_DIR_BACKUP}/member is left over from an earlier
# restore, mv nests the new copy inside it (or fails); decide whether a stale
# backup should be removed or rotated first.
echo "Moving etcd data-dir ${ETCD_DATA_DIR}/member to ${ETCD_DATA_DIR_BACKUP}"
mkdir -p "${ETCD_DATA_DIR_BACKUP}"
mv "${ETCD_DATA_DIR}/member" "${ETCD_DATA_DIR_BACKUP}/member"

# Copy snapshot to backupdir (the directory was created just above)
cp -p "${SNAPSHOT_FILE}" "${ETCD_DATA_DIR_BACKUP}/snapshot.db"

# Copy etcd restore pod yaml
cp -p "${RESTORE_ETCD_POD_YAML}" "${MANIFEST_DIR}/etcd-pod.yaml"

# Restore static pod resources
tar -C "${CONFIG_FILE_DIR}" -xzf "${BACKUP_FILE}" static-pod-resources

0 comments on commit 0531dc4

Please sign in to comment.