-
Notifications
You must be signed in to change notification settings - Fork 10
DevOps
T. Andrew Manning edited this page Jun 16, 2026
·
1 revision
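The cycle_k8s_nodes.sh script assumes that openstack.blast.sh and blast-k8s.kubeconfig are present in the same directory as the script.
These are the OpenStack and Kubernetes API client config/auth files, respectively.
It takes two arguments: the cluster name (blast) and a node filter string such as 'worker' or 'controlplane'. Example run: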
ubuntu@monitor:~/k8s$ bash cycle_k8s_nodes.sh blast controlplane
Filtering nodes by 'controlplane'.
Nodes to be cycled:
blast-k8s-controlplane-01
blast-k8s-controlplane-02
blast-k8s-controlplane-03
================================================================================
Cordoning nodes...
================================================================================
Cordoning node blast-k8s-controlplane-01...
node/blast-k8s-controlplane-01 cordoned
Cordoning node blast-k8s-controlplane-02...
node/blast-k8s-controlplane-02 cordoned
================================================================================
Draining and rebooting nodes...
...
#!/bin/bash
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

# Work from the directory that holds this script (symlinks resolved) so the
# relative config file names below resolve.
cd "$(dirname "$(readlink -f "$0")")"

# The first argument selects the cluster; "blast" is the only one supported.
# ${1:-} lets a missing argument fall through to the usage text instead of
# tripping `set -u`.
if [[ "${1:-}" == "blast" ]]; then
  # nounset is relaxed only while the OpenStack credentials file is sourced.
  # NOTE(review): presumably that file may expand unset variables -- confirm.
  set +u
  source openstack.blast.sh
  set -u
  export KUBECONFIG=blast-k8s.kubeconfig
else
  # The second argument (node filter) is required further down, so the usage
  # text lists it as well.
  echo "Usage: $(basename "$(readlink -f "$0")") blast <node-filter>"
  exit 1
fi
# Both API clients need credentials in the environment; stop early when
# either variable is not set at all.
[[ -v OS_APPLICATION_CREDENTIAL_SECRET ]] || {
  echo "OpenStack access credentials must be configured in your environment. Aborting."
  exit 1
}
[[ -v KUBECONFIG ]] || {
  echo "Kubernetes access credentials must be configured in your environment. Aborting."
  exit 1
}

# Tunables. ${VAR=default} assigns only when VAR is unset, so a value that
# the caller exported (even an empty one) is left untouched.
#   OS_UPDATE         set to "false" to skip the OS update step
#   DEPLOY_HEAL_WAIT  seconds to wait before draining the next node
#   OS_UPDATE_WAIT    seconds to wait after updating the OS
: "${OS_UPDATE=true}"
: "${DEPLOY_HEAL_WAIT=60}"
: "${OS_UPDATE_WAIT=60}"

# DRY_RUN doubles as a command prefix: when the caller set it (to any value)
# it becomes "echo" and both waits shrink to 2 seconds; otherwise it becomes
# the empty string.
if [[ -z "${DRY_RUN+set}" ]]; then
  DRY_RUN=""
else
  DRY_RUN="echo"
  DEPLOY_HEAL_WAIT=2
  OS_UPDATE_WAIT=2
  echo "Performing a dry-run. No actions will be taken."
fi
# The node filter is the SECOND positional argument (the first one selects
# the cluster). Testing the argument count prints this message instead of
# letting `set -u` abort with "unbound variable" when the filter is missing.
if (( $# < 2 )); then
  echo "Provide a node filtering string like 'worker' or 'controlplane'."
  exit 1
fi
NODE_FILTER=$2
echo "Filtering nodes by '${NODE_FILTER}'."

# Node names are the first column of `kubectl get node`, minus the header
# row. A kubectl failure aborts here through `set -e` / pipefail.
node_names=$(kubectl get node | awk 'NR > 1 { print $1 }')

# Keep the names matching NODE_FILTER (an extended regular expression).
# NOTE(review): bare `tail` keeps only the last 10 matches; it is carried
# over unchanged -- confirm that the cap is intentional.
NODES=()
if [[ -n "${node_names}" ]]; then
  mapfile -t NODES < <(grep -E -- "${NODE_FILTER}" <<<"${node_names}" | tail)
fi

# Without this guard an empty list would fail on ${NODES[-1]} with a
# "bad array subscript" error.
if (( ${#NODES[@]} == 0 )); then
  echo "No nodes match '${NODE_FILTER}'. Aborting."
  exit 1
fi

LAST_NODE="${NODES[-1]}"
echo "Nodes to be cycled:"
printf '%s\n' "${NODES[@]}"
#######################################
# Upgrade a node's OS packages over SSH, reboot it, then pause.
# Globals:   DRY_RUN (read), OS_UPDATE_WAIT (read)
# Arguments: $1 - node name, used as the ssh destination
# Outputs:   progress messages on stdout
#######################################
update_os() {
  # Local, so the caller's global NODE is not overwritten.
  local node=$1
  echo "Updating node $node operating system..."
  if [[ $DRY_RUN != "echo" ]]; then
    # The remote command is one and-list (apt-get update, upgrade,
    # autoremove, autoclean, reboot) put in the background by the trailing
    # `&`. The quoted string is kept verbatim, hence no indentation inside.
    # NOTE(review): `nohup` prefixes only the first apt-get, not the whole
    # chain -- confirm that is what was meant.
    ssh "$node" ' \
nohup \
sudo DEBIAN_FRONTEND=noninteractive apt-get --assume-yes update && \
sudo DEBIAN_FRONTEND=noninteractive apt-get --assume-yes --no-install-recommends upgrade && \
sudo DEBIAN_FRONTEND=noninteractive apt-get --assume-yes autoremove && \
sudo DEBIAN_FRONTEND=noninteractive apt-get --assume-yes autoclean && \
sudo reboot &'
  fi
  echo "[$(date)] OS update complete. Sleeping $OS_UPDATE_WAIT seconds..."
  sleep "$OS_UPDATE_WAIT"
}
# Phase banner: blank line, rule, title, rule, blank line.
cat <<'EOF'

================================================================================
Cordoning nodes...
================================================================================

EOF

# Cordon every selected node except the last one.
# NOTE(review): presumably the last node is left schedulable so that evicted
# pods still have somewhere to land -- confirm.
for NODE in "${NODES[@]}"; do
  if [[ "$NODE" == "${LAST_NODE}" ]]; then
    continue
  fi
  echo "Cordoning node $NODE..."
  # DRY_RUN is deliberately unquoted: when empty it must expand to nothing.
  $DRY_RUN kubectl cordon "$NODE"
done
echo
echo "================================================================================"
echo "Draining and rebooting nodes..."
echo "================================================================================"
echo

# Handle the nodes one at a time: drain, optionally update the OS (which also
# reboots), wait for the boot to finish, uncordon, then let workloads settle.
# DRY_RUN is left unquoted on purpose: it is either "echo" or empty, and the
# empty value must expand to nothing.
for NODE in "${NODES[@]}"; do
  echo "Draining node $NODE..."
  # Cordon again here; this is a no-op for nodes that are already cordoned.
  $DRY_RUN kubectl cordon "$NODE"

  # Delete this node's Longhorn instance-manager pods before draining.
  # Both label requirements go into ONE comma-separated selector: kubectl's
  # -l/--selector takes a single string, so repeating -l keeps only the last
  # value and would drop the component constraint.
  set -x
  $DRY_RUN kubectl delete pod -n longhorn-system \
    -l "longhorn.io/component=instance-manager,longhorn.io/node=${NODE}"
  set +x

  # Variant with --force, currently disabled:
  # $DRY_RUN kubectl drain $NODE --delete-emptydir-data --ignore-daemonsets --force
  $DRY_RUN kubectl drain "$NODE" --delete-emptydir-data --ignore-daemonsets

  # NOTE(review): the only reboot visible in this file is issued inside
  # update_os, so with OS_UPDATE != "true" nothing here reboots the node --
  # confirm that is intended.
  if [[ $OS_UPDATE == "true" ]]; then
    $DRY_RUN update_os "$NODE"
  else
    echo "OS update skipped."
  fi

  echo "Rebooting node $NODE..."
  if [[ $DRY_RUN == "echo" ]]; then
    echo "Waiting for node $NODE to reboot..."
  else
    # Poll the OpenStack console log until it contains 'Final Stage'.
    # The log is captured before it is searched: piping straight into
    # `grep --quiet` lets grep exit on the first match, the producer can then
    # die on a broken pipe, and under `pipefail` that turns a successful
    # match into a failed condition.
    until console_log=$(openstack console log show "$NODE") \
      && grep --quiet 'Final Stage' <<<"$console_log"; do
      echo "Waiting for node $NODE to reboot..."
      sleep 5
    done
  fi

  echo "Uncordoning node $NODE..."
  $DRY_RUN kubectl uncordon "$NODE"
  echo "[$(date)] Waiting $DEPLOY_HEAL_WAIT sec for deployments to heal before proceeding to the next node..."
  sleep "$DEPLOY_HEAL_WAIT"
done
echo "Worker node OS updates complete."