Skip to content
T. Andrew Manning edited this page Jun 16, 2026 · 1 revision

DevOps notes and scripts

Apply OS updates and reboot Kubernetes cluster nodes

This script assumes that openstack.blast.sh and blast-k8s.kubeconfig are present in the script directory. These are the OpenStack and Kubernetes API client config/auth files, respectively.

Example usage to patch and reboot controlplane nodes

ubuntu@monitor:~/k8s$ bash cycle_k8s_nodes.sh blast controlplane

Filtering nodes by 'controlplane'.
Nodes to be cycled:
blast-k8s-controlplane-01
blast-k8s-controlplane-02
blast-k8s-controlplane-03

================================================================================
Cordoning nodes...
================================================================================

Cordoning node blast-k8s-controlplane-01...
node/blast-k8s-controlplane-01 cordoned
Cordoning node blast-k8s-controlplane-02...
node/blast-k8s-controlplane-02 cordoned

================================================================================
Draining and rebooting nodes...
...

cycle_k8s_nodes.sh

#!/bin/bash
#
# Cordon, drain, optionally patch, and uncordon Kubernetes cluster nodes.
#
# Usage: cycle_k8s_nodes.sh blast <node-filter>
#   blast          cluster selector; the only value accepted
#   <node-filter>  string used further down to select nodes by name,
#                  e.g. 'worker' or 'controlplane'

set -euo pipefail

# Work from the script's own directory so the relative config paths resolve.
cd "$(dirname "$(readlink -f "$0")")"

# "${1:-}" keeps this check safe under `set -u` when no argument was given.
if [[ "${1:-}" != "blast" ]]; then
    echo "Usage: $(basename "$(readlink -f "$0")") blast <node-filter>"
    exit 1
fi

# nounset stays relaxed while the OpenStack environment file is sourced.
# NOTE(review): presumably that file may reference unset variables - confirm.
set +u
source openstack.blast.sh
export KUBECONFIG=blast-k8s.kubeconfig
set -u

# Required credentials: each variable only has to be set (empty counts as set).
[[ -v OS_APPLICATION_CREDENTIAL_SECRET ]] || {
    echo "OpenStack access credentials must be configured in your environment. Aborting."
    exit 1
}
[[ -v KUBECONFIG ]] || {
    echo "Kubernetes access credentials must be configured in your environment. Aborting."
    exit 1
}

# Optional tunables. "${VAR=default}" assigns only when VAR is unset, so a
# value supplied by the caller (even an empty one) is left untouched.
# OS_UPDATE=false skips the OS update step.
: "${OS_UPDATE=true}"
# DEPLOY_HEAL_WAIT: seconds to wait before draining the next node.
: "${DEPLOY_HEAL_WAIT=60}"
# OS_UPDATE_WAIT: seconds to wait after updating the OS.
: "${OS_UPDATE_WAIT=60}"

# Dry-run mode is enabled by DRY_RUN merely being set, whatever its value.
# The variable is then normalised to "echo" (dry-run) or "" (live), and the
# two waits are shortened for dry-runs.
if [[ -z "${DRY_RUN+x}" ]]; then
    DRY_RUN=""
else
    DRY_RUN="echo"
    DEPLOY_HEAL_WAIT=2
    OS_UPDATE_WAIT=2
    echo "Performing a dry-run. No actions will be taken."
fi

# The node filter is the SECOND positional argument (the first one selects the
# cluster). Check the argument count so that a missing filter prints the hint
# below instead of tripping `set -u` on "$2".
if (( $# < 2 )); then
    echo "Provide a node filtering string like 'worker' or 'controlplane'."
    exit 1
fi
NODE_FILTER=$2
echo "Filtering nodes by '${NODE_FILTER}'."

# First column of `kubectl get node`, with the header row dropped.
all_node_names=$(kubectl get node | awk 'NR > 1 { print $1 }')

# grep exits non-zero when nothing matches; tolerate that here so an empty
# result can be reported with a clear message.
# NOTE(review): `tail` without options keeps only the last 10 lines, so at most
# 10 matching nodes are cycled per run - confirm this cap is intentional.
matching_node_names=$(grep -E -- "${NODE_FILTER}" <<<"${all_node_names}" | tail) || true
if [[ -z "${matching_node_names}" ]]; then
    echo "No nodes match '${NODE_FILTER}'. Aborting."
    exit 1
fi

# One array element per line of output.
mapfile -t NODES <<<"${matching_node_names}"
LAST_NODE="${NODES[-1]}"
echo "Nodes to be cycled:"
printf '%s\n' "${NODES[@]}"

#######################################
# Run the apt update/upgrade/autoremove/autoclean sequence on a node over SSH,
# followed by a reboot, then sleep for OS_UPDATE_WAIT seconds.
# Globals:
#   DRY_RUN        (read) when equal to "echo" the ssh command is skipped
#   OS_UPDATE_WAIT (read) seconds to sleep before returning
# Arguments:
#   $1 - node name, used as the ssh destination
# Outputs:
#   Progress messages on stdout.
#######################################
update_os() {
    # Function-local, so the caller's NODE variable is not overwritten.
    local node=${1:?update_os: node name required}
    echo "Updating node $node operating system..."
    if [[ $DRY_RUN != "echo" ]]; then
        # The remote command list ends in `&`, i.e. it is backgrounded on the
        # remote side; each step only runs if the previous one succeeded (&&).
        # NOTE(review): `nohup` prefixes only the first apt-get command of the
        # chain - confirm that is what is intended.
        ssh "$node" ' \
            nohup \
            sudo DEBIAN_FRONTEND=noninteractive apt-get --assume-yes update && \
            sudo DEBIAN_FRONTEND=noninteractive apt-get --assume-yes --no-install-recommends upgrade && \
            sudo DEBIAN_FRONTEND=noninteractive apt-get --assume-yes autoremove && \
            sudo DEBIAN_FRONTEND=noninteractive apt-get --assume-yes autoclean && \
            sudo reboot &'
    fi
    echo "[$(date)] OS update complete. Sleeping $OS_UPDATE_WAIT seconds..."
    sleep "$OS_UPDATE_WAIT"
}

# Banner for the cordon phase (blank line, rule, title, rule, blank line).
cat <<'BANNER'

================================================================================
Cordoning nodes...
================================================================================

BANNER
# Cordon every selected node except the one named by LAST_NODE, which is left
# schedulable during this pass.
# NOTE(review): presumably so evicted pods still have a selected node to land
# on - confirm.
for NODE in "${NODES[@]}"; do
    if [[ "${NODE}" == "${LAST_NODE}" ]]; then
        continue
    fi
    echo "Cordoning node ${NODE}..."
    # DRY_RUN is deliberately unquoted: when empty it must vanish entirely,
    # when "echo" it turns the command into a printout.
    ${DRY_RUN} kubectl cordon "${NODE}"
done

echo
echo "================================================================================"
echo "Draining and rebooting nodes..."
echo "================================================================================"
echo
# Process the selected nodes one at a time: cordon, drain, optionally update
# the OS, wait for the console log marker, uncordon, then pause.
# $DRY_RUN is deliberately left unquoted throughout: empty means "run the
# command", "echo" means "print it instead".
for NODE in "${NODES[@]}"; do
    echo "Draining node $NODE..."
    # Cordon again here; the node named by LAST_NODE is skipped by the
    # up-front cordon pass.
    $DRY_RUN kubectl cordon "$NODE"
    set -x
    # Both label requirements must be given as ONE comma-separated selector.
    # Passing -l twice does not combine them, so the delete would not be
    # limited to instance-manager pods on this node.
    $DRY_RUN kubectl delete pod -n longhorn-system -l "longhorn.io/component=instance-manager,longhorn.io/node=${NODE}"
    set +x
    # $DRY_RUN kubectl drain $NODE --delete-emptydir-data --ignore-daemonsets --force
    $DRY_RUN kubectl drain "$NODE" --delete-emptydir-data --ignore-daemonsets
    if [[ $OS_UPDATE == "true" ]]; then
        $DRY_RUN update_os "$NODE"
    else
        echo "OS update skipped."
    fi
    # NOTE(review): the only reboot command in this script is the one issued
    # by update_os; when OS_UPDATE is not "true" nothing here reboots the node.
    echo "Rebooting node $NODE..."
    if [[ $DRY_RUN == "echo" ]]; then
        echo "Waiting for node $NODE to reboot..."
    else
        # Capture the console log completely before searching it. Piping it
        # straight into `grep --quiet` lets grep exit at the first match while
        # the producer is still writing; with `set -o pipefail` the resulting
        # broken-pipe status can make a successful match look like a failure
        # and keep this loop waiting.
        until console_log=$(openstack console log show "$NODE") \
            && grep --quiet 'Final Stage' <<<"$console_log"; do
            echo "Waiting for node $NODE to reboot..."
            sleep 5
        done
    fi
    echo "Uncordoning node $NODE..."
    $DRY_RUN kubectl uncordon "$NODE"
    echo "[$(date)] Waiting $DEPLOY_HEAL_WAIT sec for deployments to heal before proceeding to the next node..."
    sleep "$DEPLOY_HEAL_WAIT"
done

# The message no longer claims "worker" or "OS updates": the filter may select
# other node roles and the OS update step may have been skipped.
echo "Node cycling complete."

Clone this wiki locally