AGENT-864: New make target to add day2 node with agent #1654
base: master
@@ -0,0 +1,72 @@
#!/usr/bin/env bash
set -euxo pipefail

SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/.." && pwd )"

source $SCRIPTDIR/common.sh
source $SCRIPTDIR/network.sh
source $SCRIPTDIR/ocp_install_env.sh
source $SCRIPTDIR/utils.sh
source $SCRIPTDIR/validation.sh
source $SCRIPTDIR/agent/common.sh

early_deploy_validation

function build_node_joiner() {
    # Build installer
    pushd .
    cd $OPENSHIFT_INSTALL_PATH
    TAGS="${OPENSHIFT_INSTALLER_BUILD_TAGS:-libvirt baremetal}" DEFAULT_ARCH=$(get_arch) hack/build-node-joiner.sh
    popd
}

[Review comment] As per the previous comment, the build function for the node-joiner can be removed for now and addressed in a separate card. Note for future development: the build_local_release function in step 4 already contains the logic for properly building/pushing a local image; it would be useful to extract it and reuse it when a single image update is required.

function approve_csrs() {
    while true; do
        pending_csrs=$(oc get csr | grep Pending)
        if [[ ${pending_csrs} != "" ]]; then
            echo "Approving CSRs: $pending_csrs"
            echo $pending_csrs | cut -d ' ' -f 1 | xargs oc adm certificate approve
        fi
        sleep 10
    done
}

[Review comment] Since this will be run in a CI job, it would also be useful to have a timeout condition (e.g. 10 minutes), to avoid relying on the CI timeout (usually 4h) in case of an issue during the approval.
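The bounded loop suggested in the review comment above could look something like the following sketch. The function name, the `CSR_APPROVE_TIMEOUT` variable, and the use of bash's built-in `SECONDS` counter are illustrative assumptions, not part of the PR:

```shell
# Sketch only: the approval loop with a bounded runtime, as suggested in review.
# approve_csrs_with_timeout and CSR_APPROVE_TIMEOUT are illustrative names.
function approve_csrs_with_timeout() {
    local timeout="${CSR_APPROVE_TIMEOUT:-600}"   # default 10 minutes
    SECONDS=0                                     # bash built-in: seconds elapsed since reset
    while (( SECONDS < timeout )); do
        pending_csrs=$(oc get csr | grep Pending || true)
        if [[ -n "${pending_csrs}" ]]; then
            echo "Approving CSRs: ${pending_csrs}"
            echo "${pending_csrs}" | cut -d ' ' -f 1 | xargs oc adm certificate approve
        fi
        sleep 10
    done
    echo "approve_csrs timed out after ${timeout}s" >&2
    return 1
}
```

Returning non-zero on timeout lets the calling script (or CI step) fail fast instead of waiting for the job-level timeout.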
if [ -z "$KNI_INSTALL_FROM_GIT" ]; then
    # Extract node-joiner from the release image
    baremetal_installer_image=$(oc adm release info --image-for=baremetal-installer --registry-config=$PULL_SECRET_FILE)
    container_id=$(podman create --authfile $PULL_SECRET_FILE ${baremetal_installer_image} ls)
    podman start $container_id
    podman export $container_id > baremetal-installer.tar
    tar xf baremetal-installer.tar usr/bin/node-joiner
    cp usr/bin/node-joiner $OCP_DIR/node-joiner
    rm -rf usr baremetal-installer.tar
    podman rm $container_id
else
    # Build the node-joiner from source
    build_node_joiner
    cp "$OPENSHIFT_INSTALL_PATH/bin/node-joiner" "$OCP_DIR"
fi

[Review comment] As suggested before, we don't need this part for local rebuild now, so it could be removed.

rm $OCP_DIR/add-node/.openshift_install_state.json || true

[Review comment] This should never be required. When running transient commands, the

get_static_ips_and_macs

[Review comment] See my comments below.

node_joiner="$(realpath "${OCP_DIR}/node-joiner")"

$node_joiner add-nodes --dir $OCP_DIR/add-node --kubeconfig $OCP_DIR/auth/kubeconfig

[Review comment] The primary test interface must be the node-joiner.sh script. What we really need here is just to download it and run it, i.e. like a real user would do.

sudo virt-xml ${CLUSTER_NAME}_extraworker_${AGENT_EXTRAWORKER_NODE_TO_ADD} --add-device --disk "${OCP_DIR}/add-node/node.x86_64.iso,device=cdrom,target.dev=sdc"
sudo virt-xml ${CLUSTER_NAME}_extraworker_${AGENT_EXTRAWORKER_NODE_TO_ADD} --edit target=sda --disk="boot_order=1"
sudo virt-xml ${CLUSTER_NAME}_extraworker_${AGENT_EXTRAWORKER_NODE_TO_ADD} --edit target=sdc --disk="boot_order=2" --start

[Review comment] For this part we currently have a small tech debt that should be addressed, and very likely this sounds like the right occasion to tackle it. The concept of

[Review comment] Specifically for these lines of code, it could be worth modifying this portion of code to also take care of the extraworkers, if defined. The only exception is that the VMs for the extra workers must not be started, so the various functions may require an additional param for that. We do not need to address the pxe/appliance cases now, but it could be useful to prepare the ground for future developments.

set +ex

[Review comment] nit: a small comment could be useful here.

approve_csrs &
approve_csrs_pid=$!
trap 'kill -TERM ${approve_csrs_pid}; exit' INT EXIT TERM
set -ex

$node_joiner monitor-add-nodes ${AGENT_EXTRA_WORKERS_IPS[${AGENT_EXTRAWORKER_NODE_TO_ADD}]} --kubeconfig $OCP_DIR/auth/kubeconfig

[Review comment] As in the previous case, the script execution should be used here too (when available).

oc get nodes extraworker-${AGENT_EXTRAWORKER_NODE_TO_ADD} | grep Ready

[Review comment] I'm not sure about this check; I'd expect the monitor script to report the proper exit code (forcing the current script to fail).

kill -TERM ${approve_csrs_pid}

[Review comment] Is this really needed? The trap should already take care of that in any case.
@@ -44,9 +44,132 @@ function getReleaseImage() {
    echo ${releaseImage}
}
# Functions used to determine IP addresses, MACs, and hostnames
function add_ip_host_entry {
    ip=${1}
    hostname=${2}

    echo "${ip} ${hostname}">>"${OCP_DIR}"/hosts
}

function add_dns_entry {
    ip=${1}
    hostname=${2}

    # Add a DNS entry for this hostname if it's not already defined
    if ! $(sudo virsh net-dumpxml ${BAREMETAL_NETWORK_NAME} | xmllint --xpath "//dns/host[@ip = '${ip}']" - &> /dev/null); then
        sudo virsh net-update ${BAREMETAL_NETWORK_NAME} add dns-host "<host ip='${ip}'> <hostname>${hostname}</hostname> </host>" --live --config
    fi

    # Add entries to /etc/hosts for SNO IPv6 to successfully run the openshift conformance tests
    if [[ $NUM_MASTERS == 1 && $IP_STACK == "v6" ]]; then
        AGENT_NODE0_IPSV6=${ip}
        echo "${ip} console-openshift-console.apps.${CLUSTER_DOMAIN}" | sudo tee -a /etc/hosts
        echo "${ip} oauth-openshift.apps.${CLUSTER_DOMAIN}" | sudo tee -a /etc/hosts
        echo "${ip} thanos-querier-openshift-monitoring.apps.${CLUSTER_DOMAIN}" | sudo tee -a /etc/hosts
    fi
}

function get_static_ips_and_macs() {

[Review comment] As mentioned before, I'd prefer to keep these functions in step 5, since they are not really generic, and address the extra workers locally where required.

    AGENT_NODES_IPS=()
    AGENT_NODES_IPSV6=()
    AGENT_NODES_MACS=()
    AGENT_NODES_HOSTNAMES=()

    if [[ "$AGENT_STATIC_IP_NODE0_ONLY" = "true" ]]; then
        static_ips=1
    else
        static_ips=$NUM_MASTERS+$NUM_WORKERS
    fi

[Review comment] Suggested change.

[Review comment] See comment below.
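The suggested change above concerns a classic bash pitfall: a plain assignment concatenates text, so summing two counts needs arithmetic expansion. A minimal illustration (the variable values here are made up):

```shell
# Illustration of the pitfall flagged in review (values are made up):
NUM_MASTERS=3
NUM_WORKERS=2

concat=$NUM_MASTERS+$NUM_WORKERS        # literal string "3+2", no evaluation
summed=$((NUM_MASTERS + NUM_WORKERS))   # arithmetic expansion: 5

echo "concat=${concat} summed=${summed}"
```

The string "3+2" happens to work later inside `for (( ... ))`, which re-evaluates arithmetic, but it likely breaks as soon as the value is passed to a helper expecting a plain number.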
    if [[ $NETWORKING_MODE == "DHCP" ]]; then
        base_ip=20
    else
        # Set outside the range used for dhcp
        base_ip=80
    fi

    for (( i=0; i<${static_ips}; i++ ))
    do
        if [[ $i < $NUM_MASTERS ]]; then
            AGENT_NODES_HOSTNAMES+=($(printf ${MASTER_HOSTNAME_FORMAT} ${i}))
            cluster_name=${CLUSTER_NAME}_master_${i}
        else
            worker_num=$((${i}-$NUM_MASTERS))
            AGENT_NODES_HOSTNAMES+=($(printf ${WORKER_HOSTNAME_FORMAT} ${worker_num}))
            cluster_name=${CLUSTER_NAME}_worker_${worker_num}
        fi

[Review comment] Another point for managing the extra worker.

        ip=${base_ip}+${i}
        if [[ "$IP_STACK" = "v4" ]]; then
            AGENT_NODES_IPS+=($(nth_ip ${EXTERNAL_SUBNET_V4} ${ip}))
            add_dns_entry ${AGENT_NODES_IPS[i]} ${AGENT_NODES_HOSTNAMES[i]}
            add_ip_host_entry ${AGENT_NODES_IPS[i]} ${AGENT_NODES_HOSTNAMES[i]}
        elif [[ "$IP_STACK" = "v6" ]]; then
            AGENT_NODES_IPSV6+=($(nth_ip ${EXTERNAL_SUBNET_V6} ${ip}))
            add_dns_entry ${AGENT_NODES_IPSV6[i]} ${AGENT_NODES_HOSTNAMES[i]}
            add_ip_host_entry ${AGENT_NODES_IPSV6[i]} ${AGENT_NODES_HOSTNAMES[i]}
        else
            # v4v6
            AGENT_NODES_IPS+=($(nth_ip ${EXTERNAL_SUBNET_V4} ${ip}))
            AGENT_NODES_IPSV6+=($(nth_ip $EXTERNAL_SUBNET_V6 ${ip}))
            add_dns_entry ${AGENT_NODES_IPS[i]} ${AGENT_NODES_HOSTNAMES[i]}
            add_ip_host_entry ${AGENT_NODES_IPS[i]} ${AGENT_NODES_HOSTNAMES[i]}
            add_dns_entry ${AGENT_NODES_IPSV6[i]} ${AGENT_NODES_HOSTNAMES[i]}
        fi

        # Get the generated mac addresses
        AGENT_NODES_MACS+=($(sudo virsh dumpxml $cluster_name | xmllint --xpath "string(//interface[descendant::source[@bridge = '${BAREMETAL_NETWORK_NAME}']]/mac/@address)" -))
        if [[ ! -z "${BOND_PRIMARY_INTERFACE:-}" ]]; then
            # For a bond, a random mac is added for the 2nd interface
            AGENT_NODES_MACS+=($(sudo virsh domiflist ${cluster_name} | grep ${BAREMETAL_NETWORK_NAME} | grep -v ${AGENT_NODES_MACS[-1]} | awk '{print $5}'))
        fi
    done

    AGENT_EXTRA_WORKERS_IPS=()
    AGENT_EXTRA_WORKERS_IPSV6=()
    AGENT_EXTRA_WORKERS_MACS=()
    AGENT_EXTRA_WORKERS_HOSTNAMES=()

[Review comment] That's really a lot of duplication that I'd prefer not to have. An extra worker is really just like an ordinary node, but not effectively deployed, and it should be managed in an agnostic way in the code where possible. The previous suggestions are meant to accommodate that vision, by incorporating the extra workers in the loop that is responsible for managing the nodes. In general I'm not against having dedicated

    extra_workers=$NUM_EXTRA_WORKERS
    for (( i=0; i<${extra_workers}; i++ ))
    do
        AGENT_EXTRA_WORKERS_HOSTNAMES+=($(printf ${EXTRA_WORKER_HOSTNAME_FORMAT} ${i}))
        cluster_name=${CLUSTER_NAME}_extraworker_${i}

        ip=${base_ip}+${static_ips}+${i}
        if [[ "$IP_STACK" = "v4" ]]; then
            AGENT_EXTRA_WORKERS_IPS+=($(nth_ip ${EXTERNAL_SUBNET_V4} ${ip}))
            add_dns_entry ${AGENT_EXTRA_WORKERS_IPS[i]} ${AGENT_EXTRA_WORKERS_HOSTNAMES[i]}
            add_ip_host_entry ${AGENT_EXTRA_WORKERS_IPS[i]} ${AGENT_EXTRA_WORKERS_HOSTNAMES[i]}
        elif [[ "$IP_STACK" = "v6" ]]; then
            AGENT_EXTRA_WORKERS_IPSV6+=($(nth_ip ${EXTERNAL_SUBNET_V6} ${ip}))
            add_dns_entry ${AGENT_EXTRA_WORKERS_IPSV6[i]} ${AGENT_EXTRA_WORKERS_HOSTNAMES[i]}
            add_ip_host_entry ${AGENT_EXTRA_WORKERS_IPSV6[i]} ${AGENT_EXTRA_WORKERS_HOSTNAMES[i]}
        else
            # v4v6
            AGENT_EXTRA_WORKERS_IPS+=($(nth_ip ${EXTERNAL_SUBNET_V4} ${ip}))
            AGENT_EXTRA_WORKERS_IPSV6+=($(nth_ip $EXTERNAL_SUBNET_V6 ${ip}))
            add_dns_entry ${AGENT_EXTRA_WORKERS_IPS[i]} ${AGENT_EXTRA_WORKERS_HOSTNAMES[i]}
            add_ip_host_entry ${AGENT_EXTRA_WORKERS_IPS[i]} ${AGENT_EXTRA_WORKERS_HOSTNAMES[i]}
            add_dns_entry ${AGENT_EXTRA_WORKERS_IPSV6[i]} ${AGENT_EXTRA_WORKERS_HOSTNAMES[i]}
        fi

        # Get the generated mac addresses
        AGENT_EXTRA_WORKERS_MACS+=($(sudo virsh dumpxml $cluster_name | xmllint --xpath "string(//interface[descendant::source[@bridge = '${BAREMETAL_NETWORK_NAME}']]/mac/@address)" -))
        if [[ ! -z "${BOND_PRIMARY_INTERFACE:-}" ]]; then
            # For a bond, a random mac is added for the 2nd interface
            AGENT_EXTRA_WORKERS_MACS+=($(sudo virsh domiflist ${cluster_name} | grep ${BAREMETAL_NETWORK_NAME} | grep -v ${AGENT_EXTRA_WORKERS_MACS[-1]} | awk '{print $5}'))
        fi
    done
}
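For context, `nth_ip` is a dev-scripts helper (defined elsewhere, in network.sh) that returns the N-th address of a subnet. A rough, hypothetical IPv4-only equivalent, purely to illustrate how the `base_ip` offsets above map to addresses (this is not the real helper):

```shell
# Hypothetical stand-in for the real nth_ip helper (IPv4 only, no bounds checks):
# prints the N-th address of an IPv4 CIDR.
nth_ip_v4() {
    local cidr=$1 n=$2 a b c d num
    IFS=. read -r a b c d <<< "${cidr%/*}"          # split the base address into octets
    num=$(( (a << 24) + (b << 16) + (c << 8) + d + n ))
    echo "$(( (num >> 24) & 255 )).$(( (num >> 16) & 255 )).$(( (num >> 8) & 255 )).$(( num & 255 ))"
}
```

So with `base_ip=80` and a 192.168.111.0/24 subnet, node `i` would get 192.168.111.(80 + i).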
# External load balancer configuration.
# The following ports are opened in firewalld so that libvirt VMs can communicate with haproxy.
export MACHINE_CONFIG_SERVER_PORT=22623
export KUBE_API_PORT=6443
export INGRESS_ROUTER_PORT=443
export AGENT_NODE0_IPSV6=${AGENT_NODE0_IPSV6:-}

export AGENT_EXTRAWORKER_NODE_TO_ADD=${AGENT_EXTRAWORKER_NODE_TO_ADD:-"0"}

[Review comment] As per the previous comment, this should not be required.
@@ -0,0 +1,52 @@
# Add nodes (day 2)

Nodes can be added after installation, using the "agent_add_node" make target.

To add a node, the cluster must be created with extra worker nodes using
the NUM_EXTRA_WORKERS environment variable. This will configure VMs for extra
worker nodes, but not include them in the initial cluster installation.

For example,

```
export NUM_EXTRA_WORKERS=2
```

will create two extra worker nodes, named extraworker-0 and extraworker-1.

The extra worker nodes must have a minimum disk size of 100GB. This can be
specified using:

```
export EXTRA_WORKER_DISK=100
```

After the cluster has been installed, run these commands to add both nodes
to the cluster:

```
AGENT_EXTRAWORKER_NODE_TO_ADD=0 make agent_add_node
AGENT_EXTRAWORKER_NODE_TO_ADD=1 make agent_add_node
```

The nodes will be added to the cluster sequentially.

# agent_add_node target

To add a node to the cluster, the "agent_add_node" target performs the same
steps a user would need to perform to add a node to the cluster using [the
node-joiner tool](https://github.com/openshift/installer/blob/master/docs/user/agent/add-node/add-nodes.md).

Steps:

* Downloads and executes the [node-joiner.sh](https://github.com/openshift/installer/blob/master/docs/user/agent/add-node/node-joiner.sh) script to generate the ISO (node.x86_64.iso) that can be used to join a node to the cluster. This script requires an input file, nodes-config.yaml, which describes the node being added. This input file is automatically generated by "agent_add_node", provided the cluster was created with extra worker nodes.
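For reference, a nodes-config.yaml for one of the extra workers might look like the following sketch. The hostname, interface name, and MAC address are made-up values, and the exact schema should be checked against the node-joiner documentation linked above:

```yaml
# Illustrative sketch only; values are made up.
hosts:
  - hostname: extraworker-0
    interfaces:
      - name: eth0
        macAddress: 52:54:00:aa:bb:01
```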
* Boots the extra worker node using the generated ISO.

* Downloads and executes the [node-joiner-monitor.sh](https://github.com/openshift/installer/blob/master/docs/user/agent/add-node/node-joiner-monitor.sh) script to show progress. The monitor script also displays CSRs that will need to be approved for the node to join the cluster.

* A user would then approve each CSR using "oc adm certificate approve" or equivalent. This target creates a background process that auto-approves any CSRs.
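The manual-approval step in the last bullet could, for instance, be done with a generic `oc` idiom like the following sketch (shown for illustration; this is not code from the PR):

```shell
# Approve every CSR whose condition is Pending. Assumes `oc` is logged in
# to the cluster; a generic idiom, not part of the PR itself.
function approve_pending_csrs() {
    local csr
    for csr in $(oc get csr | awk '$NF == "Pending" {print $1}'); do
        oc adm certificate approve "$csr"
    done
}
```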
@@ -32,3 +32,15 @@
  src: "agent-config_bond_yaml.j2"
  dest: "{{ install_path }}/agent-config.yaml"
  when: agent_bond_config != 'none'

- name: create add-node directory
  ansible.builtin.file:
    path: "{{ install_path }}/add-node"
    state: directory
  when: num_extra_workers != "0"

[Review comment] That's not really part of the install-config; it must be placed into its own task file related to adding nodes.

- name: write the nodes-config.yaml
  template:
    src: "nodes-config_yaml.j2"
    dest: "{{ install_path }}/add-node/nodes-config.yaml"
  when: agent_bond_config == 'none' and num_extra_workers != "0"
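The review suggestion to keep these tasks out of the install-config task file might look like the following sketch; the file name `tasks/add_node.yml` is a hypothetical choice, not something prescribed by the PR:

```yaml
# Hypothetical tasks/add_node.yml, per the review suggestion to keep
# the add-node tasks in their own task file.
- name: create add-node directory
  ansible.builtin.file:
    path: "{{ install_path }}/add-node"
    state: directory
  when: num_extra_workers != "0"

- name: write the nodes-config.yaml
  ansible.builtin.template:
    src: "nodes-config_yaml.j2"
    dest: "{{ install_path }}/add-node/nodes-config.yaml"
  when: agent_bond_config == 'none' and num_extra_workers != "0"
```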
[Review comment] I like the idea of having a separate step, since it will be a useful way to develop/test the node-joiner locally. From a high-level point of view, we should aim to support two different scenarios.

In the first scenario, the dev first runs a `make agent` to deploy a cluster, then a `make agent_add_node` multiple times while developing. For this reason, the step should be:

From a CI point of view, we should have a way to invoke the `agent_add_node` step when required. We could either override the `DEVSCRIPTS_TARGET` in an agent workflow (i.e. here), or always add this step in the make agent and exclude it via a config var. Personally I'd be more inclined to the former option.

Now, since the reference card is targeting the second scenario (CI jobs), and since the first one may require more additional work not strictly needed within this sprint, I think it would be better to have a new card for scenario 1 in the carry-over epic AGENT-896 and limit this one just to the second scenario. This should also help simplify the code.