diff --git a/ci-operator/config/openshift/cluster-network-operator/openshift-cluster-network-operator-master.yaml b/ci-operator/config/openshift/cluster-network-operator/openshift-cluster-network-operator-master.yaml index 0e52076b85ee..ab5926bdd265 100644 --- a/ci-operator/config/openshift/cluster-network-operator/openshift-cluster-network-operator-master.yaml +++ b/ci-operator/config/openshift/cluster-network-operator/openshift-cluster-network-operator-master.yaml @@ -220,6 +220,11 @@ tests: - ref: ovn-sdn-migration-feature-migration-validate-multicast - ref: openshift-e2e-test workflow: openshift-e2e-aws +- as: e2e-aws-ovn-clusternetwork-cidr-expansion + cron: 0 3 * * 0,2,4,6 + steps: + cluster_profile: aws + workflow: openshift-e2e-aws-cidr-expansion - as: e2e-aws-ovn-single-node optional: true steps: diff --git a/ci-operator/config/openshift/cluster-network-operator/openshift-cluster-network-operator-release-4.13.yaml b/ci-operator/config/openshift/cluster-network-operator/openshift-cluster-network-operator-release-4.13.yaml index d16a5ffc44b6..c6b8c3414ef5 100644 --- a/ci-operator/config/openshift/cluster-network-operator/openshift-cluster-network-operator-release-4.13.yaml +++ b/ci-operator/config/openshift/cluster-network-operator/openshift-cluster-network-operator-release-4.13.yaml @@ -220,6 +220,11 @@ tests: - ref: ovn-sdn-migration-feature-migration-validate-multicast - ref: openshift-e2e-test workflow: openshift-e2e-aws +- as: e2e-aws-ovn-clusternetwork-cidr-expansion + cron: 0 3 * * 0,2,4,6 + steps: + cluster_profile: aws + workflow: openshift-e2e-aws-cidr-expansion - as: e2e-aws-ovn-single-node optional: true steps: diff --git a/ci-operator/config/openshift/cluster-network-operator/openshift-cluster-network-operator-release-4.14.yaml b/ci-operator/config/openshift/cluster-network-operator/openshift-cluster-network-operator-release-4.14.yaml index f76d48b731bb..8258dfb797fd 100644 --- 
a/ci-operator/config/openshift/cluster-network-operator/openshift-cluster-network-operator-release-4.14.yaml +++ b/ci-operator/config/openshift/cluster-network-operator/openshift-cluster-network-operator-release-4.14.yaml @@ -221,6 +221,11 @@ tests: - ref: ovn-sdn-migration-feature-migration-validate-multicast - ref: openshift-e2e-test workflow: openshift-e2e-aws +- as: e2e-aws-ovn-clusternetwork-cidr-expansion + cron: 0 3 * * 0,2,4,6 + steps: + cluster_profile: aws + workflow: openshift-e2e-aws-cidr-expansion - as: e2e-aws-ovn-single-node optional: true steps: diff --git a/ci-operator/jobs/openshift/cluster-network-operator/openshift-cluster-network-operator-master-periodics.yaml b/ci-operator/jobs/openshift/cluster-network-operator/openshift-cluster-network-operator-master-periodics.yaml new file mode 100644 index 000000000000..5d7c9b76b2b8 --- /dev/null +++ b/ci-operator/jobs/openshift/cluster-network-operator/openshift-cluster-network-operator-master-periodics.yaml @@ -0,0 +1,71 @@ +periodics: +- agent: kubernetes + cluster: build05 + cron: 0 3 * * 0,2,4,6 + decorate: true + extra_refs: + - base_ref: master + org: openshift + repo: cluster-network-operator + labels: + ci-operator.openshift.io/cloud: aws + ci-operator.openshift.io/cloud-cluster-profile: aws + ci.openshift.io/generator: prowgen + pj-rehearse.openshift.io/can-be-rehearsed: "true" + name: periodic-ci-openshift-cluster-network-operator-master-e2e-aws-ovn-clusternetwork-cidr-expansion + spec: + containers: + - args: + - --gcs-upload-secret=/secrets/gcs/service-account.json + - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson + - --lease-server-credentials-file=/etc/boskos/credentials + - --report-credentials-file=/etc/report/credentials + - --secret-dir=/secrets/ci-pull-credentials + - --secret-dir=/usr/local/e2e-aws-ovn-clusternetwork-cidr-expansion-cluster-profile + - --target=e2e-aws-ovn-clusternetwork-cidr-expansion + command: + - ci-operator + image: ci-operator:latest + 
imagePullPolicy: Always + name: "" + resources: + requests: + cpu: 10m + volumeMounts: + - mountPath: /etc/boskos + name: boskos + readOnly: true + - mountPath: /secrets/ci-pull-credentials + name: ci-pull-credentials + readOnly: true + - mountPath: /usr/local/e2e-aws-ovn-clusternetwork-cidr-expansion-cluster-profile + name: cluster-profile + - mountPath: /secrets/gcs + name: gcs-credentials + readOnly: true + - mountPath: /etc/pull-secret + name: pull-secret + readOnly: true + - mountPath: /etc/report + name: result-aggregator + readOnly: true + serviceAccountName: ci-operator + volumes: + - name: boskos + secret: + items: + - key: credentials + path: credentials + secretName: boskos-credentials + - name: ci-pull-credentials + secret: + secretName: ci-pull-credentials + - name: cluster-profile + secret: + secretName: cluster-secrets-aws + - name: pull-secret + secret: + secretName: registry-pull-credentials + - name: result-aggregator + secret: + secretName: result-aggregator diff --git a/ci-operator/jobs/openshift/cluster-network-operator/openshift-cluster-network-operator-release-4.13-periodics.yaml b/ci-operator/jobs/openshift/cluster-network-operator/openshift-cluster-network-operator-release-4.13-periodics.yaml new file mode 100644 index 000000000000..e29a3dd36a6e --- /dev/null +++ b/ci-operator/jobs/openshift/cluster-network-operator/openshift-cluster-network-operator-release-4.13-periodics.yaml @@ -0,0 +1,71 @@ +periodics: +- agent: kubernetes + cluster: build05 + cron: 0 3 * * 0,2,4,6 + decorate: true + extra_refs: + - base_ref: release-4.13 + org: openshift + repo: cluster-network-operator + labels: + ci-operator.openshift.io/cloud: aws + ci-operator.openshift.io/cloud-cluster-profile: aws + ci.openshift.io/generator: prowgen + pj-rehearse.openshift.io/can-be-rehearsed: "true" + name: periodic-ci-openshift-cluster-network-operator-release-4.13-e2e-aws-ovn-clusternetwork-cidr-expansion + spec: + containers: + - args: + - 
--gcs-upload-secret=/secrets/gcs/service-account.json + - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson + - --lease-server-credentials-file=/etc/boskos/credentials + - --report-credentials-file=/etc/report/credentials + - --secret-dir=/secrets/ci-pull-credentials + - --secret-dir=/usr/local/e2e-aws-ovn-clusternetwork-cidr-expansion-cluster-profile + - --target=e2e-aws-ovn-clusternetwork-cidr-expansion + command: + - ci-operator + image: ci-operator:latest + imagePullPolicy: Always + name: "" + resources: + requests: + cpu: 10m + volumeMounts: + - mountPath: /etc/boskos + name: boskos + readOnly: true + - mountPath: /secrets/ci-pull-credentials + name: ci-pull-credentials + readOnly: true + - mountPath: /usr/local/e2e-aws-ovn-clusternetwork-cidr-expansion-cluster-profile + name: cluster-profile + - mountPath: /secrets/gcs + name: gcs-credentials + readOnly: true + - mountPath: /etc/pull-secret + name: pull-secret + readOnly: true + - mountPath: /etc/report + name: result-aggregator + readOnly: true + serviceAccountName: ci-operator + volumes: + - name: boskos + secret: + items: + - key: credentials + path: credentials + secretName: boskos-credentials + - name: ci-pull-credentials + secret: + secretName: ci-pull-credentials + - name: cluster-profile + secret: + secretName: cluster-secrets-aws + - name: pull-secret + secret: + secretName: registry-pull-credentials + - name: result-aggregator + secret: + secretName: result-aggregator diff --git a/ci-operator/jobs/openshift/cluster-network-operator/openshift-cluster-network-operator-release-4.14-periodics.yaml b/ci-operator/jobs/openshift/cluster-network-operator/openshift-cluster-network-operator-release-4.14-periodics.yaml new file mode 100644 index 000000000000..da10636a45a2 --- /dev/null +++ b/ci-operator/jobs/openshift/cluster-network-operator/openshift-cluster-network-operator-release-4.14-periodics.yaml @@ -0,0 +1,71 @@ +periodics: +- agent: kubernetes + cluster: build05 + cron: 0 3 * * 0,2,4,6 + 
decorate: true + extra_refs: + - base_ref: release-4.14 + org: openshift + repo: cluster-network-operator + labels: + ci-operator.openshift.io/cloud: aws + ci-operator.openshift.io/cloud-cluster-profile: aws + ci.openshift.io/generator: prowgen + pj-rehearse.openshift.io/can-be-rehearsed: "true" + name: periodic-ci-openshift-cluster-network-operator-release-4.14-e2e-aws-ovn-clusternetwork-cidr-expansion + spec: + containers: + - args: + - --gcs-upload-secret=/secrets/gcs/service-account.json + - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson + - --lease-server-credentials-file=/etc/boskos/credentials + - --report-credentials-file=/etc/report/credentials + - --secret-dir=/secrets/ci-pull-credentials + - --secret-dir=/usr/local/e2e-aws-ovn-clusternetwork-cidr-expansion-cluster-profile + - --target=e2e-aws-ovn-clusternetwork-cidr-expansion + command: + - ci-operator + image: ci-operator:latest + imagePullPolicy: Always + name: "" + resources: + requests: + cpu: 10m + volumeMounts: + - mountPath: /etc/boskos + name: boskos + readOnly: true + - mountPath: /secrets/ci-pull-credentials + name: ci-pull-credentials + readOnly: true + - mountPath: /usr/local/e2e-aws-ovn-clusternetwork-cidr-expansion-cluster-profile + name: cluster-profile + - mountPath: /secrets/gcs + name: gcs-credentials + readOnly: true + - mountPath: /etc/pull-secret + name: pull-secret + readOnly: true + - mountPath: /etc/report + name: result-aggregator + readOnly: true + serviceAccountName: ci-operator + volumes: + - name: boskos + secret: + items: + - key: credentials + path: credentials + secretName: boskos-credentials + - name: ci-pull-credentials + secret: + secretName: ci-pull-credentials + - name: cluster-profile + secret: + secretName: cluster-secrets-aws + - name: pull-secret + secret: + secretName: registry-pull-credentials + - name: result-aggregator + secret: + secretName: result-aggregator diff --git a/ci-operator/step-registry/ipi/aws/cidr-expansion/OWNERS 
b/ci-operator/step-registry/ipi/aws/cidr-expansion/OWNERS new file mode 120000 index 000000000000..ec405d65a79d --- /dev/null +++ b/ci-operator/step-registry/ipi/aws/cidr-expansion/OWNERS @@ -0,0 +1 @@ +../OWNERS \ No newline at end of file diff --git a/ci-operator/step-registry/openshift/e2e/aws/cidr-expansion/OWNERS b/ci-operator/step-registry/openshift/e2e/aws/cidr-expansion/OWNERS new file mode 120000 index 000000000000..ec405d65a79d --- /dev/null +++ b/ci-operator/step-registry/openshift/e2e/aws/cidr-expansion/OWNERS @@ -0,0 +1 @@ +../OWNERS \ No newline at end of file diff --git a/ci-operator/step-registry/openshift/e2e/aws/cidr-expansion/openshift-e2e-aws-cidr-expansion-workflow.metadata.json b/ci-operator/step-registry/openshift/e2e/aws/cidr-expansion/openshift-e2e-aws-cidr-expansion-workflow.metadata.json new file mode 100644 index 000000000000..6fe9a93368fd --- /dev/null +++ b/ci-operator/step-registry/openshift/e2e/aws/cidr-expansion/openshift-e2e-aws-cidr-expansion-workflow.metadata.json @@ -0,0 +1,12 @@ +{ + "path": "openshift/e2e/aws/cidr-expansion/openshift-e2e-aws-cidr-expansion-workflow.yaml", + "owners": { + "approvers": [ + "deads2k", + "dgoodwin", + "stbenjam", + "vrutkovs", + "wking" + ] + } +} \ No newline at end of file diff --git a/ci-operator/step-registry/openshift/e2e/aws/cidr-expansion/openshift-e2e-aws-cidr-expansion-workflow.yaml b/ci-operator/step-registry/openshift/e2e/aws/cidr-expansion/openshift-e2e-aws-cidr-expansion-workflow.yaml new file mode 100644 index 000000000000..d5fdb2d5bc8d --- /dev/null +++ b/ci-operator/step-registry/openshift/e2e/aws/cidr-expansion/openshift-e2e-aws-cidr-expansion-workflow.yaml @@ -0,0 +1,22 @@ +workflow: + as: openshift-e2e-aws-cidr-expansion + steps: + env: + CLUSTER_NETWORK_CIDR: 10.128.0.0/23 + CLUSTER_NETWORK_HOST_PREFIX: "26" + pre: + - chain: ipi-conf-aws + - ref: ovn-conf-custom-cidr + - chain: ipi-install + test: + - ref: ovn-clusternetwork-cidr-expansion + - ref: openshift-e2e-test + post: + - 
chain: gather-core-dump + - chain: ipi-aws-post + documentation: |- + This workflow will configure custom values for the clusterNetwork CIDR mask (/23) and the + hostPrefix (/26) which allows for 512 total IPs w/ 64 IPs per node resulting in 8 nodes + max. The clusternetwork-cidr-expansion test step will modify (expand) the total IPs and + scale to more worker nodes. Finally the standard e2e-test step will run + diff --git a/ci-operator/step-registry/ovn/clusternetwork-cidr-expansion/OWNERS b/ci-operator/step-registry/ovn/clusternetwork-cidr-expansion/OWNERS new file mode 120000 index 000000000000..ec405d65a79d --- /dev/null +++ b/ci-operator/step-registry/ovn/clusternetwork-cidr-expansion/OWNERS @@ -0,0 +1 @@ +../OWNERS \ No newline at end of file diff --git a/ci-operator/step-registry/ovn/clusternetwork-cidr-expansion/ovn-clusternetwork-cidr-expansion-commands.sh b/ci-operator/step-registry/ovn/clusternetwork-cidr-expansion/ovn-clusternetwork-cidr-expansion-commands.sh new file mode 100644 index 000000000000..83e4722f43c0 --- /dev/null +++ b/ci-operator/step-registry/ovn/clusternetwork-cidr-expansion/ovn-clusternetwork-cidr-expansion-commands.sh @@ -0,0 +1,146 @@ +#!/bin/bash +set -x +set -o errexit +set -o nounset +set -o pipefail + +function dump_cluster_state { + oc get nodes -o wide + oc get network.operator.openshift.io -o yaml + oc get machinesets -n openshift-machine-api + oc get co -A +} + +function wait_for_operators_and_nodes { + # wait for all cluster operators to be done rolling out + timeout $1 bash <&2 + dump_cluster_state + exit 1 + fi +} + +# make sure cluster is up and healthy after install and dump initial state. There are cases when not +# all operators are ready even after the install process has completed. Poll for another 15m to be +# sure and exit/fail if all operators are not healthy. +wait_for_operators_and_nodes 900 +dump_cluster_state + +# validate expected clusterNetwork CIDR and bail out if it's not right. 
want /23 so that we +# know only 8 nodes are allowed + +cidr=$(oc get network.operator.openshift.io -o jsonpath='{.items[0].spec.clusterNetwork[0].cidr}') +host_prefix=$(oc get network.operator.openshift.io -o jsonpath='{.items[0].spec.clusterNetwork[0].hostPrefix}') + +if [[ "$cidr" != "10.128.0.0/23" ]] || [[ "$host_prefix" != "26" ]]; then + echo "Error: cluster network is misconfigured. Expected CIDR of 10.128.0.0/23 and hostPrefix of 26, but got:" >&2 + oc get network.operator.openshift.io -o jsonpath='{.items[0].spec.clusterNetwork}' + exit 1 +fi + +# scale to 9 nodes by giving one of the worker machinesets 3 more replicas +# Get the machineset list in YAML format +machinesets=$(oc get machinesets -n openshift-machine-api -o yaml) +machineset_count=$(echo "$machinesets" | /tmp/yq '.items | length') + +# Make sure all nodes in the machinesets are available and ready. if not, the cluster is +# probably not healthy and just bail right away. +for i in $(seq 0 $((machineset_count-1))); do + desired=$(echo "$machinesets" | /tmp/yq ".items[$i].spec.replicas") + ready=$(echo "$machinesets" | /tmp/yq ".items[$i].status.readyReplicas") + available=$(echo "$machinesets" | /tmp/yq ".items[$i].status.availableReplicas") + name=$(echo "$machinesets" | /tmp/yq ".items[$i].metadata.name") + if [[ "$desired" != "$ready" || "$ready" != "$available" ]]; then + echo "Error: machine set $name has mismatched counts" >&2 + exit 1 + fi + # Set NODE_TO_SCALE to the name of the first machineset in the list + if [[ $i == 0 ]]; then + NODE_TO_SCALE=$name + READY_COUNT=$ready + fi +done + +echo "NODE_TO_SCALE=$NODE_TO_SCALE" +oc scale --replicas=$(($READY_COUNT + 3)) machineset "$NODE_TO_SCALE" -n openshift-machine-api + +# wait for the two extra nodes to become ready, then validate that only 2 of the new nodes were allocated a subnet. 
+# the 3rd extra node should be notReady and have no subnet because they are exhausted +oc wait machinesets -n openshift-machine-api "$NODE_TO_SCALE" --for=jsonpath='{.status.readyReplicas}'=$(($READY_COUNT + 2)) --timeout=1200s +# machinesets are Ready, but there is a chance the final node that we expect to be notReady is not even deployed +# from the cloud provider, so let's make sure (10m) we have 9 nodes in total before we move on +timeout 600 bash <&2 + exit 1 +fi + +oc describe node $not_ready_node +# run the check inside the if: with errexit/pipefail a bare failing pipeline would abort the script before the diagnostics below are printed +if ! oc describe node $not_ready_node | grep "nodeAdd: error adding node \"${not_ready_node}\": could not find \"k8s.ovn.org/node-subnets\" annotation"; then + oc get nodes -o wide + echo "Error: did not find a notReady node. Expected one node to be notReady because there are no subnets available" >&2 + exit 1 +fi + +# Check if there is exactly 1 node without a subnet and 8 nodes with a subnet +if [ "$nodes_with_subnet" -ne 8 ] || [ "$nodes_without_subnet" -ne 1 ]; then + oc get nodes -o wide + echo "Error: expected 8 nodes with subnets and 1 node with no subnet" >&2 + exit 1 +fi +# patch the cluster to give it more ip space with /22 +oc patch Network.config.openshift.io cluster --type='merge' --patch '{ "spec":{ "clusterNetwork": [ {"cidr":"10.128.0.0/22","hostPrefix":26} ], "networkType": "OVNKubernetes" }}' + +# first wait for the network operator to move to Progressing=True +if ! oc wait co network --for='condition=PROGRESSING=True' --timeout=120s; then + oc get co -A + echo "Error: the network operator never moved to Progressing=True. The clusterNetwork CIDR change may not have worked" >&2 + exit 1 +fi + +# it can take a while for operators to roll out after the CIDR mask change. give it up to 30m +wait_for_operators_and_nodes 1800 + +# double check that 9th node became available. 
Should not have to wait long as it should have +# moved to Ready state during the ovnk rollout process above +oc wait machinesets -n openshift-machine-api "$NODE_TO_SCALE" --for=jsonpath='{.status.readyReplicas}'=$(($READY_COUNT + 3)) --timeout=120s || true +oc get nodes -o wide +nodes_ready=$(oc get nodes --no-headers | grep -v NotReady | grep -c Ready || true) +if [ "$nodes_ready" -ne 9 ]; then + echo "Error: expected 9 nodes to be Ready" >&2 + exit 1 +fi + +# final state of the cluster +dump_cluster_state diff --git a/ci-operator/step-registry/ovn/clusternetwork-cidr-expansion/ovn-clusternetwork-cidr-expansion-ref.metadata.json b/ci-operator/step-registry/ovn/clusternetwork-cidr-expansion/ovn-clusternetwork-cidr-expansion-ref.metadata.json new file mode 100644 index 000000000000..b19654af7d71 --- /dev/null +++ b/ci-operator/step-registry/ovn/clusternetwork-cidr-expansion/ovn-clusternetwork-cidr-expansion-ref.metadata.json @@ -0,0 +1,11 @@ +{ + "path": "ovn/clusternetwork-cidr-expansion/ovn-clusternetwork-cidr-expansion-ref.yaml", + "owners": { + "approvers": [ + "sdn-approvers" + ], + "reviewers": [ + "sdn-reviewers" + ] + } +} \ No newline at end of file diff --git a/ci-operator/step-registry/ovn/clusternetwork-cidr-expansion/ovn-clusternetwork-cidr-expansion-ref.yaml b/ci-operator/step-registry/ovn/clusternetwork-cidr-expansion/ovn-clusternetwork-cidr-expansion-ref.yaml new file mode 100644 index 000000000000..e14e98e5814e --- /dev/null +++ b/ci-operator/step-registry/ovn/clusternetwork-cidr-expansion/ovn-clusternetwork-cidr-expansion-ref.yaml @@ -0,0 +1,15 @@ +ref: + as: ovn-clusternetwork-cidr-expansion + from_image: + name: cli-yq + namespace: ocp + tag: latest + commands: ovn-clusternetwork-cidr-expansion-commands.sh + resources: + requests: + cpu: 10m + memory: 100Mi + documentation: |- + This step will test that new nodes cannot be added because the existing clusterNetwork CIDR does + not provide enough IP space. 
Then it will configure the CIDR such that more IP space is added and + verify the new nodes are added. diff --git a/ci-operator/step-registry/ovn/conf/custom-cidr/ovn-conf-custom-cidr-commands.sh b/ci-operator/step-registry/ovn/conf/custom-cidr/ovn-conf-custom-cidr-commands.sh index f3cffdba017f..d05cf54f7fc5 100644 --- a/ci-operator/step-registry/ovn/conf/custom-cidr/ovn-conf-custom-cidr-commands.sh +++ b/ci-operator/step-registry/ovn/conf/custom-cidr/ovn-conf-custom-cidr-commands.sh @@ -4,12 +4,19 @@ set -o nounset set -o errexit set -o pipefail -echo "update cidr to 20 to the max nodes num is 8" +CLUSTER_NETWORK_CIDR=${CLUSTER_NETWORK_CIDR:-10.128.0.0/20} +CLUSTER_NETWORK_HOST_PREFIX=${CLUSTER_NETWORK_HOST_PREFIX:-23} + +echo "default is to update cidr to 20 w/ hostPrefix of 23 so that max nodes num is 8" +printf '\nsee below for actual values used\n--------------------------------\n\n' cat >> "${SHARED_DIR}/install-config.yaml" << EOF networking: networkType: OVNKubernetes clusterNetwork: - - cidr: 10.128.0.0/20 - hostPrefix: 23 + - cidr: $CLUSTER_NETWORK_CIDR + hostPrefix: $CLUSTER_NETWORK_HOST_PREFIX EOF + +# print only the networking stanza: the full install-config may carry credentials (e.g. pull secret, ssh key) that should not land in job logs +grep -A4 '^networking:' "${SHARED_DIR}/install-config.yaml" diff --git a/ci-operator/step-registry/ovn/conf/custom-cidr/ovn-conf-custom-cidr-ref.yaml b/ci-operator/step-registry/ovn/conf/custom-cidr/ovn-conf-custom-cidr-ref.yaml index a1a4174aadea..c237c4f1bf55 100644 --- a/ci-operator/step-registry/ovn/conf/custom-cidr/ovn-conf-custom-cidr-ref.yaml +++ b/ci-operator/step-registry/ovn/conf/custom-cidr/ovn-conf-custom-cidr-ref.yaml @@ -6,5 +6,10 @@ ref: requests: cpu: 10m memory: 100Mi + env: + - name: CLUSTER_NETWORK_CIDR + default: "10.128.0.0/20" + - name: CLUSTER_NETWORK_HOST_PREFIX + default: "23" documentation: |- this will deploy cluster with custom the cidr to define the cluster workers numbers