-
Notifications
You must be signed in to change notification settings - Fork 96
Adding canary 'monitor' mode. #241
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
0fff8e6
0dc45a1
0e9dae9
eb981a7
cc2c230
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -91,22 +91,28 @@ upgrade: | |
| # Requires a 'dist/oci-cloud-controller-manager-rollback.yaml' manifest. Requires $KUBECONFIG set. | ||
| .PHONY: rollback | ||
| rollback: | ||
| # Rollback the current CCM to the specified version | ||
| # Rollback the current CCM to the specified version. | ||
| @./hack/deploy.sh rollback-original-ccm | ||
|
|
||
| .PHONY: e2e | ||
| e2e: | ||
| @./hack/test-e2e.sh | ||
|
|
||
| # Run the canary tests. | ||
| .PHONY: canary | ||
| canary: | ||
| @./hack/test-canary.sh | ||
| # Run the canary tests - in single run mode. | ||
| .PHONY: canary-run-once | ||
| canary-run-once: | ||
| @./hack/test-canary.sh run-once | ||
|
|
||
| # Validate the generated canary test image. | ||
| # Run the canary tests - in monitor (infinite loop) mode. | ||
| .PHONY: canary-monitor | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. New monitor mode. |
||
| canary-monitor: | ||
| @./hack/test-canary.sh monitor | ||
|
|
||
| # Validate the generated canary test image. Runs test once | ||
| # and monitors from sidecar. | ||
| .PHONY: validate-canary | ||
| validate-canary: | ||
| @./hack/validate-canary.sh | ||
| @./hack/validate-canary.sh run | ||
|
|
||
| .PHONY: clean | ||
| clean: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| #!/bin/bash | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This script is for the new entrypoint. The easiest solution (rather than mess with GOPATH) is for the entrypoint script to move to the original project root and execute the canary test from there as before. |
||
|
|
||
| # Copyright 2018 Oracle and/or its affiliates. All rights reserved. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| echo "\$METRICS_FILE: ${METRICS_FILE}" | ||
| echo "\$MONITOR_PERIOD: ${MONITOR_PERIOD}" | ||
|
|
||
| # For OCI usage, 'monitor' is the default canary mode. | ||
| if [ -z "${CANARY_MODE}" ]; then | ||
| export CANARY_MODE="monitor" | ||
| fi | ||
|
|
||
| pushd "${GOPATH}/src/github.com/oracle/oci-cloud-controller-manager" | ||
| ./hack/test-canary.sh ${CANARY_MODE} | ||
| popd | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,7 +32,7 @@ function run_canary_tests() { | |
| ginkgo -v -progress -noColor=true \ | ||
| -focus "\[Canary\]" \ | ||
| test/e2e \ | ||
| -- --kubeconfig=${KUBECONFIG} --delete-namespace=false \ | ||
| -- --kubeconfig=${KUBECONFIG} --delete-namespace=true \ | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a better default for the canary tests. |
||
| 2>&1 | tee "${TEST_LOG}" | ||
| } | ||
|
|
||
|
|
@@ -75,15 +75,16 @@ function create_results() { | |
| echo "Creating result file: ${METRICS_FILE}" | ||
| cat > "${METRICS_FILE}" <<EOF | ||
| { | ||
| "start_time": "${START}" | ||
| "create_lb": "$(extract_result ${CREATE_LB_TEST})" | ||
| "start_time": "${START}", | ||
| "create_lb": "$(extract_result ${CREATE_LB_TEST})", | ||
| "end_time": "$(now)" | ||
| } | ||
| EOF | ||
| } | ||
|
|
||
| # Run the tests and extract the results | ||
| function run() { | ||
| # Run the tests once and extract the results. | ||
| function run-once() { | ||
| START=$(now) | ||
| init_results | ||
| cat "${METRICS_FILE}" | ||
| run_canary_tests | ||
|
|
@@ -95,13 +96,34 @@ function run() { | |
|
|
||
| # Helper function to clean up log and json files. | ||
| function clean() { | ||
| kubectl get pods --all-namespaces | grep ccm | awk '{print $1}' | xargs kubectl delete ns | ||
| rm "${TEST_DIR}/${TEST_PREFIX}*" | ||
| echo "ensuring fresh \$START." | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In monitor mode we no longer run once and exit - so we need a function to clean up resources between each iteration. |
||
| unset START | ||
| echo "ensuring fresh ${TEST_LOG} file." | ||
| rm -f "${TEST_LOG}" | ||
| echo "ensuring fresh ${METRICS_FILE} result file." | ||
| rm -f "${METRICS_FILE}" | ||
| echo "ensuring all 'cm-e2e-tests' namespaces are terminated." | ||
| local res=$(kubectl get ns | grep 'cm-e2e-tests-' | awk '{print $1}') | ||
| if [ ! -z "${res}" ]; then | ||
| echo ${res} | xargs kubectl delete ns 2> /dev/null | ||
| fi | ||
| } | ||
|
|
||
| # Run the tests in loop with the specified wait period. | ||
| function monitor() { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The new 'monitor' method just sits in an infinite loop and calls the original 'run-once' method on each iteration. It also sleeps for the specified period. This will give a window of opportunity for any sidecars to grab the results from the filesystem. The results are deleted at the start of the next iteration, so the current results are lost if they have not been collected by then. |
||
| local period=${1:-$MONITOR_PERIOD} | ||
| while true; | ||
| do | ||
| clean && run-once | ||
| echo "Sleeping for ${period} before next run..." | ||
| sleep "${period}" | ||
| done | ||
| } | ||
|
|
||
| # Main ************************************************************************ | ||
| # | ||
|
|
||
| # Handle mandatory KUBECONFIG requirement. | ||
| if [ -z "${KUBECONFIG}" ]; then | ||
| if [ -z "${KUBECONFIG_VAR}" ]; then | ||
| echo "KUBECONFIG or KUBECONFIG_VAR must be set" | ||
|
|
@@ -113,26 +135,31 @@ if [ -z "${KUBECONFIG}" ]; then | |
| fi | ||
| fi | ||
|
|
||
| START=$(now) | ||
| # If not specified, default mandatory 'metrics file' location. | ||
| if [ -z "${METRICS_FILE}" ]; then | ||
| export METRICS_FILE=/tmp/ccm-canary-metrics.json | ||
| fi | ||
|
|
||
| TEST_ID="" | ||
| if [ "${UNIQUE_TEST_ID}" = true ]; then | ||
| TEST_ID="-$(date +"%Y-%m-%d-%H%M%S")" | ||
| # If not specified, default mandatory 'monitor period' in seconds. | ||
| if [ -z "${MONITOR_PERIOD}" ]; then | ||
| export MONITOR_PERIOD=30 | ||
| fi | ||
|
|
||
| # Set up directory for filesystem test log. The success of the test | ||
| # is extracted from this log. | ||
| if [ -z "${TEST_DIR}" ]; then | ||
| TEST_DIR="/tmp" | ||
| fi | ||
| mkdir -p "${TEST_DIR}" | ||
| TEST_LOG="${TEST_DIR}/oci-ccm-canary-test.log" | ||
|
|
||
| TEST_PREFIX="oci-ccm-canary-test" | ||
| TEST_LOG="${TEST_DIR}/${TEST_PREFIX}${TEST_ID}.log" | ||
|
|
||
| # If provided, execute the specified function. | ||
| if [ ! -z "$1" ]; then | ||
| $1 | ||
| # If provided, execute the specified function with args. | ||
| # e.g. run-once, monitor, clean, etc. | ||
| $@ | ||
| else | ||
| run | ||
| # Otherwise, run the monitor | ||
| monitor | ||
| fi | ||
|
|
||
| exit $? | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -88,7 +88,6 @@ function local-docker-mode() { | |
| docker exec -it ${cid} /bin/bash | ||
| } | ||
|
|
||
|
|
||
| # Test Functions ************************************************************** | ||
| # | ||
|
|
||
|
|
@@ -107,28 +106,32 @@ spec: | |
| containers: | ||
| - name: oci-cloud-controller-manager-canary-test-runner | ||
| image: iad.ocir.io/oracle/oci-cloud-controller-manager-canary:${version} | ||
| command: ["/bin/bash"] | ||
| args: ["-ec", "make canary"] | ||
| env: | ||
| - name: METRICS_FILE | ||
| value: /metrics/output.json | ||
| - name: KUBECONFIG_VAR | ||
| value: $(cat ${KUBECONFIG} | openssl enc -base64 -A) | ||
| - name: KUBECONFIG_VAR | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The validation test also shows expected usage of the canary in a pod. |
||
| value: $(cat ${KUBECONFIG} | openssl enc -base64 -A) | ||
| - name: METRICS_FILE | ||
| value: /metrics/output.json | ||
| - name: MONITOR_PERIOD | ||
| value: "30" | ||
| - name: CANARY_MODE | ||
| value: monitor | ||
| command: ["/bin/bash"] | ||
| args: ["-ec", "/oci/scripts/ccm-canary-entrypoint.sh"] | ||
| volumeMounts: | ||
| - mountPath: /metrics | ||
| name: metrics-volume | ||
|
|
||
| - name: oci-cloud-controller-manager-canary-test-reporter | ||
| image: iad.ocir.io/oracle/oci-cloud-controller-manager-ci-e2e:1.0.1 | ||
| command: ["/bin/bash"] | ||
| args: ["-ec", "touch \$METRICS_FILE; while [ -z \$(cat \$METRICS_FILE | grep 'end_time' | cut -d':' -f 1) ]; do sleep 1; done; cat \$METRICS_FILE"] | ||
| args: ["-ec", "while true; do sleep 10; cat \$METRICS_FILE; done"] | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The canary validation test now has a dumb reporter that just sits in an infinite loop and cats the contents of the metrics file to stdout. |
||
| env: | ||
| - name: METRICS_FILE | ||
| value: /metrics/output.json | ||
| - name: METRICS_FILE | ||
| value: /metrics/output.json | ||
| volumeMounts: | ||
| - mountPath: /metrics | ||
| name: metrics-volume | ||
|
|
||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Whoops. |
||
| imagePullSecrets: | ||
| - name: ocir | ||
|
|
||
|
|
@@ -152,17 +155,37 @@ function run() { | |
| clean-canary | ||
| generate-canary-manifest | ||
| deploy-canary | ||
| # Tail the logs of the reporter to block until it completes. The report only logs the result file. | ||
| res=$(kubectl logs -f oci-cloud-controller-manager-canary -c oci-cloud-controller-manager-canary-test-reporter) | ||
| # Display the results. | ||
| echo "${res}" | ||
| # Grep the log to return an error code. | ||
| error=$(echo "${res}" | grep 'end_time' | cut -d':' -f 1) | ||
| if [ -z ${error} ]; then | ||
| exit 1 | ||
| else | ||
| exit 0 | ||
| fi | ||
|
|
||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The validation test itself is a little more involved now. We wait for canary to run CANARY_RUNS times. If we have any failures we consider it FAILED. If we have all successes then PASSED. On completion we destroy the canary pod. We let the CI destroy long running tests. This should also destroy any running canaries. |
||
| local canary_runs=${CANARY_RUNS} | ||
| local duration=1800 | ||
| local sleep=10 | ||
| local timeout=$(($(date +%s) + $duration)) | ||
| while [ $(date +%s) -lt $timeout ]; do | ||
| echo "waiting for ${canary_runs} runs." | ||
| local logs=$(kubectl logs oci-cloud-controller-manager-canary -c oci-cloud-controller-manager-canary-test-reporter) | ||
| local num_runs=$(echo "${logs}"| grep 'end_time' | uniq | wc -l) | ||
| echo "currently run ${num_runs} times." | ||
| if [ "${num_runs}" -ge "${canary_runs}" ]; then | ||
| # Remove canary and delete any remaining test namespaces. | ||
| kubectl delete pod oci-cloud-controller-manager-canary | ||
| local res=$(kubectl get ns | grep 'cm-e2e-tests-' | awk '{print $1}') | ||
| if [ ! -z "${res}" ]; then | ||
| echo ${res} | xargs kubectl delete ns | ||
| fi | ||
| # Test results | ||
| local num_pass=$(echo "${logs}"| grep '"create_lb": "1"' | uniq | wc -l) | ||
| local num_fail=$(echo "${logs}"| grep '"create_lb": "0"' | uniq | wc -l) | ||
| if [ "${num_fail}" -gt "0" ]; then | ||
| echo "FAILED" | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe dump the test logs here for the failure case? It might make debugging a failure easier. |
||
| kubectl logs oci-cloud-controller-manager-canary -c oci-cloud-controller-manager-canary-test-runner | ||
| exit 1 | ||
| elif [ "${num_pass}" -eq "1" ]; then | ||
| echo "PASSED" | ||
| exit 0 | ||
| fi | ||
| fi | ||
| sleep ${sleep} | ||
| done | ||
| } | ||
|
|
||
| # Main ************************************************************************ | ||
|
|
@@ -178,11 +201,16 @@ if [ -z "${KUBECONFIG}" ]; then | |
| export KUBECONFIG=/tmp/kubeconfig | ||
| fi | ||
| fi | ||
|
|
||
| if [ -z "${VERSION}" ]; then | ||
| echo "The VERSION must be set" | ||
| exit 1 | ||
| fi | ||
|
|
||
| if [ -z "${CANARY_RUNS}" ]; then | ||
| export CANARY_RUNS=1 | ||
| fi | ||
|
|
||
| # If provided, execute the specified function. | ||
| if [ ! -z "$1" ]; then | ||
| $1 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ import ( | |
| "flag" | ||
| "fmt" | ||
| "os" | ||
| "strconv" | ||
| "strings" | ||
| "time" | ||
|
|
||
|
|
@@ -51,6 +52,7 @@ var ( | |
| kubeconfig string // path to kubeconfig file | ||
| deleteNamespace bool // whether or not to delete test namespaces | ||
| cloudConfigFile string // path to cloud provider config file | ||
| nodePortTest bool // whether or not to test the connectivity of node ports. | ||
| ccmSeclistID string // The ocid of the loadbalancer subnet seclist. Optional. | ||
| k8sSeclistID string // The ocid of the k8s worker subnet seclist. Optional. | ||
| ) | ||
|
|
@@ -59,6 +61,7 @@ func init() { | |
| flag.StringVar(&kubeconfig, "kubeconfig", "", "Path to Kubeconfig file with authorization and master location information.") | ||
| flag.BoolVar(&deleteNamespace, "delete-namespace", true, "If true tests will delete namespace after completion. It is only designed to make debugging easier, DO NOT turn it off by default.") | ||
| flag.StringVar(&cloudConfigFile, "cloud-config", "", "The path to the cloud provider configuration file. Empty string for no configuration file.") | ||
| flag.BoolVar(&nodePortTest, "nodeport-test", false, "If true test will include 'nodePort' connectectivity tests.") | ||
| flag.StringVar(&ccmSeclistID, "ccm-seclist-id", "", "The ocid of the loadbalancer subnet seclist. Enables additional seclist rule tests. If specified the 'k8s-seclist-id parameter' is also required.") | ||
| flag.StringVar(&k8sSeclistID, "k8s-seclist-id", "", "The ocid of the k8s worker subnet seclist. Enables additional seclist rule tests. If specified the 'ccm-seclist-id parameter' is also required.") | ||
| } | ||
|
|
@@ -75,6 +78,7 @@ type Framework struct { | |
|
|
||
| CloudProviderConfig *oci.Config // If specified, the CloudProviderConfig. This provides information on the configuration of the test cluster. | ||
| Client client.Interface // An OCI client for checking the state of any provisioned OCI infrastructure during testing. | ||
| NodePortTest bool // An optional configuration for E2E testing. If set to true, then will run additional E2E nodePort connectivity checks during testing. | ||
| CCMSecListID string // An optional configuration for E2E testing. If present can be used to run additional checks against seclist during testing. | ||
| K8SSecListID string // An optional configuration for E2E testing. If present can be used to run additional checks against seclist during testing. | ||
|
|
||
|
|
@@ -111,6 +115,13 @@ func NewFramework(baseName string, client clientset.Interface) *Framework { | |
| BaseName: baseName, | ||
| ClientSet: client, | ||
| } | ||
| // Dev/CI only configuration. Enable NodePort tests. | ||
| npt, err := strconv.ParseBool(os.Getenv("NODEPORT_TEST")) | ||
| if err != nil { | ||
| f.NodePortTest = false | ||
| } else { | ||
| f.NodePortTest = npt | ||
| } | ||
| // Dev/CI only configuration. The seclist for CCM load-balancer routes. | ||
| f.CCMSecListID = os.Getenv("CCM_SECLIST_ID") | ||
| if ccmSeclistID != "" { | ||
|
|
@@ -202,7 +213,8 @@ func (f *Framework) BeforeEach() { | |
| // https://github.com/onsi/ginkgo/issues/222 | ||
| f.cleanupHandle = AddCleanupAction(f.AfterEach) | ||
|
|
||
| if f.Client == nil { | ||
| // Create an OCI client if the cloudConfig has been specified. | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A series of tweaks to allow users to avoid specifying the cloud config if not required. The canary runner might not want to do this. |
||
| if cloudConfigFile != "" && f.Client == nil { | ||
| By("Creating OCI client") | ||
| cloudProviderConfig, err := createCloudProviderConfig(cloudConfigFile) | ||
| Expect(err).NotTo(HaveOccurred()) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Original mode renamed.