diff --git a/test/run-end-to-end.py b/test/run-end-to-end.py
deleted file mode 100644
index ca51548c8..000000000
--- a/test/run-end-to-end.py
+++ /dev/null
@@ -1,206 +0,0 @@
-#!/usr/bin/env python3
-import shared_test_code
-import json, os, subprocess, time, yaml
-from kubernetes import client, config
-from kubernetes.client.rest import ApiException
-
-def main():
-    print("End-to-end test main start")
-
-    # If this is a PUSH, the test needs to wait for the new containers to be
-    # built/pushed. In this case, the workflow will set /tmp/sleep_duration.txt to
-    # the number of seconds to sleep.
-    # If this is a manually triggered run or a PULL-REQUEST, no new containers will
-    # be built/pushed, the workflows will not set /tmp/sleep_duration.txt and
-    # this test will execute immediately.
-    shared_test_code.initial_sleep()
-
-    # Update Helm and install this version's chart
-    os.system("helm repo update")
-
-    # Get version of akri to test
-    test_version = shared_test_code.get_test_version()
-    print("Testing version: {}".format(test_version))
-
-    shared_test_code.major_version = "v" + test_version.split(".")[0]
-    print("Testing major version: {}".format(shared_test_code.major_version))
-
-    helm_chart_location = shared_test_code.get_helm_chart_location()
-    print("Get Akri Helm chart: {}".format(helm_chart_location))
-    k8s_distro_arg = shared_test_code.get_k8s_distro_arg()
-    print("Providing Akri Helm chart with K8s distro arg: {}".format(k8s_distro_arg))
-    extra_helm_args = shared_test_code.get_extra_helm_args()
-    print("Providing Akri Helm chart with extra helm args: {}".format(extra_helm_args))
-    helm_install_command = "\
-    helm install akri {location} \
-    --set agent.full=true \
-    --set debugEcho.configuration.enabled=true \
-    --set debugEcho.configuration.name={config_name} \
-    --set debugEcho.configuration.brokerPod.image.repository=nginx \
-    --set debugEcho.configuration.brokerPod.image.tag=stable-alpine \
-    --set debugEcho.configuration.shared=false \
-    --set debugEcho.configuration.discoveryDetails.descriptions[0]='{description_prefix}0' \
-    --set debugEcho.configuration.discoveryDetails.descriptions[1]='{description_prefix}1' \
-    --set debugEcho.configuration.brokerProperties.{res_width_key}={res_width_val} \
-    --set debugEcho.configuration.brokerProperties.{res_height_key}={res_height_val} \
-    --set agent.allowDebugEcho=true \
-    {k8s_distro_arg} \
-    {helm_args} \
-    --debug \
-    ".format(location=helm_chart_location, config_name=shared_test_code.DEBUG_ECHO_NAME, description_prefix=shared_test_code.DEBUG_ECHO_DESCRIPTIONS_PREFIX, res_width_key=shared_test_code.PROPERTIES_RESOLUTION_WIDTH_KEY, res_width_val=shared_test_code.PROPERTIES_RESOLUTION_WIDTH_VALUE, res_height_key=shared_test_code.PROPERTIES_RESOLUTION_HEIGHT_KEY, res_height_val=shared_test_code.PROPERTIES_RESOLUTION_HEIGHT_VALUE, k8s_distro_arg=k8s_distro_arg, helm_args=extra_helm_args)
-    print("Helm command: {}".format(helm_install_command))
-    os.system(helm_install_command)
-
-    try:
-        res = do_test()
-    except Exception as e:
-        print(e)
-        res = False
-    finally:
-        # Best effort cleanup work
-        try:
-            # Save Agent and controller logs
-            shared_test_code.save_agent_and_controller_logs()
-        finally:
-            # On error, dump the running cluster state
-            print("On ERROR: check kubectl node status")
-            os.system("kubectl get nodes -o wide")
-            # Delete akri and check that controller and Agent pods deleted
-            os.system("helm delete akri")
-            if res:
-                # Only test cleanup if the test has succeeded up to now
-                if not shared_test_code.check_akri_state(0, 0, 0, 0, 0, 0):
-                    print("Akri not running in expected state after helm delete")
-                    raise RuntimeError("Scenario Failed")
-
-    if not res:
-        raise RuntimeError("Scenario Failed")
-
-
-def do_test():
-    kubeconfig_path = shared_test_code.get_kubeconfig_path()
-    print("Loading k8s config: {}".format(kubeconfig_path))
-    config.load_kube_config(config_file=kubeconfig_path)
-
-    # Get kubectl command
-    kubectl_cmd = shared_test_code.get_kubectl_command()
-
-    # Ensure Helm Akri installation applied CRDs and set up agent and controller
-    print("Checking for CRDs")
-    if not shared_test_code.crds_applied():
-        print("CRDs not applied by helm chart")
-        return False
-
-    print("Checking for initial Akri state")
-    if not shared_test_code.check_akri_state(1, 1, 2, 2, 1, 2):
-        print("Akri not running in expected state")
-        os.system('{} get pods,services,akric,akrii --show-labels'.format(kubectl_cmd))
-        os.system('{} describe pod'.format(kubectl_cmd))
-        return False
-
-    #
-    # Check agent responds to dynamic offline/online resource
-    #
-    print("Writing to Agent pod {} that device offline".format(shared_test_code.agent_pod_name))
-    os.system('{} exec -i {} -- /bin/sh -c "echo OFFLINE > /tmp/debug-echo-availability.txt"'.format(kubectl_cmd, shared_test_code.agent_pod_name))
-
-    print("Checking Akri state after taking device offline")
-    if not shared_test_code.check_akri_state(1, 1, 0, 0, 0, 0):
-        print("Akri not running in expected state after taking device offline")
-        os.system('{} get pods,services,akric,akrii --show-labels'.format(kubectl_cmd))
-        os.system('{} describe pod'.format(kubectl_cmd))
-        return False
-
-    # Do back online scenario
-    print("Writing to Agent pod {} that device online".format(shared_test_code.agent_pod_name))
-    os.system('{} exec -i {} -- /bin/sh -c "echo ONLINE > /tmp/debug-echo-availability.txt"'.format(kubectl_cmd, shared_test_code.agent_pod_name))
-
-    print("Checking Akri state after bringing device back online")
-    if not shared_test_code.check_akri_state(1, 1, 2, 2, 1, 2):
-        print("Akri not running in expected state after bringing device back online")
-        os.system('{} get pods,services,akric,akrii --show-labels'.format(kubectl_cmd))
-        os.system('{} describe pod'.format(kubectl_cmd))
-        return False
-
-    #
-    # Check that slot reconciliation is working on agent
-    #
-    print("Check logs for Agent slot-reconciliation for pod {}".format(shared_test_code.agent_pod_name))
-    temporary_agent_log_path = "/tmp/agent_log.txt"
-    for x in range(3):
-        log_result = subprocess.run('{} logs {} > {}'.format(kubectl_cmd, shared_test_code.agent_pod_name, temporary_agent_log_path), shell=True)
-        if log_result.returncode == 0:
-            print("Successfully stored Agent logs in {}".format(temporary_agent_log_path))
-            break
-        print("Failed to get logs from {} pod with result {} on attempt {} of 3".format(shared_test_code.agent_pod_name, log_result, x + 1))
-        if x == 2:
-            return False
-    grep_result = subprocess.run(['grep', "get_node_slots - crictl called successfully", temporary_agent_log_path])
-    if grep_result.returncode != 0:
-        print("Akri failed to successfully connect to crictl via the CRI socket with return value of {}".format(grep_result))
-        # Log information to understand why error occurred
-        os.system('{} get pods,services,akric,akrii --show-labels'.format(kubectl_cmd))
-        os.system('grep get_node_slots {}'.format(temporary_agent_log_path))
-        return False
-
-    #
-    # Check that broker is recreated if it is deleted
-    #
-    broker_pod_selector = "{}={}".format(shared_test_code.CONFIGURATION_LABEL_NAME, shared_test_code.DEBUG_ECHO_NAME)
-    brokers_info = shared_test_code.get_running_pod_names_and_uids(broker_pod_selector)
-    if len(brokers_info) != 2:
-        print("Expected to find 2 broker pods but found: {}".format(len(brokers_info)))
-        os.system('{} get pods,services,akric,akrii --show-labels'.format(kubectl_cmd))
-        os.system('{} describe pod'.format(kubectl_cmd))
-        return False
-
-    # There is a possible race condition here between when the `kubectl delete pod` returns,
-    # when check_broker_pods_state validates that the pod is gone, and when check_akri_state
-    # validates that the broker pod has been restarted
-
-    broker_pod_name = sorted(brokers_info.keys())[0]
-    delete_pod_command = '{} delete pod {}'.format(kubectl_cmd, broker_pod_name)
-    print("Deleting broker pod: {}".format(delete_pod_command))
-    os.system(delete_pod_command)
-
-    # Create kube client
-    v1 = client.CoreV1Api()
-
-    # Wait for there to be 2 broker pods again
-    if not shared_test_code.check_broker_pods_state(v1, 2):
-        print("Akri not running in expected state after broker pod restoration should have happened")
-        os.system('{} get pods,services,akric,akrii --show-labels'.format(kubectl_cmd))
-        os.system('{} describe pod'.format(kubectl_cmd))
-        return False
-
-    restored_brokers_info = shared_test_code.get_running_pod_names_and_uids(broker_pod_selector)
-    if len(restored_brokers_info) != 2:
-        print("Expected to find 2 broker pods but found: {}".format(len(restored_brokers_info)))
-        os.system('{} get pods,services,akric,akrii --show-labels'.format(kubectl_cmd))
-        os.system('{} describe pod'.format(kubectl_cmd))
-        return False
-
-    # Make sure that the deleted broker uid is different from the restored broker pod uid,
-    # signifying that the Pod was recreated
-    print("Restored broker pod uid should differ from original broker pod uid")
-    if brokers_info[broker_pod_name] == restored_brokers_info[broker_pod_name]:
-        print("Restored broker pod uid [{}] should differ from original broker pod uid [{}]".format(brokers_info[broker_pod_name], restored_brokers_info[broker_pod_name]))
-        os.system('{} get pods,services,akric,akrii --show-labels'.format(kubectl_cmd))
-        os.system('{} describe pod'.format(kubectl_cmd))
-        return False
-
-    # Do cleanup scenario
-    print("Deleting Akri configuration: {}".format(shared_test_code.DEBUG_ECHO_NAME))
-    os.system("{} delete akric {}".format(kubectl_cmd, shared_test_code.DEBUG_ECHO_NAME))
-
-    print("Checking Akri state after deleting configuration")
-    if not shared_test_code.check_akri_state(1, 1, 0, 0, 0, 0):
-        print("Akri not running in expected state after deleting configuration")
-        os.system('{} get pods,services,akric,akrii --show-labels'.format(kubectl_cmd))
-        os.system('{} describe pod'.format(kubectl_cmd))
-        return False
-
-    return True
-
-main()
diff --git a/test/run-webhook.md b/test/run-webhook.md
deleted file mode 100644
index 7063407ad..000000000
--- a/test/run-webhook.md
+++ /dev/null
@@ -1,268 +0,0 @@
-# Run Webhook End-to-End Tests
-
-File: `/test/run-webhook.py`
-
-Complements the existing Python-based end-to-end test [script](/test/run-end-to-end.py) with a script that tests Akri configured to use the Configuration Admission Controller Webhook ([README](/webhooks/validating/configuration/README.md)).
-
-The Webhook validates Akri Configurations, permitting (semantically) valid Configurations to be applied to a cluster and prohibiting (semantically) invalid Configurations.
-
-In order to create an end-to-end test including the Webhook:
-
-1. Akri (including the Webhook) is deployed to a cluster
-1. A valid Configuration is applied and confirmed, by retrieving it, to have been accepted
-1. An invalid Configuration is applied and confirmed to have been trapped by the Webhook, by catching the (API) exception it raises
-1. The cluster is deleted.
-
-## ImagePullSecrets
-
-When running the script outside of the GitHub Actions workflow, you may need to configure the Kubernetes cluster to access a private registry, for example GitHub Container Registry (aka GHCR). The simplest way to authenticate to a private registry is to create a Secret (e.g. `${SECRET}`) containing the credentials in the relevant Namespace(s) and configure Helm to reference the Secret when deploying Akri: `--set=imagePullSecrets[0].name=${SECRET}` (a sketch of creating such a Secret with the Kubernetes Python client appears at the end of this section).
-
-## Configuration
-
-The Webhook requires a certificate and key. The certificate must correctly reference the Webhook's Kubernetes Service name through its Subject Alternative Name (SAN) configuration.
-
-The test includes 2 certificates (and their associated keys). Both require that the Webhook's name (`WEBHOOK_NAME`) be `akri-webhook-configuration`.
-
-The script is configured to use the first cert|key pair in the `default` namespace, with the Service name `akri-webhook-configuration.default.svc.cluster.local`. The second cert|key pair is for the `deleteme` namespace (see below), with the Service name `akri-webhook-configuration.deleteme.svc.cluster.local`.
-
-If you wish to use a different Webhook name or namespace, you will need to generate a new cert|key pair, then reconfigure the script using these and the CA. See [Generate Certificate|Key](#Generate-CertKey).
-
-The GitHub Actions workflow applies end-to-end tests to the test cluster's `default` namespace. This script also permits non-`default` namespaces to be used, for example when testing the script locally. To further simplify this process and avoid having to create a certificate and key for the Webhook, a certificate is provided that works with a namespace named `deleteme`.
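As noted in the ImagePullSecrets section above, the registry Secret can be created with the same Kubernetes Python client the test scripts already use for their TLS Secret. A minimal sketch, assuming GHCR; the secret name, username, and token here are placeholders, not part of the test code:

```python
import base64, json
from kubernetes import client, config

def create_image_pull_secret(name, namespace, registry, username, token):
    # Build the .dockerconfigjson payload expected by kubernetes.io/dockerconfigjson Secrets
    auth = base64.b64encode("{}:{}".format(username, token).encode()).decode()
    docker_config = json.dumps({"auths": {registry: {"auth": auth}}})
    secret = client.V1Secret(
        metadata=client.V1ObjectMeta(name=name, namespace=namespace),
        type="kubernetes.io/dockerconfigjson",
        data={".dockerconfigjson": base64.b64encode(docker_config.encode()).decode()})
    client.CoreV1Api().create_namespaced_secret(namespace=namespace, body=secret)

config.load_kube_config()
create_image_pull_secret("my-registry-secret", "default", "ghcr.io", "me", "<token>")
# Then deploy Akri with: --set=imagePullSecrets[0].name=my-registry-secret
```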
If you would like to use `deleteme` instead of `default` namespace: - -+ Ensure the namespace exists: `kubectl create namespace deleteme` -+ Update the script `namespace="deleteme"` -+ Replace the value of `CRT` and `KEY` in the script with those below - -```Python -# CRT|KEY defined (DNS) for `akri-webhook-configuration.deleteme.svc` -CRT = "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURUVENDQWpXZ0F3SUJBZ0lRR05OYkJKZlN1dVJ1a3pqRk1wTzVhakFOQmdrcWhraUc5dzBCQVFzRkFEQU4KTVFzd0NRWURWUVFEREFKRFFUQWVGdzB5TVRBek1UQXdNVEU1TURCYUZ3MHlNakF6TVRBd01URTVNREJhTUFBdwpnZ0VpTUEwR0NTcUdTSWIzRFFFQkFRVUFBNElCRHdBd2dnRUtBb0lCQVFDOHdIZDZVY3ZhcG53Mlp3eDJQQzUvCjBiaGZ5eEJQaHNEZWVueTg2ZFl2SzRWajBTQmF6aFZwdUVKaHd0em1kcHJBSTR2bXBNTEt6NmVmV29mRFBmZWkKdjRuNm5zaXFoN1oyTjk3SGVDSy85SWJOcG9seDQzMmtRZWliR3h4NFRFb1VrZFFjZ1RHQ3BsNWFLQ3oxUFFXdgpzWG1TREFuVFRmaG1TakxmU3BZTk5qQUtKUUExSFRrTFJ5MmJuTy9wOVFHc1hhNzNUejZKSGcydmZHb0VWZTFhClhWd0x3SXFmbFRPY0RUSndlR3B4UysrRW15dElIdUUxb3hzek95VVdEUVYrRGIxcnV6VjVxbDhycWY4UXlEUHUKODdYNUg5dW1GL0M4MUNEaUdVWmJiMk5UeWdEWmdNRmpLZVVGaUNvWWxnU1hYYnlHYnIwUVRRMGlxMFkrSDFPNwpBZ01CQUFHamdiVXdnYkl3RXdZRFZSMGxCQXd3Q2dZSUt3WUJCUVVIQXdFd0RBWURWUjBUQVFIL0JBSXdBREFmCkJnTlZIU01FR0RBV2dCUXBSNWZiZC9STER2WkFoSjRmK0RzNWYyNHo0ekJzQmdOVkhSRUJBZjhFWWpCZ2dpZGgKYTNKcExYZGxZbWh2YjJzdFkyOXVabWxuZFhKaGRHbHZiaTVrWld4bGRHVnRaUzV6ZG1PQ05XRnJjbWt0ZDJWaQphRzl2YXkxamIyNW1hV2QxY21GMGFXOXVMbVJsYkdWMFpXMWxMbk4yWXk1amJIVnpkR1Z5TG14dlkyRnNNQTBHCkNTcUdTSWIzRFFFQkN3VUFBNElCQVFBRXRJc2JhZEM5eVc5SHhSMFE5VlBSaDhlekdVeUo5enpoUXlMcWVuOS8KcDJqU0YwRHBFVmtzMWhVYkFnRkhlMUk4Qk5McS9nSTVGb3o3WkxjQVNNcmdjVG9wSklWVW5ldnFtbzlwZ0lxLwprREtzV3NlSDZuaTgzOS9wbzFUSDdDNU5OWU4ybHFMS2xNQU84Ym5wSElDazMyRyt6RlZBSURLT0JDTHZPR3pKCmUvT09rUjBGcTRSWGxTWTdmNHA2QkhzRVVUdG1hOTFqMHFtWFdHSnRpc0UxbEhHZDE1bmFsOGhLWE1LVGRRN0EKbFR3Z2h4RTJXSzQ3dER6ald5eXZ1NmVPUFdxdlN1RFVNZzZzRXkvK01xZW9qeXI1MFZjUWxpS0JYK05xU0J3NApsMHRpMlVsVXdpZFhUWXFIM0NieUwrOTJ2b3R0alJFUU00bXpRWmN3THVwQgotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==" -KEY = 
"LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFcEFJQkFBS0NBUUVBdk1CM2VsSEwycVo4Tm1jTWRqd3VmOUc0WDhzUVQ0YkEzbnA4dk9uV0x5dUZZOUVnCldzNFZhYmhDWWNMYzVuYWF3Q09MNXFUQ3lzK25uMXFId3ozM29yK0orcDdJcW9lMmRqZmV4M2dpdi9TR3phYUoKY2VOOXBFSG9teHNjZUV4S0ZKSFVISUV4Z3FaZVdpZ3M5VDBGcjdGNWtnd0owMDM0WmtveTMwcVdEVFl3Q2lVQQpOUjA1QzBjdG01enY2ZlVCckYydTkwOCtpUjROcjN4cUJGWHRXbDFjQzhDS241VXpuQTB5Y0hocWNVdnZoSnNyClNCN2hOYU1iTXpzbEZnMEZmZzI5YTdzMWVhcGZLNm4vRU1nejd2TzErUi9icGhmd3ZOUWc0aGxHVzI5alU4b0EKMllEQll5bmxCWWdxR0pZRWwxMjhobTY5RUUwTklxdEdQaDlUdXdJREFRQUJBb0lCQUQ3Q2hEZVF3UWFIdXQ5Zgo3ajNXRHVRRE9KbnBiQmYxUjJYeU5rMmVOdEJpV1N6eVdSNjRUVmhrb3ZYY2xCU3hOUTFVQkcyQk5SKzRZaFRUClJqYitBTHdGa2Z4YUZZRFdOUzRqcjVpRmNwQiszdCs4VXhFaVFpRitwTGdHRUxaVEw0S2RabmkvNEZWL3VmbWkKU0NpV3pMQTVnNkd6RFFWTWRKNldaMG5sZy9VS0QrK3ZadkJNOFdZZlFGMUduRWU0VTFWWGgzVHhTL28zeVBacAp4UEdheTc2NnRNNXBEVTVxcWhEYUo3TGp0RzE2cDlBOEZLb3JJWjFDSzZxSlJPT1RkMjQ2K2M1b24wUy9WZXNWCklwbmt5RksreFRHd1R0eWdtbUFmcmhPRzdGakI5Qy9YR1lNNUVTWkRic2I0R0QzUWprdC93WVhnZUk2d2tBWUUKUUl4d1VBRUNnWUVBelFUcFMzd1YycXZpMUw4dnlMczlWZ2JQYmdNcnVqR2FENDRIYnBuQ1kwcGdaZHlySTcvTwozTHc1MStWTFVVVUhGd2R3SlZPbSs3cUJWd0xvL2JuR09pU1AwNzFHc0dVVUgvTnZIRGpXaTQ2N0U2RVlzL01QCnlINW1oSDlwYlYxYkRhcnhvbUpPU2NhOHZvenpUcy9Lak1RcXRkSW1sUUkzajZGdkYwUWdhN3NDZ1lFQTY3QUMKYldGKy9YQjZSbDBSSXUwRUJSbUNmeEpHY2RJdHpyYXJ5ejdJYjdHZmhoVmJjbEtvazNuY3hPTEFwaXQrR2hGQQpvUU56REF1RVdDNXVKT2d0em10YkVXS1U3SzV4WmNLRHhqT2U0UVMwRlNDOGNYb3prK0hJZEtEQVhlT25tNWorCmFxSDU5NFRnYUx5Umg0aTl5c25iN1M0aHdrM1F0Wkl5SitWcU9BRUNnWUF5OW95VGtnWFF0TGVQRVBOczMzWncKd3dLZkl6U2tkUjRKemRGMUljMmJadXF0aDN3WFI5L0JLUnpyMlBpdS9BeTJJY2d6enlhTUhxRjJJcWdPSWpidgpUeFZkbWdoUFl1RHN6Rk9MWFdtZmlWeGhsY01SUUZObEVGNmxneEtPK0F6aFNlUUU3SkR2Yi9LTkgzWi8yZEZNCnlwcWZWZHozWDNTMlJIZmIvYmhkYndLQmdRQ0lENEkzTnhPaXArNU85S2RSN0ZabncwUk1xM1l6ZTB5cWkxWTkKN1M2MUhHdWxjbXJxWXNHaThiVDdqSlArMmhqZ1g1bFoycTN1QkRBUTRDMEI3VytVUFBIRDVZOW4yNFRuWkJYQwp0RVpDVFA3Uk82Yk9NK2ZXdFgrTnBNZW83Q1gwYzZ4Y1Rzb0psSklncE11MjNNQUVjK2djMG9iMnJ3bVA2S2cvCjAvY3dBUUtCZ1FDNXBOSmp5V0VOVUJySDFiNmNvbHpVZ0tmU3pPZ0wzN2JHKzBUNklSWGlMR2pHeHUzK3RwVkoKeUsvN0l0dW1iTTZEM1JFSTZWcWVLNGxZVUVzbW9sNjNONXc2TFhGY29Mdi9TU0VzQ2lFV0doMXFTMFpYaDN3YwpUNkZCUUlLMUdpU2V6YjZEWkQwaEFoVHdEeEtPYVJ3WDZXY2szL0VsM3laQm5tYUFocjJGQkE9PQotLS0tLUVORCBSU0EgUFJJVkFURSBLRVktLS0tLQo=" -``` - -## Explanation - -`/test/run-wehook.py` comprises 2 functions: `main` and `do_test`. - -The script uses functions (shared with `/test/run-end-to-end.py`) defined in `/test/shared_test_code.py` - -The script depends on [Python Client for Kubernetes](https://pypi.org/project/kubernetes/). This SDK is used by the script(s) to access Kubernetes cluster resources during the test. However, the shared functions use `kubectl logs` through Python's `os.system` function to obtain logs. - -No SDK is used for Helm. Helm commands are effected through Python's `os.system` function. - -### `main` - -`main` determines the location of the Helm Chart and then assembles the correct `helm install` command to install Akri. In addition to Akri's `agent` and `controller`, the script configures Helm to include the Webhook. Like the `agent` and `controller`, the Webhook is configurable using Helm's `--set` flag to override Akri's Chart's `values.yaml` settings. Specifically, the Webhook is enabled (by default it is disabled), the name, defined by the constant `WEBHOOK_NAME` is used and the CA certificate used to sign the Webhook's certificate is given to the cluster so that it may validate the Webhook's certificate. 
-
-> **NOTE** `WEBHOOK_NAME=akri-webhook-configuration` is the default value defined in `values.yaml`. Although redundant, it is provided here to be more intentional.
-
-```python
-def get_webhook_helm_config() -> str:
-    webhook = "\
-    --set=webhookConfiguration.enabled=true \
-    --set=webhookConfiguration.name={name} \
-    --set=webhookConfiguration.caBundle={cabundle} \
-    ".format(
-        name=WEBHOOK_NAME,
-        cabundle=CA_BUNDLE,
-    )
-    print("Webhook configuration:\n{}".format(webhook))
-    return webhook
-```
-
-Once the Helm Chart is installed, the function calls `do_test`. Regardless of whether `do_test` succeeds, the Helm Chart is then uninstalled and the script reports any exception thrown by `do_test`.
-
-### `do_test`
-
-`do_test` shares some tests with `/test/run-end-to-end.py`, namely checking whether Akri's CRDs (`Configuration`, `Instance`) were successfully created by the Helm Chart installation, and whether the deployment is in the correct state, i.e. whether an Akri Agent and an Akri Controller are running. If both tests pass, the function proceeds.
-
-The Webhook manifests as a Deployment that produces a ReplicaSet that manages a single Pod. `do_test` runs `kubectl describe` against each of these resources and writes the results to stdout.
-
-Then, `do_test` applies a valid Configuration to the cluster. It does this using the Kubernetes SDK: first to apply (create) the Configuration, then to get the resource back. It outputs the result to stdout before deleting the Configuration.
-
-Then, `do_test` applies an invalid Configuration to the cluster. The Configuration is syntactically correct but semantically incorrect; it is valid YAML but an invalid Configuration. Without the Webhook, the cluster would accept this Configuration. With the Webhook, the Configuration should be rejected. The test is similar to the test for a valid Configuration, except that this time the function expects an API exception to be thrown by the Kubernetes API.
-
-The Webhook's logs are retrieved and persisted to `WEBHOOK_LOG_PATH = "/tmp/webhook_log.txt"`. When run under GitHub Actions, the workflow persists this log file.
-
-## `subprocess` vs. `os`
-
-Python's `subprocess` module supersedes the older `os.system` way of running commands. The Webhook script uses `subprocess` rather than `os` because `subprocess` works more cleanly with GitHub Actions, correctly interleaving stdout and stderr with the commands as they run. Using `os` with GitHub Actions (as `/test/shared_test_code.py` does) causes the stdout (and stderr) to be displayed at the beginning of the workflow output.
-
-The Webhook Python script wraps `subprocess.run` in a function called `run`:
-
-```python
-def run(command):
-    print("Executing: {}".format(command))
-    result = subprocess.run(command,
-                            shell=True,
-                            capture_output=True,
-                            text=True)
-    print("returncode: {}".format(result.returncode))
-    if result.stdout:
-        print("stdout:")
-        print(result.stdout)
-    if result.stderr:
-        print("stderr:")
-        print(result.stderr)
-```
-
-## Generate Certificate|Key
-
-First, [install the Kubernetes cert-manager](https://cert-manager.io/docs/installation/#default-static-install) on your cluster.
-
- -```bash -NAMESPACE="deleteme" -kubectl create namespace ${NAMESPACE} - -WEBHOOK="akri-webhook-configuration" # Default name if not provided - -# Generate CA (that is valid for 5 years) -openssl req \ --nodes \ --new \ --x509 \ --days 1800 \ --keyout ./secrets/ca.key \ --out ./secrets/ca.crt \ --subj "/CN=CA" - -# Create Secret -kubectl create secret tls ca \ ---namespace=${NAMESPACE} \ ---cert=./secrets/ca.crt \ ---key=./secrets/ca.key - -# Create Issuer using this Secret -echo " -apiVersion: cert-manager.io/v1 -kind: Issuer -metadata: - name: ca - namespace: ${NAMESPACE} -spec: - ca: - secretName: ca -" | kubectl apply --filename=- - -# Create Certificate using this CA. The following will last 5 years. -echo " -apiVersion: cert-manager.io/v1 -kind: Certificate -metadata: - name: ${WEBHOOK} - namespace: ${NAMESPACE} -spec: - secretName: ${WEBHOOK} - duration: 43800h - renewBefore: 43700h - isCA: false - privateKey: - algorithm: RSA - encoding: PKCS1 - size: 2048 - usages: - - server auth - dnsNames: - - ${WEBHOOK}.${NAMESPACE}.svc - - ${WEBHOOK}.${NAMESPACE}.svc.cluster.local - issuerRef: - name: ca - kind: Issuer - group: cert-manager.io -" | kubectl apply --filename=- - -# Check -kubectl get certificate/${WEBHOOK} --namespace=${NAMESPACE} - -# Delete Certificate (to stop Secret being recreated) -kubectl delete certificate/${WEBHOOK} --namespace=${NAMESPACE} - -# Retrieve cert-manager generated certificates and key -CRT=$(\ - kubectl get secret/${WEBHOOK} \ - --namespace=${NAMESPACE} \ - --output=jsonpath="{.data.tls\.crt}") && echo ${CRT} - -KEY=$(\ - kubectl get secret/${WEBHOOK} \ - --namespace=${NAMESPACE} \ - --output=jsonpath="{.data.tls\.key}") && echo ${KEY} - -CABUNDLE=$(\ - kubectl get secret/${WEBHOOK} \ - --namespace=${NAMESPACE} \ - --output=jsonpath="{.data.ca\.crt}") && echo ${CABUNDLE} -``` - -## Validate - -> **NOTE** Certificate is bound to `akri-webhook-configuration.$NAMESPACE.svc` - -```bash -echo ${CRT} \ -| base64 --decode \ -| openssl x509 -in - -noout -text -``` - -Yields: - -```console -Certificate: - Data: - Version: 3 (0x2) - Serial Number: - 0c:70:fc:16:98:71:01:8c:ad:d0:05:d7:b7:98:11:e6 - Signature Algorithm: sha256WithRSAEncryption - Issuer: CN = CA - Validity - Not Before: Mar 10 01:09:53 2021 GMT - Not After : Mar 10 01:09:53 2022 GMT - Subject: - Subject Public Key Info: - Public Key Algorithm: rsaEncryption - RSA Public-Key: (2048 bit) - Modulus: - 00:d0:1f:8b:eb:85:65:43:a0:78:90:e2:ba:47:7d: - bd:76:92:76:dc:82:fd:5c:46:58:ec:2f:1c:bc:db: - 39:93:09:2f:8c:4c:13:03:b9:18:02:8a:16:62:ed: - 6c:ee:e2:f9:c0:90:12:dc:8a:98:92:4a:83:94:e3: - 91:99:19:0b:69:6c:bc:66:55:5a:3c:c2:d9:28:8d: - dd:1a:97:3e:07:7a:25:74:bb:ee:d3:69:02:60:9f: - 15:59:a0:f5:78:fa:b5:84:78:ab:33:71:25:47:2b: - 8b:d6:16:28:1e:8a:04:18:27:6b:ea:0a:ce:de:4e: - 33:cd:6e:da:a2:41:4f:c1:3e:9b:1e:06:57:f3:91: - 85:32:fd:55:65:39:11:4b:c7:b4:86:5a:f9:c3:41: - dd:5b:d3:05:5e:a8:56:67:ea:76:7f:1a:9d:36:ae: - d8:b0:cb:a6:9f:42:06:8a:3e:29:c5:48:12:d1:e6: - 0e:a6:b2:a7:90:60:cd:c0:fd:ef:a3:7d:62:59:00: - 9b:0f:09:18:8f:02:42:90:44:bf:d4:d3:01:79:04: - 77:4f:31:41:2c:b7:e3:85:7d:aa:0c:f0:3e:af:e0: - a5:71:8e:20:8b:3f:cd:33:81:0a:00:c5:f3:c7:1f: - 57:68:95:ce:48:b8:0d:50:f8:58:96:68:9b:b9:78: - 76:3f - Exponent: 65537 (0x10001) - X509v3 extensions: - X509v3 Extended Key Usage: - TLS Web Server Authentication - X509v3 Basic Constraints: critical - CA:FALSE - X509v3 Authority Key Identifier: - keyid:29:47:97:DB:77:F4:4B:0E:F6:40:84:9E:1F:F8:3B:39:7F:6E:33:E3 - - X509v3 Subject Alternative Name: 
critical - DNS:akri-webhook-configuration.default.svc, DNS:akri-webhook-configuration.default.svc.cluster.local - Signature Algorithm: sha256WithRSAEncryption - 56:5f:d0:7b:e7:71:2d:ec:08:8b:b7:c0:10:8f:e7:00:c4:6c: - 0b:03:73:97:64:9b:57:2a:9b:de:59:a2:95:7f:64:26:c6:8c: - 84:75:d8:af:7d:e8:ac:7b:fa:9d:bc:f5:22:59:ac:67:f2:b1: - 3d:dc:5f:82:06:b7:10:83:29:b5:97:54:b1:1c:b3:0b:e7:b6: - c6:34:a2:48:58:df:7a:e4:1a:87:6a:10:60:21:9c:85:19:29: - f9:6e:d4:5c:31:3a:63:e5:57:84:b1:2b:9d:37:81:1c:a6:6d: - 7a:02:c6:6a:f1:eb:b3:7c:1f:fc:fc:4f:31:16:98:1f:d2:d7: - 5c:08:9f:ad:36:ae:d1:19:8b:04:f3:0b:8f:87:4d:45:23:10: - 97:1c:c6:ed:f6:17:18:a4:77:df:70:58:78:11:29:bb:2a:c0: - 04:2a:21:e1:fb:a2:af:8b:97:62:f1:cb:f2:23:84:04:b7:b3: - e9:ec:24:72:ff:11:38:17:48:a7:71:25:22:c2:4c:c7:3f:37: - 81:7c:6c:f6:37:9b:ff:37:85:64:74:5b:bb:00:bc:0a:85:84: - 35:e1:c4:42:11:9c:f8:a4:df:b2:1f:bb:06:af:f3:a0:2d:87: - 83:f3:51:cb:5f:4f:74:e1:09:21:37:9f:c1:4f:5f:5c:e9:91: - 84:ee:33:a6 -``` diff --git a/test/run-webhook.py b/test/run-webhook.py deleted file mode 100755 index fb5abe9f9..000000000 --- a/test/run-webhook.py +++ /dev/null @@ -1,333 +0,0 @@ -#!/usr/bin/env python3 - -import shared_test_code -import os, subprocess - -from kubernetes import client, config -from kubernetes.client.rest import ApiException - -HELM_CHART_NAME = "akri" -NAMESPACE = "default" -WEBHOOK_NAME = "akri-webhook-configuration" -WEBHOOK_LOG_PATH = "/tmp/webhook_log.txt" - -# Required by Webhook - renew by following the "Generate Certificate|Key" section of run-webhook.md with $NAMESPACE=default -# DNS: `akri-webhook-configuration.default.svc` -# Expires: Mar 24 20:21:16 2027 GMT -CRT = "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURTekNDQWpPZ0F3SUJBZ0lRRy9rU3NNQm94YW82cDgzZjhsM3VJekFOQmdrcWhraUc5dzBCQVFzRkFEQU4KTVFzd0NRWURWUVFEREFKRFFUQWVGdzB5TWpBek1qVXlNREl4TVRaYUZ3MHlOekF6TWpReU1ESXhNVFphTUFBdwpnZ0VpTUEwR0NTcUdTSWIzRFFFQkFRVUFBNElCRHdBd2dnRUtBb0lCQVFEUmp6UlhKM0QwY2JhQ1dpZEswbmdmClVheEgwZDliTnhQSlVCL01ZRHdtRUJyZFJ2R3lTT3pGKytsUVVjb0VVdmp4OTRpWmxNamlVZ3JUblk3UXRFSTgKUXQxY1VZcFZVOE1VTEh6RFU5VEFtUzZRVTZkQTEzT0Ivakx3ZG1TWkV4aWRnTGtGT1FYOHphUVM0RlJRUDg0agoxRzd6dnZHUHBWSlFjQm12SEJtT0twRy9Fc0JmbUZ2Y0dMdWNOTk1CMzNlNStuZ1hVZU9KMVBaeVJiMTBDbmpLCmF2dWdBRGcrMTY3Q3JrdVFpSnIyYm9FbGRlME82T1VCTFF6aXJxb2FqOXArbkY1SFZweitmRSt3ZkxnbWtPaC8KbE9ia3BVaWUrd2NTMndFQmJwSWFaZ2NZRXUzN1lOYnQyZTBhQm5tcmtKU1c1NGFwd1VYRFI4Ym9TMkF1c1pFWgpBZ01CQUFHamdiTXdnYkF3RXdZRFZSMGxCQXd3Q2dZSUt3WUJCUVVIQXdFd0RBWURWUjBUQVFIL0JBSXdBREFmCkJnTlZIU01FR0RBV2dCVFliMDV0STVVQmRhcnBmNWc5b2oxaE5XOE5vakJxQmdOVkhSRUJBZjhFWURCZWdpWmgKYTNKcExYZGxZbWh2YjJzdFkyOXVabWxuZFhKaGRHbHZiaTVrWldaaGRXeDBMbk4yWTRJMFlXdHlhUzEzWldKbwpiMjlyTFdOdmJtWnBaM1Z5WVhScGIyNHVaR1ZtWVhWc2RDNXpkbU11WTJ4MWMzUmxjaTVzYjJOaGJEQU5CZ2txCmhraUc5dzBCQVFzRkFBT0NBUUVBVzJCRHBPdlAxc3JhbVNoWlNHQnF5SlVxdWF1bWIzdzBtRFl2K0ZTT3J5OUYKYTd1Mm1USGdGMUtISDEyWnlKeWZxTEpPTkRSUWMyc2M1UHBQL1YxMStuOXhveS90cG9QM3RzMjBuNGg4MzdTMwpnNUtJQ25IaGFmM2RTdHFUSkd5aFhFYmNvakk4U0lTQmZMSjhXd0hxU1N2T2FVTWxqVWVlb21nZmZaU1RQMk93CmJ2a3VrUW1WRUVGLytlb0xkd0MvNkFiMkgxNjJTTFBaS2ZiNXJsOEpManRHVk9odmprSFJkUk1iRzJmTWtoY0sKWHFFbEwvM1V2aWMrY1hUZExqQUxIYXo0ck9vZzBBTktwYzB3djNIT1czMlp5S25hK0VCTnk4K2NNbVFnTGhxOQo0OExIK09xRDBpODRGaDVSbE82TDhqaWlHT3BKL0hxOTNnQklHOGwrU1E9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==" -KEY = 
"LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFcEFJQkFBS0NBUUVBMFk4MFZ5ZHc5SEcyZ2xvblN0SjRIMUdzUjlIZld6Y1R5VkFmekdBOEpoQWEzVWJ4CnNranN4ZnZwVUZIS0JGTDQ4ZmVJbVpUSTRsSUswNTJPMExSQ1BFTGRYRkdLVlZQREZDeDh3MVBVd0prdWtGT24KUU5kemdmNHk4SFprbVJNWW5ZQzVCVGtGL00ya0V1QlVVRC9PSTlSdTg3N3hqNlZTVUhBWnJ4d1pqaXFSdnhMQQpYNWhiM0JpN25EVFRBZDkzdWZwNEYxSGppZFQyY2tXOWRBcDR5bXI3b0FBNFB0ZXV3cTVMa0lpYTltNkJKWFh0CkR1amxBUzBNNHE2cUdvL2FmcHhlUjFhYy9ueFBzSHk0SnBEb2Y1VG01S1ZJbnZzSEV0c0JBVzZTR21ZSEdCTHQKKzJEVzdkbnRHZ1o1cTVDVWx1ZUdxY0ZGdzBmRzZFdGdMckdSR1FJREFRQUJBb0lCQUhrUFpZbER1N2s3UjlnZQpCTHp3d1h3MlRuUmZCYzFJRUNJb0szYUIwYjJiYUNtVXBtUDhST3hMRHduYmRmenhnZWNtdkw4Y2VNQmw3T003CkRobjdTSmhQZUZtd3NWMkJ1aHlaWnFuZ2IvT2ppb2JPREwwa3VoSEtxOXJHU204ejNQQ0FRR0tJQXJGOGl1QnMKdjhoc1U4WFhIeEdvcVJ2MndZcStkOWYxUDc0a1JLNjZjU0hMMjRsUDZ3WTVVVXBLcXBiNVd6ZWJGWDJGbXFJNQpCcm52SU1HUk5RSGs0UTNvUXRNaFI3RGZSUE5uUjREaWdveEZaejRNV2VGMStqRHh2czZIU25zSVhlM1hxWVF5CnVmUmFHV3Fic25DNmlOZWFxMUpPNlQ0ejJhR2ZVTTB4YjIwOWowWEZFYWRqd0pJWGRVdUgxOVlBMVozaThtUTUKdzdEK0xsRUNnWUVBMHZiRG1kK1JvMCtXclhPV2VLa3ZScmZqQ0xYL29yZ25MR0gxUjFoSFZNa0c3N0YwS1AyYQpQb0VqYm1PSVR4Q1BDanpaN2d0K0xCZ1JRY0VKcUxDVXhXMjZkMFR1T3VXMllJM3NLR0NpeU5PRzhkMWhGaEYrCmhrV3hEbGVGRmdWSEQ5clpWaFQrU0NJVmN0MUFsazJWSVZJeEp4elNFU0xVcDVSSnlNS3FBTWNDZ1lFQS9rdXUKdFZ4ZklPcFdldlJPclBwTFlabSttUTJZWVBKaWhEUFVUeVFHcjVjakNRUk9wSmJ4Ym5LdnArZlpWaUYwdDZHTApFWXg5MGg5Q2FzY1hsRHVEc3MxaXpUZkhnRVFzUWQ5dXJ5ZG9RWTZsbWtoamxxT2tJbCtwbGZkUUx1ODR3SkxlCkpLbGNPYTc4dGs1RFRqQzdmUHd4RUJMcnVCVjZjMEs0M05WQnZ4OENnWUFhcUJXVkp0dkhML0pSSG03ZjlqakUKRGM1Qk5vWURzSk02bDNJZnZyYmNycjRTb1hDVkVWNWhFWDVCbjVBRXRZbnRlRHp0U0VSOEc5cHFYWkx6M3NRZApvanpTZjBJKzdQRzdoNU5Va3NsZTZPTi9Ra0xYUUFTbHdMNmJtbEYxczlzRDFOcHJkeUdlU2JnK0dGamw1UTIzCjlTUEMxbkJ3dTk3MUFkYkU2RndFMXdLQmdRREJvUU1nMlhzZDV4RitnZlErUmorTHk3T1Rld1NpSFMzaW1FeDcKRG1XQTRrWXRJWGg0WHU3ck9LeUQzMGhnQ3cyQ25hRDA5ZE1BWWdrQ29TSlZIcFFEVzl4MWdwbUlFMkRYcjdmcAo2c201MFZKTGpmODJ2dGZGeksybW9UQU83Tng3MWRrTWRXRGlFMW9kdnE0RkpabzlheEk0dVE5L2xlc3RSSXJhCnJBOXA1UUtCZ1FDWXpiUWlVUmNlbEJMQjR6cWE0Y05UY1lOd2puU0hYWDNLZXp6WEN5Z0lhRXN4Z044bThRMmkKbVhyOGVWeWRTdEh3QmNjRWZqTW1PVytiSkx0UEJlTDgyNk42ZVlvNzlneUtlSm1qQWhkay8xYmxPMDhiRU1UYgoyN212VUxvSCtCWDFrckhFRXlGQTk3bk15NlczcFlXdE5kSnZoT25mWmRZRVFJUk9wSHJRc2c9PQotLS0tLUVORCBSU0EgUFJJVkFURSBLRVktLS0tLQo=" -# Expires: Feb 27 18:09:49 2027 GMT -CA_BUNDLE = 
"LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUMrekNDQWVPZ0F3SUJBZ0lVVUcrVXVmN1N2bXRzWmVDV2FVMmF0Rlc4bXV3d0RRWUpLb1pJaHZjTkFRRUwKQlFBd0RURUxNQWtHQTFVRUF3d0NRMEV3SGhjTk1qSXdNekkxTVRnd09UUTVXaGNOTWpjd01qSTNNVGd3T1RRNQpXakFOTVFzd0NRWURWUVFEREFKRFFUQ0NBU0l3RFFZSktvWklodmNOQVFFQkJRQURnZ0VQQURDQ0FRb0NnZ0VCCkFMUDdyWjJYcFRORkJyUHRKY2pvejlxOHFscE9OL0ZQUWJlTnE2Y3QzTGZrZkx4VnlpWFFBRllndWlXME10c1MKeUxrMHN1ZFFOT3VrNEE2aDErTitzSFhVUXNCaElDTXJDZmdkeW02b3BTcjVwK1JqSzRock9Hak5yZXQxOTBlWgp3ZXI3S0wxTVdlT0FHQUtVa3JKQVc5V1B3T3dubUZ6Z282Y2pZQmNCU0t5R3lHNVdCUHFOTEd5WFpRcG0zYTFsCnd1OVVqT1U5Vm1CVTJMUHRDc3BkNldJbGQzUjI4Y0NpbTM2SXEzUmJPTFFTWWEyVGp6NHFRU2luWXpTak5sb2wKWkJ2R0l6MVluZ3VHUDFxbHM0MFUvdkthcmxoV2ozMUNOenNCRk9WWXN2eEg1bEZDOStWU3pXVWVnZXlPS0l2RQozYXlkZmQxL0daK1N2M0FlYkt0cDBuY0NBd0VBQWFOVE1GRXdIUVlEVlIwT0JCWUVGTmh2VG0wamxRRjFxdWwvCm1EMmlQV0UxYncyaU1COEdBMVVkSXdRWU1CYUFGTmh2VG0wamxRRjFxdWwvbUQyaVBXRTFidzJpTUE4R0ExVWQKRXdFQi93UUZNQU1CQWY4d0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFJWStUb3E0VVJEZFk4UVhaVDZ4VW5YZAo1UThEcEhIbGh1UjVSN0JRQzhOZlFVWGgrQ3pBRnRpMi8vZDhjdWFiU3B5QlZmVG1yTEs0L2VDcmtZWmhhek50Ck43TlN6K1E3bzBjVm1Hbys2R2Rlb3NnOCtDWms1b3llbm1TSHh6NHllWlcyNXFYVXd4dCtZcjJQZlRSV0x2MmsKNkFDV3ZNTUJzUlNzSlNKUHpVRVBnb2xGYkdJeEMweENKWk1kYTliRlo0MHErVEZ5ZmI1V1dQdzNaeHRNdHN0NgpiQTFUTXRBQ3Q5MmthVk42SVh1TWc4NkhYQzNXQTNEWFZiTWxwZ0FORk1pQ05CSDFFZzZVSWlDTjhRc3JYd0hpClR1aEU1VGNEVStHVHlSNVk0ZHdKajN2UHJpS1NzN1F6Y0wvVEd6TFdZeU1UR1JiQlBya00yUzlmWWYvcit0RT0KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=" -SECRET = { - "apiVersion": "v1", - "kind": "Secret", - "metadata": { - "name": WEBHOOK_NAME, - "namespace": NAMESPACE, - }, - "type": "kubernetes.io/tls", - "data": { - "ca.crt": CA_BUNDLE, - "tls.crt": CRT, - "tls.key": KEY, - } -} - -GROUP = "akri.sh" -VERSION = "v0" -KIND = "Configuration" -NAME = "broker" - -RESOURCES = {"limits": {"{{PLACEHOLDER}}": "1"}} - -SERVICE = { - "type": "ClusterIP", - "ports": [{ - "name": "name", - "port": 0, - "targetPort": 0, - "protocol": "TCP" - }] -} - -TEMPLATE = { - "apiVersion": "{}/{}".format(GROUP, VERSION), - "kind": KIND, - "metadata": { - "annotations": { - "kubectl.kubernetes.io/last-applied-configuration": "" - }, - "creationTimestamp": "2021-01-01T00:00:00Z", - "generation": 1, - "managedFields": [], - "name": NAME, - "uid": "00000000-0000-0000-0000-000000000000" - }, - "spec": { - "discoveryHandler": { - "name": "debugEcho", - "discoveryDetails": "{\"descriptions\": [\"foo\",\"bar\"]}" - }, - "brokerSpec": { - "brokerPodSpec": { - "containers": [{ - "name": "test-broker", - "image": "nginx:stable-alpine", - "imagePullPolicy": "Always", - }], - } - }, - "instanceServiceSpec": SERVICE, - "configurationServiceSpec": SERVICE, - "capacity": 1 - } -} - - -def main(): - print("End-to-end test using validating webhook") - - # If this is a PUSH, the test needs to wait for the new containers to be - # built/pushed. In this case, the workflow will set /tmp/sleep_duration.txt to - # the number of seconds to sleep. - # If this is a MANUALLY triggerd or a PULL-REQUEST, no new containers will - # be built/pushed, the workflows will not set /tmp/sleep_duration.txt and - # this test will execute immediately. 
- shared_test_code.initial_sleep() - - # Webhook expects TLS-containing Secret (of the same name) mounted as a volume - kubeconfig_path = shared_test_code.get_kubeconfig_path() - print("Loading k8s config: {}".format(kubeconfig_path)) - config.load_kube_config(config_file=kubeconfig_path) - - print("Creating Secret: {namespace}/{name}".format(namespace=NAMESPACE, - name=WEBHOOK_NAME)) - client.CoreV1Api().create_namespaced_secret(body=SECRET, - namespace=NAMESPACE) - - # Update Helm and install this version's chart - os.system("helm repo update") - - # Get version of akri to test - test_version = shared_test_code.get_test_version() - print("Testing version: {}".format(test_version)) - - shared_test_code.major_version = "v" + test_version.split(".")[0] - print("Testing major version: {}".format(shared_test_code.major_version)) - - helm_chart_location = shared_test_code.get_helm_chart_location() - print("Get Akri Helm chart: {}".format(helm_chart_location)) - - k8s_distro_arg = shared_test_code.get_k8s_distro_arg() - print("Providing Akri Helm chart with K8s distro arg: {}".format(k8s_distro_arg)) - - extra_helm_args = shared_test_code.get_extra_helm_args() - print("Providing Akri Helm chart with extra helm args: {}".format( - extra_helm_args)) - - helm_install_command = "\ - helm install {chart_name} {location} \ - --namespace={namespace} \ - --set=agent.full=true \ - --set=agent.allowDebugEcho=true \ - {webhook_config} \ - {k8s_distro_arg} \ - {helm_args} \ - --debug\ - ".format(chart_name=HELM_CHART_NAME, - location=helm_chart_location, - namespace=NAMESPACE, - webhook_config=get_webhook_helm_config(), - k8s_distro_arg=k8s_distro_arg, - helm_args=extra_helm_args) - print("Helm command: {}".format(helm_install_command)) - os.system(helm_install_command) - - res = False - try: - res = do_test() - except Exception as e: - print(e) - res = False - finally: - # Best effort cleanup work - try: - # Save Agent and controller logs - shared_test_code.save_agent_and_controller_logs( - namespace=NAMESPACE) - finally: - # Delete akri and check that controller and Agent pods deleted - os.system("\ - helm delete {chart_name} \ - --namespace={namespace}\ - ".format( - chart_name=HELM_CHART_NAME, - namespace=NAMESPACE, - )) - # Delete Webhook Secret - client.CoreV1Api().delete_namespaced_secret(name=WEBHOOK_NAME, - namespace=NAMESPACE) - if res: - # Only test cleanup if the test has succeeded up to now - if not shared_test_code.check_akri_state( - 0, 0, 0, 0, 0, 0, namespace=NAMESPACE): - print( - "Akri not running in expected state after helm delete") - raise RuntimeError("Scenario Failed") - - if not res: - raise RuntimeError("Scenario Failed") - - -def do_test() -> bool: - kubeconfig_path = shared_test_code.get_kubeconfig_path() - print("Loading k8s config: {}".format(kubeconfig_path)) - config.load_kube_config(config_file=kubeconfig_path) - - # Get kubectl command - kubectl_cmd = shared_test_code.get_kubectl_command() - - # Ensure Helm Akri installation applied CRDs and set up Agent and Controller - print("Checking for CRDs") - if not shared_test_code.crds_applied(): - print("CRDs not applied by helm chart") - return False - - print("Checking for initial Akri state") - - if not shared_test_code.check_akri_state(1, 1, 0, 0, 0, 0): - print("Akri not running in expected state") - run("{kubectl} get pods,services,akric,akrii --show-labels". 
- format(kubectl=kubectl_cmd)) - return False - - # Enumerate Webhook resources - print("Debugging:") - - print("Deployment:") - run("{kubectl} describe deployment/{service}\ - --namespace={namespace}".format(kubectl=kubectl_cmd, - service=WEBHOOK_NAME, - namespace=NAMESPACE)) - - print("ReplicaSet:") - run("{kubectl} describe replicaset \ - --selector=app={service} \ - --namespace={namespace}".format(kubectl=kubectl_cmd, - service=WEBHOOK_NAME, - namespace=NAMESPACE)) - - print("Pod:") - run("{kubectl} describe pod \ - --selector=app={service} \ - --namespace={namespace}".format(kubectl=kubectl_cmd, - service=WEBHOOK_NAME, - namespace=NAMESPACE)) - - # Apply Valid Akri Configuration - print("Applying Valid Akri Configuration") - - # Use the template and place resources in the correct location - body = TEMPLATE - body["spec"]["brokerSpec"]["brokerPodSpec"]["containers"][0]["resources"] = RESOURCES - - api = client.CustomObjectsApi() - api.create_namespaced_custom_object(group=GROUP, - version=VERSION, - namespace=NAMESPACE, - plural="configurations", - body=body) - - # Check - print("Retrieving Akri Configuration") - akri_config = api.get_namespaced_custom_object(group=GROUP, - version=VERSION, - name=NAME, - namespace=NAMESPACE, - plural="configurations") - print(akri_config) - - # Delete - api.delete_namespaced_custom_object( - group=GROUP, - version=VERSION, - name=NAME, - namespace=NAMESPACE, - plural="configurations", - body=client.V1DeleteOptions(), - ) - - # Apply Invalid Akri Configuration - res = False - try: - print("Applying Invalid (!) Akri Configuration") - - # Use the template but(!) place resources in an incorrect location - body = TEMPLATE - body["spec"]["brokerSpec"]["brokerPodSpec"]["resources"] = RESOURCES - - api.create_namespaced_custom_object(group=GROUP, - version=VERSION, - namespace=NAMESPACE, - plural="configurations", - body=body) - except ApiException as e: - print( - "As expected, Invalid Akri Configuration generates API Exception") - print("Status Code: {} [{}]", e.status, e.reason) - print("Response: {}".format(e.body)) - res = True - else: - print("Expected APIException but none was thrown. This is an error!") - - # Debugging: check the Webhook's logs - print("Webhook logs") - run("{kubectl} logs deployment/{service} --namespace={namespace}". 
- format(kubectl=kubectl_cmd, - service=WEBHOOK_NAME, - namespace=NAMESPACE)) - - res = False - - # Save Webhook logs - run("{kubectl} logs deployment/{service} --namespace={namespace} >> {file}" - .format(kubectl=kubectl_cmd, - service=WEBHOOK_NAME, - namespace=NAMESPACE, - file=WEBHOOK_LOG_PATH)) - - print("Akri Validating Webhook test: {}".format( - "Success" if res else "Failure")) - return res - - -def get_webhook_helm_config() -> str: - webhook = "\ - --set=webhookConfiguration.enabled=true \ - --set=webhookConfiguration.name={name} \ - --set=webhookConfiguration.caBundle={cabundle} \ - ".format( - name=WEBHOOK_NAME, - cabundle=CA_BUNDLE, - ) - print("Webhook configuration:\n{}".format(webhook)) - return webhook - - -def run(command): - print("Executing: {}".format(command)) - result = subprocess.run(command, - shell=True, - capture_output=True, - text=True) - print("returncode: {}".format(result.returncode)) - if result.stdout: - print("stdout:") - print(result.stdout) - if result.stderr: - print("stderr:") - print(result.stderr) - - -if __name__ == "__main__": - main() diff --git a/test/shared_test_code.py b/test/shared_test_code.py deleted file mode 100644 index 7584950bf..000000000 --- a/test/shared_test_code.py +++ /dev/null @@ -1,327 +0,0 @@ -#mod shared_test_code - -import json, os, time, yaml -from kubernetes import client, config -from kubernetes.client.rest import ApiException - -CONFIGURATION_LABEL_NAME = "akri.sh/configuration" -INSTANCE_LABEL_NAME = "akri.sh/instance" -AGENT_POD_NAME = "akri-agent" -CONTROLLER_POD_NAME = "akri-controller" -GROUP = "akri.sh" -AGENT_LOG_PATH = "/tmp/agent_log.txt" -CONTROLLER_LOG_PATH = "/tmp/controller_log.txt" -DEBUG_ECHO_NAME = "akri-debug-echo-foo" -DEBUG_ECHO_DESCRIPTIONS_PREFIX = "bar" -KUBE_CONFIG_PATH_FILE = "/tmp/kubeconfig_path_to_test.txt" -RUNTIME_COMMAND_FILE = "/tmp/runtime_cmd_to_test.txt" -HELM_K8S_DISTRO = "/tmp/k8s_distro_to_test.txt" -VERSION_FILE = "/tmp/version_to_test.txt" -SLEEP_DURATION_FILE = "/tmp/sleep_duration.txt" -EXTRA_HELM_ARGS_FILE = "/tmp/extra_helm_args.txt" -HELM_CHART_LOCATION = "/tmp/helm_chart_location.txt" -SLEEP_INTERVAL = 20 - -CONTROLLER_POD_LABEL_SELECTOR = "app.kubernetes.io/name=" + CONTROLLER_POD_NAME -AGENT_POD_LABEL_SELECTOR = "app.kubernetes.io/name=" + AGENT_POD_NAME -BROKER_POD_LABEL_SELECTOR = CONFIGURATION_LABEL_NAME - -CONFIGURATION_SVC_LABEL_SELECTOR = CONFIGURATION_LABEL_NAME -INSTANCE_SVC_LABEL_SELECTOR = INSTANCE_LABEL_NAME - -# Debug echo Configuration broker properties -PROPERTIES_RESOLUTION_WIDTH_KEY = "RESOLUTION_WIDTH" -PROPERTIES_RESOLUTION_HEIGHT_KEY = "RESOLUTION_HEIGHT" -PROPERTIES_RESOLUTION_WIDTH_VALUE = "800" -PROPERTIES_RESOLUTION_HEIGHT_VALUE = "600" - -major_version = "" -agent_pod_name = "" -controller_pod_name = "" - - -def get_helm_chart_location(): - # Get helm chart location passed in helm install command (i.e. 
`repo/chart --version X.Y.Z` or `./deployment/helm`) - return open(HELM_CHART_LOCATION, "r").readline().rstrip() - - -def get_extra_helm_args(): - # Get any extra helm args passed from workflow - if os.path.exists(EXTRA_HELM_ARGS_FILE): - return open(EXTRA_HELM_ARGS_FILE, "r").readline().rstrip() - return "" - - -def initial_sleep(): - # Sleep for amount of time specified in SLEEP_DURATION_FILE else don't sleep at all - if os.path.exists(SLEEP_DURATION_FILE): - initial_sleep_duration = open(SLEEP_DURATION_FILE, - "r").readline().rstrip() - print("Sleeping for {} seconds".format(initial_sleep_duration)) - time.sleep(int(initial_sleep_duration)) - print("Done sleeping") - - -def helm_update(): - # Update Helm and install this version's chart - os.system("helm repo update") - - -def get_kubeconfig_path(): - # Get kubeconfig path - return open(KUBE_CONFIG_PATH_FILE, "r").readline().rstrip() - - -def get_kubectl_command(): - # Get kubectl command - return open(RUNTIME_COMMAND_FILE, "r").readline().rstrip() - - -def get_k8s_distro_arg(): - # Get K8s distro arg for Akri Helm - return open(HELM_K8S_DISTRO, "r").readline().rstrip() - - -def get_test_version(): - # Get version of akri to test - if os.path.exists(VERSION_FILE): - return open(VERSION_FILE, "r").readline().rstrip() - return open("version.txt", "r").readline().rstrip() - - -def save_agent_and_controller_logs(namespace="default"): - kubectl_cmd = get_kubectl_command() - os.system("{} logs {} --namespace={} >> {}".format(kubectl_cmd, - agent_pod_name, - namespace, - AGENT_LOG_PATH)) - os.system("{} logs {} --namespace={} >> {}".format(kubectl_cmd, - controller_pod_name, - namespace, - CONTROLLER_LOG_PATH)) - - -def crds_applied(): - print("Checking for CRDs") - v1_ext = client.ApiextensionsV1Api() - for x in range(5): - if x != 0: - time.sleep(SLEEP_INTERVAL) - current_crds = [ - x["spec"]["names"]["kind"].lower() for x in - v1_ext.list_custom_resource_definition().to_dict()['items'] - ] - if "configuration" in current_crds and "instance" in current_crds: - return True - return False - - -def check_pods_running(v1, pod_label_selector, count): - print("Checking number of pods [{}] ... expected {}".format( - pod_label_selector, count)) - for x in range(30): - if x != 0: - time.sleep(SLEEP_INTERVAL) - print( - "Sleep iteration {} ... been waiting for {} seconds for pod check" - .format(x + 1, (x + 1) * SLEEP_INTERVAL)) - pods = v1.list_pod_for_all_namespaces( - label_selector=pod_label_selector).items - print("Found {} pods".format(len(pods))) - if count == 0: - # Expectation is that no pods are running - if len(pods) == 0: - return True - else: - all_terminating = True - for pod in pods: - # Ensure that none of the pods are still running - if pod.status.phase != "Terminating": - all_terminating = False - break - if all_terminating: return True - else: - # Expectation is that `count` pods are running - all_running = True - if len(pods) == count: - for pod in pods: - if pod.status.phase != "Running": - all_running = False - break - if all_running: - if pod_label_selector == BROKER_POD_LABEL_SELECTOR: - return check_broker_pods_env_var(pods) - else: - return True - print("Wrong number of pods [{}] found ... 
expected {}".format( - pod_label_selector, count)) - return False - -def check_broker_pods_env_var(pods): - kubectl_cmd = get_kubectl_command() - for pod in pods: - # expect the environment variable from akri instance suffixed with a 6 digest hash id - if os.system('{} exec -i {} -- /bin/sh -c "printenv | grep \'^DEBUG_ECHO_DESCRIPTION_[A-F0-9]\\{{6,6\\}}={}\' | wc -l | grep -v 0"'.format(kubectl_cmd, pod.metadata.name, DEBUG_ECHO_DESCRIPTIONS_PREFIX)): - print("Could not find a DEBUG_ECHO_DESCRIPTION_ environment variable in broker Pod {}".format(pod.metadata.name)) - return False - if os.system('{} exec -i {} -- /bin/sh -c "printenv | grep ^{}={}$ | wc -l | grep -v 0"'.format(kubectl_cmd, pod.metadata.name, PROPERTIES_RESOLUTION_WIDTH_KEY, PROPERTIES_RESOLUTION_WIDTH_VALUE)): - print("Could not find a {} environment variable in broker Pod {}".format(pod.metadata.name)) - return False - if os.system('{} exec -i {} -- /bin/sh -c "printenv | grep ^{}={}$ | wc -l | grep -v 0"'.format(kubectl_cmd, pod.metadata.name, PROPERTIES_RESOLUTION_HEIGHT_KEY, PROPERTIES_RESOLUTION_HEIGHT_VALUE)): - print("Could not find a {} environment variable in broker Pod {}".format(pod.metadata.name)) - return False - return True - -def check_svcs_running(v1, svc_label_selector, count): - print("Checking number of svcs [{}] ... expected {}".format( - svc_label_selector, count)) - for x in range(30): - if x != 0: - time.sleep(SLEEP_INTERVAL) - print( - "Sleep iteration {} ... been waiting for {} seconds for svc check" - .format(x + 1, (x + 1) * SLEEP_INTERVAL)) - svcs = v1.list_service_for_all_namespaces( - label_selector=svc_label_selector).items - print("Found {} pods".format(len(svcs))) - if count == 0: - # Expectation is that no svcs are running - if len(svcs) == 0: - return True - else: - # Expectation is that `count` svcs are running - if len(svcs) == count: - return True - print("Wrong number of services [{}] found ... expected {}".format( - svc_label_selector, count)) - return False - - -def get_pod_name(pod_label_selector, index): - v1 = client.CoreV1Api() - print("Getting pod name [{}]".format(pod_label_selector)) - pods = v1.list_pod_for_all_namespaces( - label_selector=pod_label_selector).items - if len(pods) >= index: - if pods[index].status.phase == "Running": - return pods[index].metadata.name - return "" - - -def get_running_pod_names_and_uids(pod_label_selector): - v1 = client.CoreV1Api() - map = {} - print("Getting pod name [{}]".format(pod_label_selector)) - pods = v1.list_pod_for_all_namespaces( - label_selector=pod_label_selector).items - for pod in pods: - if pod.status.phase == "Running": - map[pod.metadata.name] = pod.metadata.uid - return map - - -def check_instance_count(count, namespace="default"): - print("Checking for instances ... version:{} count:{}".format( - major_version, count)) - if count == 0: - return True - - api_instance = client.CustomObjectsApi() - for x in range(20): - if x != 0: - time.sleep(SLEEP_INTERVAL) - print( - "Sleep iteration {} ... been waiting for {} seconds for instances". - format(x + 1, (x + 1) * SLEEP_INTERVAL)) - instances = api_instance.list_namespaced_custom_object( - group=GROUP, - version=major_version, - namespace=namespace, - plural="instances")['items'] - if len(instances) == count: - return True - return False - - -def check_agent_pods_state(v1, agents): - global agent_pod_name - print("Checking for agent pods ... 
expected {}".format(agents)) - agents_check_failed = check_pods_running(v1, AGENT_POD_LABEL_SELECTOR, - agents) - if not agents_check_failed: - print("Wrong number of agents found ... expected {}".format(agents)) - else: - if agents == 1: - agent_pod_name = get_pod_name(AGENT_POD_LABEL_SELECTOR, 0) - if agent_pod_name == "": - print("Agent pod name not found") - return False - - return agents_check_failed - - -def check_controller_pods_state(v1, controllers): - global controller_pod_name - print("Checking for controller pods ... expected {}".format(controllers)) - controllers_check_failed = check_pods_running( - v1, CONTROLLER_POD_LABEL_SELECTOR, controllers) - if not controllers_check_failed: - print("Wrong number of controllers found ... expected {}".format( - controllers)) - else: - if controllers == 1: - controller_pod_name = get_pod_name(CONTROLLER_POD_LABEL_SELECTOR, - 0) - if controller_pod_name == "": - print("Controller pod name not found") - return False - - return controllers_check_failed - - -def check_broker_pods_state(v1, brokers): - print("Checking for broker pods ... expected {}".format(brokers)) - brokers_check_failed = check_pods_running(v1, BROKER_POD_LABEL_SELECTOR, - brokers) - if not brokers_check_failed: - print("Wrong number of brokers found ... expected {}".format(brokers)) - return brokers_check_failed - - -def check_config_svcs_state(v1, count: int): - print("Checking for configuration services ... expected {}".format(count)) - config_svcs_check_failed = check_svcs_running( - v1, CONFIGURATION_SVC_LABEL_SELECTOR, count) - if not config_svcs_check_failed: - print("Wrong number of configuration services found ... expected {}". - format(count)) - return config_svcs_check_failed - - -def check_instance_svcs_state(v1, count: int): - print("Checking for instance services ... expected {}".format(count)) - instance_svcs_check_failed = check_svcs_running( - v1, INSTANCE_SVC_LABEL_SELECTOR, count) - if not instance_svcs_check_failed: - print("Wrong number of brokers found ... expected {}".format(count)) - return instance_svcs_check_failed - - -def check_akri_state(agents, - controllers, - instances, - brokers, - config_svcs, - instance_svcs, - namespace="default"): - print( - "Checking for Akri state ... expected agent(s):{}, controller(s):{}, instance(s):{}, broker(s):{}, config service(s):{}, and instance service(s):{} to exist" - .format(agents, controllers, instances, brokers, config_svcs, - instance_svcs)) - v1 = client.CoreV1Api() - return check_agent_pods_state(v1, agents) and \ - check_controller_pods_state(v1, controllers) and \ - check_instance_count(instances, namespace) and \ - check_broker_pods_state(v1, brokers) and \ - check_config_svcs_state(v1, config_svcs) and \ - check_instance_svcs_state(v1, instance_svcs)