From 949bed7e187e1233f86ee0f8571cf4b2d576f6d2 Mon Sep 17 00:00:00 2001
From: Ankush Agarwal <agwl@google.com>
Date: Wed, 26 Sep 2018 20:11:01 -0700
Subject: [PATCH] Add an e2e test for katib

This is run as part of the kfctl e2e test.
---
 scripts/kfctl.sh                              | 25 ++++-----
 scripts/util.sh                               |  2 +-
 testing/test_deploy.py                        | 51 +++++++++++++++++++
 .../workflows/components/workflows.libsonnet  | 22 ++++++--
 4 files changed, 84 insertions(+), 16 deletions(-)

diff --git a/scripts/kfctl.sh b/scripts/kfctl.sh
index 1c645af7532..cd9b5376ac2 100755
--- a/scripts/kfctl.sh
+++ b/scripts/kfctl.sh
@@ -35,7 +35,7 @@ createEnv() {
   # Namespace where kubeflow is deployed
   echo K8S_NAMESPACE=${K8S_NAMESPACE:-"kubeflow"} >> ${ENV_FILE}
 
-  case "$PLATFORM" in 
+  case "$PLATFORM" in
     minikube)
       echo KUBEFLOW_CLOUD=minikube >> ${ENV_FILE}
       echo MOUNT_LOCAL=${MOUNT_LOCAL} >> ${ENV_FILE}
@@ -52,29 +52,29 @@ createEnv() {
         echo PROJECT must be set either using environment variable PROJECT
         echo or by setting the default project in gcloud
         exit 1
-      fi    
-  
+      fi
+
       # Name of the deployment
       DEPLOYMENT_NAME=${DEPLOYMENT_NAME:-"kubeflow"}
       echo DEPLOYMENT_NAME="${DEPLOYMENT_NAME}" >> ${ENV_FILE}
-  
+
       # Kubeflow directories
       echo KUBEFLOW_DM_DIR=${KUBEFLOW_DM_DIR:-"$(pwd)/gcp_config"} >> ${ENV_FILE}
       echo KUBEFLOW_SECRETS_DIR=${KUBEFLOW_SECRETS_DIR:-"$(pwd)/secrets"} >> ${ENV_FILE}
       echo KUBEFLOW_K8S_MANIFESTS_DIR="$(pwd)/k8s_specs" >> ${ENV_FILE}
-  
+
       # Name of the K8s context to create.
       echo  KUBEFLOW_K8S_CONTEXT=${DEPLOYMENT_NAME} >> ${ENV_FILE}
-  
+
       # GCP Zone
       # The default should be a zone that supports Haswell.
       ZONE=${ZONE:-$(gcloud config get-value compute/zone 2>/dev/null)}
       echo ZONE=${ZONE:-"us-east1-d"} >> ${ENV_FILE}
-  
+
       # Email for cert manager
       EMAIL=${EMAIL:-$(gcloud config get-value account 2>/dev/null)}
       echo EMAIL=${EMAIL} >> ${ENV_FILE}
-  
+
       # GCP Static IP Name
       echo KUBEFLOW_IP_NAME=${KUBEFLOW_IP_NAME:-"${DEPLOYMENT_NAME}-ip"} >> ${ENV_FILE}
       # Name of the endpoint
@@ -82,13 +82,13 @@ createEnv() {
       echo KUBEFLOW_ENDPOINT_NAME=${KUBEFLOW_ENDPOINT_NAME} >> ${ENV_FILE}
       # Complete hostname
       echo KUBEFLOW_HOSTNAME=${KUBEFLOW_HOSTNAME:-"${KUBEFLOW_ENDPOINT_NAME}.endpoints.${PROJECT}.cloud.goog"} >> ${ENV_FILE}
-  
+
       echo CONFIG_FILE=${CONFIG_FILE:-"cluster-kubeflow.yaml"} >> ${ENV_FILE}
-  
+
       if [ -z "${PROJECT_NUMBER}" ]; then
         PROJECT_NUMBER=$(gcloud projects describe ${PROJECT} --format='value(project_number)')
       fi
-  
+
       echo PROJECT_NUMBER=${PROJECT_NUMBER} >> ${ENV_FILE}
       ;;
     *)
@@ -210,6 +210,7 @@ ksApply () {
   ks apply default -c centraldashboard
   ks apply default -c tf-job-operator
   ks apply default -c argo
+  ks apply default -c katib
   ks apply default -c spartakus
   popd
 
@@ -247,7 +248,7 @@ if [ "${COMMAND}" == "generate" ]; then
     if [ "${PLATFORM}" == "minikube" ]; then
       create_local_fs_mount_spec
       if ${MOUNT_LOCAL}; then
-        ks param set jupyterhub disks "local-notebooks" 
+        ks param set jupyterhub disks "local-notebooks"
         ks param set jupyterhub notebookUid `id -u`
         ks param set jupyterhub notebookGid `id -g`
         ks param set jupyterhub accessLocalFs true
diff --git a/scripts/util.sh b/scripts/util.sh
index 49eb43e7e3d..5b309cadee2 100644
--- a/scripts/util.sh
+++ b/scripts/util.sh
@@ -66,7 +66,7 @@ function createKsApp() {
   ks generate tf-job-operator tf-job-operator
 
   ks generate argo argo
-
+  ks generate katib katib
   # Enable collection of anonymous usage metrics
   # To disable metrics collection. Remove the spartakus component.
   # cd ks_app
diff --git a/testing/test_deploy.py b/testing/test_deploy.py
index ac383152d32..f4bd502d140 100644
--- a/testing/test_deploy.py
+++ b/testing/test_deploy.py
@@ -29,7 +29,9 @@
 import json
 import logging
 import os
+import re
 import shutil
+import subprocess
 import tempfile
 import time
 import uuid
@@ -189,6 +191,50 @@ def deploy_model(args):
     api_client, namespace, args.deploy_name + "-v1", timeout_minutes=10)
   logging.info("Verified TF serving started.")
 
+def test_successful_deployment(deployment_name):
+  """Poll `kubectl get deployment` until deployment_name is fully available.
+
+  Makes up to 20 attempts, sleeping 5 seconds after each unsuccessful one.
+
+  Returns True once the desired, current, up-to-date and available pod
+  counts (fields 1-4 of the deployment row) are all equal.
+  Raises Exception if that has not happened after the final attempt.
+  """
+  # TODO use the python kubernetes library to get deployment status
+  retries = 20
+  for _ in range(retries):
+    try:
+      output = util.run(["kubectl", "get", "deployment", deployment_name])
+      logging.info("output = \n%s", output)
+      rows = output.split('\n')
+      # Only parse a header plus exactly one row; a short row means retry
+      # instead of crashing the test with an uncaught IndexError.
+      fields = re.split(' +', rows[1]) if len(rows) == 2 else []
+      if len(fields) >= 5:
+        desired, current, uptodate, available = fields[1:5]
+        logging.info("desired_pods %s", desired)
+        logging.info("current_pods %s", current)
+        logging.info("uptodate_pods %s", uptodate)
+        logging.info("available_pods %s", available)
+        if desired == current == uptodate == available:
+          return True
+    except subprocess.CalledProcessError as e:
+      logging.error(e)
+    logging.info("Sleeping 5 seconds and retrying..")
+    time.sleep(5)
+  raise Exception('Deployment failed: ' + deployment_name)
+
+
+def test_katib(args):
+  """Verify each Katib deployment in order; `args` is accepted but unused."""
+  katib_deployments = (
+    'vizier-core', 'vizier-db', 'vizier-suggestion-grid',
+    'vizier-suggestion-random', 'studyjob-controller',
+    'modeldb-backend', 'modeldb-db', 'modeldb-frontend',
+  )
+  for name in katib_deployments:
+    test_successful_deployment(name)
+
 def deploy_argo(args):
   api_client = create_k8s_client(args)
   app_dir = setup_kubeflow_ks_app(args, api_client)
@@ -601,6 +647,11 @@ def main():  # pylint: disable=too-many-locals,too-many-statements
 
   parser_argo_job.set_defaults(func=deploy_argo)
 
+  parser_katib_test = subparsers.add_parser(
+    "test_katib", help="Test Katib")
+
+  parser_katib_test.set_defaults(func=test_katib)
+
   parser_minikube = subparsers.add_parser(
     "deploy_minikube", help="Setup a K8s cluster on minikube.")
 
diff --git a/testing/workflows/components/workflows.libsonnet b/testing/workflows/components/workflows.libsonnet
index 65ac3d0a813..a7505d8b4b1 100644
--- a/testing/workflows/components/workflows.libsonnet
+++ b/testing/workflows/components/workflows.libsonnet
@@ -80,7 +80,6 @@
     srcDir: self.srcRootDir + "/kubeflow/kubeflow",
     image: "gcr.io/kubeflow-ci/test-worker:latest",
 
-
     // value of KUBECONFIG environment variable. This should be  a full path.
     kubeConfig: self.testDir + "/.kube/kubeconfig",
 
@@ -107,7 +106,6 @@
       env_vars:: [],
       side_cars: [],
 
-
       activeDeadlineSeconds: 1800,  // Set 30 minute timeout for each template
 
       local template = self,
@@ -242,6 +240,25 @@
         },
         dependencies: ["wait-for-kubeflow"],
       },  // test-argo-deploy
+      {
+
+        template: tests.buildTemplate {
+          name: "test-katib-deploy",
+          command: [
+            "python",
+            "-m",
+            "testing.test_deploy",
+            "--project=kubeflow-ci",
+            "--github_token=$(GITHUB_TOKEN)",
+            "--namespace=" + tests.stepsNamespace,
+            "--test_dir=" + tests.testDir,
+            "--artifacts_dir=" + tests.artifactsDir,
+            "--deploy_name=test-katib",
+            "test_katib",
+          ],
+        },
+        dependencies: ["wait-for-kubeflow"],
+      },  // test-katib
       {
         template: tests.buildTemplate {
           name: "pytorchjob-deploy",
@@ -299,7 +316,6 @@
     argoTaskTemplates: std.map(function(i) i.template.argoTemplate
                                , self.tasks),
 
-
     argoTemplates: [self.argoDagTemplate] + self.argoTaskTemplates,
   },  // kfTests