Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Cherry-pick] Add a simple e2e-test for katib #1646

Merged
merged 1 commit into from
Sep 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 13 additions & 12 deletions scripts/kfctl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ createEnv() {
# Namespace where kubeflow is deployed
echo K8S_NAMESPACE=${K8S_NAMESPACE:-"kubeflow"} >> ${ENV_FILE}

case "$PLATFORM" in
case "$PLATFORM" in
minikube)
echo KUBEFLOW_CLOUD=minikube >> ${ENV_FILE}
echo MOUNT_LOCAL=${MOUNT_LOCAL} >> ${ENV_FILE}
Expand All @@ -52,43 +52,43 @@ createEnv() {
echo PROJECT must be set either using environment variable PROJECT
echo or by setting the default project in gcloud
exit 1
fi
fi

# Name of the deployment
DEPLOYMENT_NAME=${DEPLOYMENT_NAME:-"kubeflow"}
echo DEPLOYMENT_NAME="${DEPLOYMENT_NAME}" >> ${ENV_FILE}

# Kubeflow directories
echo KUBEFLOW_DM_DIR=${KUBEFLOW_DM_DIR:-"$(pwd)/gcp_config"} >> ${ENV_FILE}
echo KUBEFLOW_SECRETS_DIR=${KUBEFLOW_SECRETS_DIR:-"$(pwd)/secrets"} >> ${ENV_FILE}
echo KUBEFLOW_K8S_MANIFESTS_DIR="$(pwd)/k8s_specs" >> ${ENV_FILE}

# Name of the K8s context to create.
echo KUBEFLOW_K8S_CONTEXT=${DEPLOYMENT_NAME} >> ${ENV_FILE}

# GCP Zone
# The default should be a zone that supports Haswell.
ZONE=${ZONE:-$(gcloud config get-value compute/zone 2>/dev/null)}
echo ZONE=${ZONE:-"us-east1-d"} >> ${ENV_FILE}

# Email for cert manager
EMAIL=${EMAIL:-$(gcloud config get-value account 2>/dev/null)}
echo EMAIL=${EMAIL} >> ${ENV_FILE}

# GCP Static IP Name
echo KUBEFLOW_IP_NAME=${KUBEFLOW_IP_NAME:-"${DEPLOYMENT_NAME}-ip"} >> ${ENV_FILE}
# Name of the endpoint
KUBEFLOW_ENDPOINT_NAME=${KUBEFLOW_ENDPOINT_NAME:-"${DEPLOYMENT_NAME}"}
echo KUBEFLOW_ENDPOINT_NAME=${KUBEFLOW_ENDPOINT_NAME} >> ${ENV_FILE}
# Complete hostname
echo KUBEFLOW_HOSTNAME=${KUBEFLOW_HOSTNAME:-"${KUBEFLOW_ENDPOINT_NAME}.endpoints.${PROJECT}.cloud.goog"} >> ${ENV_FILE}

echo CONFIG_FILE=${CONFIG_FILE:-"cluster-kubeflow.yaml"} >> ${ENV_FILE}

if [ -z "${PROJECT_NUMBER}" ]; then
PROJECT_NUMBER=$(gcloud projects describe ${PROJECT} --format='value(project_number)')
fi

echo PROJECT_NUMBER=${PROJECT_NUMBER} >> ${ENV_FILE}
;;
*)
Expand Down Expand Up @@ -210,6 +210,7 @@ ksApply () {
ks apply default -c centraldashboard
ks apply default -c tf-job-operator
ks apply default -c argo
ks apply default -c katib
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would mean that katib will get deployed by default into the installations. Is that what we want?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am assuming that 0.3 onwards we want to install katib by default

ks apply default -c spartakus
popd

Expand Down Expand Up @@ -247,7 +248,7 @@ if [ "${COMMAND}" == "generate" ]; then
if [ "${PLATFORM}" == "minikube" ]; then
create_local_fs_mount_spec
if ${MOUNT_LOCAL}; then
ks param set jupyterhub disks "local-notebooks"
ks param set jupyterhub disks "local-notebooks"
ks param set jupyterhub notebookUid `id -u`
ks param set jupyterhub notebookGid `id -g`
ks param set jupyterhub accessLocalFs true
Expand Down
2 changes: 1 addition & 1 deletion scripts/util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ function createKsApp() {
ks generate tf-job-operator tf-job-operator

ks generate argo argo

ks generate katib katib
# Enable collection of anonymous usage metrics
# To disable metrics collection. Remove the spartakus component.
# cd ks_app
Expand Down
51 changes: 51 additions & 0 deletions testing/test_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@
import json
import logging
import os
import re
import shutil
import subprocess
import tempfile
import time
import uuid
Expand Down Expand Up @@ -189,6 +191,50 @@ def deploy_model(args):
api_client, namespace, args.deploy_name + "-v1", timeout_minutes=10)
logging.info("Verified TF serving started.")

def test_successful_deployment(deployment_name):
  """ Tests if deployment_name is successfully running using kubectl.

  Polls `kubectl get deployment <deployment_name>` until the deployment
  reports equal DESIRED/CURRENT/UP-TO-DATE/AVAILABLE pod counts, retrying
  up to 20 times with a 5 second sleep between attempts.

  Args:
    deployment_name: Name of the K8s Deployment to check, looked up in the
      current kubectl context/namespace.

  Returns:
    True once the deployment reports all replicas available.

  Raises:
    Exception: if the deployment is still not fully available after all
      retries are exhausted.
  """
  # TODO use the python kubernetes library to get deployment status
  # This is using kubectl right now
  retries = 20
  i = 0
  while True:
    # Check the retry budget before each attempt; attempt number i is
    # 0-based, so this raises after `retries` failed attempts.
    if i == retries:
      raise Exception('Deployment failed: ' + deployment_name)
    try:
      # NOTE(review): util.run is a project helper; assumed to return the
      # command's stdout as a string and raise CalledProcessError on a
      # non-zero exit (e.g. deployment not found yet) — TODO confirm.
      output = util.run(["kubectl", "get", "deployment", deployment_name])
      logging.info("output = \n" + output)
      # A single newline implies exactly two lines: the column header and
      # one data row for the deployment. NOTE(review): this assumes
      # util.run strips any trailing newline from kubectl's output —
      # verify, otherwise this branch may never be taken.
      if output.count('\n') == 1:
        # Keep only the data row (index 1; index 0 is the header).
        output = output.split('\n')[1]
        # Split the row on runs of spaces into its columns:
        # NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE (kubectl <= 1.10
        # column layout — TODO confirm against the cluster's kubectl
        # version, as newer versions use READY/UP-TO-DATE/AVAILABLE).
        output = re.split(' +', output)
        desired_pods = output[1]
        current_pods = output[2]
        uptodate_pods = output[3]
        available_pods = output[4]
        logging.info("desired_pods " + desired_pods)
        logging.info("current_pods " + current_pods)
        logging.info("uptodate_pods " + uptodate_pods)
        logging.info("available_pods " + available_pods)
        # String comparison is sufficient here: all four values come from
        # the same row, so equal counts produce identical digit strings.
        if desired_pods == current_pods and \
            desired_pods == uptodate_pods and \
            desired_pods == available_pods:
          return True
    except subprocess.CalledProcessError as e:
      # kubectl failing (deployment not created yet, transient API error)
      # is expected early on; log and fall through to the retry sleep.
      logging.error(e)
    logging.info("Sleeping 5 seconds and retrying..")
    time.sleep(5)
    i += 1


def test_katib(args):
  """Verify that every Katib component deployment becomes available.

  Checks each Katib-related K8s Deployment in turn via
  test_successful_deployment, which raises if a deployment does not
  become fully available.

  Args:
    args: Parsed command-line arguments (unused; kept for the common
      subcommand handler signature).
  """
  katib_deployments = [
      'vizier-core',
      'vizier-db',
      'vizier-suggestion-grid',
      'vizier-suggestion-random',
      'studyjob-controller',
      'modeldb-backend',
      'modeldb-db',
      'modeldb-frontend',
  ]
  for deployment in katib_deployments:
    test_successful_deployment(deployment)

def deploy_argo(args):
api_client = create_k8s_client(args)
app_dir = setup_kubeflow_ks_app(args, api_client)
Expand Down Expand Up @@ -601,6 +647,11 @@ def main(): # pylint: disable=too-many-locals,too-many-statements

parser_argo_job.set_defaults(func=deploy_argo)

parser_katib_test = subparsers.add_parser(
"test_katib", help="Test Katib")

parser_katib_test.set_defaults(func=test_katib)

parser_minikube = subparsers.add_parser(
"deploy_minikube", help="Setup a K8s cluster on minikube.")

Expand Down
22 changes: 19 additions & 3 deletions testing/workflows/components/workflows.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@
srcDir: self.srcRootDir + "/kubeflow/kubeflow",
image: "gcr.io/kubeflow-ci/test-worker:latest",


// value of KUBECONFIG environment variable. This should be a full path.
kubeConfig: self.testDir + "/.kube/kubeconfig",

Expand All @@ -107,7 +106,6 @@
env_vars:: [],
side_cars: [],


activeDeadlineSeconds: 1800, // Set 30 minute timeout for each template

local template = self,
Expand Down Expand Up @@ -242,6 +240,25 @@
},
dependencies: ["wait-for-kubeflow"],
}, // test-argo-deploy
{

template: tests.buildTemplate {
name: "test-katib-deploy",
command: [
"python",
"-m",
"testing.test_deploy",
"--project=kubeflow-ci",
"--github_token=$(GITHUB_TOKEN)",
"--namespace=" + tests.stepsNamespace,
"--test_dir=" + tests.testDir,
"--artifacts_dir=" + tests.artifactsDir,
"--deploy_name=test-katib",
"test_katib",
],
},
dependencies: ["wait-for-kubeflow"],
}, // test-katib
{
template: tests.buildTemplate {
name: "pytorchjob-deploy",
Expand Down Expand Up @@ -299,7 +316,6 @@
argoTaskTemplates: std.map(function(i) i.template.argoTemplate
, self.tasks),


argoTemplates: [self.argoDagTemplate] + self.argoTaskTemplates,
}, // kfTests

Expand Down