Skip to content

Commit

Permalink
Enable node autoprovisioning in v1beta1 clusters (#1959)
Browse files Browse the repository at this point in the history
* Enable node autoprovisioning in v1beta1 clusters

* Add v1beta cluster tests

* Make accelerator configurable

* Revert prow changes

* Fix params.libsonnet

* Change scope for GKE beta
  • Loading branch information
richardsliu authored and k8s-ci-robot committed Nov 20, 2018
1 parent d72ba77 commit 4240dfd
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 4 deletions.
13 changes: 11 additions & 2 deletions deployment/gke/deployment_manager_configs/cluster-kubeflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ resources:
zone: SET_THE_ZONE
# "1.X": picks the highest valid patch+gke.N patch in the 1.X version
# https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1/projects.zones.clusters
cluster-version: "1.10"
cluster-version: "1.11"
# Set this to v1beta1 to use beta features such as private clusters
# and the Kubernetes stackdriver agents.
gkeApiVersion: v1
gkeApiVersion: SET_GKE_API_VERSION
# An arbitrary string appended to the names of the node pools;
# bump this if you want to modify the node pools.
# This will cause existing node pools to be deleted and new ones to be created.
Expand All @@ -53,6 +53,15 @@ resources:
gpu-pool-min-nodes: 0
gpu-pool-max-nodes: 0
gpu-type: nvidia-tesla-k80
# Autoprovisioning parameters. These only take effect when gkeApiVersion
# is v1beta1 and "enabled" is true; otherwise they are ignored.
autoprovisioning-config:
enabled: true
max-cpu: 20
max-memory: 200
max-accelerator:
- type: nvidia-tesla-k80
count: 8
# Whether to enable TPUs
enable_tpu: false
securityConfig:
Expand Down
14 changes: 14 additions & 0 deletions deployment/gke/deployment_manager_configs/cluster.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,20 @@ resources:
{{ properties['securityConfig']['masterAuthorizedNetworksConfigCidr'] }}
{% endif %}
{% endif %}
# Autoprovisioning is only supported in v1beta1.
{% if properties['gkeApiVersion'] == 'v1beta1' and properties['autoprovisioning-config']['enabled'] %}
autoscaling:
enableNodeAutoprovisioning: true
resourceLimits:
- resourceType: 'cpu'
maximum: {{ properties['autoprovisioning-config']['max-cpu'] }}
- resourceType: 'memory'
maximum: {{ properties['autoprovisioning-config']['max-memory'] }}
{% for accelerator in properties['autoprovisioning-config']['max-accelerator'] %}
- resourceType: {{ accelerator.type }}
maximum: {{ accelerator.count }}
{% endfor %}
{% endif %}
nodePools:
- name: default-pool
initialNodeCount: {{ properties['cpu-pool-initialNodeCount'] }}
Expand Down
18 changes: 16 additions & 2 deletions prow_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,33 @@ workflows:
- deployment/*
params:
platform: gke
gkeApiVersion: v1
- app_dir: kubeflow/kubeflow/testing/workflows
component: kfctl_test
name: kfctl-beta
job_types:
- presubmit
- postsubmit
- periodic
include_dirs:
- scripts/gke/*
- deployment/gke/*
params:
platform: gke
gkeApiVersion: v1beta1
# Run unittests
# TODO(jlewi): Need to add step to run go and python unittests
- app_dir: kubeflow/kubeflow/testing/workflows
component: unit_tests
name: unittests
name: unittests
# TODO(jlewi): We should be running the minikube workflow
# on presubmit when the minikube E2E test itself changes
# so we verify the test is working before submitting
# changes to it. But right now we can't match a regex or
# glob.
# see: https://github.com/kubeflow/testing/issues/187
# see: https://github.com/kubeflow/kubeflow/issues/1350
#
#
# Run tests on minikube
- app_dir: kubeflow/kubeflow/testing/workflows
component: workflows
Expand Down
1 change: 1 addition & 0 deletions scripts/gke/util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ generateDMConfigs() {

# Set values in DM config file
sed -i.bak "s/zone: SET_THE_ZONE/zone: ${ZONE}/" "${KUBEFLOW_DM_DIR}/${CONFIG_FILE}"
sed -i.bak "s/gkeApiVersion: SET_GKE_API_VERSION/gkeApiVersion: ${GKE_API_VERSION}/" "${KUBEFLOW_DM_DIR}/${CONFIG_FILE}"
sed -i.bak "s/users:/users: [\"${IAP_IAM_ENTRY}\"]/" "${KUBEFLOW_DM_DIR}/${CONFIG_FILE}"
sed -i.bak "s/ipName: kubeflow-ip/ipName: ${KUBEFLOW_IP_NAME}/" "${KUBEFLOW_DM_DIR}/${CONFIG_FILE}"
rm "${KUBEFLOW_DM_DIR}/${CONFIG_FILE}.bak"
Expand Down
7 changes: 7 additions & 0 deletions scripts/kfctl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ set -xe
ENV_FILE="env.sh"
SKIP_INIT_PROJECT=false
CLUSTER_VERSION="1.10"
GKE_API_VERSION="v1"

DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)"
source "${DIR}/util.sh"
Expand Down Expand Up @@ -84,6 +85,8 @@ createEnv() {
# https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1/projects.zones.clusters
echo "Setting cluster version to ${CLUSTER_VERSION}"
echo CLUSTER_VERSION=${CLUSTER_VERSION} >> ${ENV_FILE}

echo GKE_API_VERSION=${GKE_API_VERSION} >> ${ENV_FILE}
;;
*)
echo KUBEFLOW_PLATFORM=null >> ${ENV_FILE}
Expand Down Expand Up @@ -178,6 +181,10 @@ parseArgs() {
shift
EMAIL=$1
;;
--gkeApiVersion)
shift
GKE_API_VERSION=$1
;;
--skipInitProject)
SKIP_INIT_PROJECT=true
;;
Expand Down
2 changes: 2 additions & 0 deletions testing/workflows/components/kfctl_test.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ local dagTemplates = [
"us-east1-d",
// Temporary fix for https://github.com/kubeflow/kubeflow/issues/1562
"--skipInitProject",
"--gkeApiVersion",
params.gkeApiVersion,
],
working_dir=testDir,
),
Expand Down
6 changes: 6 additions & 0 deletions testing/workflows/components/params.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -11,38 +11,44 @@
platform: "minikube",
prow: "JOB_NAME=kubeflow-presubmit-test,JOB_TYPE=presubmit,PULL_NUMBER=209,REPO_NAME=kubeflow,REPO_OWNER=kubeflow,BUILD_NUMBER=997a",
prow_env: "JOB_NAME=kubeflow-gke-deploy-test,JOB_TYPE=presubmit,PULL_NUMBER=4,REPO_NAME=kubeflow,REPO_OWNER=jlewi,BUILD_NUMBER=3a8b",
gkeApiVersion: "",
},
gke_deploy: {
bucket: "kubeflow-ci_temp",
name: "jlewi-kubeflow-gke-deploy-test-4-3a8b",
namespace: "kubeflow-test-infra",
prow: "JOB_NAME=kubeflow-presubmit-test,JOB_TYPE=presubmit,PULL_NUMBER=209,REPO_NAME=kubeflow,REPO_OWNER=kubeflow,BUILD_NUMBER=997a",
prow_env: "JOB_NAME=kubeflow-gke-deploy-test,JOB_TYPE=presubmit,PULL_NUMBER=4,REPO_NAME=kubeflow,REPO_OWNER=jlewi,BUILD_NUMBER=3a8b",
gkeApiVersion: "",
},
kfctl_test: {
bucket: "kubeflow-ci_temp",
name: "somefakename",
namespace: "kubeflow-test-infra",
prow_env: "",
deleteKubeflow: true,
gkeApiVersion: "v1",
},
click_deploy_test: {
bucket: "kubeflow-ci_temp",
name: "somefakename",
namespace: "kubeflow-test-infra",
prow_env: "",
gkeApiVersion: "v1",
},
unit_tests: {
bucket: "kubeflow-ci_temp",
name: "somefakename",
namespace: "kubeflow-test-infra",
prow_env: "",
gkeApiVersion: "",
},
tfserving: {
commit: "master",
name: "somefakename",
namespace: "kubeflow-test-infra",
prow_env: "REPO_OWNER=kubeflow,REPO_NAME=kubeflow,PULL_BASE_SHA=master",
gkeApiVersion: "",
},
},
}

0 comments on commit 4240dfd

Please sign in to comment.