Skip to content

Commit

Permalink
Pull in the latest changes to the scripts from master into the 0.2 br…
Browse files Browse the repository at this point in the history
…anch.

* This includes changes like #1174 which makes downloading the repo
  efficient by downloading a tarball and not using git clone.

* Also pull in the scripts for GKE that don't use bootstrapper.
  • Loading branch information
jlewi committed Jul 11, 2018
1 parent d42bd0a commit 1b5c54a
Show file tree
Hide file tree
Showing 9 changed files with 292 additions and 61 deletions.
5 changes: 5 additions & 0 deletions docs/gke/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Getting Started on GKE

Follow the guide
[here](https://www.kubeflow.org/docs/started/getting-started-gke/).

8 changes: 6 additions & 2 deletions docs/gke/configs/cluster-kubeflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,14 @@ resources:
- cidrBlock: 1.2.3.4/32
- cidrBlock: 5.6.7.8/32
enabled: false
users:
users:
# List users to grant appropriate GCP permissions to use Kubeflow.
# These can either be individual users (Google accounts) or Google
# Groups.
# - user:john@acme.com
# - group:data-scientists@acme.com
# Path for the bootstrapper image.
bootstrapperImage: gcr.io/kubeflow-images-public/bootstrapper:v0.2.0
bootstrapperImage: gcr.io/kubeflow-images-public/bootstrapper:v20180618-715aafc
# This is the name of the GCP static ip address reserved for your domain.
# Each Kubeflow deployment in your project should use one unique ipName among all configs.
ipName: kubeflow-ip
Expand All @@ -89,6 +89,8 @@ resources:
registry: kubeflow
- name: tf-serving
registry: kubeflow
- name: pytorch-job
registry: kubeflow
components:
- name: kubeflow-core
prototype: kubeflow-core
Expand All @@ -98,6 +100,8 @@ resources:
prototype: cert-manager
- name: iap-ingress
prototype: iap-ingress
- name: pytorch-operator
prototype: pytorch-operator
parameters:
- component: cloud-endpoints
name: secretName
Expand Down
80 changes: 36 additions & 44 deletions docs/gke/configs/cluster.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -259,18 +259,16 @@ TODO(jlewi): Do we need to serialize API activation
serviceName: iam.googleapis.com

{# Get the IAM policy first so that we do not remove any existing bindings. #}
- name: get-iam-policy
- name: get-iam-policy-add
action: gcp-types/cloudresourcemanager-v1:cloudresourcemanager.projects.getIamPolicy
properties:
resource: {{ env['project'] }}

metadata:
dependsOn:
- resource-manager-api
- iam-api
runtimePolicy:
- UPDATE_ALWAYS

dependsOn:
- resource-manager-api
- iam-api
runtimePolicy:
- UPDATE_ALWAYS
properties:
resource: {{ env["project"] }}
{# Set the IAM policy patching the existing policy with whatever is currently in the
config.
Expand All @@ -280,13 +278,16 @@ TODO(jlewi): Do we need to serialize API activation
Note: This will fail if the cloudservices account doesn't have IamProjectAdmin
permissions.
#}
- name: patch-iam-policy
- name: set-iam-policy-add
action: gcp-types/cloudresourcemanager-v1:cloudresourcemanager.projects.setIamPolicy
metadata:
runtimePolicy:
- UPDATE_ON_CHANGE
properties:
resource: {{ env['project'] }}
policy: $(ref.get-iam-policy)
resource: {{ env["project"] }}
policy: $(ref.get-iam-policy-add)
gcpIamPolicyPatch:
add:
add:
- role: roles/container.admin
members:
{# Deployment manager uses cloudservices account. #}
Expand Down Expand Up @@ -347,31 +348,26 @@ TODO(jlewi): Do we need to serialize API activation
{% endfor %}
{% endif %}
{% endif %}

remove: []

# Use a second get-set IAM policy pair to have a fresh etag.
- name: get-iam-policy-delete
action: gcp-types/cloudresourcemanager-v1:cloudresourcemanager.projects.getIamPolicy
metadata:
dependsOn:
- get-iam-policy
- iam-api
- {{ KF_ADMIN_NAME }}
- {{ KF_USER_NAME }}
runtimePolicy:
- CREATE

{# Remove IAM role binding when delete deployment.
TODO: This delete might be too aggressive (delete roles not created by deployment manager)
#}
- name: remove-iam-policy
dependsOn:
- set-iam-policy-add
runtimePolicy:
- UPDATE_ALWAYS
properties:
resource: {{ env["project"] }}
- name: set-iam-policy-delete
action: gcp-types/cloudresourcemanager-v1:cloudresourcemanager.projects.setIamPolicy
metadata:
runtimePolicy:
- DELETE
properties:
resource: {{ env['project'] }}
policy: $(ref.patch-iam-policy)
resource: {{ env["project"] }}
policy: $(ref.get-iam-policy-delete)
gcpIamPolicyPatch:
add: []

remove:
remove:
{# Grant permissions needed to push the app to a cloud repository. #}
- role: roles/source.admin
members:
Expand Down Expand Up @@ -427,12 +423,8 @@ TODO(jlewi): Do we need to serialize API activation
{% endfor %}
{% endif %}
{% endif %}
metadata:
dependsOn:
- patch-iam-policy
runtimePolicy:
- DELETE

{# This changes every time to ensure a fresh etag is obtained. #}
quotaUser: {{ env["current_time"] }}
{# A note about K8s resources.
The type value should be defined using a reference to the corresponding type provider.
Using references will ensure the K8s resource has
Expand Down Expand Up @@ -474,7 +466,7 @@ the corresponding type provider.
- admin-namespace

{# Make the default service account in the kubeflow-admin namespace a cluster admin.
Cluster admin priveleges are needed by the bootstrapper.
Cluster admin privileges are needed by the bootstrapper.
#}
- name: bootstrap-rbac
type: {{ env['project'] }}/$(ref.{{ RBAC_TYPE_NAME }}.name):{{ CLUSTER_ROLE_BINDING_COLLECTION }}
Expand Down Expand Up @@ -592,10 +584,10 @@ the corresponding type provider.
- name: kubeflow-bootstrapper
configMap:
name: kubeflow-bootstrapper
- name: kubeflow-admin-sa
- name: kubeflow-admin-sa
secret:
secretName: admin-gcp-sa

metadata:
dependsOn:
- admin-namespace
Expand Down
45 changes: 34 additions & 11 deletions docs/gke/configs/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# $ ./deploy.sh
#

set -e
set -xe

# Required Variables
export PROJECT=${PROJECT:-}
Expand All @@ -21,6 +21,9 @@ export CONFIG_FILE=${CONFIG_FILE:-}
export CLIENT_ID=${CLIENT_ID:-}
export CLIENT_SECRET=${CLIENT_SECRET:-}

# Set to false to skip setting up the project.
# Use ":-" so that "true" is substituted when SETUP_PROJECT is unset or empty.
# A bare ":" is a substring expansion and would leave the variable empty.
export SETUP_PROJECT=${SETUP_PROJECT:-true}

if [ -z "${PROJECT}" ] || \
[ -z "${DEPLOYMENT_NAME}" ] || \
[ -z "${ZONE}" ] || \
Expand All @@ -44,18 +47,34 @@ export USER_SECRET_NAME=${DEPLOYMENT_NAME}-user
export K8S_ADMIN_NAMESPACE=kubeflow-admin
export K8S_NAMESPACE=kubeflow

# Enable GCloud APIs
gcloud services enable deploymentmanager.googleapis.com --project=${PROJECT}
gcloud services enable servicemanagement.googleapis.com --project=${PROJECT}
gcloud services enable iam.googleapis.com --project=${PROJECT}
# Perform project setup
if ${SETUP_PROJECT}; then
# Enable GCloud APIs
gcloud services enable deploymentmanager.googleapis.com \
servicemanagement.googleapis.com \
iam.googleapis.com --project=${PROJECT}

# Set IAM Admin Policy
gcloud projects add-iam-policy-binding ${PROJECT} \
--member serviceAccount:${PROJECT_NUMBER}@cloudservices.gserviceaccount.com \
--role roles/resourcemanager.projectIamAdmin
# Set IAM Admin Policy
gcloud projects add-iam-policy-binding ${PROJECT} \
--member serviceAccount:${PROJECT_NUMBER}@cloudservices.gserviceaccount.com \
--role roles/resourcemanager.projectIamAdmin
else
echo skipping project setup
fi

# Run Deployment Manager
gcloud deployment-manager --project=${PROJECT} deployments create ${DEPLOYMENT_NAME} --config=${CONFIG_FILE}
# Check if it already exists
set +e
gcloud deployment-manager --project=${PROJECT} deployments describe ${DEPLOYMENT_NAME}
exists=$?
set -e

if [ ${exists} -eq 0 ]; then
echo ${DEPLOYMENT_NAME} exists
gcloud deployment-manager --project=${PROJECT} deployments update ${DEPLOYMENT_NAME} --config=${CONFIG_FILE}
else
# Run Deployment Manager
gcloud deployment-manager --project=${PROJECT} deployments create ${DEPLOYMENT_NAME} --config=${CONFIG_FILE}
fi

# TODO(jlewi): We should name the secrets more consistently based on the service account name.
# We will need to update the component configs though
Expand All @@ -77,3 +96,7 @@ kubectl create secret generic --namespace=${K8S_ADMIN_NAMESPACE} admin-gcp-sa --
kubectl create secret generic --namespace=${K8S_NAMESPACE} admin-gcp-sa --from-file=admin-gcp-sa.json=./${SA_EMAIL}.json
kubectl create secret generic --namespace=${K8S_NAMESPACE} user-gcp-sa --from-file=user-gcp-sa.json=./${USER_EMAIL}.json
kubectl create secret generic --namespace=${K8S_NAMESPACE} kubeflow-oauth --from-literal=CLIENT_ID=${CLIENT_ID} --from-literal=CLIENT_SECRET=${CLIENT_SECRET}

# Install the GPU driver. It has no effect on non-GPU nodes.
kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/stable/nvidia-driver-installer/cos/daemonset-preloaded.yaml

62 changes: 62 additions & 0 deletions scripts/deploy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/bash
# This script creates a kubeflow deployment on minikube
# It checks for kubectl, ks
# Creates the ksonnet app, installs packages, components and then applies them
#
# Environment variables (all optional):
#   KUBEFLOW_REPO    - directory holding the Kubeflow source; it is downloaded
#                      when the directory does not exist
#                      (default: ./kubeflow_repo).
#   KUBEFLOW_VERSION - "master", or a release version without the leading "v"
#                      (default: master).
#   KUBEFLOW_DEPLOY  - set to "false" to generate the app without applying it
#                      (default: true).
#   DEPLOYMENT_NAME  - name of the deployment (default: kubeflow).
#   KUBEFLOW_KS_DIR  - directory of the ksonnet app
#                      (default: ./${DEPLOYMENT_NAME}_ks_app).

set -xe

KUBEFLOW_REPO=${KUBEFLOW_REPO:-"$(pwd)/kubeflow_repo"}
KUBEFLOW_VERSION=${KUBEFLOW_VERSION:-"master"}
KUBEFLOW_DEPLOY=${KUBEFLOW_DEPLOY:-true}

if [[ ! -d "${KUBEFLOW_REPO}" ]]; then
  # Release tags carry a "v" prefix; the master branch is used as is.
  if [ "${KUBEFLOW_VERSION}" == "master" ]; then
    TAG=${KUBEFLOW_VERSION}
  else
    TAG=v${KUBEFLOW_VERSION}
  fi

  # --fail makes curl exit non-zero on an HTTP error instead of saving the
  # error page as if it were the archive.
  curl -fL -o "/tmp/kubeflow.${KUBEFLOW_VERSION}.tar.gz" \
    "https://github.com/kubeflow/kubeflow/archive/${TAG}.tar.gz"

  # Unpack straight into KUBEFLOW_REPO and drop the archive's top-level
  # directory, so the script does not depend on how that directory is named
  # (it is not always kubeflow-${TAG}, e.g. for v-prefixed tags).
  mkdir -p "${KUBEFLOW_REPO}"
  if ! tar -xzvf "/tmp/kubeflow.${KUBEFLOW_VERSION}.tar.gz" \
      -C "${KUBEFLOW_REPO}" --strip-components=1; then
    # Remove the partial checkout so a rerun downloads it again.
    rm -rf "${KUBEFLOW_REPO}"
    exit 1
  fi
fi

source "${KUBEFLOW_REPO}/scripts/util.sh"

# TODO(ankushagarwal): verify ks version is higher than 0.11.0
check_install ks
check_install kubectl

# Name of the deployment
DEPLOYMENT_NAME=${DEPLOYMENT_NAME:-"kubeflow"}

KUBEFLOW_KS_DIR=${KUBEFLOW_KS_DIR:-"$(pwd)/${DEPLOYMENT_NAME}_ks_app"}

# Quote the substitutions so paths containing spaces are handled correctly.
cd "$(dirname "${KUBEFLOW_KS_DIR}")"
ks init "$(basename "${KUBEFLOW_KS_DIR}")"
cd "${KUBEFLOW_KS_DIR}"

# Add the local registry
ks registry add kubeflow "${KUBEFLOW_REPO}/kubeflow"

# Install packages
ks pkg install kubeflow/argo
ks pkg install kubeflow/core
ks pkg install kubeflow/examples
ks pkg install kubeflow/katib
ks pkg install kubeflow/mpi-job
ks pkg install kubeflow/pytorch-job
ks pkg install kubeflow/seldon
ks pkg install kubeflow/tf-serving

# Generate all required components
ks generate kubeflow-core kubeflow-core

# Enable collection of anonymous usage metrics
# Skip this step if you don't want to enable collection.
ks param set kubeflow-core reportUsage true
ks param set kubeflow-core usageId "$(uuidgen)"

# Apply the components generated
# Compare as a string rather than executing the variable's value as a command.
if [ "${KUBEFLOW_DEPLOY}" = "true" ]; then
  ks apply default
fi
24 changes: 20 additions & 4 deletions scripts/gke/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,14 @@ KUBEFLOW_REPO=${KUBEFLOW_REPO:-"`pwd`/kubeflow_repo"}
KUBEFLOW_VERSION=${KUBEFLOW_VERSION:-"master"}

if [[ ! -d "${KUBEFLOW_REPO}" ]]; then
git clone https://github.com/kubeflow/kubeflow.git "${KUBEFLOW_REPO}"
cd "${KUBEFLOW_REPO}"
git checkout "${KUBEFLOW_VERSION}"
cd -
# Release tags carry a "v" prefix; the master branch is used as is.
if [ "${KUBEFLOW_VERSION}" == "master" ]; then
  TAG=${KUBEFLOW_VERSION}
else
  TAG=v${KUBEFLOW_VERSION}
fi

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error page as if it were the archive.
curl -fL -o "/tmp/kubeflow.${KUBEFLOW_VERSION}.tar.gz" \
  "https://github.com/kubeflow/kubeflow/archive/${TAG}.tar.gz"

# Unpack straight into KUBEFLOW_REPO and drop the archive's top-level
# directory, so the script does not depend on how that directory is named
# (it is not always kubeflow-${TAG}, e.g. for v-prefixed tags).
mkdir -p "${KUBEFLOW_REPO}"
if ! tar -xzvf "/tmp/kubeflow.${KUBEFLOW_VERSION}.tar.gz" \
    -C "${KUBEFLOW_REPO}" --strip-components=1; then
  # Remove the partial checkout so a rerun downloads it again.
  rm -rf "${KUBEFLOW_REPO}"
  exit 1
fi
fi

source "${KUBEFLOW_REPO}/scripts/util.sh"
Expand Down Expand Up @@ -128,14 +132,26 @@ ks env set default --namespace "${K8S_NAMESPACE}"
ks registry add kubeflow "${KUBEFLOW_REPO}/kubeflow"

# Install all required packages
ks pkg install kubeflow/argo
ks pkg install kubeflow/core
ks pkg install kubeflow/examples
ks pkg install kubeflow/katib
ks pkg install kubeflow/mpi-job
ks pkg install kubeflow/pytorch-job
ks pkg install kubeflow/seldon
ks pkg install kubeflow/tf-serving

# Generate all required components
ks generate kubeflow-core kubeflow-core --jupyterHubAuthenticator iap
ks generate cloud-endpoints cloud-endpoints
ks generate cert-manager cert-manager --acmeEmail=${EMAIL}
ks generate iap-ingress iap-ingress --ipName=${KUBEFLOW_IP_NAME} --hostname=${KUBEFLOW_HOSTNAME}

# Enable collection of anonymous usage metrics
# Skip this step if you don't want to enable collection.
ks param set kubeflow-core reportUsage true
ks param set kubeflow-core usageId $(uuidgen)

# Apply the components generated
ks apply default -c kubeflow-core
ks apply default -c cloud-endpoints
Expand Down
9 changes: 9 additions & 0 deletions scripts/gke/deployment_manager_configs/cluster.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ resources:
members:
{# Deployment manager uses cloudservices account. #}
- {{ 'serviceAccount:' + env['project_number'] + '@cloudservices.gserviceaccount.com' }}
{# Grant permissions needed to submit builds to Google Cloud Container Builder #}
- role: roles/cloudbuild.builds.editor
members:
- {{ 'serviceAccount:' + KF_USER_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }}

{# Grant permissions needed to push the app to a cloud repository. #}
- role: roles/source.admin
Expand Down Expand Up @@ -253,6 +257,11 @@ resources:
policy: $(ref.get-iam-policy-delete)
gcpIamPolicyPatch:
remove:
{# Grant permissions needed to submit builds to Google Cloud Container Builder #}
- role: roles/cloudbuild.builds.editor
members:
- {{ 'serviceAccount:' + KF_USER_NAME + '@' + env['project'] + '.iam.gserviceaccount.com' }}

{# Grant permissions needed to push the app to a cloud repository. #}
- role: roles/source.admin
members:
Expand Down

0 comments on commit 1b5c54a

Please sign in to comment.