Commit 0268b5e: Merge with master

neuromage committed Nov 29, 2018
2 parents 8988617 + 9b77d4a

Showing 357 changed files with 54,546 additions and 1,130 deletions.
4 changes: 2 additions & 2 deletions .cloudbuild.yaml
@@ -165,11 +165,11 @@ steps:
# Build the local pipeline component images
- name: 'gcr.io/cloud-builders/docker'
entrypoint: '/bin/bash'
args: ['-c', 'cd /workspace/components/local/containers/confusion_matrix && ./build.sh -p $PROJECT_ID -t $COMMIT_SHA']
args: ['-c', 'cd /workspace/components/local/confusion_matrix && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
id: 'buildConfusionMatrix'
- name: 'gcr.io/cloud-builders/docker'
entrypoint: '/bin/bash'
args: ['-c', 'cd /workspace/components/local/containers/roc && ./build.sh -p $PROJECT_ID -t $COMMIT_SHA']
args: ['-c', 'cd /workspace/components/local/roc && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
id: 'buildROC'

# Build the tagged samples
91 changes: 91 additions & 0 deletions Gopkg.lock

Some generated files are not rendered by default.

7 changes: 6 additions & 1 deletion README.md
@@ -15,7 +15,12 @@ The Kubeflow pipelines service has the following goals:

## Documentation

Get started with your first pipeline and read further information in the [documentation](https://github.com/kubeflow/pipelines/wiki).
Get started with your first pipeline and read further information in the [Kubeflow Pipelines documentation](https://www.kubeflow.org/docs/guides/pipelines/pipelines-overview/).

## Blog posts

* [Getting started with Kubeflow Pipelines](https://cloud.google.com/blog/products/ai-machine-learning/getting-started-kubeflow-pipelines) (By Amy Unruh)
* [How to create and deploy a Kubeflow Machine Learning Pipeline](https://towardsdatascience.com/how-to-create-and-deploy-a-kubeflow-machine-learning-pipeline-part-1-efea7a4b650f) (By Lak Lakshmanan)

## Acknowledgments

29 changes: 24 additions & 5 deletions backend/Dockerfile
@@ -14,14 +14,33 @@ COPY . .
RUN apk add --update gcc musl-dev
RUN go build -o /bin/apiserver backend/src/apiserver/*.go

FROM python:3.5.0-slim as compiler
FROM python:3.5 as compiler

RUN apt-get update -y && \
apt-get install --no-install-recommends -y -q default-jdk wget

RUN pip3 install setuptools==40.5.0

RUN wget http://central.maven.org/maven2/io/swagger/swagger-codegen-cli/2.3.1/swagger-codegen-cli-2.3.1.jar -O /tmp/swagger-codegen-cli.jar

WORKDIR /go/src/github.com/kubeflow/pipelines
COPY . .
WORKDIR /go/src/github.com/kubeflow/pipelines/sdk/python
RUN ./build.sh /kfp.tar.gz
RUN pip3 install /kfp.tar.gz

# This is hard coded to 0.0.26. Once the kfp SDK release process is automated,
# we can dynamically refer to the version from the same commit SHA.
RUN pip install https://storage.googleapis.com/ml-pipeline/release/0.0.26/kfp-0.0.26.tar.gz --upgrade
WORKDIR /samples
COPY ./samples .
RUN find . -maxdepth 2 -name "*.py" -exec dsl-compile --py {} --output {}.tar.gz \;

#We need to check that all samples compile without error.
#For find, the -exec action is a filter predicate just like -name: it only affects whether a file is "found", not find's exit code.
#One way to solve this is to check whether any python pipelines fail to compile. Here the exit code is the number of such files:
#RUN bash -e -c 'exit $(find . -maxdepth 2 -name "*.py" ! -exec dsl-compile --py {} --output {}.tar.gz \; -print | wc -l)'
#I think it's better to just use a shell loop though.
#RUN for pipeline in $(find . -maxdepth 2 -name '*.py' -type f); do dsl-compile --py "$pipeline" --output "$pipeline.tar.gz"; done
#The "for" loop breaks on all whitespace, so we either need to override IFS or use the "read" command instead.
RUN find . -maxdepth 2 -name '*.py' -type f | while read pipeline; do dsl-compile --py "$pipeline" --output "$pipeline.tar.gz"; done


FROM alpine

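For context on the sample-compilation step in the Dockerfile above: `dsl-compile` is the command-line wrapper around the kfp compiler, so the `find ... | while read ...` loop does, per sample, what a user does when compiling a pipeline programmatically. A minimal sketch, assuming the kfp SDK of this era; the pipeline name, image, and command below are illustrative, not taken from this commit:

```python
# Hedged sketch: compile a pipeline function into a .tar.gz package,
# the same operation `dsl-compile --py <file> --output <file>.tar.gz` performs.
import kfp.dsl as dsl
import kfp.compiler as compiler


@dsl.pipeline(name='echo-pipeline', description='Illustrative one-step pipeline.')
def echo_pipeline(message='hello'):
    # A single containerized step; the image and command are hypothetical.
    dsl.ContainerOp(
        name='echo',
        image='alpine:3.8',
        command=['sh', '-c', 'echo "%s"' % message],
    )


if __name__ == '__main__':
    # Produces echo_pipeline.py.tar.gz, mirroring the Dockerfile's compile loop.
    compiler.Compiler().compile(echo_pipeline, 'echo_pipeline.py.tar.gz')
```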
3 changes: 2 additions & 1 deletion backend/src/apiserver/interceptor.go
@@ -26,13 +26,14 @@ import (
// to be executed before and after all API handler calls, e.g. Logging, error handling.
// For more details, see https://github.com/grpc/grpc-go/blob/master/interceptor.go
func apiServerInterceptor(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) {
glog.Infof("%v called", info.FullMethod)
glog.Infof("%v handler starting", info.FullMethod)
resp, err = handler(ctx, req)
if err != nil {
util.LogError(util.Wrapf(err, "%s call failed", info.FullMethod))
// Convert error to gRPC errors
err = util.ToGRPCError(err)
return
}
glog.Infof("%v handler finished", info.FullMethod)
return
}
44 changes: 8 additions & 36 deletions backend/test/test_utils.go
@@ -23,6 +23,8 @@ import (

"testing"

"net/http"

"github.com/cenkalti/backoff"
experimentparams "github.com/kubeflow/pipelines/backend/api/go_http_client/experiment_client/experiment_service"
jobparams "github.com/kubeflow/pipelines/backend/api/go_http_client/job_client/job_service"
@@ -31,59 +33,29 @@ import (
"github.com/kubeflow/pipelines/backend/src/common/client/api_server"
"github.com/pkg/errors"
"github.com/stretchr/testify/assert"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
)

const (
// ML pipeline API server root URL
mlPipelineAPIServerBase = "/api/v1/namespaces/%s/services/ml-pipeline:8888/proxy/apis/v1beta1/%s"
)

var namespace = flag.String("namespace", "kubeflow", "The namespace ml pipeline deployed to")
var initializeTimeout = flag.Duration("initializeTimeout", 2*time.Minute, "Duration to wait for test initialization")

func getKubernetesClient() (*kubernetes.Clientset, error) {
// use the current context in kubeconfig
config, err := rest.InClusterConfig()
if err != nil {
return nil, errors.Wrapf(err, "Failed to get cluster config during K8s client initialization")
}
// create the clientset
clientSet, err := kubernetes.NewForConfig(config)
if err != nil {
return nil, errors.Wrapf(err, "Failed to create client set during K8s client initialization")
}

return clientSet, nil
}

func waitForReady(namespace string, initializeTimeout time.Duration) error {
clientSet, err := getKubernetesClient()
if err != nil {
return errors.Wrapf(err, "Failed to get K8s client set when waiting for ML pipeline to be ready")
}

var operation = func() error {
response := clientSet.RESTClient().Get().
AbsPath(fmt.Sprintf(mlPipelineAPIServerBase, namespace, "healthz")).Do()
if response.Error() == nil {
response, err := http.Get(fmt.Sprintf("http://ml-pipeline.%s.svc.cluster.local:8888/apis/v1beta1/healthz", namespace))
if err == nil {
return nil
}
var code int
response.StatusCode(&code)
// we wait only on 503 service unavailable. Stop retry otherwise.
if code != 503 {
return backoff.Permanent(errors.Wrapf(response.Error(), "Waiting for ml pipeline failed with non retriable error."))
if response.StatusCode != 503 {
return backoff.Permanent(errors.Wrapf(err, "Waiting for ml pipeline failed with non retriable error."))
}
return response.Error()
return err
}

b := backoff.NewExponentialBackOff()
b.MaxElapsedTime = initializeTimeout
err = backoff.Retry(operation, b)
err := backoff.Retry(operation, b)
return errors.Wrapf(err, "Waiting for ml pipeline failed after all attempts.")
}

2 changes: 1 addition & 1 deletion bootstrapper.yaml
@@ -65,7 +65,7 @@ spec:
spec:
containers:
- name: deploy
image: gcr.io/ml-pipeline/bootstrapper:0.1.2 #TODO-release: update the release tag for the next release
image: gcr.io/ml-pipeline/bootstrapper:0.1.3-rc.2 #TODO-release: update the release tag for the next release
imagePullPolicy: 'Always'
# Additional parameter available:
args: [
6 changes: 3 additions & 3 deletions components/README.md
@@ -1,9 +1,9 @@
# Kubeflow pipeline components

Kubeflow pipeline components are implementations of Kubeflow pipeline tasks. Each task takes
one or more [artifacts](https://github.com/kubeflow/pipelines/wiki/Concepts#step-output-artifacts)
one or more [artifacts](https://www.kubeflow.org/docs/guides/pipelines/pipelines-concepts/#step-output-artifacts)
as input and may produce one or more
[artifacts](https://github.com/kubeflow/pipelines/wiki/Concepts#step-output-artifacts) as output.
[artifacts](https://www.kubeflow.org/docs/guides/pipelines/pipelines-concepts/#step-output-artifacts) as output.


**Example: XGBoost DataProc components**
@@ -31,4 +31,4 @@ Note the naming convention for client code and runtime code—for a task named `mytask`:
* The `mytask.py` program contains the client code.
* The `mytask` directory contains all the runtime code.

See [how to build your own components](https://github.com/kubeflow/pipelines/wiki/Build-Your-Own-Component)
See [how to build your own components](https://www.kubeflow.org/docs/guides/pipelines/build-component/)
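
To make the convention above concrete (a hedged sketch only, with hypothetical names and an illustrative image tag, not code from this repository), the `mytask.py` client wrapper usually just returns a `dsl.ContainerOp` that points at the image built from the `mytask` runtime directory:

```python
import kfp.dsl as dsl


def mytask_op(input_path, output_path, step_name='mytask'):
    """Client-side wrapper for the hypothetical `mytask` runtime image."""
    return dsl.ContainerOp(
        name=step_name,
        # Image built from the `mytask` runtime directory; the tag is illustrative.
        image='gcr.io/my-project/ml-pipeline-mytask:latest',
        arguments=[
            '--input', input_path,
            '--output', output_path,
        ],
        # Files the runtime code writes so that downstream steps can consume them.
        file_outputs={'output': '/tmp/output.txt'},
    )
```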
2 changes: 1 addition & 1 deletion components/kubeflow/launcher/kubeflow_tfjob_launcher_op.py
@@ -17,7 +17,7 @@
def kubeflow_tfjob_launcher_op(container_image, command, number_of_workers: int, number_of_parameter_servers: int, tfjob_timeout_minutes: int, output_dir=None, step_name='TFJob-launcher'):
return dsl.ContainerOp(
name = step_name,
image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf:0.1.0',#TODO-release: update the release tag for the next release
image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf:0.1.3-rc.2',#TODO-release: update the release tag for the next release
arguments = [
'--workers', number_of_workers,
'--pss', number_of_parameter_servers,
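For orientation, the launcher op above is consumed from a pipeline definition like any other component wrapper. A hedged sketch follows; the import path, pipeline name, trainer image, and command are hypothetical and not part of this commit:

```python
import kfp.dsl as dsl
# Assumes the launcher module is importable from the pipeline's working directory.
from kubeflow_tfjob_launcher_op import kubeflow_tfjob_launcher_op


@dsl.pipeline(name='tfjob-launcher-demo',
              description='Illustrative use of the TFJob launcher op.')
def tfjob_demo_pipeline(output_dir='gs://my-bucket/tfjob-output'):
    # Launch a distributed TFJob with 2 workers and 1 parameter server,
    # waiting up to 60 minutes for it to complete.
    kubeflow_tfjob_launcher_op(
        container_image='gcr.io/my-project/my-trainer:latest',
        command=['python', '-m', 'trainer.task'],
        number_of_workers=2,
        number_of_parameter_servers=1,
        tfjob_timeout_minutes=60,
        output_dir=output_dir,
    )
```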
6 changes: 3 additions & 3 deletions components/kubeflow/launcher/src/train.template.yaml
@@ -26,7 +26,7 @@ spec:
spec:
containers:
- name: tensorflow
image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:0.1.0 #TODO-release: update the release tag for the next release
image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:0.1.3-rc.2 #TODO-release: update the release tag for the next release
command:
- python
- -m
@@ -38,7 +38,7 @@ spec:
spec:
containers:
- name: tensorflow
image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:0.1.0 #TODO-release: update the release tag for the next release
image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:0.1.3-rc.2 #TODO-release: update the release tag for the next release
command:
- python
- -m
@@ -50,7 +50,7 @@ spec:
spec:
containers:
- name: tensorflow
image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:0.1.0 #TODO-release: update the release tag for the next release
image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:0.1.3-rc.2 #TODO-release: update the release tag for the next release
command:
- python
- -m
File renamed without changes.
@@ -15,10 +15,11 @@


mkdir -p ./build
rsync -arvp "../../evaluation"/ ./build/
rsync -arvp "../confusion_matrix/src"/ ./build/
rsync -arvp "../roc/src"/ ./build/

cp ../../../license.sh ./build
cp ../../../third_party_licenses.csv ./build
cp ../../license.sh ./build
cp ../../third_party_licenses.csv ./build

docker build -t ml-pipeline-local-base .
rm -rf ./build
@@ -44,7 +44,7 @@ fi

# build base image
pushd ../base
./build.sh
./build_image.sh
popd

docker build -t ${LOCAL_IMAGE_NAME} .
File renamed without changes.
@@ -44,7 +44,7 @@ fi

# build base image
pushd ../base
./build.sh
./build_image.sh
popd

docker build -t ${LOCAL_IMAGE_NAME} .
File renamed without changes.
19 changes: 12 additions & 7 deletions contrib/components/openvino/model_convert/README.md
@@ -236,17 +236,22 @@ Kaldi-specific parameters:
The generated model files, in IR format with .bin and .xml extensions, are uploaded to the specified output folder.
The component also saves the generated model files locally as `/tmp/model.bin` and `/tmp/model.xml`,
so these paths can be used in the Argo pipeline for storing the workflow artifacts.
The component also creates 3 files containing the paths to the generated model:
- `/tmp/output.txt` - GCS path to the folder containing the generated model files.
- `/tmp/bin_path.txt` - GCS path to the weights model file
- `/tmp/xml_path.txt` - GCS path to the graph model file
They can be used as parameters to be passed to other jobs in ML pipelines.
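
As a hedged illustration of that last point (the images and the downstream step are hypothetical, not part of this commit), a pipeline can surface those files with `file_outputs` and hand the recorded GCS paths to the next step:

```python
import kfp.dsl as dsl


def model_convert_op(input_path, mo_options, output_path):
    """Wraps the OpenVINO model-convert image; the image tag is illustrative."""
    return dsl.ContainerOp(
        name='model-convert',
        image='gcr.io/my-project/openvino-model-convert:latest',
        arguments=[
            '--input_path', input_path,
            '--mo_options', mo_options,
            '--output_path', output_path,
        ],
        # Expose the paths the component writes under /tmp as step outputs.
        file_outputs={
            'output': '/tmp/output.txt',
            'bin_path': '/tmp/bin_path.txt',
            'xml_path': '/tmp/xml_path.txt',
        },
    )


@dsl.pipeline(name='convert-and-report',
              description='Illustrative downstream use of the converted model paths.')
def convert_pipeline(input_path='gs://tensorflow_model_path/resnet/1/saved_model.pb',
                     output_path='gs://tensorflow_model_path/resnet/dldt/1'):
    convert = model_convert_op(input_path, '--saved_model_dir .', output_path)
    # The GCS path of the generated IR graph file is available as a step output
    # and can be passed to any downstream op as an argument.
    dsl.ContainerOp(
        name='report',
        image='alpine:3.8',
        command=['sh', '-c', 'echo "converted model graph: %s"' % convert.outputs['xml_path']],
    )
```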
## Examples
Input path - gs://tensorflow_model_path/resnet/1/saved_model.pb
MO options - --saved_model_dir .
Input path - gs://tensorflow_model_path/resnet/1/saved_model.pb<br />
MO options - --saved_model_dir .<br />
Output path - gs://tensorflow_model_path/resnet/1
Input path - gs://tensorflow_model_path/resnet/1<br />
MO options - --saved_model_dir 1<br />
Output path - gs://tensorflow_model_path/resnet/dldt/1<br />
## Building docker image
@@ -260,7 +265,7 @@ This component requires GCP authentication token in json format generated for the service account
which has access to GCS location. In the example below it is in key.json in the current path.
```bash
COMMAND="python3 ../convert_model.py --mo_options \"--saved_model_dir .\" --input_path gs://tensorflow_model_path/resnet/1/saved_model.pb --output_path gs://tensorflow_model_path/resnet/1"
COMMAND="convert_model.py --mo_options \"--saved_model_dir .\" --input_path gs://tensorflow_model_path/resnet/1/saved_model.pb --output_path gs://tensorflow_model_path/resnet/1"
docker run --rm -it -v $(pwd)/key.json:/etc/credentials/gcp-key.json \
-e GOOGLE_APPLICATION_CREDENTIALS=/etc/credentials/gcp-key.json <image_name> $COMMAND
Expand Down