Refactor the TFServing component to better support GPUs and specific …

…clouds (#387) * Refactor the TFServing component to better support GPUs and specific clouds. * To support GPUs and specific clouds we refactor the component to make it easy to override the parts we care about (e.g. container environment variables, resources, etc...). * We do this by moving the things we care about up to the root of tf-serving.libsonnet. * We rely on jsonnet late binding (http://jsonnet.org/docs/tutorial.html). Late binding allows us to devine dictionaries (e.g. params, tfServingContainer) in tf-serving.libsonnet. We can then create manifests based on those objects (e.g. tfDeployment). We can then override values (e.g. params) and the derived objecs (e.g. tfDeployment) will use the overwritten values. * We introduce a parameter "cloud" which allows us to control which "prototype" to use. We use this to use cloud specific customizations; like setting the environment variables on AWS to use S3. * Late binding also makes it possible to select an appropriate default image based on whether GPUs are bing used or not while still allowing the user to override the images. * We remove parameter definitions from the prototypes. The set of parameters ends up being conditional based on flags like cloud, GPUs so its not clear how scalable that was. * Use camelCase not underscores for parameters. See #303. Related Issues: Fix #292 Update the test to work with the changes. * Parameters are now camelCase. They also aren't parameters of the prototype so we can't set them in the call to generate. * So we need to modify deploy to take a list of the parameters to set on the component. * jsonnet format.
kubeflow · Mar 8, 2018 · 179abd0 · 179abd0
1 parent 7684203
commit 179abd0
Show file tree

Hide file tree

Showing 6 changed files with 377 additions and 224 deletions.
diff --git a/components/k8s-model-server/images/releaser/components/params.libsonnet b/components/k8s-model-server/images/releaser/components/params.libsonnet
@@ -22,7 +22,7 @@
     // Test deploying a GPU model.
     gpu_model: {
       http_proxy_image: "gcr.io/kubeflow/http-proxy:1.0",
-      model_path: "gs://some-bucket/some/model",
+      model_path: "gs://kubeflow-ci-test-models/mnist/",
       model_server_image: "gcr.io/kubeflow-images-staging/tf-model-server-gpu:v20180305-pr362-7f250ae-5cc7",
       name: "gpu_model",
       namespace: "default",

diff --git a/components/k8s-model-server/images/releaser/components/workflows.libsonnet b/components/k8s-model-server/images/releaser/components/workflows.libsonnet
@@ -21,6 +21,8 @@
 
 
   // Default parameters.
+  //
+  // TODO(jlewi): Use camelCase consistently.
   defaultParams:: {
     bucket: "mlkube-testing_temp",
     commit: "master",
@@ -85,6 +87,19 @@
       local cpuImage = params.registry + "/tf-model-server-cpu" + ":" + params.versionTag;
       local gpuImage = params.registry + "/tf-model-server-gpu" + ":" + params.versionTag;
 
+      // Parameters to set on the modelServer component
+      local deployParams = {
+        name: "inception",
+        namespace: stepsNamespace,
+        modelPath: "gs://kubeflow-models/inception",
+      } + if build_image then
+        {
+          modelServerImage: cpuImage,
+        } else {};
+
+      local toPair = function(k, v) k + "=" + v;
+      local deployParamsList = std.join(",", [toPair(k, deployParams[k]) for k in std.objectFields(deployParams)]);
+
       // Build an Argo template to execute a particular command.
       // step_name: Name for the template
       // command: List to pass as the container command.
@@ -203,12 +218,14 @@
         "--cluster=" + cluster,
         "--zone=" + zone,
         "--github_token=$(GITHUB_TOKEN)",
+        // TODO(jlewi): This is duplicative with params. We should probably get
+        // rid of this and just treat namespace as another parameter.
         "--namespace=" + stepsNamespace,
         "--test_dir=" + testDir,
         "--artifacts_dir=" + artifactsDir,
-        "setup",
-        "--deploy_tf_serving=true",
-      ] + if build_image then ["--model_server_image=" + cpuImage] else [];
+        "deploy_model",
+        "--params=" + deployParamsList,
+      ];
 
       {
         apiVersion: "argoproj.io/v1alpha1",

diff --git a/kubeflow/tf-serving/prototypes/tf-serving-all-features.jsonnet b/kubeflow/tf-serving/prototypes/tf-serving-all-features.jsonnet
@@ -3,55 +3,21 @@
 // @description TensorFlow serving
 // @shortDescription A TensorFlow serving deployment
 // @param name string Name to give to each of the components
-// @optionalParam namespace string default Namespace
-// @param model_path string Path to the model. This can be a GCS path.
-// @optionalParam model_name string  Name of the model in the serving api.
-// @optionalParam model_server_image string gcr.io/kubeflow-images-staging/tf-model-server:v20180227-master Container for TF model server
-// @optionalParam http_proxy_image string gcr.io/kubeflow/http-proxy:1.0 Container for http_proxy of TF model server
-// @optionalParam service_type string ClusterIP The service type for TFServing deployment.
-// @optionalParam s3_secret_name string  Name of the k8s secrets containing S3 credentials.
-// @optionalParam s3_secret_accesskeyid_key_name string aws-access-key-id Name of the key in the k8s secret containing AWS_ACCESS_KEY_ID.
-// @optionalParam s3_secret_secretaccesskey_key_name string aws-secret-access-key Name of the key in the k8s secret containing AWS_SECRET_ACCESS_KEY.
-// @optionalParam s3_aws_region string us-west-1 S3 Region
-// @optionalParam s3_use_https string true Whether or not to use https for S3 connections
-// @optionalParam s3_verify_ssl string true Whether or not to verify https certificates for S3 connections
-// @optionalParam s3_endpoint string http://s3.us-west-1.amazonaws.com URL for your s3-compatible endpoint.
 
 // TODO(https://github.com/ksonnet/ksonnet/issues/222): We have to add namespace as an explicit parameter
 // because ksonnet doesn't support inheriting it from the environment yet.
 
 local k = import "k.libsonnet";
-local tfServing = import "kubeflow/tf-serving/tf-serving.libsonnet";
 
+// ksonnet appears to require name be a parameter of the prototype which is why we handle it differently.
 local name = import "param://name";
-local namespace = import "param://namespace";
-local modelPath = import "param://model_path";
-local modelNametmp = import "param://model_name";
-local modelServerImage = import "param://model_server_image";
-local httpProxyImage = import "param://http_proxy_image";
-local serviceType = import "param://service_type";
-local s3SecretName = import "param://s3_secret_name";
-local s3SecretAccesskeyidKeyName = import "param://s3_secret_accesskeyid_key_name";
-local s3SecretSecretaccesskeyKeyName = import "param://s3_secret_secretaccesskey_key_name";
-local s3awsRegion = import "param://s3_aws_region";
-local s3UseHTTPS = import "param://s3_use_https";
-local s3VerifySSL = import "param://s3_verify_ssl";
-local s3Endpoint = import "param://s3_endpoint";
 
+local tfServingBase = import "kubeflow/tf-serving/tf-serving.libsonnet";
+local tfServing = tfServingBase {
+  // Override parameters with user supplied parameters.
+  params+: params {
+    name: name,
+  },
+};
 
-local modelServerEnv = if s3SecretName != "" then [
-  { name: "AWS_ACCESS_KEY_ID", valueFrom: { secretKeyRef: { name: s3SecretName, key: s3SecretAccesskeyidKeyName } } },
-  { name: "AWS_SECRET_ACCESS_KEY", valueFrom: { secretKeyRef: { name: s3SecretName, key: s3SecretSecretaccesskeyKeyName } } },
-  { name: "AWS_REGION", value: s3awsRegion },
-  { name: "S3_REGION", value: s3awsRegion },
-  { name: "S3_USE_HTTPS", value: s3UseHTTPS },
-  { name: "S3_VERIFY_SSL", value: s3VerifySSL },
-  { name: "S3_ENDPOINT", value: s3Endpoint },
-] else [];
-
-local modelName = if modelNametmp == "" then name else import "param://model_name";
-
-std.prune(k.core.v1.list.new([
-  tfServing.parts.deployment.modelServer(name, namespace, modelPath, modelName, modelServerImage, modelServerEnv, httpProxyImage),
-  tfServing.parts.deployment.modelService(name, namespace, serviceType),
-]))
+std.prune(k.core.v1.list.new(tfServing.components))