Support for gcs access without credentials and multi-model serving e2e test with sklearn/xgboost examples + docs #1306

Merged
merged 23 commits into from
Jan 22, 2021
23 commits
93f7dc5
Imported trained model spec and model spec
abchoo Jan 18, 2021
cd4f9c7
Update description: trained model -> TrainedModel
abchoo Jan 18, 2021
edaf7d0
Considers model name in predict query
abchoo Jan 18, 2021
9b100a9
Multi model serving test for sklearn and xgboost
abchoo Jan 18, 2021
a0e58a0
Constants for v1alpha versions
abchoo Jan 18, 2021
98ea9b9
Provider client is generated when downloading
abchoo Jan 18, 2021
8056ce8
New method to create/deploy trained model object
abchoo Jan 18, 2021
3c39c54
Example on running multi model serving
abchoo Jan 19, 2021
a174505
Snake case variable -> Camel case
abchoo Jan 19, 2021
c125a2f
Using new version constants
abchoo Jan 19, 2021
154424a
CreateProviderIfNotExists will return provider and error
abchoo Jan 19, 2021
9b6d50b
Updated to use GetProvider
abchoo Jan 19, 2021
b736afe
Corrected the file path
abchoo Jan 19, 2021
a626c22
Removed object file path when creating fileName
abchoo Jan 19, 2021
187fb7a
Added overview of multi-model serving
abchoo Jan 20, 2021
054c1b3
Overview of inferenceservice, trainedmodel, and model agent
abchoo Jan 20, 2021
ad717d3
Removed check for version in create_trained_model
abchoo Jan 20, 2021
095e1c3
Multi-model serving example for sklearn
abchoo Jan 20, 2021
7349a1b
Moved provider creation to package agent storage
abchoo Jan 20, 2021
c53961e
Fixed up confusing wording
abchoo Jan 20, 2021
f51ca06
Fixed up typo
abchoo Jan 20, 2021
8268464
Included detailed diagram
abchoo Jan 21, 2021
96f65ee
Added general overview
abchoo Jan 21, 2021
47 changes: 0 additions & 47 deletions cmd/agent/main.go
@@ -1,22 +1,14 @@
 package main
 
 import (
-	gstorage "cloud.google.com/go/storage"
 	"context"
 	"flag"
 	"fmt"
-	"github.com/aws/aws-sdk-go/aws"
-	"github.com/aws/aws-sdk-go/aws/session"
-	"github.com/aws/aws-sdk-go/service/s3"
-	"github.com/aws/aws-sdk-go/service/s3/s3manager"
-	"github.com/googleapis/google-cloud-go-testing/storage/stiface"
 	"github.com/kelseyhightower/envconfig"
 	"github.com/kubeflow/kfserving/pkg/agent"
 	"github.com/kubeflow/kfserving/pkg/agent/storage"
 	"github.com/kubeflow/kfserving/pkg/apis/serving/v1beta1"
 	"github.com/kubeflow/kfserving/pkg/batcher"
-	gcscredential "github.com/kubeflow/kfserving/pkg/credentials/gcs"
-	s3credential "github.com/kubeflow/kfserving/pkg/credentials/s3"
 	kfslogger "github.com/kubeflow/kfserving/pkg/logger"
 	"github.com/pkg/errors"
 	"go.uber.org/zap"
@@ -34,7 +26,6 @@ import (
 	"net/url"
 	"os"
 	"strconv"
-	"strings"
 	"time"
 )

@@ -276,44 +267,6 @@ func startModelPuller(logger *zap.SugaredLogger) {
 		Logger: logger,
 	}
 
-	if endpoint, ok := os.LookupEnv(s3credential.AWSEndpointUrl); ok {
-		region, _ := os.LookupEnv(s3credential.AWSRegion)
-		useVirtualBucketString, ok := os.LookupEnv(s3credential.S3UseVirtualBucket)
-		useVirtualBucket := true
-		if ok && strings.ToLower(useVirtualBucketString) == "false" {
-			useVirtualBucket = false
-		}
-		sess, err := session.NewSession(&aws.Config{
-			Endpoint:         aws.String(endpoint),
-			Region:           aws.String(region),
-			S3ForcePathStyle: aws.Bool(!useVirtualBucket)},
-		)
-		logger.Infof("Initializing s3 client with endpoint %s, region %s", endpoint, region)
-		if err != nil {
-			panic(err)
-		}
-		sessionClient := s3.New(sess)
-		downloader.Providers[storage.S3] = &storage.S3Provider{
-			Client: sessionClient,
-			Downloader: s3manager.NewDownloaderWithClient(sessionClient, func(d *s3manager.Downloader) {
-			}),
-		}
-	}
-
-	if _, ok := os.LookupEnv(gcscredential.GCSCredentialEnvKey); ok {
-		// GCS relies on environment variable GOOGLE_APPLICATION_CREDENTIALS to point to the service-account-key
-		// If set, it will be automatically be picked up by the client.
-		logger.Info("Initializing gcs client, using existing GOOGLE_APPLICATION_CREDENTIALS variable.")
-		ctx := context.Background()
-		client, err := gstorage.NewClient(ctx)
-		if err != nil {
-			panic(err)
-		}
-		downloader.Providers[storage.GCS] = &storage.GCSProvider{
-			Client: stiface.AdaptClient(client),
-		}
-	}
-
 	watcher := agent.NewWatcher(*configDir, *modelDir, logger)
 	logger.Info("Starting puller")
 	agent.StartPuller(downloader, watcher.ModelEvents, logger)
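The provider setup deleted here moves into the `pkg/agent/storage` package, where clients are created lazily at download time (see the downloader.go change below). A minimal sketch of how a GCS client can be built even without credentials, per the PR title's "gcs access without credentials" — the helper name and placement are assumptions, not the PR's literal code:

```go
package storage

import (
	"context"
	"os"

	gstorage "cloud.google.com/go/storage"
	"github.com/googleapis/google-cloud-go-testing/storage/stiface"
	"google.golang.org/api/option"
)

// newGCSProvider is a hypothetical helper: when GOOGLE_APPLICATION_CREDENTIALS
// is unset, it falls back to an unauthenticated client so public buckets
// (e.g. gs://kfserving-samples) can still be read.
func newGCSProvider(ctx context.Context) (*GCSProvider, error) {
	var opts []option.ClientOption
	if _, ok := os.LookupEnv("GOOGLE_APPLICATION_CREDENTIALS"); !ok {
		opts = append(opts, option.WithoutAuthentication())
	}
	client, err := gstorage.NewClient(ctx, opts...)
	if err != nil {
		return nil, err
	}
	return &GCSProvider{Client: stiface.AdaptClient(client)}, nil
}
```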
28 changes: 28 additions & 0 deletions docs/MULTIMODELSERVING_GUIDE.md
@@ -0,0 +1,28 @@
# Multi-Model Serving
## Introduction

### Problem

With machine learning approaches becoming more widely adopted in organizations, there is a trend toward deploying many models. Personalized experiences often require training a large number of models, and training a separate model per user also helps isolate each user's data for privacy.
When KFServing was originally designed, it followed the one-model, one-server paradigm, which presents a challenge for a Kubernetes cluster when users want to deploy many models. For example, Kubernetes sets a default limit of 110 pods per node, so a 100-node cluster can host at most 11,000 pods, which is often not enough.
Additionally, there is no easy way to request a fraction of a GPU in Kubernetes, so it makes sense to load multiple models into one model server to share GPU resources. KFServing's multi-model serving addresses this by loading multiple models into a single server while keeping the out-of-the-box serverless features.

### Benefits
- Allow multiple models to share the same GPU
- Increase the total number of models that can be deployed in a cluster
- Reduce model deployment resource overhead
  - Each InferenceService replica needs some CPU and memory overhead
  - Loading multiple models into one InferenceService is more resource efficient
- Allow hundreds of thousands of models to be deployed with ease and monitored at scale

### Design
![Multi-model Diagram](./diagrams/mms-design.png)

### Integration with model servers
Multi-model serving works with any model server that implements the KFServing V2 protocol. More specifically, if the model server implements the load and unload endpoints, it can use KFServing's TrainedModel (sketched below).
Currently, the supported model servers are Triton, SKLearn, and XGBoost. Click on [Triton](https://github.com/kubeflow/kfserving/tree/master/docs/samples/v1beta1/triton/multimodel) or [SKLearn](https://github.com/kubeflow/kfserving/tree/master/docs/samples/v1beta1/sklearn/multimodel) to see examples of how to run multi-model serving!
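As an illustration, the load call issued against such a server might look like the sketch below; the `/v2/repository/models/{name}/load` path follows the V2 repository extension and is an assumption here, not something this PR defines:

```go
package main

import (
	"fmt"
	"net/http"
)

// loadModel asks a V2-protocol model server to load a model by name.
// The repository-extension path is assumed; unloading is the same call
// with "unload" in place of "load".
func loadModel(serverURL, modelName string) error {
	url := fmt.Sprintf("%s/v2/repository/models/%s/load", serverURL, modelName)
	resp, err := http.Post(url, "application/json", nil)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("loading %s failed: %s", modelName, resp.Status)
	}
	return nil
}
```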



For more in-depth details, check out this [document](https://docs.google.com/document/d/11qETyR--oOIquQke-DCaLsZY75vT1hRu21PesSUDy7o).
Binary file added docs/diagrams/mms-design.png
159 changes: 159 additions & 0 deletions docs/samples/v1beta1/sklearn/multimodel/README.md
@@ -0,0 +1,159 @@
# Multi-Model Serving with Sklearn

## Overview

The general overview of multi-model serving:
1. Deploy an InferenceService with the framework specified
2. Deploy the TrainedModel(s) with the storageUri, framework, and memory
3. A config map is created containing the details of each trained model
4. The model agent loads the models listed in the model config
5. An endpoint is set up and is ready to serve the model(s)
6. Deleting a TrainedModel removes its entry from the config map, which causes the model agent to unload the model
7. Deleting the InferenceService causes its TrainedModel(s) to be deleted


## Example
First, you should have KFServing installed; check out the [installation instructions](https://github.com/kubeflow/kfserving#install-kfserving) if you have not installed it yet.

The content below is in the file `inferenceservice.yaml`.

```yaml
apiVersion: "serving.kubeflow.org/v1beta1"
kind: "InferenceService"
metadata:
  name: "sklearn-iris-example"
spec:
  predictor:
    minReplicas: 1
    sklearn:
      protocolVersion: v1
      name: "sklearn-iris-predictor"
      resources:
        limits:
          cpu: 100m
          memory: 256Mi
        requests:
          cpu: 100m
          memory: 256Mi
```
Run the command `kubectl apply -f inferenceservice.yaml` to create the InferenceService. Check that the service is properly deployed by running `kubectl get inferenceservice`. The output should be similar to the following.
```yaml
NAME URL READY PREV LATEST PREVROLLEDOUTREVISION LATESTREADYREVISION AGE
sklearn-iris-example http://sklearn-iris-example.default.example.com True 100 sklearn-iris-example-predictor-default-kgtql 22s
```

Next, the trained models are defined in the file `trainedmodels.yaml`, shown below.
```yaml
apiVersion: "serving.kubeflow.org/v1alpha1"
kind: "TrainedModel"
metadata:
  name: "model1-sklearn"
spec:
  inferenceService: "sklearn-iris-example"
  model:
    storageUri: "gs://kfserving-samples/models/sklearn/iris"
    framework: "sklearn"
    memory: "256Mi"
---
apiVersion: "serving.kubeflow.org/v1alpha1"
kind: "TrainedModel"
metadata:
  name: "model2-sklearn"
spec:
  inferenceService: "sklearn-iris-example"
  model:
    storageUri: "gs://kfserving-samples/models/sklearn/iris"
    framework: "sklearn"
    memory: "256Mi"
```
Run the command `kubectl apply -f trainedmodels.yaml` to create the trained models. Run `kubectl get trainedmodel` to view the resource.

Run `kubectl get po` to get the name of the predictor pod. The name should be similar to `sklearn-iris-example-predictor-default-xxxxx-deployment-xxxxx`.

Run `kubectl logs <name-of-predictor-pod> -c agent` to check whether the models are properly loaded. You should see output similar to the following; wait a few minutes and try again if you do not see "Downloading model" yet.
```json
{"level":"info","ts":"2021-01-20T16:24:00.421Z","caller":"agent/puller.go:129","msg":"Downloading model from gs://kfserving-samples/models/sklearn/iris"}
{"level":"info","ts":"2021-01-20T16:24:00.421Z","caller":"agent/downloader.go:47","msg":"Downloading gs://kfserving-samples/models/sklearn/iris to model dir /mnt/models"}
{"level":"info","ts":"2021-01-20T16:24:00.424Z","caller":"agent/puller.go:121","msg":"Worker is started for model1-sklearn"}
{"level":"info","ts":"2021-01-20T16:24:00.424Z","caller":"agent/puller.go:129","msg":"Downloading model from gs://kfserving-samples/models/sklearn/iris"}
{"level":"info","ts":"2021-01-20T16:24:00.424Z","caller":"agent/downloader.go:47","msg":"Downloading gs://kfserving-samples/models/sklearn/iris to model dir /mnt/models"}
{"level":"info","ts":"2021-01-20T16:24:09.255Z","caller":"agent/puller.go:146","msg":"Successfully loaded model model2-sklearn"}
{"level":"info","ts":"2021-01-20T16:24:09.256Z","caller":"agent/puller.go:114","msg":"completion event for model model2-sklearn, in flight ops 0"}
{"level":"info","ts":"2021-01-20T16:24:09.260Z","caller":"agent/puller.go:146","msg":"Successfully loaded model model1-sklearn"}
{"level":"info","ts":"2021-01-20T16:24:09.260Z","caller":"agent/puller.go:114","msg":"completion event for model model1-sklearn, in flight ops 0"}
```

Run the command `kubectl get cm modelconfig-sklearn-iris-example-0 -oyaml` to get the config map. The output should be similar to the following.
```yaml
apiVersion: v1
data:
  models.json: '[{"modelName":"model1-sklearn","modelSpec":{"storageUri":"gs://kfserving-samples/models/sklearn/iris","framework":"sklearn","memory":"256Mi"}},{"modelName":"model2-sklearn","modelSpec":{"storageUri":"gs://kfserving-samples/models/sklearn/iris","framework":"sklearn","memory":"256Mi"}}]'
kind: ConfigMap
metadata:
  creationTimestamp: "2021-01-20T16:22:52Z"
  name: modelconfig-sklearn-iris-example-0
  namespace: default
  ownerReferences:
  - apiVersion: serving.kubeflow.org/v1beta1
    blockOwnerDeletion: true
    controller: true
    kind: InferenceService
    name: sklearn-iris-example
    uid: f91d8414-0bfa-4182-af25-5d0c1a7eff4e
  resourceVersion: "1958556"
  selfLink: /api/v1/namespaces/default/configmaps/modelconfig-sklearn-iris-example-0
  uid: 79e68f80-e31a-419b-994b-14a6159d8cc2
```

The models will be ready to serve once they are successfully loaded.

Check to see which case applies to you.

If the EXTERNAL-IP value is set, your environment has an external load balancer that you can use for the ingress gateway. Set the required variables by running:
```bash
export INGRESS_HOST=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
export INGRESS_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="http2")].port}')
export SERVICE_HOSTNAME=$(kubectl get inferenceservice sklearn-iris-example -n default -o jsonpath='{.status.url}' | cut -d "/" -f 3)
```

If the EXTERNAL-IP value is none, you can access the gateway using the service's node port:
```bash
# GKE
export INGRESS_HOST=worker-node-address
# Minikube
export INGRESS_HOST=$(minikube ip)
# Other environment(On Prem)
export INGRESS_HOST=$(kubectl get po -l istio=ingressgateway -n istio-system -o jsonpath='{.items[0].status.hostIP}')
export INGRESS_PORT=$(kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="http2")].nodePort}')
```

For KIND/port forwarding:
- Run `kubectl port-forward -n istio-system svc/istio-ingressgateway 8080:80`
- In a different window, run:
```bash
export INGRESS_HOST=localhost
export INGRESS_PORT=8080
export SERVICE_HOSTNAME=$(kubectl get inferenceservice sklearn-iris-example -n default -o jsonpath='{.status.url}' | cut -d "/" -f 3)
```


After setting up the above:
- Go to the root directory of `kfserving`
- Query the two models:
- Curl from ingress gateway:
```bash
curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/model1-sklearn:predict -d @./docs/samples/v1alpha2/sklearn/iris-input.json
curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/model2-sklearn:predict -d @./docs/samples/v1alpha2/sklearn/iris-input.json
```
- Curl from local cluster gateway
```bash
curl -v http://sklearn-iris-example.default/v1/models/model1-sklearn:predict -d @./docs/samples/v1alpha2/sklearn/iris-input.json
curl -v http://sklearn-iris-example.default/v1/models/model2-sklearn:predict -d @./docs/samples/v1alpha2/sklearn/iris-input.json
```

The output of each query should be:
```json
{"predictions": [1, 1]}
```

To remove the resources, run the command `kubectl delete inferenceservice sklearn-iris-example`. This will delete the InferenceService and, in turn, its TrainedModels.
17 changes: 17 additions & 0 deletions docs/samples/v1beta1/sklearn/multimodel/inferenceservice.yaml
@@ -0,0 +1,17 @@
apiVersion: "serving.kubeflow.org/v1beta1"
kind: "InferenceService"
metadata:
  name: "sklearn-iris-example"
spec:
  predictor:
    minReplicas: 1
    sklearn:
      protocolVersion: v1
      name: "sklearn-iris-predictor"
      resources:
        limits:
          cpu: 100m
          memory: 256Mi
        requests:
          cpu: 100m
          memory: 256Mi
21 changes: 21 additions & 0 deletions docs/samples/v1beta1/sklearn/multimodel/trainedmodels.yaml
@@ -0,0 +1,21 @@
apiVersion: "serving.kubeflow.org/v1alpha1"
kind: "TrainedModel"
metadata:
  name: "model1-sklearn"
spec:
  inferenceService: "sklearn-iris-example"
  model:
    storageUri: "gs://kfserving-samples/models/sklearn/iris"
    framework: "sklearn"
    memory: "256Mi"
---
apiVersion: "serving.kubeflow.org/v1alpha1"
kind: "TrainedModel"
metadata:
  name: "model2-sklearn"
spec:
  inferenceService: "sklearn-iris-example"
  model:
    storageUri: "gs://kfserving-samples/models/sklearn/iris"
    framework: "sklearn"
    memory: "256Mi"
3 changes: 2 additions & 1 deletion go.mod
@@ -18,8 +18,8 @@ require (
 	github.com/google/uuid v1.1.1
 	github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8
 	github.com/json-iterator/go v1.1.10
-	github.com/mattbaird/jsonpatch v0.0.0-20171005235357-81af80346b1a // indirect
+	github.com/kelseyhightower/envconfig v1.4.0
 	github.com/mattbaird/jsonpatch v0.0.0-20171005235357-81af80346b1a // indirect
 	github.com/onsi/ginkgo v1.14.0
 	github.com/onsi/gomega v1.10.2
 	github.com/pkg/errors v0.9.1
@@ -39,6 +39,7 @@ require (
 	istio.io/gogo-genproto v0.0.0-20191029161641-f7d19ec0141d // indirect
 	k8s.io/api v0.18.8
 	k8s.io/apimachinery v0.18.8
+	k8s.io/client-go v11.0.1-0.20190805182717-6502b5e7b1b5+incompatible
 	k8s.io/klog v1.0.0
 	k8s.io/kube-openapi v0.0.0-20200410145947-bcb3869e6f29
 	knative.dev/networking v0.0.0-20200922180040-a71b40c69b15
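`github.com/kelseyhightower/envconfig` is the newly added direct dependency here. A hedged sketch of the kind of environment-driven configuration it enables for the agent — the struct fields and variable names below are illustrative assumptions, not this PR's actual config:

```go
package main

import (
	"log"

	"github.com/kelseyhightower/envconfig"
)

// agentEnv is a hypothetical configuration struct; envconfig.Process fills
// each field from the environment, applying the defaults shown.
type agentEnv struct {
	ModelDir  string `envconfig:"MODEL_DIR" default:"/mnt/models"`
	ConfigDir string `envconfig:"CONFIG_DIR" default:"/mnt/configs"`
}

func main() {
	var env agentEnv
	if err := envconfig.Process("", &env); err != nil {
		log.Fatal(err)
	}
	log.Printf("models: %s, config: %s", env.ModelDir, env.ConfigDir)
}
```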
6 changes: 3 additions & 3 deletions pkg/agent/downloader.go
@@ -67,9 +67,9 @@ func (d *Downloader) download(modelName string, storageUri string) error {
 	if err != nil {
 		return errors.Wrapf(err, "unsupported protocol")
 	}
-	provider, ok := d.Providers[protocol]
-	if !ok {
-		return errors.Wrapf(err, "protocol manager for %s is not initialized", protocol)
+	provider, err := storage.GetProvider(d.Providers, protocol)
+	if err != nil {
+		return errors.Wrapf(err, "unable to create or get provider for protocol %s", protocol)
 	}
 	if err := provider.DownloadModel(d.ModelDir, modelName, storageUri); err != nil {
 		return errors.Wrapf(err, "failed to download model")
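Per the commit history ("CreateProviderIfNotExists will return provider and error", "Provider client is generated when downloading"), `GetProvider` creates the provider on first use instead of requiring it to be initialized up front. A rough sketch of that shape — everything here except the `GetProvider` name is an assumption:

```go
// GetProvider returns the cached provider for a protocol, creating it on
// first use. newProviderFor is an assumed helper that builds the concrete
// S3 or GCS client.
func GetProvider(providers map[Protocol]Provider, protocol Protocol) (Provider, error) {
	if provider, ok := providers[protocol]; ok {
		return provider, nil
	}
	provider, err := newProviderFor(protocol)
	if err != nil {
		return nil, err
	}
	providers[protocol] = provider
	return provider, nil
}
```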
15 changes: 8 additions & 7 deletions pkg/agent/storage/gcs.go
@@ -26,13 +26,13 @@ func (p *GCSProvider) DownloadModel(modelDir string, modelName string, storageUri string) error {
 		prefix = tokens[1]
 	}
 	ctx := context.Background()
-	gcsObjectDownloader := &GCSObjectDownloader {
-		Context: ctx,
+	gcsObjectDownloader := &GCSObjectDownloader{
+		Context:    ctx,
 		StorageUri: storageUri,
-		ModelDir: modelDir,
-		ModelName: modelName,
-		Bucket: tokens[0],
-		Item: prefix,
+		ModelDir:   modelDir,
+		ModelName:  modelName,
+		Bucket:     tokens[0],
+		Item:       prefix,
 	}
 	it, err := gcsObjectDownloader.GetObjectIterator(p.Client)
 	if err != nil {
@@ -71,7 +71,8 @@ func (g *GCSObjectDownloader) Download(client stiface.Client, it stiface.ObjectIterator) error {
 		return fmt.Errorf("an error occurred while iterating: %v", err)
 	}
 	foundObject = true
-	fileName := filepath.Join(g.ModelDir, g.ModelName, attrs.Name)
+	objectValue := strings.TrimPrefix(attrs.Name, g.Item)
+	fileName := filepath.Join(g.ModelDir, g.ModelName, objectValue)
 	if FileExists(fileName) {
 		log.Info("Deleting", fileName)
 		if err := os.Remove(fileName); err != nil {
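The `TrimPrefix` change keeps the bucket's object prefix out of the local path, so downloaded files land directly under the model directory. A small standalone illustration (the paths are made up for the example):

```go
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

func main() {
	item := "models/sklearn/iris"                    // g.Item, the storage prefix
	objectName := "models/sklearn/iris/model.joblib" // attrs.Name from the iterator

	// Before: the full object name is appended, nesting the prefix locally.
	fmt.Println(filepath.Join("/mnt/models", "model1-sklearn", objectName))
	// /mnt/models/model1-sklearn/models/sklearn/iris/model.joblib

	// After: the prefix is trimmed first, so the file lands at the top level.
	fmt.Println(filepath.Join("/mnt/models", "model1-sklearn", strings.TrimPrefix(objectName, item)))
	// /mnt/models/model1-sklearn/model.joblib
}
```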