# Sample for KFServing SDK v1beta1

This is a sample for KFServing SDK v1beta1. 

The notebook shows how to use the KFServing SDK to create, get, patch and delete an InferenceService.

In [1]:
from kubernetes import client 
from kfserving import KFServingClient
from kfserving import constants
from kfserving import utils
from kfserving import V1beta1InferenceService
from kfserving import V1beta1InferenceServiceSpec
from kfserving import V1beta1PredictorSpec
from kfserving import V1beta1TFServingSpec

Define the namespace where the InferenceService will be deployed. The commented-out `utils.get_default_target_namespace()` call below can be used instead of a fixed name: it returns the namespace the SDK is currently running in when inside a cluster, and falls back to the `default` namespace otherwise.

In [2]:
# Namespace the InferenceService is deployed to. To have the SDK pick the
# namespace instead, use the commented-out call in place of the fixed name:
#namespace = utils.get_default_target_namespace()
namespace = 'kfserving-test'

# KFServing API version passed to the SDK calls below
# (for example 'v1alpha2' or 'v1beta1').
kfserving_version = 'v1beta1'

## Define InferenceService

First define the predictor spec, and then define the InferenceService based on that predictor spec.

In [3]:
# Fully qualified API version string in the form '<group>/<version>'.
api_version = '/'.join([constants.KFSERVING_GROUP, kfserving_version])

# TensorFlow predictor spec pointing at the flowers model storage URI.
default_predictor = V1beta1PredictorSpec(
    tensorflow=V1beta1TFServingSpec(
        storage_uri='gs://kfserving-samples/models/tensorflow/flowers'
    )
)

# InferenceService named 'flower-sample' in the target namespace, serving
# the predictor defined above.
isvc = V1beta1InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name='flower-sample', namespace=namespace),
    spec=V1beta1InferenceServiceSpec(predictor=default_predictor),
)

## Create InferenceService

Call KFServingClient to create InferenceService.

In [4]:
# Instantiate the SDK client with its default arguments; later cells reuse
# this `KFServing` object for every call.
KFServing = KFServingClient()
# Submit the InferenceService object using the configured API version. The
# call is the last expression, so its return value is shown as the cell output.
KFServing.create(isvc, version=kfserving_version)

{'apiVersion': 'serving.kubeflow.org/v1beta1',
 'kind': 'InferenceService',
 'metadata': {'creationTimestamp': '2020-10-29T08:24:07Z',
  'generation': 1,
  'name': 'flower-sample',
  'namespace': 'kfserving-test',
  'resourceVersion': '1508368',
  'selfLink': '/apis/serving.kubeflow.org/v1beta1/namespaces/kfserving-test/inferenceservices/flower-sample',
  'uid': 'e2509c42-3831-477d-b004-3c349a3b8ba6'},
 'spec': {'predictor': {'tensorflow': {'name': 'kfserving-container',
    'resources': {'limits': {'cpu': '1', 'memory': '2Gi'},
     'requests': {'cpu': '1', 'memory': '2Gi'}},
    'runtimeVersion': '1.14.0',
    'storageUri': 'gs://kfserving-samples/models/tensorflow/flowers'}}}}

## Check the InferenceService

In [5]:
# Watch 'flower-sample' and print one status row per update (name, readiness,
# canary traffic, URL). timeout_seconds=120 presumably bounds how long the
# watch runs -- confirm against the SDK documentation.
KFServing.get('flower-sample', namespace=namespace, watch=True, timeout_seconds=120, version=kfserving_version)

NAME                 READY      PREDICTOR_CANARY_TRAFFIC  URL                                                              
flower-sample        False                                                                                                 
flower-sample        False                                                                                                 
flower-sample        False      100                                                                                        
flower-sample        False      100                                                                                        
flower-sample        True       100                       http://flower-sample.kfserving-test.example.com                  


## Patch the InferenceService and define Canary Traffic Percent

In [6]:
# Predictor spec for the canary rollout: points at the 'flowers-2' model and
# sets canary_traffic_percent to 20.
canary_predictor = V1beta1PredictorSpec(
    canary_traffic_percent=20,
    tensorflow=V1beta1TFServingSpec(
        storage_uri='gs://kfserving-samples/models/tensorflow/flowers-2'
    ),
)

# Same InferenceService name and namespace as before, now carrying the
# canary predictor spec.
isvc = V1beta1InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name='flower-sample', namespace=namespace),
    spec=V1beta1InferenceServiceSpec(predictor=canary_predictor),
)

# Apply the updated spec to the existing 'flower-sample' resource; the return
# value is shown as the cell output.
KFServing.patch('flower-sample', isvc, namespace=namespace, version=kfserving_version)

{'apiVersion': 'serving.kubeflow.org/v1beta1',
 'kind': 'InferenceService',
 'metadata': {'creationTimestamp': '2020-10-29T08:24:07Z',
  'generation': 2,
  'name': 'flower-sample',
  'namespace': 'kfserving-test',
  'resourceVersion': '1508558',
  'selfLink': '/apis/serving.kubeflow.org/v1beta1/namespaces/kfserving-test/inferenceservices/flower-sample',
  'uid': 'e2509c42-3831-477d-b004-3c349a3b8ba6'},
 'spec': {'predictor': {'canaryTrafficPercent': 20,
   'tensorflow': {'name': 'kfserving-container',
    'resources': {'limits': {'cpu': '1', 'memory': '2Gi'},
     'requests': {'cpu': '1', 'memory': '2Gi'}},
    'runtimeVersion': '1.14.0',
    'storageUri': 'gs://kfserving-samples/models/tensorflow/flowers-2'}}},
 'status': {'address': {'url': 'http://flower-sample.kfserving-test.svc.cluster.local'},
  'components': {'predictor': {'address': {'url': 'http://flower-sample-predictor-default.kfserving-test.svc.cluster.local'},
    'latestCreatedRevision': 'flower-sample-predictor-default-c

### Check the InferenceService after Patching

In [7]:
# Wait for 'flower-sample' to become ready after the patch before fetching it.
# NOTE(review): timeout and failure behaviour are not visible here -- check
# the SDK documentation.
KFServing.wait_isvc_ready('flower-sample', namespace=namespace, version=kfserving_version)

In [8]:
# Fetch the current 'flower-sample' resource (no watch); the returned object,
# including spec and status, is shown as the cell output.
KFServing.get('flower-sample', namespace=namespace, version=kfserving_version)

{'apiVersion': 'serving.kubeflow.org/v1beta1',
 'kind': 'InferenceService',
 'metadata': {'creationTimestamp': '2020-10-29T08:24:07Z',
  'generation': 2,
  'name': 'flower-sample',
  'namespace': 'kfserving-test',
  'resourceVersion': '1508712',
  'selfLink': '/apis/serving.kubeflow.org/v1beta1/namespaces/kfserving-test/inferenceservices/flower-sample',
  'uid': 'e2509c42-3831-477d-b004-3c349a3b8ba6'},
 'spec': {'predictor': {'canaryTrafficPercent': 20,
   'tensorflow': {'name': 'kfserving-container',
    'resources': {'limits': {'cpu': '1', 'memory': '2Gi'},
     'requests': {'cpu': '1', 'memory': '2Gi'}},
    'runtimeVersion': '1.14.0',
    'storageUri': 'gs://kfserving-samples/models/tensorflow/flowers-2'}}},
 'status': {'address': {'url': 'http://flower-sample.kfserving-test.svc.cluster.local'},
  'components': {'predictor': {'address': {'url': 'http://flower-sample-predictor-default.kfserving-test.svc.cluster.local'},
    'latestCreatedRevision': 'flower-sample-predictor-default-9

## Delete the InferenceService

In [9]:
# Delete 'flower-sample' from the target namespace. The API version is passed
# explicitly so the delete targets the same version as the create, get and
# patch calls, rather than falling back to the SDK's default version.
KFServing.delete('flower-sample', namespace=namespace, version=kfserving_version)

{'kind': 'Status',
 'apiVersion': 'v1',
 'metadata': {},
 'status': 'Success',
 'details': {'name': 'flower-sample',
  'group': 'serving.kubeflow.org',
  'kind': 'inferenceservices',
  'uid': 'e2509c42-3831-477d-b004-3c349a3b8ba6'}}