In [40]:
# Print the installed kubeflow-fairing package metadata (version, dependencies)
# so the environment this notebook ran against is recorded next to the results.
!pip show kubeflow-fairing

Name: kubeflow-fairing
Version: 1.0.1
Summary: Kubeflow Fairing Python SDK.
Home-page: https://github.com/kubeflow/fairing
Author: Kubeflow Authors
Author-email: hejinchi@cn.ibm.com
License: Apache License Version 2.0
Location: /usr/local/lib/python3.6/dist-packages
Requires: retrying, docker, oauth2client, cloudpickle, numpy, nbconvert, grpcio, future, urllib3, kubernetes, tornado, notebook, google-auth, setuptools, azure-storage-file, kfserving, ibm-cos-sdk, boto3, google-api-python-client, google-cloud-storage, kubeflow-pytorchjob, requests, six, google-cloud-logging, httplib2, azure-mgmt-storage, kubeflow-tfjob, python-dateutil
Required-by: 


In [41]:
# Kubernetes namespace this notebook works in. The PVC used by the notebook
# should live in this same namespace.
# The default target namespace can also be looked up through the API below.
#namespace = fairing_utils.get_default_target_namespace()
my_namespace = "admin"

# Docker registry that built images are pushed to.
# Make sure the credentials in use are allowed to push images to it.
DOCKER_REGISTRY = "index.docker.io/insoopark"

In [42]:
# Names of the PersistentVolume / PersistentVolumeClaim used by this notebook.
# To satisfy distributed training, the PVC must be accessible from all nodes in
# the cluster. The example this is based on used an NFS-backed PV for that:
#nfs_server = '172.16.189.69'
#nfs_path = '/opt/kubeflow/data/mnist'
# NOTE(review): the PV manifest defined later in this notebook uses hostPath
# instead of NFS — confirm the path is reachable from every node.
pvc_name = "mnist-pvc"
pv_name = "kubeflow-mnist"

In [15]:
!pip install pyyaml



In [43]:
from kubernetes import client as k8s_client
from kubernetes import config as k8s_config
from kubeflow.fairing.utils import is_running_in_k8s
import yaml

# Manifest for the PersistentVolume named by `pv_name`: 10Gi, ReadWriteMany,
# Retain reclaim policy, backed by a hostPath directory.
pv_yaml = f'''
apiVersion: v1
kind: PersistentVolume
metadata:
  name: {pv_name}
spec:
  capacity:
    storage: 10Gi
  accessModes:
  - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  storageClassName: standard
  hostPath:
    path: /home/insoo67_park/data/7
'''

# Manifest for the claim named by `pvc_name` in `my_namespace`; it requests
# 10Gi from the same `standard` storage class with ReadWriteMany access.
pvc_yaml = f'''
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {pvc_name}
  namespace: {my_namespace}
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: standard
  resources:
    requests:
      storage: 10Gi
'''

# Load the Kubernetes client configuration that matches where the notebook is
# running: the local kubeconfig outside a cluster, the in-cluster config inside.
if not is_running_in_k8s():
    k8s_config.load_kube_config()
else:
    k8s_config.load_incluster_config()

k8s_core_api = k8s_client.CoreV1Api()

# The create calls are intentionally left disabled here; uncomment them to
# create the PV and PVC from the manifests above.
# NOTE(review): presumably disabled because the resources already exist — confirm.
#k8s_core_api.create_persistent_volume(yaml.safe_load(pv_yaml))
#k8s_core_api.create_namespaced_persistent_volume_claim(my_namespace, yaml.safe_load(pvc_yaml))


In [44]:
# Replica counts for the TFJob: Chief, PS and Worker, in that order.
num_chief, num_ps, num_workers = 1, 1, 1

# Where the model is written: `model_dir` is also the path the PVC is mounted
# at in the training pods, and the exported model goes underneath it.
model_dir = "/mnt"
export_path = "/mnt/export"

# Training hyperparameters, kept as strings because they are concatenated into
# command-line flags for mnist.py.
train_steps = "1000"
batch_size = "100"
learning_rate = "0.01"

In [199]:
import uuid
from kubeflow import fairing   
from kubeflow.fairing.kubernetes.utils import mounting_pvc
from kubeflow.fairing.kubernetes import utils as k8s_utils

# A short random suffix keeps the TFJob name unique across runs.
tfjob_name = f'mnist-training-{uuid.uuid4().hex[:4]}'

# Inputs for the alternative (commented-out) Dockerfile-based setup below.
output_map = {
    "Dockerfile": "Dockerfile",
    "mnist.py": "mnist.py"
}

command = ["python",
           "/opt/mnist.py",
           "--tf-model-dir=" + model_dir,
           "--tf-export-dir=" + export_path,
           "--tf-train-steps=" + train_steps,
           "--tf-batch-size=" + batch_size,
           "--tf-learning-rate=" + learning_rate]

# Package mnist.py into the image and run it as the entry point.
# NOTE(review): `command` is not passed to this preprocessor, so the flags built
# above (model/export dirs, hyperparameters) presumably never reach mnist.py and
# the script runs with its own defaults — confirm this is intended.
fairing.config.set_preprocessor('python', input_files=["mnist.py"], executable="mnist.py")
# fairing.config.set_preprocessor('python', command=command, path_prefix="/app", output_map=output_map)

# Build by appending the code to the TensorFlow base image and push the result
# to DOCKER_REGISTRY.
fairing.config.set_builder(
    name='append',
    image_name='mnist',
    base_image='tensorflow/tensorflow:1.15.2-py3',
    registry=DOCKER_REGISTRY,
    push=True)
# fairing.config.set_builder(name='docker', registry=DOCKER_REGISTRY, base_image="", image_name="mnist", dockerfile_path="Dockerfile")

# Deploy as a TFJob. The namespace comes from `my_namespace` (not a literal) so
# the job is created where the later TFJobClient calls look for it.
# The PVC is mounted with `volume_mounts`, the replacement for the deprecated
# `mounting_pvc` helper (which logged a deprecation warning on every run).
# fairing.config.set_deployer(name='tfjob', namespace=my_namespace, stream_log=False, job_name=tfjob_name,
#                            chief_count=num_chief, worker_count=num_workers, ps_count=num_ps,
#                            pod_spec_mutators=[mounting_pvc(pvc_name=pvc_name, pvc_mount_path=model_dir)])
fairing.config.set_deployer('tfjob', namespace=my_namespace, stream_log=False, job_name=tfjob_name,
                            # chief_count=num_chief, worker_count=num_workers, ps_count=num_ps,
                            pod_spec_mutators=[k8s_utils.get_resource_mutator(cpu=2, memory=4),
                                               k8s_utils.volume_mounts('pvc', pvc_name, mount_path=model_dir)])

# Preprocess, build/push the image and submit the job; as the last expression,
# the (preprocessor, builder, deployer) tuple is displayed.
fairing.config.run()

[W 200828 03:14:11 utils:51] The function mounting_pvc has been deprecated,                     please use `volume_mounts`
[I 200828 03:14:11 config:134] Using preprocessor: <kubeflow.fairing.preprocessors.base.BasePreProcessor object at 0x7f24dbeda908>
[I 200828 03:14:11 config:136] Using builder: <kubeflow.fairing.builders.append.append.AppendBuilder object at 0x7f24dbeda518>
[I 200828 03:14:11 config:138] Using deployer: <kubeflow.fairing.deployers.tfjob.tfjob.TfJob object at 0x7f24dbeda358>
[W 200828 03:14:11 append:50] Building image using Append builder...
[I 200828 03:14:11 base:107] Creating docker context: /tmp/fairing_context_cyrgcsww
[I 200828 03:14:12 docker_creds_:234] Loading Docker credentials for repository 'tensorflow/tensorflow:1.15.2-py3'
[W 200828 03:14:12 append:54] Image successfully built in 0.6486922830008552s.
[W 200828 03:14:12 append:94] Pushing image index.docker.io/insoopark/mnist:5526BD17...
[I 200828 03:14:12 docker_creds_:234] Loading Docker credentials 

(<kubeflow.fairing.preprocessors.base.BasePreProcessor at 0x7f24dbeda908>,
 <kubeflow.fairing.builders.append.append.AppendBuilder at 0x7f24dbeda518>,
 <kubeflow.fairing.deployers.tfjob.tfjob.TfJob at 0x7f24dbeda358>)

In [200]:
from kubeflow.tfjob import TFJobClient
# Create a TFJob SDK client (constructed without arguments).
tfjob_client = TFJobClient()

# Fetch the TFJob submitted above; as the last expression of the cell, the
# returned manifest is displayed.
tfjob_client.get(tfjob_name, namespace=my_namespace)

{'apiVersion': 'kubeflow.org/v1',
 'kind': 'TFJob',
 'metadata': {'creationTimestamp': '2020-08-28T03:14:15Z',
  'generateName': 'fairing-tfjob-',
  'generation': 1,
  'labels': {'fairing-deployer': 'tfjob',
   'fairing-id': '8db67b16-e8dc-11ea-9adc-c2e8541cc144'},
  'managedFields': [{'apiVersion': 'kubeflow.org/v1',
    'fieldsType': 'FieldsV1',
    'fieldsV1': {'f:metadata': {'f:generateName': {},
      'f:labels': {'.': {}, 'f:fairing-deployer': {}, 'f:fairing-id': {}}},
     'f:spec': {'.': {},
      'f:tfReplicaSpecs': {'.': {},
       'f:Worker': {'.': {},
        'f:replicas': {},
        'f:template': {'.': {},
         'f:metadata': {'.': {},
          'f:annotations': {'.': {}, 'f:sidecar.istio.io/inject': {}},
          'f:labels': {'.': {}, 'f:fairing-deployer': {}, 'f:fairing-id': {}},
          'f:name': {}},
         'f:spec': {'.': {}, 'f:restartPolicy': {}, 'f:volumes': {}}}}}}},
    'manager': 'Swagger-Codegen',
    'operation': 'Update',
    'time': '2020-08-28T03:1

In [201]:
# Wait for the TFJob to finish. In the recorded run, watch=True printed one row
# per status change (Running, then Succeeded).
tfjob_client.wait_for_job(tfjob_name, namespace=my_namespace, watch=True)

mnist-training-e4ff            Running              2020-08-28T03:14:17Z          
mnist-training-e4ff            Succeeded            2020-08-28T03:15:07Z          


In [202]:
# Check whether the TFJob succeeded (True in the recorded run).
tfjob_client.is_job_succeeded(tfjob_name, namespace=my_namespace)

True

In [203]:
# Print the TFJob's pod logs (the recorded run shows the logs of the worker-0 pod).
tfjob_client.get_logs(tfjob_name, namespace=my_namespace)

[I 200828 03:15:07 tf_job_client:386] The logs of Pod mnist-training-e4ff-worker-0:
    
    
    W0828 03:14:19.083941 139842627770176 module_wrapper.py:139] From /app/mnist.py:155: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead.
    
    
    W0828 03:14:19.084154 139842627770176 module_wrapper.py:139] From /app/mnist.py:155: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead.
    
    
    W0828 03:14:19.085295 139842627770176 module_wrapper.py:139] From /app/mnist.py:160: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.
    
    INFO:tensorflow:TF_CONFIG {}
    I0828 03:14:19.085411 139842627770176 mnist.py:160] TF_CONFIG {}
    INFO:tensorflow:cluster=None job_name=None task_index=None
    I0828 03:14:19.085923 139842627770176 mnist.py:166] cluster=None job_name=None task_index=None
    INFO:tensorflow:Will export model
    I0828 03:14:19.085996 1398426277

In [204]:
from kubeflow.fairing.deployers.kfserving.kfserving import KFServing
# Random 4-hex-character suffix so repeated runs get distinct service names.
isvc_name = f'mnist-service-{uuid.uuid4().hex[:4]}'

# TensorFlow InferenceService whose model is read from the `export` directory
# of the PVC named by `pvc_name`.
isvc = KFServing(
    'tensorflow',
    namespace=my_namespace,
    isvc_name=isvc_name,
    default_storage_uri=f'pvc://{pvc_name}/export',
)

# Show the deployer object and the generated InferenceService spec, then deploy it.
print(isvc)
print(isvc.generate_isvc())
isvc.deploy(isvc.generate_isvc())

<kubeflow.fairing.deployers.kfserving.kfserving.KFServing object at 0x7f24dbeda668>
{'api_version': 'serving.kubeflow.org/v1alpha2',
 'kind': 'InferenceService',
 'metadata': {'annotations': None,
              'cluster_name': None,
              'creation_timestamp': None,
              'deletion_grace_period_seconds': None,
              'deletion_timestamp': None,
              'finalizers': None,
              'generate_name': 'fairing-kfserving-',
              'generation': None,
              'initializers': None,
              'labels': None,
              'managed_fields': None,
              'name': 'mnist-service-f761',
              'namespace': 'admin',
              'owner_references': None,
              'resource_version': None,
              'self_link': None,
              'uid': None},
 'spec': {'canary': None,
          'canary_traffic_percent': 0,
          'default': {'explainer': None,
                      'predictor': {'batcher': None,
                         

[I 200828 03:25:07 kfserving:127] Deployed the InferenceService mnist-service-f761 successfully.


'mnist-service-f761'

In [205]:
from kfserving import KFServingClient
# Create a KFServing SDK client (constructed without arguments).
kfserving_client = KFServingClient()
# No service name is given, so the call returns the InferenceServices in the
# namespace (an InferenceServiceList in the recorded output).
kfserving_client.get(namespace=my_namespace)

{'apiVersion': 'serving.kubeflow.org/v1alpha2',
 'items': [{'apiVersion': 'serving.kubeflow.org/v1alpha2',
   'kind': 'InferenceService',
   'metadata': {'creationTimestamp': '2020-08-28T03:15:07Z',
    'generateName': 'fairing-kfserving-',
    'generation': 1,
    'name': 'mnist-service-f761',
    'namespace': 'admin',
    'resourceVersion': '67831',
    'selfLink': '/apis/serving.kubeflow.org/v1alpha2/namespaces/admin/inferenceservices/mnist-service-f761',
    'uid': '09b461f8-cb18-48d7-85ac-1e0d04d037fa'},
   'spec': {'default': {'predictor': {'tensorflow': {'resources': {'limits': {'cpu': '1',
         'memory': '2Gi'},
        'requests': {'cpu': '1', 'memory': '2Gi'}},
       'runtimeVersion': '1.14.0',
       'storageUri': 'pvc://mnist-pvc/export'}}}},
   'status': {}}],
 'kind': 'InferenceServiceList',
 'metadata': {'continue': '',
  'resourceVersion': '70851',
  'selfLink': '/apis/serving.kubeflow.org/v1alpha2/namespaces/admin/inferenceservices'}}

In [206]:
# Look up the InferenceService deployed by this notebook and report its URL.
mnist_isvc = kfserving_client.get(isvc_name, namespace=my_namespace)
mnist_isvc_name = mnist_isvc['metadata']['name']
# `status` may be empty (as in the recorded run), None or missing altogether on a
# freshly created service, so fall back to an empty endpoint instead of raising.
mnist_isvc_endpoint = (mnist_isvc.get('status') or {}).get('url', '')
print("MNIST Service Endpoint: " + mnist_isvc_endpoint)

MNIST Service Endpoint: 


In [212]:
# Send a prediction request (payload from ./input.json) for the deployed model
# to a hard-coded IP with curl.
# The kubectl lookup of the istio-ingressgateway cluster IP is commented out; the
# recorded output shows it was forbidden for this notebook's service account.
# NOTE(review): the recorded output under this cell appears to come from an
# earlier version of the cell (it prints "ISTIO_CLUSTER_IP => ..."); re-run to refresh.
# NOTE(review): all Host-header variants are commented out; presumably the ingress
# needs one to route to the InferenceService — confirm.
#ISTIO_CLUSTER_IP=!kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.clusterIP}'
#print("ISTIO_CLUSTER_IP => " + ISTIO_CLUSTER_IP[0])
#CLUSTER_IP=ISTIO_CLUSTER_IP[0]
CLUSTER_IP="34.121.58.241"
#MODEL_HOST=f"Host: {mnist_isvc_name}.{my_namespace}.example.com"
#MODEL_HOST=f"Host: mnist-service-f761.admin.nip.io"
#!curl -v -H "{MODEL_HOST}" http://{CLUSTER_IP}/v1/models/{mnist_isvc_name}:predict -d @./input.json
!curl -v http://{CLUSTER_IP}/v1/models/{mnist_isvc_name}:predict -d @./input.json

ISTIO_CLUSTER_IP => Error from server (Forbidden): services "istio-ingressgateway" is forbidden: User "system:serviceaccount:admin:default-editor" cannot get resource "services" in API group "" in the namespace "istio-system"
/bin/sh: 1: Syntax error: "(" unexpected


In [197]:
# Clean up: delete the TFJob named by `tfjob_name`.
# NOTE(review): the recorded output names a different job (mnist-training-5ae1)
# and the cell's execution count precedes the training cell, so the output is
# from an earlier run.
tfjob_client.delete(tfjob_name, namespace=my_namespace)

{'kind': 'Status',
 'apiVersion': 'v1',
 'metadata': {},
 'status': 'Success',
 'details': {'name': 'mnist-training-5ae1',
  'group': 'kubeflow.org',
  'kind': 'tfjobs',
  'uid': 'bf76aec7-885c-49e5-8066-6842a440f758'}}

In [198]:
# Clean up: delete the InferenceService named by `isvc_name`.
# NOTE(review): the recorded output names a different service (mnist-service-9c0f)
# and the cell's execution count precedes the deploy cell, so the output is from
# an earlier run.
kfserving_client.delete(isvc_name, namespace=my_namespace)

{'kind': 'Status',
 'apiVersion': 'v1',
 'metadata': {},
 'status': 'Success',
 'details': {'name': 'mnist-service-9c0f',
  'group': 'serving.kubeflow.org',
  'kind': 'inferenceservices',
  'uid': 'ea091cd2-ec1f-4258-99ea-394f549c9df3'}}