In [1]:
# Show the metadata of the installed kubeflow-fairing package (version, location, requirements).
!pip show kubeflow-fairing

Name: kubeflow-fairing
Version: 1.0.1
Summary: Kubeflow Fairing Python SDK.
Home-page: https://github.com/kubeflow/fairing
Author: Kubeflow Authors
Author-email: hejinchi@cn.ibm.com
License: Apache License Version 2.0
Location: /usr/local/lib/python3.6/dist-packages
Requires: requests, ibm-cos-sdk, setuptools, notebook, tornado, urllib3, nbconvert, retrying, azure-storage-file, kubernetes, google-auth, future, docker, numpy, oauth2client, cloudpickle, kubeflow-tfjob, boto3, google-cloud-logging, kfserving, python-dateutil, grpcio, azure-mgmt-storage, google-cloud-storage, google-api-python-client, six, kubeflow-pytorchjob, httplib2
Required-by: 


In [2]:
# Docker registry that the built training image is pushed to.
# Make sure you have permission to push images to this registry.
DOCKER_REGISTRY = 'index.docker.io/insoopark'

# Target namespace. Note that the PVC used below must live in this namespace.
my_namespace = 'admin'
# The default target namespace can also be obtained with the API below.
#namespace = fairing_utils.get_default_target_namespace()

In [5]:
# To support distributed training, the PVC should be accessible from all nodes in the cluster.
# The upstream example creates an NFS PV to satisfy that (settings kept below for reference);
# the PV manifest defined in this notebook uses a hostPath volume instead.
#nfs_server = '172.16.189.69'
#nfs_path = '/opt/kubeflow/data/mnist'
pv_name = 'kubeflow-mnist'
pvc_name = 'mnist-pvc'

In [6]:
!pip install pyyaml



In [7]:
from kubernetes import client as k8s_client
from kubernetes import config as k8s_config
from kubeflow.fairing.utils import is_running_in_k8s
import yaml

# Manifest for a 10Gi ReadWriteMany PersistentVolume named `pv_name`,
# backed by a hostPath directory and retained after release.
pv_yaml = f'''
apiVersion: v1
kind: PersistentVolume
metadata:
  name: {pv_name}
spec:
  capacity:
    storage: 10Gi
  accessModes:
  - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  storageClassName: standard
  hostPath:
    path: /home/insoo67_park/data/7
'''

# Manifest for the matching 10Gi ReadWriteMany claim `pvc_name` in `my_namespace`.
pvc_yaml = f'''
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {pvc_name}
  namespace: {my_namespace}
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: standard
  resources:
    requests:
      storage: 10Gi
'''

# Outside a cluster use the local kubeconfig; inside a pod use the in-cluster credentials.
if not is_running_in_k8s():
    k8s_config.load_kube_config()
else:
    k8s_config.load_incluster_config()

k8s_core_api = k8s_client.CoreV1Api()
# Creation is disabled here; uncomment to create the PV and PVC from the manifests above.
#k8s_core_api.create_persistent_volume(yaml.safe_load(pv_yaml))
#k8s_core_api.create_namespaced_persistent_volume_claim(my_namespace, yaml.safe_load(pvc_yaml))


In [8]:
# Replica counts for the TFJob. In this notebook they are referenced only by the
# commented-out chief_count / worker_count / ps_count deployer arguments.
num_chief = 1 # number of Chief replicas in the TFJob
num_ps = 1  # number of PS replicas in the TFJob
num_workers = 1  # number of Worker replicas in the TFJob
# Mount path of the PVC inside the training pods, also passed as --tf-model-dir.
model_dir = "/mnt"
# Passed as --tf-export-dir; located under the PVC mount path.
export_path = "/mnt/export" 
# Kept as strings because they are concatenated into command-line flags.
train_steps = "1000"
batch_size = "100"
learning_rate = "0.01"

In [18]:
import uuid
from kubeflow import fairing   
from kubeflow.fairing.kubernetes.utils import mounting_pvc
from kubeflow.fairing.kubernetes import utils as k8s_utils

# Short random suffix so repeated runs do not reuse the same TFJob name.
tfjob_name = f'mnist-training-{uuid.uuid4().hex[:4]}'

# NOTE(review): `output_map` and `command` are consumed only by the commented-out
# preprocessor/builder alternatives below. The active configuration does not pass them,
# so the container starts as `python /app/mnist.py` without these flags -- confirm intended.
output_map =  {
    "Dockerfile": "Dockerfile",
    "mnist.py": "mnist.py"
}

command=["python",
         "/opt/mnist.py",
         "--tf-model-dir=" + model_dir,
         "--tf-export-dir=" + export_path,
         "--tf-train-steps=" + train_steps,
         "--tf-batch-size=" + batch_size,
         "--tf-learning-rate=" + learning_rate]

# Package mnist.py into the image and use it as the entry point.
fairing.config.set_preprocessor('python', input_files=["mnist.py"], executable="mnist.py")
# fairing.config.set_preprocessor('python', command=command, path_prefix="/app", output_map=output_map)

# Build with the append builder on top of the TensorFlow base image and push to DOCKER_REGISTRY.
fairing.config.set_builder(
            name='append',
            image_name='mnist',
            base_image='tensorflow/tensorflow:1.15.2-py3',
            registry=DOCKER_REGISTRY, 
            push=True)
# fairing.config.set_builder(name='docker', registry=DOCKER_REGISTRY, base_image="", image_name="mnist", dockerfile_path="Dockerfile")

# Deploy as a TFJob into `my_namespace`, the same namespace the later cells use to
# look the job up (previously hard-coded as 'admin' here).
# The PVC is mounted with `volume_mounts`, which replaces the deprecated `mounting_pvc`.
fairing.config.set_deployer('tfjob', namespace=my_namespace, stream_log=False, job_name=tfjob_name,
                            # chief_count=num_chief, worker_count=num_workers, ps_count=num_ps,
                            pod_spec_mutators=[k8s_utils.get_resource_mutator(cpu=1, memory=2),
                                               k8s_utils.volume_mounts('pvc', pvc_name, mount_path=model_dir)])

# Run preprocessing, image build/push and job submission.
fairing.config.run()

[W 200829 07:08:01 utils:51] The function mounting_pvc has been deprecated,                     please use `volume_mounts`
[I 200829 07:08:01 config:134] Using preprocessor: <kubeflow.fairing.preprocessors.base.BasePreProcessor object at 0x7f9a64eb4da0>
[I 200829 07:08:01 config:136] Using builder: <kubeflow.fairing.builders.append.append.AppendBuilder object at 0x7f9a8c7423c8>
[I 200829 07:08:01 config:138] Using deployer: <kubeflow.fairing.deployers.tfjob.tfjob.TfJob object at 0x7f9a64eaca90>
[W 200829 07:08:01 append:50] Building image using Append builder...
[I 200829 07:08:01 base:107] Creating docker context: /tmp/fairing_context_840mjsuo
[I 200829 07:08:01 docker_creds_:234] Loading Docker credentials for repository 'tensorflow/tensorflow:1.15.2-py3'
[W 200829 07:08:02 append:54] Image successfully built in 1.001206887999615s.
[W 200829 07:08:02 append:94] Pushing image index.docker.io/insoopark/mnist:584A65A0...
[I 200829 07:08:02 docker_creds_:234] Loading Docker credentials f

(<kubeflow.fairing.preprocessors.base.BasePreProcessor at 0x7f9a64eb4da0>,
 <kubeflow.fairing.builders.append.append.AppendBuilder at 0x7f9a8c7423c8>,
 <kubeflow.fairing.deployers.tfjob.tfjob.TfJob at 0x7f9a64eaca90>)

In [19]:
from kubeflow.tfjob import TFJobClient
# Client used in the following cells to inspect, wait on, read logs from and delete the TFJob.
tfjob_client = TFJobClient()

# Fetch the submitted TFJob resource; it is displayed as the cell output.
tfjob_client.get(tfjob_name, namespace=my_namespace)

{'apiVersion': 'kubeflow.org/v1',
 'kind': 'TFJob',
 'metadata': {'creationTimestamp': '2020-08-29T07:08:04Z',
  'generateName': 'fairing-tfjob-',
  'generation': 1,
  'labels': {'fairing-deployer': 'tfjob',
   'fairing-id': '61bfda76-e9c6-11ea-8ee8-26a9a7e45509'},
  'name': 'mnist-training-0784',
  'namespace': 'admin',
  'resourceVersion': '26963',
  'selfLink': '/apis/kubeflow.org/v1/namespaces/admin/tfjobs/mnist-training-0784',
  'uid': '839fdf24-c77d-4984-8c70-5a08add868ad'},
 'spec': {'tfReplicaSpecs': {'Worker': {'replicas': 1,
    'template': {'metadata': {'annotations': {'sidecar.istio.io/inject': 'false'},
      'labels': {'fairing-deployer': 'tfjob',
       'fairing-id': '61bfda76-e9c6-11ea-8ee8-26a9a7e45509'},
      'name': 'fairing-deployer'},
     'spec': {'containers': [{'command': ['python', '/app/mnist.py'],
        'env': [{'name': 'FAIRING_RUNTIME', 'value': '1'}],
        'image': 'index.docker.io/insoopark/mnist:584A65A0',
        'name': 'tensorflow',
        'res

In [20]:
# Wait for the TFJob to finish; with watch=True the status transitions are printed as they occur.
tfjob_client.wait_for_job(tfjob_name, namespace=my_namespace, watch=True)

mnist-training-0784            Created              2020-08-29T07:08:04Z          
mnist-training-0784            Running              2020-08-29T07:08:48Z          
mnist-training-0784            Succeeded            2020-08-29T07:10:38Z          


In [21]:
# Check whether the TFJob succeeded; the result is displayed as the cell output.
tfjob_client.is_job_succeeded(tfjob_name, namespace=my_namespace)

True

In [22]:
# Print the logs of the TFJob's pod(s).
tfjob_client.get_logs(tfjob_name, namespace=my_namespace)

[I 200829 07:10:38 tf_job_client:386] The logs of Pod mnist-training-0784-worker-0:
    
    
    W0829 07:08:50.540437 140716473689920 module_wrapper.py:139] From /app/mnist.py:155: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead.
    
    
    W0829 07:08:50.540673 140716473689920 module_wrapper.py:139] From /app/mnist.py:155: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead.
    
    
    W0829 07:08:50.541976 140716473689920 module_wrapper.py:139] From /app/mnist.py:160: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.
    
    INFO:tensorflow:TF_CONFIG {}
    I0829 07:08:50.542119 140716473689920 mnist.py:160] TF_CONFIG {}
    INFO:tensorflow:cluster=None job_name=None task_index=None
    I0829 07:08:50.542696 140716473689920 mnist.py:166] cluster=None job_name=None task_index=None
    INFO:tensorflow:Will export model
    I0829 07:08:50.542799 1407164736

In [23]:
from kubeflow.fairing.deployers.kfserving.kfserving import KFServing
# Name the InferenceService with a short random suffix.
isvc_name = 'mnist-service-' + uuid.uuid4().hex[:4]

# TensorFlow predictor that loads the model from the `export` directory on the PVC.
isvc = KFServing(
    'tensorflow',
    namespace=my_namespace,
    isvc_name=isvc_name,
    default_storage_uri=f'pvc://{pvc_name}/export',
)

# Show the deployer object and the generated spec, then deploy a freshly generated spec.
print(isvc)
print(isvc.generate_isvc())
isvc.deploy(isvc.generate_isvc())

<kubeflow.fairing.deployers.kfserving.kfserving.KFServing object at 0x7f9a8c738438>
{'api_version': 'serving.kubeflow.org/v1alpha2',
 'kind': 'InferenceService',
 'metadata': {'annotations': None,
              'cluster_name': None,
              'creation_timestamp': None,
              'deletion_grace_period_seconds': None,
              'deletion_timestamp': None,
              'finalizers': None,
              'generate_name': 'fairing-kfserving-',
              'generation': None,
              'initializers': None,
              'labels': None,
              'managed_fields': None,
              'name': 'mnist-service-2397',
              'namespace': 'admin',
              'owner_references': None,
              'resource_version': None,
              'self_link': None,
              'uid': None},
 'spec': {'canary': None,
          'canary_traffic_percent': 0,
          'default': {'explainer': None,
                      'predictor': {'batcher': None,
                         

[I 200829 07:20:39 kfserving:127] Deployed the InferenceService mnist-service-2397 successfully.


'mnist-service-2397'

In [24]:
from kfserving import KFServingClient
# Client used in the following cells to look up and delete the InferenceService.
kfserving_client = KFServingClient()
# Called without a service name; the recorded output is a list of InferenceServices in the namespace.
kfserving_client.get(namespace=my_namespace)

{'apiVersion': 'serving.kubeflow.org/v1alpha2',
 'items': [{'apiVersion': 'serving.kubeflow.org/v1alpha2',
   'kind': 'InferenceService',
   'metadata': {'creationTimestamp': '2020-08-29T07:10:39Z',
    'generateName': 'fairing-kfserving-',
    'generation': 1,
    'name': 'mnist-service-2397',
    'namespace': 'admin',
    'resourceVersion': '27891',
    'selfLink': '/apis/serving.kubeflow.org/v1alpha2/namespaces/admin/inferenceservices/mnist-service-2397',
    'uid': 'ae4c93fa-819b-4b05-8e73-23aefa86ae69'},
   'spec': {'default': {'predictor': {'tensorflow': {'resources': {'limits': {'cpu': '1',
         'memory': '2Gi'},
        'requests': {'cpu': '1', 'memory': '2Gi'}},
       'runtimeVersion': '1.14.0',
       'storageUri': 'pvc://mnist-pvc/export'}}}},
   'status': {'canary': {},
    'conditions': [{'lastTransitionTime': '2020-08-29T07:10:41Z',
      'message': 'Configuration "mnist-service-2397-predictor-default" is waiting for a Revision to become ready.',
      'reason': 'Rev

In [25]:
# Look up the deployed InferenceService and extract its name and endpoint URL.
mnist_isvc = kfserving_client.get(isvc_name, namespace=my_namespace)
mnist_isvc_name = mnist_isvc['metadata']['name']
# `status` or its `url` may be missing (or empty) until the service becomes ready;
# fall back to an empty string instead of raising KeyError/TypeError.
mnist_isvc_endpoint = (mnist_isvc.get('status') or {}).get('url') or ''
print("MNIST Service Endpoint: " + mnist_isvc_endpoint)

MNIST Service Endpoint: 


In [29]:
# Address of the Istio ingress gateway. The kubectl lookup (commented out) is rejected as
# Forbidden for this notebook's service account, presumably why the IP is hard-coded below.
# NOTE(review): the execution counts show this cell was last run after the cleanup cells
# that delete the TFJob and the InferenceService -- re-run it before cleanup.
#ISTIO_CLUSTER_IP=!kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.clusterIP}'
#print("ISTIO_CLUSTER_IP => " + ISTIO_CLUSTER_IP[0])
#CLUSTER_IP=ISTIO_CLUSTER_IP[0]
CLUSTER_IP="10.99.127.209"
# Host header sent with the request, built from the service name and namespace.
MODEL_HOST=f"Host: {mnist_isvc_name}.{my_namespace}.svc.cluster.local"
#MODEL_HOST=f"Host: mnist-service-f761.admin.nip.io"
# POST the payload in ./input.json to the model's predict endpoint through the gateway.
!curl -v -H "{MODEL_HOST}" http://{CLUSTER_IP}/v1/models/{mnist_isvc_name}:predict -d @./input.json

ISTIO_CLUSTER_IP => Error from server (Forbidden): services "istio-ingressgateway" is forbidden: User "system:serviceaccount:admin:default-editor" cannot get resource "services" in API group "" in the namespace "istio-system"
/bin/sh: 1: Syntax error: "(" unexpected


In [27]:
# Clean up: delete the TFJob created by this notebook.
tfjob_client.delete(tfjob_name, namespace=my_namespace)

{'kind': 'Status',
 'apiVersion': 'v1',
 'metadata': {},
 'status': 'Success',
 'details': {'name': 'mnist-training-0784',
  'group': 'kubeflow.org',
  'kind': 'tfjobs',
  'uid': '839fdf24-c77d-4984-8c70-5a08add868ad'}}

In [28]:
# Clean up: delete the InferenceService created by this notebook.
kfserving_client.delete(isvc_name, namespace=my_namespace)

{'kind': 'Status',
 'apiVersion': 'v1',
 'metadata': {},
 'status': 'Success',
 'details': {'name': 'mnist-service-2397',
  'group': 'serving.kubeflow.org',
  'kind': 'inferenceservices',
  'uid': 'ae4c93fa-819b-4b05-8e73-23aefa86ae69'}}

In [30]:
# Try to read the cluster IP of the Istio ingress gateway service.
# In the recorded run this is rejected as Forbidden for the notebook's service account.
!kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.clusterIP}'

Error from server (Forbidden): services "istio-ingressgateway" is forbidden: User "system:serviceaccount:admin:default-editor" cannot get resource "services" in API group "" in the namespace "istio-system"
