In [1]:
!pip show kubeflow-fairing

Name: kubeflow-fairing
Version: 1.0.1
Summary: Kubeflow Fairing Python SDK.
Home-page: https://github.com/kubeflow/fairing
Author: Kubeflow Authors
Author-email: hejinchi@cn.ibm.com
License: Apache License Version 2.0
Location: /usr/local/lib/python3.6/dist-packages
Requires: urllib3, boto3, oauth2client, kfserving, retrying, google-cloud-logging, requests, numpy, kubernetes, docker, nbconvert, ibm-cos-sdk, setuptools, six, notebook, kubeflow-tfjob, cloudpickle, google-cloud-storage, python-dateutil, google-api-python-client, future, tornado, kubeflow-pytorchjob, azure-storage-file, grpcio, azure-mgmt-storage, google-auth, httplib2
Required-by: 


In [17]:
# Set docker registry to store image.
# Ensure you have permission for pushing docker image requests. 
DOCKER_REGISTRY = 'index.docker.io/insoopark'

# Set namespace. Note that the created PVC should be in the namespace.
my_namespace = 'admin'
# You also can get the default target namepspace using below API.
#namespace = fairing_utils.get_default_target_namespace()

In [18]:
# To satify the distributed training, the PVC should be access from all nodes in the cluster.
# The example creates a NFS PV to satify that.
#nfs_server = '172.16.189.69'
#nfs_path = '/opt/kubeflow/data/mnist'
pv_name = 'kubeflow-mnist'
pvc_name = 'mnist-pvc'

In [5]:
!pip install pyyaml



In [19]:
from kubernetes import client as k8s_client
from kubernetes import config as k8s_config
from kubeflow.fairing.utils import is_running_in_k8s
import yaml

pv_yaml = f'''
apiVersion: v1
kind: PersistentVolume
metadata:
  name: {pv_name}
spec:
  capacity:
    storage: 10Gi
  accessModes:
  - ReadWriteMany
  persistentVolumeReclaimPolicy: Retain
  storageClassName: standard
  hostPath:
    path: /home/insoo67_park/data/7
'''
pvc_yaml = f'''
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {pvc_name}
  namespace: {my_namespace}
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: standard
  resources:
    requests:
      storage: 10Gi
'''

if is_running_in_k8s():
    k8s_config.load_incluster_config()
else:
    k8s_config.load_kube_config()

k8s_core_api = k8s_client.CoreV1Api()
#k8s_core_api.create_persistent_volume(yaml.safe_load(pv_yaml))
#k8s_core_api.create_namespaced_persistent_volume_claim(my_namespace, yaml.safe_load(pvc_yaml))


In [50]:
num_chief = 1 #number of Chief in TFJob 
num_ps = 1  #number of PS in TFJob 
num_workers = 1  #number of Worker in TFJob 
model_dir = "/mnt"
export_path = "/mnt/export" 
train_steps = "1000"
batch_size = "100"
learning_rate = "0.01"

In [137]:
import uuid
from kubeflow import fairing   
from kubeflow.fairing.kubernetes.utils import mounting_pvc
from kubeflow.fairing.kubernetes import utils as k8s_utils

tfjob_name = f'mnist-training-{uuid.uuid4().hex[:4]}'

output_map =  {
    "Dockerfile": "Dockerfile",
    "mnist.py": "mnist.py"
}

command=["python",
         "/opt/mnist.py",
         "--tf-model-dir=" + model_dir,
         "--tf-export-dir=" + export_path,
         "--tf-train-steps=" + train_steps,
         "--tf-batch-size=" + batch_size,
         "--tf-learning-rate=" + learning_rate]

fairing.config.set_preprocessor('python', input_files=["mnist.py"], executable="mnist.py")
# fairing.config.set_preprocessor('python', command=command, path_prefix="/app", output_map=output_map)
fairing.config.set_builder(
            name='append',
            image_name='mnist',
            base_image='tensorflow/tensorflow:1.15.2-py3',
            registry=DOCKER_REGISTRY, 
            push=True)
# fairing.config.set_builder(name='docker', registry=DOCKER_REGISTRY, base_image="", image_name="mnist", dockerfile_path="Dockerfile")
# fairing.config.set_deployer(name='tfjob', namespace=my_namespace, stream_log=False, job_name=tfjob_name,
#                            chief_count=num_chief, worker_count=num_workers, ps_count=num_ps, 
#                            pod_spec_mutators=[mounting_pvc(pvc_name=pvc_name, pvc_mount_path=model_dir)])
fairing.config.set_deployer('tfjob', namespace='admin', stream_log=False, job_name=tfjob_name, 
                            # chief_count=num_chief, worker_count=num_workers, ps_count=num_ps,
                            pod_spec_mutators=[k8s_utils.get_resource_mutator(cpu=2, memory=4),
                                               mounting_pvc(pvc_name=pvc_name, pvc_mount_path=model_dir)])

fairing.config.run()

[W 200827 08:40:31 utils:51] The function mounting_pvc has been deprecated,                     please use `volume_mounts`
[I 200827 08:40:31 config:134] Using preprocessor: <kubeflow.fairing.preprocessors.base.BasePreProcessor object at 0x7f6344062f98>
[I 200827 08:40:31 config:136] Using builder: <kubeflow.fairing.builders.append.append.AppendBuilder object at 0x7f6344062320>
[I 200827 08:40:31 config:138] Using deployer: <kubeflow.fairing.deployers.tfjob.tfjob.TfJob object at 0x7f635c6eaa90>
[W 200827 08:40:31 append:50] Building image using Append builder...
[I 200827 08:40:31 base:107] Creating docker context: /tmp/fairing_context_vutlkw5z
[I 200827 08:40:31 docker_creds_:234] Loading Docker credentials for repository 'tensorflow/tensorflow:1.15.2-py3'
[W 200827 08:40:32 append:54] Image successfully built in 0.934535329000937s.
[W 200827 08:40:32 append:94] Pushing image index.docker.io/insoopark/mnist:F102750B...
[I 200827 08:40:32 docker_creds_:234] Loading Docker credentials f

(<kubeflow.fairing.preprocessors.base.BasePreProcessor at 0x7f6344062f98>,
 <kubeflow.fairing.builders.append.append.AppendBuilder at 0x7f6344062320>,
 <kubeflow.fairing.deployers.tfjob.tfjob.TfJob at 0x7f635c6eaa90>)

In [138]:
from kubeflow.tfjob import TFJobClient
tfjob_client = TFJobClient()

tfjob_client.get(tfjob_name, namespace=my_namespace)

{'apiVersion': 'kubeflow.org/v1',
 'kind': 'TFJob',
 'metadata': {'creationTimestamp': '2020-08-27T08:40:35Z',
  'generateName': 'fairing-tfjob-',
  'generation': 1,
  'labels': {'fairing-deployer': 'tfjob',
   'fairing-id': 'f9fb7e9e-e840-11ea-af76-92a5f5e2b415'},
  'managedFields': [{'apiVersion': 'kubeflow.org/v1',
    'fieldsType': 'FieldsV1',
    'fieldsV1': {'f:metadata': {'f:generateName': {},
      'f:labels': {'.': {}, 'f:fairing-deployer': {}, 'f:fairing-id': {}}},
     'f:spec': {'.': {},
      'f:tfReplicaSpecs': {'.': {},
       'f:Worker': {'.': {},
        'f:replicas': {},
        'f:template': {'.': {},
         'f:metadata': {'.': {},
          'f:annotations': {'.': {}, 'f:sidecar.istio.io/inject': {}},
          'f:labels': {'.': {}, 'f:fairing-deployer': {}, 'f:fairing-id': {}},
          'f:name': {}},
         'f:spec': {'.': {}, 'f:restartPolicy': {}, 'f:volumes': {}}}}}}},
    'manager': 'Swagger-Codegen',
    'operation': 'Update',
    'time': '2020-08-27T08:4

In [139]:
tfjob_client.wait_for_job(tfjob_name, namespace=my_namespace, watch=True)

mnist-training-987f            Running              2020-08-27T08:40:38Z          
mnist-training-987f            Succeeded            2020-08-27T08:41:32Z          


In [140]:
tfjob_client.is_job_succeeded(tfjob_name, namespace=my_namespace)

True

In [141]:
tfjob_client.get_logs(tfjob_name, namespace=my_namespace)

[I 200827 08:41:32 tf_job_client:386] The logs of Pod mnist-training-987f-worker-0:
    
    
    W0827 08:40:40.288545 139729072990016 module_wrapper.py:139] From /app/mnist.py:154: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead.
    
    
    W0827 08:40:40.288763 139729072990016 module_wrapper.py:139] From /app/mnist.py:154: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead.
    
    
    W0827 08:40:40.289884 139729072990016 module_wrapper.py:139] From /app/mnist.py:159: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.
    
    INFO:tensorflow:TF_CONFIG {}
    I0827 08:40:40.290011 139729072990016 mnist.py:159] TF_CONFIG {}
    INFO:tensorflow:cluster=None job_name=None task_index=None
    I0827 08:40:40.290480 139729072990016 mnist.py:165] cluster=None job_name=None task_index=None
    INFO:tensorflow:Will export model
    I0827 08:40:40.290560 1397290729

In [142]:
from kubeflow.fairing.deployers.kfserving.kfserving import KFServing
isvc_name = f'mnist-service-{uuid.uuid4().hex[:4]}'
isvc = KFServing('tensorflow', namespace=my_namespace, isvc_name=isvc_name,
                 default_storage_uri='pvc://' + pvc_name + '/export')
print(isvc)
isvc.deploy(isvc.generate_isvc())

<kubeflow.fairing.deployers.kfserving.kfserving.KFServing object at 0x7f635c831b00>
NAME                 READY      DEFAULT_TRAFFIC CANARY_TRAFFIC  URL                                               
mnist-service-e54c   Unknown                                                                                      


KeyboardInterrupt: 

In [None]:
from kfserving import KFServingClient
kfserving_client = KFServingClient()
kfserving_client.get(namespace=my_namespace)

In [None]:
mnist_isvc = kfserving_client.get(isvc_name, namespace=my_namespace)
mnist_isvc_name = mnist_isvc['metadata']['name']
mnist_isvc_endpoint = mnist_isvc['status'].get('url', '')
print("MNIST Service Endpoint: " + mnist_isvc_endpoint)

In [None]:
ISTIO_CLUSTER_IP=!kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.clusterIP}'
CLUSTER_IP=ISTIO_CLUSTER_IP[0]
MODEL_HOST=f"Host: {mnist_isvc_name}.{my_namespace}.example.com"
!curl -v -H "{MODEL_HOST}" http://{CLUSTER_IP}/v1/models/{mnist_isvc_name}:predict -d @./input.json

In [143]:
tfjob_client.delete(tfjob_name, namespace=my_namespace)

{'kind': 'Status',
 'apiVersion': 'v1',
 'metadata': {},
 'status': 'Success',
 'details': {'name': 'mnist-training-987f',
  'group': 'kubeflow.org',
  'kind': 'tfjobs',
  'uid': 'd70be868-a990-4feb-ac2f-a6852124f077'}}

In [None]:
kfserving_client.delete(isvc_name, namespace=my_namespace)