In [1]:
!pip show kubeflow-fairing
!pip install pyyaml

Name: kubeflow-fairing
Version: 1.0.1
Summary: Kubeflow Fairing Python SDK.
Home-page: https://github.com/kubeflow/fairing
Author: Kubeflow Authors
Author-email: hejinchi@cn.ibm.com
License: Apache License Version 2.0
Location: /usr/local/lib/python3.6/dist-packages
Requires: numpy, grpcio, kubernetes, docker, google-cloud-logging, requests, oauth2client, python-dateutil, setuptools, urllib3, cloudpickle, kubeflow-pytorchjob, kfserving, retrying, ibm-cos-sdk, google-api-python-client, httplib2, tornado, google-auth, six, boto3, azure-storage-file, google-cloud-storage, nbconvert, kubeflow-tfjob, future, azure-mgmt-storage, notebook
Required-by: 


In [2]:
DOCKER_REGISTRY = 'index.docker.io/insoopark'
my_namespace = 'admin'

In [3]:
pv_name = 'kubeflow-mnist'
pvc_name = 'mnist-pvc'

In [4]:
from kubernetes import client as k8s_client
from kubernetes import config as k8s_config
from kubeflow.fairing.utils import is_running_in_k8s
import yaml

if is_running_in_k8s():
    k8s_config.load_incluster_config()
else:
    k8s_config.load_kube_config()

k8s_core_api = k8s_client.CoreV1Api()

In [5]:
num_chief = 1 #number of Chief in TFJob 
num_ps = 1  #number of PS in TFJob 
num_workers = 1  #number of Worker in TFJob 
model_dir = "/mnt"
export_path = "/mnt/export" 
train_steps = "1000"
batch_size = "100"
learning_rate = "0.01"

In [8]:
import uuid
from kubeflow import fairing   
from kubeflow.fairing.kubernetes.utils import mounting_pvc
from kubeflow.fairing.kubernetes import utils as k8s_utils

tfjob_name = f'mnist-training-{uuid.uuid4().hex[:4]}'

output_map =  {
    "Dockerfile": "Dockerfile",
    "mnist.py": "mnist.py"
}

command=["python",
         "/opt/mnist.py",
         "--tf-model-dir=" + model_dir,
         "--tf-export-dir=" + export_path,
         "--tf-train-steps=" + train_steps,
         "--tf-batch-size=" + batch_size,
         "--tf-learning-rate=" + learning_rate]

fairing.config.set_preprocessor('python', input_files=["mnist.py"], executable="mnist.py")
fairing.config.set_builder(
            name='append',
            image_name='mnist',
            base_image='tensorflow/tensorflow:1.15.2-py3',
            registry=DOCKER_REGISTRY, 
            push=True)
fairing.config.set_deployer('tfjob', namespace='admin', stream_log=False, job_name=tfjob_name, 
                            pod_spec_mutators=[k8s_utils.get_resource_mutator(cpu=1, memory=2),
                                               mounting_pvc(pvc_name=pvc_name, pvc_mount_path=model_dir)])

fairing.config.run()

[W 200903 23:51:19 utils:51] The function mounting_pvc has been deprecated,                     please use `volume_mounts`
[I 200903 23:51:19 config:134] Using preprocessor: <kubeflow.fairing.preprocessors.base.BasePreProcessor object at 0x7ff1cf5455f8>
[I 200903 23:51:19 config:136] Using builder: <kubeflow.fairing.builders.append.append.AppendBuilder object at 0x7ff2087b4390>
[I 200903 23:51:19 config:138] Using deployer: <kubeflow.fairing.deployers.tfjob.tfjob.TfJob object at 0x7ff1cf545710>
[W 200903 23:51:19 append:50] Building image using Append builder...
[I 200903 23:51:19 base:107] Creating docker context: /tmp/fairing_context_qnb6kjf9
[I 200903 23:51:19 docker_creds_:234] Loading Docker credentials for repository 'tensorflow/tensorflow:1.15.2-py3'
[W 200903 23:51:20 append:54] Image successfully built in 0.965359633999924s.
[W 200903 23:51:20 append:94] Pushing image index.docker.io/insoopark/mnist:34680F47...
[I 200903 23:51:20 docker_creds_:234] Loading Docker credentials f

(<kubeflow.fairing.preprocessors.base.BasePreProcessor at 0x7ff1cf5455f8>,
 <kubeflow.fairing.builders.append.append.AppendBuilder at 0x7ff2087b4390>,
 <kubeflow.fairing.deployers.tfjob.tfjob.TfJob at 0x7ff1cf545710>)

In [9]:
from kubeflow.tfjob import TFJobClient
tfjob_client = TFJobClient()

tfjob_client.get(tfjob_name, namespace=my_namespace)

{'apiVersion': 'kubeflow.org/v1',
 'kind': 'TFJob',
 'metadata': {'creationTimestamp': '2020-09-03T23:51:22Z',
  'generateName': 'fairing-tfjob-',
  'generation': 1,
  'labels': {'fairing-deployer': 'tfjob',
   'fairing-id': '5ed66aea-ee40-11ea-8471-3e157ccd4c2e'},
  'name': 'mnist-training-0ff7',
  'namespace': 'admin',
  'resourceVersion': '11228',
  'selfLink': '/apis/kubeflow.org/v1/namespaces/admin/tfjobs/mnist-training-0ff7',
  'uid': '8662ccaa-4c82-492e-8901-00deefe3a8c6'},
 'spec': {'tfReplicaSpecs': {'Worker': {'replicas': 1,
    'template': {'metadata': {'annotations': {'sidecar.istio.io/inject': 'false'},
      'labels': {'fairing-deployer': 'tfjob',
       'fairing-id': '5ed66aea-ee40-11ea-8471-3e157ccd4c2e'},
      'name': 'fairing-deployer'},
     'spec': {'containers': [{'command': ['python', '/app/mnist.py'],
        'env': [{'name': 'FAIRING_RUNTIME', 'value': '1'}],
        'image': 'index.docker.io/insoopark/mnist:34680F47',
        'name': 'tensorflow',
        'res

In [10]:
tfjob_client.wait_for_job(tfjob_name, namespace=my_namespace, watch=True)

NAME                           STATE                TIME                          
mnist-training-0ff7            Created              2020-09-03T23:51:22Z          
mnist-training-0ff7            Running              2020-09-03T23:52:06Z          
mnist-training-0ff7            Succeeded            2020-09-03T23:53:42Z          


In [11]:
tfjob_client.is_job_succeeded(tfjob_name, namespace=my_namespace)

True

In [12]:
tfjob_client.get_logs(tfjob_name, namespace=my_namespace)

[I 200903 23:53:42 tf_job_client:386] The logs of Pod mnist-training-0ff7-worker-0:
    
    
    W0903 23:52:07.903190 140376088172352 module_wrapper.py:139] From /app/mnist.py:155: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead.
    
    
    W0903 23:52:07.903398 140376088172352 module_wrapper.py:139] From /app/mnist.py:155: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead.
    
    
    W0903 23:52:07.904510 140376088172352 module_wrapper.py:139] From /app/mnist.py:160: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.
    
    INFO:tensorflow:TF_CONFIG {}
    I0903 23:52:07.904625 140376088172352 mnist.py:160] TF_CONFIG {}
    INFO:tensorflow:cluster=None job_name=None task_index=None
    I0903 23:52:07.905145 140376088172352 mnist.py:166] cluster=None job_name=None task_index=None
    INFO:tensorflow:Will export model
    I0903 23:52:07.905232 1403760881

In [13]:
from kubernetes import client

from kfserving import KFServingClient
from kfserving import constants
from kfserving import utils
from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TensorflowSpec
from kfserving import V1alpha2InferenceServiceSpec
from kfserving import V1alpha2InferenceService
from kubernetes.client import V1ResourceRequirements

In [14]:
namespace = utils.get_default_target_namespace()
print(namespace)

admin


In [15]:
isvc_name = f'mnist-service-{uuid.uuid4().hex[:4]}'
api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
# due to cpu resource is not enough, inference service is not ready. changed from 1Gi to 100m
default_endpoint_spec = V1alpha2EndpointSpec(
                          predictor=V1alpha2PredictorSpec(
                            tensorflow=V1alpha2TensorflowSpec(
                              storage_uri='pvc://mnist-pvc/export/mnist',
                              resources=V1ResourceRequirements(
                                  requests={'cpu':'100m','memory':'1Gi'},
                                  limits={'cpu':'100m', 'memory':'1Gi'}))))
    
isvc = V1alpha2InferenceService(api_version=api_version,
                          kind=constants.KFSERVING_KIND,
                          metadata=client.V1ObjectMeta(
                              name=isvc_name, namespace=namespace),
                          spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

In [16]:
KFServing = KFServingClient()
KFServing.create(isvc)

{'apiVersion': 'serving.kubeflow.org/v1alpha2',
 'kind': 'InferenceService',
 'metadata': {'creationTimestamp': '2020-09-03T23:54:15Z',
  'generation': 1,
  'name': 'mnist-service-782c',
  'namespace': 'admin',
  'resourceVersion': '12029',
  'selfLink': '/apis/serving.kubeflow.org/v1alpha2/namespaces/admin/inferenceservices/mnist-service-782c',
  'uid': 'c2e0f427-5146-414f-9ead-093ea550cf1d'},
 'spec': {'default': {'predictor': {'tensorflow': {'resources': {'limits': {'cpu': '100m',
       'memory': '1Gi'},
      'requests': {'cpu': '100m', 'memory': '1Gi'}},
     'runtimeVersion': '1.14.0',
     'storageUri': 'pvc://mnist-pvc/export/mnist'}}}}}

In [17]:
KFServing.get(isvc_name, namespace=namespace, watch=True, timeout_seconds=120)

NAME                 READY      DEFAULT_TRAFFIC CANARY_TRAFFIC  URL                                               
mnist-service-782c   False                                                                                        
mnist-service-782c   False                                                                                        
mnist-service-782c   True       100                             http://mnist-service-782c.admin.example.com/v1/...


In [18]:
# can get ip address from (kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.clusterIP}')
# 아래 주소로 날리면 302 응답이 떨어짐. -L 옵션을 줘야 함 
# 외부에서 외부 IP로 접속하면 (GCP의 경우 404 에러 발생 -> 쩝. 당연함)
!curl -L -v -H "mnist-service-782c.admin.example.com" http://10.96.50.44/v1/models/mnist-service-782c:predict -d @./input.json

*   Trying 10.96.50.44...
* TCP_NODELAY set
* Connected to 10.96.50.44 (10.96.50.44) port 80 (#0)
> POST /v1/models/mnist-service-782c:predict HTTP/1.1
> Host: 10.96.50.44
> User-Agent: curl/7.58.0
> Accept: */*
> Content-Length: 2052
> Content-Type: application/x-www-form-urlencoded
> Expect: 100-continue
> 
< HTTP/1.1 100 Continue
* We are completely uploaded and fine
< HTTP/1.1 302 Found
< location: /dex/auth?client_id=kubeflow-oidc-authservice&redirect_uri=%2Flogin%2Foidc&response_type=code&scope=profile+email+groups+openid&state=MTU5OTE3ODM2MXxFd3dBRUdoUVRtMU5iV1JtTW1WVFNubFpkSEU9fJX2YoRe6AlS4VS99O9GrmJDCcQflfhRhHUO_R1ytTuQ
< date: Fri, 04 Sep 2020 00:12:41 GMT
< content-length: 0
< x-envoy-upstream-service-time: 2
< server: istio-envoy
< 
* Connection #0 to host 10.96.50.44 left intact
* Issue another request to this URL: 'http://10.96.50.44/dex/auth?client_id=kubeflow-oidc-authservice&redirect_uri=%2Flogin%2Foidc&response_type=code&scope=profile+email+groups+openid&state=MTU5OTE

In [19]:
KFServing.delete(isvc_name, namespace=namespace)

{'kind': 'Status',
 'apiVersion': 'v1',
 'metadata': {},
 'status': 'Success',
 'details': {'name': 'mnist-service-782c',
  'group': 'serving.kubeflow.org',
  'kind': 'inferenceservices',
  'uid': 'c2e0f427-5146-414f-9ead-093ea550cf1d'}}

In [20]:
tfjob_client.delete(tfjob_name, namespace=my_namespace)

{'kind': 'Status',
 'apiVersion': 'v1',
 'metadata': {},
 'status': 'Success',
 'details': {'name': 'mnist-training-0ff7',
  'group': 'kubeflow.org',
  'kind': 'tfjobs',
  'uid': '8662ccaa-4c82-492e-8901-00deefe3a8c6'}}