In [66]:
!pip show kubeflow-fairing
!pip install pyyaml

Name: kubeflow-fairing
Version: 1.0.1
Summary: Kubeflow Fairing Python SDK.
Home-page: https://github.com/kubeflow/fairing
Author: Kubeflow Authors
Author-email: hejinchi@cn.ibm.com
License: Apache License Version 2.0
Location: /usr/local/lib/python3.6/dist-packages
Requires: kubeflow-tfjob, requests, google-cloud-logging, six, nbconvert, docker, oauth2client, azure-storage-file, google-api-python-client, numpy, tornado, boto3, httplib2, grpcio, kfserving, kubeflow-pytorchjob, google-auth, notebook, future, google-cloud-storage, cloudpickle, retrying, ibm-cos-sdk, setuptools, python-dateutil, urllib3, azure-mgmt-storage, kubernetes
Required-by: 


In [67]:
# Kubernetes namespace passed to the TFJob client calls in this notebook.
my_namespace = 'admin'

# Registry prefix handed to the fairing builder; the training image is pushed here.
DOCKER_REGISTRY = 'index.docker.io/insoopark'

In [68]:
# Names of the PersistentVolume and PersistentVolumeClaim used for model storage.
# pvc_name is the claim mounted into the training pods.
pv_name, pvc_name = 'kubeflow-mnist', 'mnist-pvc'

In [69]:
from kubernetes import client as k8s_client
from kubernetes import config as k8s_config
from kubeflow.fairing.utils import is_running_in_k8s
import yaml

# Choose the configuration loader: in-cluster settings when the notebook runs
# inside Kubernetes, the local kubeconfig otherwise.
load_k8s_config = (
    k8s_config.load_incluster_config
    if is_running_in_k8s()
    else k8s_config.load_kube_config
)
load_k8s_config()

# Handle on the Kubernetes CoreV1 API.
k8s_core_api = k8s_client.CoreV1Api()

In [70]:
# Replica counts intended for the TFJob.
num_chief = 1    # Chief replicas
num_ps = 1       # parameter-server (PS) replicas
num_workers = 1  # Worker replicas

# Paths used for the --tf-model-dir / --tf-export-dir flags; model_dir is also
# where the PVC is mounted in the training pods.
model_dir = "/mnt"
export_path = "/mnt/export"

# Hyperparameters are kept as strings because they are concatenated into
# command-line flags.
train_steps = "1000"
batch_size = "100"
learning_rate = "0.01"

In [71]:
import uuid
from kubeflow import fairing   
from kubeflow.fairing.kubernetes.utils import mounting_pvc
from kubeflow.fairing.kubernetes import utils as k8s_utils

# Short random suffix so repeated runs get distinct TFJob names.
tfjob_name = f'mnist-training-{uuid.uuid4().hex[:4]}'

# NOTE(review): `output_map` and `command` are defined here but never passed to
# fairing. The submitted TFJob runs ['python', '/app/mnist.py'] (see the job
# spec printed by the next cell), so the --tf-* flags below are NOT applied and
# mnist.py runs with its own defaults. The replica counts
# (num_chief / num_ps / num_workers) are likewise not given to the deployer.
# Wire them in deliberately if they are meant to take effect.
output_map = {
    "Dockerfile": "Dockerfile",
    "mnist.py": "mnist.py"
}

command = ["python",
           "/opt/mnist.py",
           "--tf-model-dir=" + model_dir,
           "--tf-export-dir=" + export_path,
           "--tf-train-steps=" + train_steps,
           "--tf-batch-size=" + batch_size,
           "--tf-learning-rate=" + learning_rate]

# Package mnist.py as the entry point of the training image.
fairing.config.set_preprocessor('python', input_files=["mnist.py"], executable="mnist.py")

# Build on top of the TensorFlow base image with the 'append' builder and push
# the result to DOCKER_REGISTRY.
fairing.config.set_builder(
    name='append',
    image_name='mnist',
    base_image='tensorflow/tensorflow:1.15.2-py3',
    registry=DOCKER_REGISTRY,
    push=True)

# Deploy as a TFJob. The namespace comes from `my_namespace` (not a second
# hard-coded 'admin') so it always matches the TFJob client calls below.
# `volume_mounts('pvc', ...)` replaces the deprecated `mounting_pvc` helper and
# mounts the claim at model_dir.
fairing.config.set_deployer(
    'tfjob',
    namespace=my_namespace,
    stream_log=False,
    job_name=tfjob_name,
    pod_spec_mutators=[
        k8s_utils.get_resource_mutator(cpu=1, memory=2),
        k8s_utils.volume_mounts('pvc', pvc_name, mount_path=model_dir),
    ])

fairing.config.run()

[W 200829 10:08:11 utils:51] The function mounting_pvc has been deprecated,                     please use `volume_mounts`
[I 200829 10:08:11 config:134] Using preprocessor: <kubeflow.fairing.preprocessors.base.BasePreProcessor object at 0x7fbe51a52978>
[I 200829 10:08:11 config:136] Using builder: <kubeflow.fairing.builders.append.append.AppendBuilder object at 0x7fbe5064abe0>
[I 200829 10:08:11 config:138] Using deployer: <kubeflow.fairing.deployers.tfjob.tfjob.TfJob object at 0x7fbe5064ac88>
[W 200829 10:08:11 append:50] Building image using Append builder...
[I 200829 10:08:11 base:107] Creating docker context: /tmp/fairing_context_l76fuw8f
[I 200829 10:08:11 docker_creds_:234] Loading Docker credentials for repository 'tensorflow/tensorflow:1.15.2-py3'
[W 200829 10:08:12 append:54] Image successfully built in 0.6545741519985313s.
[W 200829 10:08:12 append:94] Pushing image index.docker.io/insoopark/mnist:ADFF44F9...
[I 200829 10:08:12 docker_creds_:234] Loading Docker credentials 

(<kubeflow.fairing.preprocessors.base.BasePreProcessor at 0x7fbe51a52978>,
 <kubeflow.fairing.builders.append.append.AppendBuilder at 0x7fbe5064abe0>,
 <kubeflow.fairing.deployers.tfjob.tfjob.TfJob at 0x7fbe5064ac88>)

In [72]:
from kubeflow.tfjob import TFJobClient
# TFJob client, reused by the following cells.
tfjob_client = TFJobClient()

# Look up the TFJob that was just submitted and display its resource.
tfjob_client.get(
    tfjob_name,
    namespace=my_namespace,
)

{'apiVersion': 'kubeflow.org/v1',
 'kind': 'TFJob',
 'metadata': {'creationTimestamp': '2020-08-29T10:08:14Z',
  'generateName': 'fairing-tfjob-',
  'generation': 1,
  'labels': {'fairing-deployer': 'tfjob',
   'fairing-id': '8cf4483a-e9df-11ea-bf92-26a9a7e45509'},
  'name': 'mnist-training-6ed9',
  'namespace': 'admin',
  'resourceVersion': '89436',
  'selfLink': '/apis/kubeflow.org/v1/namespaces/admin/tfjobs/mnist-training-6ed9',
  'uid': '86892e5e-92e2-4a3e-af42-7d60500cd6cf'},
 'spec': {'tfReplicaSpecs': {'Worker': {'replicas': 1,
    'template': {'metadata': {'annotations': {'sidecar.istio.io/inject': 'false'},
      'labels': {'fairing-deployer': 'tfjob',
       'fairing-id': '8cf4483a-e9df-11ea-bf92-26a9a7e45509'},
      'name': 'fairing-deployer'},
     'spec': {'containers': [{'command': ['python', '/app/mnist.py'],
        'env': [{'name': 'FAIRING_RUNTIME', 'value': '1'}],
        'image': 'index.docker.io/insoopark/mnist:ADFF44F9',
        'name': 'tensorflow',
        'res

In [73]:
# Wait on the TFJob; with watch=True the status changes are printed as they happen.
tfjob_client.wait_for_job(
    tfjob_name,
    namespace=my_namespace,
    watch=True,
)

mnist-training-6ed9            Running              2020-08-29T10:08:16Z          
mnist-training-6ed9            Succeeded            2020-08-29T10:10:05Z          


In [74]:
# Check whether the TFJob finished successfully.
tfjob_client.is_job_succeeded(
    tfjob_name,
    namespace=my_namespace,
)

True

In [75]:
# Print the logs of the TFJob's pod(s).
tfjob_client.get_logs(
    tfjob_name,
    namespace=my_namespace,
)

[I 200829 10:10:05 tf_job_client:386] The logs of Pod mnist-training-6ed9-worker-0:
    
    
    W0829 10:08:17.498552 140699284064064 module_wrapper.py:139] From /app/mnist.py:155: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead.
    
    
    W0829 10:08:17.498784 140699284064064 module_wrapper.py:139] From /app/mnist.py:155: The name tf.logging.INFO is deprecated. Please use tf.compat.v1.logging.INFO instead.
    
    
    W0829 10:08:17.500096 140699284064064 module_wrapper.py:139] From /app/mnist.py:160: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.
    
    INFO:tensorflow:TF_CONFIG {}
    I0829 10:08:17.500215 140699284064064 mnist.py:160] TF_CONFIG {}
    INFO:tensorflow:cluster=None job_name=None task_index=None
    I0829 10:08:17.500962 140699284064064 mnist.py:166] cluster=None job_name=None task_index=None
    INFO:tensorflow:Will export model
    I0829 10:08:17.501038 1406992840

In [76]:
from kubernetes import client

from kfserving import KFServingClient
from kfserving import constants
from kfserving import utils
from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TensorflowSpec
from kfserving import V1alpha2InferenceServiceSpec
from kfserving import V1alpha2InferenceService
from kubernetes.client import V1ResourceRequirements

In [77]:
# Namespace used for the InferenceService below, as reported by kfserving's
# utils.get_default_target_namespace(); printed so it can be checked by eye.
namespace = utils.get_default_target_namespace()
print(namespace)

admin


In [78]:
# Short random suffix so repeated runs get distinct InferenceService names.
isvc_name = f'mnist-service-{uuid.uuid4().hex[:4]}'
api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION

# The cluster did not have enough CPU for the InferenceService to become ready,
# so the CPU request/limit was lowered to 100m (the original note reads
# "changed from 1Gi to 100m").
predictor_resources = V1ResourceRequirements(
    requests={'cpu': '100m', 'memory': '1Gi'},
    limits={'cpu': '100m', 'memory': '1Gi'})

# Serve the exported model from the PVC. The claim name comes from `pvc_name`
# so the URI cannot drift from the claim the training job wrote to.
default_endpoint_spec = V1alpha2EndpointSpec(
    predictor=V1alpha2PredictorSpec(
        tensorflow=V1alpha2TensorflowSpec(
            storage_uri=f'pvc://{pvc_name}/export/mnist',
            resources=predictor_resources)))

isvc = V1alpha2InferenceService(
    api_version=api_version,
    kind=constants.KFSERVING_KIND,
    metadata=client.V1ObjectMeta(name=isvc_name, namespace=namespace),
    spec=V1alpha2InferenceServiceSpec(default=default_endpoint_spec))

In [79]:
# KFServing client, reused by the following cells.
KFServing = KFServingClient()

# Submit the InferenceService defined in the previous cell.
KFServing.create(
    isvc,
)

{'apiVersion': 'serving.kubeflow.org/v1alpha2',
 'kind': 'InferenceService',
 'metadata': {'creationTimestamp': '2020-08-29T10:10:05Z',
  'generation': 1,
  'name': 'mnist-service-9a07',
  'namespace': 'admin',
  'resourceVersion': '90074',
  'selfLink': '/apis/serving.kubeflow.org/v1alpha2/namespaces/admin/inferenceservices/mnist-service-9a07',
  'uid': '80504a9a-145b-41a3-8914-e315b5a31737'},
 'spec': {'default': {'predictor': {'tensorflow': {'resources': {'limits': {'cpu': '100m',
       'memory': '1Gi'},
      'requests': {'cpu': '100m', 'memory': '1Gi'}},
     'runtimeVersion': '1.14.0',
     'storageUri': 'pvc://mnist-pvc/export/mnist'}}}}}

In [80]:
# Watch the InferenceService's readiness for up to 120 seconds.
KFServing.get(
    isvc_name,
    namespace=namespace,
    watch=True,
    timeout_seconds=120,
)

NAME                 READY      DEFAULT_TRAFFIC CANARY_TRAFFIC  URL                                               
mnist-service-9a07   Unknown                                                                                      
mnist-service-9a07   False                                                                                        
mnist-service-9a07   False                                                                                        
mnist-service-9a07   False                                                                                        
mnist-service-9a07   False                                                                                        
mnist-service-9a07   True       100                             http://mnist-service-9a07.admin.example.com/v1/...


In [81]:
# can get ip address from (kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.clusterIP}')
# 아래 주소로 날리면 302 응답이 떨어짐. -L 옵션을 줘야 함 
# 외부에서 외부 IP로 접속하면 (GCP의 경우 404 에러 발생 -> 쩝. 당연함)
!curl -L -v -H "mnist-service-6822.admin.example.com" http://10.99.127.209/v1/models/mnist-service-6822:predict -d @./input.json

*   Trying 10.99.127.209...
* TCP_NODELAY set
* Connected to 10.99.127.209 (10.99.127.209) port 80 (#0)
> POST /v1/models/mnist-service-6822:predict HTTP/1.1
> Host: 10.99.127.209
> User-Agent: curl/7.58.0
> Accept: */*
> Content-Length: 2052
> Content-Type: application/x-www-form-urlencoded
> Expect: 100-continue
> 
< HTTP/1.1 100 Continue
* We are completely uploaded and fine
< HTTP/1.1 302 Found
< location: /dex/auth?client_id=kubeflow-oidc-authservice&redirect_uri=%2Flogin%2Foidc&response_type=code&scope=profile+email+groups+openid&state=MTU5ODY5NTgxOXxFd3dBRUV3NWRWSlBURk41VnpGRFVHWjVWMjA9fJ0CHcHTBM9TgVIbA3szKD15fAvIPGsoKeLDZC8GZxb3
< date: Sat, 29 Aug 2020 10:10:19 GMT
< content-length: 0
< x-envoy-upstream-service-time: 6
< server: envoy
< 
* Connection #0 to host 10.99.127.209 left intact
* Issue another request to this URL: 'http://10.99.127.209/dex/auth?client_id=kubeflow-oidc-authservice&redirect_uri=%2Flogin%2Foidc&response_type=code&scope=profile+email+groups+openid&state=M

In [82]:
# Clean up: remove the InferenceService created by this notebook.
KFServing.delete(
    isvc_name,
    namespace=namespace,
)

{'kind': 'Status',
 'apiVersion': 'v1',
 'metadata': {},
 'status': 'Success',
 'details': {'name': 'mnist-service-9a07',
  'group': 'serving.kubeflow.org',
  'kind': 'inferenceservices',
  'uid': '80504a9a-145b-41a3-8914-e315b5a31737'}}

In [83]:
# Clean up: remove the TFJob created by this notebook.
tfjob_client.delete(
    tfjob_name,
    namespace=my_namespace,
)

{'kind': 'Status',
 'apiVersion': 'v1',
 'metadata': {},
 'status': 'Success',
 'details': {'name': 'mnist-training-6ed9',
  'group': 'kubeflow.org',
  'kind': 'tfjobs',
  'uid': '86892e5e-92e2-4a3e-af42-7d60500cd6cf'}}