In [2]:
import tensorflow as tf
from tensorflow.python.keras.applications import InceptionV3
from tensorflow.keras.layers import Dense, Input, Activation, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import optimizers
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.callbacks import Callback
from tensorflow.python.keras.utils import multi_gpu_model

import os

class Caltech101(object):
    """Train a dense classification head on a frozen InceptionV3 backbone.

    The backbone is loaded with ImageNet weights and every one of its layers
    is frozen; only the Dense/BatchNormalization head added on top is trained.
    Images are read with ``flow_from_directory`` from ``data_dir`` and split
    into training/validation subsets by ``validation_split``.

    All constructor arguments are optional; the defaults reproduce the
    configuration that used to be hard-coded in :meth:`run`.

    Args:
        data_dir: Root directory passed to ``flow_from_directory``.
        image_size: ``(height, width)`` the images are resized to.
        batch_size: Batch size used by both generators.
        num_classes: Width of the final softmax layer.
        gpus: Number of GPUs to replicate the model on. Values below 2
            disable ``multi_gpu_model`` and train the plain model.
        epochs: Maximum number of training epochs.
        patience: Early-stopping patience, in epochs, on ``val_acc``.
        validation_split: Fraction of the images held out for validation.
    """

    def __init__(self, data_dir='/result/caltech101', image_size=(200, 200),
                 batch_size=16, num_classes=101, gpus=2, epochs=100,
                 patience=20, validation_split=0.2):
        self.data_dir = data_dir
        # Normalised to a tuple so it can be concatenated with the channel dim.
        self.image_size = tuple(image_size)
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.gpus = gpus
        self.epochs = epochs
        self.patience = patience
        self.validation_split = validation_split

    def _build_model(self):
        """Return the frozen InceptionV3 backbone with the trainable head."""
        # Named `image_input` rather than `input` to avoid shadowing the builtin.
        image_input = Input(shape=self.image_size + (3,))
        base_model = InceptionV3(input_tensor=image_input, include_top=False,
                                 weights='imagenet', pooling='max')

        # Freeze the pretrained backbone; only the head below is trained.
        for layer in base_model.layers:
            layer.trainable = False

        x = base_model.output
        x = Dense(1024, name='fully')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Dense(512)(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Dense(self.num_classes, activation='softmax', name='softmax')(x)
        return Model(base_model.input, x)

    def _build_generators(self):
        """Return ``(train_generator, validation_generator)`` for ``data_dir``."""
        # The ImageNet weights were trained on inputs scaled to [-1, 1] by
        # inception_v3.preprocess_input. Because the backbone is frozen it
        # cannot adapt to a different range, so use the matching preprocessing
        # instead of a plain 1/255 rescale.
        datagen = ImageDataGenerator(
            preprocessing_function=tf.keras.applications.inception_v3.preprocess_input,
            validation_split=self.validation_split)

        def flow(subset):
            return datagen.flow_from_directory(
                self.data_dir,
                target_size=self.image_size,
                batch_size=self.batch_size,
                class_mode='categorical',
                subset=subset)

        return flow('training'), flow('validation')

    def run(self):
        """Build, compile and train the model.

        Returns:
            The history object returned by ``fit_generator``.
        """
        tf.compat.v1.disable_eager_execution()

        model = self._build_model()
        model.summary()

        train_generator, validation_generator = self._build_generators()

        # multi_gpu_model rejects gpus < 2, so only wrap the model when
        # replication was actually requested.
        if self.gpus and self.gpus > 1:
            model = multi_gpu_model(model, gpus=self.gpus)
        model.compile(
            optimizer=tf.keras.optimizers.Adam(),
            loss='categorical_crossentropy',
            metrics=['acc'])

        early_stopping = EarlyStopping(patience=self.patience, mode='auto',
                                       monitor='val_acc')
        # Floor division: a final partial batch is not counted as a step.
        return model.fit_generator(
            train_generator,
            steps_per_epoch=train_generator.samples // self.batch_size,
            validation_data=validation_generator,
            epochs=self.epochs,
            callbacks=[early_stopping])
        
if __name__ == '__main__':
    # Without FAIRING_RUNTIME in the environment this process acts as the
    # submitter: it builds the image and deploys the job. With it set, the
    # process runs the training itself.
    # NOTE(review): presumably fairing sets FAIRING_RUNTIME inside the image
    # it builds -- confirm against the fairing version in use.
    if os.getenv('FAIRING_RUNTIME') is None:
        from kubeflow import fairing
        from kubeflow.fairing.kubernetes import utils as k8s_utils

        DOCKER_REGISTRY = 'kubeflow-registry.default.svc.cluster.local:30000'
        fairing.config.set_builder(
            'append',
            image_name='caltech-fairing-job',
            base_image='brightfly/tf-fairing:2.0-gpu',
            registry=DOCKER_REGISTRY,
            push=True)

        # Run as a Kubernetes job with the dataset PVC mounted at /result,
        # the root of the path Caltech101 reads its images from by default.
        fairing.config.set_deployer(
            'job',
            namespace='dudaji',
            pod_spec_mutators=[
                k8s_utils.mounting_pvc(pvc_name="caltech101",
                                       pvc_mount_path="/result")])

        # Detect whether we are inside an IPython/notebook kernel. IPython is
        # optional for plain-script use, so a missing package is treated the
        # same as "not running under IPython" instead of crashing.
        try:
            import IPython
            ipy = IPython.get_ipython()
        except ImportError:
            ipy = None
        if ipy is None:
            # Plain `python3 <file>` invocation: ship this file as the job's
            # input explicitly.
            fairing.config.set_preprocessor('python', input_files=[__file__])
        fairing.config.run()
    else:
        train = Caltech101()
        train.run()

[I 200216 21:57:31 config:123] Using preprocessor: <kubeflow.fairing.preprocessors.converted_notebook.ConvertNotebookPreprocessor object at 0x7f829405b550>
[I 200216 21:57:31 config:125] Using builder: <kubeflow.fairing.builders.append.append.AppendBuilder object at 0x7f82940af358>
[I 200216 21:57:31 config:127] Using deployer: <kubeflow.fairing.builders.append.append.AppendBuilder object at 0x7f82940af358>
[W 200216 21:57:31 append:50] Building image using Append builder...
[I 200216 21:57:31 base:105] Creating docker context: /tmp/fairing_context_gqf0szq6
[I 200216 21:57:32 converted_notebook:127] Converting caltech101_for_fairing.ipynb to caltech101_for_fairing.py
[I 200216 21:57:32 docker_creds_:234] Loading Docker credentials for repository 'brightfly/tf-fairing:2.0-gpu'
[W 200216 21:57:34 append:54] Image successfully built in 2.235296613071114s.
[W 200216 21:57:34 append:94] Pushing image kubeflow-registry.default.svc.cluster.local:30000/caltech-fairing-job:F4001E55...
[I 200216

2020-02-16 21:58:12.143513: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-02-16 21:58:12.193978: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1006] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-02-16 21:58:12.195116: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 0 with properties:
name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235
pciBusID: 0000:00:04.0
2020-02-16 21:58:12.195325: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1006] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-02-16 21:58:12.196181: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 1 with properties:
name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235
pciBusID: 0000:00:05.0
2020-02-16 

[W 200216 22:12:04 job:162] Cleaning up job fairing-job-2d29j...


  3/436 [..............................] - ETA: 2:59 - loss: 0.2489 - acc: 0.8958


ApiException: (404)
Reason: Not Found
HTTP response headers: HTTPHeaderDict({'Content-Type': 'application/json', 'Date': 'Sun, 16 Feb 2020 22:12:04 GMT', 'Content-Length': '224'})
HTTP response body: {"kind":"Status","apiVersion":"v1","metadata":{},"status":"Failure","message":"jobs.batch \"fairing-job-2d29j\" not found","reason":"NotFound","details":{"name":"fairing-job-2d29j","group":"batch","kind":"jobs"},"code":404}

