In [1]:
import os
import tensorflow as tf
import argparse
import logging
import minio_utils as mu
from kfserving import utils
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from datetime import datetime, timezone
from tensorflow.python.keras.callbacks import Callback

class MyModel(object):
    """MNIST CNN training job driven by command-line flags.

    ``train()`` is the only public entry point. It parses hyperparameters
    from ``sys.argv``, fits a small convolutional network on the Keras MNIST
    dataset, and optionally saves and exports the resulting model.
    """

    @staticmethod
    def _parse_bool(value):
        """Convert a command-line token into a real boolean.

        ``argparse`` with ``type=bool`` calls ``bool("False")``, which is
        ``True`` for every non-empty string, so the flag could never be
        switched off with a word. This parser accepts the usual spellings and
        rejects anything else.

        Args:
            value: the raw token (or an actual ``bool``).

        Returns:
            ``True`` or ``False``.

        Raises:
            argparse.ArgumentTypeError: if the token is not a recognised
                boolean spelling.
        """
        if isinstance(value, bool):
            return value
        token = str(value).strip().lower()
        if token in ('true', 't', 'yes', 'y', '1'):
            return True
        # The empty string stays falsy, matching what bool('') returned before.
        if token in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value, got {!r}'.format(value))

    @staticmethod
    def _build_arg_parser():
        """Return the ``ArgumentParser`` describing every supported flag."""
        parser = argparse.ArgumentParser()
        parser.add_argument('--is_save_model', required=False,
                            type=MyModel._parse_bool, default=True)
        parser.add_argument('--epoch', required=False, type=int, default=1)
        parser.add_argument('--batch_size', required=False, type=int, default=16)
        parser.add_argument('--learning_rate', required=False, type=float, default=0.01)
        parser.add_argument('--dropout_rate', required=False, type=float, default=0.2)
        # Default previously read '/reuslt/...'; aligned with the '/result'
        # prefix used by --saved_model_dir. train() does not read this value.
        parser.add_argument('--checkpoint_dir', required=False,
                            default='/result/training_checkpoints')
        parser.add_argument('--model_version', required=False, default='001')
        parser.add_argument('--saved_model_dir', required=False, default='/result/saved_model')
        return parser

    @staticmethod
    def _build_model(input_shape, num_classes, dropout_rate):
        """Assemble the (uncompiled) Conv -> Pool -> Dense classifier.

        Args:
            input_shape: shape of one input sample, passed to the first layer.
            num_classes: width of the final softmax layer.
            dropout_rate: rate used by both ``Dropout`` layers.

        Returns:
            A ``Sequential`` model.
        """
        model = Sequential()
        model.add(Conv2D(32, kernel_size=(3, 3),
                         activation='relu',
                         input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(dropout_rate))
        model.add(Flatten())
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(dropout_rate))
        model.add(Dense(num_classes, activation='softmax'))
        return model

    def train(self):
        """Parse flags, train on MNIST, and optionally save/export the model.

        Side effects: configures the root logger to write to
        ``/var/log/katib/mnist.log``, loads the MNIST dataset through Keras,
        and, when ``--is_save_model`` is true, writes a SavedModel under
        ``<saved_model_dir>/<model_version>`` and hands that path to
        ``minio_utils.export_object_storage``.
        """
        logging.basicConfig(filename='/var/log/katib/mnist.log', level=logging.DEBUG)

        args = self._build_arg_parser().parse_args()

        mnist = tf.keras.datasets.mnist
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        # Presumably rescales 8-bit pixel intensities into [0, 1].
        x_train, x_test = x_train / 255.0, x_test / 255.0

        img_rows, img_cols = 28, 28
        num_classes = 10

        # Add the single channel axis where the active Keras backend expects it.
        if tf.keras.backend.image_data_format() == 'channels_first':
            x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
            x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
            input_shape = (1, img_rows, img_cols)
        else:
            x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
            x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
            input_shape = (img_rows, img_cols, 1)

        model = self._build_model(input_shape, num_classes, args.dropout_rate)

        # `learning_rate` is the documented argument name; `lr` is only a
        # deprecated alias for it.
        sgd = tf.keras.optimizers.SGD(learning_rate=args.learning_rate,
                                      decay=1e-6,
                                      momentum=0.9,
                                      nesterov=True)

        model.compile(optimizer=sgd,
                      loss='sparse_categorical_crossentropy',
                      metrics=['acc'])

        # verbose=0: progress is reported only through the KatibMetricLog callback.
        model.fit(x_train, y_train,
                  batch_size=args.batch_size,
                  verbose=0,
                  validation_data=(x_test, y_test),
                  epochs=args.epoch,
                  callbacks=[KatibMetricLog()])

        if args.is_save_model:
            saved_model_path = args.saved_model_dir + "/" + args.model_version
            model.save(saved_model_path, save_format='tf')
            # NOTE(review): presumably uploads the saved model to object
            # storage - confirm against minio_utils.
            mu.export_object_storage(utils.get_default_target_namespace(), saved_model_path)
        
        
        
class KatibMetricLog(Callback):
    """Keras callback that writes metrics to the root logger as ``name=value`` pairs.

    Each record starts with a newline so the timestamp and metrics land on
    their own line in the log file (presumably the layout a Katib metrics
    collector parses - confirm against the experiment spec).
    """

    @staticmethod
    def _timestamp():
        """Return the current local time as an ISO-8601 string with UTC offset."""
        return datetime.now(timezone.utc).astimezone().isoformat()

    def on_batch_end(self, batch, logs=None):
        """Log training accuracy and loss after every batch.

        Args:
            batch: index of the batch that just finished (unused).
            logs: metric dict supplied by Keras; must contain ``'acc'`` and
                ``'loss'``.

        Raises:
            KeyError: if an expected metric is missing from ``logs``.
        """
        # None instead of a shared mutable `{}` default; a missing metric
        # still fails loudly with KeyError exactly as before.
        logs = logs if logs is not None else {}
        logging.info('\n{} accuracy={:.4f} loss={:.4f}'.format(self._timestamp(), logs['acc'], logs['loss']))

    def on_epoch_end(self, epoch, logs=None):
        """Log validation accuracy and loss after every epoch.

        Args:
            epoch: index of the epoch that just finished (unused).
            logs: metric dict supplied by Keras; must contain ``'val_acc'``
                and ``'val_loss'``.

        Raises:
            KeyError: if an expected metric is missing from ``logs``.
        """
        logs = logs if logs is not None else {}
        logging.info('\n{} val_acc={:.4f} val_loss={:.4f}\n'.format(self._timestamp(), logs['val_acc'], logs['val_loss']))

if __name__ == '__main__':
    # Without FAIRING_RUNTIME in the environment this script builds and pushes
    # the training image; with it set, it runs the training job itself.
    # NOTE(review): FAIRING_RUNTIME is presumably set by Fairing inside the
    # built container - confirm.
    if os.getenv('FAIRING_RUNTIME') is None:
        # Disabled alternative (build and deploy through fairing.config),
        # kept for reference:
        #
        # from kubeflow import fairing
        # from kubeflow.fairing.kubernetes import utils as k8s_utils
        #
        # DOCKER_REGISTRY = 'kubeflow-registry.default.svc.cluster.local:30000'
        # fairing.config.set_builder(
        #     'append',
        #     image_name='katib-job',
        #     base_image='brightfly/kubeflow-jupyter-lab:tf2.0-gpu',
        #     registry=DOCKER_REGISTRY,
        #     push=True)
        # # cpu 1, memory 1GiB
        # fairing.config.set_deployer('job',
        #                             namespace='amaramusic'
        #                             )
        # output_map = { 'minio_utils.py':'/app/minio_utils.py'}
        # fairing.config.set_preprocessor('notebook',
        #                                 output_map=output_map)
        # # python3
        # #fairing.config.set_preprocessor('python', input_files=[__file__])
        # fairing.config.run()

        # Imported here so the training path does not need Fairing's builder modules.
        from kubeflow.fairing.builders.append.append import AppendBuilder
        from kubeflow.fairing.preprocessors.converted_notebook import ConvertNotebookPreprocessor

        DOCKER_REGISTRY = 'kubeflow-registry.default.svc.cluster.local:30000'
        base_image='brightfly/kubeflow-jupyter-lab:tf2.0-cpu'
        image_name='mnist-train'

        builder = AppendBuilder(
            registry=DOCKER_REGISTRY,
            image_name=image_name,
            base_image=base_image,
            push=True,

            # Ships minio_utils.py alongside the converted notebook at /app.
            preprocessor=ConvertNotebookPreprocessor(
                output_map = { 'minio_utils.py':'/app/minio_utils.py'} ,
                notebook_file="mnist_for_train.ipynb" )
            )
        builder.build()
    else:
        remote_train = MyModel()
        remote_train.train()


[W 201011 00:47:57 append:50] Building image using Append builder...
[I 201011 00:47:57 base:105] Creating docker context: /tmp/fairing_context_wlenwera
[I 201011 00:47:57 converted_notebook:127] Converting mnist_for_train.ipynb to mnist_for_train.py
[I 201011 00:47:57 docker_creds_:234] Loading Docker credentials for repository 'brightfly/kubeflow-jupyter-lab:tf2.0-cpu'
[W 201011 00:48:00 append:54] Image successfully built in 2.5555068920366466s.
[W 201011 00:48:00 append:94] Pushing image kubeflow-registry.default.svc.cluster.local:30000/mnist-train:C3081A64...
[I 201011 00:48:00 docker_creds_:234] Loading Docker credentials for repository 'kubeflow-registry.default.svc.cluster.local:30000/mnist-train:C3081A64'
[W 201011 00:48:00 append:81] Uploading kubeflow-registry.default.svc.cluster.local:30000/mnist-train:C3081A64
[I 201011 00:48:00 docker_session_:280] Layer sha256:66257906239d377cd700c566b27f12895ccefcd8d95eae7377f600208151d8e0 exists, skipping
[I 201011 00:48:00 docker_sess

In [1]:
!pip freeze

absl-py==0.8.0
adal==1.2.2
argo-models==2.2.1a0
asn1crypto==0.24.0
astor==0.8.0
attrs==19.3.0
azure==4.0.0
azure-applicationinsights==0.1.0
azure-batch==4.1.3
azure-common==1.1.24
azure-cosmosdb-nspkg==2.0.2
azure-cosmosdb-table==1.0.6
azure-datalake-store==0.0.48
azure-eventgrid==1.3.0
azure-graphrbac==0.40.0
azure-keyvault==1.1.0
azure-loganalytics==0.1.0
azure-mgmt==4.0.0
azure-mgmt-advisor==1.0.1
azure-mgmt-applicationinsights==0.1.1
azure-mgmt-authorization==0.50.0
azure-mgmt-batch==5.0.1
azure-mgmt-batchai==2.0.0
azure-mgmt-billing==0.2.0
azure-mgmt-cdn==3.1.0
azure-mgmt-cognitiveservices==3.0.0
azure-mgmt-commerce==1.0.1
azure-mgmt-compute==4.6.2
azure-mgmt-consumption==2.0.0
azure-mgmt-containerinstance==1.5.0
azure-mgmt-containerregistry==2.8.0
azure-mgmt-containerservice==4.4.0
azure-mgmt-cosmosdb==0.4.1
azure-mgmt-datafactory==0.6.0
azure-mgmt-datalake-analytics==0.6.0
azure-mgmt-datalake-nspkg==3.0.1
azure-mgmt-datalake-store==0.5.0
azure-mgmt-datamigration==1.0.0
azure-mgm