In [11]:
import tempfile
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
#import tensorflow_model_optimization as tfmot
import argparse
from tensorflow.python.keras.callbacks import Callback
from tensorflow.python.lib.io import file_io
import json


# Helper function for measuring the model size
def get_gzipped_model_size(file):
    """Return the size, in bytes, of ``file`` after DEFLATE compression.

    The file is written into a temporary zip archive and the on-disk size of
    that archive is returned, so the figure includes the zip container
    overhead in addition to the compressed payload.

    Args:
        file: Path of the file (for example a saved model) to measure.

    Returns:
        Size of the temporary zip archive in bytes.
    """
    import zipfile

    # mkstemp() returns an already-open OS-level descriptor together with the
    # path. Close it immediately so it is not leaked; ZipFile reopens the path
    # on its own.
    fd, zipped_file = tempfile.mkstemp('.zip')
    os.close(fd)
    try:
        with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
            f.write(file)
        return os.path.getsize(zipped_file)
    finally:
        # The archive only exists to be measured; do not leave it behind.
        os.remove(zipped_file)

class Mnist(object):
    """Trains, evaluates and exports a small CNN digit classifier on MNIST."""

    def train(self):
        """Run the complete training job, configured through command-line flags.

        Flags (all optional):
            --learning_rate: step size handed to the Adam optimizer
                (default 0.001).
            --dropout_rate: accepted so existing invocations keep working;
                the current architecture has no dropout layer, so the value
                is not used.
            --model_path: base directory for the exported model
                (default '/result/saved_model').
            --model_version: sub-directory created under ``model_path``
                (default '1').

        Side effects:
            * loads MNIST through ``tf.keras.datasets.mnist.load_data()``;
            * prints the model summary and the evaluation results;
            * writes accuracy and loss to '/mlpipeline-metrics.json';
            * saves the model (without optimizer state) to
              '<model_path>/<model_version>/fmnist_model.h5'.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('--learning_rate', required=False, type=float, default=0.001)
        parser.add_argument('--dropout_rate', required=False, type=float, default=0.3)
        parser.add_argument('--model_path', required=False, default='/result/saved_model', type=str)
        parser.add_argument('--model_version', required=False, default='1', type=str)
        args = parser.parse_args()

        # Load MNIST dataset
        mnist = tf.keras.datasets.mnist
        (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

        # Normalize the input images so that each pixel value is between 0 and 1.
        train_images = train_images.astype(np.float32) / 255.0
        test_images = test_images.astype(np.float32) / 255.0

        # Define the model architecture. The final Dense layer emits raw
        # logits, which is why the loss below uses from_logits=True.
        model = tf.keras.Sequential([
            tf.keras.layers.InputLayer(input_shape=(28, 28)),
            tf.keras.layers.Reshape(target_shape=(28, 28, 1)),
            tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'),
            tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(10),
        ])

        model.summary()

        # Train the digit classification model. Build the optimizer
        # explicitly so that --learning_rate actually takes effect; passing
        # the string 'adam' would silently ignore the flag.
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=args.learning_rate),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['accuracy'],
        )
        model.fit(
            train_images,
            train_labels,
            epochs=5,
            validation_data=(test_images, test_labels),
        )

        # evaluate() returns [loss, accuracy] given the single metric above.
        results = model.evaluate(test_images, test_labels, batch_size=128)
        print('test loss, test acc:', results)
        loss = results[0]
        accuracy = results[1]

        # NOTE(review): the file name and layout look like the Kubeflow
        # Pipelines metrics format -- confirm against the consuming pipeline.
        metrics = {
            'metrics': [{
                'name': 'accuracy',
                'numberValue': float(accuracy),
                'format': "PERCENTAGE",
            }, {
                'name': 'loss',
                'numberValue': float(loss),
                'format': "RAW",
            }]
        }
        with file_io.FileIO('/mlpipeline-metrics.json', 'w') as f:
            json.dump(metrics, f)

        export_path = os.path.join(args.model_path, str(args.model_version))
        # Make sure the target directory exists before the HDF5 writer tries
        # to create the model file inside it.
        os.makedirs(export_path, exist_ok=True)
        tf.keras.models.save_model(model, export_path+'/fmnist_model.h5', include_optimizer=False)
        # NOTE: the saved .h5 file can be passed to get_gzipped_model_size()
        # to report the compressed size of this baseline model.


def fairing_run():
    """Build an image with Kubeflow Fairing and submit it as a Kubernetes job.

    Configures the 'append' builder (registry 'khw2126', image
    'mnist-simple') and the 'job' deployer in namespace 'admin', mounts the
    PVC 'workspace-hufsice' at '/result' in the job pod, then calls
    ``fairing.config.run()``. The job name gets a random 4-hex-digit suffix
    so repeated submissions do not share a name.
    """
    # Imported here rather than relying on the ``__main__`` guard so that the
    # function also works when it is called from any other context.
    import uuid
    from kubeflow import fairing
    from kubeflow.fairing.kubernetes import utils as k8s_utils

    CONTAINER_REGISTRY = 'khw2126'

    namespace = 'admin'
    job_name = f'mnist-job-{uuid.uuid4().hex[:4]}'

    fairing.config.set_builder(
        'append',
        registry=CONTAINER_REGISTRY,
        image_name="mnist-simple",
        base_image="khw2126/tensorflow-2.0.0-notebook-gpu:3.0.0",
    )

    # The pod spec mutator mounts the PVC at /result inside the job pod.
    fairing.config.set_deployer(
        'job',
        namespace=namespace,
        job_name=job_name,
        cleanup=False,
        stream_log=True,
        pod_spec_mutators=[
            k8s_utils.mounting_pvc(pvc_name="workspace-hufsice",
                                   pvc_mount_path="/result"),
        ],
    )

    fairing.config.run()
    
if __name__ == '__main__':
    # Entry point with two modes, selected by the FAIRING_RUNTIME environment
    # variable (presumably set by Fairing inside the image it builds -- TODO
    # confirm): when it is present, train; otherwise submit the job.
    if 'FAIRING_RUNTIME' in os.environ:
        remote_train = Mnist()
        remote_train.train()
    else:
        # These imports are only needed for submission; fairing_run() uses
        # the names they bind at module level.
        import uuid
        from kubeflow import fairing
        from kubeflow.fairing.kubernetes import utils as k8s_utils
        fairing_run()
        







[I 200915 13:14:12 config:134] Using preprocessor: <kubeflow.fairing.preprocessors.converted_notebook.ConvertNotebookPreprocessor object at 0x7f385e9ccac8>
[I 200915 13:14:12 config:136] Using builder: <kubeflow.fairing.builders.append.append.AppendBuilder object at 0x7f38f824e160>
[I 200915 13:14:12 config:138] Using deployer: <kubeflow.fairing.deployers.job.job.Job object at 0x7f385fb2b438>
[W 200915 13:14:12 append:50] Building image using Append builder...
[I 200915 13:14:12 base:107] Creating docker context: /tmp/fairing_context_t8rvl9or
[I 200915 13:14:12 converted_notebook:127] Converting Train.ipynb to Train.py
[I 200915 13:14:12 docker_creds_:234] Loading Docker credentials for repository 'khw2126/tensorflow-2.0.0-notebook-gpu:3.0.0'
[W 200915 13:14:19 append:54] Image successfully built in 6.662764813998365s.
[W 200915 13:14:19 append:94] Pushing image khw2126/mnist-simple:1E60C7FE...
[I 200915 13:14:19 docker_creds_:234] Loading Docker credentials for repository 'khw2126/mni

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
2020-09-15 13:14:46.859909: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-09-15 13:14:46.862858: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-09-15 13:14:46.863172: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce RTX 2070 computeCapability: 7.5
coreClock: 1.815GHz coreCount: 36 deviceMemorySize: 7.79GiB deviceMemoryBandwidth: 417.29GiB/s
2020-09-15 13:14:46.863332: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/ex