In [1]:
import tensorflow as tf

def train():
    """Train a small dense classifier on MNIST and print its test accuracy.

    The function takes no arguments and returns nothing. It loads MNIST
    through ``tf.keras.datasets``, divides the images by 255.0, fits a
    Flatten -> Dense(128, relu) -> Dropout(0.2) -> Dense(10, softmax) model
    for 3 epochs with 20% of the training data held out for validation, and
    finally evaluates on the test split. The model summary, a "Training..."
    banner and the test accuracy are written to standard output.
    """
    (train_images, train_labels), (test_images, test_labels) = (
        tf.keras.datasets.mnist.load_data()
    )

    # Rescale the raw image values by 255.0 before they reach the network.
    train_images = train_images / 255.0
    test_images = test_images / 255.0

    keras_layers = tf.keras.layers
    network = tf.keras.models.Sequential([
        keras_layers.Flatten(input_shape=(28, 28)),
        keras_layers.Dense(128, activation='relu'),
        keras_layers.Dropout(0.2),
        keras_layers.Dense(10, activation='softmax'),
    ])

    # Labels are passed to fit() as-is, hence the sparse categorical loss.
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    network.summary()

    print("Training...")
    network.fit(train_images, train_labels, epochs=3, validation_split=0.2)

    # With a single compiled metric, evaluate() yields the loss first and the
    # accuracy second; only the accuracy is reported.
    evaluation = network.evaluate(
        test_images, test_labels, batch_size=128, verbose=0
    )
    print('Test accuracy: ', evaluation[1])

In [2]:
# Local training: run train() directly in this notebook kernel. The model
# summary, the training progress and the test accuracy are printed below.
train()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________
Training...
Train on 48000 samples, validate on 12000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Test accuracy:  0.97


In [3]:
from kubeflow import fairing

# In-cluster registry that receives the image produced by the builder.
PRIVATE_REGISTRY = 'kubeflow-registry.default.svc.cluster.local:30000'

# Configure the 'append' builder: start from the public TensorFlow image and
# push the resulting image to the private registry.
# To start from a custom image kept in the private registry instead, pass
# base_image=f'{PRIVATE_REGISTRY}/kf-base:latest'.
# NOTE(review): the recorded job log reports a cuInit failure (no NVIDIA
# driver on the pod host), so this GPU image appears to have run on CPU -
# confirm that the GPU variant is really intended.
fairing.config.set_builder(
    'append',
    registry=PRIVATE_REGISTRY,
    base_image='tensorflow/tensorflow:2.0.3-gpu-py3',
    push=True,
)

# Select the 'job' deployer for running the built image.
fairing.config.set_deployer('job')

# Wrap train() with the configuration above; calling the wrapper triggers the
# image build, the push and the job submission instead of a local run.
remote_train = fairing.config.fn(train)
remote_train()

[I 210103 17:12:21 config:134] Using preprocessor: <kubeflow.fairing.preprocessors.function.FunctionPreProcessor object at 0x7fe5700a66a0>
[I 210103 17:12:21 config:136] Using builder: <kubeflow.fairing.builders.append.append.AppendBuilder object at 0x7fe58613b908>
[I 210103 17:12:21 config:138] Using deployer: <kubeflow.fairing.deployers.job.job.Job object at 0x7fe5700a6828>
[W 210103 17:12:21 append:50] Building image using Append builder...
[I 210103 17:12:21 base:107] Creating docker context: /tmp/fairing_context_98201suk
[W 210103 17:12:21 base:94] /usr/local/lib/python3.6/dist-packages/kubeflow/fairing/__init__.py already exists in Fairing context, skipping...
[I 210103 17:12:21 docker_creds_:234] Loading Docker credentials for repository 'tensorflow/tensorflow:2.0.3-gpu-py3'
[W 210103 17:12:24 append:54] Image successfully built in 2.1465259729884565s.
[W 210103 17:12:24 append:94] Pushing image kubeflow-registry.default.svc.cluster.local:30000/fairing-job:CE704026...
[I 210103 

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
2021-01-03 17:12:28.397829: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2021-01-03 17:12:28.397907: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: UNKNOWN ERROR (-1)
2021-01-03 17:12:28.397940: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (fairing-job-hmxv8-8fz96): /proc/driver/nvidia/version does not exist
2021-01-03 17:12:28.398277: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2021-01-03 17:12:28.408264: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2199995000 Hz
2021-01-03 17:12:28.408917: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x57862a0 executing computations on platform Host.

[W 210103 17:12:44 job:173] Cleaning up job fairing-job-hmxv8...


### curl로 프라이빗 레지스트리 확인

In [4]:
# Query the registry's /v2/_catalog endpoint to list the repositories it holds.
! curl http://kubeflow-registry.default.svc.cluster.local:30000/v2/_catalog

{"repositories":["fairing-job"]}


In [5]:
# List the image tags stored in the registry for the fairing-job repository.
! curl http://kubeflow-registry.default.svc.cluster.local:30000/v2/fairing-job/tags/list

{"name":"fairing-job","tags":["CE704026","16FD05F9"]}
