In [1]:
def train():
    """Train a small dense classifier on MNIST and print its test accuracy.

    TensorFlow is imported inside the body rather than at module level,
    presumably so the function stays self-contained when it is handed to
    a remote runner (verify against the caller).

    Takes no arguments and returns nothing; the model summary, the training
    progress and the final test accuracy are written to stdout.
    """
    import tensorflow as tf

    keras = tf.keras

    # Load the dataset and divide the image arrays by 255.0.
    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
    x_train = x_train / 255.0
    x_test = x_test / 255.0

    # Flatten -> Dense(128, relu) -> Dropout(0.2) -> Dense(10, softmax).
    layer_stack = [
        keras.layers.Flatten(input_shape=(28, 28)),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(10, activation='softmax'),
    ]
    classifier = keras.models.Sequential(layer_stack)
    classifier.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    classifier.summary()

    print("Training...")
    # 20% of the training data is held out for validation during fitting.
    classifier.fit(x_train, y_train, epochs=3, validation_split=0.2)

    # evaluate() yields the loss followed by the compiled 'accuracy' metric.
    _test_loss, test_accuracy = classifier.evaluate(
        x_test, y_test, batch_size=128, verbose=0
    )
    print('Test accuracy: ', test_accuracy)

In [2]:
from kubeflow import fairing
from kubeflow.fairing.kubernetes import utils as k8s_utils

# --- Configuration -----------------------------------------------------------
# Registry that the built image is pushed to.
PRIVATE_REGISTRY = 'registry.kube-system.svc.cluster.local:30000'
# Base image for the training container. Pinned to an explicit tag so the
# remote runtime does not drift with `latest`.
# NOTE(review): 2.4.1 was chosen because the recorded run probed for CUDA 11.0
# and the notebook side runs Python 3.6 — confirm it matches your environment.
BASE_IMAGE = 'tensorflow/tensorflow:2.4.1'
# Repository name of the image inside PRIVATE_REGISTRY.
IMAGE_NAME = 'my-03-function-fairing-job'
# Kubernetes namespace the training job is submitted to.
JOB_NAMESPACE = 'myspace'

# Build with the 'append' builder on top of BASE_IMAGE and push the result
# to the private registry.
fairing.config.set_builder(
    'append',
    base_image=BASE_IMAGE,
    registry=PRIVATE_REGISTRY,
    image_name=IMAGE_NAME,
    push=True,
)

# Deploy as a Kubernetes job with explicit resource settings for the pod.
fairing.config.set_deployer(
    'job',
    namespace=JOB_NAMESPACE,  # default: the current namespace
    pod_spec_mutators=[
        # presumably memory is expressed in GB — TODO confirm against the Fairing docs
        k8s_utils.get_resource_mutator(cpu=1, memory=5),
    ],
)

# Wrap the local `train` function so that calling it goes through the
# builder/deployer configured above instead of running in this kernel.
remote_train = fairing.config.fn(train)

remote_train()

[I 210314 18:49:47 config:134] Using preprocessor: <kubeflow.fairing.preprocessors.function.FunctionPreProcessor object at 0x7f9b93ac57b8>
[I 210314 18:49:47 config:136] Using builder: <kubeflow.fairing.builders.append.append.AppendBuilder object at 0x7f9b93ac57f0>
[I 210314 18:49:47 config:138] Using deployer: <kubeflow.fairing.deployers.job.job.Job object at 0x7f9b93ac5940>
[W 210314 18:49:47 append:50] Building image using Append builder...
[I 210314 18:49:47 base:107] Creating docker context: /tmp/fairing_context_1o3n1g_t
[W 210314 18:49:47 base:94] /usr/local/lib/python3.6/dist-packages/kubeflow/fairing/__init__.py already exists in Fairing context, skipping...
[I 210314 18:49:47 docker_creds_:234] Loading Docker credentials for repository 'tensorflow/tensorflow'
[W 210314 18:49:47 append:54] Image successfully built in 0.5906359840009827s.
[W 210314 18:49:47 append:94] Pushing image registry.kube-system.svc.cluster.local:30000/my-03-function-fairing-job:C7CA1930...
[I 210314 18:4

2021-03-14 18:50:11.184759: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-03-14 18:50:11.184820: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
2021-03-14 18:50:13.938227: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-03-14 18:50:13.938469: W tensorflow/stream_executor/platform/default/dso_loader.cc:60] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-03-14 18:50:13.938500: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2021-03-14 18:50:13.938530: I tensorflo

[W 210314 18:50:47 job:173] Cleaning up job fairing-job-6df6w...


### curl로 프라이빗 레지스트리 확인

In [None]:
# Query the private registry's /v2/_catalog endpoint to list the repositories it holds.
! curl http://registry.kube-system.svc.cluster.local:30000/v2/_catalog

In [None]:
! curl http://registry.kube-system.svc.cluster.local:30000/v2/fairing-job/tags/list