In [1]:
import tensorflow as tf

def train():
    """Train a small dense classifier on MNIST and print its test accuracy.

    Steps, in order:
      1. Load the MNIST train/test splits via ``tf.keras.datasets.mnist``.
      2. Divide the image arrays by 255.0.
      3. Build and compile a Flatten -> Dense(128, relu) -> Dropout(0.2)
         -> Dense(10, softmax) model (Adam optimizer, sparse categorical
         cross-entropy loss, accuracy metric) and print its summary.
      4. Fit for 3 epochs, holding out 20% of the training data for validation.
      5. Evaluate on the test split and print the accuracy.

    Takes no arguments and returns nothing; all results are printed.
    """
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

    # Presumably maps 8-bit pixel intensities into [0, 1].
    x_train = x_train / 255.0
    x_test = x_test / 255.0

    keras_layers = tf.keras.layers
    layer_stack = [
        keras_layers.Flatten(input_shape=(28, 28)),
        keras_layers.Dense(128, activation='relu'),
        keras_layers.Dropout(0.2),
        keras_layers.Dense(10, activation='softmax'),
    ]
    classifier = tf.keras.models.Sequential(layer_stack)

    classifier.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    classifier.summary()

    print("Training...")
    classifier.fit(x_train, y_train, epochs=3, validation_split=0.2)

    # evaluate() yields [loss, accuracy] because exactly one metric was compiled in.
    _, test_accuracy = classifier.evaluate(x_test, y_test, batch_size=128, verbose=0)
    print('Test accuracy: ', test_accuracy)

In [2]:
import os
from kubeflow import fairing

# Address of the in-cluster private Docker registry that built images are pushed to.
PRIVATE_REGISTRY = 'kubeflow-registry.default.svc.cluster.local:30000'

# Keyword options for the 'append' builder.
builder_options = {
    # 'base_image': f'{PRIVATE_REGISTRY}/kf-base:latest',  # base image prepared in the prerequisites -- does NOT work; see the notes at the bottom of this notebook
    'base_image': 'tensorflow/tensorflow:2.0.3-gpu-py3',  # works
    'registry': PRIVATE_REGISTRY,
    'push': True,
}
fairing.config.set_builder('append', **builder_options)

# Run the packaged function with the 'job' deployer.
fairing.config.set_deployer('job')

# Wrap the local train() function; calling the wrapper starts the
# build / push / deploy sequence shown in the log output below.
remote_train = fairing.config.fn(train)
remote_train()

[I 201206 12:26:29 config:134] Using preprocessor: <kubeflow.fairing.preprocessors.function.FunctionPreProcessor object at 0x7f2dc0467550>
[I 201206 12:26:29 config:136] Using builder: <kubeflow.fairing.builders.append.append.AppendBuilder object at 0x7f2cffabbdd8>
[I 201206 12:26:29 config:138] Using deployer: <kubeflow.fairing.deployers.job.job.Job object at 0x7f2dc04676d8>
[W 201206 12:26:29 append:50] Building image using Append builder...
[I 201206 12:26:29 base:107] Creating docker context: /tmp/fairing_context_y4umdtfr
[W 201206 12:26:29 base:94] /usr/local/lib/python3.6/dist-packages/kubeflow/fairing/__init__.py already exists in Fairing context, skipping...
[I 201206 12:26:29 docker_creds_:234] Loading Docker credentials for repository 'tensorflow/tensorflow:2.0.3-gpu-py3'
[W 201206 12:26:32 append:54] Image successfully built in 2.798909298988292s.
[W 201206 12:26:32 append:94] Pushing image kubeflow-registry.default.svc.cluster.local:30000/fairing-job:6891A465...
[I 201206 1

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
2020-12-06 12:28:14.948115: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-12-06 12:28:14.948186: E tensorflow/stream_executor/cuda/cuda_driver.cc:318] failed call to cuInit: UNKNOWN ERROR (-1)
2020-12-06 12:28:14.948220: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (fairing-job-tw8wl-kqqh5): /proc/driver/nvidia/version does not exist
2020-12-06 12:28:14.948640: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA
2020-12-06 12:28:14.970902: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2000140000 Hz
2020-12-06 12:28:14.972071: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5509930 executing computations on platfo

[W 201206 12:28:37 job:173] Cleaning up job fairing-job-tw8wl...


### curl로 프라이빗 레지스트리 확인

In [3]:
# Ask the private registry for its catalog; the response is a JSON object with a "repositories" list.
! curl http://kubeflow-registry.default.svc.cluster.local:30000/v2/_catalog

{"repositories":["02-python-file-fairing","covid19-katib-job","fairing-job","katib-base","katib-job","kf-base","my-02-faring-job","my-02-python-file-fairing","my-03-notebook-fairing-job","my-04-notebook-single-file-fairing-job","my-06-build-only-fairing","my-fairing-job","my-notebook-fairing-job","tensorboard-job"]}


In [4]:
# List the image tags stored under the "fairing-job" repository of the private registry.
! curl http://kubeflow-registry.default.svc.cluster.local:30000/v2/fairing-job/tags/list

{"name":"fairing-job","tags":["3DA1A245","42CFDC19","51B5229D","37453BCC","910F73B3","18DF7E85","FE1E3D0C","F5D207F","23704EBA","7E272974","E69B7B0","7602C563","795AC9AC","BD60E1F7","223E3522","CB53FA23","5E7B4FEE","141D2F60","EC86838C","A5EE9B45","59DF0B11","E87ED344","6891A465","956A7281","3B74C463","D1C6AB4E","8E296311","9381360","12FA764","9A917239","66787D3","B4C7B0","498914B7","F60C4742","3052981C"]}


# Function Fairing 패키지 버전 이슈
Function Fairing 시 fairing SDK가 컨테이너 내부에서 아래와 같은 명령을 실행
```bash
python /app/function_shim.py --serialized_fn_file /app/pickled_fn.p --python_version 3.6
```
일부 Base Image에서는 이 명령이 에러로 실패함.

Base Image를 바꿔 가며 테스트하여 실행 성공과 실패 케이스를 아래와 같이 정리함

- 실행 실패
  - base_image = f'{PRIVATE_REGISTRY}/kf-base:latest', # 사전준비에서 마련한 Base Image  
  - base_image = 'tensorflow/tensorflow:latest-py3',   
  - base_image = 'tensorflow/tensorflow:2.1.2-gpu',    
  - Base Image에 pip install tensorflow-gpu   <- 최신 2.3.1 설치됨
- 실행 성공  
  - base_image = 'brightfly/kubeflow-jupyter-lab:tf2.0-gpu', <- 실마리
  - base_image = 'tensorflow/tensorflow:2.0.3-gpu-py3',
  - Base Image에 pip install tensorflow-gpu==2.0.0 