# 대규모 텐서플로 모델 훈련과 배포

먼저 몇 개의 모듈을 임포트한다. 맷플롯립 그림을 저장하는 함수를 준비한다.

In [1]:
# 공통 모듈 임포트
import matplotlib as mpl
import os
import matplotlib.pyplot as plt

# Larger label fonts for cleaner-looking plots
mpl.rcParams.update({'axes.labelsize': 14, 'xtick.labelsize': 12, 'ytick.labelsize': 12})

# Location where figures are saved (created if it does not exist yet)
PROJECT_ROOT_DIR = '.'
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, 'images')
os.makedirs(IMAGES_PATH, exist_ok=True)


def save_fig(fig_id, tight_layout=True, fig_extension='png', resolution=300):
    """Save the current matplotlib figure under IMAGES_PATH.

    The file is named ``<fig_id>.<fig_extension>``. ``resolution`` is passed to
    ``plt.savefig`` as the dpi, and ``plt.tight_layout()`` is applied first
    when ``tight_layout`` is true.
    """
    target = os.path.join(IMAGES_PATH, '.'.join([fig_id, fig_extension]))
    print(f'그림 저장{fig_id}')
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)

## 텐서플로 모델 서빙

### 텐서플로 서빙 사용하기
REST API나 gRPC API를 사용한다.

**SavedModel로 내보내기**

In [2]:
from tensorflow import keras
import numpy as np

# Load MNIST, append a trailing channel axis and divide the pixel values by 255.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.mnist.load_data()
X_train_full = np.expand_dims(X_train_full, axis=-1).astype(np.float32) / 255.
X_test = np.expand_dims(X_test, axis=-1).astype(np.float32) / 255.
# The first 5000 training samples are held out for validation.
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]
# Three test images used as sample inputs for the prediction examples.
X_new = X_test[:3]

2023-08-21 08:15:20.280497: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Small MNIST classifier: flatten the 28x28x1 image, one hidden ReLU layer,
# and a 10-way softmax output.
model = keras.Sequential(
    [
        keras.layers.Flatten(input_shape=[28, 28, 1]),
        keras.layers.Dense(100, activation='relu'),
        keras.layers.Dense(10, activation='softmax')
    ]
)
# Keyword arguments keep this call independent of the positional order of
# compile()'s parameters, and keras.optimizers.SGD avoids relying on the
# `experimental` alias namespace.
model.compile(
    optimizer=keras.optimizers.SGD(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
# No `epochs` argument is passed, so fit() uses its default to keep the demo
# quick; pass epochs=10 for a better-trained model.
model.fit(X_train, y_train, validation_data=(X_valid, y_valid))

2023-08-21 08:15:24.215555: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-21 08:15:24.450481: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-21 08:15:24.450725: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-21 08:15:24.453581: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-21 08:15:24.453765: I tensorflow/compile



<keras.src.callbacks.History at 0x7f880c134b20>

In [4]:
# Predicted class probabilities for the sample images, rounded to two decimals.
model.predict(X_new).round(2)



array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.99, 0.  , 0.01],
       [0.05, 0.  , 0.74, 0.03, 0.  , 0.08, 0.08, 0.  , 0.02, 0.  ],
       [0.  , 0.94, 0.02, 0.01, 0.  , 0.  , 0.01, 0.01, 0.01, 0.  ]],
      dtype=float32)

In [5]:
# The export directory is <model_name>/<model_version>.
model_name, model_version = 'my_mnist_model', '0001'
model_path = os.path.join(model_name, model_version)
model_path

'my_mnist_model/0001'

In [6]:
# Remove the whole model directory (every previously exported version) so the export starts clean.
!rm -rf {model_name}

In [7]:
import tensorflow as tf

# Export the trained model in SavedModel format under model_path.
tf.saved_model.save(obj=model, export_dir=model_path)

INFO:tensorflow:Assets written to: my_mnist_model/0001/assets


INFO:tensorflow:Assets written to: my_mnist_model/0001/assets


In [8]:
# Print the exported directory as a tree, indenting four spaces per path separator.
for dirpath, _, filenames in os.walk(model_name):
    pad = '    ' * dirpath.count(os.sep)
    print(f'{pad}{os.path.basename(dirpath)}/')
    for entry in filenames:
        print(f'{pad}    {entry}')

my_mnist_model/
    0001/
        fingerprint.pb
        saved_model.pb
        assets/
        variables/
            variables.data-00000-of-00001
            variables.index


In [9]:
# Inspect the SavedModel stored at model_path with the saved_model_cli tool.
!saved_model_cli show --dir {model_path}

2023-08-21 08:15:39.794518: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-21 08:15:41.048459: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-21 08:15:41.073743: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-21 08:15:41.074078: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been bu

In [10]:
# Same inspection, restricted to the 'serve' tag set.
!saved_model_cli show --dir {model_path} --tag_set serve

2023-08-21 08:15:42.674465: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-21 08:15:43.816225: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-21 08:15:43.832320: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-21 08:15:43.832573: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been bu

In [11]:
# Narrow the inspection further to the 'serving_default' signature of the 'serve' tag set.
!saved_model_cli show --dir {model_path} --tag_set serve --signature_def serving_default

2023-08-21 08:15:44.604485: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-21 08:15:45.993620: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-21 08:15:46.015223: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-21 08:15:46.015645: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been bu

In [12]:
# Dump everything saved_model_cli can report about the SavedModel (--all).
!saved_model_cli show --dir {model_path} --all

2023-08-21 08:15:47.383297: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-21 08:15:49.586343: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-21 08:15:49.603327: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-21 08:15:49.603598: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been bu

X_new를 `npy` 파일로 만들면 모델에 쉽게 전달할 수 있다:

In [13]:
# Write the sample images to disk so saved_model_cli can read them as input.
np.save(file='my_mnist_tests.npy', arr=X_new)

In [14]:
# Name of the model's first input; it is the key of the --inputs argument passed to saved_model_cli below.
input_name = model.input_names[0]
input_name

'flatten_input'

그리고 이제 `saved_model_cli`를 사용해 방금 저장한 샘플에 대한 예측을 만든다:

In [15]:
# Run the 'serving_default' signature on the samples saved in my_mnist_tests.npy, fed to the input named input_name.
!saved_model_cli run --dir {model_path} --tag_set serve --signature_def serving_default --inputs {input_name}=my_mnist_tests.npy

2023-08-21 08:15:51.104219: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-21 08:15:52.291379: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-21 08:15:52.304585: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-08-21 08:15:52.304806: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been bu

In [16]:
# Hard-coded probability rows (presumably copied from the saved_model_cli run
# output - confirm); rounding to zero decimals leaves a 1 at the predicted class.
np.array(
    [
        [1.1540909e-04, 6.7565077e-07, 6.4367504e-04, 1.3884673e-03, 7.0954684e-07,
         1.6306719e-04, 2.9787778e-08, 9.9733573e-01, 4.6408379e-05, 3.0576065e-04],
        [2.2494975e-03, 7.9745638e-05, 9.8004192e-01, 1.2274164e-02, 2.3486223e-08,
         9.9441968e-04, 2.5695174e-03, 1.5748299e-09, 1.7906638e-03, 4.0853223e-08],
        [7.1588256e-06, 9.8235393e-01, 5.3347293e-03, 2.8666973e-03, 2.3666114e-04,
         6.4616540e-04, 1.0684166e-03, 5.2083088e-03, 2.1845093e-03, 9.3470750e-05]
    ]
).round()

array([[0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]])

**텐서플로 서빙 설치하기**

[도커](https://docs.docker.com/install/)가 없다면 설치한다. 그리고 다음을 실행한다:

```bash
docker pull tensorflow/serving:latest-gpu

export ML_PATH=$HOME/ml # 또는 이 프로젝트가 있는 곳
docker run -it --rm -p 8500:8500 -p 8501:8501 -v "$ML_PATH/my_mnist_model:/models/my_mnist_model" -e MODEL_NAME=my_mnist_model tensorflow/serving:latest-gpu
```

사용이 끝나면 Ctrl-C를 눌러 서버를 종료한다.

또는 `tensorflow_model_server`가 설치되어 있다면 (예를 들어, 이 노트북을 코랩에서 실행하는 경우) 다음 세 개의 셀을 실행하여 서버를 시작한다:

```python
os.environ['MODEL_DIR'] = os.path.split(os.path.abspath(model_path))[0]
```

```python
%%bash --bg
nohup tensorflow_model_server --rest_api_port=8501 --model_name=my_mnist_model --model_base_path="${MODEL_DIR}" >server.log 2>&1
```

```python
!tail server.log
```

**REST API로 TF 서빙에 쿼리하기**

In [16]:
import json

# JSON request body: the signature to call plus the sample images as nested lists.
input_data_json = json.dumps(dict(signature_name="serving_default", instances=X_new.tolist()))

In [17]:
repr(input_data_json)[:1500] + '...'

'\'{"signature_name": "serving_default", "instances": [[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]], [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0

이제 텐서플로 서빙의 REST API를 사용해 예측을 만든다:

In [18]:
import requests

# NOTE(review): the host is hard-coded; adjust it to wherever the serving container is reachable.
SERVER_URL = 'http://172.17.0.3:8501/v1/models/my_mnist_model:predict'
# Bound the wait with a timeout: without one, requests may block indefinitely
# when the server does not answer. 10 seconds matches the gRPC call's timeout.
response = requests.post(SERVER_URL, data=input_data_json, timeout=10.)
response.raise_for_status()  # raise an exception if the server returned an error status
response = response.json()  # later cells read the decoded JSON body under this name

In [19]:
response.keys()

dict_keys(['predictions'])

In [20]:
# Turn the 'predictions' list of the JSON response into an array and show it rounded to two decimals.
y_proba = np.asarray(response['predictions'])
np.round(y_proba, 2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.99, 0.  , 0.01],
       [0.05, 0.  , 0.74, 0.03, 0.  , 0.08, 0.08, 0.  , 0.02, 0.  ],
       [0.  , 0.94, 0.02, 0.01, 0.  , 0.  , 0.01, 0.01, 0.01, 0.  ]])

**gRPC API로 TF 서빙에 쿼리하기**

In [21]:
from tensorflow_serving.apis.predict_pb2 import PredictRequest

# Build the PredictRequest: target model and signature, plus the sample images
# stored as a tensor proto under the model's first input name.
input_name = model.input_names[0]
request = PredictRequest()
request.model_spec.signature_name = 'serving_default'
request.model_spec.name = model_name
request.inputs[input_name].CopyFrom(tf.make_tensor_proto(X_new))

In [22]:
import grpc
from tensorflow_serving.apis import prediction_service_pb2_grpc

# Open an insecure gRPC channel to port 8500 and send the request, waiting at most 10 seconds.
# NOTE(review): the host is hard-coded; adjust it to wherever the serving container is reachable.
channel = grpc.insecure_channel(target='172.17.0.3:8500')
predict_service = prediction_service_pb2_grpc.PredictionServiceStub(channel)
response = predict_service.Predict(request, timeout=10.0)

In [23]:
response

model_spec {
  name: "my_mnist_model"
  version {
    value: 1
  }
  signature_name: "serving_default"
}
outputs {
  key: "dense_1"
  value {
    dtype: DT_FLOAT
    tensor_shape {
      dim {
        size: 3
      }
      dim {
        size: 10
      }
    }
    float_val: 0.000402979786
    float_val: 5.88208832e-06
    float_val: 0.000366911176
    float_val: 0.00135380519
    float_val: 0.000246009586
    float_val: 0.00012352725
    float_val: 6.24434233e-06
    float_val: 0.986865819
    float_val: 0.000193533
    float_val: 0.010435285
    float_val: 0.0472884588
    float_val: 0.00201013405
    float_val: 0.741884887
    float_val: 0.0263824854
    float_val: 4.35053189e-05
    float_val: 0.0835964605
    float_val: 0.077808
    float_val: 1.3808849e-05
    float_val: 0.0208080988
    float_val: 0.000164117519
    float_val: 0.000934983371
    float_val: 0.935363352
    float_val: 0.0228319
    float_val: 0.0100065954
    float_val: 0.00207439577
    float_val: 0.00379154854
  

응답을 텐서로 변환한다:

In [24]:
# Pick the output tensor proto by the model's first output name and decode it with TensorFlow.
output_name = model.output_names[0]
outputs_proto = response.outputs[output_name]
y_proba = tf.make_ndarray(outputs_proto)
np.round(y_proba, 2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.99, 0.  , 0.01],
       [0.05, 0.  , 0.74, 0.03, 0.  , 0.08, 0.08, 0.  , 0.02, 0.  ],
       [0.  , 0.94, 0.02, 0.01, 0.  , 0.  , 0.01, 0.01, 0.01, 0.  ]],
      dtype=float32)

클라이언트가 텐서플로 라이브러리를 사용하지 않는다면 넘파이 배열로 변환한다:

In [25]:
# Decode the output without TensorFlow: read the dimensions from the proto's
# tensor_shape, then reshape the flat float_val list accordingly.
output_name = model.output_names[0]
outputs_proto = response.outputs[output_name]
shape = [axis.size for axis in outputs_proto.tensor_shape.dim]
y_proba = np.reshape(np.array(outputs_proto.float_val), shape)
np.round(y_proba, 2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.99, 0.  , 0.01],
       [0.05, 0.  , 0.74, 0.03, 0.  , 0.08, 0.08, 0.  , 0.02, 0.  ],
       [0.  , 0.94, 0.02, 0.01, 0.  , 0.  , 0.01, 0.01, 0.01, 0.  ]])

**새로운 버전의 모델 배포하기**

In [26]:
# Second model version: two hidden ReLU layers of 50 units instead of one of 100.
model = keras.Sequential(
    [
        keras.layers.Flatten(input_shape=[28, 28, 1]),
        keras.layers.Dense(50, activation='relu'),
        keras.layers.Dense(50, activation='relu'),
        keras.layers.Dense(10, activation='softmax')
    ]
)
# Keyword arguments keep this call independent of the positional order of
# compile()'s parameters, and keras.optimizers.SGD avoids relying on the
# `experimental` alias namespace.
model.compile(
    optimizer=keras.optimizers.SGD(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [27]:
# The new version is exported next to the first one: <model_name>/<model_version>.
model_name, model_version = 'my_mnist_model', '0002'
model_path = os.path.join(model_name, model_version)
model_path

'my_mnist_model/0002'

In [28]:
# Export the retrained model in SavedModel format under the new version directory.
tf.saved_model.save(obj=model, export_dir=model_path)

INFO:tensorflow:Assets written to: my_mnist_model/0002/assets


INFO:tensorflow:Assets written to: my_mnist_model/0002/assets


In [29]:
# Print the model directory as a tree, indenting four spaces per path separator.
for dirpath, _, filenames in os.walk(model_name):
    pad = '    ' * dirpath.count(os.sep)
    print(f'{pad}{os.path.basename(dirpath)}/')
    for entry in filenames:
        print(f'{pad}    {entry}')

my_mnist_model/
    0001/
        fingerprint.pb
        saved_model.pb
        assets/
        variables/
            variables.data-00000-of-00001
            variables.index
    0002/
        fingerprint.pb
        saved_model.pb
        assets/
        variables/
            variables.data-00000-of-00001
            variables.index


**경고**: 새로운 모델이 텐서플로 서빙에 로드되기 전까지 잠시 기다려야 할 수 있다:

In [30]:
# NOTE(review): the host is hard-coded; adjust it to wherever the serving container is reachable.
SERVER_URL = 'http://172.17.0.3:8501/v1/models/my_mnist_model:predict'
# Bound the wait with a timeout: without one, requests may block indefinitely
# when the server does not answer.
response = requests.post(SERVER_URL, data=input_data_json, timeout=10.)
response.raise_for_status()  # raise an exception if the server returned an error status
response = response.json()  # later cells read the decoded JSON body under this name

In [31]:
response.keys()

dict_keys(['predictions'])

In [32]:
# Turn the 'predictions' list of the JSON response into an array and show it rounded to two decimals.
y_proba = np.asarray(response['predictions'])
np.round(y_proba, 2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.99, 0.  , 0.01],
       [0.05, 0.  , 0.74, 0.03, 0.  , 0.08, 0.08, 0.  , 0.02, 0.  ],
       [0.  , 0.94, 0.02, 0.01, 0.  , 0.  , 0.01, 0.01, 0.01, 0.  ]])

### 예측 서비스 사용하기

**구글 클라우드 클라이언트 라이브러리**

구글 클라우드 AI 플랫폼에 모델을 배포하고, 서비스 계정의 개인키를 다운로드하여 프로젝트 디렉토리에 저장한다:

In [33]:
# Set the credentials environment variable to the service-account key file; the path is
# relative, so the file is expected in the current working directory.
# NOTE(review): the key file name is specific to one account - replace it with your own and keep it out of version control.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "mineral-trainer-394903-4289c79f86b1.json"

In [34]:
from google.cloud import aiplatform
from google.protobuf import json_format
from google.protobuf.struct_pb2 import Value


def predict_custom_trained_model_sample(
        project, endpoint_id, instances, location='us-central1', api_endpoint='us-central1-aiplatform.googleapis.com'
):
    """Query a deployed prediction endpoint and return its predictions as a NumPy array.

    Args:
        project: Project identifier used to build the endpoint path.
        endpoint_id: Identifier of the endpoint to query.
        instances: One instance or a list of instances; a non-list value is
            wrapped in a single-element list.
        location: Region used to build the endpoint path.
        api_endpoint: Host of the regional prediction API.

    Returns:
        ``np.array`` built from the ``predictions`` field of the response.
    """
    service = aiplatform.gapic.PredictionServiceClient(client_options={'api_endpoint': api_endpoint})
    if not isinstance(instances, list):
        instances = [instances]
    # Every instance, and the (empty) parameters, are converted to protobuf Value messages.
    payload = [json_format.ParseDict(item, Value()) for item in instances]
    empty_parameters = json_format.ParseDict({}, Value())
    endpoint_path = service.endpoint_path(project=project, location=location, endpoint=endpoint_id)
    reply = service.predict(endpoint=endpoint_path, instances=payload, parameters=empty_parameters)
    return np.array(reply.predictions)

In [35]:
# Query the deployed endpoint with the sample images and show the probabilities rounded to two decimals.
# NOTE(review): the project and endpoint identifiers are specific to one deployment - replace them with your own.
Y_probas = predict_custom_trained_model_sample(
    project='759555954445',
    endpoint_id='8228789002740695040',
    instances=X_new.tolist(),
    location='asia-northeast3',
    api_endpoint='asia-northeast3-aiplatform.googleapis.com'
)
Y_probas.round(2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  , 0.  , 0.  ],
       [0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.99, 0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.  ]])