# Keras와 TensorFlow를 결합한 딥러닝 모델 구현하기

이번 세션에서는 Fashion MNIST 이미지 데이터를 활용하여 Keras의 Convolutional Neural Network(CNN) 모델을 Azure Machine Learning Service에서 구현해보겠습니다. Azure Machine Learning Service에서는 다음 세 가지 방법으로 딥러닝 모델을 구현할 수 있습니다.

    1. 일반적인 모델링
    2. 일반 Estimator를 활용한 모델링
    3. Open Framework Estimator를 활용한 모델링

세 가지 방법 중 두 번째와 세 번째 방법은 ML Workspace에서 Experiment를 Run하는 방식으로 진행되며, Data Science Virtual Machine 또는 AML Compute와 같은 computing target을 사용할 수 있습니다. 이번 세션에서는 먼저 Local 자원으로 첫 번째 방법을 실행하고, AML Compute (GPU Cluster)를 활용하여 두 번째와 세 번째 방법을 실행한 뒤, 마지막으로 Horovod를 사용한 분산학습 (distributed training)을 수행해볼 예정입니다.

## Azure ML 설정 (Configuration)

아래 코드는 ML workspace 연동과 computing resource 지정 등을 수행합니다.

In [None]:
import sys
import warnings
import azureml.core
import os
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.core.compute import AmlCompute, ComputeTarget

# Silence library warnings so the notebook output stays readable.
warnings.simplefilter("ignore")

# Identity of the Azure ML workspace this notebook connects to.
subscription_id = '94ff7c1e-50c0-4466-a33b-232a0ccff39d'
resource_group = 'amlStudy1'
workspace_name = 'amlMNIST'
workspace_region = 'eastus'

ws = Workspace(subscription_id=subscription_id,
               resource_group=resource_group,
               workspace_name=workspace_name)

# Save the workspace settings via the SDK so they can be reused later.
ws.write_config()

# GPU VM
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "mnistGPU")
# Environment variables are always strings, so cast explicitly: the node
# counts must be integers whether they come from the default or an override.
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_NC6")

if compute_name in ws.compute_targets:
    # Reuse a compute target that already exists under this name.
    compute_target = ws.compute_targets[compute_name]
    if compute_target and isinstance(compute_target, AmlCompute):
        print('Found compute target! Just use ' + compute_name)
    else:
        # Previously this case passed silently; surface it so a wrong
        # target type is noticed before a run is submitted to it.
        print('Warning: compute target ' + compute_name + ' exists but is not an AmlCompute cluster.')
else:
    print('Creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=compute_min_nodes,
                                                                max_nodes=compute_max_nodes)
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)
    # Wait (up to 20 minutes) for provisioning to finish before continuing.
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

print('Computing resources attached!')

## 데이터셋 불러오기

Fashion MNIST 데이터는 6만 장의 패션 아이템 Train 이미지 (28 x 28 픽셀)와 1만 장의 Test 이미지로 구성되어 있습니다. 해당 데이터를 받을 수 있는 방법은 다양하지만 이번 세션에서는 tensorflow-keras 라이브러리에서 불러오도록 하겠습니다. 아래와 같이 tensorflow와 keras를 불러오고 datasets에서 fashion_mnist를 지정한 뒤 load_data 함수를 호출하면 쉽게 데이터를 불러올 수 있습니다.

In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

# Load the Fashion MNIST train and test splits (images plus class labels) through tf.keras.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

저장된 데이터의 크기(dimension)는 .shape 속성으로 확인할 수 있습니다.

In [None]:
# Report the dimensions of the image and label arrays for each split.
for split_name, images, targets in (("train", x_train, y_train), ("test", x_test, y_test)):
    print("x_{} shape:".format(split_name), images.shape,
          "y_{} shape:".format(split_name), targets.shape)

In [None]:
# Draw a single row of training images, each titled with its class name.
fig = plt.figure()
# Indices of the training images to display.
seed = [12, 123, 1234, 12345, 1111, 2222, 3333, 4444, 5555, 9999]
# Class names, indexed by the integer label stored in y_train / y_test.
label = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]

for position, sample_idx in enumerate(seed, start=1):
    panel = fig.add_subplot(1, 10, position)
    # Hide the tick marks; only the picture and its title matter here.
    panel.set_xticks([])
    panel.set_yticks([])
    panel.set_title('%s' % label[y_train[sample_idx]])
    panel.imshow(x_train[sample_idx])

# NOTE(review): left/right lie outside the usual 0..1 figure fraction;
# presumably this relies on the notebook's inline rendering to widen the row — confirm.
fig.subplots_adjust(left=3, right=5, wspace=0.1)

In [None]:
# Convert both image arrays to float32 and divide every pixel value by 255.
x_train, x_test = (pixels.astype('float32') / 255 for pixels in (x_train, x_test))

In [None]:
# Append a single channel axis so each image is 28x28x1, matching the
# input_shape of the first Conv2D layer. The sample count is inferred (-1)
# instead of hard-coded, so the cell also works on a subset of the data.
train_x = x_train.reshape(-1, 28, 28, 1)
test_x = x_test.reshape(-1, 28, 28, 1)

In [None]:
from keras.utils import to_categorical

# Run both label vectors through to_categorical to get the categorical
# targets used with the categorical_crossentropy loss.
train_y, test_y = map(to_categorical, (y_train, y_test))

In [None]:
# Report the dimensions of the reshaped images and encoded labels.
for split_name, images, targets in (("train", train_x, train_y), ("test", test_x, test_y)):
    print("x_{} shape:".format(split_name), images.shape,
          "y_{} shape:".format(split_name), targets.shape)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten

# CNN classifier: two convolution / pooling / dropout stages followed by a
# dense hidden layer and a 10-way softmax output.
model = Sequential([
    # First Convolution Layer
    Conv2D(filters=64, kernel_size=3, padding='same', activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=2),
    Dropout(0.3),

    # Second Convolution Layer
    Conv2D(filters=32, kernel_size=3, padding='same', activation='relu'),
    MaxPooling2D(pool_size=2),
    Dropout(0.3),

    # Hidden Layer
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

# Model Summary
model.summary()

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss,
# and accuracy reported as the tracked metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
import time

# Timestamp taken immediately before local training starts.
start0 = time.time()

# Train for 10 epochs in batches of 64. The test split is passed as the
# validation data, and shuffling between epochs is disabled.
fit_options = dict(batch_size=64,
                   epochs=10,
                   validation_data=(test_x, test_y),
                   shuffle=False)
history = model.fit(train_x, train_y, **fit_options)

# Timestamp taken right after training; end0 - start0 is the elapsed time.
end0 = time.time()

In [None]:
# list all data in history
print(history.history.keys())

# Keras releases differ in how they name the accuracy metric ('acc' in
# older versions, 'accuracy' in newer ones); pick whichever key exists
# so the plots do not fail with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# One figure per metric: training curve plus validation curve per epoch.
for metric_key, metric_title in ((acc_key, 'accuracy'), ('loss', 'loss')):
    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])
    plt.title('model ' + metric_title)
    plt.ylabel(metric_title)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

In [None]:
# Per-epoch validation accuracy. The key is 'val_acc' or 'val_accuracy'
# depending on the Keras release, so use whichever one is present.
history.history['val_acc' if 'val_acc' in history.history else 'val_accuracy']

In [None]:
# Draw a row of test images with the true class as the title and the
# predicted class as a caption underneath.
fig2 = plt.figure()
pred_y = model.predict(test_x)
# Indices into the test split. Fashion MNIST has 10,000 test images, so
# every index must be below 10000; the former entry 12345 raised an
# IndexError, and the list held 11 entries for only 10 panels.
seed2 = [12, 123, 1234, 1111, 2222, 3333, 4444, 5555, 7777, 9999]

for i, sample_idx in enumerate(seed2):
    subplot = fig2.add_subplot(1, len(seed2), i + 1)
    subplot.set_xticks([])
    subplot.set_yticks([])
    subplot.axis('off')
    # test_y and pred_y hold one score per class; argmax recovers the class index.
    subplot.set_title('%s' % label[np.argmax(test_y[sample_idx])])
    subplot.imshow(x_test[sample_idx])
    subplot.text(0.5, -0.2, label[np.argmax(pred_y[sample_idx])], size=12, ha="center", transform=subplot.transAxes)

fig2.subplots_adjust(left=3, right=5, wspace=0.1)


In [None]:
# Evaluate on the test split. The second entry of the result is kept as
# the local accuracy (accuracy is the only metric passed to compile()).
score = model.evaluate(test_x, test_y, verbose=0)
_, run0 = score

# Print the local accuracy together with the elapsed training time.
local_report = ('\n', 'Local Accuracy:', run0, '&', 'Processing Time:', end0 - start0)
print(*local_report)

## Training Script

In [None]:
import os
# Folder (under the current working directory) that will hold the training script.
script_folder = os.path.join(os.getcwd(), "Training")
# exist_ok=True makes re-running this cell harmless when the folder already exists.
os.makedirs(script_folder, exist_ok=True)
# Trailing bare expression: the notebook echoes the resolved path.
script_folder

In [None]:
%%writefile $script_folder/train_cnn.py

import time
import argparse
import tensorflow as tf
from tensorflow import keras
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from azureml.core import Run

# Timing starts before argument parsing and data loading, so the logged
# processing_time covers the whole script, not just model.fit().
start = time.time()

parser = argparse.ArgumentParser()
# Defaults match the values the notebook passes, so the script also runs
# stand-alone instead of handing None to model.fit().
parser.add_argument('--batch_size', type=int, dest='batch_size', default=64)
parser.add_argument('--epochs', type=int, dest='epochs', default=10)
args = parser.parse_args()

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

# Convert pixels to float32 and divide by 255.
x_train = x_train.astype('float32')/255
x_test = x_test.astype('float32')/255

# Append a channel axis; the sample count is inferred rather than hard-coded.
train_x = x_train.reshape(-1, 28, 28, 1)
test_x = x_test.reshape(-1, 28, 28, 1)

train_y = to_categorical(y_train)
test_y = to_categorical(y_test)

model = Sequential()

# First Convolution Layer
model.add(Conv2D(filters=64, kernel_size=3, padding='same', activation='relu', input_shape=(28,28,1)))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.3))

# Second Convolution Layer
model.add(Conv2D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.3))

# Hidden Layer
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# NOTE(review): this script never imports or initializes Horovod, so when
# it is launched with distributed_backend='mpi' each process presumably
# trains the full model on its own — confirm.
history = model.fit(train_x,
                    train_y,
                    batch_size=args.batch_size,
                    epochs=args.epochs,
                    validation_data=(test_x, test_y),
                    shuffle=False)

end = time.time()
t_process = end-start

# Keras releases name the accuracy metric 'acc' or 'accuracy'; use
# whichever key is present so logging does not fail with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# Log the metrics to the Azure ML run so the notebook can read them back
# through get_metrics().
run = Run.get_context()
run.log('processing_time', t_process)
run.log_list('accuracy', history.history[acc_key])
run.log_list('loss', history.history['loss'])
run.log_list('val_accuracy', history.history['val_' + acc_key])
run.log_list('val_loss', history.history['val_loss'])

## Estimator

In [None]:
from azureml.train.estimator import Estimator

# Command-line arguments forwarded to train_cnn.py.
script_params = {'--batch_size': 64, '--epochs': 10}

# Generic Estimator: runs train_cnn.py from script_folder on the chosen
# compute target with tensorflow and keras requested as conda packages.
est = Estimator(
    source_directory=script_folder,
    entry_script='train_cnn.py',
    script_params=script_params,
    compute_target=compute_target,
    conda_packages=['tensorflow', 'keras'],
    use_gpu=True,
)

In [None]:
# Experiment in the workspace to which the generic-Estimator run is submitted.
experiment_name = 'fashion-MNIST'
exp1 = Experiment(name=experiment_name, workspace=ws)

In [None]:
# Submit the generic Estimator to the first experiment.
run1 = exp1.submit(est)
# Trailing bare expression so the notebook displays the run.
run1

In [None]:
from azureml.widgets import RunDetails
# Show the RunDetails widget for run1.
RunDetails(run1).show()

In [None]:
# Wait for run1 to complete, showing its output while waiting.
run1.wait_for_completion(show_output=True)

In [None]:
# Fetch the metrics that train_cnn.py logged for this run.
run1_metrics = run1.get_metrics()
# Read the last logged epoch ([-1]) rather than a fixed index, so the
# report stays correct if the number of epochs is changed.
print('\n', 'Estimator Accuracy:', run1_metrics['val_accuracy'][-1], '&', 'Processing Time:', run1_metrics['processing_time'])

## TensorFlow Estimator

In [None]:
from azureml.train.dnn import TensorFlow

# Command-line arguments forwarded to train_cnn.py.
script_params = {'--batch_size': 64, '--epochs': 10}

# TensorFlow estimator: same script and compute target as before, with
# keras requested as an additional pip package.
keras_est = TensorFlow(
    source_directory=script_folder,
    entry_script='train_cnn.py',
    script_params=script_params,
    compute_target=compute_target,
    pip_packages=['keras'],
    use_gpu=True,
)

In [None]:
# Experiment in the workspace to which the TensorFlow-estimator run is submitted.
experiment_name = 'fashion-MNIST2'
exp2 = Experiment(name=experiment_name, workspace=ws)

In [None]:
# Submit the TensorFlow estimator to the second experiment.
run2 = exp2.submit(keras_est)
# Trailing bare expression so the notebook displays the run.
run2

In [None]:
from azureml.contrib.tensorboard import Tensorboard

# The Tensorboard constructor takes an array of runs, so be sure and pass it in as a single-element array here
# `run` is not defined anywhere in this notebook (it exists only inside
# train_cnn.py); the run submitted just above is `run2`.
tb = Tensorboard([run2])

# If successful, start() returns a string with the URI of the instance.
tb.start()

In [None]:
from azureml.widgets import RunDetails
# Show the RunDetails widget for run2.
RunDetails(run2).show()

In [None]:
# Wait for run2 to complete, showing its output while waiting.
run2.wait_for_completion(show_output=True)

In [None]:
# Fetch the metrics that train_cnn.py logged for this run.
run2_metrics = run2.get_metrics()
# Read the last logged epoch ([-1]) rather than a fixed index, so the
# report stays correct if the number of epochs is changed.
print('\n', 'TensorFlow Estimator Accuracy:', run2_metrics['val_accuracy'][-1], '&', 'Processing Time:', run2_metrics['processing_time'])

## TensorFlow with Horovod

In [None]:
from azureml.train.dnn import TensorFlow

# Command-line arguments forwarded to train_cnn.py.
script_params = {'--batch_size': 64, '--epochs': 10}

# TensorFlow estimator spread over two nodes with one process per node,
# launched through the 'mpi' distributed backend.
# NOTE(review): train_cnn.py as written in this notebook never imports
# Horovod — confirm the training is actually distributed.
keras_est2 = TensorFlow(
    source_directory=script_folder,
    entry_script='train_cnn.py',
    script_params=script_params,
    compute_target=compute_target,
    node_count=2,
    process_count_per_node=1,
    distributed_backend='mpi',
    pip_packages=['keras'],
    use_gpu=True,
)


In [None]:
# Experiment in the workspace to which the multi-node run is submitted.
experiment_name = 'fashion-MNIST3'
exp3 = Experiment(name=experiment_name, workspace=ws)

In [None]:
# Submit the multi-node TensorFlow estimator to the third experiment.
run3 = exp3.submit(keras_est2)
# Trailing bare expression so the notebook displays the run.
run3

In [None]:
from azureml.widgets import RunDetails
# Show the RunDetails widget for run3.
RunDetails(run3).show()

In [None]:
# Wait for run3 to complete, showing its output while waiting.
run3.wait_for_completion(show_output=True)

In [None]:
# Fetch the metrics that train_cnn.py logged for this run.
run3_metrics = run3.get_metrics()
# Read the last logged epoch ([-1]) rather than a fixed index, so the
# report stays correct if the number of epochs is changed.
print('\n', 'TensorFlow + Horovod Estimator Accuracy:', run3_metrics['val_accuracy'][-1], '&', 'Processing Time:', run3_metrics['processing_time'])

In [None]:
# Side-by-side summary of accuracy and elapsed time for all four approaches.
print('\n', 'Local Accuracy:', run0, '&', 'Processing Time:', end0-start0)

# The remote runs share one metric layout. Report the final logged epoch
# ([-1]) instead of a fixed index, so the summary stays correct if the
# number of epochs is changed.
remote_results = (
    ('Estimator Accuracy:', run1_metrics),
    ('TensorFlow Estimator Accuracy:', run2_metrics),
    ('TensorFlow + Horovod Estimator Accuracy:', run3_metrics),
)
for heading, metrics in remote_results:
    print('\n', heading, metrics['val_accuracy'][-1], '&', 'Processing Time:', metrics['processing_time'])