In [None]:
# TensorFlow provides the Keras API imported later in this notebook;
# print the installed version so the run environment is recorded in the output.
import tensorflow as tf
print(tf.__version__)

![대체 텍스트](https://www.pyimagesearch.com/wp-content/uploads/2019/02/fashion_mnist_dataset_sample.png)


# MNIST is too easy.
Convolutional nets can achieve 99.7% on MNIST. Classic machine learning algorithms can also achieve 97% easily. 
# MNIST is overused. 
In an April 2017 Twitter thread, Google Brain research scientist and deep learning expert Ian Goodfellow called for people to move away from MNIST.
# MNIST cannot represent modern CV tasks.

# Fashion MNIST dataset
Similar to the MNIST digit dataset, the Fashion MNIST dataset includes:

60,000 training examples

10,000 testing examples

10 classes

28×28 grayscale/single channel images

![대체 텍스트](https://www.pyimagesearch.com/wp-content/uploads/2019/02/fashion_mnist_obtaining.jpg)

In [None]:
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Activation
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dropout, BatchNormalization

from tensorflow.keras import optimizers

from tensorflow.keras import backend as K

%matplotlib inline
import matplotlib.pyplot as plt

# STEP 1: Fashion MNIST 데이터 읽어들이기

In [None]:
from tensorflow.keras.datasets import fashion_mnist

# load_data() returns a (train, test) pair, each an (images, labels) tuple
train_split, test_split = fashion_mnist.load_data()
trainX, trainY = train_split
testX, testY = test_split

# human-readable class names, indexed by the integer label
labelNames = [
    "top",
    "trouser",
    "pullover",
    "dress",
    "coat",
    "sandal",
    "shirt",
    "sneaker",
    "bag",
    "ankle boot",
]

# STEP 2: 데이터 살펴보기

In [None]:
# Show the first plt_row * plt_col test images in a grid, each titled with its true label.
plt_row = 5
plt_col = 5

width = height = 28

plt.rcParams["figure.figsize"] = (15,15)

# squeeze=False keeps axarr two-dimensional even for a 1x1 or single-row grid
f, axarr = plt.subplots(plt_row, plt_col, squeeze=False)

# axarr.flat walks the grid row by row, so image i lands at (i // plt_col, i % plt_col).
# The earlier int(i/plt_row) row index was only correct when plt_row == plt_col.
for i, sub_plt in enumerate(axarr.flat):
    sub_plt.axis('off')
    sub_plt.imshow(testX[i].reshape(width, height), cmap='gray')
    sub_plt.set_title('R: ' + labelNames[testY[i]])

plt.show()

# STEP 3: 딥러닝을 위한 데이터 전처리

In [None]:
# This cell overwrites trainX/trainY/testY in place, so it must only run on freshly
# loaded data. Fail with a clear message instead of an obscure reshape error on re-run.
if trainX.ndim != 3 or trainY.ndim != 1:
    raise RuntimeError(
        "Data looks already preprocessed - re-run the data loading cell (STEP 1) first."
    )

# flatten each 28*28 image to a 784 vector and divide by 255 to rescale the pixel values
width = height = 28
num_pixels = width * height
trainX = trainX.reshape(len(trainX), num_pixels).astype('float32') / 255.0
testX = testX.reshape(len(testX), num_pixels).astype('float32') / 255.0

# split the training data: the first num_train samples stay for training,
# the remainder becomes the validation set
num_train = 50000
valX = trainX[num_train:]
valY = trainY[num_train:]
trainX = trainX[:num_train]
trainY = trainY[:num_train]

# one hot encode outputs
num_classes = 10
trainY = tf.keras.utils.to_categorical(trainY, num_classes)
valY = tf.keras.utils.to_categorical(valY, num_classes)
testY = tf.keras.utils.to_categorical(testY, num_classes)

print ('train shape: \t', trainX.shape)
print ('valid shape: \t', valX.shape)
print ('test shape: \t', testX.shape)


In [None]:
# Display the shape of the training array as the cell output.
trainX.shape

# STEP 4: 첫번째 인공지능 모델 (퍼셉트론)

![대체 텍스트](https://www.simplilearn.com/ice9/free_resources_article_thumb/diagram-of-a-biological-neuron.jpg)

![대체 텍스트](http://bit.ly/2ldH0Bg)

In [None]:
def logistic_regression_model():
    """Build and compile a single-layer softmax classifier.

    Reads the notebook globals ``num_pixels`` (input width) and
    ``num_classes`` (number of output units).

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        Nesterov-momentum SGD and the ``accuracy`` metric.
    """
    model = Sequential()

    model.add(Dense(num_classes, input_dim=num_pixels, kernel_initializer='normal', activation='softmax'))

    # The optimizer's `decay` keyword is rejected or ignored by newer Keras releases.
    # An inverse-time schedule expresses the same rule: lr = 0.01 / (1 + 1e-6 * step).
    lr_schedule = optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=0.01, decay_steps=1, decay_rate=1e-6)
    sgd = optimizers.SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    return model

# STEP 5: 첫번째 인공지능 모델 학습!!!

In [None]:
# Fix the random seeds BEFORE building the model: the initial weights are drawn when
# the layers are created, so seeding afterwards cannot influence them. TensorFlow has
# its own generator, hence tf.random.set_seed in addition to NumPy.
seed = 7
np.random.seed(seed)
tf.random.set_seed(seed)

# build the model
model = logistic_regression_model()
model.summary()

# FIT THE MODEL - OPTIMIZATION
hist = model.fit(trainX, trainY, validation_data=(valX, valY), epochs=20, batch_size=64, verbose=2)
model.save('logistic_regression_model.h5')

# Inspect the training history: loss on the left axis, accuracy on the right axis
fig, loss_ax = plt.subplots()

acc_ax = loss_ax.twinx()

loss_ax.plot(hist.history['loss'], 'y', label='train loss')
loss_ax.plot(hist.history['val_loss'], 'r', label='val loss')
loss_ax.set_ylim([0.0, 1.5])

acc_ax.plot(hist.history['accuracy'], 'b', label='train acc')
acc_ax.plot(hist.history['val_accuracy'], 'g', label='val acc')
acc_ax.set_ylim([0.5, 1.0])

loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
acc_ax.set_ylabel('accuracy')

loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')

plt.show()

# STEP 6: 결과 확인 (테스트 데이터셋)

In [None]:
# Score the trained model on the test set; evaluate() returns [loss, accuracy]
scores = model.evaluate(testX, testY, verbose=0)
test_accuracy = scores[1]
print("Baseline Error: %.2f%%" % (100 - test_accuracy * 100))

# STEP 7: 학습된 weight 살펴보기

In [None]:
# Render the weight vector of each output unit of the first layer as a 28x28 image
W = model.layers[0].get_weights()[0]
print("W shape : ", W.shape)

# swap the two axes so that iterating yields one row of weights per output unit
W = W.T

plt.figure(figsize=(15, 15), frameon=False)
for slot, unit_weights in enumerate(W, start=1):
    plt.subplot(5, 5, slot)
    plt.axis("off")
    plt.imshow(unit_weights.reshape((28,28)), cmap='gray',interpolation='nearest')

In [None]:
# Display the shape of the first layer's kernel (weight matrix) as the cell output.
model.layers[0].get_weights()[0].shape

![대체 텍스트](https://www.pyimagesearch.com/wp-content/uploads/2019/02/fashion_mnist_obtaining.jpg)

# STEP 8: 두번째 인공지능 모델 (MLP)

![대체 텍스트](https://www.researchgate.net/profile/Hadley_Brooks/publication/270274130/figure/fig3/AS:667886670594050@1536247999230/Architecture-of-a-multilayer-neural-network-with-one-hidden-layer-The-input-layer.png)

In [None]:
def multi_linear_perceptron_model():
    """Build and compile a multilayer perceptron with one sigmoid hidden layer.

    Architecture: 256-unit sigmoid hidden layer followed by a softmax output
    layer. Reads the notebook globals ``num_pixels`` and ``num_classes``.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        Nesterov-momentum SGD and the ``accuracy`` metric.
    """
    model = Sequential()

    model.add(Dense(256, input_dim=num_pixels, kernel_initializer='normal', activation='sigmoid'))
    model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))

    # The optimizer's `decay` keyword is rejected or ignored by newer Keras releases.
    # An inverse-time schedule expresses the same rule: lr = 0.01 / (1 + 1e-6 * step).
    lr_schedule = optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=0.01, decay_steps=1, decay_rate=1e-6)
    sgd = optimizers.SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    return model

In [None]:
# Fix the random seeds BEFORE building the model: the initial weights are drawn when
# the layers are created, so seeding afterwards cannot influence them. TensorFlow has
# its own generator, hence tf.random.set_seed in addition to NumPy.
seed = 7
np.random.seed(seed)
tf.random.set_seed(seed)

# build the model
model = multi_linear_perceptron_model()
model.summary()

# Fit the model
hist = model.fit(trainX, trainY, validation_data=(valX, valY), epochs=20, batch_size=64, verbose=2)
model.save('multi_linear_perceptron_model.h5')

# Inspect the training history: loss on the left axis, accuracy on the right axis
fig, loss_ax = plt.subplots()

acc_ax = loss_ax.twinx()

loss_ax.plot(hist.history['loss'], 'y', label='train loss')
loss_ax.plot(hist.history['val_loss'], 'r', label='val loss')
loss_ax.set_ylim([0.0, 1.5])

acc_ax.plot(hist.history['accuracy'], 'b', label='train acc')
acc_ax.plot(hist.history['val_accuracy'], 'g', label='val acc')
acc_ax.set_ylim([0.5, 1.0])

loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
acc_ax.set_ylabel('accuracy')

loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')

plt.show()

In [None]:
# Score the trained model on the test set; evaluate() returns [loss, accuracy]
scores = model.evaluate(testX, testY, verbose=0)
test_accuracy = scores[1]
print("Error: %.2f%%" % (100 - test_accuracy * 100))

# STEP 9: 세번째 인공지능 모델 (DEEP-MLP)

![대체 텍스트](https://i.stack.imgur.com/OH3gI.png)

![대체 텍스트](http://www.saedsayad.com/images/ANN_Sigmoid.png)

In [None]:
def deep_perceptron_initial_model():
    """Build and compile a deep perceptron with five sigmoid hidden layers.

    Architecture: five 256-unit sigmoid layers followed by a softmax output
    layer. Reads the notebook globals ``num_pixels`` and ``num_classes``.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        Nesterov-momentum SGD and the ``accuracy`` metric.
    """
    model = Sequential()

    # only the first layer needs the input size; the rest infer it from the previous layer
    model.add(Dense(256, input_dim=num_pixels, kernel_initializer='normal', activation='sigmoid'))
    model.add(Dense(256, kernel_initializer='normal', activation='sigmoid'))
    model.add(Dense(256, kernel_initializer='normal', activation='sigmoid'))
    model.add(Dense(256, kernel_initializer='normal', activation='sigmoid'))
    model.add(Dense(256, kernel_initializer='normal', activation='sigmoid'))
    model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))

    # The optimizer's `decay` keyword is rejected or ignored by newer Keras releases.
    # An inverse-time schedule expresses the same rule: lr = 0.01 / (1 + 1e-6 * step).
    lr_schedule = optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=0.01, decay_steps=1, decay_rate=1e-6)
    sgd = optimizers.SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    return model

In [None]:
# Fix the random seeds BEFORE building the model: the initial weights are drawn when
# the layers are created, so seeding afterwards cannot influence them. TensorFlow has
# its own generator, hence tf.random.set_seed in addition to NumPy.
seed = 7
np.random.seed(seed)
tf.random.set_seed(seed)

# build the model
model = deep_perceptron_initial_model()
model.summary()

# Fit the model
hist = model.fit(trainX, trainY, validation_data=(valX, valY), epochs=20, batch_size=64, verbose=2)
model.save('deep_perceptron_initial_model.h5')

# Inspect the training history: loss on the left axis, accuracy on the right axis
# (no fixed y-limits here, so the axes auto-scale to whatever the curves reach)
fig, loss_ax = plt.subplots()

acc_ax = loss_ax.twinx()

loss_ax.plot(hist.history['loss'], 'y', label='train loss')
loss_ax.plot(hist.history['val_loss'], 'r', label='val loss')

acc_ax.plot(hist.history['accuracy'], 'b', label='train acc')
acc_ax.plot(hist.history['val_accuracy'], 'g', label='val acc')

loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
acc_ax.set_ylabel('accuracy')

loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')

plt.show()

In [None]:
# Score the trained model on the test set; evaluate() returns [loss, accuracy]
scores = model.evaluate(testX, testY, verbose=0)
test_accuracy = scores[1]
print("Error: %.2f%%" % (100 - test_accuracy * 100))

# STEP 10: 세번째 인공지능 모델의 문제점과 개선

![대체 텍스트](https://image.slidesharecdn.com/usuconference-deeplearning-160418191119/95/introduction-to-deep-learning-7-638.jpg?cb=1461006739)

![대체 텍스트](https://smartstuartkim.files.wordpress.com/2019/02/vanishinggradient-1.png?w=1140&h=492)

In [None]:
#  Hint
# 'relu'

def deep_perceptron_model_with_relu():
    """Build and compile a deep perceptron with five ReLU hidden layers.

    Same layout as the sigmoid version (five 256-unit hidden layers and a
    softmax output) but with ``relu`` activations. The hidden activations were
    ``'????'`` exercise placeholders, which Keras rejects as an unknown
    activation; they are filled in with ``'relu'`` so the notebook runs.
    Reads the notebook globals ``num_pixels`` and ``num_classes``.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        Nesterov-momentum SGD and the ``accuracy`` metric.
    """
    model = Sequential()

    model.add(Dense(256, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
    model.add(Dense(256, kernel_initializer='normal', activation='relu'))
    model.add(Dense(256, kernel_initializer='normal', activation='relu'))
    model.add(Dense(256, kernel_initializer='normal', activation='relu'))
    model.add(Dense(256, kernel_initializer='normal', activation='relu'))
    model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))

    # The optimizer's `decay` keyword is rejected or ignored by newer Keras releases.
    # An inverse-time schedule expresses the same rule: lr = 0.01 / (1 + 1e-6 * step).
    lr_schedule = optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=0.01, decay_steps=1, decay_rate=1e-6)
    sgd = optimizers.SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    return model

In [None]:
# Fix the random seeds BEFORE building the model: the initial weights are drawn when
# the layers are created, so seeding afterwards cannot influence them. TensorFlow has
# its own generator, hence tf.random.set_seed in addition to NumPy.
seed = 7
np.random.seed(seed)
tf.random.set_seed(seed)

# build the model
model = deep_perceptron_model_with_relu()
model.summary()

# Fit the model
hist = model.fit(trainX, trainY, validation_data=(valX, valY), epochs=20, batch_size=64, verbose=2)
# Save under this model's own name; the file name previously collided with the
# dropout model's file, which would then overwrite it.
model.save('deep_perceptron_model_with_relu.h5')

# Inspect the training history: loss on the left axis, accuracy on the right axis
fig, loss_ax = plt.subplots()

acc_ax = loss_ax.twinx()

loss_ax.plot(hist.history['loss'], 'y', label='train loss')
loss_ax.plot(hist.history['val_loss'], 'r', label='val loss')
loss_ax.set_ylim([0.0, 1.5])

acc_ax.plot(hist.history['accuracy'], 'b', label='train acc')
acc_ax.plot(hist.history['val_accuracy'], 'g', label='val acc')
acc_ax.set_ylim([0.5, 1.0])

loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
acc_ax.set_ylabel('accuracy')

loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')

plt.show()

In [None]:
# Score the trained model on the test set; evaluate() returns [loss, accuracy]
scores = model.evaluate(testX, testY, verbose=0)
test_accuracy = scores[1]
print("Perceptron model with relu error: %.2f%%" % (100 - test_accuracy * 100))

![대체 텍스트](https://miro.medium.com/max/1200/1*iWQzxhVlvadk6VAJjsgXgg.png)

In [None]:
#  Hint
# 'Dropout'

def deep_perceptron_model_with_relu_dropout():
    """Build and compile a deep ReLU perceptron with dropout after every hidden layer.

    Architecture: five blocks of (256-unit ReLU layer, Dropout(0.2)) followed
    by a softmax output layer. The dropout layers were ``????`` exercise
    placeholders, which is not valid Python; they are filled in with
    ``Dropout`` so the cell parses and runs.
    Reads the notebook globals ``num_pixels`` and ``num_classes``.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        Nesterov-momentum SGD and the ``accuracy`` metric.
    """
    model = Sequential()
    model.add(Dense(256, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))

    model.add(Dense(256, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))

    model.add(Dense(256, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))

    model.add(Dense(256, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))

    model.add(Dense(256, kernel_initializer='normal', activation='relu'))
    model.add(Dropout(0.2))

    model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))

    # The optimizer's `decay` keyword is rejected or ignored by newer Keras releases.
    # An inverse-time schedule expresses the same rule: lr = 0.01 / (1 + 1e-6 * step).
    lr_schedule = optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=0.01, decay_steps=1, decay_rate=1e-6)
    sgd = optimizers.SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    return model

In [None]:
# Fix the random seeds BEFORE building the model: the initial weights are drawn when
# the layers are created, so seeding afterwards cannot influence them. TensorFlow has
# its own generator, hence tf.random.set_seed in addition to NumPy.
seed = 7
np.random.seed(seed)
tf.random.set_seed(seed)

# build the model
model = deep_perceptron_model_with_relu_dropout()
model.summary()

# Fit the model
hist = model.fit(trainX, trainY, validation_data=(valX, valY), epochs=20, batch_size=64, verbose=2)
model.save('deep_perceptron_model_with_dropout.h5')

# Inspect the training history: loss on the left axis, accuracy on the right axis
fig, loss_ax = plt.subplots()

acc_ax = loss_ax.twinx()

loss_ax.plot(hist.history['loss'], 'y', label='train loss')
loss_ax.plot(hist.history['val_loss'], 'r', label='val loss')
loss_ax.set_ylim([0.0, 1.5])

# The model is compiled with metrics=['accuracy'], so the history keys are
# 'accuracy' / 'val_accuracy' (as in every other training cell), not 'acc' / 'val_acc'.
acc_ax.plot(hist.history['accuracy'], 'b', label='train acc')
acc_ax.plot(hist.history['val_accuracy'], 'g', label='val acc')
acc_ax.set_ylim([0.5, 1.0])

loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
acc_ax.set_ylabel('accuracy')

loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')

plt.show()

In [None]:
# Score the trained model on the test set; evaluate() returns [loss, accuracy]
scores = model.evaluate(testX, testY, verbose=0)
test_accuracy = scores[1]
print("Perceptron model with relu and dropout error: %.2f%%" % (100 - test_accuracy * 100))

# STEP 11: 네번째 인공지능 모델 (CNN)

![대체 텍스트](https://www.mdpi.com/entropy/entropy-19-00242/article_deploy/html/images/entropy-19-00242-g001.png)


# 중요! 입력데이터의 형태가 바뀌어야 한다. 
# 784 (1D) -> 28x28 (2D)

In [None]:
# reshape to [samples][height][width][channels] - the layout implied by the CNNs'
# input_shape=(28, 28, 1). Using -1 infers the sample count from each array
# instead of hard-coding the split sizes.
trainX = trainX.reshape(-1, 28, 28, 1)
valX = valX.reshape(-1, 28, 28, 1)
testX = testX.reshape(-1, 28, 28, 1)


In [None]:
def simple_cnn_model():
    """Build and compile a small convolutional network for 28x28x1 images.

    Architecture: one 5x5 convolution with 32 filters (ReLU), 2x2 max pooling,
    flatten, a 128-unit ReLU dense layer and a softmax output layer.
    Reads the notebook global ``num_classes``.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        Nesterov-momentum SGD and the ``accuracy`` metric.
    """
    model = Sequential()

    model.add(Conv2D(32, (5,5), input_shape=(28, 28, 1), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))

    # The optimizer's `decay` keyword is rejected or ignored by newer Keras releases.
    # An inverse-time schedule expresses the same rule: lr = 0.01 / (1 + 1e-6 * step).
    lr_schedule = optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=0.01, decay_steps=1, decay_rate=1e-6)
    sgd = optimizers.SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    return model

In [None]:
# Fix the random seeds BEFORE building the model: the initial weights are drawn when
# the layers are created, so seeding afterwards cannot influence them. TensorFlow has
# its own generator, hence tf.random.set_seed in addition to NumPy.
seed = 7
np.random.seed(seed)
tf.random.set_seed(seed)

# build the model
model = simple_cnn_model()
model.summary()

# Fit the model
hist = model.fit(trainX, trainY, validation_data=(valX, valY), epochs=20, batch_size=64, verbose=2)
model.save('simple_cnn_model.h5')

# Inspect the training history: loss on the left axis, accuracy on the right axis
fig, loss_ax = plt.subplots()

acc_ax = loss_ax.twinx()

loss_ax.plot(hist.history['loss'], 'y', label='train loss')
loss_ax.plot(hist.history['val_loss'], 'r', label='val loss')
loss_ax.set_ylim([0.0, 1.5])

acc_ax.plot(hist.history['accuracy'], 'b', label='train acc')
acc_ax.plot(hist.history['val_accuracy'], 'g', label='val acc')
acc_ax.set_ylim([0.5, 1.0])

loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
acc_ax.set_ylabel('accuracy')

loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')

plt.show()

In [None]:
# Score the trained model on the test set; evaluate() returns [loss, accuracy]
scores = model.evaluate(testX, testY, verbose=0)
test_accuracy = scores[1]
print("2D simple CNN error: %.2f%%" % (100 - test_accuracy * 100))

# STEP 12: Convolution kernel 살펴보기 (5x5)

In [None]:
# Draw every kernel of the first layer as a 5x5 grayscale image.
# squeeze() drops the size-1 axes of the kernel tensor before plotting.
W1 = np.squeeze(model.layers[0].get_weights()[0])

print(W1.shape)
# bring the last axis to the front so that iterating yields one kernel at a time
W1 = np.moveaxis(W1, -1, 0)

plt.figure(figsize=(15, 15), frameon=False)
for slot, kernel in enumerate(W1, start=1):
    plt.subplot(6, 6, slot)
    plt.axis("off")
    plt.imshow(kernel.reshape((5,5)), cmap='gray',interpolation='nearest')

In [None]:
# Backend function mapping the first layer's input to the second layer's output.
# NOTE(review): K.function may not be available in newer Keras backends - confirm
# against the installed version.
convout1_f = K.function([model.layers[0].input], [model.layers[1].output])

# run the first three test images through it and drop the size-1 axes of the result
x_rep = np.squeeze(convout1_f([testX[:3]]))

print(x_rep.shape)

# one figure per input image, one subplot per channel of its feature map
for feature_maps in x_rep:
    plt.figure(figsize=(15, 15), frameon=False)

    n_channels = feature_maps.shape[2]
    for ch in range(n_channels):
        plt.subplot(6, 6, ch + 1)
        plt.axis("off")
        plt.imshow(feature_maps[:,:,ch], cmap='gray',interpolation='nearest')

# STEP 13: 마지막 인공지능 모델 (VGG-like CNN)

![대체 텍스트](https://neurohive.io/wp-content/uploads/2018/11/vgg16-1-e1542731207177.png)

In [None]:
def cnn_model():
    """Build and compile a VGG-like convolutional network for 28x28x1 images.

    Architecture: two (Conv 3x3 x32, BatchNorm, ReLU) blocks, 2x2 max pooling,
    two (Conv 3x3 x64, BatchNorm, ReLU) blocks, 2x2 max pooling, flatten,
    a 128-unit ReLU dense layer, Dropout(0.5) and a softmax output layer.
    Reads the notebook global ``num_classes``.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        Nesterov-momentum SGD and the ``accuracy`` metric.
    """
    model = Sequential()

    # first stage: two 32-filter convolutions, each normalised before the activation
    model.add(Conv2D(32, (3,3), input_shape=(28, 28, 1)))
    model.add(BatchNormalization())
    model.add(Activation(activation='relu'))

    model.add(Conv2D(32, (3,3)))
    model.add(BatchNormalization())
    model.add(Activation(activation='relu'))

    model.add(MaxPooling2D(pool_size=(2,2)))

    # second stage: two 64-filter convolutions
    model.add(Conv2D(64, (3,3)))
    model.add(BatchNormalization())
    model.add(Activation(activation='relu'))

    model.add(Conv2D(64, (3,3)))
    model.add(BatchNormalization())
    model.add(Activation(activation='relu'))

    model.add(MaxPooling2D(pool_size=(2,2)))

    # classifier head
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    # The optimizer's `decay` keyword is rejected or ignored by newer Keras releases.
    # An inverse-time schedule expresses the same rule: lr = 0.01 / (1 + 1e-6 * step).
    lr_schedule = optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=0.01, decay_steps=1, decay_rate=1e-6)
    sgd = optimizers.SGD(learning_rate=lr_schedule, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    return model

In [None]:
# Fix the random seeds BEFORE building the model: the initial weights are drawn when
# the layers are created, so seeding afterwards cannot influence them. TensorFlow has
# its own generator, hence tf.random.set_seed in addition to NumPy.
seed = 7
np.random.seed(seed)
tf.random.set_seed(seed)

# build the model
model = cnn_model()

# Fit the model
hist = model.fit(trainX, trainY, validation_data=(valX, valY), epochs=20, batch_size=64, verbose=2)
model.save('cnn_model.h5')

# Inspect the training history: loss on the left axis, accuracy on the right axis
fig, loss_ax = plt.subplots()

acc_ax = loss_ax.twinx()

loss_ax.plot(hist.history['loss'], 'y', label='train loss')
loss_ax.plot(hist.history['val_loss'], 'r', label='val loss')
loss_ax.set_ylim([0.0, 1.5])

acc_ax.plot(hist.history['accuracy'], 'b', label='train acc')
acc_ax.plot(hist.history['val_accuracy'], 'g', label='val acc')
acc_ax.set_ylim([0.5, 1.0])

loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
acc_ax.set_ylabel('accuracy')

loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')

plt.show()

In [None]:
# Score the trained model on the test set; evaluate() returns [loss, accuracy]
scores = model.evaluate(testX, testY, verbose=0)
test_accuracy = scores[1]
print("VGG-like CNN error: %.2f%%" % (100 - test_accuracy * 100))

# STEP 14: 결과 확인하기 (틀린 것 들만)

In [None]:
# 7. Use the model: predict class probabilities for every test image and
# display the first misclassified ones with their true (R) and predicted (P) labels.
yhat_test = model.predict(testX, batch_size=32)

plt_row = 5
plt_col = 5

plt.rcParams["figure.figsize"] = (20,20)

# squeeze=False keeps axarr two-dimensional even for a 1x1 or single-row grid
f, axarr = plt.subplots(plt_row, plt_col, squeeze=False)

# testY is one-hot encoded, so argmax over the class axis recovers the integer label
true_cls = np.argmax(testY, axis=1)
pred_cls = np.argmax(yhat_test, axis=1)

# Indices of the wrong predictions, capped at the grid size. Selecting them up front
# avoids walking past the end of the test set when there are fewer errors than cells.
wrong_idx = np.flatnonzero(true_cls != pred_cls)[:plt_row * plt_col]

# hide every axis first so that unused cells stay blank
for sub_plt in axarr.flat:
    sub_plt.axis('off')

# axarr.flat walks the grid row by row (row = k // plt_col, col = k % plt_col);
# the earlier int(cnt/plt_row) row index was only correct when plt_row == plt_col.
for sub_plt, i in zip(axarr.flat, wrong_idx):
    sub_plt.imshow(testX[i].reshape(width, height), cmap='gray')
    sub_plt_title = ('R: ' + labelNames[true_cls[i]] + '(%.2f)' % yhat_test[i][true_cls[i]]
                     + ' P: ' + labelNames[pred_cls[i]] + '(%.2f)' % yhat_test[i][pred_cls[i]])
    sub_plt.set_title(sub_plt_title)

plt.show()