### mnist_cnn_deep
#### MNIST and Convolutional Neural Network
- L1,L2 : conv2d + relu + max_pool 
- L3 : FC(Fully Connected Layer)

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
tf.random.set_seed(5)

In [2]:
# mnist 데이터 가져오기
mnist = tf.keras.datasets.mnist
(x_train,y_train),(x_test,y_test) = mnist.load_data()
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [3]:
#Onehot encoding
nb_classes = 10
yonehot = tf.one_hot(y_train,nb_classes) #y.shape (60000,) => (60000,10) rank2로 변경됨
print(yonehot.shape)
#yonehot = tf.reshape(yonehot,[-1,nb_classes])

(60000, 10)


In [4]:
#기존값의 타입은 uint8
print(x_train[0].dtype)
#x값의 타입을 float형으로 변환
x_train = tf.cast(x_train,dtype=tf.float32)
x_test = tf.cast(x_test,dtype=tf.float32)

uint8


In [5]:
#CNN은 4차원으로 연산 (N,H,W,C)
#x값의 shape를 4차원으로 변환
X_img = tf.reshape(x_train,[-1,28,28,1]) #리스트 내 요소가 모두 정상으로 입력되었다면 하나는 -1로 모두를 표현할 수 있음
print(X_img.shape)

(60000, 28, 28, 1)


In [6]:
#filter 생성 (Conv, Relu, max_Pool) - layer1
# (?, 28, 28, 1) --> (?, 14, 14, 32)

W1 = tf.Variable(tf.random.normal([3,3,1,32]),name='Weight1')
# <1> conv2d
# L1 input image shape : (?, 28, 28, 1)
# filter : (3,3,1,32), => (3x3, color:1, number:32) 필터 32개
# strides : (1,1,1,1), padding='SAME'
# 출력 이미지 : (28+2 - 3)/1 + 1 = 28
# (?, 28, 28, 1) --> (?, 28, 28, 32)
def L1_conv2d(X):
    return tf.nn.conv2d(X,W1,strides=[1,1,1,1],padding='SAME')
    
# <2> Relu
def L1_relu(X):
    return tf.nn.relu(L1_conv2d(X)) #shape 변화는 필요없음
    
# <3> max_pool
# input image : (?, 28, 28, 32)
# ksize : (1,2,2,1), strides : (1,2,2,1), padding='SAME'
# 출력 이미지 : (28+1 - 2)/2 + 1 = 14
#  (?, 28, 28, 32) -->  (?, 14, 14, 32)
def L1_Maxpool(X):
    return tf.nn.max_pool(L1_relu(X),ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

In [7]:
# Layer 2 : (conv2d --> relu --> max_pool)*n --> flatten
# (?, 14, 14, 32) --> 

# <1> conv2d
# L2 input image shape : (?, 14, 14, 32)
# filter : (3,3,32,64), 필터 64개
# strides : (1,1,1,1), padding='SAME'
# 출력 이미지 : (14+2 - 3)/1 + 1 = 14
# (?, 14, 14, 32) --> (?, 14,14,64) --> (?,7,7,64)
W2 = tf.Variable(tf.random.normal([3,3,32,64]),name='Weight2')

def L2_conv2d(X):
    return tf.nn.conv2d(L1_Maxpool(X),W2,strides=[1,1,1,1],padding='SAME')

def L2_relu(X):
    return tf.nn.relu(L2_conv2d(X)) 

def L2_Maxpool(X):
    return tf.nn.max_pool(L2_relu(X),ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

# <4> flatten layer : 다차원 배열을 2차원으로 변환하여 FC layer에 전달한다
def Flat(X):
    return tf.reshape(L2_Maxpool(X),[-1,7*7*64])

In [8]:
# Layer 3 : FC(Fully Connected Layer)
# (?,7*7*64) * (7*7*64, 10) = (?,10)
W3=tf.Variable(tf.random.normal([7*7*64,nb_classes]),name='Weight3')
b=tf.Variable(tf.random.normal([nb_classes]),name='Bias3')

def logits(X):
    return tf.matmul(Flat(X),W3)+b
    
def hy(X):
    return tf.nn.softmax(logits(X))

In [12]:
# batch 사이즈로 나누어 학습, 효율적 이며 학습 시간 단축(단위 epoch)
training_epoch = 50
batch_size = 600 # 1epoch 당 100회 학습 (60000개의 데이터)

#경사하강법
opt = tf.keras.optimizers.Adam(learning_rate=0.01)

print("===Start===")
for epoch in range(training_epoch):
    avg_cost = 0
    total_batch = x_train.shape[0]//batch_size
    for k in range(total_batch):
        batch_xs = x_train[k*batch_size:(k+1)*batch_size]
        batch_ys = yonehot[k*batch_size:(k+1)*batch_size]

#x값의 shape를 4차원으로 변경
        X_img = tf.reshape(batch_xs,[-1,28,28,1])
        def batch_cost_func():
            cost_i = tf.nn.softmax_cross_entropy_with_logits(logits = logits(X_img),
                                                     labels = batch_ys)
            cost = tf.reduce_mean(cost_i)
            return cost
            
        opt.minimize(batch_cost_func,var_list=[W1,W2,W3,b])
        avg_cost += batch_cost_func().numpy()/total_batch
    print('Epoch:','%04d'%(epoch + 1),'cost:','{:.9f}'.format(avg_cost))
print("====End====")

===Start===
Epoch: 0001 cost: 16355.137250977
Epoch: 0002 cost: 2603.961317749
Epoch: 0003 cost: 1486.975006409
Epoch: 0004 cost: 1013.342236328
Epoch: 0005 cost: 751.475326843
Epoch: 0006 cost: 580.008846512
Epoch: 0007 cost: 459.861003456
Epoch: 0008 cost: 372.834581833
Epoch: 0009 cost: 301.126077042
Epoch: 0010 cost: 247.667602310
Epoch: 0011 cost: 213.867624512
Epoch: 0012 cost: 194.353375349
Epoch: 0013 cost: 155.027475615
Epoch: 0014 cost: 127.590496216
Epoch: 0015 cost: 102.772195482
Epoch: 0016 cost: 84.648362182
Epoch: 0017 cost: 75.302405913
Epoch: 0018 cost: 62.141945759
Epoch: 0019 cost: 53.801338247
Epoch: 0020 cost: 42.865027601
Epoch: 0021 cost: 37.265522803
Epoch: 0022 cost: 29.454476304
Epoch: 0023 cost: 30.204148861
Epoch: 0024 cost: 26.276840490
Epoch: 0025 cost: 23.602795168
Epoch: 0026 cost: 23.085343289
Epoch: 0027 cost: 14.292190758
Epoch: 0028 cost: 18.206025555
Epoch: 0029 cost: 15.470276575
Epoch: 0030 cost: 11.813008163
Epoch: 0031 cost: 12.172869969
Epoch: 

In [16]:
# 정확도 측정 : accuracy computation

# y_test 값의 one-hot 인코딩
Y_one_hot = tf.one_hot(y_test,nb_classes)    # (10000,10)
# print(Y_one_hot.shape)                       # (10000,10)  , (2차원)

# tf.argmax() : 값이 가장 큰 요소의 인덱스 값을 반환
def predict(X):
    return tf.argmax(hy(X),axis=1)

# X값의 shape을 4차원으로 변환
X_img = tf.reshape(x_test,[-1,28,28,1])
        
correct_predict = tf.equal(predict(X_img),tf.argmax(Y_one_hot,1))
accuracy = tf.reduce_mean(tf.cast(correct_predict, dtype = tf.float32))
print("Accuracy:",accuracy.numpy())

Accuracy: 0.9824
