In [1]:
import tensorflow as tf
import numpy as np

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('./mnist/data/', one_hot = True)


Extracting ./mnist/data/train-images-idx3-ubyte.gz
Extracting ./mnist/data/train-labels-idx1-ubyte.gz
Extracting ./mnist/data/t10k-images-idx3-ubyte.gz
Extracting ./mnist/data/t10k-labels-idx1-ubyte.gz


In [None]:
np.get

In [26]:
for i, j, k in zip(range(1, 4), [4, 5, 6], [7, 8, 9]):
    print(i, j ,k)

1 4 7
2 5 8
3 6 9


In [None]:
#hyperparameter
params = {
    'num_classes': 10,
    'num_filters': [8, 16, 32],
    'filter_size': [2, 2, 2],
    'activation' : tf.nn.relu,
    'cnn_batch_norm' : [False, False, False],
    'fc_hidden_units': [256, 128],
    'fc_batch_norm': [False, False],
    
    'rnn_n_step': 28, #width, time
    'rnn_n_hiddens': [128, 128],
    'rnn_dropout_keep_prob': [0.5, 0.4],
    
    'learning_rate': 0.01,
    
    'batch_size': 100
}
tf.reset_default_graph()

class Model(object):

    def __init__(self, params, sess, name):
        # 하이퍼파라미터
        self.num_classes = params['num_classes']
        self.num_filters = params['num_filters']
        self.filter_sizes = params['filter_size']
        self.cnn_batch_norm  = params['cnn_batch_norm']
        
        self.fc_hidden_units = params['fc_hidden_units']
        self.fc_batch_norm = params['fc_batch_norm']
        
        self.rnn_n_hiddens = params['rnn_n_hiddens']
        self.rnn_dropout_keep_prob = params['rnn_dropout_keep_prob']
        
        self.learning_rate = params['learning_rate']
        self.activation = params['activation']
        self.batch_size = params['batch_size']        
        
        self.idx_convolutional_layers = range(1, len(self.filter_sizes) + 1)
        self.idx_fc_layers = range(1, len(self.fc_hidden_units) + 1)
        self.idx_rnn_layers = range(1, len(self.rnn_n_hiddens) + 1)
        self.name = name
        # 플레이스홀더
        self.X = tf.placeholder(tf.float32, [None, 28, 28, 1], name="input_x")
        self.Y = tf.placeholder(tf.float32, [None, self.num_classes], name="input_y")
        self.sess = sess
        
        self._build_net()

    #  컨볼루션 레이어를 params에서 받은 파라미터를 따라 구축
    def convolutional_layers(self, X, is_training = True, reuse = False):
        inputs = X
        for i, num_filter, filter_size, use_bn in zip(self.idx_convolutional_layers, self.num_filters, self.filter_sizes, self.cnn_batch_norm):            
            L = tf.layers.conv2d(inputs,
                                 filters=num_filter,
                                 kernel_size=filter_size,
                                 strides=1,
                                 padding='SAME',
                                 name = 'CONV'+str(i),
                                 reuse= reuse)
            if use_bn:
                L= tf.layers.batch_normalization(L, training= is_training, name='BN' + str(i), reuse= reuse)
            L = self.activation(L)
            L = tf.layers.max_pooling2d(L, pool_size = 2, strides = 2, padding = 'SAME')
            inputs = L
        return inputs
    #  dense 레이어를 params에서 받은 파라미터를 따라 구축
    def fc_layers(self, X, is_training = True, reuse = False):
        inputs = X
        for i, units, use_bn in zip(self.idx_fc_layers, self.fc_hidden_units, self.fc_batch_norm):
            fc = tf.layers.dense(inputs,
                                 units=units,
                                 reuse=reuse,
                                 name = 'FC' + str(i))
            if use_bn:
                fc = tf.layers.batch_normalization(fc, training= is_training, name='fc_BN' + str(i), reuse= reuse)
            fc = self.activation(fc)
            inputs = fc 
        return inputs
    def rnn_single_layers(self, inputs, is_training = True, reuse = False):
        if is_training:
            keep_probs = self.rnn_dropout_keep_prob
        else:
            keep_probs = np.ones_like(self.rnn_dropout_keep_prob)
        cell = tf.nn.rnn_cell.BasicLSTMCell(self.rnn_n_hiddens[0])
        cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_probs[0])
        # output_shape [batch_size, width(n_step), n_classes]
        outputs, states = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
        print(outputs.get_shape().as_list())
        outputs = tf.transpose(outputs, [1, 0, 2])
        outputs = outputs[-1]
        return outputs 
    
    def rnn_multi_layers(self, inputs, is_training = True, reuse = False):
        if is_training:
            keep_probs = self.rnn_dropout_keep_prob
            
        else:
            keep_probs = np.ones_like(self.rnn_dropout_keep_prob)
        
        cell1 = tf.nn.rnn_cell.BasicLSTMCell(self.rnn_n_hiddens[0], reuse = reuse)
        cell1 = tf.nn.rnn_cell.DropoutWrapper(cell1, output_keep_prob=keep_probs[0])
        cell2 = tf.nn.rnn_cell.BasicLSTMCell(self.rnn_n_hiddens[1], reuse = reuse)
        cell2 = tf.nn.rnn_cell.DropoutWrapper(cell2, output_keep_prob=keep_probs[1])
        cell = tf.nn.rnn_cell.MultiRNNCell([cell1, cell2])
        # output_shape [batch_size, width(n_step), n_classes]
        outputs, states = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
        print(outputs.get_shape().as_list())
        outputs = tf.transpose(outputs, [1, 0, 2])
        outputs = outputs[-1]
        return outputs
    
    def get_reshaped_cnn_to_rnn(self, inputs):
        # [batch, height, width, n_feature map]
        shape = inputs.get_shape().as_list() 
        # 우리가 얻어야하는 사이즈 [batch, height, width x n_feature map]
        reshaped_inputs = tf.reshape(inputs, [-1, shape[1], shape[2] * shape[3]])
        return reshaped_inputs
    
    # 모델 구축/ logit 
    def get_logits(self, X, is_training = True, reuse = False):        
        conv = self.convolutional_layers(self.X, is_training, reuse)                           
        #flat = tf.layers.flatten(conv)
        reshaped_fp = self.get_reshaped_cnn_to_rnn(conv)
        rnn = self.rnn_multi_layers(reshaped_fp, reuse)
        #fc = self.fc_layers(flat, is_training, reuse)
        output = tf.layers.dense(rnn, units= self.num_classes, reuse=reuse, name = 'out')
        
        return output
    
    # 모델 구축
    def _build_net(self):
        with tf.variable_scope(self.name):
            self.logits_train = self.get_logits(self.X)                              
            self.loss = tf.losses.softmax_cross_entropy(self.Y, self.logits_train)   
                # batch_normalization 적용을 위해 모든 변수들을 불러와서 moving
                #학습 단계에서는 데이터가 배치 단위로 들어오기 때문에 배치의 평균, 분산을 구하는 것이 가능하지만,
                # 테스트 단계에서는 배치 단위로 평균/분산을 구하기가 어렵기때문에
                # 학습 단계에서 배치 단위의 평균/분산을 저장해 놓고 테스트 시에는 평균/분산을 사용합니다.
                # 저장한 값을 get_collection을 통해서 불러온다.
            self.global_step = tf.Variable(0, name="global_step", trainable=False)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=self.name)            
            with tf.control_dependencies(update_ops):    
                self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step)

            self.logits_eval = self.get_logits(self.X, is_training = False, reuse = True)
            self.predict_proba_ = tf.nn.softmax(self.logits_eval)
            self.prediction = tf.argmax(self.predict_proba_, 1)
            self.accuracy = tf.metrics.accuracy(tf.argmax(self.Y, 1), self.prediction)
            # 변수들 프린트/ 텐서보드 summary 생성
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('accuracy', self.accuracy[1])
        for v in tf.trainable_variables():
            tf.summary.histogram('Var_{}'.format(v.name), v)
            print(v)            
        self.merged = tf.summary.merge_all()
        # 모델저장
        saver = tf.train.Saver()
            
        
    def fit(self):
        total_batch = int(mnist.train.num_examples/ self.batch_size)
        self.sess.run(tf.global_variables_initializer())
        self.sess.run(tf.local_variables_initializer())
        writer = tf.summary.FileWriter('./logs/', sess.graph)
        for epoch in range(10):
            total_cost = 0
        
            for i in range(total_batch):
                batch_xs, batch_ys = mnist.train.next_batch(self.batch_size)
                _, c, _summ = sess.run([self.optimizer, self.loss, self.merged], feed_dict = {self.X:batch_xs.reshape(-1, 28, 28, 1), self.Y: batch_ys})
                writer.add_summary(_summ, i)
            acc = sess.run(self.accuracy, feed_dict = {self.X: mnist.test.images.reshape(-1, 28, 28, 1), self.Y: mnist.test.labels})
            
            print('epoch : {}, cost : {}, acc: {}'.format(epoch, c, acc))
        return self

In [82]:
#hyperparameter
params = {
    'num_classes': 10,
    'num_filters': [8, 16, 32],
    'filter_size': [2, 2, 2],
    'activation' : tf.nn.relu,
    'cnn_batch_norm' : [False, False, False],
    'fc_hidden_units': [256, 128],
    'fc_batch_norm': [False, False],
    
    'rnn_n_step': 28, #width, time
    'rnn_n_hiddens': [128],
    'rnn_dropout_keep_prob': [0.5],
    
    'learning_rate': 0.01,
    
    'batch_size': 100
}
tf.reset_default_graph()

class Model(object):

    def __init__(self, params, sess, name):
        # 하이퍼파라미터
        self.num_classes = params['num_classes']
        self.num_filters = params['num_filters']
        self.filter_sizes = params['filter_size']
        self.cnn_batch_norm  = params['cnn_batch_norm']
        
        self.fc_hidden_units = params['fc_hidden_units']
        self.fc_batch_norm = params['fc_batch_norm']
        
        self.rnn_n_hiddens = params['rnn_n_hiddens']
        self.rnn_dropout_keep_prob = params['rnn_dropout_keep_prob']
        
        self.learning_rate = params['learning_rate']
        self.activation = params['activation']
        self.batch_size = params['batch_size']        
        
        self.idx_convolutional_layers = range(1, len(self.filter_sizes) + 1)
        self.idx_fc_layers = range(1, len(self.fc_hidden_units) + 1)
        self.idx_rnn_layers = range(1, len(self.rnn_n_hiddens) + 1)
        self.name = name
        # 플레이스홀더
        self.X = tf.placeholder(tf.float32, [None, 28, 28, 1], name="input_x")
        self.Y = tf.placeholder(tf.float32, [None, self.num_classes], name="input_y")
        self.sess = sess
        
        self._build_net()

    #  컨볼루션 레이어를 params에서 받은 파라미터를 따라 구축
    def convolutional_layers(self, X, is_training = True, reuse = False):
        inputs = X
        for i, num_filter, filter_size, use_bn in zip(self.idx_convolutional_layers, self.num_filters, self.filter_sizes, self.cnn_batch_norm):            
            L = tf.layers.conv2d(inputs,
                                 filters=num_filter,
                                 kernel_size=filter_size,
                                 strides=1,
                                 padding='SAME',
                                 name = 'CONV'+str(i),
                                 reuse= reuse)
            if use_bn:
                L= tf.layers.batch_normalization(L, training= is_training, name='BN' + str(i), reuse= reuse)
            L = self.activation(L)
            L = tf.layers.max_pooling2d(L, pool_size = 2, strides = 2, padding = 'SAME')
            inputs = L
        return inputs
    #  dense 레이어를 params에서 받은 파라미터를 따라 구축
    def fc_layers(self, X, is_training = True, reuse = False):
        inputs = X
        for i, units, use_bn in zip(self.idx_fc_layers, self.fc_hidden_units, self.fc_batch_norm):
            fc = tf.layers.dense(inputs,
                                 units=units,
                                 reuse=reuse,
                                 name = 'FC' + str(i))
            if use_bn:
                fc = tf.layers.batch_normalization(fc, training= is_training, name='fc_BN' + str(i), reuse= reuse)
            fc = self.activation(fc)
            inputs = fc 
        return inputs
    
     # LSTM 레이어 
    def rnn_layers(self, inputs, is_training = True, reuse = False):
        if is_training:
            keep_probs = self.rnn_dropout_keep_prob
            
        else:
            keep_probs = np.ones_like(self.rnn_dropout_keep_prob)
        # single layer
        if len(self.idx_rnn_layers) == 1:
            cell = tf.nn.rnn_cell.BasicLSTMCell(self.rnn_n_hiddens[0], reuse = reuse)
            cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_probs[0])
        # multi layer 
        else:
            cell_list = []
            for i, n_hidden, keep_prob in zip(self.idx_rnn_layers, self.rnn_n_hiddens, keep_probs):
                cell_ = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, reuse = reuse)
                cell_ = tf.nn.rnn_cell.DropoutWrapper(cell_, output_keep_prob=keep_prob)
                cell_list.append(cell_)
            cell = tf.nn.rnn_cell.MultiRNNCell(cell_list)
        # output_shape [batch_size, width(n_step), n_classes]
        outputs, states = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
        print(outputs.get_shape().as_list())
        outputs = tf.transpose(outputs, [1, 0, 2])
        outputs = outputs[-1]
        return outputs
    
    def get_reshaped_cnn_to_rnn(self, inputs):
        # [batch, height, width, n_feature map]
        shape = inputs.get_shape().as_list() 
        # 우리가 얻어야하는 사이즈 [batch, height, width x n_feature map]
        reshaped_inputs = tf.reshape(inputs, [-1, shape[1], shape[2] * shape[3]])
        return reshaped_inputs
    
    # 모델 구축/ logit 
    def get_logits(self, X, is_training = True, reuse = False):        
        conv = self.convolutional_layers(self.X, is_training, reuse)                           
        #flat = tf.layers.flatten(conv)
        reshaped_fp = self.get_reshaped_cnn_to_rnn(conv)
        rnn = self.rnn_layers(reshaped_fp, is_training, reuse)
        #fc = self.fc_layers(flat, is_training, reuse)
        output = tf.layers.dense(rnn, units= self.num_classes, reuse=reuse, name = 'out')
        
        return output
    
    # 모델 구축
    def _build_net(self):
        with tf.variable_scope(self.name):
            self.logits_train = self.get_logits(self.X)                              
            self.loss = tf.losses.softmax_cross_entropy(self.Y, self.logits_train)   
                # batch_normalization 적용을 위해 모든 변수들을 불러와서 moving
                #학습 단계에서는 데이터가 배치 단위로 들어오기 때문에 배치의 평균, 분산을 구하는 것이 가능하지만,
                # 테스트 단계에서는 배치 단위로 평균/분산을 구하기가 어렵기때문에
                # 학습 단계에서 배치 단위의 평균/분산을 저장해 놓고 테스트 시에는 평균/분산을 사용합니다.
                # 저장한 값을 get_collection을 통해서 불러온다.
            self.global_step = tf.Variable(0, name="global_step", trainable=False)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope=self.name)            
            with tf.control_dependencies(update_ops):    
                self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step)

            self.logits_eval = self.get_logits(self.X, is_training = False, reuse = True)
            self.predict_proba_ = tf.nn.softmax(self.logits_eval)
            self.prediction = tf.argmax(self.predict_proba_, 1)
            self.accuracy = tf.metrics.accuracy(tf.argmax(self.Y, 1), self.prediction)
            # 변수들 프린트/ 텐서보드 summary 생성
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('accuracy', self.accuracy[1])
        for v in tf.trainable_variables():
            tf.summary.histogram('Var_{}'.format(v.name), v)
            print(v)            
        self.merged = tf.summary.merge_all()
        # 모델저장
        saver = tf.train.Saver()
            
        
    def fit(self):
        total_batch = int(mnist.train.num_examples/ self.batch_size)
        self.sess.run(tf.global_variables_initializer())
        self.sess.run(tf.local_variables_initializer())
        writer = tf.summary.FileWriter('./logs/', sess.graph)
        for epoch in range(10):
            total_cost = 0
        
            for i in range(total_batch):
                batch_xs, batch_ys = mnist.train.next_batch(self.batch_size)
                _, c, _summ = sess.run([self.optimizer, self.loss, self.merged], feed_dict = {self.X:batch_xs.reshape(-1, 28, 28, 1), self.Y: batch_ys})
                writer.add_summary(_summ, i)
            acc = sess.run(self.accuracy, feed_dict = {self.X: mnist.test.images.reshape(-1, 28, 28, 1), self.Y: mnist.test.labels})
            
            print('epoch : {}, cost : {}, acc: {}'.format(epoch, c, acc))
        return self

In [83]:
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

In [84]:
sess = tf.Session()
model = Model(params, sess, name = 'model')

[None, 4, 128]
[None, 4, 128]
INFO:tensorflow:Summary name Var_model/CONV1/kernel:0 is illegal; using Var_model/CONV1/kernel_0 instead.
<tf.Variable 'model/CONV1/kernel:0' shape=(2, 2, 1, 8) dtype=float32_ref>
INFO:tensorflow:Summary name Var_model/CONV1/bias:0 is illegal; using Var_model/CONV1/bias_0 instead.
<tf.Variable 'model/CONV1/bias:0' shape=(8,) dtype=float32_ref>
INFO:tensorflow:Summary name Var_model/CONV2/kernel:0 is illegal; using Var_model/CONV2/kernel_0 instead.
<tf.Variable 'model/CONV2/kernel:0' shape=(2, 2, 8, 16) dtype=float32_ref>
INFO:tensorflow:Summary name Var_model/CONV2/bias:0 is illegal; using Var_model/CONV2/bias_0 instead.
<tf.Variable 'model/CONV2/bias:0' shape=(16,) dtype=float32_ref>
INFO:tensorflow:Summary name Var_model/CONV3/kernel:0 is illegal; using Var_model/CONV3/kernel_0 instead.
<tf.Variable 'model/CONV3/kernel:0' shape=(2, 2, 16, 32) dtype=float32_ref>
INFO:tensorflow:Summary name Var_model/CONV3/bias:0 is illegal; using Var_model/CONV3/bias_0 i

In [85]:
model.fit()
sess.close()

epoch : 0, cost : 0.11908344179391861, acc: (0.90190911, 0.91336924)


KeyboardInterrupt: 

In [18]:
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
Y = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)

W1 = tf.Variable(tf.random_normal([3, 3, 1, 32], stddev = 0.01))
L1 = tf.nn.conv2d(X, W1, strides=[1, 1, 1, 1], padding='SAME')
L1 = tf.nn.relu(L1)
L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

W2 = tf.Variable(tf.random_normal([3, 3, 32, 64], stddev = 0.01))
L2 = tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME')
L2 = tf.nn.relu(L2)
L2 = tf.nn.max_pool(L2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

L2_flatten = tf.contrib.layers.flatten(L2)

W3 = tf.Variable(tf.random_normal([7 * 7 * 64, 256], stddev = 0.01))
b3 = tf.Variable(tf.random_normal([256]))
L3 = tf.nn.relu(tf.matmul(L2_flatten, W3) + b3)
L3 = tf.nn.dropout(L3, keep_prob)

W4 = tf.Variable(tf.random_normal([256, 10], stddev =0.01))
b4 = tf.Variable(tf.random_normal([10]))
logits = tf.matmul(L3, W4) + b4

cost = tf.losses.softmax_cross_entropy(Y, logits = logits)
train = tf.train.AdamOptimizer(0.001).minimize(cost)


predictions = tf.argmax(tf.nn.softmax(logits), 1)
accuracy = tf.metrics.accuracy(tf.argmax(Y, 1), predictions)

batch_size = 100
with tf.Session() as sess:
    total_batch = int(mnist.train.num_examples/ batch_size)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    for epoch in range(15):
        total_cost = 0
        
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            _, c, = sess.run([train, cost], feed_dict = {X:batch_xs.reshape(-1, 28, 28, 1), Y: batch_ys, keep_prob: 0.8})
        acc = sess.run([accuracy], feed_dict = {X: mnist.test.images.reshape(-1, 28, 28, 1), Y: mnist.test.labels, keep_prob: 1.0})
        print('epoch : {}, cost : {}, acc: {}'.format(epoch, c, acc))
            
            


epoch : 0, cost : 0.053370747715234756, acc: [(0.0, 0.96810001)]
epoch : 1, cost : 0.06267564743757248, acc: [(0.96810001, 0.97430003)]
epoch : 2, cost : 0.0855538621544838, acc: [(0.97430003, 0.97680002)]
epoch : 3, cost : 0.19877442717552185, acc: [(0.97680002, 0.97907501)]
epoch : 4, cost : 0.05799204856157303, acc: [(0.97907501, 0.98093998)]
epoch : 5, cost : 0.11063935607671738, acc: [(0.98093998, 0.98213333)]
epoch : 6, cost : 0.03810318559408188, acc: [(0.98213333, 0.98282856)]
epoch : 7, cost : 0.0006602299981750548, acc: [(0.98282856, 0.98348749)]
epoch : 8, cost : 0.033298783004283905, acc: [(0.98348749, 0.98413336)]
epoch : 9, cost : 0.008241044357419014, acc: [(0.98413336, 0.98462999)]
epoch : 10, cost : 0.02387131005525589, acc: [(0.98462999, 0.98500907)]
epoch : 11, cost : 0.0030551606323570013, acc: [(0.98500907, 0.98540002)]
epoch : 12, cost : 0.06204137206077576, acc: [(0.98540002, 0.98581541)]
epoch : 13, cost : 0.0004125334962736815, acc: [(0.98581541, 0.98612142)]
e

In [20]:
tf.reset_default_graph()

In [21]:
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
Y = tf.placeholder(tf.float32, [None, 10])

def model_net(x, activation, is_training, reuse = False):
    L1 = tf.layers.conv2d(x, 32, 3, padding='SAME', activation = activation, reuse= reuse, name = 'L1')
    L1 = tf.layers.max_pooling2d(L1, 2, 2)
    L2 = tf.layers.conv2d(L1, 64, 3, padding='SAME', activation = activation, reuse=reuse, name = 'L2')
    L2 = tf.layers.max_pooling2d(L2, 2, 2)
    
    L2_flatten = tf.contrib.layers.flatten(L2)
    
    fc1 = tf.layers.dense(L2_flatten, 256, activation = activation, reuse=reuse, name = 'FC1')
    fc1 = tf.layers.dropout(fc1, 0.2, training = is_training)
    
    fc2 = tf.layers.dense(fc1, 10, reuse=reuse, name = 'output')
    return fc2


In [22]:
logits = model_net(X, tf.nn.relu, True)
test_logits = model_net(X, tf.nn.relu, False, True)
cost = tf.losses.softmax_cross_entropy(Y, logits = logits)
train = tf.train.AdamOptimizer(0.001).minimize(cost)


predictions = tf.argmax(tf.nn.softmax(test_logits), 1)
accuracy = tf.metrics.accuracy(tf.argmax(Y, 1), predictions)

In [24]:
batch_size = 100
with tf.Session() as sess:
    total_batch = int(mnist.train.num_examples/ batch_size)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    for epoch in range(15):
        total_cost = 0
        
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            _, c, = sess.run([train, cost], feed_dict = {X:batch_xs.reshape(-1, 28, 28, 1), Y: batch_ys})
        acc = sess.run([accuracy], feed_dict = {X: mnist.test.images.reshape(-1, 28, 28, 1), Y: mnist.test.labels})
        print('epoch : {}, cost : {}, acc: {}'.format(epoch, c, acc))
            
          

epoch : 0, cost : 0.020873257890343666, acc: [(0.0, 0.98430002)]
epoch : 1, cost : 0.012488684616982937, acc: [(0.98430002, 0.98689997)]
epoch : 2, cost : 0.058932267129421234, acc: [(0.98689997, 0.9878)]
epoch : 3, cost : 0.050051361322402954, acc: [(0.9878, 0.98860002)]
epoch : 4, cost : 0.047780223190784454, acc: [(0.98860002, 0.98923999)]
epoch : 5, cost : 0.03912124037742615, acc: [(0.98923999, 0.98943335)]
epoch : 6, cost : 0.02453668788075447, acc: [(0.98943335, 0.98968571)]
epoch : 7, cost : 0.0036200848408043385, acc: [(0.98968571, 0.98982501)]
epoch : 8, cost : 0.0011123416479676962, acc: [(0.98982501, 0.99014443)]
epoch : 9, cost : 0.004260535817593336, acc: [(0.99014443, 0.98979002)]
epoch : 10, cost : 0.007146528456360102, acc: [(0.98979002, 0.98985457)]
epoch : 11, cost : 0.04281054064631462, acc: [(0.98985457, 0.99010831)]
epoch : 12, cost : 0.0008145206375047565, acc: [(0.99010831, 0.99017692)]
epoch : 13, cost : 0.02385837584733963, acc: [(0.99017692, 0.99017859)]
epoc

In [25]:
tf.reset_default_graph()

In [26]:
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
Y = tf.placeholder(tf.float32, [None, 10])

def model_net(x, activation, is_training, reuse = False):
    L1 = tf.layers.conv2d(x, 32, 3, padding='SAME', reuse= reuse, name = 'L1')
    L1 = tf.layers.batch_normalization(L1, training=is_training)
    L1 = activation(L1)
    L1 = tf.layers.max_pooling2d(L1, 2, 2)
    
    L2 = tf.layers.conv2d(L1, 64, 3, padding='SAME', activation = activation, reuse=reuse, name = 'L2')
    L2 = tf.layers.batch_normalization(L2, training=is_training)
    L2 = activation(L2)
    L2 = tf.layers.max_pooling2d(L2, 2, 2)
    
    L2_flatten = tf.contrib.layers.flatten(L2)
    
    fc1 = tf.layers.dense(L2_flatten, 256, activation = activation, reuse=reuse, name = 'FC1')
    fc1 = tf.layers.dropout(fc1, 0.2, training = is_training)
    
    fc2 = tf.layers.dense(fc1, 10, reuse=reuse, name = 'output')
    return fc2


In [27]:
logits = model_net(X, tf.nn.relu, True)
test_logits = model_net(X, tf.nn.relu, False, True)
cost = tf.losses.softmax_cross_entropy(Y, logits = logits)
train = tf.train.AdamOptimizer(0.001).minimize(cost)


predictions = tf.argmax(tf.nn.softmax(test_logits), 1)
accuracy = tf.metrics.accuracy(tf.argmax(Y, 1), predictions)

In [None]:
batch_size = 100
with tf.Session() as sess:
    total_batch = int(mnist.train.num_examples/ batch_size)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    for epoch in range(15):
        total_cost = 0
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            _, c, = sess.run([train, cost], feed_dict = {X:batch_xs.reshape(-1, 28, 28, 1), Y: batch_ys})
        acc = sess.run([accuracy], feed_dict = {X: mnist.test.images.reshape(-1, 28, 28, 1), Y: mnist.test.labels})
        print('epoch : {}, cost : {}, acc: {}'.format(epoch, c, acc))          