<a href="https://colab.research.google.com/github/sinjy1203/deep-learning/blob/master/handson_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from google.colab import drive
# Mount Google Drive so the checkpoint written later in the notebook lands
# under /content/gdrive and persists outside this Colab runtime.
drive.mount(mountpoint='/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
import tensorflow as tf
import numpy as np
# load_data() returns two (images, labels) pairs: the training split first,
# then the test split.
_mnist_train, _mnist_test = tf.keras.datasets.mnist.load_data()
x_train, y_train = _mnist_train
x_test, y_test = _mnist_test

In [0]:
# Flatten every 28x28 image into a single 784-element float32 row so it
# matches the (None, 784) input placeholder defined in the model cell.
x_train = x_train.astype(np.float32).reshape(-1, 28*28)
x_test = x_test.astype(np.float32).reshape(-1, 28*28)

# Labels are cast to int32, the dtype of the `y` placeholder.
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

# Last expression of the cell: displays the label array's shape.
y_train.shape

(60000,)

In [0]:
from functools import partial
import time
start = time.time()
def shuffle_batch(X, y, batch_size):
    """Yield (X_batch, y_batch) pairs covering the data once in random order.

    The sample indices are permuted with ``np.random.permutation`` and split
    into ``len(X) // batch_size`` nearly equal chunks via ``np.array_split``,
    so every sample appears in exactly one batch and batches may be slightly
    larger than ``batch_size`` when the length is not an exact multiple.

    Args:
        X: Indexable array of samples (first axis is the sample axis).
        y: Indexable array of targets aligned with ``X``.
        batch_size: Desired number of samples per batch; must be positive.

    Yields:
        Tuples ``(X_batch, y_batch)`` selected by the same shuffled indices.
        Nothing is yielded when ``X`` is empty.

    Raises:
        ValueError: If ``batch_size`` is not a positive number.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    n_samples = len(X)
    if n_samples == 0:
        # Nothing to iterate over; np.array_split would reject 0 sections.
        return
    rnd_idx = np.random.permutation(n_samples)
    # Fewer samples than batch_size would give 0 sections and make
    # np.array_split raise, so always produce at least one batch.
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]

# Start from an empty default graph so re-running this cell does not stack a
# second copy of the model on top of the previous one.
tf.reset_default_graph()
# Seed both TensorFlow (graph-level) and NumPy for repeatable runs.
tf.set_random_seed(42)
np.random.seed(42)
# Input images: any batch size, 784 features (flattened 28x28 pixels).
x = tf.placeholder(dtype=tf.float32, shape=(None, 784), name='x')
# Integer class labels. Note that `(None)` is plain `None`, not the 1-tuple
# `(None,)`, so this placeholder's shape is left completely unspecified.
y = tf.placeholder(dtype=tf.int32, shape=(None), name='y')
# Scalar switch shared by dropout and batch normalization. It defaults to
# False, so any sess.run that does not feed it runs in inference mode.
training = tf.placeholder_with_default(False, shape=(), name='training')
# Rate passed to every tf.layers.dropout call in this cell.
dropout_rate = 0.5

# Kernel initializer shared by all dense layers.
# NOTE(review): the name suggests He initialization, but the initializer is
# built with default arguments; He init conventionally corresponds to
# scale=2.0 -- confirm the defaults for the installed TensorFlow version.
he_init = tf.variance_scaling_initializer()

# layer1 = tf.layers.dense(x, 200, activation=tf.nn.relu, kernel_initializer=he_init, name='layer1')
# layer2 = tf.layers.dense(layer1, 200, activation=tf.nn.elu, kernel_initializer=he_init, name='layer2')
# layer3 = tf.layers.dense(layer2, 200, activation=tf.nn.elu, kernel_initializer=he_init, name='layer3')
# layer4 = tf.layers.dense(layer3, 200, activation=tf.nn.elu, kernel_initializer=he_init, name='layer4')
# layer5 = tf.layers.dense(layer4, 200, activation=tf.nn.elu, kernel_initializer=he_init, name='layer5')
# logits = tf.layers.dense(layer5, 10, kernel_initializer=he_init)
# # loss: 0.00038, time: 216 acc: 0.966, learning_rate: 0.001

# Batch-normalization factory with the shared `training` switch and momentum
# pre-bound, so each call site only has to pass the input tensor.
my_batch_norm_layer = partial(tf.layers.batch_normalization, training=training, momentum=0.9)


def _hidden_block(inputs, name, n_units=200):
    """Build one hidden stage: dense -> batch norm -> ELU -> dropout.

    Ops are created in exactly this order, so auto-generated op/variable
    names and op-level seeds match the previous hand-unrolled definition.

    Args:
        inputs: Tensor feeding the dense layer.
        name: Name given to the dense layer (e.g. 'layer1').
        n_units: Width of the dense layer.

    Returns:
        Tuple ``(dense, normalized, activated, dropped)`` with the output
        tensor of each of the four steps.
    """
    dense = tf.layers.dense(inputs, n_units, kernel_initializer=he_init, name=name)
    normalized = my_batch_norm_layer(dense)
    activated = tf.nn.elu(normalized)
    dropped = tf.layers.dropout(activated, dropout_rate, training=training)
    return dense, normalized, activated, dropped


# Dropout is applied to the raw inputs as well as after every hidden stage.
x_drop = tf.layers.dropout(x, dropout_rate, training=training)

# Five identical hidden stages chained one after another. Every intermediate
# tensor keeps its original module-level name.
layer1, bn1, bn1_act, layer1_drop = _hidden_block(x_drop, 'layer1')
layer2, bn2, bn2_act, layer2_drop = _hidden_block(layer1_drop, 'layer2')
layer3, bn3, bn3_act, layer3_drop = _hidden_block(layer2_drop, 'layer3')
layer4, bn4, bn4_act, layer4_drop = _hidden_block(layer3_drop, 'layer4')
layer5, bn5, bn5_act, layer5_drop = _hidden_block(layer4_drop, 'layer5')

# Output layer: 10 units (one per digit class), batch-normalized, with no
# activation so the result can be used directly as logits.
logits_before = tf.layers.dense(layer5_drop, 10, kernel_initializer=he_init)
logits = my_batch_norm_layer(logits_before)
# loss: 0.0053, time: 110, acc: 0.9825, learning_rate: 0.05, epoch: 10

# Class probabilities from the logits. Nothing else in the visible cells
# reads `pred`; the loss and accuracy below work on `logits` directly.
pred = tf.nn.softmax(logits)
# Per-example cross entropy computed from integer labels and raw logits,
# then averaged over the batch to give the scalar training loss.
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits=logits)
loss = tf.reduce_mean(xentropy)
# Earlier optimizer experiments, kept for reference (currently disabled):
# initial_learning_rate = 0.1
# decay_steps = 10000
# decay_rate = 0.1
# global_step = tf.Variable(0, trainable=False, name="global_step")
# learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step, decay_steps, decay_rate)
# optimizer = tf.train.GradientDescentOptimizer(0.05)
# optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9, use_nesterov=True)
# optimizer = tf.train.RMSPropOptimizer(learning_rate=0.05, momentum=0.9, decay=0.9, epsilon=1e-10)
# Active optimizer: Adam with a fixed learning rate.
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
# train_op only minimizes the loss; the batch-norm update ops are collected
# and run separately by the training loop.
train_op = optimizer.minimize(loss)

# An example counts as correct when its label is the top-1 scoring class;
# accuracy is the mean of those booleans cast to float.
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# Created after all variables exist, so the initializer and the saver cover
# every variable defined above.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 11
batch_size = 50
# Batch normalization registers its moving-average updates in UPDATE_OPS.
# They are not dependencies of train_op, so they are run explicitly with it.
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
# Number of whole batches per epoch. Plain integer division replaces the
# removed `np.int` alias; any trailing partial batch is skipped, as before.
n_batches = len(x_train) // batch_size

with tf.Session() as sess:
  sess.run(init)
  for n_epoch in range(n_epochs):
    # Running mean of the batch losses over this epoch.
    cost = 0.0
    for i in range(n_batches):
      batch_start = i * batch_size
      batch_end = batch_start + batch_size
      xs = x_train[batch_start:batch_end]
      ys = y_train[batch_start:batch_end]
      # training=True enables dropout and makes batch norm use batch statistics.
      # Only the loss (index 1) is kept from the fetched values.
      cost_val = sess.run([train_op, loss, extra_update_ops], feed_dict={x: xs, y: ys, training: True})[1]
      cost += cost_val / n_batches
    print(n_epoch, cost)
  # Persist the trained variables so the evaluation cell can restore them.
  save_path = saver.save(sess, "/content/gdrive/My Drive/tmp/my_model3.ckpt")
# `start` was taken at the top of the cell, so this includes graph construction.
print("time: ", time.time() - start)
# a = 0
# with tf.Session() as sess:
#   sess.run(init)
#   cost = 0
#   for epoch in range(n_epochs):
#         for x_batch, y_batch in shuffle_batch(x_train, y_train, batch_size):
#             _, cost_val = sess.run([train_op, loss], feed_dict={x: x_batch, y: y_batch})
#             #a += 1
#             cost += cost_val / 1200
#         if epoch % 10 == 0:
#           print(epoch,"  ", cost)
#   save_path = saver.save(sess, "/content/gdrive/My Drive/tmp/my_model3.ckpt")
  #print(a)
# with tf.Session() as sess:
#   sess.run(init)
#   for n_epoch in range(n_epochs):
#     _, cost = sess.run([train_op, loss], feed_dict={x: x_train, y: y_train})
#     if n_epoch % 10 == 0:
#       print(n_epoch, cost)
#   save_path = saver.save(sess, "/content/gdrive/My Drive/tmp/my_model3.ckpt") 0.018

0 1.0210959591219826
1 0.6528678620606659
2 0.5696823270743084
3 0.5254673164710401
4 0.497938951291144
5 0.46817267211154084
6 0.45477097672720745
7 0.43868895040825034
8 0.4235622403336066
9 0.41553315779815075
10 0.4006539055580896
time:  124.6114866733551


In [0]:
# Evaluate the saved model on the full test set. `training` is not fed, so it
# takes its default (False): dropout is disabled and batch norm runs in
# inference mode.
test_feed = {x: x_test, y: y_test}
with tf.Session() as sess:
  # Restoring assigns every saved variable, so no initializer run is needed.
  saver.restore(sess, "/content/gdrive/My Drive/tmp/my_model3.ckpt")
  acc = sess.run(accuracy, feed_dict=test_feed)
print(acc, "he 초기화+elu+배치정규화")

INFO:tensorflow:Restoring parameters from /content/gdrive/My Drive/tmp/my_model3.ckpt
0.9519 he 초기화+elu+배치정규화
