## Alexnet

![model](./test.png)

출처: https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf

conv1 conv2 max1 norm1 conv3 max2 norm2 conv4 conv5 max3 fc1 fc2 fc3(output)

In [1]:
import tensorflow as tf
import numpy as np 
import os
import matplotlib.pyplot as plt
import random
%matplotlib inline

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
learning_rate = 0.001
training_epochs = 15
batch_size = 100

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
X = tf.placeholder(tf.float32,shape=[None,784])
X_image = tf.reshape(X,[-1,28,28,1])
y = tf.placeholder(tf.float32,[None,10])

# dropout 
keep_prob = tf.placeholder(tf.float32)


In [4]:
#conv1 
W1 = tf.Variable(tf.random_normal([3,3,1,32],stddev=0.01))
L1 = tf.nn.conv2d(X_image, W1, strides=[1, 1, 1, 1], padding='SAME')
L1 = tf.nn.relu(L1)
L1 = tf.nn.dropout(L1, keep_prob=keep_prob)
print(L1)

Tensor("dropout/mul:0", shape=(?, 28, 28, 32), dtype=float32)


In [5]:
#conv2
W2 = tf.Variable(tf.random_normal([3,3,32,64],stddev=0.01))
L2 = tf.nn.conv2d(L1,W2,strides=[1,1,1,1],padding='SAME')
L2 = tf.nn.relu(L2)
print(L2)

Tensor("Relu_1:0", shape=(?, 28, 28, 64), dtype=float32)


In [6]:
#max1 
L2 = tf.nn.max_pool(L2,ksize=[1,3,3,1],strides=[1,2,2,1],padding='SAME')
print(L2)

Tensor("MaxPool:0", shape=(?, 14, 14, 64), dtype=float32)


In [7]:
#norm1
L2 = tf.nn.local_response_normalization(L2)
L2 = tf.nn.dropout(L2, keep_prob=keep_prob)
print(L2)

Tensor("dropout_1/mul:0", shape=(?, 14, 14, 64), dtype=float32)


In [8]:
#conv3
W3 = tf.Variable(tf.random_normal([3,3,64,128]))
L3 = tf.nn.conv2d(L2,W3,strides=[1,1,1,1],padding='SAME')
L3 = tf.nn.relu(L3)
print(L3)

Tensor("Relu_2:0", shape=(?, 14, 14, 128), dtype=float32)


In [9]:
#max2
L3 = tf.nn.max_pool(L3,ksize=[1,3,3,1],strides=[1,2,2,1],padding='SAME')
print(L3)

Tensor("MaxPool_1:0", shape=(?, 7, 7, 128), dtype=float32)


In [10]:
#norm2
L3 = tf.nn.local_response_normalization(L3)
L3 = tf.nn.dropout(L3, keep_prob=keep_prob)

In [11]:
#conv4
W4 = tf.Variable(tf.random_normal([3,3,128,256]))
L4 = tf.nn.conv2d(L3,W4,strides=[1,1,1,1],padding='SAME')
L4= tf.nn.relu(L4)
L4 = tf.nn.dropout(L4, keep_prob=keep_prob)

In [12]:
#conv5
W5 = tf.Variable(tf.random_normal([3,3,256,256]))
L5 = tf.nn.conv2d(L4,W5,strides=[1,1,1,1],padding='SAME')
L5 = tf.nn.relu(L5)

In [13]:
#max3
L5 = tf.nn.max_pool(L5, ksize=[1,3,3,1],strides=[1,2,2,1],padding='SAME')
L5 = tf.nn.dropout(L5, keep_prob=keep_prob)
print(L5)

Tensor("dropout_4/mul:0", shape=(?, 4, 4, 256), dtype=float32)


conv1 conv2 max1 norm1 conv3 max2 norm2 conv4 conv5 max3 fc1 fc2 fc3(output)

In [14]:
#fc1
L5_flat = tf.reshape(L5, [-1, 256 * 4 * 4])
W6 = tf.get_variable("W6", shape=[256 * 4 * 4, 1024],initializer=tf.contrib.layers.xavier_initializer())
b6 = tf.Variable(tf.random_normal([1024]))
L6 = tf.nn.relu(tf.matmul(L5_flat, W6) + b6)
L6 = tf.nn.dropout(L6, keep_prob=keep_prob)

In [15]:
W7 = tf.get_variable("W7", shape=[1024, 512],initializer=tf.contrib.layers.xavier_initializer())
b7 = tf.Variable(tf.random_normal([512]))
L7 = tf.nn.relu(tf.matmul(L6, W7) + b7)
L7 = tf.nn.dropout(L7, keep_prob=keep_prob)

In [16]:
# L5 Final FC 625 inputs -> 5 outputs
W8 = tf.get_variable("W8", shape=[512, 10],initializer=tf.contrib.layers.xavier_initializer())
b8 = tf.Variable(tf.random_normal([10]))
logits = tf.matmul(L7, W8) + b8

In [17]:
# define cost/loss & optimizer

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)

#accuracy
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))    

In [18]:
sess=tf.Session()
sess.run(tf.global_variables_initializer())


# train model
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, y: batch_ys, keep_prob: 0.7}
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
    if epoch%5 == 0:
        print('Epoch:', '%03d' % (epoch), 'cost =', '{:.5f}'.format(avg_cost))

print('Finished')

print('Accuracy:', sess.run(accuracy, feed_dict={
      X: mnist.test.images, y: mnist.test.labels, keep_prob: 1}))

# Get one and predict
r = random.randint(0, mnist.test.num_examples - 1)
print("Label: ", sess.run(tf.argmax(mnist.test.labels[r:r + 1], 1)))
print("Prediction: ", sess.run(
tf.argmax(logits, 1), feed_dict={X: mnist.test.images[r:r + 1], keep_prob: 1}))

plt.imshow(mnist.test.images[r:r + 1].
           reshape(28,28,1), cmap='Greys', interpolation='nearest')
plt.show()

('Epoch:', '000', 'cost =', '6.67960')
('Epoch:', '005', 'cost =', '0.43886')
('Epoch:', '010', 'cost =', '0.24955')
Finished
('Accuracy:', 0.96569997)


NameError: name 'random' is not defined

AlexNet의 경우 2개의 GPU 를 병렬처리하여 학습시키는데 일주일 이상 소요했다.
초기 cost 가 높은 이유는 weight의 unit 수가 많기 때문인 것 같다.