In [1]:
import tensorflow as tf
import pandas as pd
import time

train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

  from ._conv import register_converters as _register_converters


In [2]:
# feature 와 label 분리
train_y = train['label']
train_x = train.drop(labels='label', axis=1)

# label을 one-hot encoding
train_y_onehot = pd.get_dummies(train_y)

# train data count
total_train_rows = train_y_onehot.shape[0]

In [3]:
# hyper parameters 
learning_rate = 0.001 
training_epochs = 20
batch_size = 100 

# dropout (keep_prob) rate  0.7~0.5 on training, but should be 1 for testing 
keep_prob = tf.placeholder(tf.float32) 

# input place holders 
X = tf.placeholder(tf.float32, [None, 784]) 
X_img = tf.reshape(X, [-1, 28, 28, 1])   # img 28x28x1 (black/white) 
Y = tf.placeholder(tf.float32, [None, 10]) 

# L1 ImgIn shape=(?, 28, 28, 1) 
W1 = tf.Variable(tf.random_normal([3, 3, 1, 32], stddev=0.01)) 
#    Conv     -> (?, 28, 28, 32) 
#    Pool     -> (?, 14, 14, 32) 
L1 = tf.nn.conv2d(X_img, W1, strides=[1, 1, 1, 1], padding='SAME') 
L1 = tf.nn.relu(L1) 
L1 = tf.nn.max_pool(L1, ksize=[1, 2, 2, 1], 
                    strides=[1, 2, 2, 1], padding='SAME') 
L1 = tf.nn.dropout(L1, keep_prob=keep_prob) 
''' 
Tensor("Conv2D:0", shape=(?, 28, 28, 32), dtype=float32) 
Tensor("Relu:0", shape=(?, 28, 28, 32), dtype=float32) 
Tensor("MaxPool:0", shape=(?, 14, 14, 32), dtype=float32) 
Tensor("dropout/mul:0", shape=(?, 14, 14, 32), dtype=float32) 
''' 

# L2 ImgIn shape=(?, 14, 14, 32) 
W2 = tf.Variable(tf.random_normal([3, 3, 32, 64], stddev=0.01)) 
#    Conv      ->(?, 14, 14, 64) 
#    Pool      ->(?, 7, 7, 64) 
L2 = tf.nn.conv2d(L1, W2, strides=[1, 1, 1, 1], padding='SAME') 
L2 = tf.nn.relu(L2) 
L2 = tf.nn.max_pool(L2, ksize=[1, 2, 2, 1], 
                    strides=[1, 2, 2, 1], padding='SAME') 
L2 = tf.nn.dropout(L2, keep_prob=keep_prob) 
''' 
Tensor("Conv2D_1:0", shape=(?, 14, 14, 64), dtype=float32) 
Tensor("Relu_1:0", shape=(?, 14, 14, 64), dtype=float32) 
Tensor("MaxPool_1:0", shape=(?, 7, 7, 64), dtype=float32) 
Tensor("dropout_1/mul:0", shape=(?, 7, 7, 64), dtype=float32) 
''' 

# L3 ImgIn shape=(?, 7, 7, 64) 
W3 = tf.Variable(tf.random_normal([3, 3, 64, 128], stddev=0.01)) 
#    Conv      ->(?, 7, 7, 128) 
#    Pool      ->(?, 4, 4, 128) 
#    Reshape   ->(?, 4 * 4 * 128) # Flatten them for FC 
L3 = tf.nn.conv2d(L2, W3, strides=[1, 1, 1, 1], padding='SAME') 
L3 = tf.nn.relu(L3) 
L3 = tf.nn.max_pool(L3, ksize=[1, 2, 2, 1], strides=[ 
                    1, 2, 2, 1], padding='SAME') 
L3 = tf.nn.dropout(L3, keep_prob=keep_prob) 
L3_flat = tf.reshape(L3, [-1, 128 * 4 * 4]) 
''' 
Tensor("Conv2D_2:0", shape=(?, 7, 7, 128), dtype=float32) 
Tensor("Relu_2:0", shape=(?, 7, 7, 128), dtype=float32) 
Tensor("MaxPool_2:0", shape=(?, 4, 4, 128), dtype=float32) 
Tensor("dropout_2/mul:0", shape=(?, 4, 4, 128), dtype=float32) 
Tensor("Reshape_1:0", shape=(?, 2048), dtype=float32) 
''' 

# L4 FC 4x4x128 inputs -> 625 outputs 
W4 = tf.get_variable("W4", shape=[128 * 4 * 4, 625], 
                     initializer=tf.contrib.layers.xavier_initializer()) 
b4 = tf.Variable(tf.random_normal([625])) 
L4 = tf.nn.relu(tf.matmul(L3_flat, W4) + b4) 
L4 = tf.nn.dropout(L4, keep_prob=keep_prob) 
''' 
Tensor("Relu_3:0", shape=(?, 625), dtype=float32) 
Tensor("dropout_3/mul:0", shape=(?, 625), dtype=float32) 
''' 

# L5 Final FC 625 inputs -> 10 outputs 
W5 = tf.get_variable("W5", shape=[625, 10], 
                     initializer=tf.contrib.layers.xavier_initializer()) 
b5 = tf.Variable(tf.random_normal([10])) 
logits = tf.matmul(L4, W5) + b5 
''' 
Tensor("add_1:0", shape=(?, 10), dtype=float32) 
''' 

# define cost/loss & optimizer 
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits( 
    logits=logits, labels=Y)) 
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) 

# initialize 
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) 
sess.run(tf.global_variables_initializer()) 

start_time = time.time()

# train my model 
print('Learning started. It takes sometime.') 
for epoch in range(training_epochs): 
    avg_cost = 0 
    total_batch = int(total_train_rows / batch_size)
    start_time_epoch = time.time()
    for i in range(total_batch):
        begin = i * batch_size
        end = begin + batch_size
        batch_xs = train_x[begin : end].values
        batch_ys = train_y_onehot[begin : end].values
        feed_dict = {X: batch_xs, Y: batch_ys, keep_prob: 0.7} 
        c, _ = sess.run([cost, optimizer], feed_dict=feed_dict) 
        avg_cost += c / total_batch 

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost), " %.2f seconds" % (time.time() - start_time_epoch))

print('Learning Finished!') 
print("--- %.2f seconds ---" %(time.time() - start_time))

Instructions for updating:
Use the retry module or similar alternatives.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

Learning started. It takes sometime.
Epoch: 0001 cost = 0.279893738  5.59 seconds
Epoch: 0002 cost = 0.088521125  4.79 seconds
Epoch: 0003 cost = 0.074797229  4.81 seconds
Epoch: 0004 cost = 0.065256128  5.14 seconds
Epoch: 0005 cost = 0.060059659  5.02 seconds
Epoch: 0006 cost = 0.058227872  4.88 seconds
Epoch: 0007 cost = 0.058876476  4.81 seconds
Epoch: 0008 cost = 0.054005304  4.83 seconds
Epoch: 0009 cost = 0.054540391  4.84 seconds
Epoch: 0010 cost = 0.052946540  4.96 seconds
Epoch: 0011 cost = 0.053823483  4.78 seconds
Epoch: 0012 cost = 0.049257074  4.84 seconds
Epoch: 0013 cost = 0.049320622  4.90 seconds
Epoch: 0014 cost = 0.049022258  5.45 seconds
Epoch: 0015 cost = 0.047131278  5.26 seconds
Epoch: 0016 cost = 0.05178

In [4]:
# 학습 데이터로 predict
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
correct_count = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))

total_correct_count = 0;
for i in range(total_batch):
        begin = i * batch_size
        end = begin + batch_size
        batch_xs = train_x[begin : end].values # pandas 데이터에서 values로 값을 가져온다.
        batch_ys = train_y_onehot[begin : end].values
        cur_count = sess.run(correct_count, feed_dict={X: batch_xs, Y: batch_ys, keep_prob: 1})
        total_correct_count += cur_count
        
accuracy = total_correct_count / total_train_rows
print('Accuracy:', accuracy)
        

Accuracy: 0.9989523809523809


In [8]:
# 테스트 데이터로 predict
import numpy as np

test_prediction = tf.argmax(logits, 1)

test_result = []
for i in range(total_batch):
        begin = i * batch_size
        end = begin + batch_size
        batch_xs = test[begin : end].values
        cur_predict = sess.run(test_prediction, feed_dict={X: batch_xs, keep_prob: 1})
        if i == 0:
            test_result = cur_predict
        else:
            test_result = np.append(test_result, cur_predict)

# pandas 형태로 predict column 생성
pd_test_result = pd.Series(test_result, name="Label")

# ImageId column 생성해서 predict와 결합
submission = pd.concat([pd.Series(range(1,28001),name = "ImageId"),pd_test_result],axis = 1)

# submission 파일 생성
submission.to_csv("submission_mnist_cnn.csv",index=False)