In [96]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [97]:
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv("test.csv")

In [98]:
train_data.info()
test_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42000 entries, 0 to 41999
Columns: 785 entries, label to pixel783
dtypes: int64(785)
memory usage: 251.5 MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28000 entries, 0 to 27999
Columns: 784 entries, pixel0 to pixel783
dtypes: int64(784)
memory usage: 167.5 MB


In [99]:
def print_epoch_stats(epoch_i, sess, last_features, last_labels):
    """
    Print cost and validation accuracy of an epoch
    """
    
    current_cost = sess.run(
        cost,
        feed_dict={x: last_features, y: last_labels, keep_prob: 1.})
    valid_accuracy = sess.run(
        accuracy,
        feed_dict={x: last_features,y: last_labels, keep_prob: 1.})
    print('Epoch: {} - Cost: {} Valid Accuracy: {}'.format(
        epoch_i,
        current_cost,
        valid_accuracy))

def normalize_grayscale(image_data):
    """
    Normalize the image data with Min-Max scaling to a range of [0.1, 0.9]
    :param image_data: The image data to be normalized
    :return: Normalized image data
    """
    a = 0.1
    b = 0.9
    grayscale_min = 0
    grayscale_max = 255
    return a + ( ( (image_data - grayscale_min)*(b - a) )/( grayscale_max - grayscale_min ) )

In [100]:
# one-hot 编码
train_d = train_data
dummies = pd.get_dummies(train_data['label'], prefix='label', drop_first=False)
train_features = train_d.drop('label', axis=1)
train_labels = dummies
train_data = pd.concat([train_labels, train_features], axis=1)

train_features = np.reshape(np.array(normalize_grayscale(train_features)),(42000, 28, 28, 1))
train_labels = np.array(train_labels)
test_features = np.reshape(np.array(normalize_grayscale(test_data)),(28000, 28, 28, 1))
print(test_features.shape)

(28000, 28, 28, 1)


In [157]:
learning_rate = 0.001
epochs = 35000
batch_size = 50
# 用来验证和计算准确率的样本数
# 如果内存不够，可以调小这个数字
test_valid_size = 256

# Network Parameters
# 神经网络参数
n_classes = 10  # MNIST total classes (0-9 digits)
dropout = 0.8  # Dropout, probability to keep units

In [158]:
# print(train_features)

In [159]:
# print(np.array(train_labels))

In [160]:
weights = {
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    'wd1': tf.Variable(tf.random_normal([7*7*64, 1024])),
    'out': tf.Variable(tf.random_normal([1024, n_classes]))}

biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes]))}

In [161]:
def conv2d(x, W, b, strides=1):
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2):
    return tf.nn.max_pool(
        x,
        ksize=[1, k, k, 1],
        strides=[1, k, k, 1],
        padding='SAME')

In [162]:
def conv_net(x, weights, biases, dropout):
    # Layer 1 - 28*28*1 to 14*14*32
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    conv1 = maxpool2d(conv1, k=2)
    print(conv1.get_shape())

    # Layer 2 - 14*14*32 to 7*7*64
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    conv2 = maxpool2d(conv2, k=2)
    print(conv2.get_shape())

    # Fully connected layer - 7*7*64 to 1024
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, dropout)
    print(fc1.get_shape())
    

    # Output Layer - class prediction - 1024 to 10
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out

In [163]:
# tf Graph input
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)

# Model
logits = conv_net(x, weights, biases, keep_prob)

# Define loss and optimizer
cost = tf.reduce_mean(\
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\
    .minimize(cost)

# Accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

(?, 14, 14, 32)
(?, 7, 7, 64)
(?, 1024)


In [164]:
print( train_features.shape[0])

42000


In [165]:
epochs_completed = 0
index_in_epoch = 0
num_examples = train_features.shape[0]

# serve data by batches
def next_batch(batch_size):
    
    global train_features
    global train_labels
    global index_in_epoch
    global epochs_completed
    
    start = index_in_epoch
    index_in_epoch += batch_size
#     print(index_in_epoch)
    
    # when all trainig data have been already used, it is reorder randomly    
    if index_in_epoch > num_examples:
#         print("weeeeeeee")
        # finished epoch
        epochs_completed += 1
        # shuffle the data
        perm = np.arange(num_examples)
        np.random.shuffle(perm)
#         print("weeeeeeee2")
        train_features = train_features[perm]
#         print("weeeeeeee3")
#         print(type(train_labels))
#         print(train_labels.shape)
#         print(train_features.shape)
        
#         print(perm)
#         print(len(perm))
        train_labels = train_labels[perm]
        # start next epoch
        start = 0
        index_in_epoch = batch_size
        assert batch_size <= num_examples
    end = index_in_epoch
    return train_features[start:end], train_labels[start:end]

In [166]:

init = tf.global_variables_initializer()
# writer = tf.summary.FileWriter('./board')

with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch in range(epochs):
        # total_batch = int(mnist.train.num_examples/batch_size)
        # Loop over all batches
        # for i in range(total_batch):
        batch_x, batch_y = next_batch(batch_size)
#         print(batch_x.shape)
#         print(batch_y.shape)
        # Run optimization op (backprop) and cost op (to get loss value)
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
        #print( sess.run(cost, feed_dict={x: batch_x, y: batch_y, keep_prob: 1.}))
        if(epoch % 500 == 0):
            print_epoch_stats(epoch, sess, batch_x, batch_y)
    
    conv_y_predict = []
    for i in np.arange(100, 280001, 100):
        conv_y_predict = np.append(conv_y_predict, np.array(sess.run(logits, feed_dict = {x: test_features[i-100:i], keep_prob: 1.})))
#         print(sess.run(logits, feed_dict = {x: test_features[i-100:i], keep_prob: 1.}))
#         conv_y_predict += np.array(sess.run(logits, feed_dict = {x: test_features[i-100:i], keep_prob: 1.}))

    conv_y_predict = np.reshape(np.array(conv_y_predict),(28000, 10))
    print(conv_y_predict.shape)
    
    test_pre = np.argmax(conv_y_predict, 1)
    
    conv_y_submission = pd.DataFrame({
        'ImageId': range(1, 28001), 
        'Label': test_pre 
    })

    conv_y_submission.head()
    print(conv_y_submission)

Epoch: 0 - Cost: 67819.234375 Valid Accuracy: 0.09999999403953552
Epoch: 500 - Cost: 1542.554931640625 Valid Accuracy: 0.8199999928474426
Epoch: 1000 - Cost: 734.9008178710938 Valid Accuracy: 0.8999999761581421
Epoch: 1500 - Cost: 379.24383544921875 Valid Accuracy: 0.9600000381469727
Epoch: 2000 - Cost: 159.68606567382812 Valid Accuracy: 0.940000057220459
Epoch: 2500 - Cost: 217.58084106445312 Valid Accuracy: 0.9599999785423279
Epoch: 3000 - Cost: 192.2558135986328 Valid Accuracy: 0.9599999785423279
Epoch: 3500 - Cost: 0.0 Valid Accuracy: 1.0
Epoch: 4000 - Cost: 0.0 Valid Accuracy: 1.0
Epoch: 4500 - Cost: 0.0 Valid Accuracy: 1.0
Epoch: 5000 - Cost: 0.0 Valid Accuracy: 1.0
Epoch: 5500 - Cost: 10.180459022521973 Valid Accuracy: 0.9800000190734863
Epoch: 6000 - Cost: 0.0 Valid Accuracy: 1.0
Epoch: 6500 - Cost: 0.0 Valid Accuracy: 1.0
Epoch: 7000 - Cost: 66.54513549804688 Valid Accuracy: 0.9799999594688416
Epoch: 7500 - Cost: 0.0 Valid Accuracy: 1.0
Epoch: 8000 - Cost: 0.0 Valid Accuracy: 

In [167]:
conv_y_submission.to_csv('./conv_submission.csv', index=False)