This document explores using tensorflow (and maybe keras) on the MNIST set

In [1]:
import os
import sys
import numpy as np
import csv

Load in the data

In [2]:
sys.platform

'win32'

In [3]:
if 'darwin' in sys.platform:
    data_dir = os.path.join('/Users', 'njchiang', 'CloudStation',
                            'kaggle', 'digits')
else:
    data_dir = os.path.join('D:\\', 'CloudStation', 'kaggle', 'digits')
    
data = np.loadtxt(os.path.join(data_dir, 'train.csv'), delimiter=',', skiprows=1)

In [4]:
y_train_full = data[:, 0:1]
x_train_full = data[:, 1:]
print(x_train_full.shape, y_train_full.shape)

(42000, 784) (42000, 1)


In [5]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_train_full, y_train_full, test_size=.4)

In [6]:
import tensorflow as tf
sess = tf.InteractiveSession()

In [7]:
from keras.utils.np_utils import to_categorical
y_train_vec = to_categorical(y_train)
y_test_vec = to_categorical(y_test)

Using TensorFlow backend.


In [8]:
x = tf.placeholder(tf.float32, shape=[None, x_train_full.shape[1]])
y_ = tf.placeholder(tf.float32, shape=[None, y_train_vec.shape[1]])

Neural networks are defined by their weights and biases, and we can specify the neuronal function at each layer

In [9]:
def weight_variable(shape):
  initial = tf.truncated_normal(shape, stddev=0.1)
  return tf.Variable(initial)


def bias_variable(shape):
  initial = tf.constant(0.1, shape=shape)
  return tf.Variable(initial)

Define some layer functions

In [10]:
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')


def softmax_dense(x, W, b):
    return tf.matmul(x, W) + b


def relu_dense(x, W, b):
    return tf.nn.relu(softmax_dense(x, W, b))

### Construct a dense (only) network with some dropout

In [11]:
architecture = [x_train.shape[1], 100, y_train_vec.shape[1]]
# layer 1
h_fc1 = relu_dense(x, weight_variable([architecture[0], architecture[1]]), 
                   bias_variable([architecture[1]]))

keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# layer 2
y_res = softmax_dense(h_fc1_drop, weight_variable([architecture[1], architecture[2]]), 
                   bias_variable([architecture[2]]))

### Define metrics

In [12]:
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_res))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_res,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [13]:
# write my own batching function:
def get_next_batch(n, x, y):
    batch_idx = np.random.choice(range(x.shape[0]), n)
    return [x[batch_idx], y[batch_idx]]

In [15]:
sess.run(tf.global_variables_initializer())
for i in range(200):
    batch = get_next_batch(50, x_train, y_train_vec)
    if i%100 == 0:
        train_accuracy = accuracy.eval(feed_dict={
            x:batch[0], y_: batch[1], keep_prob: 1.0})
    print("step %d, training accuracy %g"%(i, train_accuracy))
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

step 0, training accuracy 0.08
step 1, training accuracy 0.08
step 2, training accuracy 0.08
step 3, training accuracy 0.08
step 4, training accuracy 0.08
step 5, training accuracy 0.08
step 6, training accuracy 0.08
step 7, training accuracy 0.08
step 8, training accuracy 0.08
step 9, training accuracy 0.08
step 10, training accuracy 0.08
step 11, training accuracy 0.08
step 12, training accuracy 0.08
step 13, training accuracy 0.08
step 14, training accuracy 0.08
step 15, training accuracy 0.08
step 16, training accuracy 0.08
step 17, training accuracy 0.08
step 18, training accuracy 0.08
step 19, training accuracy 0.08
step 20, training accuracy 0.08
step 21, training accuracy 0.08
step 22, training accuracy 0.08
step 23, training accuracy 0.08
step 24, training accuracy 0.08
step 25, training accuracy 0.08
step 26, training accuracy 0.08
step 27, training accuracy 0.08
step 28, training accuracy 0.08
step 29, training accuracy 0.08
step 30, training accuracy 0.08
step 31, training 

step 36, training accuracy 0.08
step 37, training accuracy 0.08
step 38, training accuracy 0.08
step 39, training accuracy 0.08
step 40, training accuracy 0.08
step 41, training accuracy 0.08
step 42, training accuracy 0.08
step 43, training accuracy 0.08
step 44, training accuracy 0.08
step 45, training accuracy 0.08
step 46, training accuracy 0.08
step 47, training accuracy 0.08
step 48, training accuracy 0.08
step 49, training accuracy 0.08


step 50, training accuracy 0.08
step 51, training accuracy 0.08
step 52, training accuracy 0.08
step 53, training accuracy 0.08
step 54, training accuracy 0.08
step 55, training accuracy 0.08
step 56, training accuracy 0.08
step 57, training accuracy 0.08
step 58, training accuracy 0.08
step 59, training accuracy 0.08
step 60, training accuracy 0.08
step 61, training accuracy 0.08
step 62, training accuracy 0.08
step 63, training accuracy 0.08
step 64, training accuracy 0.08
step 65, training accuracy 0.08
step 66, training accuracy 0.08
step 67, training accuracy 0.08
step 68, training accuracy 0.08
step 69, training accuracy 0.08
step 70, training accuracy 0.08
step 71, training accuracy 0.08
step 72, training accuracy 0.08
step 73, training accuracy 0.08
step 74, training accuracy 0.08
step 75, training accuracy 0.08
step 76, training accuracy 0.08
step 77, training accuracy 0.08
step 78, training accuracy 0.08
step 79, training accuracy 0.08
step 80, training accuracy 0.08
step 81

step 85, training accuracy 0.08
step 86, training accuracy 0.08
step 87, training accuracy 0.08
step 88, training accuracy 0.08
step 89, training accuracy 0.08
step 90, training accuracy 0.08
step 91, training accuracy 0.08
step 92, training accuracy 0.08
step 93, training accuracy 0.08
step 94, training accuracy 0.08
step 95, training accuracy 0.08
step 96, training accuracy 0.08
step 97, training accuracy 0.08
step 98, training accuracy 0.08


step 99, training accuracy 0.08
step 100, training accuracy 0.28
step 101, training accuracy 0.28
step 102, training accuracy 0.28
step 103, training accuracy 0.28
step 104, training accuracy 0.28
step 105, training accuracy 0.28
step 106, training accuracy 0.28
step 107, training accuracy 0.28
step 108, training accuracy 0.28
step 109, training accuracy 0.28
step 110, training accuracy 0.28
step 111, training accuracy 0.28
step 112, training accuracy 0.28
step 113, training accuracy 0.28
step 114, training accuracy 0.28
step 115, training accuracy 0.28
step 116, training accuracy 0.28
step 117, training accuracy 0.28
step 118, training accuracy 0.28
step 119, training accuracy 0.28
step 120, training accuracy 0.28
step 121, training accuracy 0.28
step 122, training accuracy 0.28
step 123, training accuracy 0.28
step 124, training accuracy 0.28
step 125, training accuracy 0.28
step 126, training accuracy 0.28
step 127, training accuracy 0.28
step 128, training accuracy 0.28
step 129, 


step 134, training accuracy 0.28
step 135, training accuracy 0.28
step 136, training accuracy 0.28
step 137, training accuracy 0.28
step 138, training accuracy 0.28
step 139, training accuracy 0.28
step 140, training accuracy 0.28
step 141, training accuracy 0.28
step 142, training accuracy 0.28
step 143, training accuracy 0.28
step 144, training accuracy 0.28
step 145, training accuracy 0.28
step 146, training accuracy 0.28


step 147, training accuracy 0.28
step 148, training accuracy 0.28
step 149, training accuracy 0.28
step 150, training accuracy 0.28
step 151, training accuracy 0.28
step 152, training accuracy 0.28
step 153, training accuracy 0.28
step 154, training accuracy 0.28
step 155, training accuracy 0.28
step 156, training accuracy 0.28
step 157, training accuracy 0.28
step 158, training accuracy 0.28
step 159, training accuracy 0.28
step 160, training accuracy 0.28
step 161, training accuracy 0.28
step 162, training accuracy 0.28
step 163, training accuracy 0.28
step 164, training accuracy 0.28
step 165, training accuracy 0.28
step 166, training accuracy 0.28
step 167, training accuracy 0.28
step 168, training accuracy 0.28
step 169, training accuracy 0.28
step 170, training accuracy 0.28
step 171, training accuracy 0.28
step 172, training accuracy 0.28
step 173, training accuracy 0.28
step 174, training accuracy 0.28
step 175, training accuracy 0.28
step 176, training accuracy 0.28
step 177,

step 178, training accuracy 0.28
step 179, training accuracy 0.28
step 180, training accuracy 0.28
step 181, training accuracy 0.28
step 182, training accuracy 0.28
step 183, training accuracy 0.28
step 184, training accuracy 0.28
step 185, training accuracy 0.28
step 186, training accuracy 0.28
step 187, training accuracy 0.28
step 188, training accuracy 0.28
step 189, training accuracy 0.28
step 190, training accuracy 0.28
step 191, training accuracy 0.28
step 192, training accuracy 0.28
step 193, training accuracy 0.28


step 194, training accuracy 0.28
step 195, training accuracy 0.28
step 196, training accuracy 0.28
step 197, training accuracy 0.28
step 198, training accuracy 0.28
step 199, training accuracy 0.28


In [16]:
print("test accuracy %g"%accuracy.eval(feed_dict={
    x: x_test, y_: y_test_vec, keep_prob: 1.0}))

test accuracy 0.474345


## Convolutional architecture

In [17]:
conv_relu_softmax_size = [x_train.shape[1], 32, 100, y_train_vec.shape[1]]  # fully connected architecture

# reshape as image
x_image = tf.reshape(x, [-1,28,28,1])

# convolution and max pooling (layer 1)
W_conv1 = weight_variable([5, 5, 1, conv_relu_softmax_size[1]])
b_conv1 = bias_variable([conv_relu_softmax_size[1]])

h_conv1 = conv2d(x_image, W_conv1)
h_pool1 = max_pool_2x2(h_conv1)

h_pool1_vec = tf.reshape(h_pool1, [-1, 14*14*conv_relu_softmax_size[1]])

# layer 2
h_fc1 = relu_dense(h_pool1_vec, weight_variable([14*14*conv_relu_softmax_size[1], 
                                                 conv_relu_softmax_size[2]]), 
                   bias_variable([conv_relu_softmax_size[2]]))

keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# 
# layer 3
y_res = softmax_dense(h_fc1_drop, weight_variable([conv_relu_softmax_size[2], 
                                                   conv_relu_softmax_size[3]]), 
                      bias_variable([conv_relu_softmax_size[3]]))

In [18]:
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_res))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_res,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [19]:
sess.run(tf.global_variables_initializer())
for i in range(200):
    batch = get_next_batch(50, x_train, y_train_vec)
    if i%100 == 0:
        train_accuracy = accuracy.eval(feed_dict={
            x:batch[0], y_: batch[1], keep_prob: 1.0})
    print("step %d, training accuracy %g"%(i, train_accuracy))
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

step 0, training accuracy 0.1
step 1, training accuracy 0.1
step 2, training accuracy 0.1
step 3, training accuracy 0.1
step 4, training accuracy 0.1
step 5, training accuracy 0.1
step 6, training accuracy 0.1
step 7, training accuracy 0.1


step 8, training accuracy 0.1
step 9, training accuracy 0.1
step 10, training accuracy 0.1
step 11, training accuracy 0.1
step 12, training accuracy 0.1
step 13, training accuracy 0.1
step 14, training accuracy 0.1
step 15, training accuracy 0.1
step 16, training accuracy 0.1


step 17, training accuracy 0.1
step 18, training accuracy 0.1
step 19, training accuracy 0.1
step 20, training accuracy 0.1
step 21, training accuracy 0.1
step 22, training accuracy 0.1
step 23, training accuracy 0.1
step 24, training accuracy 0.1
step 25, training accuracy 0.1


step 26, training accuracy 0.1
step 27, training accuracy 0.1
step 28, training accuracy 0.1
step 29, training accuracy 0.1
step 30, training accuracy 0.1
step 31, training accuracy 0.1
step 32, training accuracy 0.1
step 33, training accuracy 0.1
step 34, training accuracy 0.1


step 35, training accuracy 0.1
step 36, training accuracy 0.1
step 37, training accuracy 0.1
step 38, training accuracy 0.1
step 39, training accuracy 0.1
step 40, training accuracy 0.1
step 41, training accuracy 0.1
step 42, training accuracy 0.1
step 43, training accuracy 0.1


step 44, training accuracy 0.1
step 45, training accuracy 0.1
step 46, training accuracy 0.1
step 47, training accuracy 0.1
step 48, training accuracy 0.1
step 49, training accuracy 0.1
step 50, training accuracy 0.1
step 51, training accuracy 0.1
step 52, training accuracy 0.1


step 53, training accuracy 0.1
step 54, training accuracy 0.1
step 55, training accuracy 0.1
step 56, training accuracy 0.1
step 57, training accuracy 0.1
step 58, training accuracy 0.1
step 59, training accuracy 0.1
step 60, training accuracy 0.1
step 61, training accuracy 0.1


step 62, training accuracy 0.1
step 63, training accuracy 0.1
step 64, training accuracy 0.1
step 65, training accuracy 0.1
step 66, training accuracy 0.1
step 67, training accuracy 0.1
step 68, training accuracy 0.1
step 69, training accuracy 0.1
step 70, training accuracy 0.1


step 71, training accuracy 0.1
step 72, training accuracy 0.1
step 73, training accuracy 0.1
step 74, training accuracy 0.1
step 75, training accuracy 0.1
step 76, training accuracy 0.1
step 77, training accuracy 0.1
step 78, training accuracy 0.1
step 79, training accuracy 0.1


step 80, training accuracy 0.1
step 81, training accuracy 0.1
step 82, training accuracy 0.1
step 83, training accuracy 0.1
step 84, training accuracy 0.1
step 85, training accuracy 0.1
step 86, training accuracy 0.1
step 87, training accuracy 0.1
step 88, training accuracy 0.1


step 89, training accuracy 0.1
step 90, training accuracy 0.1
step 91, training accuracy 0.1
step 92, training accuracy 0.1
step 93, training accuracy 0.1
step 94, training accuracy 0.1
step 95, training accuracy 0.1
step 96, training accuracy 0.1
step 97, training accuracy 0.1


step 98, training accuracy 0.1
step 99, training accuracy 0.1
step 100, training accuracy 0.58
step 101, training accuracy 0.58
step 102, training accuracy 0.58
step 103, training accuracy 0.58
step 104, training accuracy 0.58
step 105, training accuracy 0.58
step 106, training accuracy 0.58


step 107, training accuracy 0.58
step 108, training accuracy 0.58
step 109, training accuracy 0.58
step 110, training accuracy 0.58
step 111, training accuracy 0.58
step 112, training accuracy 0.58
step 113, training accuracy 0.58
step 114, training accuracy 0.58
step 115, training accuracy 0.58
step 116, training accuracy 0.58


step 117, training accuracy 0.58
step 118, training accuracy 0.58
step 119, training accuracy 0.58
step 120, training accuracy 0.58
step 121, training accuracy 0.58
step 122, training accuracy 0.58
step 123, training accuracy 0.58
step 124, training accuracy 0.58


step 125, training accuracy 0.58
step 126, training accuracy 0.58
step 127, training accuracy 0.58
step 128, training accuracy 0.58
step 129, training accuracy 0.58
step 130, training accuracy 0.58
step 131, training accuracy 0.58
step 132, training accuracy 0.58
step 133, training accuracy 0.58


step 134, training accuracy 0.58
step 135, training accuracy 0.58
step 136, training accuracy 0.58
step 137, training accuracy 0.58
step 138, training accuracy 0.58
step 139, training accuracy 0.58
step 140, training accuracy 0.58
step 141, training accuracy 0.58
step 142, training accuracy 0.58


step 143, training accuracy 0.58
step 144, training accuracy 0.58
step 145, training accuracy 0.58
step 146, training accuracy 0.58
step 147, training accuracy 0.58
step 148, training accuracy 0.58
step 149, training accuracy 0.58
step 150, training accuracy 0.58
step 151, training accuracy 0.58


step 152, training accuracy 0.58
step 153, training accuracy 0.58
step 154, training accuracy 0.58
step 155, training accuracy 0.58
step 156, training accuracy 0.58
step 157, training accuracy 0.58
step 158, training accuracy 0.58
step 159, training accuracy 0.58
step 160, training accuracy 0.58
step 161, training accuracy 0.58


step 162, training accuracy 0.58
step 163, training accuracy 0.58
step 164, training accuracy 0.58
step 165, training accuracy 0.58
step 166, training accuracy 0.58
step 167, training accuracy 0.58
step 168, training accuracy 0.58
step 169, training accuracy 0.58
step 170, training accuracy 0.58


step 171, training accuracy 0.58
step 172, training accuracy 0.58
step 173, training accuracy 0.58
step 174, training accuracy 0.58
step 175, training accuracy 0.58
step 176, training accuracy 0.58
step 177, training accuracy 0.58
step 178, training accuracy 0.58


step 179, training accuracy 0.58
step 180, training accuracy 0.58
step 181, training accuracy 0.58
step 182, training accuracy 0.58
step 183, training accuracy 0.58
step 184, training accuracy 0.58
step 185, training accuracy 0.58
step 186, training accuracy 0.58
step 187, training accuracy 0.58
step 188, training accuracy 0.58


step 189, training accuracy 0.58
step 190, training accuracy 0.58
step 191, training accuracy 0.58
step 192, training accuracy 0.58
step 193, training accuracy 0.58
step 194, training accuracy 0.58
step 195, training accuracy 0.58
step 196, training accuracy 0.58


step 197, training accuracy 0.58
step 198, training accuracy 0.58
step 199, training accuracy 0.58


In [20]:
print("test accuracy %g"%accuracy.eval(feed_dict={
    x: x_test, y_: y_test_vec, keep_prob: 1.0}))

test accuracy 0.625357
