In [24]:
import numpy as np
import tensorflow as tf

from keras.datasets import mnist
# Fetch the MNIST train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into one 784-element row, then divide by 255.0
# (presumably mapping 8-bit pixel intensities into [0, 1] -- confirm the
# dtype returned by the loader if that range matters downstream).
x_train = x_train.reshape(x_train.shape[0], 28*28) / 255.0
x_test = x_test.reshape(x_test.shape[0], 28*28) / 255.0


In [28]:
def build_graph():
    """Build a 784 -> 128 -> 10 fully connected classifier with TF1 graph ops.

    The network is a sigmoid hidden layer followed by a softmax output layer,
    trained with plain gradient descent on the mean cross entropy between the
    one-hot encoded labels and the predicted class distribution.

    Weights are drawn from a normal distribution with standard deviation
    1/sqrt(fan_in); biases start at zero. Note that the draw uses NumPy's
    global random state, so results vary between runs unless the caller
    seeds it beforehand.

    Returns:
        dict with the nodes callers need to feed or fetch:
            "X":             float32 placeholder, shape [None, 784] (inputs).
            "y":             int32 placeholder, shape [None] (class labels).
            "learning_rate": scalar float32 placeholder for the optimizer.
            "mean_error":    scalar tensor, mean cross entropy of the batch.
            "accuracy":      scalar tensor, fraction of correct predictions.
            "train_step":    op applying one gradient-descent update.
    """
    X = tf.placeholder(shape = [None, 28*28], dtype = tf.float32, name = "X")
    y = tf.placeholder(shape = [None], dtype = tf.int32, name = "y")
    Y = tf.one_hot(y, 10)
    learning_rate = tf.placeholder(shape = [], dtype = tf.float32, name = "learning_rate")

    # First layer: 784 inputs -> 128 sigmoid units.
    W1 = tf.Variable(np.random.normal(
        size = [28*28, 128],
        scale = 1/np.sqrt(28*28),
    ), dtype = tf.float32)
    b1 = tf.Variable(np.zeros(shape = [128]), dtype = tf.float32)

    Z1 = tf.matmul(X, W1) + b1
    H1 = tf.nn.sigmoid(Z1)

    # Second layer: 128 hidden units -> 10 class logits.
    W2 = tf.Variable(np.random.normal(
        size = [128, 10],
        scale = 1/np.sqrt(128),
    ), dtype = tf.float32)
    b2 = tf.Variable(np.zeros(shape = [10]), dtype = tf.float32)

    Z2 = tf.matmul(H1, W2) + b2

    # Mean cross entropy. log_softmax works on the logits directly, so the
    # result stays finite even when a class probability would underflow to 0.
    # Taking log(softmax(Z2)) instead yields inf there, and 0 * inf against the
    # one-hot mask turns the whole loss (and every gradient) into NaN.
    log_probabilities = tf.nn.log_softmax(Z2)
    errors = -tf.reduce_sum(Y * log_probabilities, axis = 1)
    mean_error = tf.reduce_mean(errors)

    # Softmax is monotonic, so the arg-max of the logits is the predicted class.
    predictions = tf.argmax(Z2, axis = 1, output_type = tf.int32)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(y, predictions), tf.float32)
    )

    # Training: vanilla gradient descent, learning rate supplied at run time.
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate = learning_rate
    )
    train_step = optimizer.minimize(mean_error)

    return {
        "X": X,
        "y": y,
        "learning_rate": learning_rate,
        "mean_error": mean_error,
        "accuracy": accuracy,
        "train_step": train_step
    }


In [34]:
BATCH_SIZE = 32

# Pre-slice the training set into consecutive (inputs, labels) mini-batches.
# The final batch is shorter whenever the number of examples is not a
# multiple of BATCH_SIZE, because slicing past the end simply truncates.
num_training_examples = x_train.shape[0]
mnist_batches = []
for batch_start_idx in range(0, num_training_examples, BATCH_SIZE):
    batch_rows = slice(batch_start_idx, batch_start_idx + BATCH_SIZE)
    batch_x = x_train[batch_rows, :]
    batch_y = y_train[batch_rows]
    mnist_batches.append((batch_x, batch_y))
    
# Step size fed to the graph's "learning_rate" placeholder on every update.
LEARNING_RATE = 0.01
def train_batch(session, batch_x, batch_y, graph):
    """Apply one optimizer update using a single mini-batch.

    Args:
        session: object whose ``run(fetches, feed_dict=...)`` executes the graph.
        batch_x: inputs fed to ``graph["X"]``.
        batch_y: labels fed to ``graph["y"]``.
        graph: mapping holding the "X", "y", "learning_rate" and "train_step"
            nodes.

    Returns:
        None; the fetched value of the training op is discarded.
    """
    feeds = {
        graph["X"]: batch_x,
        graph["y"]: batch_y,
        graph["learning_rate"]: LEARNING_RATE,
    }
    session.run(graph["train_step"], feed_dict = feeds)
    
def evaluate_model(batch_idx, session, graph):
    """Print mean error and accuracy measured on the module-level test set.

    Args:
        batch_idx: index of the batch just trained; used only as a label in
            the printed line.
        session: object whose ``run(fetches, feed_dict=...)`` executes the graph.
        graph: mapping holding the "X", "y", "mean_error" and "accuracy" nodes.

    Returns:
        None; the metrics are only printed.
    """
    fetches = [graph["mean_error"], graph["accuracy"]]
    # The whole test set (x_test / y_test) is pushed through in one run call.
    test_feed = {
        graph["X"]: x_test,
        graph["y"]: y_test,
    }
    me, acc = session.run(fetches, feed_dict = test_feed)

    print(f'B: {batch_idx} | ME: {me:0.2f} | ACC: {acc:0.2f}')
    
def train_epoch(session, graph):
    """Train on every pre-built mini-batch once, reporting progress as it goes.

    Iterates over the module-level ``mnist_batches`` in order, running one
    training step per batch. After every 100th batch (including batch 0) the
    model is evaluated and the metrics are printed.

    Args:
        session: session forwarded to ``train_batch`` and ``evaluate_model``.
        graph: node mapping forwarded to ``train_batch`` and ``evaluate_model``.
    """
    for batch_idx, batch in enumerate(mnist_batches):
        batch_x, batch_y = batch
        train_batch(
            session = session,
            batch_x = batch_x,
            batch_y = batch_y,
            graph = graph,
        )

        # Only report on batches 0, 100, 200, ...
        if batch_idx % 100 != 0:
            continue
        evaluate_model(batch_idx, session, graph)

NUM_EPOCHS = 10

# Start from an empty default graph. Without this, every re-execution of the
# cell adds another full copy of the network to the process-wide default
# graph, and global_variables_initializer() then re-initialises all the stale
# copies as well. Must be called outside any active session or graph context.
tf.reset_default_graph()
with tf.Session() as session:
    # Build the model in the (now empty) default graph that the session uses.
    graph = build_graph()
    session.run(tf.global_variables_initializer())

    # Each epoch is one full pass over the pre-built mini-batches.
    for epoch_idx in range(NUM_EPOCHS):
        train_epoch(session, graph)

B: 0 | ME: 2.39 | ACC: 0.10
B: 100 | ME: 2.22 | ACC: 0.32
B: 200 | ME: 2.14 | ACC: 0.50
B: 300 | ME: 2.07 | ACC: 0.57
B: 400 | ME: 1.99 | ACC: 0.65
B: 500 | ME: 1.91 | ACC: 0.67
B: 600 | ME: 1.83 | ACC: 0.68
B: 700 | ME: 1.74 | ACC: 0.72
B: 800 | ME: 1.65 | ACC: 0.74
B: 900 | ME: 1.57 | ACC: 0.75
B: 1000 | ME: 1.49 | ACC: 0.77
B: 1100 | ME: 1.42 | ACC: 0.77
B: 1200 | ME: 1.35 | ACC: 0.78
B: 1300 | ME: 1.28 | ACC: 0.78
B: 1400 | ME: 1.22 | ACC: 0.79
B: 1500 | ME: 1.16 | ACC: 0.80
B: 1600 | ME: 1.11 | ACC: 0.80
B: 1700 | ME: 1.06 | ACC: 0.81
B: 1800 | ME: 1.02 | ACC: 0.81
B: 0 | ME: 0.99 | ACC: 0.81
B: 100 | ME: 0.95 | ACC: 0.82
B: 200 | ME: 0.92 | ACC: 0.82
B: 300 | ME: 0.89 | ACC: 0.82
B: 400 | ME: 0.86 | ACC: 0.83
B: 500 | ME: 0.83 | ACC: 0.83
B: 600 | ME: 0.81 | ACC: 0.83
B: 700 | ME: 0.78 | ACC: 0.84
B: 800 | ME: 0.76 | ACC: 0.85
B: 900 | ME: 0.74 | ACC: 0.85
B: 1000 | ME: 0.73 | ACC: 0.85
B: 1100 | ME: 0.71 | ACC: 0.85
B: 1200 | ME: 0.69 | ACC: 0.86
B: 1300 | ME: 0.68 | ACC: 0.85
B