In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

import tensorflow as tf

In [2]:
# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Reset the default TensorFlow graph and re-seed TensorFlow and NumPy.

    Call this before (re)building the model so that re-running the graph
    construction cells starts from an empty default graph.

    Args:
        seed: Value handed to both ``tf.set_random_seed`` and
            ``np.random.seed``. Defaults to 42.
    """
    tf.reset_default_graph()
    # NOTE(review): the TF seed is set after the reset, presumably because it
    # applies to the current default graph - confirm before reordering.
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [3]:
from IPython.display import Image

### Build a simple neural network for the MNIST dataset

Load the MNIST dataset.

In [4]:
# Download (or read from the local Keras cache) the MNIST arrays.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every 28x28 image into a 784-long row, then scale pixels to [0, 1].
X_train = X_train.reshape(-1, 28 * 28).astype(np.float32) / 255.0
X_test = X_test.reshape(-1, 28 * 28).astype(np.float32) / 255.0

# Labels are fed to an int32 placeholder further down.
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

# Hold out the first 5000 training examples as a validation set;
# everything after index 5000 remains the training set.
X_valid, X_train = np.split(X_train, [5000])
y_valid, y_train = np.split(y_train, [5000])

In [5]:
# One shape per line, in the order: training, validation, test.
print(X_train.shape, X_valid.shape, X_test.shape, sep="\n")

(55000, 784)
(5000, 784)
(10000, 784)


In [6]:
# X_train[0, :]

`tf.layers.dense()`
 * almost identical to the `tf.contrib.layers.fully_connected()` function

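`tf.name_scope`
* groups related ops under a common name prefix, which avoids name conflicts and keeps the graph readable
* note: variable reuse is controlled by `tf.variable_scope`, not by `tf.name_scope`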

In [7]:
# Layer sizes for the classifier.
n_inputs = 28 * 28  # one feature per pixel of a 28x28 MNIST image
n_hidden1 = 50      # units in the single dense hidden layer
n_outputs = 10      # one logit per digit class (0-9)

In [8]:
# Start from an empty default graph so this cell can be re-run safely.
reset_graph()

# X holds one mini-batch of flattened images. The leading None is the batch
# dimension: a batch of 64 is 64 x 784, a batch of 128 is 128 x 784.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="input")

# y holds one integer class label per example. The shape must be the 1-tuple
# (None,): a bare (None) is just None, which leaves the rank unspecified
# instead of declaring a rank-1 vector of unknown length.
y = tf.placeholder(tf.int32, shape=(None,), name="y")

# Reshape the flat rows back to 28x28 single-channel images for the 2-D
# convolutions; -1 keeps the batch dimension dynamic.
x_image = tf.reshape(X, [-1, 28, 28, 1])

In [9]:
# tf.shape(X)

<tf.Tensor 'Shape:0' shape=(2,) dtype=int32>

In [10]:
# tf.shape(X)

<tf.Tensor 'Shape_1:0' shape=(2,) dtype=int32>

In [11]:
# X.get_shape()

TensorShape([Dimension(None), Dimension(784)])

In [12]:
with tf.name_scope("dnn"):
    # Convolutional feature extractor followed by a small dense classifier.
    # The shapes quoted below are the static shapes this notebook prints in
    # its training cell ("?" is the batch dimension).
    # NOTE(review): tf.contrib.layers.conv2d is called without an explicit
    # activation_fn, so its library default applies (ReLU per the TF 1.x docs,
    # confirm for the installed version).

    # 16 feature maps from a 3x3 kernel -> (?, 28, 28, 16).
    conv1 = tf.contrib.layers.conv2d(x_image, num_outputs = 16, kernel_size=3, padding='same')
    
    # 16 feature maps from a 5x5 kernel -> (?, 28, 28, 16); flattened to (?, 12544).
    conv2 = tf.contrib.layers.conv2d(conv1, num_outputs = 16, kernel_size=5, padding='same')
    conv2_flat = tf.contrib.layers.flatten(conv2)
    
    # Disabled max-pooling experiment, kept for reference.
    #pool2 = tf.contrib.layers.max_pool2d(conv2, kernel_size = 3, padding='same')
    #pool2_flat = tf.contrib.layers.flatten(pool2)
    
    # A single feature map from a 3x3 kernel -> (?, 28, 28, 1); flattened to (?, 784).
    conv3 = tf.contrib.layers.conv2d(conv2, num_outputs = 1, kernel_size=3, padding='same')
    conv3_flat = tf.contrib.layers.flatten(conv3)
    
    # Despite its name, conv4 is not a convolution: it concatenates the
    # flattened conv2 and conv3 features along axis 1 -> (?, 13328).
    conv4 = tf.concat([conv2_flat, conv3_flat], 1)
    
    
    # Dense head: n_hidden1 ReLU units, then one raw (un-normalized) logit per class.
    hidden1 = tf.layers.dense(conv4, n_hidden1, name="hidden1", activation=tf.nn.relu)
    logits = tf.layers.dense(hidden1, n_outputs, name="outputs")
    # Class probabilities; the loss and accuracy ops below use `logits` instead.
    y_proba = tf.nn.softmax(logits)

In [13]:
# tf.shape(conv1)

<tf.Tensor 'Shape_2:0' shape=(4,) dtype=int32>

In [14]:
with tf.name_scope("loss"):
    # Per-example cross-entropy, computed from the integer labels and the
    # raw logits (no one-hot encoding, no explicit softmax).
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=y,
    )
    # Scalar training objective: the mean over the batch.
    loss = tf.reduce_mean(xentropy, name="loss")

In [15]:
# Step size for plain gradient descent.
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    # Running this op performs one gradient-descent update on `loss`.
    training_op = optimizer.minimize(loss)

In [16]:
with tf.name_scope("eval"):
    # True for each example whose label is the top-1 scoring class.
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    # Accuracy is the fraction of True entries in the batch.
    accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

In [17]:
# Op that initializes every variable created so far; the training cell runs
# it once at the start of the session.
init = tf.global_variables_initializer()
# Saver for checkpointing the model; the save calls in the training cell are
# currently commented out, so nothing is written to disk.
saver = tf.train.Saver()

In [18]:
def shuffle_batch(X, y, batch_size):
    """Yield shuffled mini-batches that together cover (X, y) exactly once.

    The sample indices are permuted with NumPy's global RNG and split into
    ``len(X) // batch_size`` nearly equal chunks, so individual batches may
    hold slightly more than ``batch_size`` samples and no sample is dropped.

    Args:
        X: Array of samples, indexable by an integer index array.
        y: Array of labels aligned with ``X`` along the first axis.
        batch_size: Target number of samples per batch; must be >= 1.

    Yields:
        ``(X_batch, y_batch)`` tuples whose rows are aligned.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(
            "batch_size must be a positive integer, got {!r}".format(batch_size))

    n_samples = len(X)
    if n_samples == 0:
        # Nothing to iterate over; previously this crashed in np.array_split.
        return

    rnd_idx = np.random.permutation(n_samples)
    # When there are fewer samples than batch_size the integer division gives
    # 0, which np.array_split rejects; fall back to a single batch that holds
    # every sample.
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]

In [20]:
n_epochs = 1
batch_size = 50

with tf.Session() as sess:
    # Initialize all variables before any training step.
    sess.run(init)

    # Shape report. tf.shape() builds a symbolic shape op, so the first line
    # prints a Tensor description; get_shape() gives the static shape.
    print(tf.shape(conv1))
    print(conv1.get_shape())
    print(conv2.get_shape())
    print(conv3.get_shape())
    print(conv3_flat.get_shape())

    print("conv2_flat : {}".format(conv2_flat.get_shape()))
    print("conv3_flat : {}".format(conv3_flat.get_shape()))
    print("conv4 : {}".format(conv4.get_shape()))

    for epoch in range(n_epochs):
        # One pass over the shuffled training set, one update per mini-batch.
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            training_v = sess.run([training_op], feed_dict={X: X_batch, y: y_batch})

        # End-of-epoch metrics: accuracy and loss on the last mini-batch of
        # the epoch, and accuracy on the whole validation set.
        last_batch_feed = {X: X_batch, y: y_batch}
        acc_batch = sess.run(accuracy, feed_dict=last_batch_feed)
        acc_valid = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})

        loss_v = sess.run(loss, feed_dict=last_batch_feed)
        print(loss_v)
        print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    # Checkpointing is disabled; call saver.save(sess, <path>) here to
    # persist the trained weights.

Tensor("Shape_3:0", shape=(4,), dtype=int32)
(?, 28, 28, 16)
(?, 28, 28, 16)
(?, 28, 28, 1)
(?, 784)
conv2_flat : (?, 12544)
conv3_flat : (?, 784)
conv4 : (?, 13328)
0.120639
0 Batch accuracy: 0.98 Validation accuracy: 0.9438
