## Checkpoint

In [1]:
%matplotlib inline

# Load the modules
import pickle
import math

import numpy as np
import tensorflow as tf
from tqdm import tqdm
import matplotlib.pyplot as plt

# Reload the dataset splits from the pickle checkpoint.
# NOTE: pickle.load can execute arbitrary code, so only open files you trust.
pickle_file = 'notMNIST_Norm.pickle'
with open(pickle_file, 'rb') as f:
    pickle_data = pickle.load(f)

# Pull each split out of the loaded dictionary.
train_features, train_labels = pickle_data['train_dataset'], pickle_data['train_labels']
valid_features, valid_labels = pickle_data['valid_dataset'], pickle_data['valid_labels']
test_features, test_labels = pickle_data['test_dataset'], pickle_data['test_labels']
del pickle_data  # Free up memory

print('Data and modules loaded.')

Data and modules loaded.


## Build the neural Network

### Input

In [2]:
import tensorflow as tf

def neural_net_image_input(image_shape):
    """
    Create the placeholder that receives a batch of input images
    : image_shape: Shape of the images; its first three entries are used
    : return: float32 placeholder named 'x' whose leading (batch) dimension is unspecified.
    """
    dim0, dim1, dim2 = image_shape[0], image_shape[1], image_shape[2]
    # A leading None lets the placeholder accept any batch size.
    batch_shape = (None, dim0, dim1, dim2)
    return tf.placeholder(tf.float32, shape=batch_shape, name='x')


def neural_net_label_input(n_classes):
    """
    Create the placeholder that receives a batch of labels
    : n_classes: Number of classes (width of each label row)
    : return: float32 placeholder named 'y' of shape (batch, n_classes).
    """
    # A leading None lets the placeholder accept any batch size.
    label_shape = (None, n_classes)
    return tf.placeholder(tf.float32, shape=label_shape, name='y')


def neural_net_keep_prob_input():
    """
    Create the placeholder that receives the dropout keep probability
    : return: float32 placeholder named 'keep_prob' (no shape is specified).
    """
    keep_prob_placeholder = tf.placeholder(dtype=tf.float32, name='keep_prob')
    return keep_prob_placeholder

### Convolution and Max Pooling Layer

In [3]:
def conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides):
    """
    Run x_tensor through convolution -> bias -> ReLU -> max pooling
    :param x_tensor: TensorFlow Tensor
    :param conv_num_outputs: Number of outputs for the convolutional layer
    :param conv_ksize: kernel size 2-D Tuple for the convolutional layer
    :param conv_strides: Stride 2-D Tuple for convolution
    :param pool_ksize: kernel size 2-D Tuple for pool
    :param pool_strides: Stride 2-D Tuple for pool
    : return: A tensor that represents convolution and max pooling of x_tensor
    """
    # The filter depth must match the last dimension of the input.
    kernel_height, kernel_width = conv_ksize[0], conv_ksize[1]
    in_channels = x_tensor.get_shape().as_list()[-1]
    filter_shape = [kernel_height, kernel_width, in_channels, conv_num_outputs]

    # Trainable parameters: truncated-normal filters, constant-initialised biases
    filters = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1))
    offsets = tf.Variable(tf.constant(0.05, shape=[conv_num_outputs]))

    # Convolution with 'SAME' padding; strides apply to the two middle dimensions
    conv_stride_spec = [1, conv_strides[0], conv_strides[1], 1]
    convolved = tf.nn.conv2d(x_tensor, filters, strides=conv_stride_spec, padding='SAME')

    # Bias followed by the ReLU non-linearity
    activated = tf.nn.relu(tf.nn.bias_add(convolved, offsets))

    # Max pooling, again with 'SAME' padding
    pool_window_spec = [1, pool_ksize[0], pool_ksize[1], 1]
    pool_stride_spec = [1, pool_strides[0], pool_strides[1], 1]
    return tf.nn.max_pool(activated,
                          ksize=pool_window_spec,
                          strides=pool_stride_spec,
                          padding='SAME')

### Flatten Layer

In [5]:
def flatten(x_tensor):
    """
    Collapse every non-batch dimension of x_tensor into a single one
    : x_tensor: A tensor of size (Batch Size, ...), where ... are the image dimensions.
    : return: A tensor of size (Batch Size, Flattened Image Size).
    """
    per_example_dims = x_tensor.get_shape().as_list()[1:]
    # Number of values per example = product of all non-batch dimensions
    flat_size = np.prod(per_example_dims)
    # -1 leaves the batch dimension to be inferred at run time
    target_shape = [-1, flat_size]
    return tf.reshape(x_tensor, target_shape)

### Fully-Connected Layer

In [6]:
def fully_conn(x_tensor, num_outputs):
    """
    Build a ReLU-activated fully connected layer on top of x_tensor
    : x_tensor: A 2-D tensor where the first dimension is batch size.
    : num_outputs: The number of output that the new tensor should be.
    : return: A 2-D tensor where the second dimension is num_outputs.
    """
    n_inputs = x_tensor.get_shape().as_list()[1]

    # Trainable parameters: truncated-normal weights, constant-initialised biases
    weights = tf.Variable(tf.truncated_normal([n_inputs, num_outputs], stddev=0.1))
    biases = tf.Variable(tf.constant(0.05, shape=[num_outputs]))

    # relu(x . W + b)
    pre_activation = tf.matmul(x_tensor, weights) + biases
    return tf.nn.relu(pre_activation)

### Output layer

In [8]:
def output(x_tensor, num_outputs):
    """
    Build the final linear layer (no activation) on top of x_tensor
    : x_tensor: A 2-D tensor where the first dimension is batch size.
    : num_outputs: The number of output that the new tensor should be.
    : return: A 2-D tensor where the second dimension is num_outputs.
    """
    n_inputs = x_tensor.get_shape().as_list()[1]

    # Trainable parameters: truncated-normal weights, constant-initialised biases
    weights = tf.Variable(tf.truncated_normal([n_inputs, num_outputs], stddev=0.1))
    biases = tf.Variable(tf.constant(0.05, shape=[num_outputs]))

    # x . W + b, returned as-is without a non-linearity
    return tf.matmul(x_tensor, weights) + biases

### Create convolutional model

In [9]:
def conv_net(x, keep_prob):
    """
    Assemble the convolutional classifier and return its logits
    : x: Placeholder tensor that holds image data.
    : keep_prob: Placeholder tensor that holds the dropout keep probability.
    : return: Tensor that represents logits
    """
    # Two identical conv + max-pool stages:
    # 32 outputs, 3x3 kernel with stride 1, 2x2 pooling window with stride 2.
    features = x
    for _ in range(2):
        features = conv2d_maxpool(features, 32, (3, 3), (1, 1), (2, 2), (2, 2))

    # Flatten so the fully connected layer receives a 2-D tensor.
    flat_features = flatten(features)

    # One 512-unit fully connected layer followed by dropout.
    hidden = tf.nn.dropout(fully_conn(flat_features, 512), keep_prob)

    # A second fully connected layer (128 units, with dropout) was tried at this
    # point and seemed to hurt accuracy, so it is left out.

    # Output layer: one logit per class (10 classes).
    return output(hidden, 10)



##############################
## Build the Neural Network ##
##############################

# Start from an empty default graph: drops previous weights, biases, inputs, etc.
tf.reset_default_graph()

# Input placeholders
x = neural_net_image_input((28, 28, 1))  # gray scale - number of channels = 1
y = neural_net_label_input(10)
keep_prob = neural_net_keep_prob_input()

# Model output, wrapped in tf.identity to give the tensor the name 'logits'
# so that it can be loaded from disk after training
logits = tf.identity(conv_net(x, keep_prob), name='logits')

# Loss: mean softmax cross-entropy over the batch, minimised with Adam
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)
cost = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer().minimize(cost)

# Accuracy: fraction of rows where the arg-max of the logits matches the arg-max of the labels
predicted_class = tf.argmax(logits, 1)
true_class = tf.argmax(y, 1)
correct_pred = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')

## Train the neural network

In [10]:
def train_neural_network(session, optimizer, keep_probability, feature_batch, label_batch):
    """
    Run one optimization step on a batch of images and labels
    : session: Current TensorFlow session
    : optimizer: TensorFlow optimizer function
    : keep_probability: keep probability
    : feature_batch: Batch of Numpy image data
    : label_batch: Batch of Numpy label data
    """
    # Bind the batch and the dropout setting to the module-level placeholders.
    feeds = {}
    feeds[x] = feature_batch
    feeds[y] = label_batch
    feeds[keep_prob] = keep_probability
    session.run(optimizer, feed_dict=feeds)

In [11]:
def print_stats(session, feature_batch, label_batch, cost, accuracy):
    """
    Print the loss on the given batch and the accuracy on the validation set
    : session: Current TensorFlow session
    : feature_batch: Batch of Numpy image data
    : label_batch: Batch of Numpy label data
    : cost: TensorFlow cost function
    : accuracy: TensorFlow accuracy function
    """
    # Dropout is switched off (keep_prob = 1.0) for both evaluations.
    loss = session.run(cost, feed_dict={x: feature_batch, y: label_batch, keep_prob: 1.0})

    # The validation features come straight from the pickle and are not reshaped
    # anywhere else, so a flattened array would not match the shape of `x`.
    # Reshape them to the placeholder's per-example shape; an array that already
    # has that shape passes through with the same shape.
    example_shape = x.get_shape().as_list()[1:]
    valid_images = valid_features
    if all(dim is not None for dim in example_shape):
        valid_images = np.reshape(valid_features, [-1] + example_shape)

    validation_accuracy = session.run(accuracy, feed_dict={x: valid_images, y: valid_labels, keep_prob: 1.0})
    print('Loss: {} Validation Accuracy: {}'.format(loss, validation_accuracy))

In [17]:
# Training hyperparameters (TODO: tune)
epochs = 1  # number of passes over the training batches in the training loop
batch_size = 64  # number of examples per batch produced by batch_features_labels
keep_probability = 1.0 # dropout keep probability fed while training; author's note: lower values are not good

In [18]:
def batch_features_labels(features, labels, batch_size):
    """
    Yield consecutive (features, labels) slices of at most batch_size items
    : features: Sequence of examples; its length decides how many batches are produced
    : labels: Sequence sliced with the same index ranges as features
    : batch_size: Maximum number of items per batch
    """
    n_samples = len(features)
    for lo in range(0, n_samples, batch_size):
        # Clamp the upper bound so the final batch stops at the end of the data.
        hi = lo + batch_size
        if hi > n_samples:
            hi = n_samples
        yield features[lo:hi], labels[lo:hi]

#### Since our images are flattened, we have to reshape them back in order to use them in the CNN

In [23]:
# Give the training images the (batch, height, width, channels) layout of the
# image placeholder `x`, which is created with shape (28, 28, 1) per example.
img_size= 28  # height and width used for each reshaped image
num_channels = 1 # gray scale is 1
# Earlier attempt using a TensorFlow op, kept for reference:
#train_features_reshape = tf.reshape(train_features, [-1, img_size, img_size, num_channels])
# -1 lets NumPy infer the number of images from the total array size.
train_features_reshape = np.reshape(train_features, (-1,img_size, img_size, num_channels))

In [24]:
with tf.Session() as sess:
    # Initializing the variables
    sess.run(tf.global_variables_initializer())

    # Training cycle
    for epoch in range(epochs):
        # batch_i is the number of mini-batches trained on in this epoch;
        # it stays 0 only when the training set yields no batches at all.
        batch_i = 0
        batches = batch_features_labels(train_features_reshape, train_labels, batch_size)
        for batch_i, (batch_features, batch_labels) in enumerate(batches, start=1):
            train_neural_network(sess, optimizer, keep_probability, batch_features, batch_labels)

        if batch_i == 0:
            # Nothing was trained, so there is no "last batch" to report a loss on.
            print('Epoch {:>2}: no training batches'.format(epoch + 1))
            continue

        # Report the loss on the last mini-batch of the epoch and the validation accuracy.
        print('Epoch {:>2}, notMNIST Batch {}:  '.format(epoch + 1, batch_i), end='')
        print_stats(sess, batch_features, batch_labels, cost, accuracy)

KeyboardInterrupt: 