## Check the Version of TensorFlow and Access to GPU

In [58]:
"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
from distutils.version import LooseVersion
import warnings
import tensorflow as tf

# Check TensorFlow Version
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer'
print('TensorFlow Version: {}'.format(tf.__version__))

# Check for a GPU
if not tf.test.gpu_device_name():
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
else:
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

TensorFlow Version: 1.0.0
Default GPU Device: /gpu:0


## Check memory

In [5]:
!pip install psutil

Collecting psutil
  Downloading psutil-5.2.2.tar.gz (348kB)
[K    100% |████████████████████████████████| 358kB 1.7MB/s ta 0:00:01
[?25hBuilding wheels for collected packages: psutil
  Running setup.py bdist_wheel for psutil ... [?25l- \ | done
[?25h  Stored in directory: /home/carnd/.cache/pip/wheels/65/e8/6f/b3fa73e0b422fab906cd67e8446901b27c2769a29890151135
Successfully built psutil
Installing collected packages: psutil
Successfully installed psutil-5.2.2


In [1]:
import psutil
import os

In [23]:
def memory_usage():
    process = psutil.Process(os.getpid())
    return process.memory_info().rss/1e9

In [24]:
memory_usage()

1.230913536

## try to release memory

In [49]:
import time
import tensorflow as tf

for i in range(0,100):
    t0 = time.clock()
    
    with tf.Graph().as_default():
        config = tf.ConfigProto()
        config.gpu_options.allow_growth=True
        sess = tf.Session(config=config)

        a = tf.placeholder(tf.int16, name='a')
        y = tf.identity(a, name='y')

        sess.run(y, feed_dict={a: 3})
        sess.close()

    del sess

    time.sleep(20)

print(time.clock() - t0)

KeyboardInterrupt: 

In [39]:
%reset out

Once deleted, variables cannot be recovered. Proceed (y/[n])? y
Flushing output cache (5 entries)


In [41]:
%reset array

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


## Checkpoint

In [1]:
%matplotlib inline

# Load the modules
import pickle
import math

import numpy as np
import tensorflow as tf
from tqdm import tqdm
import matplotlib.pyplot as plt

# Reload the data
pickle_file = 'notMNIST_Norm.pickle'
with open(pickle_file, 'rb') as f:
  pickle_data = pickle.load(f)
  train_features = pickle_data['train_dataset']
  train_labels = pickle_data['train_labels']
  valid_features = pickle_data['valid_dataset']
  valid_labels = pickle_data['valid_labels']
  test_features = pickle_data['test_dataset']
  test_labels = pickle_data['test_labels']
  del pickle_data  # Free up memory

print('Data and modules loaded.')

Data and modules loaded.


## Build the neural Network

### Input

In [2]:
import tensorflow as tf

def neural_net_image_input(image_shape):
    """
    Return a Tensor for a bach of image input
    : image_shape: Shape of the images
    : return: Tensor for image input.
    """
    return tf.placeholder(tf.float32, shape=(None,image_shape[0],image_shape[1],image_shape[2]), name='x')


def neural_net_label_input(n_classes):
    """
    Return a Tensor for a batch of label input
    : n_classes: Number of classes
    : return: Tensor for label input.
    """
    return tf.placeholder(tf.float32, shape=(None,n_classes), name='y')


def neural_net_keep_prob_input():
    """
    Return a Tensor for keep probability
    : return: Tensor for keep probability.
    """
    return tf.placeholder(tf.float32, name='keep_prob')

### Convolution and Max Pooling Layer

In [3]:
def conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides):
    """
    Apply convolution then max pooling to x_tensor
    :param x_tensor: TensorFlow Tensor
    :param conv_num_outputs: Number of outputs for the convolutional layer
    :param conv_ksize: kernal size 2-D Tuple for the convolutional layer
    :param conv_strides: Stride 2-D Tuple for convolution
    :param pool_ksize: kernal size 2-D Tuple for pool
    :param pool_strides: Stride 2-D Tuple for pool
    : return: A tensor that represents convolution and max pooling of x_tensor
    """
    # Weight and bias
    weight = tf.Variable(tf.truncated_normal([
        conv_ksize[0], 
        conv_ksize[1], 
        x_tensor.get_shape().as_list()[-1], 
        conv_num_outputs], stddev=0.1))

    bias = tf.Variable(tf.constant(0.05, shape=[conv_num_outputs]))

    # Apply Convolution
    conv_layer = tf.nn.conv2d(x_tensor, 
                              weight, 
                              strides=[1,conv_strides[0],conv_strides[1],1],
                              padding='SAME')
    # Add bias
    conv_layer = tf.nn.bias_add(conv_layer, bias)
    
    # Apply activation function
    conv_layer = tf.nn.relu(conv_layer)
    
    # Apply Max Pooling
    conv_layer = tf.nn.max_pool(conv_layer,
                                ksize=[1,pool_ksize[0],pool_ksize[1],1],
                                strides=[1,pool_strides[0],pool_strides[1],1],
                                padding ='SAME')
    return conv_layer

### Flatten Layer

In [4]:
def flatten(x_tensor):
    """
    Flatten x_tensor to (Batch Size, Flattened Image Size)
    : x_tensor: A tensor of size (Batch Size, ...), where ... are the image dimensions.
    : return: A tensor of size (Batch Size, Flattened Image Size).
    """
    flat_shape = np.prod(x_tensor.get_shape().as_list()[1:])
    return tf.reshape(x_tensor,[-1,flat_shape])

### Fully-Connected Layer

In [5]:
def fully_conn(x_tensor, num_outputs):
    """
    Apply a fully connected layer to x_tensor using weight and bias
    : x_tensor: A 2-D tensor where the first dimension is batch size.
    : num_outputs: The number of output that the new tensor should be.
    : return: A 2-D tensor where the second dimension is num_outputs.
    """
    # Weight and bias
    weight = tf.Variable(tf.truncated_normal([
        x_tensor.get_shape().as_list()[1],
        num_outputs], stddev=0.1))
    
    bias = tf.Variable(tf.constant(0.05, shape=[num_outputs]))
    
    # Matrix multiplication input and weight, then add bias
    fc_layer = tf.matmul(x_tensor, weight) + bias
    # Apply activation function
    fc_layer = tf.nn.relu(fc_layer)
    
    return fc_layer

### Output layer

In [6]:
def output(x_tensor, num_outputs):
    """
    Apply a output layer to x_tensor using weight and bias
    : x_tensor: A 2-D tensor where the first dimension is batch size.
    : num_outputs: The number of output that the new tensor should be.
    : return: A 2-D tensor where the second dimension is num_outputs.
    """
    # Weight and bias
    weight = tf.Variable(tf.truncated_normal([
        x_tensor.get_shape().as_list()[1],
        num_outputs], stddev=0.1))
    #bias = tf.Variable(tf.zeros(num_outputs))
    bias = tf.Variable(tf.constant(0.05,shape=[num_outputs]))
    
    # Matrix multiplication input and weight, then add bias
    output_layer = tf.matmul(x_tensor, weight) + bias
    
    return output_layer

### Create convolutional model

In [7]:
def conv_net(x, keep_prob):
    """
    Create a convolutional neural network model
    : x: Placeholder tensor that holds image data.
    : keep_prob: Placeholder tensor that hold dropout keep probability.
    : return: Tensor that represents logits
    """
    # TODO: Apply 1, 2, or 3 Convolution and Max Pool layers
    #    Play around with different number of outputs, kernel size and stride
    # Function Definition from Above:
    #    conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides)
    conv_layer1 = conv2d_maxpool(x, 32, (3,3), (1,1), (2,2), (2,2))
    conv_layer2 = conv2d_maxpool(conv_layer1, 32, (3,3), (1,1), (2,2), (2,2))
    

    # TODO: Apply a Flatten Layer
    # Function Definition from Above:
    #   flatten(x_tensor)
    flat_layer = flatten(conv_layer2)
    

    # TODO: Apply 1, 2, or 3 Fully Connected Layers
    #    Play around with different number of outputs
    # Function Definition from Above:
    #   fully_conn(x_tensor, num_outputs)
    fully_conn_layer1 = fully_conn(flat_layer, 512)
    fully_conn_layer1 = tf.nn.dropout(fully_conn_layer1, keep_prob)
    
    ##------ adding following layer seems screwing the accuracy ----------------------
    #fully_conn_layer2 = fully_conn(fully_conn_layer1, 128)
    #fully_conn_layer2 = tf.nn.dropout(fully_conn_layer2, keep_prob)
    
    
    # TODO: Apply an Output Layer
    #    Set this to the number of classes
    # Function Definition from Above:
    #   output(x_tensor, num_outputs)
    
    
    # TODO: return output
    return output(fully_conn_layer1, 10)



##############################
## Build the Neural Network ##
##############################

# Remove previous weights, bias, inputs, etc..
tf.reset_default_graph()

# Inputs
x = neural_net_image_input((28, 28, 1)) # gray scale - number of channels = 1
y = neural_net_label_input(10)
keep_prob = neural_net_keep_prob_input()

# Model
logits = conv_net(x, keep_prob)

# Name logits Tensor, so that is can be loaded from disk after training
logits = tf.identity(logits, name='logits')

# Loss and Optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost)

# Accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')

## Train the neural Network

In [8]:
def train_neural_network(session, optimizer, keep_probability, feature_batch, label_batch):
    """
    Optimize the session on a batch of images and labels
    : session: Current TensorFlow session
    : optimizer: TensorFlow optimizer function
    : keep_probability: keep probability
    : feature_batch: Batch of Numpy image data
    : label_batch: Batch of Numpy label data
    """
    # TODO: Implement Function
    session.run(optimizer, feed_dict={x:feature_batch,
                                     y:label_batch,
                                     keep_prob: keep_probability})

In [9]:
def print_stats(session, feature_batch, label_batch, cost, accuracy):
    """
    Print information about loss and validation accuracy
    : session: Current TensorFlow session
    : feature_batch: Batch of Numpy image data
    : label_batch: Batch of Numpy label data
    : cost: TensorFlow cost function
    : accuracy: TensorFlow accuracy function
    """
    # TODO: Implement Function
    loss = session.run(cost, feed_dict={x: feature_batch, y: label_batch, keep_prob: 1.0})
    validation_accuracy = session.run(accuracy, feed_dict={x: valid_features, y: valid_labels, keep_prob: 1.0})
    print('Loss: {} Validation Accuracy: {}'.format(loss, validation_accuracy))
    #train_accuracy = session.run(accuracy, feed_dict={x: train_features, y: train_labels, keep_prob: 1.0})
    #print('Loss: {} Train Accuracy: {}  Validation Accuracy: {}'.format(loss,train_accuracy, validation_accuracy))

In [10]:
# TODO: Tune Parameters
epochs = 10
batch_size = 128
keep_probability = 1.0 # Lower values are not good

In [11]:
def batch_features_labels(features, labels, batch_size):
    """
    Split features and labels into batches
    """
    for start in range(0, len(features), batch_size):
        end = min(start + batch_size, len(features))
        yield features[start:end], labels[start:end]

#### Since our image is flattend, we have to reshape it back  in order to use in CNN

In [12]:
img_size= 28 
num_channels = 1 # gray scale is 1
train_features = np.reshape(train_features, (-1,img_size, img_size, num_channels))
valid_features = np.reshape(valid_features,(-1,img_size, img_size, num_channels))
test_features = np.reshape(test_features,(-1,img_size, img_size, num_channels))

In [16]:
with tf.Session() as sess:
    # Initializing the variables
    sess.run(tf.global_variables_initializer())
    
    # Training cycle
    for epoch in range(epochs):
        for batch_features, batch_labels in batch_features_labels(train_features,train_labels, batch_size):
            train_neural_network(sess, optimizer, keep_probability, batch_features, batch_labels)
        print('Epoch {:>2}:  '.format(epoch + 1), end='')
        print_stats(sess, batch_features, batch_labels, cost, accuracy)
        
    # save the model
    saver = tf.train.Saver()
    save_path = saver.save(sess, './model')

Epoch  1:  Loss: 0.1974611133337021 Validation Accuracy: 0.9022665619850159
Epoch  2:  Loss: 0.06473953276872635 Validation Accuracy: 0.9151998162269592
Epoch  3:  Loss: 0.036455631256103516 Validation Accuracy: 0.9282665252685547
Epoch  4:  Loss: 0.013745954260230064 Validation Accuracy: 0.9361332058906555
Epoch  5:  Loss: 0.008648397400975227 Validation Accuracy: 0.9419997930526733
Epoch  6:  Loss: 0.0034123288933187723 Validation Accuracy: 0.9430664777755737
Epoch  7:  Loss: 0.002502160845324397 Validation Accuracy: 0.9477332234382629
Epoch  8:  Loss: 0.0018196540186181664 Validation Accuracy: 0.9503998756408691
Epoch  9:  Loss: 0.0014050720492377877 Validation Accuracy: 0.9515998363494873
Epoch 10:  Loss: 0.0009697001078166068 Validation Accuracy: 0.9510664939880371


## TEST
load tf models:
 - the name defined in input are very important
 - with tf.Session(graph=loaded_graph) as sess:    if you forget graph=loaded_graph, error.

In [14]:
# The accuracy measured against the test set
test_accuracy = 0.0
batch_size = 128

save_model_path = './model'
loaded_graph = tf.Graph()

with tf.Session(graph=loaded_graph) as sess:
    
    # load model
    loader = tf.train.import_meta_graph(save_model_path + '.meta')
    loader.restore(sess, save_model_path)
    
    # Get tensors from loaded model
    loaded_x = loaded_graph.get_tensor_by_name('x:0')
    loaded_y = loaded_graph.get_tensor_by_name('y:0')
    loaded_keep_prob = loaded_graph.get_tensor_by_name('keep_prob:0')
    loaded_logits = loaded_graph.get_tensor_by_name('logits:0')
    loaded_acc = loaded_graph.get_tensor_by_name('accuracy:0')
    
    test_batch_acc_total = 0
    test_batch_count = 0
    
    for batch_features, batch_labels in batch_features_labels(test_features,test_labels, batch_size):
        test_batch_acc_total += sess.run(loaded_acc, feed_dict={loaded_x:batch_features, loaded_y:batch_labels, loaded_keep_prob:1.0})
        test_batch_count +=1
        
    
    test_accuracy =  test_batch_acc_total/test_batch_count   
    print('Testing Accuracy: {}\n'.format(test_accuracy))



assert test_accuracy >= 0.80, 'Test accuracy at {}, should be equal to or greater than 0.80'.format(test_accuracy)
print('Nice Job! Test Accuracy is {}'.format(test_accuracy))

Testing Accuracy: 0.9615308544303798

Nice Job! Test Accuracy is 0.9615308544303798
