In [None]:
# Copyright 2018 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# Construct a Model for Computer Vision using Tensorflow

This *ML pipeline' constructs a Convolutional Neural Network (CNN) using Tensorflow framework, as follows:

        1. Configurable # of 2D Convolutional Layer, with configurable input size.
        2. Max Pooling and Flattening Layer.
        3. Configurable # of Neural Network layers, with configurable number of nodes.
        4. Configurable # of dropout Layer, with configurable percentage.
        5. Output Layer, with configurable number of outputs (classes).

In [None]:
# Tensorflow's Neural Network components
import tensorflow as tf
from tensorflow.python.framework import ops

## Setting Parameters

Use the `construct_cnn()` routine to construct the CNN, which is construct as:

                    Convolutional -> Neural Network -> Output

The `input_size` is the (height,width) shape of the preprocessed image data (machine learning ready data). For example, in the MNIST and EMMNIST datasets, the input size is (28,28).

The `n_classes` is the number of classes to train the model for. Each class is a distinct object to recognize (e.g., a cat). The number of classes will be the number of nodes in the output layer of the neural network. For example, in the MNIST and EMMNIST datasets, the number of classes is 10 and 62 respectively.

The `n_nodes` may either be a single integer value or a list of integer values. When specified as a single integer, the value is the number of nodes in the input layer of the neural network from the convolutional front-end, and there are no hidden layers.

Otherwise, `n_nodes` is a list, the first list element is the number of nodes in the input layer of the neural network from the convolutional front-end. The remaining elements are the hidden layers, where the value of the element is the number of nodes in the corresponding hidden layer.

The `dropout` is the percentage of dropout after the first layer of the neural network. If the value is 0, then there is no dropout.


In [3]:
def construct_cnn(input_shape, n_classes, n_filters=32, n_nodes=128, dropout=0):
    """ Construct a CNN model function for instantiating a CNN model using tf.estimator
    Args:
        input_shape: (tuple(int,int,int)) the 3D shape of the input vector.
        n_classes  : (int) total number of classes.
        n_filters  : (tuple(int,...)) number of filters per convolutional layer
        n_nodes    : (tuple(int,...)) number of nodes per neural network layer.
        dropout    : (float) Dropout rate between 0 and 1.
    
    Returns:
        A function which creates a CNN model creation function for the tf.estimator

    Raises:
        None.
    """
    
    def model_cnn_fn(features, labels, mode):
        """ A CNN model creation function for tf.estimator
        Args:
            features :
            labels   :
            mode     :
            
        Returns:
        
        Raises:
            None.
        """

        # Constructing a Feed Forward Neural Network
        
        # Make n_filters a tuple if a single int
        if isinstance(n_filters, int):
            n_filters = tuple([n_filters])
      
        # Add a first convolutional front-end with 3x3 kernal
        A = _Conv2D(X, filters=n_filters[0], kernel_size=3)
        Z = _MaxPooling2D(A, kernel_size=2)

        # Add Remaining Convolutional layers
        for ix in range(1, len(n_filters)):
            # Add next convolutional front-end with 3x3 kernal
            A = _Conv2D(Z, filters=n_filters[ix], kernel_size=3) 
            Z = _MaxPooling2D(A, kernel_size=2)

        # Flatten the output from the max pooling layer for input to the neural network
        Z = _Flatten(Z)
    
        # make n_nodes a tuple if a single integer
        if isinstance(n_nodes, int):
            n_nodes = tuple([n_nodes])
        
        # make dropout a tuple if a single integer
        if isinstance(dropout, int) or isinstance(dropout, float):
            # apply dropout to the first layer
            dropout = [dropout]
            # make remaining layers zero
            for _ in range(1, len(n_nodes)):
                dropout.append(0)
            dropout = tuple(dropout)

        # Add layers
        for ix in range(len(n_nodes)):
            Z = _Dense(Z, n_nodes[ix])
            # Add dropout if any])
            if dropout[ix] > 0:
                Z = _Dropout(Z, dropout[ix], mode)
    
        # Add the output layer
        logits = _Dense(Z, n_classes, activation='softmax')
        
        predictions = {
            # Generate predictions (for PREDICT and EVAL mode)
            "classes": tf.argmax(input=logits, axis=1),
            # Add `softmax_tensor` to the graph. It is used for PREDICT and by the `logging_hook`.
            "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
        }

        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

        # Calculate Loss (for both TRAIN and EVAL modes)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    
        # Configure the Training Op (for TRAIN mode)
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
            train_op = optimizer.minimize( loss=loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

        # Add evaluation metrics (for EVAL mode)
        eval_metric_ops = {
          "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
        }
        
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
    
    return model_cnn_fn

def _Conv2D(T, filters=32, kernel_size=3):
    """ Construct a 2D Convolutional Layer
    Args:
        T          : input tensor
        filters    : number of filters
        kernel_size: size of the kernel (e.g., 3 for 3x3)
        
    Returns:
        An output tensor.
        
    Raises:
        None.
    """  
    # Convolutional Layer
    A = tf.layers.conv2d(
          inputs=T,
          filters=filters,
          kernel_size=[kernel_size, kernel_size],
          padding="same",
          activation=tf.nn.relu)


    return A

def _MaxPooling2D(T, kernel_size=2):
    """ Construct a 2D Max Pooling Layer
    Args:
       T          : input tensor
       kernel_size: size of the kernel (e.g., 2 for 2x2)
       
    Returns:
        An output tensor.
        
    Raises:
        None
    """
    # The max pooling layer
    Z = tf.layers.max_pooling2d(inputs=T, pool_size=[kernel_size, kernel_size], strides=2)
    return Z

def _Flatten(T):
    """ Flatten a tensor.
    Args:
        T : input tensor
        
    Returns:
        An output tensor.
    
    Raises:
        None
    """
    Z = tf.layers.Flatten()(T)
    return Z

def _Dense(T, n_nodes, activation='relu'):
    """ Construct a Dense Layer
    Args:
        T      : input tensor
        n_nodes: the number of nodes in the layer
        
    Returns:
        An output tensor.
        
    Raises
        None
    """
    if activation == 'relu':
        A = tf.layers.dense(inputs=T, units=n_nodes, activation=tf.nn.relu)
    else:
        A = tf.layers.dense(inputs=T, units=n_nodes)
    return A

def _Dropout(T, percent, mode):
    """ Construct a dropout layer
    Args:
        T      : input tensor
        percent: the percent of nodes to drop out
        
    Returns:
        An output tensor.
        
    Raises:
        None
    """
 
    Z = tf.layers.dropout(inputs=T, rate=percent, training=mode == tf.estimator.ModeKeys.TRAIN)
    return Z

In the function train_cnn() there are two ways of passing the train/test data.

1. Unsplit: the train and test data are passed as combined data, in which case x_test and y_test are None. The function will then shuffle the combined data and then split the combined data into training and test based on the percent parameter.

2. Split: the train and test data are already split, in which case x and y are the training data and x_test and y_test are the test data. The function does not shuffle or split the pre-split data, and the percent parameter is ignored.

In [None]:
import time
import numpy as np
from keras.utils import np_utils

def train_cnn(model, x, y, x_test=None, y_test=None, epochs=10, batch_size=32, percent=0.2, verbose=False, seed=113, learning_rate=0.01):
    """ Train the model
    Args:
        model     : (model_fn) The CNN model creation function.
        x         : (numpy.ndarray) The x portion (preprocessed image data) of the dataset.
        y         : (numpy.ndarray) The y portion (labels) of the dataset.
        x_test    : (numpy.ndarray) The x_test (if pre-split) portion of the dataset.
        y_test    : (numpy.ndarray) The y_test (if pre-split) portion of the dataset.
        epochs    : (int) The number of times to feed the entire dataset for training.
        batch_size: (int) The mini-batch size.
        percent   : (float) The percent of the dataset to use for test.
        verbose   : (bool) Display (console) progress status.
        seed      : (int) Seed for random shuffle before splitting.
        
    Returns:
        The model accuracy after training and evaluation.
    
    Raises:
        None
    """
    
    # Create the Estimator
    classifier = tf.estimator.Estimator(model_fn=model, model_dir="./")
    
    # Logging Hooks to output progress while training
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)

    
    # one hot encode the labels
    y = np_utils.to_categorical(y)
    if y_test is not None:
        y_test = np_utils.to_categorical(y_test)
   
    # Images are grayscale. This model expects shape to be (rows, height, width, channels) vs. (rows, height, width)
    if len(x.shape) == 3:
        x = x.reshape(x.shape[0], x.shape[1], x.shape[2], 1)
        if x_test is not None:
            x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], x_test.shape[2], 1)
            
    # Ignore percent if data is already split
    if x_test is not None:
        percent = 0
    
    
    # Calculate the number of elements which will be used as training data
    train_size = int((1-percent) * len(x))
    if verbose: print("Training Size:", train_size)
     
    # Dataset is combined
    if x_test is None:
        # Randomly shuffle the data before splitting
        np.random.seed(seed)
        np.random.shuffle(x)
        np.random.seed(seed)
        np.random.shuffle(y)

        # split the data into Train and Test
        X_train = x[:train_size]
        Y_train = y[:train_size]
        X_test  = x[train_size:]
        Y_test  = y[train_size:]
    # Dataset is presplit
    else:
        X_train = x
        Y_train = y
        X_test  = x_test
        Y_test  = y_test


    start = time.time()
    
    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": X_train},
        y=Y_train,
        batch_size=batch_size,
        num_epochs=epochs,
        shuffle=True)
    
    # TODO: Resolve difference between epochs and steps
    classifier.train(
        input_fn=train_input_fn,
        steps=20000,
        hooks=[logging_hook])
    
    if verbose: print("Time", time.time() - start)

    # Evaluate the model and print results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": X_test},
        y=Y_test,
        num_epochs=1,
        shuffle=False)
    eval_results = classifier.evaluate(input_fn=eval_input_fn)
    if verbose: print(eval_results)
            
    
    return eval_results

    

In [54]:
def codeGenerator(input_shape, n_classes, n_filters=32, n_nodes=128, dropout=0):
    code = []
    code.append("def model_cnn_fn(features, labels, mode):")
    
    code.append("\t# Constructing a Feed Forward Neural Network")
    code.append("")
    
    code.append("\ttf.reshape(features['x'], [-1, " + str(input_shape[0]) + ", " + str(input_shape[1]) + ", " + str(input_shape[2]) + "])")
    code.append("")
        
    # Make n_filters a tuple if a single int
    if isinstance(n_filters, int):
        n_filters = tuple([n_filters])
      
    code.append("\t" + codeConv2D(filters=n_filters[0], kernel_size=3))
        
    # Add Remaining Convolutional layers
    for ix in range(1, len(n_filters)):
        if n_filters[ix] == True:
            # Add max pooling layer
            code.append("\t" + codeMaxPooling2D(kernel_size=2))
        else:
            # Add next convolutional front-end with 3x3 kernal
            code.append("\t" + codeConv2D(filters=n_filters[ix], kernel_size=3))

    # Add max pooling layer
    code.append("\t" + codeMaxPooling2D(kernel_size=2))
              
    # Flatten the output from the max pooling layer for input to the neural network
    code.append("\t" + codeFlatten())
    
    # make n_nodes a tuple if a single integer
    if isinstance(n_nodes, int):
        n_nodes = tuple([n_nodes])
        
    # make dropout a tuple if a single integer
    if isinstance(dropout, int) or isinstance(dropout, float):
        # apply dropout to the first layer
        dropout = [dropout]
        # make remaining layers zero
        for _ in range(1, len(n_nodes)):
            dropout.append(0)
        dropout = tuple(dropout)

    # Add layers
    for ix in range(len(n_nodes)):
        code.append("\t" + codeDense(n_nodes[ix]))
        # Add dropout if any])
        if dropout[ix] > 0:
            code.append("\t" + codeDropout(dropout[ix]))
    
    # Add the output layer
    code.append("\tlogits = " + codeDense(n_classes, activation='softmax'))
    
    code.append("")
    code.append("\tpredictions = {")
    code.append("\t\t# Generate predictions (for PREDICT and EVAL mode)")
    code.append("\t\t'classes': tf.argmax(input=logits, axis=1),")
    code.append("\t\t# Add `softmax_tensor` to the graph. It is used for PREDICT and by the `logging_hook`.")
    code.append("\t\t'probabilities': tf.nn.softmax(logits, name='softmax_tensor')")
    code.append("\t}")  
    
    code.append("")
    code.append("\tif mode == tf.estimator.ModeKeys.PREDICT:")
    code.append("\t\treturn tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)")

    code.append("")
    code.append("\t# Calculate Loss (for both TRAIN and EVAL modes)")
    code.append("\tloss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)")
    
    code.append("")
    code.append("\t# Configure the Training Op (for TRAIN mode)")
    code.append("\tif mode == tf.estimator.ModeKeys.TRAIN:")
    code.append("\t\toptimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)")
    code.append("\t\ttrain_op = optimizer.minimize( loss=loss, global_step=tf.train.get_global_step())")
    code.append("\t\treturn tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)")
    
    code.append("")
    code.append("\t# Add evaluation metrics (for EVAL mode)")
    code.append("\teval_metric_ops = {")
    code.append("\t\t'accuracy': tf.metrics.accuracy(labels=labels, predictions=predictions['classes'])")
    code.append("\t}")
        
    code.append("")
    code.append("\treturn tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)")
            
    for line in code:
        print(line)
                
def codeConv2D(filters=32, kernel_size=3):
    """ Construct a 2D Convolutional Layer
    Args:
        filters    : number of filters
        kernel_size: size of the kernel (e.g., 3 for 3x3)
        
    Returns:
        An output tensor.
        
    Raises:
        None.
    """  
    # Convolutional Layer
    s = 'T = tf.layers.conv2d(inputs=T, filters=' + str(filters) + ', kernel_size=[' + str(kernel_size) + ', ' + str(kernel_size) + '], padding="same",activation=tf.nn.relu)'

    return s


def codeMaxPooling2D(kernel_size=2):
    """ Construct a 2D Max Pooling Layer
    Args:
       kernel_size: size of the kernel (e.g., 2 for 2x2)
       
    Returns:
        An output tensor.
        
    Raises:
        None
    """
    # The max pooling layer
    s = 'T = tf.layers.max_pooling2d(inputs=T, pool_size=[' + str(kernel_size) + ', ' + str(kernel_size) + '], strides=2)'
    return s

def codeFlatten():
    """ Flatten a tensor.
    Args:
        
    Returns:
        An output tensor.
    
    Raises:
        None
    """
    s = 'T = tf.layers.Flatten()(T)'
    return s

def codeDense(n_nodes, activation='relu'):
    """ Construct a Dense Layer
    Args:
        n_nodes: the number of nodes in the layer
        
    Returns:
        An output tensor.
        
    Raises
        None
    """
    if activation == 'relu':
        s = 'T = tf.layers.dense(inputs=T, units=' + str(n_nodes) + ', activation=tf.nn.relu)'
    else:
        s = 'T = tf.layers.dense(inputs=T, units=' + str(n_nodes) + ')'
    return s

def codeDropout(percent):
    """ Construct a dropout layer
    Args:
        percent: the percent of nodes to drop out
        
    Returns:
        An output tensor.
        
    Raises:
        None
    """
 
    s = 'T = tf.layers.dropout(inputs=T, rate=' + str(percent) + ', training=mode == tf.estimator.ModeKeys.TRAIN)'
    return s


In [55]:
codeGenerator((28,28,1), 10, n_filters=(64, True, 32), n_nodes=(512, 20), dropout=0.5)

def model_cnn_fn(features, labels, mode):
	# Constructing a Feed Forward Neural Network

	tf.reshape(features['x'], [-1, 28, 28, 1])

	T = tf.layers.conv2d(inputs=T, filters=64, kernel_size=[3, 3], padding="same",activation=tf.nn.relu)
	T = tf.layers.max_pooling2d(inputs=T, pool_size=[2, 2], strides=2)
	T = tf.layers.conv2d(inputs=T, filters=32, kernel_size=[3, 3], padding="same",activation=tf.nn.relu)
	T = tf.layers.max_pooling2d(inputs=T, pool_size=[2, 2], strides=2)
	T = tf.layers.Flatten()(T)
	T = tf.layers.dense(inputs=T, units=512, activation=tf.nn.relu)
	T = tf.layers.dropout(inputs=T, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN)
	T = tf.layers.dense(inputs=T, units=20, activation=tf.nn.relu)
	logits = T = tf.layers.dense(inputs=T, units=10)

	predictions = {
		# Generate predictions (for PREDICT and EVAL mode)
		'classes': tf.argmax(input=logits, axis=1),
		# Add `softmax_tensor` to the graph. It is used for PREDICT and by the `logging_hook`.
		'probabilities': tf.nn