In [None]:
"""Convolutional Neural Network Estimator, built with tf.layers (originally for MNIST)."""

#  FROM : https://www.tensorflow.org/tutorials/layers#building_the_cnn_mnist_classifier
#  CODE : https://www.tensorflow.org/code/tensorflow/examples/tutorials/layers/cnn_mnist.py

import os

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import pickle

import tensorflow as tf

from tensorflow.contrib import learn
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib

# Logging level for TensorFlow.  INFO is quite chatty; switch the argument to
# tf.logging.WARN to suppress the INFO-level logging.
tf.logging.set_verbosity(
    tf.logging.INFO
)

# Master switch read by the training cell: training only runs when this is True.
do_training = True

In [None]:
import sys

# Report the interpreter version first, then the TensorFlow version,
# so the environment can be compared with the expected one.
print(sys.version)
print('Tensorflow:', tf.__version__)

Expected output (the versions this notebook was written against):
```
3.5.2 (default, Sep 14 2016, 11:28:32) 
[GCC 6.2.1 20160901 (Red Hat 6.2.1-1)]
Tensorflow: 1.0.0
```

In [None]:
def cnn_model_fn(features, integer_labels, mode):
  """Model function for the CNN, in the form expected by `learn.Estimator`.

  Architecture: two (5x5 conv, 16 filters, ReLU) + (2x2 max-pool) stages,
  a 16-unit dense ReLU layer, and a 10-way logits layer.

  Args:
    features: dict of feature tensors; only `features['images']` is read.
      It is reshaped to [-1, 64, 32, 1], so each example must hold
      64*32 values.
      NOTE(review): the values are fed straight into conv2d, so they are
      assumed to already be a floating-point dtype -- confirm against the
      dataset.
    integer_labels: class indices (cast to int32 and one-hot encoded with
      depth 10).  Only used when `mode` is not INFER.
    mode: one of the `learn.ModeKeys` values.

  Returns:
    A `ModelFnOps` holding the predictions dict ("classes",
    "probabilities", "logits"), the loss (None in INFER mode) and the
    training op (None unless in TRAIN mode).
  """

  features_images=features['images']

  # tf.layers.conv2d needs a rank-4 [batch, height, width, channels] input,
  # so add an explicit single-channel dimension to the 64x32 images.
  input_layer = tf.reshape(features_images, [-1, 64, 32, 1], name='input_layer')

  # Convolutional Layer #1 (5x5 kernels)
  # Output Tensor Shape: [batch_size, 64, 32, 16]
  conv1 = tf.layers.conv2d( inputs=input_layer,
      filters=16, kernel_size=[5, 5], padding="same",
      activation=tf.nn.relu)

  # First max pooling layer with a 2x2 filter and stride of 2
  # Output Tensor Shape: [batch_size, 32, 16, 16]
  pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

  # Convolutional Layer #2 (5x5 kernels)
  # Output Tensor Shape: [batch_size, 32, 16, 16]
  conv2 = tf.layers.conv2d( inputs=pool1,
      filters=16, kernel_size=[5, 5], padding="same",
      activation=tf.nn.relu)

  # Pooling Layer #2 (2x2 filter and stride of 2)
  # Output Tensor Shape: [batch_size, 16, 8, 16]
  pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

  # Flatten tensor into a batch of vectors : [batch_size, 16*8*16]
  pool2_flat = tf.contrib.layers.flatten(pool2)

  # Dense Layer
  # Output Tensor Shape: [batch_size, 16]
  dense = tf.layers.dense(inputs=pool2_flat, units=16, activation=tf.nn.relu)

  # Dropout is currently disabled.  To re-enable it (0.6 probability that an
  # element will be kept), insert the following and feed `dropout` to the
  # logits layer instead of `dense`:
  #dropout = tf.layers.dropout(
  #    inputs=dense, rate=0.4, training= (mode == learn.ModeKeys.TRAIN) )

  # Logits layer
  # Input Tensor Shape: [batch_size, 16]
  # Output Tensor Shape: [batch_size, 10]
  logits = tf.layers.dense(inputs=dense, units=10)
  #logits = tf.Print(logits, [input_layer.get_shape(), integer_labels.get_shape()], "Debug size information : ", first_n=1)

  loss = None
  train_op = None

  # Calculate Loss (for both TRAIN and EVAL modes)
  if mode != learn.ModeKeys.INFER:
    onehot_labels = tf.one_hot(indices=tf.cast(integer_labels, tf.int32), depth=10)
    loss = tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=onehot_labels)

  # Configure the Training Op (for TRAIN mode)
  if mode == learn.ModeKeys.TRAIN:
    train_op = tf.contrib.layers.optimize_loss(
      loss=loss,
      global_step=tf.contrib.framework.get_global_step(),
      learning_rate=0.01,
      optimizer="Adam")

  # Generate Predictions
  predictions = {
    "classes":       tf.argmax(input=logits, axis=1),
    # The explicit name lets a LoggingTensorHook refer to this tensor
    "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
    "logits":        logits,
  }

  # Return a ModelFnOps object
  return model_fn_lib.ModelFnOps(
      mode=mode, predictions=predictions, loss=loss, train_op=train_op)

In [None]:
# Name of the dataset/model variant: selects 'data/<prefix>.pkl' and 'cnn_model/<prefix>'
prefix = "num"

In [None]:
# Wrap cnn_model_fn in an Estimator (guide : https://www.tensorflow.org/extend/estimators)
# The model directory is given relative to the ipynb.
cnn_classifier = learn.Estimator(
    model_fn=cnn_model_fn,
    model_dir="cnn_model/"+prefix,
)

# Check : the checkpoints file in 'cnn_model/num' has filenames that are in same directory

In [None]:
# Load training and eval data
# Read in the dataset from 'data/<prefix>.pkl'.
# NOTE(security): unpickling can execute arbitrary code -- only load files you trust.
dataset_path = os.path.join('data', prefix+'.pkl')
with open(dataset_path, 'rb') as dataset_file:   # 'with' guarantees the handle is closed
    dataset = pickle.load(dataset_file)

# Split the examples using the per-example value stored in dataset['rand'] :
# values <=0.8 go to training, the rest to testing.
# NOTE(review): presumably 'rand' is uniform on [0,1], giving a ~80/20 split -- confirm.
train_indices = [ i for i,r in enumerate(dataset['rand']) if r<=0.8 ]
test_indices  = [ i for i,r in enumerate(dataset['rand']) if r>0.8 ]

print("Data Loaded")

In [None]:
def batch_input_fn(dataset, indices, batch_size=100, seed=None, num_epochs=1):
    """Build the queue-based input pipeline for an Estimator.

    Selects the rows of dataset['stamp'] and dataset['label'] given by
    `indices`, embeds them in the graph as constants, and serves them in
    batches of `batch_size` for `num_epochs` passes over the data.
    Examples are shuffled only when `seed` is given; incomplete final
    batches are not emitted.

    Returns:
        (features, labels) where `features` is a dict with the single key
        'images' holding the batched stamps, and `labels` is the matching
        batch of labels.
    """
    # Pull the selected examples out as numpy arrays
    stamps = np.asarray(dataset['stamp'])[indices]
    print("stamps.shape:", stamps.shape)
    labels = np.asarray(dataset['label'])[indices]
    print("labels.shape:", labels.shape)

    # Get the data into tensorflow (shapes are verified against the arrays)
    images_const = tf.constant(stamps, shape=stamps.shape, verify_shape=True)
    labels_const = tf.constant(labels, shape=labels.shape, verify_shape=True)

    print("batch_input_fn sizing : ", images_const.shape, )

    # Producer that hands out one (image, label) pair at a time
    do_shuffle = seed is not None
    single_image, single_label = tf.train.slice_input_producer(
        [images_const, labels_const],
        num_epochs=num_epochs,
        shuffle=do_shuffle,
        seed=seed,
    )

    # Singular entries here become batches once passed through tf.train.batch()
    batched = tf.train.batch(
        {'images': single_image, 'labels': single_label},
        batch_size,
        num_threads=1,
        capacity=batch_size*2,
        enqueue_many=False,
        allow_smaller_final_batch=False,
    )

    # Estimator contract :
    # 1) a mapping of feature names to Tensors with the corresponding feature data, and
    # 2) the corresponding labels
    feature_batch = {'images': batched['images']}
    return feature_batch, batched['labels']

# Batch size and number of epochs used by the input pipelines below
batch_size = 100
epochs = 20


In [None]:
if do_training:
    # Set up logging for predictions
    # Log the values in the "softmax_tensor" tensor with label "probabilities".
    # The hook is built here but is NOT currently attached : re-enable the
    # `monitors` argument below to use it.
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook( tensors=tensors_to_log, every_n_secs=20 ) #every_n_iter=1000 )

    # Train the model (this replaces any previously assigned value of `epochs`)
    epochs=5

    # No `steps` argument is passed : the amount of training is governed by
    # the `num_epochs` handed to the input pipeline.
    cnn_classifier.fit(
        input_fn=lambda: batch_input_fn(dataset, train_indices, batch_size=batch_size, seed=42, num_epochs=epochs),
        #monitors=[logging_hook],
    )

In [None]:
# Configure the accuracy metric for evaluation :
# compares the "classes" entry of the model's predictions against the labels
cnn_metrics = {
  "accuracy":
      learn.MetricSpec(
          metric_fn=tf.metrics.accuracy, prediction_key="classes"),
}

# Evaluate the model on the held-out test indices and print results.
# The input pipeline uses its defaults here (no seed => no shuffling, num_epochs=1).
cnn_eval_results = cnn_classifier.evaluate(
    input_fn=lambda: batch_input_fn(dataset, test_indices, batch_size=batch_size),
    metrics=cnn_metrics,
)

print(cnn_eval_results)

Ok, so the built Estimator gets ~99% accuracy on the test set in <20 secs on CPU.

In [None]:
?tf.reduce_sum()

### Now try on some unseen images