# TensorFlow Estimators

In [1]:
import math
import numpy as np

import tensorflow as tf

In [2]:
print(tf.__version__)

1.3.0


## Assembling our data set

In [3]:
num_examples_per_class = 10000

classes = ['airplane','alarm clock','ambulance','angel','ant','anvil','apple','axe','banana','bandage','barn','baseball bat','baseball',
           'basket','basketball','bathtub','beach','bear','beard','bed','bee','belt','bicycle','binoculars','birthday cake','blueberry',
           'book','boomerang','bottlecap','bowtie','bracelet','brain','bread','broom','bulldozer','bus','bush','butterfly','cactus','cake',
           'calculator','calendar','camel','camera','campfire','candle','cannon','canoe','car','carrot','cello','computer',
           'cat','chandelier','clock','cloud','coffee cup','compass','cookie','couch','cow','crab','crayon','crocodile','crown',
           'cup','diamond','dog','dolphin','donut','dragon','dresser','drill','drums','duck','dumbbell','ear','elbow',
           'elephant','envelope','eraser','eye','eyeglasses','face','fan','feather','fence','finger','fire hydrant',
           'fireplace','firetruck','fish','flamingo','flashlight','flip flops','floor lamp','flower','flying saucer',
           'foot','fork']

len(classes)

100

In [4]:


x_data = np.load("./x_data_100_classes_10k.npy")
x_data.shape

(1000000, 784)

In [5]:
labels = [np.full((10000,), classes.index(qdraw)) for qdraw in classes]

## Concat the arrays together
y_data = np.concatenate(labels,axis=0)
y_data.shape

(1000000,)

In [6]:
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

In [7]:
x_data,y_data = unison_shuffled_copies(x_data,y_data)

x_data = x_data.astype('float32')
y_data = y_data.astype('float32')
x_data /= 255.

print(x_data.shape)
print(y_data.shape)

number_of_records = len(y_data)
test_fraction = 0.1

print(int(number_of_records*test_fraction))

#making a train / test split
test_split = int(number_of_records*(1-test_fraction))

x_train, y_train = x_data[0:test_split], y_data[0:test_split]
x_test, y_test = x_data[test_split:], y_data[test_split:]

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(1000000, 784)
(1000000,)
100000
(900000, 784)
(900000,)
(100000, 784)
(100000,)


In [8]:
# The CNN we'll use later expects a color channel dimension
# Let's add this here
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

# Convert to one-hot.
y_train = tf.contrib.keras.utils.to_categorical(y_train, num_classes=100)
y_test = tf.contrib.keras.utils.to_categorical(y_test, num_classes=100)

print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

900000 train samples
100000 test samples


## Make our model

We are going to use the tf.layers API 

In [10]:
def build_cnn(features, mode):
    
    image_batch = features['x']
    
    with tf.name_scope("conv1"):  
        conv1 = tf.layers.conv2d(inputs=image_batch, filters=32, kernel_size=[3, 3],
                                 padding='same', activation=tf.nn.relu)

    with tf.name_scope("pool1"):  
        pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    with tf.name_scope("conv2"):  
        conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[3, 3],
                                 padding='same', activation=tf.nn.relu)

    with tf.name_scope("pool2"):  
        pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
        
    with tf.name_scope("batchnorm"):
        batch_norm = tf.layers.batch_normalization(inputs= pool2)

    with tf.name_scope("dense"):  
        # The 'images' are now 7x7 (28 / 2 / 2), and we have 64 channels per image
        pool2_flat = tf.reshape(batch_norm, [-1, 7 * 7 * 64])
        dense = tf.layers.dense(inputs=pool2_flat, units=128, activation=tf.nn.relu)

    with tf.name_scope("dropout"):  
        # Add some dropout 20% dropout
        dropout = tf.layers.dropout(
            inputs=dense, rate=0.2, training = mode == tf.estimator.ModeKeys.TRAIN)

    # our logits has to put 
    logits = tf.layers.dense(inputs=dropout, units=100)

    return logits

## Model function

this handles the different modes of training, evaluting, predicting

In [11]:
def model_fn(features, labels, mode):
    
    
    logits = build_cnn(features, mode)
    
    # Generate Predictions
    classes = tf.argmax(logits, axis=1)
    predictions = {
        'classes': classes,
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Return an EstimatorSpec for prediction
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
        
    # Compute the loss, per usual.
    loss = tf.losses.softmax_cross_entropy(
        onehot_labels=labels, logits=logits)
        
    if mode == tf.estimator.ModeKeys.TRAIN:
        
        # Configure the Training Op
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            learning_rate=1e-3,
            optimizer='Adam')

        # Return an EstimatorSpec for training
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions,
                                      loss=loss, train_op=train_op)    

    assert mode == tf.estimator.ModeKeys.EVAL
    
    # Configure the accuracy metric for evaluation
    metrics = {'accuracy': tf.metrics.accuracy(classes, tf.argmax(labels, axis=1))}
    
    return tf.estimator.EstimatorSpec(mode=mode, 
                                      predictions=predictions, 
                                      loss=loss,
                                      eval_metric_ops=metrics)

## Input functions

1 for training dataset 

1 for test dataset

In [12]:

train_input = tf.estimator.inputs.numpy_input_fn(
    {'x': x_train},
    y_train, 
    num_epochs=None, # repeat forever
    shuffle=True # 
)

test_input = tf.estimator.inputs.numpy_input_fn(
    {'x': x_test},
    y_test,
    num_epochs=1, # loop through the dataset once
    shuffle=False # don't shuffle the test data
)

### Compiling the Estimator

In [13]:
estimator = tf.estimator.Estimator(model_fn=model_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_session_config': None, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_model_dir': '/var/folders/pl/nlv4vvks1bvc3rs628vdcldc0000gn/T/tmp91s1yu5e', '_save_summary_steps': 100, '_tf_random_seed': 1, '_keep_checkpoint_every_n_hours': 10000}


### Training Function

In [14]:
# the training function
estimator.train(input_fn=train_input, steps=2000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/pl/nlv4vvks1bvc3rs628vdcldc0000gn/T/tmp91s1yu5e/model.ckpt.
INFO:tensorflow:step = 1, loss = 4.60765
INFO:tensorflow:global_step/sec: 9.79593
INFO:tensorflow:step = 101, loss = 3.26781 (10.210 sec)
INFO:tensorflow:global_step/sec: 9.84055
INFO:tensorflow:step = 201, loss = 2.82332 (10.162 sec)
INFO:tensorflow:global_step/sec: 9.97492
INFO:tensorflow:step = 301, loss = 2.38729 (10.025 sec)
INFO:tensorflow:global_step/sec: 9.95216
INFO:tensorflow:step = 401, loss = 2.19031 (10.048 sec)
INFO:tensorflow:global_step/sec: 9.6768
INFO:tensorflow:step = 501, loss = 2.10521 (10.334 sec)
INFO:tensorflow:global_step/sec: 9.74849
INFO:tensorflow:step = 601, loss = 1.93064 (10.258 sec)
INFO:tensorflow:global_step/sec: 9.67001
INFO:tensorflow:step = 701, loss = 1.85593 (10.341 sec)
INFO:tensorflow:global_step/sec: 10.3118
INFO:tensorflow:step = 801, loss = 1.70718 (9.697 sec)
INFO:tensorflow:global

<tensorflow.python.estimator.estimator.Estimator at 0x1260e2588>

### Evaluating the Estimator

In [15]:
# We Evaluate now using the Test input function
evaluation = estimator.evaluate(input_fn=test_input)
print(evaluation)

INFO:tensorflow:Starting evaluation at 2017-10-11-02:44:27
INFO:tensorflow:Restoring parameters from /var/folders/pl/nlv4vvks1bvc3rs628vdcldc0000gn/T/tmp91s1yu5e/model.ckpt-2000
INFO:tensorflow:Finished evaluation at 2017-10-11-02:44:53
INFO:tensorflow:Saving dict for global step 2000: accuracy = 0.65935, global_step = 2000, loss = 1.36915
{'global_step': 2000, 'loss': 1.3691481, 'accuracy': 0.65934998}


### Now lets check our model bby making some predictions

In [16]:
# Here's how to print predictions on a few examples
MAX_TO_PRINT = 5

# This returns a generator object
predictions = estimator.predict(input_fn=test_input)
i = 0
for p in predictions:
    true_label = classes[np.argmax(y_test[i])]
    predicted_label = classes[p['classes']]
    #print(predicted_label)
    print("Example %d. True: %s, Predicted: %s" % (i, true_label, predicted_label))
    i += 1
    if i == MAX_TO_PRINT: break

INFO:tensorflow:Restoring parameters from /var/folders/pl/nlv4vvks1bvc3rs628vdcldc0000gn/T/tmp91s1yu5e/model.ckpt-2000
Example 0. True: fork, Predicted: fork
Example 1. True: flip flops, Predicted: flip flops
Example 2. True: dresser, Predicted: dresser
Example 3. True: clock, Predicted: clock
Example 4. True: couch, Predicted: computer
