# TensorFlow Estimators

In [1]:
import math
import numpy as np

import tensorflow as tf

In [2]:
print(tf.__version__)

1.2.1


## Assembling our data set

In [20]:
num_examples_per_class = 5000

classes = ['airplane','alarm clock','ambulance','angel','ant','anvil','apple','axe','banana','bandage','barn','baseball bat','baseball',
           'basket','basketball','bathtub','beach','bear','beard','bed','bee','belt','bicycle','binoculars','birthday cake','blueberry',
           'book','boomerang','bottlecap','bowtie','bracelet','brain','bread','broom','bulldozer','bus','bush','butterfly','cactus','cake',
           ]

len(classes)

40

In [21]:
x_data = np.load("./x_data_40_classes_5k.npy")
x_data.shape

(200000, 784)

In [22]:
labels = [np.full((5000,), classes.index(qdraw)) for qdraw in classes]

## Concat the arrays together
y_data = np.concatenate(labels,axis=0)
y_data.shape

(200000,)

In [23]:
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

In [24]:
#x_data = x_data[:500000]
#y_data = y_data[:500000]

x_data,y_data = unison_shuffled_copies(x_data,y_data)

x_data = x_data.astype('float32')
y_data = y_data.astype('float32')
x_data /= 255.

print(x_data.shape)
print(y_data.shape)

number_of_records = len(y_data)
test_fraction = 0.1

print(int(number_of_records*test_fraction))

#making a train / test split
test_split = int(number_of_records*(1-test_fraction))

x_train, y_train = x_data[0:test_split], y_data[0:test_split]
x_test, y_test = x_data[test_split:], y_data[test_split:]

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(200000, 784)
(200000,)
20000
(180000, 784)
(180000,)
(20000, 784)
(20000,)


In [25]:
# The CNN we'll use later expects a color channel dimension
# Let's add this here
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

# Convert to one-hot.
y_train = tf.contrib.keras.utils.to_categorical(y_train, num_classes=100)
y_test = tf.contrib.keras.utils.to_categorical(y_test, num_classes=100)

print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')



180000 train samples
20000 test samples


### Using datasetAPI

just showing the concept here , you wouldn't need this open like this in the real model

In [27]:
tf.reset_default_graph()

ds = tf.contrib.data.Dataset.from_tensor_slices(({'x':x_train},y_train))
ds = ds.repeat().shuffle(100000).batch(16)

In [28]:
next_batch = ( ds.make_one_shot_iterator().get_next())

## Make our model

We are going to use the tf.layers API 

In [26]:
#tf.layers.batch_normalization(inputs = )

In [27]:
def build_cnn(features, mode):
    
    image_batch = features['x']
    
    with tf.name_scope("conv1"):  
        conv1 = tf.layers.conv2d(inputs=image_batch, filters=32, kernel_size=[3, 3],
                                 padding='same', activation=tf.nn.relu)

    with tf.name_scope("pool1"):  
        pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

    with tf.name_scope("conv2"):  
        conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[3, 3],
                                 padding='same', activation=tf.nn.relu)

    with tf.name_scope("pool2"):  
        pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
        
    with tf.name_scope("batchnorm"):
        batch_norm = tf.layers.batch_normalization(inputs= pool2)

    with tf.name_scope("dense"):  
        # The 'images' are now 7x7 (28 / 2 / 2), and we have 64 channels per image
        pool2_flat = tf.reshape(batch_norm, [-1, 7 * 7 * 64])
        dense = tf.layers.dense(inputs=pool2_flat, units=128, activation=tf.nn.relu)

    with tf.name_scope("dropout"):  
        # Add some dropout 20% dropout
        dropout = tf.layers.dropout(
            inputs=dense, rate=0.2, training = mode == tf.estimator.ModeKeys.TRAIN)

    # our logits has to put 
    logits = tf.layers.dense(inputs=dropout, units=100)

    return logits

## Model function

this handles the different modes of training, evaluting, predicting

In [28]:
def model_fn(features, labels, mode):
    
    
    logits = build_cnn(features, mode)
    
    # Generate Predictions
    classes = tf.argmax(logits, axis=1)
    predictions = {
        'classes': classes,
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        # Return an EstimatorSpec for prediction
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
        
    # Compute the loss, per usual.
    loss = tf.losses.softmax_cross_entropy(
        onehot_labels=labels, logits=logits)
        
    if mode == tf.estimator.ModeKeys.TRAIN:
        
        # Configure the Training Op
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            learning_rate=1e-3,
            optimizer='Adam')

        # Return an EstimatorSpec for training
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions,
                                      loss=loss, train_op=train_op)    

    assert mode == tf.estimator.ModeKeys.EVAL
    
    # Configure the accuracy metric for evaluation
    metrics = {'accuracy': tf.metrics.accuracy(classes, tf.argmax(labels, axis=1))}
    
    return tf.estimator.EstimatorSpec(mode=mode, 
                                      predictions=predictions, 
                                      loss=loss,
                                      eval_metric_ops=metrics)

## Input functions

1 for training dataset 

1 for test dataset

In [36]:
def train_input():
    ds = tf.contrib.data.Dataset.from_tensor_slices(({'x':x_train},y_train))
    ds = ds.repeat().shuffle(100000).batch(16)
    
    iterator = ds.make_one_shot_iterator()
    images, labels = iterator.get_next()
    
    return images, labels
    
    
def test_input():
    ds = tf.contrib.data.Dataset.from_tensor_slices(({'x':x_test},y_test))
    ds = ds.repeat().shuffle(1000).batch(16)
    
    iterator = ds.make_one_shot_iterator()
    images, labels = iterator.get_next()
    
    return images, labels

In [30]:
tf.reset_default_graph()

### Compiling the Estimator

In [31]:
estimator = tf.estimator.Estimator(model_fn=model_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_every_n_hours': 10000, '_model_dir': '/var/folders/pl/nlv4vvks1bvc3rs628vdcldc0000gn/T/tmps2nrya7u', '_keep_checkpoint_max': 5, '_save_summary_steps': 100, '_tf_random_seed': 1, '_save_checkpoints_steps': None, '_session_config': None, '_log_step_count_steps': 100, '_save_checkpoints_secs': 600}


### Training Function

In [32]:
# the training function
estimator.train(input_fn=train_input, steps=2000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/pl/nlv4vvks1bvc3rs628vdcldc0000gn/T/tmps2nrya7u/model.ckpt.
INFO:tensorflow:loss = 4.62892, step = 1
INFO:tensorflow:global_step/sec: 38.3018
INFO:tensorflow:loss = 3.21171, step = 101 (2.612 sec)
INFO:tensorflow:global_step/sec: 65.2384
INFO:tensorflow:loss = 2.61186, step = 201 (1.532 sec)
INFO:tensorflow:global_step/sec: 65.5066
INFO:tensorflow:loss = 3.25405, step = 301 (1.527 sec)
INFO:tensorflow:global_step/sec: 65.4136
INFO:tensorflow:loss = 1.73261, step = 401 (1.529 sec)
INFO:tensorflow:global_step/sec: 65.7043
INFO:tensorflow:loss = 2.92336, step = 501 (1.522 sec)
INFO:tensorflow:global_step/sec: 64.6708
INFO:tensorflow:loss = 1.56658, step = 601 (1.546 sec)
INFO:tensorflow:global_step/sec: 64.5974
INFO:tensorflow:loss = 1.44781, step = 701 (1.548 sec)
INFO:tensorflow:global_step/sec: 64.4743
INFO:tensorflow:loss = 1.85731, step = 801 (1.551 sec)
INFO:tensorflow:global_step/

<tensorflow.python.estimator.estimator.Estimator at 0x152759080>

### Evaluating the Estimator

In [38]:
# We Evaluate now using the Test input function
evaluation = estimator.evaluate(input_fn=test_input)
print(evaluation)

INFO:tensorflow:Starting evaluation at 2017-10-11-02:39:23
INFO:tensorflow:Restoring parameters from /var/folders/pl/nlv4vvks1bvc3rs628vdcldc0000gn/T/tmps2nrya7u/model.ckpt-2000


KeyboardInterrupt: 

### Now lets check our model bby making some predictions

In [35]:
# Here's how to print predictions on a few examples
MAX_TO_PRINT = 5

# This returns a generator object
predictions = estimator.predict(input_fn=test_input)
i = 0
for p in predictions:
    true_label = classes[np.argmax(y_test[i])]
    predicted_label = classes[p['classes']]
    #print(predicted_label)
    print("Example %d. True: %s, Predicted: %s" % (i, true_label, predicted_label))
    i += 1
    if i == MAX_TO_PRINT: break

INFO:tensorflow:Restoring parameters from /var/folders/pl/nlv4vvks1bvc3rs628vdcldc0000gn/T/tmps2nrya7u/model.ckpt-2000
Example 0. True: beard, Predicted: alarm clock
Example 1. True: ant, Predicted: angel
Example 2. True: butterfly, Predicted: bulldozer
Example 3. True: bed, Predicted: bandage
Example 4. True: bear, Predicted: bus
