In [None]:
# Import training/test files in Pandas

import pandas as pd

# Read both splits in one pass; later cells take the first column as image
# file names and every remaining column as labels.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in ('training.csv', 'testing.csv')
)

In [None]:
# Note: we are enabling eager execution for debugging!
# Later cells iterate over tf.data datasets with plain Python `for` loops,
# which is what eager mode is used for here.
# NOTE(review): tf.enable_eager_execution is a TF 1.x entry point — presumably
# this notebook targets TensorFlow 1.x; confirm before upgrading.

import numpy as np
import tensorflow as tf
tf.enable_eager_execution()

In [None]:
# Example code for handling datasets
import matplotlib.pyplot as plt

# Load filenames (first column) and labels (all remaining columns)
filenames = tf.constant(train_data.iloc[:, 0].tolist())
labels = tf.constant(train_data.iloc[:, 1:].values)

# Add to a dataset object: one (filename, label-row) pair per element
dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))

# We can debug using eager execution: peek at the first batch of four samples.
# The loop variables use their own names so they do not rebind `labels` above
# (the first element of each pair is a batch of file names, not images).
for filename_batch, label_batch in dataset.batch(4).take(1):
    print(filename_batch)
    print(label_batch)

In [None]:
# Reads an image from a file, decodes it into a dense tensor, and resizes it
# to a fixed shape.
def _parse_function(filename, label):
    image_string = tf.read_file(filename) 
    image_decoded = tf.image.decode_jpeg(image_string, channels=3) # Channels needed because some test images are b/w
    image_resized = tf.image.resize_images(image_decoded, [40, 40])
    image_shape = tf.cast(tf.shape(image_decoded), tf.float32)
    label = tf.concat([label[0:5] / image_shape[0], label[5:10] / image_shape[1], label[10:]], axis=0)
    return {"x": image_resized}, label

In [None]:
# This snippet is adapted from here: https://www.tensorflow.org/guide/datasets
def input_fn(dataframe, is_eval=False, batch_size=64, shuffle_buffer_size=1000):
    """Build the batched tf.data pipeline for a dataframe of image paths and labels.

    Args:
        dataframe: pandas DataFrame whose first column holds the image file
            names and whose remaining columns hold numeric labels (cast to
            float32 here).
        is_eval: when True the data is only batched, giving a single pass in
            the original row order; when False the data is repeated
            indefinitely and shuffled before batching.
        batch_size: number of samples per batch. Defaults to 64, the value
            that used to be hard-coded.
        shuffle_buffer_size: size of the shuffle buffer used for training.
            Defaults to 1000, the value that used to be hard-coded.

    Returns:
        A tf.data.Dataset whose elements are the batched output of
        ``_parse_function``.
    """
    # Load the list of files
    filenames = tf.constant(dataframe.iloc[:, 0].tolist())

    # Load the labels
    labels = tf.constant(dataframe.iloc[:, 1:].values.astype(np.float32))

    # Build the dataset with image processing on top of it
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
    dataset = dataset.map(_parse_function)

    # Add repetition and shuffling only when training. The repeat-then-shuffle
    # order is kept as it was, so samples from neighbouring epochs can mix
    # inside the shuffle buffer.
    if not is_eval:
        dataset = dataset.repeat().shuffle(shuffle_buffer_size)

    return dataset.batch(batch_size)

In [None]:
# Check the image
import matplotlib.pyplot as plt
# is_eval=True keeps the rows unshuffled, so this always shows the same batch.
first_train_batch = input_fn(train_data, is_eval=True).take(1)
for imgs, labels in first_train_batch:
    first_image = imgs['x'][0]
    # Scale pixel values down by 255 before handing the image to imshow.
    plt.imshow(first_image / 255)
    print(labels[0])

In [None]:
## Standard classical estimator (single-task only!)

In [None]:
# Reimplement the feature extraction from the original paper
def extract_features(features):
    """Shared convolutional feature extractor (reimplemented from the original paper).

    Args:
        features: dict whose "x" entry holds the image batch; it is reshaped
            to [-1, 40, 40, 3] before the first convolution.

    Returns:
        The output of the final 100-unit ReLU dense layer.
    """
    net = tf.reshape(features["x"], [-1, 40, 40, 3])

    # (filters, kernel_size) of the three conv layers that are each followed
    # by a 2x2 / stride-2 max pooling.
    pooled_stages = [(16, [5, 5]), (48, [3, 3]), (64, [3, 3])]
    for n_filters, kernel in pooled_stages:
        net = tf.layers.conv2d(inputs=net, filters=n_filters, kernel_size=kernel, padding="same", activation=tf.nn.relu)
        net = tf.layers.max_pooling2d(inputs=net, pool_size=[2, 2], strides=2)

    # Fourth convolution, with no pooling after it.
    net = tf.layers.conv2d(inputs=net, filters=64, kernel_size=[2, 2], padding="same", activation=tf.nn.relu)

    # Flatten to 5 * 5 * 64 values per sample, then the dense layer.
    flattened = tf.reshape(net, [-1, 5 * 5 * 64])
    return tf.layers.dense(inputs=flattened, units=100, activation=tf.nn.relu)

In [None]:
# Adapted from here: https://www.tensorflow.org/tutorials/layers
def single_task_cnn_model_fn(features, labels, mode):
    """Estimator model_fn regressing two values from the shared CNN features.

    Args:
        features: feature dict forwarded to ``extract_features``.
        labels: label batch; only columns 2 and 7 (the slice ``2:8:5``) are
            used as regression targets. Not read in PREDICT mode.
        mode: one of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec`` carrying the predictions (PREDICT),
        the loss and train op (TRAIN), or the loss and an RMSE metric
        (any other mode, i.e. evaluation).
    """
    hidden = extract_features(features)
    predictions = tf.layers.dense(inputs=hidden, units=2)

    # Prediction needs no labels, so return before touching them.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions={"predictions": predictions})

    # Mean squared error against the two selected label columns.
    targets = labels[:, 2:8:5]
    loss = tf.losses.mean_squared_error(labels=targets, predictions=predictions)

    # Anything that is neither PREDICT nor TRAIN is an evaluation run.
    if mode != tf.estimator.ModeKeys.TRAIN:
        rmse = tf.metrics.root_mean_squared_error(
            labels=targets, predictions=predictions)
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=loss, eval_metric_ops={"rmse": rmse})

    # Single optimization step with Adam's default settings.
    train_op = tf.train.AdamOptimizer().minimize(
        loss=loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

In [None]:
# Build the Estimator
# No model_dir is passed here, unlike the single-head estimator created
# further down, so checkpoints are not written to a path chosen in this notebook.
single_task_classifier = tf.estimator.Estimator(
    model_fn=single_task_cnn_model_fn)

In [None]:
# Train the model
# The lambda turns input_fn into a zero-argument callable bound to train_data;
# training stops after 2000 steps.
single_task_classifier.train(input_fn=lambda: input_fn(train_data), steps=2000)

In [None]:
# Evaluate on the test split; is_eval=True makes input_fn skip repeat/shuffle,
# so the data is consumed once in its original order.
single_task_classifier.evaluate(input_fn=lambda: input_fn(test_data, is_eval=True))

In [None]:
# Collect every prediction for the test split into a list; the cell below
# indexes it per image as p[i]['predictions'].
p = list(single_task_classifier.predict(lambda: input_fn(test_data, is_eval=True)))

In [None]:
# Overlay one predicted point on its (resized) test image
first_test_batch = input_fn(test_data, is_eval=True).take(1)
for imgs, _ in first_test_batch:
    img_idx = 1
    plt.imshow(imgs["x"][img_idx] / 255)
    # Targets were divided by the image size in _parse_function, so multiply
    # by the 40-pixel side of the resized image to get plot coordinates.
    pred_x = p[img_idx]['predictions'][0] * 40
    pred_y = p[img_idx]['predictions'][1] * 40
    plt.scatter(pred_x, pred_y, 500, marker='x', color='red', linewidth=5)

In [None]:
## Simplifying the code of the custom estimator with the Head API

In [None]:
# Check the code here: https://www.tensorflow.org/api_docs/python/tf/contrib/estimator/regression_head

def single_head_cnn_model_fn(features, labels, mode):
    """Estimator model_fn for the two-value regression, built on a regression head.

    The head takes care of the loss, the train op and the evaluation metrics,
    so this function only produces the predictions and hands them over.

    Args:
        features: feature dict forwarded to ``extract_features``.
        labels: label batch, or None (the Estimator passes None when
            predicting). When present, only columns 2 and 7 (the slice
            ``2:8:5``) are used as targets.
        mode: one of the ``tf.estimator.ModeKeys`` values.

    Returns:
        The EstimatorSpec created by the regression head.
    """
    dense = extract_features(features)

    # Predictions
    predictions = tf.layers.dense(inputs=dense, units=2)

    # Optimizer
    optimizer = tf.train.AdamOptimizer()

    # Slice the targets only when labels exist: indexing None would raise a
    # TypeError and make predict() unusable with this model_fn.
    targets = labels[:, 2:8:5] if labels is not None else None

    # Define the head
    regression_head = tf.contrib.estimator.regression_head(label_dimension=2)
    return regression_head.create_estimator_spec(features, mode, predictions, targets, optimizer)

In [None]:
# Create the Estimator
# This one is given an explicit model_dir under /tmp.
# NOTE(review): cnn_classifier is not trained or evaluated in the visible
# cells — presumably left as an exercise; confirm.
cnn_classifier = tf.estimator.Estimator(
    model_fn=single_head_cnn_model_fn, model_dir="/tmp/cnn_single_head")

In [None]:
## Multi-task learning with the Head API

In [None]:
# Need to disable the eager execution at this point
# NOTE(review): re-importing numpy/tensorflow does not by itself switch eager
# execution off — presumably the kernel is restarted here and the cell that
# calls tf.enable_eager_execution() is skipped on the second pass; confirm.
import numpy as np
import tensorflow as tf

In [None]:
def multihead_input_fn(data, is_eval=False):
    """Input function for the multi-head model: one label entry per head.

    Args:
        data: dataframe forwarded unchanged to ``input_fn``.
        is_eval: forwarded unchanged to ``input_fn``.

    Returns:
        A tuple ``(features, labels)`` where ``labels`` is a dict with
        'head_nose' (label columns 2 and 7) and 'head_pose' (the last label
        column minus one, cast to int32).
    """
    batches = input_fn(data, is_eval=is_eval)

    # Pull out one (features, labels) pair so the label batch can be sliced
    # into per-head entries.
    # NOTE(review): the original author marked this step with "Why?" —
    # presumably a Dataset.map could build the dict instead; confirm.
    features, labels = batches.make_one_shot_iterator().get_next()

    nose_targets = labels[:, 2:8:5]
    # Shift the last column down by one before casting to integer class ids.
    pose_targets = tf.cast(labels[:, -1] - 1.0, tf.int32)

    return features, {'head_nose': nose_targets, 'head_pose': pose_targets}

In [None]:
def multi_head_cnn_model_fn(features, labels, mode):
    """Estimator model_fn with two tasks sharing one feature extractor.

    Args:
        features: feature dict forwarded to ``extract_features``.
        labels: dict keyed by head name ('head_nose', 'head_pose'); passed to
            the multi head unchanged.
        mode: one of the ``tf.estimator.ModeKeys`` values.

    Returns:
        The EstimatorSpec created by the combined multi head.
    """
    shared = extract_features(features)

    # One linear output layer per task on top of the shared features:
    # two regression outputs first, then five class logits.
    logits = {
        'head_nose': tf.layers.dense(inputs=shared, units=2),
        'head_pose': tf.layers.dense(inputs=shared, units=5),
    }

    # Each head's name matches its key in both `logits` and `labels`.
    heads = [
        tf.contrib.estimator.regression_head(name='head_nose', label_dimension=2),
        tf.contrib.estimator.multi_class_head(name='head_pose', n_classes=5),
    ]
    combined_head = tf.contrib.estimator.multi_head(heads)

    adam = tf.train.AdamOptimizer()
    return combined_head.create_estimator_spec(features, mode, logits, labels, adam)

In [None]:
# Create the Estimator
# Built from the multi-head model_fn; no model_dir is passed.
multitask_classifier = tf.estimator.Estimator(model_fn=multi_head_cnn_model_fn)

In [None]:
# Train the model
# multihead_input_fn supplies the labels as a dict with one entry per head;
# training stops after 1000 steps.
multitask_classifier.train(input_fn=lambda: multihead_input_fn(train_data), steps=1000)

In [None]:
# Evaluate both heads on the test split (is_eval=True: no repeat/shuffle).
multitask_classifier.evaluate(input_fn=lambda: multihead_input_fn(test_data, is_eval=True))

In [None]:
# Predict on the test split. This rebinds `p`, which earlier held the
# single-task predictions. The plain input_fn is used rather than
# multihead_input_fn.
# NOTE(review): presumably fine because labels are not needed for
# prediction — confirm.
p = list(multitask_classifier.predict(lambda: input_fn(test_data, is_eval=True)))
# Show the prediction entry for the first test sample.
print(p[0])