# 03 CNN - Max pooling

In [None]:
import utils

import pandas as pd
import numpy as np
import sklearn.model_selection
import tensorflow as tf

In [None]:
# Load the dataset through the project helper.
# `data` is the tabular part (previewed with .head() and split below);
# `imgs` is indexed by the image paths stored in the 'eye_left_image' /
# 'eye_right_image' columns — presumably a dict of path -> pixel array, TODO confirm.
data, imgs = utils.load_data()

In [None]:
# Preview the first rows of the table to check the available columns.
data.head()

In [None]:
# TRAIN-TEST
# Hold out 5% of all rows as the final test set; the rest is kept for training.
train_data, test_data = sklearn.model_selection.train_test_split(data, train_size=0.95, random_state=22)
test_imgs_left = np.array([imgs[path] for path in test_data['eye_left_image']])
test_imgs_right = np.array([imgs[path] for path in test_data['eye_right_image']])

# TRAIN-VALIDATION
# Split the remaining training rows (not the full `data`): re-splitting `data`
# with the same train_size and random_state would reproduce the test split
# exactly, making validation identical to test.
train_data, validation_data = sklearn.model_selection.train_test_split(train_data, train_size=0.95, random_state=22)
validation_imgs_left = np.array([imgs[path] for path in validation_data['eye_left_image']])
validation_imgs_right = np.array([imgs[path] for path in validation_data['eye_right_image']])

In [None]:
# Report the number of rows in each split as a quick sanity check.
print("Train length: {}".format(len(train_data)))
print("Validation length: {}".format(len(validation_data)))
print("Test length: {}".format(len(test_data)))

## Utils

In [None]:
def get_placeholders(n_features, img_shape, n_labels):
    '''Create the float32 input placeholders of the graph.

    Every placeholder has an unspecified (None) leading batch dimension.

    Args:
        n_features: number of columns of the "features" placeholder.
        img_shape: per-image shape, appended after the batch dimension
            for both eye-image placeholders.
        n_labels: number of columns of the "labels" placeholder.

    Returns:
        Tuple (features, left_imgs, right_imgs, labels).
    '''
    batched_img_shape = (None, *img_shape)

    features = tf.placeholder(dtype=tf.float32, shape=(None, n_features), name="features")
    left_imgs = tf.placeholder(dtype=tf.float32, shape=batched_img_shape, name="left_imgs")
    right_imgs = tf.placeholder(dtype=tf.float32, shape=batched_img_shape, name="right_imgs")
    labels = tf.placeholder(dtype=tf.float32, shape=(None, n_labels), name="labels")

    return features, left_imgs, right_imgs, labels

In [None]:
def get_loss(labels, predictions):
    '''Mean euclidean distance between each label row and its prediction.

    The norm is taken along axis 1 (one distance per row) and the
    resulting distances are averaged into a single scalar.
    '''
    errors = tf.subtract(labels, predictions)
    distances = tf.norm(errors, ord='euclidean', axis=1)
    return tf.reduce_mean(distances)

## Model

In [None]:
def _eye_tower(eye_imgs, new_shape):
    '''Convolutional stack applied to one batch of eye images.

    conv 5x5 (32 filters, SAME) -> 2x2 max-pool -> conv 3x3 (128 filters,
    stride 2, VALID) -> flatten. Returns the flattened feature tensor.
    '''
    eye_input = tf.reshape(eye_imgs, new_shape)
    # 20x30x1
    conv_01 = tf.layers.conv2d(
        inputs=eye_input,
        filters=32,
        kernel_size=5,
        strides=1,
        padding="SAME"
    )
    # 20x30x32
    pool_01 = tf.layers.max_pooling2d(
        inputs=conv_01,
        pool_size=[2, 2],
        strides=2
    )
    # 10x15x32
    conv_02 = tf.layers.conv2d(
        inputs=pool_01,
        filters=128,
        kernel_size=3,
        strides=2,
        padding="VALID"
    )
    # 4x7x128
    return tf.contrib.layers.flatten(conv_02)


def get_model(features, left_imgs, right_imgs, training, dropout_rate=0.7):
    '''Build the two-eye CNN regressor and return its output tensor.

    Each eye image goes through its own convolutional tower (separate
    weights); both towers are flattened, concatenated and reduced by a dense
    layer, then merged with the tabular features and passed through two more
    dense layers before the linear 2-unit output.

    Args:
        features: tensor of tabular features, concatenated on axis 1 with
            the image embedding.
        left_imgs: left-eye images, reshaped internally to (-1, 20, 30, 1).
        right_imgs: right-eye images, reshaped internally to (-1, 20, 30, 1).
        training: forwarded to every tf.layers.dropout call; dropout is only
            applied when it is true.
        dropout_rate: rate passed to every dropout layer.

    Returns:
        Tensor with 2 units per row (no activation).
    '''
    new_shape = np.array([-1, 20, 30, 1])
    with tf.variable_scope('model'):
        # Left tower is built first, then the right one, so the automatically
        # generated layer names (conv2d, conv2d_1, ...) keep a stable order.
        left_flat = _eye_tower(left_imgs, new_shape)
        right_flat = _eye_tower(right_imgs, new_shape)

        # Concat both eye embeddings & dense
        img_concat = tf.concat(
            values=[left_flat, right_flat],
            axis=1
        )
        img_dense = tf.layers.dense(
            inputs=img_concat,
            units=128,
            activation=tf.nn.relu,
        )
        img_dropout = tf.layers.dropout(
            inputs=img_dense,
            rate=dropout_rate,
            training=training
        )

        # Concat imgs with features, dense x 2 and output
        global_concat = tf.concat(
            values=[features, img_dropout],
            axis=1
        )
        global_dense01 = tf.layers.dense(global_concat, 128, activation=tf.nn.relu)
        global_dropout01 = tf.layers.dropout(
            inputs=global_dense01,
            rate=dropout_rate,
            training=training
        )
        global_dense02 = tf.layers.dense(global_dropout01, 64, activation=tf.nn.relu)
        global_dropout02 = tf.layers.dropout(
            inputs=global_dense02,
            rate=dropout_rate,
            training=training
        )
        # The output layer must consume the last hidden layer. Feeding it
        # global_dropout01 would leave the 64-unit layer disconnected from the
        # output (never trained, never used).
        # NOTE: checkpoints saved with the output wired to the 128-unit layer
        # have a different output kernel shape and cannot be restored here.
        global_dense03 = tf.layers.dense(global_dropout02, 2, activation=None)

        return global_dense03

## Parameters

In [None]:
# Name used as prefix when saving / loading checkpoints
MODEL_NAME = '02-CNN-maxpooling'

# ---------------------------------------------------------------
# Data parameters
# ---------------------------------------------------------------
IMG_SHAPE = (20, 30)
FEATURES = [
    # right eye bounding box
    'eye_right_x',
    'eye_right_y',
    'eye_right_width',
    'eye_right_height',
    # left eye bounding box
    'eye_left_x',
    'eye_left_y',
    'eye_left_width',
    'eye_left_height',
    # face bounding box
    'face_x',
    'face_y',
    'face_width',
    'face_height',
]
TARGETS = ['x', 'y']

# ---------------------------------------------------------------
# Hyperparams
# ---------------------------------------------------------------
EPOCHS = 10
BATCH_SIZE = 512
LEARNING_RATE = 0.005
DROPOUT_RATE = 0.4

### Tests

| Epochs | Batch Size | Learning rate  | Dropout rate | Train | Validation | Test |
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
| 10 | 512 | 0.005 | 0.4 | 0.38746994733810425 | 0.40232908725738525 | 0.3758082985877991 |

## Train

In [None]:
graph_train = tf.Graph()
with tf.Session(graph=graph_train) as sess:
    t_features, t_imgs_left, t_imgs_right, t_labels = get_placeholders(len(FEATURES), IMG_SHAPE, len(TARGETS))
    # Dropout switch: defaults to True so optimisation steps train with
    # dropout; it is fed False whenever a loss is only being measured, so the
    # reported numbers are not perturbed by randomly dropped units.
    t_training = tf.placeholder_with_default(True, shape=(), name="training")
    model = get_model(t_features, t_imgs_left, t_imgs_right, t_training, DROPOUT_RATE)
    loss = get_loss(t_labels, model)
    optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(loss=loss)
    sess.run(tf.global_variables_initializer())

    # The validation inputs never change, so build their feed once.
    validation_feed = {
        t_features: validation_data[FEATURES],
        t_imgs_left: validation_imgs_left,
        t_imgs_right: validation_imgs_right,
        t_labels: validation_data[TARGETS],
        t_training: False,
    }

    steps = 0
    for epoch in range(EPOCHS):
        for b_data, b_imgs_left, b_imgs_right in utils.get_batch(train_data, imgs, BATCH_SIZE):
            steps += 1
            batch_feed = {
                t_features: b_data[FEATURES],
                t_imgs_left: b_imgs_left,
                t_imgs_right: b_imgs_right,
                t_labels: b_data[TARGETS],
            }
            # One optimisation step (dropout active through the default flag)
            sess.run(optimizer, feed_dict=batch_feed)
            # Print Info every 20 steps
            if steps % 20 == 0:
                # Both losses are measured with dropout disabled
                train_loss = sess.run(loss, feed_dict={**batch_feed, t_training: False})
                validations_loss = sess.run(loss, feed_dict=validation_feed)
                print("Epoch: {} of {}".format(epoch+1, EPOCHS))
                print("Loss train: {}".format(train_loss))
                print("Loss validation: {}".format(validations_loss))
        utils.model_save(sess, MODEL_NAME+"."+str(epoch).zfill(4))  # Save after each epoch
    utils.model_save(sess, MODEL_NAME+".final")

## Test

In [None]:
graph_test = tf.Graph()
with tf.Session(graph=graph_test) as sess:
    # Rebuild the network in a fresh graph with dropout disabled
    t_features, t_imgs_left, t_imgs_right, t_labels = get_placeholders(
        len(FEATURES),
        IMG_SHAPE,
        len(TARGETS),
    )
    model = get_model(t_features, t_imgs_left, t_imgs_right, training=False)
    loss = get_loss(t_labels, model)

    # Restore the weights written at the end of training
    utils.model_load(sess, MODEL_NAME+".final")

    # Loss over the whole held-out test split in a single pass
    test_loss = sess.run(loss, feed_dict={
        t_features: test_data[FEATURES],
        t_imgs_left: test_imgs_left,
        t_imgs_right: test_imgs_right,
        t_labels: test_data[TARGETS],
    })
    print("Loss test: {}".format(test_loss))

## Predict

In [None]:
graph_m = tf.Graph()
with tf.Session(graph=graph_m) as sess:
    # Inference graph: same network, dropout disabled
    t_features, t_imgs_left, t_imgs_right, t_labels = get_placeholders(
        len(FEATURES),
        IMG_SHAPE,
        len(TARGETS),
    )
    model = get_model(t_features, t_imgs_left, t_imgs_right, training=False)

    # Restore the weights written at the end of training
    utils.model_load(sess, MODEL_NAME+".final")

    # Labels are not fed: only the network output is evaluated
    predictions = model.eval(
        feed_dict={
            t_features: test_data[FEATURES],
            t_imgs_left: test_imgs_left,
            t_imgs_right: test_imgs_right,
        },
        session=sess,
    )
    print("PREDICTIONS: {}".format(predictions))
