### Load Necessary Libraries 

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

import seaborn as sns
import tensorflow as tf


from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

#### Data Description 
A nice overview and the data description can be found in the [Kaggle pages](https://www.kaggle.com/c/facial-keypoints-detection/data). So I skip this part.  Let's get started with the available training data. 

#### Load Training Data

In [None]:
# The training set is read straight from the zipped CSV.
train_read = pd.read_csv(
    "../input/facial-keypoints-detection/training.zip",
    compression='zip',
)
print('shape of dataframe: ', train_read.shape)

In [None]:
# Show the first three rows, transposed so that every column of the frame is listed as a row
train_read.head(3).T

In [None]:
# Number of missing entries in each column of the training frame
print ('nan in every cols: ', train_read.isna().sum())

### Visualizing the Distribution of Keypoints 

In [None]:
# One normalised histogram per column, for the first 30 columns, on a 5 x 6 grid.
fig, axes = plt.subplots(5, 6, figsize=(15, 9))
ax = axes.ravel()
for panel, col_name in zip(ax, train_read.columns[:30]):
  panel.hist(train_read[col_name], bins=50, density=True, alpha=0.7, color='magenta')
  panel.set_title(col_name, fontsize=7)
plt.tight_layout()

* As expected the keypoints do follow a normal distribution. 
* So we can actually _fill the NaN entries with the distribution mean_ as one of the simplest strategies.   

Here we can test two different workflows. In the first one we use only the clean data, i.e. we drop all the rows that contain a NaN and train a model on what remains. 

Second workflow will use data imputation.  

**For data imputation, from ML perspective it is necessary to first split the data into train-test and then apply the transformation otherwise we are prone to induce data-leakage.**   

First I will use only Clean data here (submission score with a simple VGG-16 like network is 10.3, including horizontal flip in the augmentation).  

## Proceed Using the Clean Data. 

In [None]:
# Keep only the rows without any missing value and renumber them from 0.
train_clean = (
    train_read
    .dropna(axis=0, how='any')
    .reset_index(drop=True)
)

In [None]:
# Every 'Image' entry is a single string; splitting on spaces yields one token per pixel.
clean_imgs = [pixel_string.split(' ') for pixel_string in train_clean['Image']]
# The string tokens are converted to floats while the 2-D array is built.
clean_imgs_arr = np.array(clean_imgs, dtype='float')

In [None]:
# Lay every flat pixel row out as a 96 x 96 single-channel image, then divide by 255.
clean_imgs_arr = clean_imgs_arr.reshape((train_clean.shape[0], 96, 96, 1))
train_ims_clean = clean_imgs_arr / 255.

In [None]:
# Everything except the 'Image' column is a keypoint coordinate.
clean_keypoints_df = train_clean.drop(columns='Image')
print('check shape after dropping Image col in clean df: ', clean_keypoints_df.shape)

# Plain numpy view of the keypoint table for the augmentation functions below.
clean_keypoints_arr = clean_keypoints_df.to_numpy()
print('check shape of clean keypoints arr: ', clean_keypoints_arr.shape)

#### Functions to Standardize Keypoints and Revert Back to Original 

In [None]:
def standardize_keypoint(keypoints):
  """Rescale keypoints as (value - 48) / 48, so 0 maps to -1 and 96 maps to 1.

  The maximum and minimum of the rescaled values are printed as a sanity
  check before the rescaled keypoints are returned.
  """
  half_size = 48.
  scaled = (keypoints - half_size)/half_size
  print ('check keypoints max and min: ', np.max(scaled), np.min(scaled))
  return scaled
def revert_standardize(keypoints):
    """Inverse of `standardize_keypoint`: compute 48 * (value + 1).

    The maximum and minimum of the result are printed as a sanity check
    before the converted keypoints are returned.
    """
    restored = (keypoints + 1.) * 48
    print ('check keypoints max and min: ', np.max(restored), np.min(restored))
    return restored

In [None]:
def vis_im_keypoint(img, points, axs):
  """Draw a 96 x 96 image on `axs` and overlay its standardized keypoints.

  `points` holds interleaved x, y values in the standardized format; they are
  converted back with 48 * (value + 1) before being scattered as red circles.
  """
  axs.imshow(img.reshape(96, 96))
  # even positions are x values, odd positions are y values
  xs, ys = [(points[offset::2] + 1.) * 48 for offset in (0, 1)]
  axs.scatter(xs, ys, color='red', marker='o')

def vis_im_keypoint_notstandard(img, points, axs):
  """Draw a 96 x 96 image on `axs` and overlay keypoints given in image coordinates.

  `points` holds interleaved x, y values that are plotted as they are
  (no conversion from the standardized format) as red circles.
  """
  axs.imshow(img.reshape(96, 96))
  # even positions are x values, odd positions are y values
  xs, ys = [points[offset::2] + 0. for offset in (0, 1)]
  axs.scatter(xs, ys, color='red', marker='o')

#### Train Test Split 

We will use a relatively higher `test_size` because we will perform augmentation on the training data

In [None]:
# imgs_train_clean, imgs_val_clean, points_train_clean, points_val_clean = train_test_split(train_ims_clean, clean_keypoints_arr, 
#                                                                   test_size=0.05, random_state=21)

# print ('train clean image data size: ', imgs_train_clean.shape)
# print ('train clean keypoints data size: ', points_train_clean.shape)
# print ('validation clean image data size: ', imgs_val_clean.shape)

In [None]:
# points_train_standardize_clean = standardize_keypoint(points_train_clean)
# points_val_standardize_clean = standardize_keypoint(points_val_clean)

# print ('check example standardize keypoint: ', points_train_standardize_clean[10])

#### Include Augmentation 

We will include horizontal flip to the training data. 

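If the keypoints are already standardized, their range is within -1 to 1, so flipping at this stage means we keep the y coordinates the same and multiply the x coordinates by -1 (`flip_im_points1`). If the keypoints are left in pixel coordinates instead, the flipped x coordinate is `96 - x` (`flip_im_points0`).   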

Also using [Imgaug](https://imgaug.readthedocs.io/en/latest/) library we will include gaussian blur, linear contrast, and rotation. 

In [None]:
def flip_im_points1(img, points):
  """Mirror an image left-right together with its standardized keypoints.

  Parameters
  ----------
  img : array whose second axis is the image width (flipped with np.fliplr).
  points : interleaved x, y keypoint values in the standardized format,
      where mirroring amounts to negating every x value.

  Returns
  -------
  (flipped image, new keypoint array). `points` itself is not modified.

  NOTE(review): only the x values change; the order of the keypoints is kept,
  so any left/right-labelled keypoints stay in their original slots - confirm
  that this is intended.
  """
  flip_im = np.fliplr(img)
  # work on a float copy so the caller's keypoints stay untouched
  new_points = np.array(points, dtype=float)
  new_points[0::2] *= -1
  return flip_im, new_points

def flip_im_points0(img, points, width=96.): # use keypoints that are not standardized
  """Mirror an image left-right together with keypoints in image coordinates.

  Parameters
  ----------
  img : array whose second axis is the image width (flipped with np.fliplr).
  points : interleaved x, y keypoint values in image coordinates.
  width : value every x is subtracted from; the default 96. reproduces the
      original hard-coded behaviour.

  Returns
  -------
  (flipped image, new keypoint array). `points` itself is not modified.

  NOTE(review): only the x values change; the order of the keypoints is kept,
  so any left/right-labelled keypoints stay in their original slots - confirm
  that this is intended.
  """
  flip_im = np.fliplr(img)
  # work on a float copy so the caller's keypoints stay untouched
  new_points = np.array(points, dtype=float)
  new_points[0::2] = width - new_points[0::2]
  return flip_im, new_points

In [None]:
import imgaug as ia
import imgaug.augmenters as iaa

def gnoise_lincontrast(im_tr, pt_tr):
  """Double a data set with a contrast-changed (and usually blurred) copy of each image.

  Every image passes through LinearContrast((0.6, 1.5)) followed by a
  GaussianBlur(sigma=(0., 2.0)) wrapped in Sometimes(0.80, ...). The output
  interleaves original and augmented images; the keypoints of an image are
  reused unchanged for its augmented copy.

  Returns (images, keypoints) as numpy arrays with twice as many entries as
  the input.
  """
  augmenter = iaa.Sequential([
      iaa.LinearContrast((0.6, 1.5)),
      iaa.Sometimes(0.80, iaa.GaussianBlur(sigma=(0., 2.0))),
  ])
  images, keypoints = [], []
  for image, pts in zip(im_tr, pt_tr):
    images.extend((image, augmenter(image=image)))
    keypoints.extend((pts, pts))
  return np.asarray(images), np.asarray(keypoints)
    

In [None]:
# First augmentation stage: contrast / blur copies of the clean images.
aug_ims_train_clean_g, aug_points_train_clean_g = gnoise_lincontrast(
    train_ims_clean, clean_keypoints_arr
)
print(type(aug_ims_train_clean_g), aug_ims_train_clean_g.shape, aug_points_train_clean_g.shape)

In [None]:
# Show the first 16 entries; originals and their augmented copies alternate,
# so consecutive panels form (original, augmented) pairs.
fig = plt.figure(figsize=(8, 9))
npics= 16
for count, ipic in enumerate(range(npics), start=1):
  # add_subplot needs integer grid dimensions, hence floor division (npics/4 is a float)
  ax = fig.add_subplot(npics // 4, 4, count, xticks=[],yticks=[])
  vis_im_keypoint_notstandard(aug_ims_train_clean_g[ipic], aug_points_train_clean_g[ipic], ax)

plt.tight_layout()
plt.show()

In [None]:
# include rotation augmentation 

from imgaug.augmentables import Keypoint, KeypointsOnImage

def rotate_aug(im_tr, pt_tr, rotate=15, scale=(0.8, 1.2)):
  """Double a data set with an affine-transformed copy of each image and its keypoints.

  Parameters
  ----------
  im_tr : iterable of images.
  pt_tr : iterable of keypoint vectors with interleaved x, y values in image
      coordinates, aligned with `im_tr`.
  rotate, scale : forwarded to `iaa.Affine`; the defaults reproduce the
      original hard-coded settings.

  Returns
  -------
  (images, keypoints) as numpy arrays; originals and transformed copies
  alternate, and the transformed keypoints are again interleaved x, y values.
  """
  seq = iaa.Sequential([iaa.Affine(rotate=rotate, scale=scale)])
  aug_ims = []
  aug_pts = []
  for im, pt in zip(im_tr, pt_tr):
    # a fresh keypoint list per image: nothing is shared between iterations
    kps = KeypointsOnImage(
        [Keypoint(x, y) for x, y in zip(pt[0::2], pt[1::2])],
        shape=im.shape)
    # image and keypoints go through the same call so they receive the same transform
    f_im, f_kp = seq(image=im, keypoints=kps)
    all_coords = []
    for after in f_kp.keypoints:
      all_coords.append(after.x)
      all_coords.append(after.y)
    aug_ims.append(im)
    aug_ims.append(f_im)
    aug_pts.append(pt)
    aug_pts.append(all_coords)
  return np.asarray(aug_ims), np.asarray(aug_pts)

In [None]:
# Second augmentation stage: affine copies of everything produced so far.
aug_ims_train_clean_g2, aug_points_train_clean_g2 = rotate_aug(
    aug_ims_train_clean_g, aug_points_train_clean_g
)

print(type(aug_ims_train_clean_g2), aug_ims_train_clean_g2.shape, aug_points_train_clean_g2.shape)

In [None]:
# Show the first 20 entries of the rotated set; each image sits next to its augmented copy.
fig = plt.figure(figsize=(8, 9))
npics= 20
for count, ipic in enumerate(range(npics), start=1):
  # add_subplot needs integer grid dimensions, hence floor division (npics/4 is a float)
  ax = fig.add_subplot(npics // 4, 5, count, xticks=[],yticks=[])
  vis_im_keypoint_notstandard(aug_ims_train_clean_g2[ipic], aug_points_train_clean_g2[ipic], ax)


plt.tight_layout()
plt.show()

In [None]:
### add the flipped images in the training data-set
def aug_flip(im_tr, pt_tr):
  """Double a data set with horizontally flipped copies (standardized keypoints).

  Each (image, keypoints) pair is followed in the output by its mirrored
  version as produced by `flip_im_points1`. Returns (images, keypoints) as
  numpy arrays.
  """
  images, keypoints = [], []
  for image, pts in zip(im_tr, pt_tr):
    mirrored_image, mirrored_pts = flip_im_points1(image, pts)
    images.extend((image, mirrored_image))
    keypoints.extend((pts, mirrored_pts))
  return np.asarray(images), np.asarray(keypoints)

def aug_flip0(im_tr, pt_tr):
  """Double a data set with horizontally flipped copies (keypoints in image coordinates).

  Each (image, keypoints) pair is followed in the output by its mirrored
  version as produced by `flip_im_points0`. Returns (images, keypoints) as
  numpy arrays.
  """
  images, keypoints = [], []
  for image, pts in zip(im_tr, pt_tr):
    mirrored_image, mirrored_pts = flip_im_points0(image, pts)
    images.extend((image, mirrored_image))
    keypoints.extend((pts, mirrored_pts))
  return np.asarray(images), np.asarray(keypoints)

In [None]:
# aug_points_train_clean_g2_norm = standardize_keypoint(aug_points_train_clean_g2)
# points_val_standardize_clean = standardize_keypoint(points_val_clean)

# print ('check example standardize keypoint: ', aug_points_train_clean_g2_norm[10])

In [None]:
# Third augmentation stage: mirrored copies, with keypoints kept in image coordinates.
aug_ims_train_clean_g3, aug_points_train_clean_g3 = aug_flip0(
    aug_ims_train_clean_g2, aug_points_train_clean_g2
)

print('size of training data now: ', aug_ims_train_clean_g3.shape, aug_points_train_clean_g3.shape)

# aug_ims_train_clean_g3, aug_points_train_clean_g3 = aug_flip(aug_ims_train_clean_g2, 
#                                                          aug_points_train_clean_g2_norm)

# print ('size of training data now: ', aug_ims_train_clean_g3.shape, aug_points_train_clean_g3.shape)

In [None]:
# Show the first 24 entries of the fully augmented set in their stored order.
fig = plt.figure(figsize=(10, 9))
npics= 24
for count, ipic in enumerate(range(npics), start=1):
  # add_subplot needs integer grid dimensions, hence floor division (npics/4 is a float)
  ax = fig.add_subplot(npics // 4, 6, count, xticks=[],yticks=[])
  vis_im_keypoint_notstandard(aug_ims_train_clean_g3[ipic], aug_points_train_clean_g3[ipic], ax)


plt.tight_layout()
plt.show()

In [None]:
# Show 24 randomly chosen entries of the fully augmented set.
fig = plt.figure(figsize=(10, 9))
npics= 24
for count in range(1, npics + 1):
  ipic = np.random.choice(aug_ims_train_clean_g3.shape[0])
  # add_subplot needs integer grid dimensions, hence floor division (npics/4 is a float)
  ax = fig.add_subplot(npics // 4, 6, count, xticks=[],yticks=[])
  vis_im_keypoint_notstandard(aug_ims_train_clean_g3[ipic], aug_points_train_clean_g3[ipic], ax)


plt.tight_layout()
plt.show()

In [None]:
# `shuffle` is imported in the first cell of the notebook.
# A fixed random_state makes the order reproducible; this matters because the
# later `validation_split` takes its samples from the end of these arrays.
aug_ims_train_final, aug_points_train_final = shuffle(
    aug_ims_train_clean_g3, aug_points_train_clean_g3, random_state=21)
print ('check number of training files: ', len(aug_points_train_final))

#### Necessary Imports for CNN  

In [None]:
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dropout, \
     Flatten, BatchNormalization, Dense, Concatenate, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.activations import elu, relu
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
# from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2

In [None]:
# input_im = Input(shape=(96, 96, 1))
# def model():
#   #layer 1: 
#   conv1 = Conv2D(32, (3, 3), activation='relu', )(input_im) #96 x 96 x 32
#   conv2 = Conv2D(32, (3, 3), activation='relu', )(conv1) #96 x 96 x 32
#   pool1 = MaxPooling2D((2, 2))(conv2) 
#   conv3 = Conv2D(64, (3, 3), activation='relu', )(pool1) #48 x 16 x 64
#   conv4 = Conv2D(64, (3, 3), activation='relu', )(conv3)
#   pool2 = MaxPooling2D(pool_size=(2, 2))(conv4)
#   conv5 = Conv2D(128, (3, 3), padding='same', activation='relu',)(pool2)
#   conv6 = Conv2D(128, (3, 3), padding='same', activation='relu',)(conv5)
#   conv7 = Conv2D(128, (3, 3), padding='same', activation='relu',)(conv6)
#   pool3 = MaxPooling2D(pool_size=(2, 2))(conv7)
#   conv8 = Conv2D(256, (3, 3), padding='same', activation='relu',)(pool3)
#   conv9 = Conv2D(256, (3, 3), padding='same', activation='relu',)(conv8)
#   conv10 = Conv2D(256, (3, 3), padding='same', activation='relu',)(conv9)
#   pool4 = MaxPooling2D(pool_size=(2, 2))(conv10)
#   flat = Flatten()(pool4)
#   den1 = Dense(128, activation='relu')(flat)
#   den1 = Dropout(0.20)(den1)
#   den2 = Dense(64, activation='relu')(den1)
#   den2 = Dropout(0.20)(den2)
#   pred = Dense(clean_keypoints_arr.shape[1])(den2)
#   model = Model(inputs=input_im, outputs=pred, name='VGG_Like')
#   return model 

In [None]:
class customCallbacks(tf.keras.callbacks.Callback):
  """Keras callback that prints the train / validation MAE once every 50 epochs."""

  def on_epoch_end(self, epoch, logs=None):
    """Store the 1-based epoch number and report the metrics on every 50th epoch.

    `epoch` is the 0-based index passed by Keras. Metrics absent from `logs`
    (for example `val_mae` when no validation data is used) print as None
    instead of raising.
    """
    logs = logs or {}
    self.epoch = epoch + 1
    if self.epoch % 50 == 0:
      # report the same 1-based number that the condition above is based on
      print ('epoch num {}, train mae: {}, validation mae: {}'.format(
          self.epoch, logs.get('mae'), logs.get('val_mae')))


# base rate read by lrexpdecay
learning_rate = 1e-3

def lrdecay(epoch):
  """Step schedule: start from 1e-2 and scale it by the factor of the highest
  threshold that `epoch` exceeds. The rate is printed whenever `epoch` is a
  multiple of 50, and is returned.
  """
  # (epoch threshold, multiplier), checked from the largest threshold down
  steps = ((1600, 1e-1), (800, 3e-1), (400, 5e-1), (200, 7e-1), (100, 9e-1))
  lr = 1e-2
  for threshold, factor in steps:
    if epoch > threshold:
      lr *= factor
      break
  if epoch % 50 == 0:
    print('Learning rate: ', lr)
  return lr


def lrexpdecay(epoch):
  """Exponential schedule: the module-level `learning_rate` times exp(-0.1 * epoch)."""
  decay_rate = 0.1
  return learning_rate*(np.exp(-decay_rate*epoch))


def earlystop(mode):
  """Build an EarlyStopping callback with patience 20.

  Parameters
  ----------
  mode : 'acc' monitors 'val_acc' and stops when it no longer increases;
      'loss' monitors 'val_loss' and stops when it no longer decreases.

  Raises
  ------
  ValueError : for any other `mode` (previously this surfaced as an
      UnboundLocalError at the return statement).
  """
  # mode -> (monitored quantity, direction of improvement)
  settings = {
      'acc': ('val_acc', 'max'),
      'loss': ('val_loss', 'min'),
  }
  if mode not in settings:
    raise ValueError("mode must be 'acc' or 'loss', got {!r}".format(mode))
  monitor, direction = settings[mode]
  estop = tf.keras.callbacks.EarlyStopping(monitor=monitor, patience=20, mode=direction)
  return estop

# Multiply the learning rate by 0.8 when val_mae has not improved for 25 epochs (floor 1e-5).
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_mae', factor=0.8,
                              patience=25, min_lr=1e-5, verbose=1)

# NOTE: this rebinds the name `lrdecay` from the schedule function to the callback wrapping it.
lrdecay = tf.keras.callbacks.LearningRateScheduler(lrdecay) # learning rate decay

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated.
sgd = SGD(learning_rate=1e-2, momentum = 0.9,nesterov=True)
adam = Adam(learning_rate=3e-3)

In [None]:
# face_key_model_aug = model()
# face_key_model_aug.summary()

In [None]:
# face_key_model_aug.compile(loss='mse', 
#                        optimizer=adam, 
#                        metrics=['acc', 'mae'])

In [None]:
# face_key_model_aug_train = face_key_model_aug.fit(aug_ims_train_clean, aug_points_train_clean, 
#                                                   validation_data=(imgs_val_clean, points_val_standardize_clean), 
#                                                   batch_size=128, epochs=500, 
#                                                   callbacks=[customCallbacks(), lrdecay], 
#                                           verbose=0)

In [None]:
# mae = face_key_model_aug_train.history['mae']
# val_mae = face_key_model_aug_train.history['val_mae']

# loss = face_key_model_aug_train.history['loss']
# val_loss = face_key_model_aug_train.history['val_loss']

# acc = face_key_model_aug_train.history['acc']
# val_acc = face_key_model_aug_train.history['val_acc']

# fig = plt.figure(figsize=(7, 3))

# fig.add_subplot(121)
# plt.plot(range(len(loss)), loss, linestyle='-', color='red', alpha=0.7, label='Train Loss')
# plt.plot(range(len(loss)), val_loss, linestyle='-.', color='navy', alpha=0.7, label='Val Loss')
# plt.xlabel('Epochs', fontsize=12)
# plt.ylabel('Loss', fontsize=13)
# plt.legend(fontsize=12)

# fig.add_subplot(122)
# plt.plot(range(len(mae)), mae, linestyle='-', color='red', alpha=0.7, label='Train MAE')
# plt.plot(range(len(val_mae)), val_mae, linestyle='-.', color='navy', alpha=0.7, label='Val MAE')
# plt.xlabel('Epochs', fontsize=12)
# plt.ylabel('MAE', fontsize=13)
# plt.legend(fontsize=12)

# plt.tight_layout()
# plt.show()

In [None]:
### let's try to predict some keypoints on the test data-set
# The test set is read from its zipped CSV; the first rows are displayed as a check.
test_read = pd.read_csv(
    "../input/facial-keypoints-detection/test.zip",
    compression='zip',
)
print('test dataframe shape; ', test_read.shape)
test_read.head(3)

In [None]:
# Every 'Image' entry is a single string; splitting on spaces yields one token per pixel.
# The row count comes from the frame itself instead of a hard-coded 1783.
n_test = len(test_read)
test_pixel_rows = [pixel_string.split(' ') for pixel_string in test_read['Image']]
test_imgs_arr = np.array(test_pixel_rows, dtype='float') # string tokens become floats here

# Same layout and scaling as the training images: 96 x 96 x 1, divided by 255.
test_imgs_arr = np.reshape(test_imgs_arr, (n_test, 96, 96, 1))
test_ims = test_imgs_arr/255.

In [None]:
# predict_points_aug = face_key_model_aug.predict(test_ims)

# print ('check shape of predicted points: ', predict_points_aug.shape)

In [None]:
# fig = plt.figure(figsize=(8, 8))
# npics= 12
# count = 1
# for i in range(npics):
#   # ipic = i
#   ipic = np.random.choice(test_ims.shape[0])
#   ax = fig.add_subplot(npics/3 , 4, count, xticks=[],yticks=[])
#   vis_im_keypoint(test_ims[ipic], predict_points_aug[ipic], ax)
#   count = count + 1


# plt.tight_layout()
# plt.show()

### Preparing the Submission File 

In [None]:
# revert the points from standardized coordinate to image shape coordinate
# predict_points_aug_s = revert_standardize(predict_points_aug)
# test_predicts = pd.DataFrame(predict_points_aug_s, columns = list(clean_keypoints_df.columns))

# print ('check the new predict data frame: ', '\n')
# test_predicts.head(3)

In [None]:
# Lookup table used further down: its ImageId and FeatureName columns select
# which prediction goes into each Location entry.
lookup_table = pd.read_csv('../input/facial-keypoints-detection/IdLookupTable.csv')
print ('lookup table shape: ', lookup_table.shape)
lookup_table.head()

In [None]:
# the required format of output
sub_form_table = pd.read_csv('../input/facial-keypoints-detection/SampleSubmission.csv')
sub_form_table.head()

In [None]:
# for i in range(lookup_table.shape[0]):
#     lookup_table.Location[i] = test_predicts.loc[lookup_table.ImageId[i]-1][lookup_table.FeatureName[i]]

In [None]:
# sub_form_table.Location = lookup_table.Location
# new_submission = sub_form_table

In [None]:
# CHECK THE FORMAT
# new_submission.head(3)

In [None]:

# new_submission.to_csv('Submission.csv', index=False)

----------------------------------------------------------------------------------------------------------------------------

## Proceed Using the Complete Data 

Before, I have discussed 2 major points regarding the use of the complete data-set. To recap them here: 
* Since there are lots of NaN values -- As expected the keypoints do follow a normal distribution, so we can actually _fill the NaN entries with the distribution mean_ as one of the simplest strategies.   
* **For data imputation, from ML perspective it is necessary to first split the data into train-test and then apply the transformation otherwise we are prone to induce data-leakage.**   


In [None]:
# all_imgs = []
# print (train_read[['Image']].shape)
# for i in range(0, 7049):
#   x = train_read['Image'][i].split(' ') # split the pixel values based on the space 
#   x = [y for y in x] # create the listed pixels
#   all_imgs.append(x)
# all_imgs_arr = np.array(all_imgs, dtype='float') # arrays are always better than lists :)

In [None]:
# all_imgs_arr = np.reshape(all_imgs_arr, (7049, 96, 96, 1))
# train_ims = all_imgs_arr/255.

In [None]:
# keypoints_df = train_read.drop('Image', axis=1)
# # print ('check shape after dropping Image col: ', keypoints_df.shape)

# keypoints_arr = keypoints_df.to_numpy()
# print ('check shape: ', keypoints_arr.shape)

In [None]:
# imgs_train, imgs_val, points_train, points_val = train_test_split(train_ims, keypoints_arr, 
#                                                                   test_size=0.15, random_state=21)
# print ('train image data size: ', imgs_train.shape)
# print ('train keypoints data size: ', points_train.shape)
# print ('validation image data size: ', imgs_val.shape)

In [None]:
# # now we perform the imputation on the train and validation keypoints separately 
# print ('check if nan exixts in the train and valid set; ', np.isnan(np.min(points_train)), np.isnan(np.min(points_val))) 

# import numpy.ma as ma # masked array
# points_train_imputed = np.where(np.isnan(points_train), ma.array(points_train, mask=np.isnan(points_train)).mean(axis=0), 
#                                 points_train)
# points_val_imputed = np.where(np.isnan(points_val), ma.array(points_val, mask=np.isnan(points_val)).mean(axis=0), 
#                                 points_val)
# print ('check if nan exixts in the train and valid set after imputation; ', 
#        np.isnan(np.min(points_train_imputed)), np.isnan(np.min(points_val_imputed)))

In [None]:
# points_train_standardize = standardize_keypoint(points_train_imputed)
# points_val_standardize = standardize_keypoint(points_val_imputed)

# print ('check example standardize keypoint: ', points_train_standardize[10])

In [None]:
# aug_ims_train, aug_points_train = aug_sample(imgs_train, points_train_standardize)

In [None]:
# print ('check shape of an augmented image:  ', aug_ims_train[1].shape)

In [None]:
# fig = plt.figure(figsize=(8, 9))
# npics= 16
# count = 1
# for i in range(npics):
#   # ipic = i
#   ipic = np.random.randint(1, len(aug_ims_train), 1)
#   ax = fig.add_subplot(npics/4 , 4, count, xticks=[],yticks=[])
#   vis_im_keypoint(aug_ims_train[ipic[0]], aug_points_train[ipic[0]], ax)
#   count = count + 1


# plt.tight_layout()
# plt.show()    

# print ('total training images now: ', aug_ims_train.shape, aug_points_train.shape)

In [None]:
# fig = plt.figure(figsize=(8, 9))
# npics= 16
# count = 1
# for i in range(npics):
#   ipic = i 
# #ipic = np.random.randint(1, len(aug_ims_train), 1)
#   ax = fig.add_subplot(npics/4 , 4, count, xticks=[],yticks=[])
#   vis_im_keypoint(aug_ims_train[ipic], aug_points_train[ipic], ax)
#   count = count + 1


# plt.tight_layout()
# plt.show()

In [None]:
def inception_like(input_layer, filter1, filter2, filter3):
  """Inception-style block: parallel 1x1, 3x3 and 5x5 convolutions plus a max-pool branch.

  Parameters
  ----------
  input_layer : Keras tensor feeding all four branches.
  filter1, filter2, filter3 : number of filters of the 1x1, 3x3 and 5x5
      convolution, respectively.

  Returns
  -------
  Concatenation along the last (channel) axis of the three batch-normalised
  convolution outputs and the pooled input. All branches use 'same' padding
  and stride 1, so they can be concatenated.

  NOTE(review): the 1x1 branch used to be built but left out of the
  concatenation, which made `filter1` a no-op; it is now included, so the
  block outputs `filter1` more channels than before.
  """
  # 1x1 conv
  conv1 = Conv2D(filter1, (1,1), padding='same', activation='relu')(input_layer)
  bn1 = BatchNormalization()(conv1)
  # 3x3 conv
  conv3 = Conv2D(filter2, (3,3), padding='same', activation='relu')(input_layer)
  bn3 = BatchNormalization()(conv3)
  # 5x5 conv
  conv5 = Conv2D(filter3, (5,5), padding='same', activation='relu')(input_layer)
  bn5 = BatchNormalization()(conv5)
  # 2x2 max pooling with stride 1
  pool = MaxPooling2D((2,2), strides=(1,1), padding='same')(input_layer)
  # concatenate filters, assumes filters/channels last
  layer_out = Concatenate(axis=-1)([bn1, bn3, bn5, pool])
  return layer_out

In [None]:
input_im = Input(shape=(96, 96, 1))
def model2():
  """Assemble the 'Inception_Like' regression network on the module-level `input_im`.

  Five `inception_like` blocks are stacked; the first three are each followed
  by a 3x3 / stride-2 max pooling, the fourth by a default MaxPooling2D and
  the fifth by global average pooling. A 1024-unit dense layer with L2
  regularisation and dropout feeds the final 30-unit output layer.
  """
  features = input_im
  # three stages sharing the same 3x3, stride-2 down-sampling
  for filters in ((64, 64, 32), (64, 64, 32), (96, 96, 64)):
    features = inception_like(features, *filters)
    features = MaxPooling2D((3, 3), padding='same', strides=(2, 2) )(features)

  features = inception_like(features, 96, 128, 64)
  features = MaxPooling2D()(features)

  features = inception_like(features, 128, 256, 128)
  features = GlobalAveragePooling2D()(features)

  head = Flatten()(features)
  head = Dense(1024, kernel_regularizer=l2(l2=0.03))(head)
  head = Dropout(0.2)(head)

  pred = Dense(30)(head)
  return Model(inputs=input_im, outputs=pred, name='Inception_Like')

In [None]:
# Build the network and print its layer summary
face_key_model2_aug = model2()
face_key_model2_aug.summary()

In [None]:
# Draw the layer graph, annotated with tensor shapes
tf.keras.utils.plot_model(face_key_model2_aug, show_shapes=True)

In [None]:
# Mean squared error is the training loss; MAE is tracked as a metric
# (the 'mae' / 'val_mae' history entries and the reduce_lr callback rely on it).
face_key_model2_aug.compile(loss='mse', 
                       optimizer=Adam(learning_rate=3e-3), 
                       metrics=['mae'])

In [None]:
# face_key_model2_aug_train = face_key_model2_aug.fit(aug_ims_train, aug_points_train, 
#                                                   validation_data=(imgs_val, points_val_standardize), 
#                                                   batch_size=256, epochs=500, 
#                                                   callbacks=[customCallbacks(), reduce_lr], verbose=0)

In [None]:
# face_key_model2_aug_train_clean = face_key_model2_aug.fit(aug_ims_train_final, aug_points_train_final, 
#                                                   validation_data=(imgs_val_clean, points_val_clean), 
#                                                   batch_size=64, epochs=300, 
#                                                   callbacks=[customCallbacks(), reduce_lr], 
#                                           verbose=0)

# Train on the shuffled, augmented clean data; 5% of it is held out for validation.
# NOTE(review): the arrays passed here contain every original image together
# with its augmented copies (see the augmentation cells above), so the samples
# held out by `validation_split` are not independent of the training samples.
# val_mae is therefore likely optimistic - consider splitting before augmenting.
face_key_model2_aug_train_clean = face_key_model2_aug.fit(aug_ims_train_final, aug_points_train_final, 
                                                  validation_split= 0.05, 
                                                  batch_size=64, epochs=300, 
                                                  callbacks=[customCallbacks(), reduce_lr], 
                                          verbose=0)

In [None]:
# Per-epoch curves recorded by fit()
history = face_key_model2_aug_train_clean.history
mae = history['mae']
print(type(mae))
val_mae = history['val_mae']
loss = history['loss']
val_loss = history['val_loss']

# Left panel: loss, right panel: MAE; both on a logarithmic y axis.
fig, (loss_ax, mae_ax) = plt.subplots(1, 2, figsize=(8, 4))
panels = ((loss_ax, loss, val_loss, 'Loss'), (mae_ax, mae, val_mae, 'MAE'))
for panel, train_curve, val_curve, quantity in panels:
  panel.plot(range(len(train_curve)), train_curve, linestyle='-', color='red', alpha=0.7, label='Train ' + quantity)
  panel.plot(range(len(val_curve)), val_curve, linestyle='-.', color='navy', alpha=0.7, label='Val ' + quantity)
  panel.set_xlabel('Epochs', fontsize=12)
  panel.set_ylabel(quantity, fontsize=13)
  panel.set_yscale('log')
  panel.legend(fontsize=12)

plt.tight_layout()
plt.show()

In [None]:
# predict_points_aug2 = face_key_model2_aug.predict(test_ims)

# print ('check shape of predicted points: ', predict_points_aug2.shape)

In [None]:
# Predict the keypoints of every test image. The model was trained on
# non-standardized keypoints, so the outputs are used as image coordinates below.
predict_points_aug2_clean = face_key_model2_aug.predict(test_ims)

print ('check shape of predicted points: ', predict_points_aug2_clean.shape)

In [None]:
# Show 12 randomly chosen test images with their predicted keypoints.
fig = plt.figure(figsize=(8, 8))
npics= 12
for count in range(1, npics + 1):
  ipic = np.random.choice(test_ims.shape[0])
  # add_subplot needs integer grid dimensions, hence floor division (npics/3 is a float)
  ax = fig.add_subplot(npics // 3, 4, count, xticks=[],yticks=[])
  vis_im_keypoint_notstandard(test_ims[ipic], predict_points_aug2_clean[ipic], ax)


plt.tight_layout()
plt.show()

In [None]:
# predict_points_aug2_s = revert_standardize(predict_points_aug2_clean)
# No conversion is applied: the predictions are taken as they are.
# NOTE: this is an alias, not a copy - both names refer to the same array.
predict_points_aug2_s = predict_points_aug2_clean
print ('check max and min: ', predict_points_aug2_s.max(), predict_points_aug2_s.min())
# test_predicts2 = pd.DataFrame(predict_points_aug2_s, columns = list(clean_keypoints_df.columns))

# print ('check the new predict data frame: ', '\n')
# test_predicts2.head(3)

In [None]:
print ('check the new predict data frame: ', predict_points_aug2_s.shape)
# Clamp the predictions to the 0..96 range. np.clip returns a new array, so the
# array this name previously pointed to is left untouched and re-running this
# cell gives the same result.
predict_points_aug2_s = np.clip(predict_points_aug2_s, 0.0, 96.0)

print ('check max and min now: ', predict_points_aug2_s.max(), predict_points_aug2_s.min())

In [None]:
# Label the prediction columns with the keypoint column names of the training data
test_predicts3 = pd.DataFrame(predict_points_aug2_s, columns = list(clean_keypoints_df.columns))

print ('check the new predict data frame: ', '\n')
test_predicts3.head(3)

In [None]:
# For every lookup row pick the prediction at (ImageId - 1, FeatureName).
# One vectorised assignment replaces the per-row chained assignment
# `lookup_table.Location[i] = ...`, which is slow and is not guaranteed to
# write into the frame.
row_idx = lookup_table['ImageId'].to_numpy() - 1
col_idx = test_predicts3.columns.get_indexer(lookup_table['FeatureName'])
# get_indexer marks unknown names with -1; fail loudly instead of silently reading the last column
if (col_idx < 0).any() or (row_idx < 0).any() or (row_idx >= len(test_predicts3)).any():
    raise KeyError('lookup table refers to an image or feature missing from the predictions')
lookup_table['Location'] = test_predicts3.to_numpy(dtype='float64')[row_idx, col_idx]

In [None]:
# Copy the filled-in locations into the submission template (rows are matched by index).
sub_form_table['Location'] = lookup_table['Location']
# `new_submission` is another name for the same frame, not a copy.
new_submission = sub_form_table

In [None]:
# Check the format: display the first three rows of the submission frame
new_submission.head(3)

In [None]:
# Write the submission file without the index column
new_submission.to_csv('Submission11.csv', index=False)

In [None]:
# Final sanity check: largest and smallest submitted location
print (new_submission['Location'].max(), new_submission['Location'].min())