# Keypoint detection

Inspired by the blog post at https://fairyonice.github.io/achieving-top-23-in-kaggles-facial-keypoints-detection-with-keras-tensorflow.html

In [None]:
%matplotlib inline

In [None]:
!pip install --user --upgrade numpy pandas scikit-learn matplotlib
!pip install --user --upgrade tensorflow
!pip install --user --upgrade keras
# https://stackoverflow.com/questions/49887968/what-does-symbol-not-found-clock-gettime-mean-when-calling-a-python-2-7-scri
!pip install --force-reinstall Pillow==5.0.0
!pip install --user --upgrade imgaug
!pip install --upgrade opencv-python==3.3.0.10
# https://stackoverflow.com/questions/48717726/python3-opencv-install-error-symbol-not-found-clock-gettime

In [None]:
import cv2
# Single formatted argument: prints the same text on Python 2 and Python 3.
print("cv2 version {}".format(cv2.__version__))
import matplotlib.pyplot as plt
import numpy as np
from pandas.io.parsers import read_csv
from sklearn.utils import shuffle
import tensorflow as tf
import keras

In [None]:
!ls data/facial-keypoints-detection/

In [None]:
import os
# Directory holding the facial-keypoints-detection CSV files (relative path).
path=os.path.join("data", "facial-keypoints-detection")
# Training and test CSVs; load() reads one of these depending on its `test` flag.
FTRAIN = os.path.join(path,"training.csv")
FTEST = os.path.join(path, "test.csv")
# Path to IdLookupTable.csv in the same directory.
FIdLookup = os.path.join(path, "IdLookupTable.csv")

## Image loading functions

In [None]:
def plot_sample(X,y,axs):
    '''
    Draw one face picture on `axs` and overlay its keypoints.

    X   : pixel values of a single kaggle picture; reshaped to 96 by 96
    y   : interleaved x, y keypoint coordinates rescaled to range between
          -1 and 1; mapped back to pixel positions with 48*v + 48
    axs : object providing imshow(...) and scatter(...), e.g. a matplotlib axes
    '''
    picture = X.reshape(96, 96)
    axs.imshow(picture, cmap="gray")

    # even entries are x coordinates, odd entries are y coordinates
    xs = 48 * y[0::2] + 48
    ys = 48 * y[1::2] + 48
    axs.scatter(xs, ys)
    
def load(test=False, cols=None):
    """
    Load the test or training CSV and return images and (training only) labels.

    test : if True the file FTEST is read, otherwise FTRAIN.
    cols : a list containing landmark label names.
           If this is specified, only this subset of the landmark labels is
           extracted. For example, cols could be:

          [left_eye_center_x, left_eye_center_y]

    return:
    X: 2-d numpy array (Nsample, Ncol*Nrow), float32, pixel values divided by 255
    y: 2-d numpy array (Nsample, Nlandmarks*2), float32, or None when test=True
       In total there are 15 landmarks.
       As x and y coordinates are recorded, y.shape = (Nsample,30)
       Coordinates are rescaled with (y - 48) / 48.

    Rows with at least one missing value are dropped. The training data is
    shuffled with a fixed random_state. A per-column count of non-missing
    values is printed before the rows are dropped.
    """

    fname = FTEST if test else FTRAIN
    df = read_csv(os.path.expanduser(fname)) 

    
    # each 'Image' cell is a space-separated string of pixel values
    df['Image'] = df['Image'].apply(lambda im: np.fromstring(im, sep=' '))

    if cols:  
        # keep only the requested label columns, with 'Image' last
        df = df[list(cols) + ['Image']]

    # report how many non-missing values each column has
    myprint = df.count()
    myprint = myprint.reset_index()
    print(myprint)  
    ## rows with at least one NA column are removed!
    df = df.dropna()  
    

    X = np.vstack(df['Image'].values) / 255.  # scales values to between 0 and 1
    X = X.astype(np.float32)

    if not test:  # labels only exist for the training data
        ## standardization of the response
        # every column except the last one (assumed to be 'Image' -- TODO confirm
        # for the unfiltered CSV)
        y = df[df.columns[:-1]].values
        y = (y - 48) / 48  # y values are between [-1,1]
        X, y = shuffle(X, y, random_state=42)  # shuffle data
        y = y.astype(np.float32)
    else:
        y = None
    
    return X, y

def load2d(test=False,cols=None):
    """
    Same as load(), but with every image reshaped to (96, 96, 1).

    test, cols : forwarded unchanged to load().
    return     : (X, y) where X has shape (Nsample, 96, 96, 1) and y is
                 whatever load() returned as its second value.
    """
    flat_images, labels = load(test, cols)
    images = flat_images.reshape(-1, 96, 96, 1)
    return images, labels

def plot_loss(hist,name,plt,RMSE_TF=False):
    '''
    Plot the training and validation loss curves stored in `hist`.

    hist    : mapping with 'loss' and 'val_loss' sequences
    name    : text appended to the "train:" / "val:" legend labels
    plt     : object providing plot(...), e.g. matplotlib.pyplot
    RMSE_TF : if True, the square root of each loss multiplied by 48 is
              plotted (RMSE in the original scale)
    '''
    curves = [hist['loss'], hist['val_loss']]
    if RMSE_TF:
        curves = [np.sqrt(np.array(values)) * 48 for values in curves]
    train_curve, val_curve = curves

    plt.plot(train_curve, "--", linewidth=3, label="train:" + name)
    plt.plot(val_curve, linewidth=3, label="val:" + name)

In [None]:
X, y = load()

In [None]:
print("X.shape == {}; X.min == {:.3f}; X.max == {:.3f}".format(X.shape, X.min(), X.max()))
print("y.shape == {}; y.min == {:.3f}; y.max == {:.3f}".format(y.shape, y.min(), y.max()))

As shown above, we have 2140 entries for both X and the labels: each 96x96 image is flattened to a 9216-long array, and similarly the 15 keypoints are stored contiguously in a 30-element array.

In [None]:
X2, y2 = load2d()

In [None]:
print("X2.shape == {}; X2.min == {:.3f}; X2.max == {:.3f}".format(X2.shape, X2.min(), X2.max()))
print("y2.shape == {}; y2.min == {:.3f}; y2.max == {:.3f}".format(y2.shape, y2.min(), y2.max()))

### Plotting on unprocessed images

In [None]:
# Show Npicture randomly chosen images together with their keypoints.
fig = plt.figure(figsize=(7, 7))
fig.subplots_adjust(hspace=0.13,wspace=0.0001,
                    left=0,right=1,bottom=0, top=1)
Npicture = 3
count = 1
for irow in range(Npicture):
    ipic = np.random.choice(X2.shape[0])
    # Integer ceiling division: the row count must be an int, and rounding up
    # keeps the 3-column grid large enough when Npicture is not a multiple of 3.
    ax = fig.add_subplot((Npicture + 2) // 3, 3, count, xticks=[], yticks=[])
    plot_sample(X2[ipic],y2[ipic],ax)
    ax.set_title("picture "+ str(ipic))
    count += 1

In [None]:
# Sizes of axes 1, 2 and 3 of X2 (the two image axes and the channel axis).
w = X2.shape[1]
h = X2.shape[2]
c = X2.shape[3]
# Single formatted argument: prints the same text on Python 2 and Python 3.
print("{} {} {}".format(w, h, c))

## Image Augmentation

In [None]:
import imgaug as ia
import imgaug.augmenters as iaa
from imgaug.augmentables import Keypoint, KeypointsOnImage

In [None]:
ia.seed(1)

### Defining a function to apply keypoints to an image

In [None]:
def keypoints_1d_to_2d(keypoints):
    """
    Group a flat [x0, y0, x1, y1, ...] sequence into [(x0, y0), (x1, y1), ...].

    keypoints : indexable sequence (list or 1-d array) of interleaved
                coordinates. A trailing unpaired value, if any, is ignored.
    return    : list of (x, y) tuples.
    """
    # Floor division keeps the pair count an integer, so range() accepts it
    # on Python 3 as well as on Python 2.
    n_pairs = len(keypoints) // 2
    return [(keypoints[2*i], keypoints[2*i+1]) for i in range(n_pairs)]

def add_keypoints_on_image(img, keypoints):
    """
    Wrap a flat keypoint sequence in a KeypointsOnImage bound to `img`.

    img       : image array; only its `shape` is used
    keypoints : flat [x0, y0, x1, y1, ...] sequence
    return    : KeypointsOnImage with one Keypoint per (x, y) pair
    """
    points = []
    # regroup the flat sequence into (x, y) pairs
    for px, py in keypoints_1d_to_2d(keypoints):
        points.append(Keypoint(x=px, y=py))
    return KeypointsOnImage(points, shape=img.shape)

### Defining the augmentation pipeline

In [None]:
# Augmentation pipeline: called later as seq(image=..., keypoints=...) so the
# image and its keypoints go through the same transformation.
# NOTE(review): Multiply with a factor of 1.2-1.5 on images already scaled to
# [0, 1] may push values above 1 -- confirm whether that is acceptable.
seq = iaa.Sequential([
    iaa.Multiply((1.2, 1.5)), # change brightness, doesn't affect keypoints
    iaa.Affine(
        rotate=10,
        scale=(0.5, 0.7)
    ) # rotate by exactly 10deg and scale to 50-70%, affects keypoints
])

### Testing on an example image

In [None]:
image = ia.quokka(size=(256, 256))
keypoints=[65, 100, 75, 200, 100, 100, 200, 80]
koi = add_keypoints_on_image(image, keypoints)
koi

In [None]:
# Augment keypoints and images.
image_aug, kps_aug = seq(image=image, keypoints=koi)

In [None]:
# image with keypoints before/after augmentation (shown below)
image_before = koi.draw_on_image(image, size=7)
image_after = kps_aug.draw_on_image(image_aug, size=7)

In [None]:
plt.imshow(image_before)

In [None]:
plt.imshow(image_after)

### Converting the image for the ANN

The input images are already in the range [0, 1]; the keypoint labels, however, were rescaled by `load` to the range [-1, 1].

In [None]:
print("X2.shape == {}; X2.min == {:.3f}; X2.max == {:.3f}".format(X2.shape, X2.min(), X2.max()))
print("y2.shape == {}; y2.min == {:.3f}; y2.max == {:.3f}".format(y2.shape, y2.min(), y2.max()))

In [None]:
import cv2

def load_image_as_greyscale(path):
    """Read the image file at `path` with OpenCV in greyscale mode."""
    # cv2.IMREAD_GRAYSCALE is the named form of the flag value 0
    return cv2.imread(path, cv2.IMREAD_GRAYSCALE)

def plot_image(img):
    """Show `img` with matplotlib using a gray colormap and bicubic interpolation."""
    # https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_gui/py_image_display/py_image_display.html#using-matplotlib
    display_options = {'cmap': 'gray', 'interpolation': 'bicubic'}
    plt.imshow(img, **display_options)

In [None]:
test_image = load_image_as_greyscale("data/VOCdevkit/VOC2012/JPEGImages/2007_000027.jpg")
print(test_image.shape)
plot_image(test_image)

We need to rescale the image to the range [0, 1].

In [None]:
print(test_image.shape)
print(test_image.min(), test_image.max())

Let's look at the example at https://machinelearningmastery.com/how-to-manually-scale-image-pixel-data-for-deep-learning/

In [None]:
from numpy import asarray

pixels = asarray(test_image)
# confirm pixel range is 0-255
print('Data Type: %s' % pixels.dtype)
print('Min: %.3f, Max: %.3f' % (pixels.min(), pixels.max()))
# convert from integers to floats
pixels = pixels.astype('float32')
# normalize to the range 0-1
pixels /= 255.0
# confirm the normalization
print('Min: %.3f, Max: %.3f' % (pixels.min(), pixels.max()))
print(pixels.shape)

Let's pack this into a reusable function:

In [None]:
def convert_image(img):
    """
    Return `img` as a float32 array scaled to the range [0, 1].

    img    : array-like of pixel values, either in 0-255 or already in [0, 1]
    return : new float32 numpy array; the input is never modified

    The values are divided by 255 only when the maximum exceeds 1.0, so an
    image that is already normalized passes through unchanged. As a
    consequence, a 0-255 image whose brightest pixel is 0 or 1 is left
    unscaled.
    """
    # Convert first, then cast: casting `img` directly ignored the asarray()
    # result and failed for non-ndarray inputs such as nested lists.
    pixels = np.asarray(img).astype('float32')
    # the size guard avoids calling max() on an empty array, which raises
    if pixels.size and pixels.max() > 1.0:
        pixels /= 255.0
    return pixels

In [None]:
# Parenthesized single argument: prints the same text on Python 2 and Python 3.
print(X2[0].shape)
converted_img = convert_image(X2[0])
plt.imshow(converted_img.reshape(96,96),cmap="gray")

### Converting the keypoints for the ANN

Similarly to the input image, we also expect the labels (keypoint positions) to be in the interval [0,1].
To recap, we previously had the following:

In [None]:
koi

In [None]:
kps_aug

where:

In [None]:
kps_aug.keypoints

In [None]:
kps_aug.shape

we can easily normalize these values based on the image shape:

In [None]:
def normalize_keypoints(kps):
    """
    Flatten keypoints to [x0, y0, x1, y1, ...], each divided by the image size.

    kps    : object with a `keypoints` iterable (items exposing .x and .y)
             and a `shape` whose first two entries are (height, width), as
             for imgaug's KeypointsOnImage
    return : 1-d numpy array with two entries per keypoint; x is divided by
             the image width and y by the image height
    """
    # Image shapes are ordered (height, width, ...): x runs along the width
    # (shape[1]) and y along the height (shape[0]).
    height = float(kps.shape[0])
    width = float(kps.shape[1])
    result = []
    for k in kps.keypoints:
        result.append(k.x / width)   # x / x_max
        result.append(k.y / height)  # y / y_max
    return np.array(result)

def denormalize_keypoints(kps, img_width, img_height):
    """
    Scale normalized keypoints back to pixel coordinates.

    kps        : flat [x0, y0, x1, y1, ...] sequence of normalized values
    img_width  : factor applied to the x entries (even positions)
    img_height : factor applied to the y entries (odd positions)
    return     : list with the scaled values, in the same order as `kps`
    """
    # Collect into a separate list: reusing the name `kps` for the output
    # discarded the input, so the result was always empty.
    return [
        value * (img_width if position % 2 == 0 else img_height)
        for position, value in enumerate(kps)
    ]

For instance:

In [None]:
normalize_keypoints(kps_aug)

## Model Definition

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout
from keras.optimizers import SGD

def SimpleCNN(input_shape=(96,96,1), withDropout=False):
    '''
    Build and compile a small convolutional network with 30 outputs.

    Three Conv2D -> relu -> MaxPooling2D stages (32, 64 and 128 filters) are
    followed by Flatten, two Dense(500) -> relu stages and a final Dense(30)
    layer. The model is compiled with mean squared error loss and SGD
    (lr 0.01, momentum 0.9, nesterov).

    input_shape : shape of one input image, given to the first Conv2D
    withDropout : if True, a Dropout(0.1) layer is added after every stage
                  except the final Dense(30) layer
    '''
    model = Sequential()

    # (number of filters, kernel size) for each convolutional stage
    conv_stages = [(32, (3, 3)), (64, (2, 2)), (128, (2, 2))]
    for stage_index, (n_filters, kernel) in enumerate(conv_stages):
        if stage_index == 0:
            # only the first layer declares the input shape
            model.add(Conv2D(n_filters, kernel, input_shape = input_shape))
        else:
            model.add(Conv2D(n_filters, kernel))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size = (2,2)))
        if withDropout:
            model.add(Dropout(0.1))

    model.add(Flatten())

    for n_units in (500, 500):
        model.add(Dense(n_units))
        model.add(Activation('relu'))
        if withDropout:
            model.add(Dropout(0.1))

    model.add(Dense(30))
    optimizer = SGD(lr=0.01, momentum=0.9, nesterov=True)
    model.compile(loss="mean_squared_error", optimizer=optimizer)
    return model

## Training

### Defining an image generator for training

Example taken from https://towardsdatascience.com/writing-custom-keras-generators-fe815d992c5a

In [None]:
def get_input(path):
    """Load the image stored at `path` via load_image_as_greyscale()."""
    image = load_image_as_greyscale(path)
    return image

In [None]:
def get_output(path, label_file):
    """
    Look up the label row belonging to an image file.

    path       : '/'-separated path whose file name starts with an integer
                 id followed by '.', e.g. 'some/dir/12.jpg'
    label_file : table supporting `.loc[id].values`
    return     : the values of the row selected by that id
    """
    # file name without its directories, then the part before the first '.'
    file_name = path.rsplit('/', 1)[-1]
    id_text = file_name.split('.', 1)[0]
    row_id = np.int64(id_text)
    return label_file.loc[row_id].values

In [None]:
def image_generator(files, label_file, batch_size = 64):
    """
    Endlessly yield (batch_x, batch_y) arrays built from image files.

    files      : collection of image paths to draw from
    label_file : label table handed to get_output()
    batch_size : number of paths drawn with np.random.choice per batch

    NOTE(review): preprocess_input is called here with a single `image`
    keyword, while the preprocess_input defined in this notebook takes
    (image, keypoints, target_width, target_height, channels), and a later
    cell defines another image_generator. Confirm whether this version is
    still needed.
    """
    while True:
        # Draw the paths that make up this batch
        chosen_paths = np.random.choice(a = files, size = batch_size)
        images = []
        labels = []

        # Read each input, fetch its label, then preprocess the image
        for image_path in chosen_paths:
            raw_image = get_input(image_path)
            label = get_output(image_path, label_file=label_file)
            images.append(preprocess_input(image=raw_image))
            labels.append(label)

        # A tuple of (input, output) to feed the network
        yield (np.array(images), np.array(labels))

### Defining an image generator over 1 image and 1 label file

In [None]:
# Single formatted argument: prints the same text on Python 2 and Python 3.
print("{} {}".format(X.shape, X2.shape))

In [None]:
y2.shape

In [None]:
np.random.choice(a = X2.shape[0], size = 32)

In [None]:
def preprocess_input(image, keypoints, target_width, target_height, channels):
    """
    Augment one image together with its keypoints and rescale both.

    image     : image array handed to the augmentation pipeline `seq`
    keypoints : flat [x0, y0, x1, y1, ...] sequence
    target_width, target_height, channels : accepted but not used here
    return    : (image from convert_image, keypoints from normalize_keypoints)

    NOTE(review): the keypoints are attached to the image as given, so they
    are presumably interpreted as pixel coordinates of `image`; confirm that
    callers passing labels rescaled to [-1, 1] intend this.
    """
    # attach the keypoints to the image so both are transformed together
    attached = add_keypoints_on_image(image, keypoints)
    augmented_image, augmented_kps = seq(image=image, keypoints=attached)
    # rescale the augmented image and keypoints
    scaled_image = convert_image(augmented_image)
    scaled_kps = normalize_keypoints(augmented_kps)
    return scaled_image, scaled_kps

In [None]:
image.shape

In [None]:
keypoints

In [None]:
plt.imshow(preprocess_input(image, keypoints, 256, 256, 3)[0])

In [None]:
preprocess_input(image, keypoints, 256, 256, 3)[1]

In [None]:
import math

def image_generator(images_file, labels_file, dimensions=2, batch_size = 64, target_width=None, target_height=None, channels=None):
    """
    Endlessly yield augmented (batch_x, batch_y) arrays from in-memory data.

    images_file : indexable collection of images (one image per index)
    labels_file : indexable collection of flat keypoint labels, aligned with
                  images_file
    dimensions  : if 1, every augmented image is flattened (multi-channel
                  images keep their channel axis); otherwise images are
                  yielded as returned by preprocess_input
    batch_size  : number of indices drawn with np.random.choice per batch
    target_width, target_height, channels : forwarded to preprocess_input;
                  when a size is missing both default to the integer square
                  root of images_file.shape[1]
    """
    if target_width is None or target_height is None:
        target_width = target_height = int(math.sqrt(images_file.shape[1]))
    while True:
        n_samples = len(images_file)
        picked = np.random.choice(a = n_samples, size = batch_size)
        images = []
        targets = []
        for idx in picked:
            # augment and rescale this sample
            img_aug, kps = preprocess_input(images_file[idx], labels_file[idx], target_width, target_height, channels)
            if dimensions == 1:
                is_multichannel = len(img_aug.shape) == 3 and img_aug.shape[2] != 1
                if is_multichannel:
                    # collapse the two spatial axes, keep the channel axis
                    n_pixels = img_aug.shape[0] * img_aug.shape[1]
                    img_aug = np.reshape(img_aug, (n_pixels, img_aug.shape[2]))
                else:
                    img_aug = img_aug.flatten()

            images.append(img_aug)
            targets.append(kps)

        # A tuple of (input, output) to feed the network
        yield (np.array(images), np.array(targets))

In [None]:
preprocess_input(X2[0], y2[0], 96, 96, 1)[0].shape

In [None]:
def test_generator(images_file, labels_file, dimensions=1, batch_size = 1, target_width=None, target_height=None, channels=None):
    """
    Build and return one augmented batch (a single-shot check of the batching logic).

    images_file : indexable collection of images (one image per index)
    labels_file : indexable collection of flat keypoint labels, aligned with
                  images_file
    dimensions  : if 1, every augmented image is flattened (multi-channel
                  images keep their channel axis); otherwise images are
                  returned as produced by preprocess_input
    batch_size  : number of indices drawn with np.random.choice
    target_width, target_height, channels : forwarded to preprocess_input
    return      : (batch_x, batch_y) numpy arrays holding batch_size samples
    """
    # Select indices for the batch
    dataset_size = len(images_file)
    batch_indexes = np.random.choice(a = dataset_size, size = batch_size)
    batch_input = []
    batch_output = []

    # Read in each input, perform preprocessing and get labels
    for input_index in batch_indexes:
        in_image = images_file[input_index]
        label = labels_file[input_index]
        # preprocess image
        img_aug, kps = preprocess_input(in_image, label, target_width, target_height, channels)
        if dimensions == 1:
            if len(img_aug.shape) == 3 and img_aug.shape[2] != 1:
                # collapse the two spatial axes, keep the channel axis
                img_aug = np.reshape(img_aug, (img_aug.shape[0] * img_aug.shape[1], img_aug.shape[2] ))
            else:
                img_aug = img_aug.flatten()

        batch_input.append(img_aug)
        batch_output.append(kps)

    # Assemble the arrays only after every sample has been processed:
    # returning from inside the loop cut the batch down to its first sample.
    batch_x = np.array( batch_input )
    batch_y = np.array( batch_output )
    return batch_x, batch_y

In [None]:
a, b = test_generator(X2[0:4], y2[0:4], dimensions=1, batch_size=1, target_width=96, target_height=96, channels=1)
# Single formatted argument: prints the same text on Python 2 and Python 3.
print("{} {}".format(a.shape, b.shape))

In [None]:
a, b = test_generator(X2[0:4], y2[0:4], dimensions=2, batch_size=1, target_width=96, target_height=96, channels=1)
# Single formatted argument: prints the same text on Python 2 and Python 3.
print("{} {}".format(a.shape, b.shape))

In [None]:
def convert_to_3_channels(img, width, height):
    """
    Turn a single-channel image into three identical channels.

    img           : array with width * height elements
    width, height : the image is reshaped to (width, height) before stacking
    return        : array of shape (width, height, 3) whose three channels
                    are copies of the reshaped image
    """
    plane = np.reshape(img, (width, height))
    return np.stack([plane] * 3, axis=2)

### Split into training and validation sets

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X2, y2, test_size=0.2, random_state=42)
print(X.shape)
print(X_train.shape)
print(X_val.shape)

### Training

In [None]:
cnn_model = SimpleCNN(input_shape=(96,96, 1))
cnn_model.summary()

In [None]:
# Single formatted argument: prints the same text on Python 2 and Python 3.
print("{} {}".format(X_train.shape, y_train.shape))

In [None]:
# initialize the number of epochs and batch size
epochs = 200
batch_size = 32

# train the network
# Both generators draw random batches, so an "epoch" is defined by the step
# counts below (dataset size // batch size) rather than by one pass over the data.
# NOTE(review): validation batches come from the same augmenting
# image_generator as the training batches, so the validation loss is measured
# on randomly augmented images -- confirm this is intended.
hist = cnn_model.fit_generator(
    
    image_generator(
        X_train, y_train, dimensions=2,
        batch_size=batch_size, target_width=96, target_height=96, channels=1
    ),
    validation_data=image_generator(
        X_val, y_val, dimensions=2,
        batch_size=batch_size, target_width=96, target_height=96, channels=1
    ),
    validation_steps=len(X_val) // batch_size,
    steps_per_epoch=len(X_train) // batch_size,
    epochs=epochs)

In [None]:
plot_loss(hist.history,"model 1",plt)
plt.legend()
plt.grid()
plt.yscale("log")
plt.xlabel("epoch")
plt.ylabel("log loss")
plt.show()

## Prediction

In [None]:
# -1 lets numpy infer the number of samples instead of hard-coding it, so the
# cell keeps working when the split size or the dataset changes.
predicted_y_test = cnn_model.predict(X_val.reshape(-1, 96, 96, 1))

In [None]:
# Single formatted argument: prints the same text on Python 2 and Python 3.
print("{} {}".format(predicted_y_test.min(), predicted_y_test.max()))

In [None]:
# Show Npicture randomly chosen validation images with the predicted keypoints.
# NOTE(review): plot_sample maps keypoints with 48*v + 48 (values in [-1, 1]),
# while the training targets were produced by normalize_keypoints -- confirm
# that the predictions are on the scale plot_sample expects.
fig = plt.figure(figsize=(7, 7))
fig.subplots_adjust(hspace=0.13,wspace=0.0001,
                    left=0,right=1,bottom=0, top=1)
Npicture = 9
count = 1
for irow in range(Npicture):
    ipic = np.random.choice(X_val.shape[0])
    # Integer ceiling division: the row count must be an int, and rounding up
    # keeps the 3-column grid large enough when Npicture is not a multiple of 3.
    ax = fig.add_subplot((Npicture + 2) // 3, 3, count, xticks=[], yticks=[])
    # denormalize_keypoints(predicted_y_test[ipic], 96, 96)
    plot_sample(X_val[ipic], predicted_y_test[ipic], ax)
    ax.set_title("picture "+ str(ipic))
    count += 1
plt.show()