In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Input geometry: 96x96 images with a single channel.
IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS = 96, 96, 1

# Training hyper-parameters handed to `model.fit`.
BATCH_SIZE, EPOCHS = 128, 5

In [None]:
# Load the training table: keypoint coordinates live in the numeric columns
# and the raw pixels are stored as one whitespace-separated string in "Image".
TARGET_COLUMNS = ["nose_tip_x", "nose_tip_y"]

df = pd.read_csv('../input/facial-keypoints-detection/training.zip')

# A single NaN target would turn the MSE loss into NaN, so rows without a
# nose-tip label are dropped *before* images and targets are split apart.
# Doing it here keeps both aligned; it is a no-op when nothing is missing.
df = df.dropna(subset=TARGET_COLUMNS).reset_index(drop=True)

images = df.pop("Image")        # Series of pixel strings, parsed in the next cell
y = df[TARGET_COLUMNS].values   # (n_samples, 2) array holding (x, y) per row
print(df.shape)
df.head()

In [None]:
# Parse every whitespace-separated pixel string directly into a float32 row.
# np.fromstring does the text -> number conversion in C, which avoids creating
# one Python float object per pixel, and float32 needs half the memory of the
# default float64 array.
images = np.array(
    [np.fromstring(image, dtype=np.float32, sep=" ") for image in images],
    dtype=np.float32,
)
# Rows are flat pixel vectors; restore the (sample, height, width, channel) layout.
images = images.reshape(-1, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)

In [None]:
# Sanity check: draw six randomly chosen training faces with the labelled
# nose tip marked by a red cross.
fig, ax = plt.subplots(3, 2, figsize=(12, 12))
for axis in ax.flat:
    idx = np.random.randint(len(images))
    # Pass a 2-D array to imshow: a trailing channel axis of size 1 is not
    # accepted by every matplotlib version.
    axis.imshow(images[idx, :, :, 0], cmap="gray")
    axis.scatter(y[idx][0], y[idx][1], color="r", marker="x")
    axis.set_title(f"training sample {idx}")
# fig.show() needs a GUI backend and only emits a warning under
# `%matplotlib inline`; plt.show() renders the figure in the notebook.
plt.show()

Preprocess the data:

In [None]:
# This cell rebinds `images` and `y`, so running it twice would scale the data
# twice. Fail loudly instead of silently training on mis-scaled inputs.
assert images.size == 0 or images.max() > 1.0, (
    "images already look normalised; re-run the parsing cell before this one"
)

# Pixel intensities: [0, 255] -> [0, 1].
images = images / 255.0
# Keypoints: pixel coordinates -> fractions of the image size
# (x is divided by the width, y by the height).
y = y / np.array([IMG_WIDTH, IMG_HEIGHT], dtype=float)

## Model
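Note that the output layer uses a sigmoid activation even though this is a regression problem. The targets were scaled to fractions of the image size above, so the sigmoid keeps every prediction inside the valid (0, 1) range. Try removing the activation to see what happens.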

In [None]:
# Small CNN regressor: two strided convolution stages, each followed by batch
# normalisation and dropout, then a dense head with two sigmoid outputs.
model = keras.Sequential()

layer_stack = (
    keras.layers.Input(images.shape[1:]),
    # Stage 1: 20 filters, stride 2.
    keras.layers.Conv2D(filters=20, kernel_size=3, strides=2, activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.2),
    # Stage 2: 40 filters, stride 2.
    keras.layers.Conv2D(filters=40, kernel_size=3, strides=2, activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.2),
    # Head: flatten the feature maps and emit two values squashed by a sigmoid.
    keras.layers.Flatten(),
    keras.layers.Dense(2),
    keras.layers.Activation("sigmoid"),
)
for layer in layer_stack:
    model.add(layer)

# Regression objective: mean squared error only, no accuracy metric.
model.compile(optimizer=keras.optimizers.Adam(1e-3), loss="mse")

model.summary()

In [None]:
# Train the model; Keras holds out the last 15% of the samples (selected before
# shuffling) as validation data. The returned History object is kept so the
# loss curves can be inspected afterwards, and assigning it avoids leaving a
# bare `<History ...>` repr as the cell output.
history = model.fit(
    images,
    y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.15,
)

## Testing

In [None]:
# Load the test table and parse its "Image" column (one whitespace-separated
# pixel string per row) into a float32 array. np.fromstring converts the text
# in C, avoiding one Python float object per pixel, and float32 needs half the
# memory of the default float64 array.
test_df = pd.read_csv('../input/facial-keypoints-detection/test.zip')
test_images = np.array(
    [np.fromstring(image, dtype=np.float32, sep=" ") for image in test_df.pop("Image")],
    dtype=np.float32,
)
# Rows are flat pixel vectors; restore the (sample, height, width, channel) layout.
test_images = test_images.reshape(-1, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)

In [None]:
# Apply the same pixel scaling that was used for the training images, then map
# the sigmoid outputs back to pixel coordinates: x is multiplied by the image
# width and y by the image height (instead of a hard-coded 96.0 for both).
test_preds = model.predict(test_images / 255.0) * np.array(
    [IMG_WIDTH, IMG_HEIGHT], dtype=np.float32
)

In [None]:
# Visual check: draw six randomly chosen test faces with the predicted nose
# tip marked by a red cross.
fig, ax = plt.subplots(3, 2, figsize=(12, 12))
for axis in ax.flat:
    idx = np.random.randint(len(test_images))
    # Pass a 2-D array to imshow: a trailing channel axis of size 1 is not
    # accepted by every matplotlib version.
    axis.imshow(test_images[idx, :, :, 0], cmap="gray")
    axis.scatter(test_preds[idx][0], test_preds[idx][1], color="r", marker="x")
    axis.set_title(f"test sample {idx}")
# fig.show() needs a GUI backend and only emits a warning under
# `%matplotlib inline`; plt.show() renders the figure in the notebook.
plt.show()