This notebook presents a run-through of the Facial Keypoints Detection challenge hosted on Kaggle.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input mount.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from matplotlib import pyplot as plt
%matplotlib inline 

In [None]:
# Location of the competition files; the trailing slash lets file names be appended directly.
base_dir = '/kaggle/input/facial-keypoints-detection/'
# Zipped CSV archives shipped with the competition.
train_dir_zip = f"{base_dir}training.zip"
test_dir_zip = f"{base_dir}test.zip"

In [None]:
from zipfile import ZipFile
# Unpack both archives into the current working directory, training data first,
# reporting each one as soon as it has been extracted.
archives_to_unpack = (
    (train_dir_zip, "Train Archive unzipped"),
    (test_dir_zip, "Test Archive unzipped"),
)
for archive_path, done_message in archives_to_unpack:
    with ZipFile(archive_path, 'r') as archive:
        archive.extractall('.')
        print(done_message)

In [None]:
# CSV files expected in the working directory once the archives have been extracted.
train_dir = './training.csv'
test_dir = './test.csv'

# Load both tables into DataFrames.
train, test = (pd.read_csv(csv_path) for csv_path in (train_dir, test_dir))

In [None]:
# Preview the first five rows of the raw training table.
train.head(n=5)

In [None]:
# Report the table's dimensions, then its per-column summary.
n_rows, n_cols = train.shape
print(f"Size of dataframe: {n_rows}x{n_cols}\n")

# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so appends a stray "None" line.
train.info()

In [None]:
# Keep only fully-annotated rows and renumber them 0..n-1 so that positional
# and label-based row access agree in the cells below.
# NOTE(review): this discards every row with at least one missing value, which
# may remove a large share of the training set -- confirm that is acceptable.
train = train.dropna().reset_index(drop=True)

remaining_rows, remaining_cols = train.shape
print("After droppping all the rows with any NA in column\n")
print(f"Size = {remaining_rows}x{remaining_cols}")

In [None]:
# Inputs: every 'Image' entry is a whitespace-separated string of pixel values.
# Parse each string into a flat float vector, then reshape the whole batch to
# (n_samples, 96, 96, 1) float32 images with a single channel.
X = np.reshape(
    [np.asarray(pixels.split(), dtype=float) for pixels in train['Image']],
    (-1, 96, 96, 1),
).astype('float32')

# Targets: the first 30 columns of the table (the plotting cells treat them as
# 15 interleaved x/y coordinate pairs). One vectorised slice replaces the
# row-by-row `.iloc[i]` loop: same values, far less Python overhead, and Y stays
# 2-D with shape (n_samples, 30) even when the frame is empty.
Y = train.iloc[:, :30].to_numpy().astype('float32')

In [None]:
# Sanity check: shape of the image tensor, then of the target matrix, one per line.
print(X.shape, Y.shape, sep='\n')

In [None]:
# Show the first `disp` training images with their annotated keypoints overlaid.
disp = 8
grid_cols = 4
grid_rows = (disp + grid_cols - 1) // grid_cols  # ceil(disp / grid_cols)

# squeeze=False keeps `axes` two-dimensional even when the grid has a single
# row; without it, any disp <= 4 makes axes[row, col] raise IndexError.
fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(15, 10), squeeze=False)

for i in range(disp):
    ax = axes[i // grid_cols, i % grid_cols]
    ax.imshow(X[i].reshape(96, 96), cmap='gray')
    # Positional row access, so the keypoints always belong to the same sample
    # as X[i]. Even-numbered columns are plotted as x, odd-numbered ones as y.
    keypoints = train.iloc[i, :30].to_numpy(dtype=float)
    ax.scatter(keypoints[0::2], keypoints[1::2], s=10, c='r')

In [None]:
import tensorflow
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import BatchNormalization, Flatten, Dense, Dropout, Conv2D, MaxPool2D, LeakyReLU

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the samples; the held-out part (X_test / Y_test) is what the
# training cell passes as validation data. The seed is fixed so that the split,
# and therefore every metric derived from it, is the same on each
# Restart & Run All.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.1, random_state=42
)

In [None]:
# Convolutional regressor mapping a 96x96 single-channel image to 30 outputs.
#
# The feature extractor is six stages of increasing width. Every stage stacks
# two identical units (3x3 'same' convolution without bias -> LeakyReLU(0.1) ->
# BatchNormalization), and every stage except the last is followed by a 2x2
# max-pool, so the five poolings reduce the feature map from 96x96 to 3x3.
model = Sequential()

# Number of convolution filters per stage, shallowest stage first.
stage_filters = (32, 64, 96, 128, 256, 512)
last_stage = len(stage_filters) - 1

for stage_idx, n_filters in enumerate(stage_filters):
    for unit_idx in range(2):
        conv_kwargs = {'padding': 'same', 'use_bias': False}
        if stage_idx == 0 and unit_idx == 0:
            # Only the very first layer declares the network's input shape.
            conv_kwargs['input_shape'] = (96, 96, 1)
        model.add(Conv2D(n_filters, (3, 3), **conv_kwargs))
        model.add(LeakyReLU(alpha=0.1))
        model.add(BatchNormalization())
    # Downsample between stages; the final stage keeps its resolution.
    if stage_idx < last_stage:
        model.add(MaxPool2D(pool_size=(2, 2)))

# Regression head: flatten, one hidden dense layer with light dropout, then a
# linear layer producing the 30 outputs (no activation, as needed for regression).
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(30))
model.summary()

In [None]:
# Train as a regression problem: mean-squared-error loss, with mean absolute
# error reported alongside it.
model.compile(loss='mse', optimizer='Adam', metrics=['mae'])

# The held-out split is evaluated after every epoch; the returned History
# object keeps the per-epoch metrics for plotting.
history = model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_test, Y_test),
    batch_size=32,
    epochs=500,
)

In [None]:
# Training vs. validation loss per epoch, drawn on the same axes.
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'validation')):
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Loss vs Epoch')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Parse the test images the same way as the training images: each 'Image'
# entry is a whitespace-separated pixel string, converted to a float vector and
# batched into (n_samples, 96, 96, 1) float32 images.
Test_Data = np.reshape(
    [np.asarray(pixels.split(), dtype=float) for pixels in test['Image']],
    (-1, 96, 96, 1),
).astype('float32')

In [None]:
# Run the trained network over every test image; each row of `Pred` holds the
# 30 predicted values for one image.
Pred = model.predict(x=Test_Data)

In [None]:
# Show the first `disp` test images with the predicted keypoints overlaid.
disp = 8
grid_cols = 4
grid_rows = (disp + grid_cols - 1) // grid_cols  # ceil(disp / grid_cols)

# squeeze=False keeps `axes` two-dimensional even when the grid has a single
# row; without it, any disp <= 4 makes axes[row, col] raise IndexError.
fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(15, 10), squeeze=False)

for i in range(disp):
    ax = axes[i // grid_cols, i % grid_cols]
    ax.imshow(Test_Data[i].reshape(96, 96), cmap='gray')
    # Predictions are interleaved: even positions are plotted as x, odd as y.
    predicted = Pred[i][:30]
    ax.scatter(predicted[0::2], predicted[1::2], s=10, c='r')

In [None]:
# Build the submission table: one row per (image, feature) pair requested by
# the lookup table, filled with the matching predicted value.
idtable = pd.read_csv(base_dir + 'IdLookupTable.csv')
rowId = idtable['RowId'].tolist()
imageId = idtable['ImageId'].tolist()

# Position of each requested feature among the training columns; this is also
# its position within a prediction row.
featureHead = train.columns.values.tolist()
featureIndex = list(map(featureHead.index, idtable['FeatureName']))

# NOTE(review): ImageId appears to be 1-based (hence the -1) and is assumed to
# follow the row order of test.csv -- confirm against the competition data.
loc = [
    Pred[image_number - 1][column_pos]
    for column_pos, image_number in zip(featureIndex, imageId)
]
subm = pd.DataFrame({'RowId': rowId, 'Location': loc})
subm.head()

In [None]:
# Write the submission file to the working directory, without the DataFrame index column.
submission_path = 'submission.csv'
subm.to_csv(submission_path, index=False)