In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.image as mpimg

from sklearn.metrics import classification_report

#from keras.models import Sequential 
#from keras.layers import BatchNormalization, Conv2D, Dense, Activation, MaxPooling2D, GlobalAveragePooling2D
#from keras import optimizers

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler

#from tensorflow.keras.preprocessing.image import ImageDataGenerator
#from tensorflow.keras.preprocessing.image import array_to_img, img_to_array, load_img

#PW: added warning ignore
import warnings
warnings.filterwarnings('ignore')

In [None]:
import dlib
import face_recognition

In [None]:
# The Kaggle CSVs are not in the git repo because of their size; both paths
# are resolved relative to the current working directory.
path_train = 'training.csv'
path_test = 'test.csv'

# Read both files with the same call so the two frames are loaded identically.
df_train, df_test = (pd.read_csv(csv_path) for csv_path in (path_train, path_test))

In [None]:
# Keypoint positions of the training set: the first 30 columns of the
# training CSV (the pixel data lives in the separate 'Image' column).
labels = df_train.iloc[:, :30]
labels.columns

In [None]:
# Expand the space-separated 'Image' string of every training row into one
# column per pixel, kept separate from the labels.
# The cells stay strings here; get_image_and_save casts a row to int when it
# rebuilds an image.
kaggle_train = (
    df_train['Image']
    .astype(str)
    .str.split(" ", expand=True)
)

In [None]:
# Same pixel expansion for the test set: one column per pixel ...
image_test = df_test['Image'].astype(str).str.split(" ", expand=True)
# ... then put the pixel columns back next to the ImageId column.
kaggle_test = df_test[['ImageId']].join(image_test)

In [None]:
# What we can do is attempt to label as much as we can.
# We can go through each example that does not have a complete set of labels,
# feed the image through face_recognition, and let it produce the labels for us.
# If no labels are produced, mark the index.

In [None]:
def get_image_and_save(index):
    """Rebuild training image ``index`` as an 8-bit greyscale PIL image.

    Reads row ``index`` of the global ``kaggle_train`` pixel table, stretches
    the values linearly so that the row minimum maps to 0 and the row maximum
    to 1, reshapes them to 96x96 and scales to the 0-255 range.

    Despite the name, nothing is written to disk here; the image is only
    returned and the caller decides what to do with it.

    ``Image`` (PIL) is imported in a later cell of this notebook, so that cell
    must have run before this function is called.
    """
    pixels = np.array(kaggle_train.iloc[index, :].astype(int))
    darkest, brightest = pixels.min(), pixels.max()
    # contrast-stretch the raw intensities to [0, 1]
    stretched = np.interp(pixels, (darkest, brightest), (0, +1))
    grid = np.reshape(stretched, (96, 96))
    return Image.fromarray(np.uint8(grid * 255), 'L')

In [None]:
# The 30 Kaggle keypoint columns, in the order used for a freshly created row.
_KAGGLE_LABEL_COLUMNS = (
    'left_eye_center_x', 'left_eye_center_y',
    'right_eye_center_x', 'right_eye_center_y',
    'left_eye_inner_corner_x', 'left_eye_inner_corner_y',
    'left_eye_outer_corner_x', 'left_eye_outer_corner_y',
    'right_eye_inner_corner_x', 'right_eye_inner_corner_y',
    'right_eye_outer_corner_x', 'right_eye_outer_corner_y',
    'left_eyebrow_inner_end_x', 'left_eyebrow_inner_end_y',
    'left_eyebrow_outer_end_x', 'left_eyebrow_outer_end_y',
    'right_eyebrow_inner_end_x', 'right_eyebrow_inner_end_y',
    'right_eyebrow_outer_end_x', 'right_eyebrow_outer_end_y',
    'nose_tip_x', 'nose_tip_y',
    'mouth_left_corner_x', 'mouth_left_corner_y',
    'mouth_right_corner_x', 'mouth_right_corner_y',
    'mouth_center_top_lip_x', 'mouth_center_top_lip_y',
    'mouth_center_bottom_lip_x', 'mouth_center_bottom_lip_y',
)

# Landmark groups that are translated when the caller supplies no key list and
# the notebook-level ``kaggle_keys`` has not been defined yet.
_DEFAULT_FEATURE_KEYS = ('left_eyebrow', 'right_eyebrow', 'nose_tip',
                         'left_eye', 'right_eye', 'top_lip', 'bottom_lip')

# Half-width (in pixels) of the band around the mouth's horizontal midpoint in
# which the lip-centre point is searched.
_LIP_CENTER_BAND = 5


def _as_xy(points):
    """Turn a sequence of (x, y) pairs into a DataFrame with columns x and y."""
    return pd.DataFrame(list(points), columns=['x', 'y'])


def _x_extremes(points):
    """Return (row with the smallest x, row with the largest x)."""
    return points.loc[points['x'].idxmin()], points.loc[points['x'].idxmax()]


def _largest_y_point(points):
    """Return the row with the largest y."""
    return points.loc[points['y'].idxmax()]


def _subject_side(face_landmarks, facial_feature):
    """Decide which label side ('left' or 'right') an eye/eyebrow belongs to.

    The decision is geometric rather than name-based: of the two paired
    features, the one lying at the larger x is stored in the ``left_*``
    columns. That is the same rule the mouth corners follow (largest x ->
    ``mouth_left_corner``), so all paired labels agree with each other no
    matter which naming convention the landmark detector uses.
    """
    named_side, part = facial_feature.split('_', 1)
    mirrored_side = 'right' if named_side == 'left' else 'left'
    left_named = face_landmarks.get('left_' + part)
    right_named = face_landmarks.get('right_' + part)
    if left_named is None or right_named is None:
        # NOTE(review): without the partner feature the geometry cannot be
        # checked; this assumes the detector names sides as seen by the
        # viewer (its "left" has the smaller x) - confirm for other inputs.
        return mirrored_side
    left_named_x = _as_xy(left_named)['x'].mean()
    right_named_x = _as_xy(right_named)['x'].mean()
    return named_side if left_named_x > right_named_x else mirrored_side


def _inner_outer(points, side):
    """Return (inner point, outer point) of an eye/eyebrow on label side ``side``.

    The inner end is the one pointing at the partner feature: for the
    ``left`` side (larger x) that is the smallest x, for the ``right`` side
    the largest x.
    """
    smallest_x, largest_x = _x_extremes(points)
    return (smallest_x, largest_x) if side == 'left' else (largest_x, smallest_x)


def _lip_center_y(points, topmost):
    """y of the lip edge close to the horizontal middle of the lip.

    Only points within ``_LIP_CENTER_BAND`` pixels of the midpoint between the
    smallest and largest x are considered; of those the smallest y is returned
    when ``topmost`` is true, otherwise the largest. If no point falls inside
    the band, all points are used instead of failing.
    """
    mid_x = (points['x'].min() + points['x'].max()) / 2
    central = points[(points['x'] - mid_x).abs() <= _LIP_CENTER_BAND]
    if central.empty:
        central = points
    return central['y'].min() if topmost else central['y'].max()


def make_points(labels_fr, index, feature_keys=None):
    """Translate face_recognition landmarks into one row of Kaggle keypoints.

    Parameters
    ----------
    labels_fr : list of dict
        Result of ``face_recognition.face_landmarks``: one dict per detected
        face, mapping a feature name to a list of (x, y) points.
    index : int
        Row number of the example; stored under ``'row_index'``.
    feature_keys : iterable of str, optional
        Landmark groups to translate. Defaults to the notebook-level
        ``kaggle_keys`` when it exists, otherwise to the seven groups this
        function knows about. Names without a Kaggle counterpart are skipped.

    Returns
    -------
    pandas.Series
        ``'row_index'`` plus the 30 Kaggle coordinate columns. A coordinate
        stays 0 when no landmark was translated for it (e.g. ``labels_fr`` is
        empty). When several faces are given, later faces overwrite earlier
        ones.

    Notes
    -----
    * Eyes and eyebrows are assigned to the ``left_*`` / ``right_*`` columns
      by position (larger x -> left), consistent with the mouth corners, and
      the inner corner/end is the one facing the partner feature.
      NOTE(review): this matches labels named from the subject's point of
      view - confirm against the dataset description.
    * The nose tip is the ``nose_tip`` point with the largest y.
    * Both lip-centre x values are taken from the nose tip's x; only the y
      comes from the lip itself.
    """
    if feature_keys is None:
        # The notebook defines kaggle_keys in a later cell than this function,
        # so look it up at call time and fall back to the built-in list.
        feature_keys = globals().get('kaggle_keys', _DEFAULT_FEATURE_KEYS)

    new_row_label = {'row_index': index}
    new_row_label.update(dict.fromkeys(_KAGGLE_LABEL_COLUMNS, 0))

    for face_landmarks in labels_fr:
        for facial_feature in feature_keys:
            if facial_feature not in _DEFAULT_FEATURE_KEYS:
                continue  # e.g. 'chin' / 'nose_bridge': nothing to fill in
            points = _as_xy(face_landmarks[facial_feature])

            if facial_feature in ('left_eyebrow', 'right_eyebrow'):
                side = _subject_side(face_landmarks, facial_feature)
                inner, outer = _inner_outer(points, side)
                new_row_label[side + '_eyebrow_inner_end_x'] = inner['x']
                new_row_label[side + '_eyebrow_inner_end_y'] = inner['y']
                new_row_label[side + '_eyebrow_outer_end_x'] = outer['x']
                new_row_label[side + '_eyebrow_outer_end_y'] = outer['y']

            elif facial_feature in ('left_eye', 'right_eye'):
                side = _subject_side(face_landmarks, facial_feature)
                inner, outer = _inner_outer(points, side)
                # the eye centre is the midpoint between its two corners
                new_row_label[side + '_eye_center_x'] = (inner['x'] + outer['x']) / 2
                new_row_label[side + '_eye_center_y'] = (inner['y'] + outer['y']) / 2
                new_row_label[side + '_eye_inner_corner_x'] = inner['x']
                new_row_label[side + '_eye_inner_corner_y'] = inner['y']
                new_row_label[side + '_eye_outer_corner_x'] = outer['x']
                new_row_label[side + '_eye_outer_corner_y'] = outer['y']

            elif facial_feature == 'nose_tip':
                tip = _largest_y_point(points)
                new_row_label['nose_tip_x'] = tip['x']
                new_row_label['nose_tip_y'] = tip['y']

            elif facial_feature == 'top_lip':
                nose = _largest_y_point(_as_xy(face_landmarks['nose_tip']))
                new_row_label['mouth_center_top_lip_x'] = nose['x']
                new_row_label['mouth_center_top_lip_y'] = _lip_center_y(points, topmost=True)

            else:  # 'bottom_lip'
                right_corner, left_corner = _x_extremes(points)
                nose = _largest_y_point(_as_xy(face_landmarks['nose_tip']))
                new_row_label['mouth_left_corner_x'] = left_corner['x']
                new_row_label['mouth_left_corner_y'] = left_corner['y']
                new_row_label['mouth_right_corner_x'] = right_corner['x']
                new_row_label['mouth_right_corner_y'] = right_corner['y']
                new_row_label['mouth_center_bottom_lip_x'] = nose['x']
                new_row_label['mouth_center_bottom_lip_y'] = _lip_center_y(points, topmost=False)

    return pd.Series(new_row_label)

In [None]:
from PIL import Image
import PIL.ImageOps
from matplotlib import pyplot as mp

# Work on a deep copy so the original `labels` frame is left untouched.
copy_labels = labels.copy(deep=True)
copy_labels['row_index'] = range(len(copy_labels))
# Number of missing coordinates per example (row_index itself is never missing).
copy_labels['nan_count'] = copy_labels.isna().sum(axis=1)

# face_recognition landmark groups that make_points translates into Kaggle columns.
kaggle_keys = ['left_eyebrow','right_eyebrow','nose_tip','left_eye','right_eye','top_lip','bottom_lip']

# One label Series per training example, in row order.
edit_labels = []

for index, row in copy_labels.iterrows():
    # The original labels of this example plus row_index (nan_count is left out).
    original_row = row.loc['left_eye_center_x':'row_index']

    # Complete rows are taken over as they are.
    if row['nan_count'] == 0:
        edit_labels.append(original_row)
        continue

    # Incomplete row: let face_recognition propose the landmarks.
    print(index)
    array_image = get_image_and_save(index)
    # Hand the pixels to face_recognition in memory as an RGB array. This
    # avoids the lossy JPEG round trip through a temporary 'foo.jpg' in the
    # working directory, and it no longer overwrites `image_test`, which holds
    # the test-set pixel table built in an earlier cell.
    face_pixels = np.array(array_image.convert('RGB'))
    detected_landmarks = face_recognition.face_landmarks(face_pixels)

    if len(detected_landmarks) == 0:
        # face_recognition was unable to detect anything: keep the original row
        edit_labels.append(original_row)
    else:
        created_labels = make_points(detected_landmarks, index)
        edit_labels.append(created_labels)
            
            
        
        

In [None]:
# Turn every collected label Series into a one-row DataFrame so the rows can
# be stacked with pd.concat in the next step.
edited_labels = [pd.DataFrame(label_row).T for label_row in edit_labels]

In [None]:
# Stack the one-row frames into a single table with a fresh 0..n-1 index and
# discard the bookkeeping column.
edited_labels_df = (
    pd.concat(edited_labels)
    .reset_index(drop=True)
    .drop(columns='row_index')
)

In [None]:
# We don't want to overwrite everything: every coordinate that is present in
# the original labels is kept, and only the missing ones are taken from
# edited_labels_df.
#
# This used to assign into the `row` objects yielded by iterrows(). pandas does
# not guarantee that such writes reach the DataFrame (the row may be a copy,
# and always is with copy-on-write enabled), so the result could silently stay
# unfilled. fillna with a DataFrame aligns on index and column and fills
# exactly the NaN cells; coordinates missing on both sides stay NaN.
copy_labels_2 = labels.fillna(edited_labels_df)  # new frame, `labels` is not edited
list_of_columns = copy_labels_2.columns

# Sanity checks: same layout, and no original coordinate was changed.
assert copy_labels_2.shape == labels.shape
assert copy_labels_2.where(labels.notna()).equals(labels)

In [None]:
#copy_labels_2.to_csv('edited_labels_v2.csv', index=False) 
# commented out so as not to overwrite the file by accident.