In [13]:
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
import os
from PIL import Image
from ipywidgets import IntProgress, IntText
from sklearn.decomposition import PCA

In [2]:
# Haar-cascade frontal-face detector shipped with OpenCV; shared by the
# detection helpers defined in the following cells.
face_cascade = cv2.CascadeClassifier(
    os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml')
)

In [81]:
def convert_image(filename):
    """Write a 512x512 grayscale copy of an image into the ``faces`` folder.

    Parameters
    ----------
    filename : str
        Name of an image file located in ``./attachements/`` (file name only,
        the directory is hard-coded).

    Returns
    -------
    str
        Path of the written copy, ``./attachements/faces/<filename>``.
    """
    output_name = "./attachements/faces/" + filename
    # Allow the helper to be called on its own, before anything else has
    # created the output folder.
    os.makedirs("./attachements/faces/", exist_ok=True)
    # The context manager releases the source file handle deterministically.
    with Image.open("./attachements/" + filename) as source:
        image = source.convert('L').resize((512, 512))
    # convert()/resize() return new images whose ``format`` is None, so the
    # output format is always inferred from the file extension.
    image.save(output_name)
    return output_name

In [73]:
def detect_face(image_path):
    """Return the crop and bounding box of the first face found in an image.

    Parameters
    ----------
    image_path : str
        Path of the image file to search.

    Returns
    -------
    tuple
        ``(crop, box)`` where ``crop`` is the sub-array of the image covering
        the first detection and ``box`` is that detection's ``(x, y, w, h)``
        entry as returned by the cascade. ``(None, None)`` when no face is
        detected.
    """
    # Load the pixels into memory and release the file handle right away.
    with Image.open(image_path) as image_pil:
        image = np.array(image_pil, 'uint8')
    # Detect the face in the image
    faces = face_cascade.detectMultiScale(image)
    if len(faces) == 0:
        return None, None

    (x, y, w, h) = faces[0]
    # NumPy indexes rows first: rows span the height (h), columns the width (w).
    return image[y:y + h, x:x + w], faces[0]

In [82]:
def get_images_and_labels(path):
    """Detect faces in every ``.jpg`` directly under ``path`` and collect the crops.

    Each image is first normalised with ``convert_image`` (which writes a copy
    into the ``faces`` folder). Copies in which no face is detected are moved
    to the ``no_faces`` folder. A notebook progress bar and counter are
    displayed while the images are processed.

    Parameters
    ----------
    path : str
        Directory containing the ``.jpg`` files.
        NOTE(review): ``convert_image`` reads from and writes to the hard-coded
        ``./attachements/`` folder, so this only works end to end when ``path``
        points at that same folder.

    Returns
    -------
    pandas.DataFrame
        One row per detected face with columns ``slug`` (source file name) and
        ``image`` (the cropped face as a uint8 array).
    """
    # os.path.join keeps this correct whether or not ``path`` ends with a slash.
    facepath = os.path.join(path, "faces", "")
    No_facepath = os.path.join(path, "no_faces", "")
    os.makedirs(facepath, exist_ok=True)
    os.makedirs(No_facepath, exist_ok=True)

    image_paths = [os.path.join(path, f) for f in os.listdir(path) if f.endswith('.jpg')]
    df = pd.DataFrame(columns=['slug', 'image'])
    label = 0

    progressBar = IntProgress(description='Finding faces in '+str(len(image_paths))+ ' images: ', min=0, max=len(image_paths), style = {'description_width': 'initial'})
    progressText = IntText(value=0, description='Images searched so far: ', style = {'description_width': 'initial'})
    display(progressBar, progressText)

    for image_path in image_paths:
        # basename is independent of how many directory components ``path`` has,
        # unlike indexing into image_path.split('/').
        filename = os.path.basename(image_path)
        converted_path = convert_image(filename)
        # Read the pixels and close the file before it is possibly moved below.
        with Image.open(converted_path) as image_pil:
            image = np.array(image_pil, 'uint8')
        faces = face_cascade.detectMultiScale(image)
        if len(faces) == 0:
            # Move the converted copy out of the faces folder; os.replace also
            # overwrites a leftover file from an earlier run on every platform.
            os.replace(converted_path, os.path.join(No_facepath, filename))
        for (x, y, w, h) in faces:
            # Row-wise .loc assignment stores each 2-D crop as a single cell.
            df.loc[label] = [filename, image[y: y + h, x: x + w]]
            label += 1
        progressBar.value += 1
        progressText.value += 1
    return df

In [84]:
# Folder holding the source .jpg files; the trailing slash matters because
# get_images_and_labels builds sub-folder paths by string concatenation.
path = './attachements/'

# One row per detected face: 'slug' (source file name) and 'image' (face crop).
df_faces = get_images_and_labels(path)

IntProgress(value=0, description='Finding faces in 293 images: ', max=293, style=ProgressStyle(description_wid…

IntText(value=0, description='Images searched so far: ', style=DescriptionStyle(description_width='initial'))

In [46]:
def flatten(list_2d):
    """Flatten a 2-D sequence of integer rows into one flat list of floats.

    Parameters
    ----------
    list_2d : sequence of sequences (e.g. a 2-D NumPy array)
        Rows to concatenate in order. A row is kept when its first element is
        a Python ``int`` or any NumPy integer; other rows are skipped and the
        type of their first element is printed. Empty rows contribute nothing.

    Returns
    -------
    list
        All kept values in row-major order, as float64 values (the accumulator
        starts from an empty float array).
    """
    # Seed with an empty float array so the result is float64 even when no row
    # is kept, then concatenate once instead of once per row.
    kept_rows = [np.empty(0,)]
    for row in list_2d:
        if len(row) == 0:
            continue
        first = row[0]
        # bool is a subclass of int but is not treated as pixel data here.
        if isinstance(first, (int, np.integer)) and not isinstance(first, bool):
            kept_rows.append(np.asarray(row))
        else:
            print(type(first))

    return list(np.concatenate(kept_rows))

In [47]:
# Add a 'flat_image' column holding each face crop flattened to a 1-D list.
# This mutates df_faces in place; re-running the cell overwrites the same column.
df_faces['flat_image'] = df_faces['image'].apply(flatten)

(101, 3)

In [48]:
# One row per face, one column per pixel position. Shorter vectors are padded
# with NaN by the constructor and then zero-filled. Building the frame in one
# call avoids creating a separate Series per row, which is what
# .apply(pd.Series) does.
# NOTE(review): crops of different sizes are only aligned by flat index, so a
# given column may not refer to the same pixel position across faces — confirm
# this is acceptable for the PCA below.
vectors = pd.DataFrame(df_faces['flat_image'].tolist(), index=df_faces.index).fillna(0)

In [49]:
# Number of principal components to keep; also drives the pca-N column naming
# in the next cell, so the model must be built from the same value.
c = 15
# random_state pins the result across runs; it only has an effect when
# scikit-learn picks a randomized SVD solver for this input.
pca = PCA(n_components=c, random_state=0)

pca_result = pca.fit_transform(vectors.values)
# Fraction of the total variance retained by the kept components.
print(sum(pca.explained_variance_ratio_))

0.8613621508302944


In [50]:
# Table of PCA scores: one row per face (indexed by its slug) and one
# "pca-<k>" column per retained component.
pca_df = pd.DataFrame(
    pca_result[:, :c],
    index=df_faces['slug'],
    columns=["pca-" + str(k) for k in range(c)],
)

In [51]:
# Preview the first rows of the PCA score table.
# NOTE(review): the recorded output shows 'attachements' as the slug of every
# row, i.e. a directory name rather than a file name — verify the slug values
# after re-running the notebook.
pca_df.head()

Unnamed: 0_level_0,pca-0,pca-1,pca-2,pca-3,pca-4,pca-5,pca-6,pca-7,pca-8,pca-9,pca-10,pca-11,pca-12,pca-13,pca-14
slug,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
attachements,-4286.891345,3460.901705,-2287.053958,237.517989,558.684153,-490.481738,-198.597786,-50.601145,-73.618074,-415.938519,-135.252814,-23.52723,41.409421,-124.889636,-9.713776
attachements,-1153.032927,-2150.543513,1667.082188,334.773203,1047.195609,-239.317252,-1668.788283,798.512159,-15.490188,-261.884251,-73.156095,-43.022641,-357.317946,-525.228536,-145.988663
attachements,-2545.834089,-825.942418,2834.185768,-1385.451985,-1263.059639,610.877648,1016.270799,-561.447266,109.605345,929.294219,239.390534,272.122708,-473.716454,-401.944122,102.523006
attachements,1366.113334,-3876.920109,-222.711948,1702.719213,-165.706237,1612.910032,-847.769288,-19.787991,263.359985,1318.416591,421.040455,663.930982,-966.693104,-1182.107525,-634.292573
attachements,-4180.899985,3094.9909,-1755.677415,4.07923,142.684588,439.836546,-714.005781,232.218882,-7.246643,-347.002399,-125.76494,-258.865312,209.810218,144.412378,-139.544183


In [52]:
# Create the output folder if needed so the export also works on a fresh
# checkout, then write the PCA scores (slug index included) as CSV.
os.makedirs("./data", exist_ok=True)
pca_df.to_csv("./data/faces.csv", encoding='utf-8')