### Webcam Facial Recognition 

First part of this notebook: Transfer learning CNN without Face Landmarks from Mediapipe

Later part: Facial Recognition with Face Landmarks from Mediapipe

In [21]:
import cv2
import os
import pickle
import numpy as np
from PIL import Image

import matplotlib.pyplot as plt

headshots_folder_name = 'dataset_cnn'

# dimension of the face crops written back to disk
image_width = 224
image_height = 224

# Haar cascade used to locate the face in each headshot
facecascade = cv2.CascadeClassifier('data/cascades/haarcascade_frontalface_default.xml')

# set the directory containing the images (one sub-directory per person)
images_dir = os.path.join(".", headshots_folder_name)

current_id = 0
label_ids = {}

# Walk every image below images_dir, detect the face, crop + resize it to
# image_width x image_height and overwrite the original file with the crop.
# NOTE: files are replaced in place, so re-running this cell works on the
# already-cropped images, not on the original headshots.
for root, _, files in os.walk(images_dir):
    for file in files:
        # compare case-insensitively so "IMG_01.JPG" is not silently ignored
        if not file.lower().endswith(("png", "jpg", "jpeg")):
            continue

        # path of the image
        path = os.path.join(root, file)

        # get the label name (name of the person) from the folder name
        label = os.path.basename(root).replace(" ", ".").lower()

        # add the label (key) and its number (value)
        if label not in label_ids:
            label_ids[label] = current_id
            current_id += 1

        # load the image; OpenCV returns the pixels in BGR channel order
        imgtest = cv2.imread(path, cv2.IMREAD_COLOR)
        if imgtest is None:
            # imread signals an unreadable/corrupt file by returning None
            print(f'---Photo skipped (could not be read): {path}---\n')
            continue

        # untouched copy of the pixels; the preview rectangle below is drawn
        # on imgtest only, so the saved crop stays clean
        image_array = np.array(imgtest, "uint8")

        # get the faces detected in the image
        faces = facecascade.detectMultiScale(imgtest,
                                             scaleFactor=1.1, minNeighbors=5)

        # if not exactly 1 face is detected, skip this photo
        if len(faces) != 1:
            print(f'---Photo skipped---\n')
            continue

        # exactly one detection at this point
        x_, y_, w, h = faces[0]

        # draw the face detected and preview it; matplotlib expects RGB,
        # so convert from OpenCV's BGR before showing
        face_detect = cv2.rectangle(imgtest,
                                    (x_, y_),
                                    (x_ + w, y_ + h),
                                    (255, 0, 255), 2)
        plt.imshow(cv2.cvtColor(face_detect, cv2.COLOR_BGR2RGB))
        plt.show()

        # target size of the crop
        size = (image_width, image_height)

        # detected face region
        roi = image_array[y_: y_ + h, x_: x_ + w]

        # resize the detected head to target size
        resized_image = cv2.resize(roi, size)

        # PIL interprets a 3-channel array as RGB; convert from BGR first,
        # otherwise the saved file has its red and blue channels swapped
        image_array = np.array(
            cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB), "uint8")

        # replace the image with only the face
        im = Image.fromarray(image_array)
        im.save(path)


In [2]:
# Without Mediapipe, Training CNN

import os
import pandas as pd
import numpy as np
import tensorflow
from tensorflow import keras as keras

import matplotlib.pyplot as plt

from keras.layers import Dense, GlobalAveragePooling2D

from keras.preprocessing import image
from keras.applications.mobilenet import preprocess_input

from keras.preprocessing.image import ImageDataGenerator

from keras.models import Model
from keras.optimizers import Adam

In [20]:
# Generator factory: every image is run through `preprocess_input`
# before it reaches the network.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Batches of 32 shuffled RGB images, resized to 224x224, read from
# ./dataset_cnn with class_mode='categorical'.
train_generator = train_datagen.flow_from_directory(
    './dataset_cnn',
    target_size=(224, 224),
    color_mode='rgb',
    batch_size=32,
    class_mode='categorical',
    shuffle=True,
)

Found 42 images belonging to 5 classes.


In [4]:
# Number of output classes = number of entries in the generator's
# label -> index mapping (`class_indices` is a dict).
NO_CLASSES = len(train_generator.class_indices)
NO_CLASSES

5

In [5]:
from keras_vggface.vggface import VGGFace 

# Complete VGG-Face network (vgg16 variant) *with* its original top layers,
# instantiated here to inspect the architecture.
base_model = VGGFace(
    model='vgg16',
    include_top=True,
    input_shape=(224, 224, 3),
)
base_model.summary()

# number of layers in the full network
# (the original author noted 26 layers for the original VGG-Face)
print(len(base_model.layers))

Model: "vggface_vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv1_1 (Conv2D)            (None, 224, 224, 64)      1792      
                                                                 
 conv1_2 (Conv2D)            (None, 224, 224, 64)      36928     
                                                                 
 pool1 (MaxPooling2D)        (None, 112, 112, 64)      0         
                                                                 
 conv2_1 (Conv2D)            (None, 112, 112, 128)     73856     
                                                                 
 conv2_2 (Conv2D)            (None, 112, 112, 128)     147584    
                                                                 
 pool2 (MaxPooling2D)        (None, 56, 56, 128)     

In [6]:

# Same vgg16 VGG-Face network, this time *without* the top layers; this is
# the base that the new classification head is attached to.
base_model = VGGFace(
    model='vgg16',
    include_top=False,
    input_shape=(224, 224, 3),
)
base_model.summary()

# number of remaining layers
# (the original author noted 19 after excluding the last few layers)
print(len(base_model.layers))

Model: "vggface_vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv1_1 (Conv2D)            (None, 224, 224, 64)      1792      
                                                                 
 conv1_2 (Conv2D)            (None, 224, 224, 64)      36928     
                                                                 
 pool1 (MaxPooling2D)        (None, 112, 112, 64)      0         
                                                                 
 conv2_1 (Conv2D)            (None, 112, 112, 128)     73856     
                                                                 
 conv2_2 (Conv2D)            (None, 112, 112, 128)     147584    
                                                                 
 pool2 (MaxPooling2D)        (None, 56, 56, 128)     

In [7]:
# New classification head on top of the base model's output:
# global average pooling -> Dense(1024) -> Dense(1024) -> Dense(512) -> softmax
x = GlobalAveragePooling2D()(base_model.output)

# three fully connected ReLU layers, created in the listed order
for units in (1024, 1024, 512):
    x = Dense(units, activation='relu')(x)

# final layer with softmax activation, one unit per class
preds = Dense(NO_CLASSES, activation='softmax')(x)

In [8]:
# create a new model with the base model's original input and the
# new head's output
model = Model(inputs=base_model.input, outputs=preds)

# The layers of `model` start with the layers of `base_model`, followed by
# the new head. Derive the boundary from the base model instead of
# hard-coding its layer count, so the split stays correct if the base changes.
n_base_layers = len(base_model.layers)

# don't train the pre-trained base layers
for layer in model.layers[:n_base_layers]:
    layer.trainable = False

# train the newly added head layers
for layer in model.layers[n_base_layers:]:
    layer.trainable = True

# print the summary only after freezing, so the reported trainable /
# non-trainable parameter counts describe the model that is actually trained
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv1_1 (Conv2D)            (None, 224, 224, 64)      1792      
                                                                 
 conv1_2 (Conv2D)            (None, 224, 224, 64)      36928     
                                                                 
 pool1 (MaxPooling2D)        (None, 112, 112, 64)      0         
                                                                 
 conv2_1 (Conv2D)            (None, 112, 112, 128)     73856     
                                                                 
 conv2_2 (Conv2D)            (None, 112, 112, 128)     147584    
                                                                 
 pool2 (MaxPooling2D)        (None, 56, 56, 128)       0     

                                                                 
 conv5_3 (Conv2D)            (None, 14, 14, 512)       2359808   
                                                                 
 pool5 (MaxPooling2D)        (None, 7, 7, 512)         0         
                                                                 
 global_average_pooling2d (G  (None, 512)              0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 1024)              525312    
                                                                 
 dense_1 (Dense)             (None, 1024)              1049600   
                                                                 
 dense_2 (Dense)             (None, 512)               524800    
                                                                 
 dense_3 (Dense)             (None, 5)                 2565      
          

In [9]:
# Configure training: 'Adam' optimizer, categorical cross-entropy loss,
# accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

In [10]:
# Train for 20 epochs on the directory generator.
# `batch_size` is deliberately not passed: the generator already yields
# batches (batch_size=32 where `train_generator` is created), and Keras
# documents that `batch_size` must not be specified for generator input.
model.fit(
    train_generator,
    epochs=20,
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2956e106260>

In [11]:
# write the trained model to a single HDF5 file
model.save('transfer_learning_trained_face_cnn_model.h5')

In [12]:
from keras.models import load_model

# drop the in-memory model so the reload below is a real round trip
del model

# load the model back from the HDF5 file written above
# (returns a compiled model identical to the previous one)
model = load_model('transfer_learning_trained_face_cnn_model.h5')

In [13]:
import pickle

# Invert the generator's label -> index mapping into index -> label,
# so a predicted class index can be translated back to a name.
class_dictionary = dict(
    (index, label)
    for label, index in train_generator.class_indices.items()
)
print(class_dictionary)

{0: 'constantin', 1: 'joshua', 2: 'luis', 3: 'pascal', 4: 'tom cruise'}


In [14]:
# persist the index -> label dictionary with pickle
face_label_filename = 'face-labels.pickle'
with open(face_label_filename, 'wb') as label_file:
    pickle.dump(class_dictionary, label_file)

In [15]:
# Further Imports 
import cv2
import os
import pickle
import numpy as np
import pickle

from PIL import Image
import matplotlib.pyplot as plt
from keras.preprocessing import image
from keras_vggface import utils

In [16]:
# dimension of images
image_width, image_height = 224, 224

# read the index -> label dictionary back from the pickle file
face_label_filename = 'face-labels.pickle'
with open(face_label_filename, "rb") as label_file:
    class_dictionary = pickle.load(label_file)

# label names in the dictionary's iteration order
class_list = list(class_dictionary.values())
print(class_list)


['constantin', 'joshua', 'luis', 'pascal', 'tom cruise']


In [17]:
from PIL import Image
import numpy as np
import cv2
import pickle
from keras.models import load_model

# Live face recognition on the default webcam: detect faces with a Haar
# cascade, classify each face crop with the trained CNN and draw the
# predicted name and confidence on the frame. Press ESC to quit.

# Training feeds images through this exact function (see the
# ImageDataGenerator cell), so inference must use it as well.
from keras.applications.mobilenet import preprocess_input

# for face detection
face_cascade = cv2.CascadeClassifier(
    'data/cascades/haarcascade_frontalface_default.xml')

# resolution of the webcam
# NOTE(review): these two values are defined but never applied to the
# capture device in this cell.
screen_width = 1280       # try 640 if code fails
screen_height = 720

# size of the image to predict
image_width = 224
image_height = 224

# load the trained model
model = load_model('transfer_learning_trained_face_cnn_model.h5')

# the labels for the trained model (class index -> name)
# NOTE: pickle.load must only be used on files this notebook wrote itself.
with open("face-labels.pickle", 'rb') as f:
    labels = pickle.load(f)
print(labels)

# drawing settings shared by the rectangle and both text overlays
font = cv2.FONT_HERSHEY_SIMPLEX
color = (255, 0, 0)
stroke = 2

# cv2.resize expects (width, height)
size = (image_width, image_height)

# default webcam
stream = cv2.VideoCapture(0)

try:
    while True:
        # Capture frame-by-frame
        (grabbed, frame) = stream.read()
        if not grabbed:
            # camera unavailable or stream ended: frame is not usable
            print("No frame received from the webcam - stopping.")
            break

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # try to detect faces in the webcam
        faces = face_cascade.detectMultiScale(
            rgb, scaleFactor=1.3, minNeighbors=5)

        # for each face found
        for (x, y, w, h) in faces:
            roi_rgb = rgb[y:y+h, x:x+w]

            # Draw a rectangle around the face
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, stroke)

            # resize the crop and add the batch axis -> (1, height, width, 3)
            resized_image = cv2.resize(roi_rgb, size)
            img = np.expand_dims(resized_image.astype('float32'), axis=0)

            # apply the same scaling that was used during training
            img = preprocess_input(img)

            # predict the image
            predicted_prob = model.predict(img)
            confidence = round(predicted_prob[0].max() * 100, 2)
            print(confidence)

            # Display the label
            name = labels[predicted_prob[0].argmax()]
            cv2.putText(frame, f'{name}', (x+5, y-5),
                        font, 1, color, stroke, cv2.LINE_AA)
            cv2.putText(frame, f'{confidence}', (x+5, y+h-5),
                        font, 1, color, stroke, cv2.LINE_AA)

        # Show the frame
        cv2.imshow("Image", frame)
        k = cv2.waitKey(10) & 0xff  # Press 'ESC' for exiting
        if k == 27:
            break
finally:
    # Cleanup - runs even if an exception interrupts the loop, so the
    # camera is always released and the window closed
    stream.release()
    cv2.waitKey(1)
    cv2.destroyAllWindows()
    cv2.waitKey(1)



{0: 'constantin', 1: 'joshua', 2: 'luis', 3: 'pascal', 4: 'tom cruise'}
94.95
71.58
99.05
87.38
99.54
88.61
99.33
79.01
99.78
68.91
99.86
60.19
99.27
77.41
93.2
66.4
71.08
75.83
99.51
88.69
99.56
83.97
98.49
79.18
98.14
69.08
99.32
70.68
98.81
63.77
98.75
76.55
99.77
72.86
99.82
80.36
99.57
84.14
99.3
59.85
99.53
67.9
99.79
99.82
99.9
99.83
82.49
99.88
58.67
99.49
65.9
99.81
50.12
97.4
59.29
94.26
64.47
91.66
99.64
60.33
49.49
99.78
52.21
70.74
99.63
51.85
78.76
99.81
58.42
99.8
74.86
49.77
99.88
65.18
51.49
99.8
59.35
62.16
99.87
80.13
59.85
99.86
70.19
50.19
99.85
63.86
77.24
99.91
60.56
99.87
53.25
52.95
99.85
73.82
53.68
99.9
61.94
76.59
99.93
76.96
99.93
64.45
53.58
77.18
99.77
82.44
48.37
99.92
75.66
48.47
99.71
74.05
56.31
99.11
76.75
99.21
79.87
99.54
71.93
65.88
99.82
77.33
99.83
81.97
99.42
73.27
99.81
51.83
69.78
99.92
58.92
57.91
99.94
72.75
59.72
99.94
67.26
99.95
54.94
99.94
62.78
99.89
79.69
49.31
75.58
99.95
67.59
99.89
66.23
99.86


-1

In [1]:
# Imports 
import cv2
import mediapipe as mp

# Live MediaPipe Face Mesh preview: draws the face-mesh tesselation on every
# webcam frame. Press ESC to quit.

# Mediapipe Face Mesh Tesselations
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

webcam = cv2.VideoCapture(0)

try:
    # Build the face-mesh model ONCE and reuse it for every frame; the
    # context manager closes it when the loop ends. Constructing a new
    # FaceMesh per frame re-initialises the model each time and never
    # releases it.
    with mp_face_mesh.FaceMesh(refine_landmarks=True) as face_mesh:
        while webcam.isOpened():
            success, img = webcam.read()
            if not success:
                # no usable frame (camera busy/disconnected)
                print("No frame received from the webcam - stopping.")
                break

            # applying face mesh model (it expects RGB input); the BGR
            # frame itself is kept untouched for drawing and display
            results = face_mesh.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    mp_drawing.draw_landmarks(
                        image=img,
                        landmark_list=face_landmarks,
                        connections=mp_face_mesh.FACEMESH_TESSELATION,
                        landmark_drawing_spec=None,
                        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
                    )

            # Show the frame
            cv2.imshow("Face Recognition", img)
            k = cv2.waitKey(10) & 0xff  # Press 'ESC' for exiting
            if k == 27:
                break
finally:
    # Cleanup - runs even if an exception interrupts the loop
    webcam.release()
    cv2.waitKey(1)
    cv2.destroyAllWindows()
    cv2.waitKey(1)

NameError: name 'stream' is not defined

: 