# Mount Google Drive in Google Colab

---


In [None]:
""" Cell 1 """

# Make the Google Drive contents available under /content/drive; the later
# cells read the images from there and write their results back to it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Install necessary packages

---


In [None]:
# Install keras-vggface directly from its GitHub repository.
pip install git+https://github.com/rcmalli/keras-vggface.git

Collecting git+https://github.com/rcmalli/keras-vggface.git
  Cloning https://github.com/rcmalli/keras-vggface.git to /tmp/pip-req-build-je2y3t93
  Running command git clone -q https://github.com/rcmalli/keras-vggface.git /tmp/pip-req-build-je2y3t93
Building wheels for collected packages: keras-vggface
  Building wheel for keras-vggface (setup.py) ... [?25l[?25hdone
  Created wheel for keras-vggface: filename=keras_vggface-0.6-py3-none-any.whl size=8325 sha256=cf1799682e3d6068ff088e30c36de778df3f1950b99f23353da2acd089d051e0
  Stored in directory: /tmp/pip-ephem-wheel-cache-ofi9zaie/wheels/08/df/86/0225d44647ab2256dbf1e006823288fe9cc86367a056e6ea2c
Successfully built keras-vggface
Installing collected packages: keras-vggface
Successfully installed keras-vggface-0.6


In [None]:
# Print the metadata of the installed package to confirm the installation.
pip show keras-vggface

Name: keras-vggface
Version: 0.6
Summary: VGGFace implementation with Keras framework
Home-page: https://github.com/rcmalli/keras-vggface
Author: Refik Can MALLI
Author-email: mallir@itu.edu.tr
License: MIT
Location: /usr/local/lib/python3.7/dist-packages
Requires: h5py, pillow, pyyaml, keras, numpy, six, scipy
Required-by: 


In [None]:
!pip install keras_vggface
!pip3 install keras_applications
!pip install mtcnn

Collecting keras_applications
  Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 4.4 MB/s 
Installing collected packages: keras-applications
Successfully installed keras-applications-1.0.8
Collecting mtcnn
  Downloading mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
[K     |████████████████████████████████| 2.3 MB 8.3 MB/s 
Installing collected packages: mtcnn
Successfully installed mtcnn-0.1.1


In [None]:
import os
# Display the current working directory as the cell output.
os.getcwd()

'/content'

In [None]:
""" Cell 6 for changing directory"""

# Project folder on the mounted Drive. The later cells use relative paths
# ('team_images/', 'face_data/', 'train/', 'val/', 'test/'), which are resolved
# against this folder once it is the working directory.
base_dir = '/content/drive/MyDrive/EA_HAM_WS_2'

# change directory to our base directory
os.chdir(base_dir)

In [None]:
""" Cell 7 for importing packages necessary"""

# import necessary modules for working

import numpy as np
import csv
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt
from mtcnn.mtcnn import MTCNN
from PIL import Image, ImageOps
import cv2
import os


## 1. Extract faces (Region of Interest) from given data

---


    - use MTCNN detector to detect the faces and then store the face coordinates
    - extract the bounding box of a face as an image
    - save the cropped image in a folder with respective names

In [None]:
""" Cell 8 for face extraction from our image"""


""" Use detector to detect faces and find the face region """

# create the detector, using default weights
detector = MTCNN()

def extract_face(filename, required_size=(197, 197), detector = detector):
    """Detect the first face in an image file and return it cropped and resized.

    Parameters
    ----------
    filename : str
        Path of the image to read.
    required_size : tuple of int
        Size passed to ``Image.resize`` for the returned crop.
    detector : MTCNN
        Face detector; defaults to the module-level instance created above.

    Returns
    -------
    PIL.Image.Image or int
        The resized RGB face crop, or the integer ``0`` when no face was
        detected (callers test the result with ``== 0``).
    """
    # Load through PIL instead of plt.imread: plt.imread returns float arrays
    # for PNG files, which Image.fromarray cannot turn back into an RGB image.
    # The EXIF orientation is applied here, while the metadata is still
    # available, so the detector sees the picture upright.
    with Image.open(filename) as source:
        upright = ImageOps.exif_transpose(source).convert('RGB')
    pixels = np.asarray(upright)

    # detect faces in the image
    faces = detector.detect_faces(pixels)
    print("This is faces now:\n", faces)
    if len(faces) == 0:
        return 0

    # extract the bounding box from the first face
    x1, y1, width, height = faces[0]['box']
    x2, y2 = x1 + width, y1 + height
    # Guard against a box whose origin lies outside the picture: a negative
    # start index would make the slice wrap around instead of cropping.
    x1, y1 = max(x1, 0), max(y1, 0)

    # extract the face
    face = pixels[y1:y2, x1:x2]
    if face.size == 0:
        # Degenerate box: treat it like "no face found".
        return 0

    # resize pixels to the model size
    return Image.fromarray(face).resize(required_size)

""" Demo Snippet """

test_extract_face = extract_face('/content/drive/MyDrive/EA_HAM_WS_2/team_images/captain/ca_1.jpg')
# extract_face returns 0 when it finds no face; plt.imshow cannot draw that.
if test_extract_face == 0:
    print("No face was detected in the demo image")
else:
    plt.imshow(test_extract_face)

In [None]:
""" Cell 9 just a demo how for loops work in Cell 10"""


# Read Image
train_folders_path = 'team_images/*/'

# Collect every supported extension. Building a set first drops the duplicate
# paths that a case-insensitive file system would return for "*.jpg" and "*.JPG".
image_extensions = ("jpg", "png", "jpeg", "JPG", "PNG", "JPEG")
all_images = sorted({
    path
    for extension in image_extensions
    for path in glob(train_folders_path + "*." + extension)
})

print(all_images)

# for larger image size, instead of default size
plt.rcParams["figure.figsize"] = (15,7)

""" Demo Snippet """

# Crop the faces first so that the grid can be sized to hold all of them
# (a fixed 2x6 grid would start overwriting cells after the 12th face).
cropped_faces = []
for im in all_images:
    cropped_im = extract_face(im)
    if cropped_im == 0:
        continue
    cropped_faces.append(cropped_im)

# to display images in rows of 6, with at least the 2 rows of the original layout
n_cols = 6
n_rows = max(2, (len(cropped_faces) + n_cols - 1) // n_cols)
f, axarr = plt.subplots(n_rows, n_cols)

for ax, cropped_im in zip(axarr.ravel(), cropped_faces):
    ax.imshow(cropped_im)

In [None]:
""" Cell 10 as you've seen how above loops work, now use same code but instead of showing copy the images
    from given data to 'face_data' folder after creating it """


""" Save the images in a folder """

# make a directory to store cropped images (no error if it already exists)
os.makedirs('face_data', exist_ok=True)

root_dir = 'face_data/'

for im in all_images:
    # 'team_images/<person>/<file>' -> the person's folder name and the file name
    person, file_name = im.split('/')[-2:]
    class_dir = root_dir + person
    os.makedirs(class_dir, exist_ok=True)

    # Check for an existing crop before running the detector, so a re-run of
    # this cell does not repeat the face detection for images already saved.
    target = class_dir + '/' + file_name
    if os.path.exists(target):
        continue

    cropped_im = extract_face(im)
    if cropped_im == 0:
        # no face found in this picture
        continue
    cropped_im.save(target)

## 2. Prepare data for the model

---


    - find number of people (classes) in the data
    - assign numbers for easier classification of the people
    - divide the data into train and validation sets for later use

In [None]:
""" Cell 11 just try to get names of persons in 'retour variable' """


import os
import os.path

# One (file name, person name, folder) tuple per image found under root_dir.
retour=[]
image_suffixes = ("jpg", "JPG", "jpeg", "JPEG", "PNG", "png")

# remember root_dir here is face_data
for (root,dirs,files) in os.walk(root_dir):
    # the last path component of the folder is the person's name
    r = root.split('/')
    for f in files:
        # str.endswith accepts a tuple, so one call covers every suffix
        if f.endswith(image_suffixes):
            retour.append((f, r[-1], root))

print("== Found %d items "%len(retour))
# Show the pieces of the last record; skipped when nothing was found so the
# cell does not fail on names that were never assigned.
if retour:
    last_file, last_person, last_root = retour[-1]
    print(last_root)
    print(last_root.split('/'))
    print(last_file)
print(retour)

In [None]:
""" Cell 12 give each person a unique code(here just numbers from 0 to n-1)"""


""" find total classes(here, number of people) in the given data set and assign unique indices to each class """
idx = {}

# Persons are numbered in the order in which they first appear in retour;
# setdefault leaves the code of an already known person untouched.
for record in retour:
    idx.setdefault(record[1], len(idx))

print("== Found %d classes"% len(idx))
print(record)
print(record[1])
print(idx)

In [None]:
""" Cell 13 convert the idx to a csv file (a widely used file format for data-analysis, ML, etc)"""


# store all the classes and their values in a CSV file

# One row per person: the name is the row label, the class code the only column.
person_names = list(idx.keys())
person_codes = list(idx.values())
df = pd.DataFrame(person_codes, index=person_names)
print(df)
df.to_csv('idx.csv')

In [None]:
""" Cell 14 """

# make the train and val directories; exist_ok lets this cell be re-run
# without failing when the directories are already there
for split_dir in ('train', 'val'):
    os.makedirs(split_dir, exist_ok=True)

In [None]:

""" Cell 15 make a folder for each person in train and val folders"""

# create train and val folders for each class; makedirs with exist_ok keeps
# the cell re-runnable and also creates 'train'/'val' themselves if missing
for i in idx.keys():
    print(i)
    os.makedirs('train/'+i, exist_ok=True)
    os.makedirs('val/'+i, exist_ok=True)

In [None]:
""" Cell 16 demo for what we do in cell 17, just randomly sample our 'face_data' into train and val based on 70-30 splitting rule """

# check if there is any out of index before filling randomly 4 in train and 2 in val from given 6 images of data
from random import sample, choice

for person in idx.keys():
    person_dir = root_dir + person
    print(person_dir)
    for _folder, _subdirs, file_names in os.walk(root_dir + '/' + person):
        # draw 6 images in random order: the first 4 go to train, the last 2 to val
        picked = sample(file_names, 6)
        print(picked)
        for name in picked[:4]:
            print(person_dir + '/' + name, 'train/' + person + '/' + name)
        for name in picked[4:6]:
            print(person_dir + '/' + name, 'val/' + person + '/' + name)

In [None]:
""" Cell 17 now as you saw how the above cell loops works, instead of printing COPY the images from face_data to train and val """


""" divide the data of each person randomly into train and val by the 70-30 rule (4 and 2 for 6 images) """

import shutil
from random import Random, sample, choice

train_fraction = 0.7  # share of each person's images that goes to 'train'
split_seed = 42       # fixed seed: re-running the cell reproduces the same split

for i in idx.keys():
    class_dir = root_dir + i
    # Only the files directly inside the person's folder, in a stable order.
    files = sorted(
        name for name in os.listdir(class_dir)
        if os.path.isfile(class_dir + '/' + name)
    )
    if not files:
        print("No images found for", i)
        continue

    # Shuffle with a fresh seeded generator per person, so the result depends
    # only on that person's files. A different split on every run would leave
    # the same picture in both train and val after a re-run.
    imgs = Random(split_seed).sample(files, len(files))
    print(imgs)

    n_train = int(round(len(imgs) * train_fraction))
    if len(imgs) > 1:
        # keep at least one image on each side of the split
        n_train = min(max(n_train, 1), len(imgs) - 1)

    os.makedirs('train/'+i, exist_ok=True)
    os.makedirs('val/'+i, exist_ok=True)
    for name in imgs[:n_train]:
        shutil.copy(class_dir+'/'+name, 'train/'+i+'/'+name)
    for name in imgs[n_train:]:
        shutil.copy(class_dir+'/'+name, 'val/'+i+'/'+name)

## 3. Train the model on data
---


    - read the image and resize it to required architecture input-size
    - define a baseline model using ResNet architecture
    - train the model on the train data and validate it on the validation dataset

In [None]:
""" Cell 18 """


import numpy as np 
import pandas as pd 
import os
from random import choice, sample
from glob import glob

import keras
from keras.preprocessing import image
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras.layers import Input, Dense, GlobalMaxPool2D, GlobalAvgPool2D, Concatenate, Multiply, Dropout, Subtract, Flatten
from keras.models import Model, load_model
from keras.optimizers import Adam
from keras_vggface.utils import preprocess_input
from keras_vggface.vggface import VGGFace

In [None]:
""" Cell 19 randomly sample images to get Batches of data to feed model"""

# random.sample needs a sequence (a dict view is rejected by newer Python
# versions), and it cannot draw more items than there are classes.
class_names = list(idx.keys())
classes = sample(class_names, min(2, len(class_names)))
# show the classes that are actually used below
print(classes)
batch = []

for i in classes:
    print(i)
    for (r,dirs,files) in os.walk('train'+'/'+i):
        print(r,dirs,files)
        # one randomly chosen training image of this person
        batch.append('train'+'/'+i+'/'+choice(files))
        print(batch)

In [None]:
""" Cell 20 
    - process input so it satisfies necessary conditions for our model
    - generate batches of data
    - make a base_line model to start training
    - train the baseline_model with necessary arguments set to heuristic values
"""

# training the model

def read_img(path):
    """Load one image as a preprocessed float array for the model.

    The image at ``path`` is loaded at 197x197, converted to a float64 array
    and passed through keras_vggface's ``preprocess_input`` (version 2).
    """
    img = image.load_img(path, target_size=(197, 197))
    # np.float was only an alias of the builtin float and no longer exists in
    # current NumPy releases; np.float64 is the same dtype.
    img = np.array(img).astype(np.float64)
    return preprocess_input(img, version=2)

def gen(root, batch_size=2, idx = idx):
    """Endlessly yield ``(X, labels)`` training batches.

    Parameters
    ----------
    root : str
        Folder that holds one sub-folder of images per class name.
    batch_size : int
        Number of images per batch.
    idx : dict
        Maps each class name to its column in the one-hot label array.
        The default is the ``idx`` that exists when this function is defined.

    Yields
    ------
    tuple
        ``X``: array of ``batch_size`` images produced by ``read_img``;
        ``labels``: array of shape ``(batch_size, len(idx))`` with a single 1
        per row, in the column of that image's class.

    Raises
    ------
    ValueError
        If ``idx`` is empty or a class folder contains no files.
    """
    class_names = list(idx.keys())
    n_classes = len(class_names)
    if n_classes == 0:
        raise ValueError("gen() needs at least one class in idx")

    while True:
        # Walk through the classes in a freshly shuffled order, starting a new
        # shuffled pass whenever the batch is larger than the number of
        # classes. With batch_size == number of classes this is exactly one
        # image per class, in random order.
        picked = []
        while len(picked) < batch_size:
            picked.extend(sample(class_names, n_classes))
        picked = picked[:batch_size]

        batch = []
        # Rows are sized from batch_size and filled per picked image, so the
        # images and the labels always have the same length.
        labels = np.zeros((batch_size, n_classes))
        for row, name in enumerate(picked):
            class_dir = root + '/' + name
            files = [f for f in os.listdir(class_dir)
                     if os.path.isfile(class_dir + '/' + f)]
            if not files:
                raise ValueError("no images found in " + class_dir)
            batch.append(class_dir + '/' + choice(files))
            labels[row, idx[name]] = 1

        X = np.array([read_img(x) for x in batch])

        yield X, labels

def baseline_model(classes = 2):
    """Build and compile the face classifier on top of VGGFace (ResNet50).

    Parameters
    ----------
    classes : int
        Number of persons to distinguish; the size of the softmax output.

    Returns
    -------
    keras.models.Model
        Compiled model taking 197x197x3 images.
    """
    base_model = VGGFace(model='resnet50', include_top=False, input_shape=(197, 197, 3))
    last_layer = base_model.get_layer('avg_pool').output
    y = Flatten(name='flatten')(last_layer)

    # Freeze the pretrained network except for its last three layers. The
    # Flatten and Dense layers added here are not part of base_model and stay
    # trainable by default.
    for layer in base_model.layers[:-3]:
        layer.trainable = False
    for layer in base_model.layers[-3:]:
        layer.trainable = True

    out = Dense(classes, activation="softmax")(y)  # (Dense from keras) one probability per class

    model = Model(base_model.input, out) # Model from keras.models

    # A softmax output trained on one-hot labels calls for categorical
    # cross-entropy. For two classes it yields the same loss value as the
    # binary form, and it stays correct when `classes` is larger than 2.
    model.compile(loss="categorical_crossentropy", metrics=['acc'], optimizer=Adam(0.00002))

    return model

def train_model():
    """Train the module-level ``model1`` and return the training history.

    Batches come from ``gen('train')`` and validation batches from
    ``gen('val')``. The best model by ``val_acc`` is written to "vgg_face.h5";
    the learning rate is reduced after 10 epochs without improvement and
    training stops early after 20.

    Returns
    -------
    The object returned by ``fit_generator`` (previously it was discarded).
    """
    file_path = "vgg_face.h5"
    checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    reduce_on_plateau = ReduceLROnPlateau(monitor="val_acc", mode="max", factor=0.1, patience=10, verbose=1)
    es = EarlyStopping(monitor="val_acc", min_delta = 0.0001, patience=20, verbose=1)
    callbacks_list = [checkpoint, reduce_on_plateau, es]

    history = model1.fit_generator(gen('train', batch_size=2),
                                  use_multiprocessing=False,
                                  validation_data=gen('val', batch_size=2), 
                                  epochs=3, verbose=1, max_queue_size = 10,  
                                  workers=1, callbacks=callbacks_list, 
                                  steps_per_epoch=100, validation_steps=50)
    return history

# One output per person found in the data (idx maps person name -> class
# code), so the model always matches the width of the labels made by gen().
classes = len(idx)
model1 = baseline_model(classes)

train_model()

In [None]:
""" Cell 21 save the trained model to h5 file"""

""" save the trained model in form of .h5 file (type of Hierarchical Data Format) for ease of storing weights and model configuration"""

# Saves model1 in its current state, i.e. as it is once training has finished.
# The best-scoring checkpoint (by val_acc) is written separately to
# "vgg_face.h5" by the ModelCheckpoint callback inside train_model().
model1.save("face_recognition_model.h5")
# model1.save("face_recognition_model_pretrained.h5")

In [None]:
""" Cell 22 a code snippet to visualise the model you just trained """


import keras
from keras.models import load_model
# Reload the model from the file written in Cell 21 (instead of reusing
# model1), so the file on disk is what gets visualised.
model = load_model("face_recognition_model.h5")

# visualize model
from keras.utils.vis_utils import plot_model

# Draw the layer graph with each layer's name and its input/output shapes.
plot_model(model, show_shapes=True, show_layer_names=True)

## 4. Predict the classes (faces)
---


    - use the saved weights .h5 file to create a model for testing
    - load the numbers corresponding to our classes from csv file
    - predict the faces in groups

In [None]:
""" Cell 23 import modules and define where computer should look for your predicition images faces"""

# from here we can use the saved model to recognise faces

import keras
from keras.models import load_model
from keras_vggface.utils import preprocess_input
from glob import glob
import numpy as np
import csv
import pandas as pd
from mtcnn.mtcnn import MTCNN
from PIL import Image
import cv2

import os

test_imgs = 'test'

# recognise_person() writes its annotated copies into this same folder as
# 'labeled_<name>'. Leave those out, otherwise a re-run would label the
# already labeled pictures again.
im_path = [
    path for path in sorted(glob(test_imgs+'/*.jpg'))
    if not os.path.basename(path).startswith('labeled_')
]
print(im_path)


In [None]:
""" Cell 24 convert csv file back to a list for coding later on"""

# Every CSV row becomes one [person name, class code] pair. From here on idx
# is a list of such pairs and no longer the dict built in Cell 12.
idx = pd.read_csv('idx.csv').values.tolist()
print(idx)

In [None]:
""" Cell 25 load the trained model into 'model' variable """

""" Use our trained model for testing """

model_path = 'face_recognition_model.h5'
model = load_model(model_path)

# Take the number of classes from the model's output layer, so the vector
# below always has one slot per value that np.argmax(prediction) can return.
classes = model.output_shape[-1]
att = np.zeros(classes) # your classes: att[k] is set to 1 once person k is recognised
print(att)

In [None]:
""" Cell 26 write a function to recognise and label every face in a group photo """

# create the detector, using default weights
detector = MTCNN()

""" Now lets use the model to find faces in a group """

def recognise_person(filename, required_size=(197, 197), idx = idx, att = att):
    """Label every confidently detected face in an image and save the result.

    Each face found with at least 90 % detector confidence is cropped,
    classified with the module-level ``model``, marked in ``att`` and drawn
    (box plus person name) onto a copy of the picture. The copy is written to
    the ``test_imgs`` folder as ``labeled_<file name>``.

    Parameters
    ----------
    filename : str
        Path of the image to process.
    required_size : tuple of int
        Size the face crop is resized to before it is given to the model.
    idx : list
        Rows of ``[person name, class code]``; row ``k`` names class ``k``.
    att : numpy.ndarray
        Attendance vector, updated in place: ``att[k] = 1`` for every
        recognised class ``k``.

    Returns
    -------
    None
    """
    # load image from file
    bgr = cv2.imread(filename)
    if bgr is None:
        # cv2.imread signals an unreadable file with None instead of raising
        print("Could not read image:", filename)
        return

    pixels = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB) # if you remember BGR is standard cv2 format, so convert to RGB channel space
    pixels = cv2.copyMakeBorder(pixels, 100, 100, 100, 100, cv2.BORDER_CONSTANT)
    # BGR copy of the padded picture: this is the one that gets drawn on and saved
    pixels_l = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)
    # detect faces in the image
    faces = detector.detect_faces(pixels)
    print(faces)
    for detection in faces:
        if detection['confidence'] < 0.9: # extract only if model has more than 90 % confidence that its a face
            print(detection['confidence'])
            continue
        x1, y1, width, height = detection['box']  # from here this is same as extract_face as we defined above
        x2, y2 = x1 + width, y1 + height
        # A negative start index would make the slice wrap around, so clamp it.
        x1, y1 = max(x1, 0), max(y1, 0)
        # extract the face
        face = pixels[y1:y2, x1:x2]
        if face.size == 0:
            continue

        # resize pixels to the model size
        face_img = Image.fromarray(face).resize(required_size)

        # np.array gives a writable float copy; the batch axis is added with
        # expand_dims so that the shape follows required_size.
        face = np.array(face_img, dtype = np.float64)
        face = preprocess_input(face, version=2)
        face = np.expand_dims(face, axis=0)

        """ Predict which class (person) the face belongs to using our model """
        Y = model.predict(face)

        # among all our persons names only take the unique-code whose final probability from model is highest
        person = int(np.argmax(Y))
        att[person] = 1

        # draw a rectangle around a person face
        cv2.rectangle(pixels_l, (x1, y1), (x2, y2), (66,245,224), 2)

        # put person name at top of the face
        cv2.putText(pixels_l, str(idx[person][0]), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.75 , (66,245,224), 2, cv2.LINE_AA)

    cv2.imwrite(test_imgs+'/labeled_'+filename.split('/')[-1], pixels_l)

    return

# Label every test image collected in Cell 23; each call writes a
# 'labeled_<name>' copy next to the original.
for im in im_path:
    recognise_person(im)

### Yayy!! You've successfully completed training and testing a face recognition model
Remember, AI is not just about coding in some fancy programming language; it's about how well we can apply concepts to come up with better solutions. 
Sometimes ML or AI is more of an art than a science! 

**Just keep learning!!!**