In [1]:
import os
import cv2
import time
import pandas as pd
import numpy as np
import pickle
import glob
from os import listdir
import shutil
from os.path import isfile, join
from matplotlib import pyplot as plt
from FaceDetectionModule import Face_Detector
from FaceGazeModule import GazeEstimator
from FaceLandmarksModule import Landmark_Extractor
from FaceEncoderModule import Face_Encoder
from AgeGenderModule import AgeGenderDetector
from FaceEmotionModule import Face_Emotion
import imutils
from imutils.video import VideoStream
from sklearn.externals import joblib
from sklearn.neighbors import KNeighborsClassifier
from scipy.spatial import distance
from CentroidBasedObjectDetector import CentroidTracker
from collections import deque
from collections import Counter

Imported Python modules.
Imported Python modules.
Imported Python modules.
Imported Python modules.




# General Settings

In [2]:
# Notebook-wide configuration. These are plain module globals read by the cells below.

# Target device name handed to Face_Detector.init_plugin.
device = "MYRIAD"
# CPU extension library path, also handed to Face_Detector.init_plugin.
cpu_extension_path = "cpu_extension_avx2.dll"
# Yaw limit passed to the (currently commented-out) Train_Model call.
# FindMatchedFace does not read this global; it uses its own yaw_threshold parameter.
yaw_threshold = 180
# NOTE(review): the functions below take detection_thresh as a parameter with their
# own default and never read this global — confirm whether it is still needed.
detection_thresh = 0.7
# Directory holding the enrolled face images and their saved encodings.
src_dir = r"/home/pi/Notebooks/FaceRecognitionFramework/Face_DB"
# FindMatchedFace reports the gender as "Unsure" when the returned probability is below this.
gender_acceptance_thresh = 0.6
# FindMatchedFace skips face crops whose height or width is smaller than this (pixels).
min_face_dim = 64
# Controls whether the emotion network is loaded in the emotion-module cell.
load_emotion_module = True
# Capture size requested from cv2.VideoCapture in the webcam cell.
cap_w = 800
cap_h = 600
# Tracker that FindMatchedFace updates with the detected face boxes on every call.
ct = CentroidTracker()
# NOTE(review): H and W are not referenced anywhere else in the visible notebook.
(H, W) = (cap_h, cap_w)
# Maximum length of the per-track rolling gender/age/emotion deques in FindMatchedFace.
maxQueueLen = 20

# Load Face Detector Module

In [3]:
# OpenVINO model files (.xml / .bin) for the face detector.
model_xml = r"Models/openvino/face-detection/FP16/face-detection-retail-0004.xml"
model_bin = r"Models/openvino/face-detection/FP16/face-detection-retail-0004.bin"

# The plugin is created once here and reused by every load_net call in the cells below.
plugin = Face_Detector.init_plugin(device,cpu_extension_path)

faceDetector = Face_Detector()

faceDetector.load_net(model_xml,model_bin,plugin)

INFO: All network layers are supported.
Input Shape: [1, 3, 300, 300]
Output Shape: [1, 1, 200, 7]


# Load Pose Estimator Module

In [4]:
# OpenVINO model files for head-pose estimation (overwrites the model_xml/model_bin globals).
model_xml = r"Models/openvino/gaze-estimation/FP16/head-pose-estimation-adas-0001.xml"
model_bin =r"Models/openvino/gaze-estimation/FP16/head-pose-estimation-adas-0001.bin"

gazeEstimator = GazeEstimator()

# Reuses the plugin created in the face-detector cell.
gazeEstimator.load_net(model_xml,model_bin,plugin)

INFO: All network layers are supported.
Input Shape: [1, 3, 60, 60]
Output Shape: [1, 1]*3 for (Y,P,R)


# Load Face Align Module

In [5]:
# OpenVINO model files for the facial-landmark network (overwrites the model_xml/model_bin globals).
model_xml = r"Models/openvino/face-landmarks/FP16/facial-landmarks-35-adas-0002.xml"
model_bin = r"Models/openvino/face-landmarks/FP16/facial-landmarks-35-adas-0002.bin"

landmarkExtractor = Landmark_Extractor()

# Reuses the plugin created in the face-detector cell.
landmarkExtractor.load_net(model_xml,model_bin,plugin)

INFO: All network layers are supported.
Input Shape: [1, 3, 60, 60]
Output Shape: [1, 70]


# Load Face Encoder Module

In [6]:
# OpenVINO model files for the face re-identification (encoding) network.
model_xml = r"Models/openvino/face-identification/FP16/face-reidentification-retail-0095.xml"
model_bin = r"Models/openvino/face-identification/FP16/face-reidentification-retail-0095.bin"

faceEncoder = Face_Encoder()

# Reuses the plugin created in the face-detector cell.
faceEncoder.load_net(model_xml,model_bin,plugin)

INFO: All network layers are supported.
Input Shape: [1, 3, 128, 128]
Output Shape: [1, 256, 1, 1]


# Load Age Gender Detection Module

In [7]:
# OpenVINO model files for the age/gender network.
model_xml = r"Models/openvino/age-gender/FP16/age-gender-recognition-retail-0013.xml"
model_bin = r"Models/openvino/age-gender/FP16/age-gender-recognition-retail-0013.bin"

ageDetector = AgeGenderDetector()

# Reuses the plugin created in the face-detector cell.
ageDetector.load_net(model_xml,model_bin,plugin)

INFO: All network layers are supported.
Input Shape: [1, 3, 62, 62]
Output Shape Age: [1, 1, 1, 1]
Output Shape Gender: [1, 2, 1, 1]


# Load Emotion Estimation Module

In [8]:
# The emotion network is optional: emotionEstimator is only defined when
# load_emotion_module (set in the settings cell) is True.
if load_emotion_module:

    # OpenVINO model files for the emotion-recognition network.
    model_xml = r"Models/openvino/emotion-estimation/FP16/emotions-recognition-retail-0003.xml"
    model_bin = r"Models/openvino/emotion-estimation/FP16/emotions-recognition-retail-0003.bin"

    emotionEstimator = Face_Emotion()

    # Reuses the plugin created in the face-detector cell.
    emotionEstimator.load_net(model_xml,model_bin,plugin)

INFO: All network layers are supported.
Input Shape: [1, 3, 64, 64]
Output Shape: [1, 5, 1, 1]


In [9]:
class FaceRecognition:
    """Nearest-neighbour face matcher built on saved face-encoding vectors.

    Encodings are read from ``.npy`` files, collected in a pandas DataFrame
    (one row per encoding file, indexed by the file name without extension)
    and searched with a scikit-learn ``KNeighborsClassifier``.
    """

    def __init__(self):
        # Start empty so SaveModel / SaveEncodingsDataframe can raise their own
        # clear error instead of an AttributeError when nothing was trained or
        # loaded yet.
        self.model = None
        self.df = None

    def LoadImageEncoding(self,path):
        """Load a single encoding array from the ``.npy`` file at ``path``."""
        return np.load(path)

    def LoadAllImageEncondings(self,image_dir_path):
        """Load every ``.npy`` encoding found directly inside ``image_dir_path``.

        Returns:
            dict mapping the file name without its 4-character ``.npy`` suffix
            to the loaded array.
        """
        encoding_files = [
            f for f in listdir(image_dir_path)
            if isfile(join(image_dir_path, f)) and f.lower().endswith(".npy")
        ]
        return {
            enc_file[0:-4]: self.LoadImageEncoding(join(image_dir_path, enc_file))
            for enc_file in encoding_files
        }

    def TrainEnc(self,encodings,dimensions=3,metric="minkowski"):
        """Fit the nearest-neighbour model on a ``{label: encoding}`` mapping.

        Args:
            encodings: dict of label -> 1-D encoding array.
            dimensions: passed to ``KNeighborsClassifier`` as ``n_neighbors``
                (the parameter name is kept for backward compatibility).
            metric: distance metric passed to ``KNeighborsClassifier``.

        Returns:
            ``(model, df)``; both are also stored on the instance.
        """
        # DataFrame(dict) puts labels in columns; transpose so each row is one encoding.
        df = pd.DataFrame(encodings).transpose()
        labels = list(df.index)
        model = KNeighborsClassifier(n_neighbors=dimensions,metric=metric)
        model.fit(df,labels)
        self.df = df
        self.model = model
        return model,df

    def GetNearestFaces(self,img_target):
        """Return ``(distances, row_indices)`` of the nearest neighbours of one encoding."""
        query = np.asarray(img_target).reshape(1,-1)
        nn = self.model.kneighbors(query)
        return nn[0][0],nn[1][0]

    def GetNearestFacesAsString(self,img_target):
        """Return the labels, row indices and Euclidean distances of the nearest neighbours.

        The neighbours are chosen (and ordered) by the fitted model's own metric,
        while the reported distances are always Euclidean, so they are only
        guaranteed to be ascending when the model was fitted with a Euclidean
        (default minkowski) metric.

        Returns:
            ``(names, indices, distances)`` where ``names`` and ``distances`` are
            lists aligned with the ``indices`` array.
        """
        query = np.asarray(img_target).reshape(1,-1)
        nn = self.model.kneighbors(query)
        indices = nn[1][0]
        names = []
        distances = []
        for row in indices:
            names.append(self.df.index[row])
            # Pass the flat 1-D query: scipy's euclidean expects 1-D vectors and
            # newer SciPy releases reject the (1, n) shaped array.
            distances.append(distance.euclidean(self.df.iloc[row].values,query[0]))
        return  names,indices,distances

    def SaveModel(self,path):
        """Persist the fitted model to ``path`` with joblib.

        Raises:
            Exception: if no model has been trained or loaded.
        """
        if getattr(self, "model", None) is None:
            raise Exception("No models to save.")
        joblib.dump(self.model, path)
        print("Saved Model: {}".format(path))

    def LoadModel(self,path):
        """Load a model previously written by ``SaveModel``."""
        # NOTE: joblib.load unpickles the file and can execute arbitrary code;
        # only load model files from a trusted source.
        self.model = joblib.load(path)
        print("Loaded Model: {}".format(path))

    def SaveEncodingsDataframe(self,path):
        """Pickle the encodings DataFrame to ``path``.

        Raises:
            Exception: if no DataFrame has been built or loaded.
        """
        # `is None` rather than `== None`: comparing a DataFrame with == is
        # element-wise and cannot be used in an `if`.
        if getattr(self, "df", None) is None:
            raise Exception("No data frames to save.")
        self.df.to_pickle(path)
        print("Saved Dataset: {}".format(path))

    def LoadEncodingsDataframe(self,path):
        """Load a DataFrame previously written by ``SaveEncodingsDataframe``."""
        # NOTE: pd.read_pickle unpickles the file; only load trusted files.
        self.df = pd.read_pickle(path)
        print("Loaded Dataset: {}".format(path))

    def resizeImage(self,image, width = None, height = None, inter = cv2.INTER_AREA):
        """Resize ``image`` to the given width or height, keeping the aspect ratio.

        When both ``width`` and ``height`` are None the image is returned
        unchanged; when both are given, ``width`` wins and ``height`` is ignored.
        """
        if width is None and height is None:
            return image

        (h, w) = image.shape[:2]
        if width is None:
            r = height / float(h)
            dim = (int(w * r), height)
        else:
            r = width / float(w)
            dim = (width, int(h * r))

        return cv2.resize(image, dim, interpolation = inter)

# Encode Training Images

In [10]:
def Train_Model(image_dir_path,yaw_threshold = 30,detection_thresh = 0.7,visualize = False,allowed_extensions = [".jpg",".jpeg",".png",".gif",".bmp"]):
    """Encode every single-face image in a directory and save the encoding next to it.

    For each image file directly inside ``image_dir_path`` whose extension is in
    ``allowed_extensions`` the face is detected, cropped, prepared with the
    landmark extractor, checked against the yaw limit, encoded, and written with
    ``np.save`` to ``<image path without extension>.npy``.

    Args:
        image_dir_path: directory containing the training images.
        yaw_threshold: images whose estimated yaw lies outside
            ``[-yaw_threshold, yaw_threshold]`` are skipped.
        detection_thresh: threshold passed to the face detector.
        visualize: forwarded to ``landmarkExtractor.prepare_face``.
        allowed_extensions: file extensions (with leading dot) to process.

    Returns:
        List of file names that were skipped or failed.
    """
    # Compare the real extension rather than the last four characters, which
    # could never match five-character extensions such as ".jpeg".
    allowed = {ext.lower() for ext in allowed_extensions}
    images = [
        f for f in listdir(image_dir_path)
        if isfile(join(image_dir_path, f)) and os.path.splitext(f)[1].lower() in allowed
    ]
    img_count = len(images)
    failed = []
    for count, imgname in enumerate(images, start=1):
        print("============================================================")
        imgpath = image_dir_path + "/" + imgname
        try:
            print("Training on Image: " + str(count) + " out of " + str(img_count))
            img = cv2.imread(imgpath)
            if img is None:
                # cv2.imread signals an unreadable/corrupt file by returning None.
                print("Skipping image, file could not be read.")
                failed.append(imgname)
                continue
            faces,_ = faceDetector.detectFaces(img,detection_thresh)
            if faces is None or len(faces) == 0:
                print("Skipping image, no faces detected.")
                failed.append(imgname)
                continue
            elif len(faces)>1:
                print("Skipping image, multiple faces detected.")
                failed.append(imgname)
                continue
            (x_min, y_min, x_max, y_max) = faces[0]
            img = img[y_min:y_max,x_min:x_max]
            img = landmarkExtractor.prepare_face(img,visualize,False)
            y,_,_ = gazeEstimator.detectFaces(img)
            print("Yaw = " + str(y))
            if y>yaw_threshold or y< -yaw_threshold:
                print("Skipping image, face Yaw exceeded provided threshold.")
                failed.append(imgname)
                continue
            face_encoding = faceEncoder.encode_face(img)
            # np.save appends ".npy"; strip the real extension, whatever its length.
            np.save(os.path.splitext(imgpath)[0] ,face_encoding)
        except Exception as e:
            # Best effort: one bad image must not abort the whole run.
            print(e)
            failed.append(imgname)
    return failed

In [11]:
def crop_with_margin(img,x_min,y_min,x_max,y_max,margin_x_rate = 0.125 , margin_y_rate = 0.125):
    """Crop a box out of ``img`` after growing it by a relative margin on every side.

    The horizontal/vertical margin is the box width/height multiplied by
    ``margin_x_rate``/``margin_y_rate`` (truncated to int). The grown box is
    clipped to the image bounds before slicing.
    """
    frame_h, frame_w = img.shape[0], img.shape[1]
    pad_x = int((x_max - x_min) * margin_x_rate)
    pad_y = int((y_max - y_min) * margin_y_rate)

    top = max(y_min - pad_y, 0)
    bottom = min(y_max + pad_y, frame_h)
    left = max(x_min - pad_x, 0)
    right = min(x_max + pad_x, frame_w)

    return img[top:bottom, left:right]

# Match a query image to a dataset

In [12]:
# Per-track rolling histories, keyed by the object id returned by the centroid
# tracker `ct`. Each value is a deque capped at maxQueueLen entries.
rolling_Genders = {}
rolling_ages = {}
rolling_emotions = {}


def _push_rolling(history, key, value):
    """Append ``value`` to the bounded deque stored under ``key`` (created on first use) and return it."""
    queue = history.setdefault(key, deque(maxlen = maxQueueLen))
    queue.append(value)
    return queue


def FindMatchedFace(img,rec,match_thresh = 11,visualize=False,yaw_threshold = 30,detection_thresh = 0.7,detectAgeGender = False,verbose=True,cropimage=True):
    """Detect the faces in ``img`` and match each usable one against ``rec``.

    Args:
        img: frame to analyse.
        rec: recogniser exposing ``GetNearestFacesAsString(encoding)``.
        match_thresh: a face whose nearest distance exceeds this is reported as
            ``["N/A"]`` with an empty index list.
        visualize: forwarded to ``landmarkExtractor.prepare_face``.
        yaw_threshold: faces whose yaw is not strictly inside
            ``(-yaw_threshold, yaw_threshold)`` are ignored.
        detection_thresh: threshold passed to the face detector.
        detectAgeGender: when True, age and gender are estimated per face;
            otherwise age is ``""`` and gender is ``-1`` in the result.
        verbose: print the time spent on matching.
        cropimage: with ``detectAgeGender``, run age/gender on a margin-padded
            crop of the frame and smooth age, gender and emotion over the
            per-track rolling histories.

    Returns:
        ``(-1, None)`` when no face was detected,
        ``(-2, None)`` when faces were detected but none was usable,
        ``(1, detected)`` otherwise, where each entry of ``detected`` is
        ``(names, indices, distances, (x_min, y_min, x_max, y_max), age, gender, emotion)``.
        ``emotion`` is ``""`` when the emotion module is not loaded.
    """
    faces,_ = faceDetector.detectFaces(img,detection_thresh)
    if faces is None or len(faces) == 0:
        return -1,None #No faces detected

    rects = [(x_min, y_min, x_max, y_max) for (x_min, y_min, x_max, y_max) in faces]
    objects = ct.update(rects)
    track_ids = list(objects.keys())

    encs = []
    coords = []
    ages = []
    genders = []
    emotions = []
    for i,(x_min, y_min, x_max, y_max) in enumerate(faces):
        # NOTE(review): faces are paired with tracker ids purely by position.
        # This presumably only holds while the tracker returns exactly one id
        # per rectangle in the same order — verify against CentroidTracker.
        rolling_ix = track_ids[i]

        crop = img[y_min:y_max,x_min:x_max]
        if crop.shape[0]<min_face_dim or crop.shape[1]<min_face_dim:
            continue
        crop = landmarkExtractor.prepare_face(crop,visualize,False)
        # Estimate the yaw of this face (as Train_Model does), not of the whole frame.
        y,_,_ = gazeEstimator.detectFaces(crop)
        if not (y<yaw_threshold and y>-yaw_threshold):
            continue

        encs.append(faceEncoder.encode_face(crop))
        coords.append((x_min, y_min, x_max, y_max))

        # Always record exactly one emotion per accepted face so emotions[i]
        # stays aligned with encs[i]; emotionEstimator only exists when the
        # emotion module was loaded.
        emotion = emotionEstimator.estimate_emotion(crop) if load_emotion_module else ""

        if not detectAgeGender:
            emotions.append(emotion)
            continue

        if cropimage:
            gender,age,prob = ageDetector.detectGenderAge(crop_with_margin(img,x_min,y_min,x_max,y_max))
            if prob<gender_acceptance_thresh:
                gender="Unsure"

            gender_hist = _push_rolling(rolling_Genders, rolling_ix, gender)
            age_hist = _push_rolling(rolling_ages, rolling_ix, age)
            emotion_hist = _push_rolling(rolling_emotions, rolling_ix, emotion)

            # Smooth over the recent history of this track: mean age,
            # most frequent gender and emotion.
            ages.append(int(np.array(age_hist).mean(axis=0)))
            genders.append(Counter(gender_hist).most_common(1)[0][0])
            emotions.append(Counter(emotion_hist).most_common(1)[0][0])
        else:
            gender,age,prob = ageDetector.detectGenderAge(crop)
            ages.append(age)
            genders.append("Unsure" if prob<gender_acceptance_thresh else gender)
            emotions.append(emotion)

    if(len(encs) == 0):
        return -2,None #Faces detected but all of them unfit for comparisons

    detected = []
    start = time.time()
    for i,enc in enumerate(encs):
        names,indices,distances = rec.GetNearestFacesAsString(enc)
        if(distances[0]>match_thresh):
            # Too far from every enrolled face: report as unknown.
            names,indices = ["N/A"],[]
        if detectAgeGender:
            age,gender = ages[i],genders[i]
        else:
            age,gender = "",-1
        detected.append((names,indices,distances,coords[i],age,gender,emotions[i]))
    end = time.time()
    if verbose:
        print("Distance measured in {} milliseconds.".format(int((end-start)*1000)))
    return 1,detected

In [13]:
def LoadImageEncoding(path):
    """Read one saved encoding array from ``path`` with ``np.load``."""
    return np.load(path)

def LoadAllImageEncondings(image_dir_path):
    """Load every ``.npy`` file directly inside ``image_dir_path``.

    Returns a dict keyed by the file name with its last four characters
    (the ``.npy`` suffix) removed; values come from ``LoadImageEncoding``.
    """
    loaded = {}
    for entry in listdir(image_dir_path):
        if not isfile(join(image_dir_path, entry)):
            continue
        if entry.lower()[-4:] != ".npy":
            continue
        loaded[entry[0:-4]] = LoadImageEncoding(image_dir_path + "/" + entry)
    return loaded

# Load All Encodings, Train and save the KNN model and dataframe

In [14]:
#Train_Model(src_dir,visualize=True,yaw_threshold=yaw_threshold)

In [15]:
#rec = FaceRecognition()
#encs = rec.LoadAllImageEncondings(src_dir)

In [16]:
#len(encs)

In [17]:
#model,df = rec.TrainEnc(encs,3,metric="cosine")
#rec.SaveModel("Models/encModel.pkl")
#rec.SaveEncodingsDataframe("Models/encDataFrame.pkl")
#del encs

# Load KNN Model For Matching

In [18]:
# Build the recogniser from the artefacts saved by the (commented-out) training cells above:
# the fitted nearest-neighbour model and the encodings DataFrame it was fitted on.
rec = FaceRecognition()
rec.LoadModel("Models/encModel.pkl")
rec.LoadEncodingsDataframe("Models/encDataFrame.pkl")

Loaded Model: Models/encModel.pkl
Loaded Dataset: Models/encDataFrame.pkl


In [19]:
def identify_image(img,rec,detectAgeGender = True ,match_thresh = 13.55,verbose=True,cropimage=True):
    """Annotate ``img`` in place with a box and label for every matched face.

    Runs ``FindMatchedFace`` and, when it reports status 1, draws a rectangle
    around each face plus a ``name,gender,emotion,age years.`` label, where
    ``name`` is the first entry of the match's name list.

    Args:
        img: frame to analyse; it is drawn on directly.
        rec: recogniser forwarded to ``FindMatchedFace``.
        detectAgeGender, match_thresh, verbose, cropimage: forwarded unchanged
            to ``FindMatchedFace``.

    Returns:
        The same ``img`` object (annotated when faces were matched).
    """
    status,detected = FindMatchedFace(img,rec,match_thresh=match_thresh,detectAgeGender=detectAgeGender,verbose=verbose,cropimage=cropimage)

    if(status == 1):
        for (names,indices,distances,(x_min, y_min, x_max, y_max),age,gender,emotion) in detected:
            name = names[0]
            cv2.rectangle(img,(x_min,y_min),(x_max,y_max),(255,255,0),2)
            # Keep the text baseline inside the frame for faces touching the top edge;
            # otherwise the label would be drawn off-image and be invisible.
            text_y = max(y_min-10, 15)
            cv2.putText(img,"{},{},{},{} years.".format(name,gender,emotion,age),(x_min,text_y)
                            ,cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255))

    return img

# Smoke test on one evaluation image: annotate it, print the elapsed time in
# milliseconds, and show the result until a key is pressed.
# NOTE(review): cv2.imread presumably yields None if this hard-coded path is
# missing, which would make identify_image fail — confirm the file exists.
img = cv2.imread(r"/home/pi/Notebooks/FaceRecognitionFramework/eval/ang.JPG")
start = time.time()
img = identify_image(img,rec = rec,verbose=False,match_thresh = 16)
end = time.time()

# Elapsed time of identify_image in milliseconds.
print(int((end-start)*1000))

cv2.imshow("Output",img)
# waitKey(0) blocks until any key is pressed in the window.
cv2.waitKey(0)
cv2.destroyAllWindows()

def identify_images(input_dir, recognizer=None, match_thresh=0.65, yaw_threshold=20):
    """Run face matching on every image matched by a glob pattern and display the results.

    Args:
        input_dir: glob pattern of the images to process (e.g. ``"dir/*.*"``).
        recognizer: recogniser passed to ``FindMatchedFace``; defaults to the
            notebook-level ``rec``.
        match_thresh: distance threshold forwarded to ``FindMatchedFace``.
            NOTE(review): the 0.65 default is far below the thresholds used
            elsewhere in this notebook (13.55-19) — confirm it is still intended.
        yaw_threshold: yaw limit forwarded to ``FindMatchedFace``.

    For each image the annotated frame is shown and the function blocks until
    a key is pressed. Prints per-image and average timings.
    """
    # Only touch the global `rec` when no recogniser was supplied.
    recognizer = rec if recognizer is None else recognizer
    times = []
    for img_path in glob.glob(input_dir):
        img = cv2.imread(img_path)
        if img is None:
            # The pattern may match non-image files; cv2.imread returns None for them.
            print("Skipping unreadable image: {}".format(img_path))
            continue

        start = time.time()
        status,detected = FindMatchedFace(img,recognizer,match_thresh=match_thresh,visualize=False,yaw_threshold = yaw_threshold
                                          ,detectAgeGender=True)
        end = time.time()
        times.append(end-start)
        print("Executed in {} milliseconds.".format(int((end-start)*1000)))
        print(detected)

        if(status == 1):
            # FindMatchedFace yields 7-tuples; only name, box, age and gender are needed here.
            for (names,_,_,(x_min, y_min, x_max, y_max),age,gender,_) in detected:
                name = names[0]
                cv2.rectangle(img,(x_min,y_min),(x_max,y_max),(255,255,0),2)
                # Age goes in front of "years", gender last.
                cv2.putText(img,"{},{} years,{}".format(name,age,gender),(x_min-5,y_min-10)
                            ,cv2.FONT_HERSHEY_COMPLEX,1,(255,255,255))
                # Unmatched faces are reported as "N/A"; show the placeholder picture for them.
                ref_name = "Unknown" if name in ("N/A", "Unknown") else name
                ref_img = cv2.imread(src_dir + "/" + ref_name + ".png")
                if ref_img is not None:
                    cv2.imshow(name,ref_img)

        cv2.imshow("Output",img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    if times:
        print("All Images Executed in average {} milliseconds.".format(int(np.mean(times)*1000)))
    else:
        # Averaging an empty list gives NaN, which cannot be converted to int.
        print("No images were processed for pattern: {}".format(input_dir))

In [20]:
#identify_images("../images/query/*.*")

# Annotate and display every file in the evaluation folder, one at a time;
# each window stays open until a key is pressed.
for img_path in glob.glob("/home/pi/Notebooks/FaceRecognitionFramework/eval/*.*"):
    img = cv2.imread(img_path)
    if img is None:
        # "*.*" also matches non-image files; cv2.imread returns None for those
        # and identify_image cannot process None.
        print("Skipping unreadable image: {}".format(img_path))
        continue
    start = time.time()
    img = identify_image(img,rec,match_thresh=16,verbose=False)
    end = time.time()
    print("Executed in {} milliseconds.".format(int((end-start)*1000)))
    cv2.imshow("output",img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def sample_from_dir(num_files,src_path,target_path,filter="*.png"):
    """Copy a random sample of files from ``src_path`` into ``target_path``.

    Args:
        num_files: number of files to copy; capped at the number of matching files.
        src_path: directory to sample from.
        target_path: existing directory to copy into.
        filter: glob pattern, relative to ``src_path``, selecting candidate files.

    Files are drawn without replacement, so up to ``num_files`` distinct files
    are copied. Nothing is copied when no file matches.
    """
    image_paths = glob.glob(src_path + "/" + filter)
    if not image_paths:
        print("No files matching {} found in {}".format(filter, src_path))
        return
    sample_size = min(num_files, len(image_paths))
    # choice(..., replace=False) can reach every file (the old randint upper
    # bound excluded the last one) and never picks the same file twice.
    indices = np.random.choice(len(image_paths), size=sample_size, replace=False)
    for i in indices:
        img_path = image_paths[i]
        # os.path.basename works with the platform's separator, unlike split("\\").
        shutil.copy(img_path, os.path.join(target_path, os.path.basename(img_path)))
    print("Done!")

In [21]:
#sample_from_dir(1000,r"E:\DataScience_old\Notebooks\Datasets\Celebs\img_align_celeba_png.7z\img_align_celeba_png"
 #              ,r"D:\DataScience\Notebooks\ComputerVision\FaceRecognitionFramework\images")

In [22]:
# Live demo on the Pi camera: annotate each frame until `q` is pressed.
vs = VideoStream(src=0,usePiCamera=True,resolution=(800,608))

vs.start()
# Give the camera a moment to warm up before reading frames.
time.sleep(2)

try:
    while True:
        frame = vs.read()
        if frame is None or frame.shape[0]==0:
            continue
        # Convert to grayscale and back so the frame keeps three channels
        # but carries no colour information.
        frame = cv2.cvtColor(cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY),cv2.COLOR_GRAY2BGR)
        try:
            frame = identify_image(frame,rec,match_thresh=19,verbose=False,cropimage=True)
        except Exception as e :
            # Best effort: report the failure and still show the current frame.
            print(str(e))
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
finally:
    # Always release the windows and the camera stream, even when the loop is
    # interrupted (e.g. kernel interrupt) or raises.
    cv2.destroyAllWindows()
    vs.stop()

In [23]:
# Live demo on the default webcam: annotate each frame until `q` is pressed.
cap = cv2.VideoCapture(0)

cap.set(cv2.CAP_PROP_FRAME_WIDTH,cap_w)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT,cap_h)

# Start with fresh per-track histories for this session.
rolling_Genders = {}
rolling_ages = {}
rolling_emotions = {}

# Give up after this many consecutive failed reads instead of spinning forever
# when the camera is missing or has been disconnected.
MAX_FAILED_READS = 50
failed_reads = 0

try:
    if not cap.isOpened():
        raise RuntimeError("Could not open video capture device 0.")

    while(True):
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                print("Stopping: {} consecutive frames could not be read.".format(failed_reads))
                break
            continue
        failed_reads = 0

        # Display the resulting frame
        frame = identify_image(frame,rec,match_thresh=16,verbose=False,cropimage=True)

        cv2.imshow('frame',frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # When everything is done (or an error occurred), release the capture
    cap.release()
    cv2.destroyAllWindows()