In [1]:
import os
import cv2
import numpy as np
import pandas as pd

from insightface.app import FaceAnalysis

In [2]:
# Configure the InsightFace pipeline: the 'buffalo_sc' model pack is looked up
# under ./insightface_model and run through ONNX Runtime's CPU provider.
faceapp = FaceAnalysis(
    name='buffalo_sc',
    root='insightface_model',
    providers=['CPUExecutionProvider'],
)
# 640x640 detector input, detections scoring under 0.5 are discarded.
# Author's note: don't set det_thresh below 0.3.
faceapp.prepare(ctx_id=0, det_size=(640, 640), det_thresh=0.5)

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: insightface_model\models\buffalo_sc\det_500m.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: insightface_model\models\buffalo_sc\w600k_mbf.onnx recognition ['None', 3, 112, 112] 127.5 127.5
set det-size: (640, 640)


# DATA PREPROCESSING

## Extract Facial Features and Labels

In [3]:
import re

In [4]:

def clean_name(string):
    """Turn a raw folder-name fragment into a tidy display label.

    The text is title-cased, every character that is not an ASCII letter
    (underscores, digits, punctuation, non-ASCII letters) is treated as a
    word separator, and the remaining words are joined by single spaces.

    Parameters
    ----------
    string : str
        Raw fragment such as ``'angelina_jolie'`` or ``'student'``.

    Returns
    -------
    str
        Label such as ``'Angelina Jolie'``; never has leading, trailing or
        doubled spaces, and is ``''`` when the input contains no letters.
    """
    string=string.title()
    # Anything that is not A-Z / a-z becomes a separator.
    string=re.sub(r'[^A-Za-z]',' ',string)
    # Collapse separator runs and trim the ends so that 'obama_1' and
    # 'obama' map to the same label instead of differing by whitespace.
    return ' '.join(string.split())

In [5]:
# Walk images/<role>-<name>/ and collect one [name, role, embedding] record
# for every image in which a face is detected.
image_root='images'
# Sorted so the row order of the resulting table does not depend on the OS.
l=sorted(os.listdir(path=image_root))
person_info=[]
for f in l:
    folder_path=os.path.join(image_root,f)
    # Ignore stray files and folders that do not follow '<role>-<name>'.
    if not os.path.isdir(folder_path) or '-' not in f:
        continue
    # Split on the first hyphen only, so hyphenated names stay intact.
    role,name=f.split('-',1)
    name=clean_name(name)
    role=clean_name(role)
    print("Role=",role,end='\t')
    print("Name=",name)

    # file names of the images stored in this person's folder
    img_files=sorted(os.listdir(path=folder_path))
    print(img_files)
    for file in img_files:
        path=os.path.join(folder_path,file)
        # step 1: read the image (cv2.imread returns None when it cannot decode the file)
        img_arr=cv2.imread(path)
        if img_arr is None:
            print("Skipping unreadable image:",path)
            continue
        # step 2: get facial info; max_num=1 because each training image is
        # expected to contain a single face
        result=faceapp.get(img_arr,max_num=1)
        # step 3: extract facial features (images with no detected face are skipped)
        if len(result)>0:
            res=result[0]
            embedding=res['embedding']
            # step 4: save name, role and embedding as one record
            person_info.append([name,role,embedding])


        
        



   




Role= Student	Name= Angelina Jolie
['angelina-1.jpeg', 'angelina-10.jpeg', 'angelina-2.jpeg', 'angelina-3.jpeg', 'angelina-4.jpeg', 'angelina-5.jpeg', 'angelina-6.jpeg', 'angelina-7.jpeg', 'angelina-8.jpeg', 'angelina-9.jpeg']
Role= Student	Name= Chris Evans
['evans_1.jpeg', 'evans_10.jpeg', 'evans_2.jpeg', 'evans_3.jpeg', 'evans_4.jpeg', 'evans_5.jpeg', 'evans_6.jpeg', 'evans_7.jpeg', 'evans_8.jpeg', 'evans_9.jpeg']
Role= Student	Name= Scarlett Johansson
['scarlet_10.jpeg', 'scarlet_13.jpeg', 'scarlet_14.jpeg', 'scarlet_17.jpeg', 'scarlet_18.jpeg', 'scarlet_20.jpeg', 'scarlet_25.jpeg', 'scarlet_31.jpeg', 'scarlet_4.jpeg', 'scarlet_7.jpeg']
Role= Teacher	Name= Barack Obama
['obama_1.jpeg', 'obama_10.jpeg', 'obama_2.jpeg', 'obama_3.jpeg', 'obama_4.jpeg', 'obama_5.jpeg', 'obama_6.jpeg', 'obama_7.jpeg', 'obama_8.jpeg', 'obama_9.jpeg']
Role= Teacher	Name= Morgan Freeman
['freeman_1.jpeg', 'freeman_10.jpeg', 'freeman_2.jpeg', 'freeman_3.jpeg', 'freeman_4.jpeg', 'freeman_5.jpeg', 'freeman_6.

In [6]:
# One row per collected record: the person's name, their role, and the
# embedding extracted from one of their images.
person_columns=['Name','Role','Facial_Features']
df=pd.DataFrame(data=person_info,columns=person_columns)
df #Store in redis after removing redundancy

Unnamed: 0,Name,Role,Facial_Features
0,Angelina Jolie,Student,"[-0.8410335, 1.6623635, -0.745449, -2.138221, ..."
1,Angelina Jolie,Student,"[0.09012404, -0.4169108, -0.37650695, 0.424011..."
2,Angelina Jolie,Student,"[0.86389506, 0.32952017, 0.05781859, -1.453824..."
3,Angelina Jolie,Student,"[1.6630238, 1.161926, -0.4108489, -2.6416948, ..."
4,Angelina Jolie,Student,"[0.864314, 0.9437618, 0.4265172, -1.1836909, -..."
5,Angelina Jolie,Student,"[0.40983105, 1.1813736, -0.9961994, -1.4018284..."
6,Angelina Jolie,Student,"[0.42182076, 2.0830736, 1.0647428, -2.273447, ..."
7,Angelina Jolie,Student,"[0.3949412, 0.84420955, -0.615458, -2.105093, ..."
8,Angelina Jolie,Student,"[1.0928565, 1.1707087, -0.2880756, -2.0103226,..."
9,Angelina Jolie,Student,"[-0.63545597, 1.7897248, 0.5982856, -3.550097,..."


# Step 1: Read Image

In [7]:
# Load the image
img_test_path = 'test_images/test_3.jpg'
img_test = cv2.imread(img_test_path)
# cv2.imread does not raise on a missing or undecodable file; it returns None,
# which would otherwise surface later as an opaque OpenCV error.
if img_test is None:
    raise FileNotFoundError(f'Could not read image: {img_test_path}')

# Show the image in a native window and wait for a key press before closing it.
cv2.imshow('test image', img_test)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Step 2: Extract Features

In [8]:
# Run detection + recognition on the test image.
# max_num=0 is kept from the original call (presumably "no cap on the number
# of faces" — confirm against the insightface documentation).
res_test=faceapp.get(img_test,max_num=0)
# Fail loudly instead of leaving embed_test undefined (or stale from an
# earlier run of this cell) when nothing is detected.
if len(res_test)==0:
    raise ValueError('No face detected in the test image')
# Only one sample is used downstream; as before, it is the last detection.
rt=res_test[-1]
bbox=rt['bbox'].astype(int)
score=int(rt['det_score']*100)
embed_test=rt['embedding']

In [9]:
# Only one sample (a single face embedding) is considered from here on;
# its length is the dimensionality of the feature vector.
len(embed_test)

512

In [10]:
# Preview the first rows of the reference table (name, role, embedding).
df.head()

Unnamed: 0,Name,Role,Facial_Features
0,Angelina Jolie,Student,"[-0.8410335, 1.6623635, -0.745449, -2.138221, ..."
1,Angelina Jolie,Student,"[0.09012404, -0.4169108, -0.37650695, 0.424011..."
2,Angelina Jolie,Student,"[0.86389506, 0.32952017, 0.05781859, -1.453824..."
3,Angelina Jolie,Student,"[1.6630238, 1.161926, -0.4108489, -2.6416948, ..."
4,Angelina Jolie,Student,"[0.864314, 0.9437618, 0.4265172, -1.1836909, -..."


In [11]:
# Gather the per-row embeddings and stack them into a single 2-D matrix,
# one reference sample per row.
X_list=list(df['Facial_Features'])
X=np.array(X_list)
X.shape

(50, 512)

In [12]:
from sklearn.metrics import pairwise

In [13]:
# Compare the test embedding against every reference embedding in X.
# The test vector is reshaped to a single row because the pairwise helpers
# take 2-D inputs; each result has one entry per row of X.
y=embed_test.reshape(1,-1) #1x512
# Distances: smaller means more alike.
euclidean_distance=pairwise.euclidean_distances(X,y)
manhattan_distance=pairwise.manhattan_distances(X,y)
# Similarity: larger means more alike.
cosine_similar=pairwise.cosine_similarity(X,y)


In [14]:
# Attach the three per-row scores to a fresh copy of the reference table;
# each (n, 1) result is flattened to one value per row.
data_search=df.assign(
    euclidean=euclidean_distance.flatten(),
    manhattan=manhattan_distance.flatten(),
    cosine=cosine_similar.flatten(),
)

In [15]:
# Inspect the reference table together with its euclidean / manhattan / cosine columns.
data_search

Unnamed: 0,Name,Role,Facial_Features,euclidean,manhattan,cosine
0,Angelina Jolie,Student,"[-0.8410335, 1.6623635, -0.745449, -2.138221, ...",35.328033,643.420105,-0.044062
1,Angelina Jolie,Student,"[0.09012404, -0.4169108, -0.37650695, 0.424011...",32.449764,584.148831,0.135127
2,Angelina Jolie,Student,"[0.86389506, 0.32952017, 0.05781859, -1.453824...",37.117546,679.4174,-0.104925
3,Angelina Jolie,Student,"[1.6630238, 1.161926, -0.4108489, -2.6416948, ...",36.873985,672.38546,-0.043783
4,Angelina Jolie,Student,"[0.864314, 0.9437618, 0.4265172, -1.1836909, -...",34.319042,625.331543,0.064824
5,Angelina Jolie,Student,"[0.40983105, 1.1813736, -0.9961994, -1.4018284...",34.673912,637.396951,0.01977
6,Angelina Jolie,Student,"[0.42182076, 2.0830736, 1.0647428, -2.273447, ...",35.434174,651.596312,-0.04793
7,Angelina Jolie,Student,"[0.3949412, 0.84420955, -0.615458, -2.105093, ...",35.748096,651.375726,-0.056647
8,Angelina Jolie,Student,"[1.0928565, 1.1707087, -0.2880756, -2.0103226,...",33.760391,622.294044,0.09258
9,Angelina Jolie,Student,"[-0.63545597, 1.7897248, 0.5982856, -3.550097,...",37.442749,682.246667,-0.04558


# Identify person using Cosine Similarity

In [16]:
# Cosine-similarity cut-off: reference rows must score strictly above this to count as a match.
s_opt=0.5

In [17]:
# Keep only the reference rows whose cosine similarity beats the threshold.
# reset_index returns a new frame, so the filtered result is never modified
# in place (avoids pandas' chained-assignment warning on a query() result).
datafilter=data_search.query(f'cosine>{s_opt}').reset_index(drop=True)
if len(datafilter)>0:
    # argmax() is a position, so the row is fetched positionally with iloc.
    best_pos=datafilter['cosine'].argmax()
    name_cos,role_cos=datafilter.iloc[best_pos][['Name','Role']]
else:
    # Nothing was similar enough: report the face as unidentified.
    name_cos='Unknown'
    role_cos='Unknown'

In [18]:
# Show the identity chosen by the cosine match ('Unknown' for both when no row passed the threshold).
print(name_cos,role_cos)

Scarlett Johansson Student


In [19]:
# Inspect the reference rows that passed the cosine-similarity threshold.
datafilter

Unnamed: 0,Name,Role,Facial_Features,euclidean,manhattan,cosine
0,Scarlett Johansson,Student,"[0.15684888, -0.15252304, -1.1749347, 0.223224...",18.872494,329.913499,0.731325
1,Scarlett Johansson,Student,"[0.9988017, -0.3464318, -1.6072202, -0.3437336...",20.994419,376.273663,0.663559
2,Scarlett Johansson,Student,"[0.41819817, -1.2333512, -1.1655853, 0.3192767...",19.246262,337.445122,0.73199
3,Scarlett Johansson,Student,"[0.74071056, -0.2172078, -2.1699548, -0.382094...",21.683872,391.648487,0.647056
4,Scarlett Johansson,Student,"[-0.2931065, -0.027625293, -3.0639057, -0.0226...",20.010487,356.554965,0.71468
5,Scarlett Johansson,Student,"[0.9100211, -0.7951274, -2.354409, 0.8837526, ...",18.425682,331.417541,0.762974
6,Scarlett Johansson,Student,"[0.5179118, -0.70606494, -1.5367962, 0.9526467...",22.201698,400.143882,0.599273
7,Scarlett Johansson,Student,"[-0.97057724, -0.09699072, -2.1180046, 1.34241...",19.143875,342.204666,0.736652
8,Scarlett Johansson,Student,"[0.87918776, -0.58773625, 0.08178866, 0.654351...",21.456215,385.317746,0.617932
9,Scarlett Johansson,Student,"[-1.4453042, -0.7219956, -1.7936003, 0.79554, ...",19.419188,355.943036,0.713054


# Multiple-Person Prediction

In [33]:
#     cosine similarity
def search_algo(df,feature_column,test_vector,name_role=['Name','Role'],thresh=0.5):
    """Identify a face by cosine similarity against a table of known embeddings.

    Parameters
    ----------
    df : pandas.DataFrame
        Reference table; it is only read, never modified.
    feature_column : str
        Column of ``df`` holding one embedding vector per row.
    test_vector : array-like
        Embedding of the face to identify; reshaped to a single row.
    name_role : sequence of two column labels
        Columns returned for the best match. It must name exactly two
        columns because the result is unpacked into two values. The default
        list is only read, never mutated.
    thresh : float
        The best similarity must be strictly greater than this to count.

    Returns
    -------
    tuple
        ``(person_name, person_role)`` taken from the most similar row, or
        ``('Unknown', 'Unknown')`` when no row exceeds ``thresh`` or ``df``
        has no rows.
    """
    # An empty reference table cannot match anything (and would otherwise
    # make the similarity call fail on a 1-D empty array).
    if len(df)==0:
        return 'Unknown','Unknown'

    x=np.asarray(df[feature_column].tolist())
    query_vec=np.asarray(test_vector).reshape(1,-1)

    similar=pairwise.cosine_similarity(x,query_vec)
    similar_arr=np.asarray(similar).flatten()

    # The best row above the threshold is the overall best row whenever that
    # row passes, so no per-call frame copy or string query is needed.
    best_pos=int(similar_arr.argmax())
    if similar_arr[best_pos]>thresh:
        person_name,person_role=df.iloc[best_pos][list(name_role)]
    else:
        person_name=person_role='Unknown'

    return person_name,person_role


In [47]:
test_image_path='test_images/test_2.jpg'
test_image=cv2.imread(test_image_path)
# cv2.imread returns None rather than raising when the file cannot be read.
if test_image is None:
    raise FileNotFoundError(f'Could not read image: {test_image_path}')
# Show the image in a native window and wait for a key press before closing it.
cv2.imshow('test image',test_image)
cv2.waitKey()
cv2.destroyAllWindows()

In [48]:
# Detect every face in the test image, look each one up in the reference
# table, and draw the outcome on a copy so the original stays untouched.
results=faceapp.get(test_image)
test_copy=test_image.copy()
for res in results:
    x1,y1,x2,y2=res['bbox'].astype(int)
    embeddings=res['embedding']
    person_name,person_role=search_algo(
        df,
        'Facial_Features',
        test_vector=embeddings,
        name_role=['Name','Role'],
        thresh=0.5,
    )
    # Colours are BGR: red for an unidentified face, green for a match.
    color=(0,0,255) if person_name=="Unknown" else (0,255,0)

    # Box around the face, with the label anchored at the box's (x1, y1) corner.
    cv2.rectangle(test_copy,(x1,y1),(x2,y2),color)
    text_gen=person_name
    cv2.putText(test_copy,text_gen,(x1,y1),cv2.FONT_HERSHEY_DUPLEX,0.6,color,2)

# Display the annotated copy until a key is pressed.
cv2.imshow('test image',test_copy)
cv2.waitKey()
cv2.destroyAllWindows()