# Extract Features from SCFace images with ArcFace

Images are processed with the default ArcFace model from InsightFace. The identity, camera, and distance encoded in each file name are saved alongside the extracted features.

In [1]:
import insightface
import cv2
import numpy as np
import pickle
import os
import time
import pandas as pd
os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT']='0'

In [2]:
# Folder holding the SCFace surveillance-camera images to enroll.
img_folder = (
    '/media/rafael/Windows-SSD/recfac/bases/SCFace/'
    'SCface_database/surveillance_cameras_all'
)
# Current working directory at the time this cell runs.
cwd = os.getcwd()

In [None]:
# Device selector handed to model.prepare(): -1 runs on CPU.
# NOTE(review): in insightface this looks like a GPU device index (0 = first
# GPU) rather than a GPU count -- confirm against the installed version.
ctx_id = 0

# Build the face-analysis pipeline used by the rest of the notebook.
# NOTE(review): ga_name=None presumably skips loading the gender/age model;
# confirm against the installed insightface version.
model = insightface.app.FaceAnalysis(ga_name=None)

# Load the models onto the selected device with the chosen NMS threshold.
model.prepare(
    ctx_id=ctx_id,
    nms=0.4,
)

In [6]:
def enroll(imgPath):
    """Extract the embedding of the most central face found in an image.

    The identity, camera and distance labels are sliced from fixed character
    positions of the file name (e.g. ``001_cam1_1.jpg`` yields ``'001'``,
    ``'cam1'`` and ``'1'``).

    Args:
        imgPath: Path of the image file to process.

    Returns:
        A tuple ``(status, identity, cam, distance, rep)``. When at least one
        face is detected, ``status`` is ``"ok"`` and ``rep`` is the embedding
        of the face whose bounding-box centroid lies closest to the image
        center. When no face is detected, ``status`` is ``"no face"`` and
        ``rep`` is a 512-element array filled with NaN.

    Raises:
        ValueError: If the image at ``imgPath`` cannot be read.
    """
    bgrImg = cv2.imread(imgPath)
    if bgrImg is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a message that names the offending file.
        raise ValueError("Could not read image {}".format(imgPath))
    # NOTE: images are passed to the detector at their native resolution
    # (an earlier 400-pixel downscaling step was disabled).
    rgbImg = cv2.cvtColor(bgrImg, cv2.COLOR_BGR2RGB)

    # Labels come from fixed positions in the file name.
    img_name = os.path.splitext(os.path.basename(imgPath))[0]
    identity = img_name[0:3]
    cam = img_name[4:8]
    distance = img_name[9:10]

    faces = model.get(rgbImg)

    # Indicate if no face is detected
    if len(faces) == 0:
        print("No face found on {}".format(imgPath))
        return "no face", identity, cam, distance, np.full(512, np.nan)

    # Image center expressed as (x, y) so that it is comparable with the
    # bounding-box centroids below. shape is (rows, cols, ...), i.e.
    # (height, width, ...).
    height, width = rgbImg.shape[:2]
    center = np.array([int(width / 2), int(height / 2)])

    # Compute centroids of faces and their distances from the center of the image.
    # bbox is assumed to be (x1, y1, x2, y2), as produced by insightface.
    dist = []
    for face in faces:
        box = face.bbox.astype(int).flatten()
        centroid = np.array([int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)])
        dist.append(np.linalg.norm(center - centroid))

    # Get embeddings of the face with centroid closest to the center of the image
    idx_face = int(np.argmin(dist))
    rep = faces[idx_face].embedding

    return "ok", identity, cam, distance, rep

In [None]:
# Initialize the template dictionary (file name -> tuple returned by enroll)
templates = {}
print ("Enrolling faces.")
start_time = time.time()

# Get list of images to enroll. os.listdir returns entries in arbitrary
# order, so sort them to make the enrollment order reproducible.
image_list = sorted(os.listdir(img_folder))
number_of_images = len(image_list)

# Loop through folder and extract the embedding for central face of each image
for i,img in enumerate(image_list, start=1):
    # Progress counter is 1-based so the last file reads N/N.
    print("Processing file {}. ({}/{})".format(img,i,number_of_images),end='\r')
    # Get the embedding for each face and store it in a template dictionary
    imgPath = os.path.join(img_folder , img)
    templates[img] = enroll(imgPath)

# Report how many files were enrolled and how long it took
print ("\nEnrolling {} files took {:0.2f} seconds.".format(len(templates), time.time() - start_time))

In [79]:
# Serialize the templates dictionary into the gallery file.
# NOTE(review): the file name says nms0.2 while the model cell calls
# prepare() with nms=0.4 -- confirm which setting produced this gallery.
gallery_path = 'scface_surveillance_nms0.2.gal'
with open(gallery_path, mode="wb") as gallery:
    pickle.dump(templates, gallery)

In [19]:
# Tabulate the enrollment results: one row per image file name, one column
# per field of the tuple stored in templates.
feature_columns = ['status', 'id', 'cam', 'distance', 'features']
features_df = pd.DataFrame.from_dict(
    data=templates,
    columns=feature_columns,
    orient='index',
)
features_df.head()

Unnamed: 0,status,id,cam,distance,features
001_cam1_1.jpg,ok,1,cam1,1,"[-0.7638826, -0.5204826, -0.55712885, 1.693900..."
001_cam1_2.jpg,ok,1,cam1,2,"[-0.106640525, -0.29566392, -2.4521015, 0.3750..."
001_cam1_3.jpg,ok,1,cam1,3,"[1.2538581, -1.7230742, -0.0017990551, 1.30142..."
001_cam2_1.jpg,ok,1,cam2,1,"[1.2045783, 1.1709319, -1.270426, 1.1458263, -..."
001_cam2_2.jpg,ok,1,cam2,2,"[-0.048381254, 0.52329177, -2.4461622, 1.38223..."


In [20]:
# Count, per camera, the images in which no face was detected.
(
    features_df[features_df['status'] == 'no face']
    .groupby(['cam'])
    .count()['features']
)

cam
cam1    31
cam2    13
cam3     6
cam4     3
cam5    55
cam6     1
cam7    61
Name: features, dtype: int64

In [21]:
# Count, per distance label, the images in which no face was detected.
(
    features_df[features_df['status'] == 'no face']
    .groupby(['distance'])
    .count()['features']
)

distance
1    105
2     54
3     11
Name: features, dtype: int64

In [22]:
# Number of rows currently held in features_df.
features_df.shape[0]

2860

In [23]:
# Keep only the rows whose enrollment succeeded (status == 'ok'); rows with
# status 'no face' are dropped.
features_df = features_df.loc[features_df['status'] == 'ok']
features_df.shape[0]

2690

In [65]:
# Drop the rows captured by the IR cameras (cam6, cam7 and cam8).
ir_cams = ['cam6', 'cam7', 'cam8']
features_df = features_df[~features_df['cam'].isin(ir_cams)]
features_df.shape[0]

1842

In [69]:
# Distinct identity labels remaining, in order of first appearance.
ids = list(pd.unique(features_df['id']))
len(ids)

130

In [67]:
# Distinct camera labels remaining, in order of first appearance.
cams = list(pd.unique(features_df['cam']))
cams

['cam1', 'cam2', 'cam3', 'cam4', 'cam5']

In [70]:
# Distinct distance labels remaining, in order of first appearance.
distances = list(pd.unique(features_df['distance']))
distances

['1', '2', '3']

In [18]:
# Cosine similarity between the feature vectors of two images whose file
# names differ only in the identity prefix (002 vs 001).
feat1 = features_df.loc['002_cam1_3.jpg', 'features']
feat2 = features_df.loc['001_cam1_3.jpg', 'features']
np.dot(feat1, feat2) / (np.linalg.norm(feat1) * np.linalg.norm(feat2))

0.094679445