### This jupyter notebook is to recognize faces on live camera

In [1]:
# importing libraries

from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torchvision import datasets
from torch.utils.data import DataLoader
from PIL import Image, ImageDraw
import cv2
import time
import os
import mmcv, cv2
from IPython import display
import numpy as np



In [2]:
# initializing MTCNN and InceptionResnetV1 

mtcnn0 = MTCNN(image_size=240, margin=0, keep_all=False, min_face_size=40) # keep_all=False
mtcnn = MTCNN(image_size=240, margin=0, keep_all=True, min_face_size=40) # keep_all=True
resnet = InceptionResnetV1(pretrained='vggface2').eval() 

In [3]:
# Read data from folder

dataset = datasets.ImageFolder('photos') # photos folder path 
idx_to_class = {i:c for c,i in dataset.class_to_idx.items()} # accessing names of peoples from folder names

def collate_fn(x):
    return x[0]

loader = DataLoader(dataset, collate_fn=collate_fn)

name_list = [] # list of names corrospoing to cropped photos
embedding_list = [] # list of embeding matrix after conversion from cropped faces to embedding matrix using resnet

for img, idx in loader:
    face, prob = mtcnn0(img, return_prob=True) 
    if face is not None and prob>0.92:
        emb = resnet(face.unsqueeze(0)) 
        embedding_list.append(emb.detach()) 
        name_list.append(idx_to_class[idx])        

# save data
data = [embedding_list, name_list] 
torch.save(data, 'data.pt') # saving data.pt file
load_data = torch.load('data.pt') 
embedding_list = load_data[0] 
name_list = load_data[1] 

In [4]:
video = mmcv.VideoReader('20221116_231147.mp4')
frames = [Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) for frame in video]

display.Video('20221116_231147.mp4', width=640)

In [6]:
frames_tracked = []
for i, frame in enumerate(frames):
    print('\rTracking frame: {}'.format(i + 1), end='')
    #print(frame)
    x = np.array(frame)
    img = Image.fromarray(x)
    #img =frame
    img_cropped_list, prob_list = mtcnn(img, return_prob=True) 
    if img_cropped_list is not None:
        boxes, _ = mtcnn.detect(img)
        for i, prob in enumerate(prob_list):
            if prob>0.90:
                emb = resnet(img_cropped_list[i].unsqueeze(0)).detach() 
                
                dist_list = [] # list of matched distances, minimum distance is used to identify the person
                
                for idx, emb_db in enumerate(embedding_list):
                    dist = torch.dist(emb, emb_db).item()
                    dist_list.append(dist)

                min_dist = min(dist_list) # get minumum dist value
                min_dist_idx = dist_list.index(min_dist) # get minumum dist index
                name = name_list[min_dist_idx] # get name corrosponding to minimum dist
                
                box = boxes[i] 
                
                original_frame = frame.copy() # storing copy of frame before drawing on it

                frame = np.array(frame)
                if min_dist<0.9:
                    frame = cv2.putText(frame, name+' '+str(min_dist), (int(box[0]),int(box[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0),1, cv2.LINE_AA)
                    print(box)
                    #draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)
                #draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)
                #frame = cv2.rectangle(frame, (int(box[0]),int(box[1])) , (int(box[2]),int(box[3])), (255,0,0), 2)
                frame = Image.fromarray(frame, 'RGB')
                frames_tracked.append(frame.resize((640, 360), Image.BILINEAR))
                #print('tracked')
#                frame = cv2.rectangle(frame, (int(box[0]),int(box[1])) , (int(box[2]),int(box[3])), (255,0,0), 2)
    '''         
    # Detect faces
    boxes, _ = mtcnn.detect(frame)
    
    # Draw faces
    frame_draw = frame.copy()
    draw = ImageDraw.Draw(frame_draw)
    for box in boxes:
        draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)
    
    # Add to frame list
    frames_tracked.append(frame_draw.resize((640, 360), Image.BILINEAR))
    '''
print('\nDone')

Tracking frame: 1[ 575.8802   131.19029 1051.7288   731.9874 ]
Tracking frame: 2

  frames_tracked.append(frame.resize((640, 360), Image.BILINEAR))


[ 572.34534  131.15764 1056.7426   735.3607 ]
Tracking frame: 3[ 578.365    128.54277 1057.0063   736.3305 ]
Tracking frame: 4[ 583.20404  135.31259 1045.4138   732.4888 ]
Tracking frame: 5[ 580.4615   135.75089 1035.497    723.6887 ]
Tracking frame: 6[ 589.1375   139.70508 1035.0677   722.0658 ]
Tracking frame: 7[ 592.9174   138.3352  1045.3624   728.99084]
Tracking frame: 8[ 593.82184  136.09207 1044.5404   724.42365]
Tracking frame: 9[ 587.61584  133.75525 1051.2443   731.4957 ]
Tracking frame: 10[ 572.22815   113.998215 1062.5658    738.88544 ]
Tracking frame: 11[ 599.51855  132.03777 1048.3478   724.03656]
Tracking frame: 12[ 601.11584   123.912415 1060.1752    723.80383 ]
Tracking frame: 13[ 603.9678   131.74454 1058.4926   721.17065]
Tracking frame: 14[ 600.09766  120.4126  1062.4827   730.464  ]
Tracking frame: 15[ 614.7779   132.41003 1071.5453   726.6289 ]
Tracking frame: 16[ 626.15015  135.53456 1078.6624   730.9084 ]
Tracking frame: 17[ 617.47974   127.719635 1086.6942    7

  if method is "Min":
  if method is "Min":
  if method is "Min":
  if method is "Min":
  if method is "Min":


KeyboardInterrupt: 

In [None]:
'''
d = display.display(frames_tracked[0], display_id=True)
i = 1
try:
    while True:
        d.update(frames_tracked[i % len(frames_tracked)])
        i += 1
except KeyboardInterrupt:
    pass
'''

'\nd = display.display(frames_tracked[0], display_id=True)\ni = 1\ntry:\n    while True:\n        d.update(frames_tracked[i % len(frames_tracked)])\n        i += 1\nexcept KeyboardInterrupt:\n    pass\n'

In [None]:
dim = frames_tracked[0].size
fourcc = cv2.VideoWriter_fourcc(*'FMP4')    
video_tracked = cv2.VideoWriter('video_tracked.mp4', fourcc, 25.0, dim)
for frame in frames_tracked:
    video_tracked.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
video_tracked.release()

OpenCV: FFMPEG: tag 0x34504d46/'FMP4' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


In [None]:
from PIL import Image
video = mmcv.VideoReader('input.avi')
frames = [Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) for frame in video]

display.Video('input.avi', width=640)

In [None]:
frames_tracked = []
for i, frame in enumerate(frames):
    print('\rTracking frame: {}'.format(i + 1), end='')
    #print(frame)
    x = np.array(frame)
    img = Image.fromarray(x)
    #img =frame
    img_cropped_list, prob_list = mtcnn(img, return_prob=True) 
    if img_cropped_list is not None:
        boxes, _ = mtcnn.detect(img)
        for i, prob in enumerate(prob_list):
            if prob>0.90:
                emb = resnet(img_cropped_list[i].unsqueeze(0)).detach() 
                
                dist_list = [] # list of matched distances, minimum distance is used to identify the person
                
                for idx, emb_db in enumerate(embedding_list):
                    dist = torch.dist(emb, emb_db).item()
                    dist_list.append(dist)

                min_dist = min(dist_list) # get minumum dist value
                min_dist_idx = dist_list.index(min_dist) # get minumum dist index
                name = name_list[min_dist_idx] # get name corrosponding to minimum dist
                
                box = boxes[i] 
                
                original_frame = frame.copy() # storing copy of frame before drawing on it

                frame = np.array(frame)
                if min_dist<0.9:
                    frame = cv2.putText(frame, name+' '+str(min_dist), (int(box[0]),int(box[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0),1, cv2.LINE_AA)
                    
                    print('\n' + box)
                    #draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)
                #draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)
                #frame = cv2.rectangle(frame, (int(box[0]),int(box[1])) , (int(box[2]),int(box[3])), (255,0,0), 2)
                frame = Image.fromarray(frame, 'RGB')
                frames_tracked.append(frame.resize((640, 360), Image.BILINEAR))
                #print('tracked')
#                frame = cv2.rectangle(frame, (int(box[0]),int(box[1])) , (int(box[2]),int(box[3])), (255,0,0), 2)
    '''         
    # Detect faces
    boxes, _ = mtcnn.detect(frame)
    
    # Draw faces
    frame_draw = frame.copy()
    draw = ImageDraw.Draw(frame_draw)
    for box in boxes:
        draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)
    
    # Add to frame list
    frames_tracked.append(frame_draw.resize((640, 360), Image.BILINEAR))
    '''
print('\nDone')

NameError: name 'frames' is not defined

In [None]:
dim = frames_tracked[0].size
fourcc = cv2.VideoWriter_fourcc(*'FMP4')    
video_tracked = cv2.VideoWriter('video_tracked_2.mp4', fourcc, 25.0, dim)
for frame in frames_tracked:
    video_tracked.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
video_tracked.release()

OpenCV: FFMPEG: tag 0x34504d46/'FMP4' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'
