### This jupyter notebook is to recognize faces on live camera

In [1]:
# importing libraries

from facenet_pytorch import MTCNN, InceptionResnetV1
import torch
from torchvision import datasets
from torch.utils.data import DataLoader
from PIL import Image, ImageDraw
import cv2
import time
import os
import mmcv, cv2
from IPython import display
import numpy as np



In [2]:
# initializing MTCNN and InceptionResnetV1 

mtcnn0 = MTCNN(image_size=240, margin=0, keep_all=False, min_face_size=40) # keep_all=False
mtcnn = MTCNN(image_size=240, margin=0, keep_all=True, min_face_size=40) # keep_all=True
resnet = InceptionResnetV1(pretrained='vggface2').eval() 

In [3]:
# Read data from folder

dataset = datasets.ImageFolder('photos') # photos folder path 
idx_to_class = {i:c for c,i in dataset.class_to_idx.items()} # accessing names of peoples from folder names

def collate_fn(x):
    return x[0]

loader = DataLoader(dataset, collate_fn=collate_fn)

name_list = [] # list of names corrospoing to cropped photos
embedding_list = [] # list of embeding matrix after conversion from cropped faces to embedding matrix using resnet

for img, idx in loader:
    face, prob = mtcnn0(img, return_prob=True) 
    if face is not None and prob>0.92:
        emb = resnet(face.unsqueeze(0)) 
        embedding_list.append(emb.detach()) 
        name_list.append(idx_to_class[idx])        

# save data
data = [embedding_list, name_list] 
torch.save(data, 'data.pt') # saving data.pt file
load_data = torch.load('data.pt') 
embedding_list = load_data[0] 
name_list = load_data[1] 

In [5]:
video = mmcv.VideoReader('20221116_231147.mp4')
frames = [Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) for frame in video]

display.Video('20221116_231147.mp4', width=640)

In [6]:
frames_tracked = []
for i, frame in enumerate(frames):
    print('\rTracking frame: {}'.format(i + 1), end='')
    #print(frame)
    x = np.array(frame)
    img = Image.fromarray(x)
    #img =frame
    img_cropped_list, prob_list = mtcnn(img, return_prob=True) 
    if img_cropped_list is not None:
        boxes, _ = mtcnn.detect(img)
        for i, prob in enumerate(prob_list):
            if prob>0.90:
                emb = resnet(img_cropped_list[i].unsqueeze(0)).detach() 
                
                dist_list = [] # list of matched distances, minimum distance is used to identify the person
                
                for idx, emb_db in enumerate(embedding_list):
                    dist = torch.dist(emb, emb_db).item()
                    dist_list.append(dist)

                min_dist = min(dist_list) # get minumum dist value
                min_dist_idx = dist_list.index(min_dist) # get minumum dist index
                name = name_list[min_dist_idx] # get name corrosponding to minimum dist
                
                box = boxes[i] 
                
                original_frame = frame.copy() # storing copy of frame before drawing on it

                frame = np.array(frame)
                if min_dist<0.9:
                    frame = cv2.putText(frame, name+' '+str(min_dist), (int(box[0]),int(box[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0),1, cv2.LINE_AA)
                    print('success')
                    #draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)
                #draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)
                #frame = cv2.rectangle(frame, (int(box[0]),int(box[1])) , (int(box[2]),int(box[3])), (255,0,0), 2)
                frame = Image.fromarray(frame, 'RGB')
                frames_tracked.append(frame.resize((640, 360), Image.BILINEAR))
                #print('tracked')
#                frame = cv2.rectangle(frame, (int(box[0]),int(box[1])) , (int(box[2]),int(box[3])), (255,0,0), 2)
    '''         
    # Detect faces
    boxes, _ = mtcnn.detect(frame)
    
    # Draw faces
    frame_draw = frame.copy()
    draw = ImageDraw.Draw(frame_draw)
    for box in boxes:
        draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)
    
    # Add to frame list
    frames_tracked.append(frame_draw.resize((640, 360), Image.BILINEAR))
    '''
print('\nDone')

Tracking frame: 1success
Tracking frame: 2

  frames_tracked.append(frame.resize((640, 360), Image.BILINEAR))


success
Tracking frame: 3success
Tracking frame: 4success
Tracking frame: 5success
Tracking frame: 6success
Tracking frame: 7success
Tracking frame: 8success
Tracking frame: 9success
Tracking frame: 10success
Tracking frame: 11success
Tracking frame: 12success
Tracking frame: 13success
Tracking frame: 14success
Tracking frame: 15success
Tracking frame: 16success
Tracking frame: 17success
Tracking frame: 18success
Tracking frame: 19success
Tracking frame: 20success
Tracking frame: 21success
Tracking frame: 22success
Tracking frame: 23success
Tracking frame: 24success
Tracking frame: 25success
Tracking frame: 26success
Tracking frame: 27success
Tracking frame: 28success
Tracking frame: 29success
Tracking frame: 30success
Tracking frame: 31success
Tracking frame: 32success
Tracking frame: 33success
Tracking frame: 34success
Tracking frame: 35success
Tracking frame: 36success
Tracking frame: 37success
Tracking frame: 38success
Tracking frame: 39success
Tracking frame: 40success
Tracking fr

In [None]:
'''
d = display.display(frames_tracked[0], display_id=True)
i = 1
try:
    while True:
        d.update(frames_tracked[i % len(frames_tracked)])
        i += 1
except KeyboardInterrupt:
    pass
'''

'\nd = display.display(frames_tracked[0], display_id=True)\ni = 1\ntry:\n    while True:\n        d.update(frames_tracked[i % len(frames_tracked)])\n        i += 1\nexcept KeyboardInterrupt:\n    pass\n'

In [7]:
dim = frames_tracked[0].size
fourcc = cv2.VideoWriter_fourcc(*'FMP4')    
video_tracked = cv2.VideoWriter('video_tracked.mp4', fourcc, 25.0, dim)
for frame in frames_tracked:
    video_tracked.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
video_tracked.release()

OpenCV: FFMPEG: tag 0x34504d46/'FMP4' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


In [8]:
from PIL import Image
video = mmcv.VideoReader('input.avi')
frames = [Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) for frame in video]

display.Video('input.avi', width=640)

In [9]:
frames_tracked = []
for i, frame in enumerate(frames):
    print('\rTracking frame: {}'.format(i + 1), end='')
    #print(frame)
    x = np.array(frame)
    img = Image.fromarray(x)
    #img =frame
    img_cropped_list, prob_list = mtcnn(img, return_prob=True) 
    if img_cropped_list is not None:
        boxes, _ = mtcnn.detect(img)
        for i, prob in enumerate(prob_list):
            if prob>0.90:
                emb = resnet(img_cropped_list[i].unsqueeze(0)).detach() 
                
                dist_list = [] # list of matched distances, minimum distance is used to identify the person
                
                for idx, emb_db in enumerate(embedding_list):
                    dist = torch.dist(emb, emb_db).item()
                    dist_list.append(dist)

                min_dist = min(dist_list) # get minumum dist value
                min_dist_idx = dist_list.index(min_dist) # get minumum dist index
                name = name_list[min_dist_idx] # get name corrosponding to minimum dist
                
                box = boxes[i] 
                
                original_frame = frame.copy() # storing copy of frame before drawing on it

                frame = np.array(frame)
                if min_dist<0.9:
                    frame = cv2.putText(frame, name+' '+str(min_dist), (int(box[0]),int(box[1])), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0),1, cv2.LINE_AA)
                    print('success')
                    #draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)
                #draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)
                #frame = cv2.rectangle(frame, (int(box[0]),int(box[1])) , (int(box[2]),int(box[3])), (255,0,0), 2)
                frame = Image.fromarray(frame, 'RGB')
                frames_tracked.append(frame.resize((640, 360), Image.BILINEAR))
                #print('tracked')
#                frame = cv2.rectangle(frame, (int(box[0]),int(box[1])) , (int(box[2]),int(box[3])), (255,0,0), 2)
    '''         
    # Detect faces
    boxes, _ = mtcnn.detect(frame)
    
    # Draw faces
    frame_draw = frame.copy()
    draw = ImageDraw.Draw(frame_draw)
    for box in boxes:
        draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)
    
    # Add to frame list
    frames_tracked.append(frame_draw.resize((640, 360), Image.BILINEAR))
    '''
print('\nDone')

Tracking frame: 1

  frames_tracked.append(frame.resize((640, 360), Image.BILINEAR))


Tracking frame: 105
Done


In [10]:
dim = frames_tracked[0].size
fourcc = cv2.VideoWriter_fourcc(*'FMP4')    
video_tracked = cv2.VideoWriter('video_tracked_2.mp4', fourcc, 25.0, dim)
for frame in frames_tracked:
    video_tracked.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
video_tracked.release()

OpenCV: FFMPEG: tag 0x34504d46/'FMP4' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'
