# Face detection/recognition in videos, using Amazon Rekognition

- This notebook shows how to use the Amazon Rekognition image APIs to detect faces in video frames and to recognize those faces across the video.

- So, we will:
  - use **pytube** to download a given video from YouTube.
  - use **OpenCV** for reading, transforming and writing the video for each step of transformation
  
## Steps
1. Download the video from YouTube
1. Extract a short clip, sampling a few frames (about 2 frames/sec, up to 3 minutes), from the video
1. For each frame, call Amazon Rekognition face detection
1. For each detected face, add it to a collection (index it)
1. Manually name each face-identity 
1. Render a new video with the bounding boxes around the faces and their respective names

In [None]:
import cv2
import boto3
import botocore
import matplotlib.pyplot as plt
import os
import json
import numpy as np
import threading
import sys

from IPython.core.display import display, HTML
from PIL import Image
from ipywidgets import FloatProgress, VBox, HTML
from IPython.display import display

### Install pytube

In [None]:
# `sys.modules` only lists modules that were already imported in this kernel,
# so it cannot tell whether pytube is installed; ask the import system instead.
import importlib.util

if importlib.util.find_spec('pytube') is None:
    !pip install pytube
    # Make the freshly installed package visible to the running kernel.
    importlib.invalidate_caches()

from pytube import YouTube

### Helper functions/objects

In [None]:
# Encoder for converting numpy to json
class NumPyArangeEncoder(json.JSONEncoder):
    """JSON encoder that also understands NumPy arrays and NumPy scalars.

    Pass it as ``cls=NumPyArangeEncoder`` to ``json.dumps``/``json.dump``.
    """

    def default(self, obj):
        """Convert NumPy values to plain Python objects.

        Args:
            obj: The object the standard encoder could not serialize.

        Returns:
            A (nested) list for ``np.ndarray`` and a plain Python scalar for
            NumPy scalar types such as ``np.int64`` or ``np.bool_``.

        Raises:
            TypeError: For any other unsupported type (raised by the base
                class).
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.generic):
            # NumPy scalars (e.g. the result of indexing an array) are not
            # JSON serializable as-is; unwrap them to the Python equivalent.
            return obj.item()
        return super().default(obj)

In [None]:
def detect_faces(img):
    """Run Amazon Rekognition face detection on one encoded image.

    Args:
        img: Encoded image data accepted by ``bytearray()``; the callers in
            this notebook pass the JPEG buffer returned by ``cv2.imencode``.

    Returns:
        The ``FaceDetails`` entry of the ``detect_faces`` response.
    """
    payload = {'Bytes': bytearray(img)}
    response = reko.detect_faces(Image=payload, Attributes=['ALL'])
    return response["FaceDetails"]

In [None]:
def search_faces(img):
    """Search the ``face_detection`` collection for faces matching an image.

    Args:
        img: Encoded image data accepted by ``bytearray()``.

    Returns:
        The ``FaceMatches`` entry of the response (at most 5 matches at or
        above the global ``min_threshold``), or an empty list when the
        request fails for any reason.
    """
    try:
        response = reko.search_faces_by_image(
            CollectionId='face_detection',
            Image={"Bytes": bytearray(img)},
            MaxFaces=5,
            FaceMatchThreshold=min_threshold,
        )
        return response["FaceMatches"]
    except Exception:
        # Best effort: every failure is reported to the caller as "no match".
        return []

In [None]:
def index_face(img):
    """Add the face(s) of an image to the ``face_detection`` collection.

    Args:
        img: Encoded image data accepted by ``bytearray()``.

    Returns:
        The ``FaceRecords`` entry of the ``index_faces`` response, or an
        empty list when the request fails for any reason.
    """
    try:
        response = reko.index_faces(
            CollectionId='face_detection',
            Image={"Bytes": bytearray(img)},
        )
    except Exception:
        # Best effort: a failed request simply yields no indexed faces.
        return []
    return response["FaceRecords"]

In [None]:
def face_detection(frame,frame_id):
    """Detect the faces of one frame and record them in the global ``faces``.

    Intended to run as a worker thread. The frame is JPEG-encoded and sent to
    ``detect_faces``; every returned bounding box (given as ratios of the
    frame size) is converted to pixel coordinates and stored, together with
    the cropped face image, under ``faces[frame_id]``. Frames without faces
    get no entry.

    On exit - including when an exception is raised - the global
    ``consumers`` counter is decremented and the progress widgets are
    advanced, so the code that started the worker never waits on a slot that
    will not be freed.

    Args:
        frame: Image array of shape (height, width, channels).
        frame_id: Key under which the detected faces are stored.
    """
    global consumers

    try:
        h, w, _ = frame.shape

        img = cv2.imencode(".jpg", frame)[1]

        detected = []
        for detail in detect_faces(img):
            bbox = detail['BoundingBox']
            left = int(bbox['Left'] * w)
            top = int(bbox['Top'] * h)
            right = left + int(bbox['Width'] * w)
            bottom = top + int(bbox['Height'] * h)

            # A box may extend past the frame edge. Clamp it so the slice
            # below never sees a negative index (which would wrap around and
            # yield an empty or wrong crop).
            x1, x2 = max(0, min(left, w)), max(0, min(right, w))
            y1, y2 = max(0, min(top, h)), max(0, min(bottom, h))

            detected.append({'face': frame[y1:y2, x1:x2], 'bbox': (x1, y1, x2, y2)})

        if detected:
            faces.setdefault(frame_id, []).extend(detected)
    finally:
        with consumers_lock:
            consumers -= 1
            bar.value += 100/(short_max_frames)
            label.value = "{}% Consumers[{}]".format(int(bar.value), consumers)

In [None]:
def face_indexing(face):
    """Attach a Rekognition ``face_id`` to one detected face.

    Intended to run as a worker thread. The face crop is searched in the
    collection via ``search_faces``; on a match the best match's id is
    reused, otherwise the crop is indexed via ``index_face``, its new id is
    appended to the global ``people`` list and the crop is saved to
    ``/tmp/face_<face_id>.jpg``. Empty crops are skipped.

    On exit - including when an exception is raised - the global
    ``consumers`` counter is decremented and the progress widgets are
    advanced.

    Args:
        face: Dict with a ``'face'`` image array; ``'face_id'`` is added to it
            in place when an id could be determined.
    """
    global consumers

    try:
        face_img = face['face']

        hf, wf, _ = face_img.shape
        min_size = min(hf, wf)
        if min_size > 0:
            if min_size < 90.0:
                # Upscale small crops so the shorter side is 90 px
                # (presumably to satisfy Rekognition's minimum image size -
                # TODO confirm).
                scale = 90.0 / min_size
                face_img = cv2.resize(face_img, (0,0), fx=scale, fy=scale)

            encoded = cv2.imencode(".jpg", face_img)[1]
            matches = search_faces(encoded)

            if matches:
                face['face_id'] = matches[0]["Face"]["FaceId"]
            else:
                records = index_face(encoded)
                if records:
                    face_id = records[0]["Face"]["FaceId"]
                    face['face_id'] = face_id
                    people.append(face_id)
                    # Keep the original (unscaled) crop for manual tagging.
                    cv2.imwrite("/tmp/face_%s.jpg" % face_id, face['face'])
    finally:
        with consumers_lock:
            consumers -= 1
            # One worker runs per face, so progress is measured against the
            # total number of faces, not the number of frames.
            total_faces = sum(len(found) for found in faces.values())
            bar.value += 100 / max(total_faces, 1)
            label.value = "{}%".format(int(bar.value))

In [None]:
def render_video(file_name, w=640, h=360):
    """Show an HTML5 video player for ``file_name`` in the notebook output.

    Args:
        file_name: Source of the video, used as the ``src`` of the player.
        w: Player width.
        h: Player height.
    """
    markup = '<video controls width="%s" height="%s"><source src="%s" type="video/mp4"/></video>' % (w,h,file_name)
    player = HTML(markup)
    display(player)

# MAIN PROGRAM

In [None]:
file_name = "video" # base name (without extension) of the saved video; ".mp4" is appended wherever the file is opened

### Download the video

In [None]:
# Download the sample video once; skipped when "<file_name>.mp4" already exists.
if YouTube and not os.path.exists(file_name + '.mp4'):
    print('Downloading...')
    streams = YouTube('https://www.youtube.com/watch?v=gobyhRLjp6Y').streams
    # The following cells open "<file_name>.mp4", so explicitly pick an MP4
    # stream that carries audio and video together instead of whichever
    # stream happens to be listed first.
    stream = streams.filter(progressive=True, file_extension='mp4').first()
    if stream is None:
        raise RuntimeError('No progressive MP4 stream is available for this video')
    # NOTE(review): whether pytube appends the ".mp4" extension to `filename`
    # depends on the installed pytube version - confirm the saved file name.
    stream.download(filename=file_name)
    print('Done')

In [None]:
youtube=False
needs_rotate=True # rotate every frame 90 degrees clockwise while extracting
min_threshold=75 # confidence level for a face matching
step=15 # step for getting the frames. i.e: get one, jump 15, get another, jump 15...

# Read the source video's metadata, then release the capture right away so
# the file handle is not kept open for the rest of the session.
meta = cv2.VideoCapture("%s.mp4" % file_name )
try:
    if not meta.isOpened():
        raise IOError("Could not open %s.mp4" % file_name)
    fps=int(meta.get(cv2.CAP_PROP_FPS)) # original video FPS
    frame_count=meta.get(cv2.CAP_PROP_FRAME_COUNT)
finally:
    meta.release()

if fps <= 0:
    # Fail with a clear message instead of a ZeroDivisionError below.
    raise ValueError("Could not read a valid FPS from %s.mp4" % file_name)

duration=min(3, frame_count / fps / 60) # duration of the new video in minutes (capped at 3)
max_frames=fps * 60 * duration # max number of frames of the original video
new_fps=fps/step # new video fps

# Size (in pixels) every extracted frame is resized to
target_width=320
target_height=640

print("FPS: {} DURATION: {}".format(fps, duration))

In [None]:
# Amazon Rekognition client used by detect_faces, search_faces and index_face
reko = boto3.client("rekognition")

### Destroy/Create the collection

In [None]:
# Start from an empty 'face_detection' collection.
# Deleting and creating are separate steps: on a first run there is nothing
# to delete, and that must not prevent the collection from being created.
try:
    reko.delete_collection(CollectionId='face_detection')
except reko.exceptions.ResourceNotFoundException:
    pass  # the collection does not exist yet
except Exception as e:
    print(e)

try:
    reko.create_collection(
        CollectionId='face_detection'
    )
except Exception as e:
    print(e)

## Extract frames into a new video (up to 3 mins, ~2 fps)
- Here, the original video is converted into a short version with a shorter duration and a lower frame rate

In [None]:
%%time

# Progress widgets for this cell
bar = FloatProgress(min=0, max=100)
label = HTML("0%")
box = VBox(children=[label, bar])
display(box)

fourcc = cv2.VideoWriter_fourcc(*'X264')
out = cv2.VideoWriter('short-%s.mp4' % file_name, fourcc, new_fps, (target_width,target_height))

cap = cv2.VideoCapture("%s.mp4" % file_name)

# Share of the progress bar contributed by each sampled frame (computed once;
# falls back to 100 when there is nothing to sample to avoid dividing by zero).
frames_to_sample = max_frames / step
progress_step = 100 / frames_to_sample if frames_to_sample else 100

frame_counter = 0
try:
    # Copy every `step`-th frame of the first `max_frames` frames.
    while cap.isOpened() and frame_counter <= max_frames:
        # Seek to the next sampled frame
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_counter)

        _, frame = cap.read()
        if frame is None:
            break

        # rotate 90 degrees cw
        if needs_rotate:
            frame = cv2.transpose(frame)
            frame = cv2.flip(frame, flipCode=1)

        frame = cv2.resize(frame, (target_width,target_height))

        out.write(frame)
        frame_counter += step

        bar.value += progress_step
        label.value = "{}%".format(int(bar.value))
finally:
    # Always release both handles so the output file is finalized even when
    # a frame fails to process.
    cap.release()
    out.release()

In [None]:
# Preview the shortened video written by the previous cell
render_video("short-%s.mp4" % file_name)

## Detect Faces
- Here, each frame will be sent to Amazon Rekognition for face detection. The expected output is a list of faces and their respective bounding boxes

In [None]:
max_consumers = 20 # number of workers that will run in parallel for face detection
consumers = 0 # number of worker threads currently running; incremented by the driver loop, decremented by each worker
consumers_lock = threading.Lock() # guards updates to `consumers` and the progress widgets

In [None]:
%%time

import time

from ipywidgets import FloatProgress, VBox, HTML
from IPython.display import display

# Progress widgets for this cell (updated by the face_detection workers)
bar = FloatProgress(min=0, max=100)
label = HTML("0%")
box = VBox(children=[label, bar])
display(box)

# Expected number of frames in the short video; face_detection uses it to
# size each progress step.
short_max_frames = new_fps * 60 * duration

faces = {}
frame_id = 0
workers = []  # worker threads that may still be running
worker_limit = max(max_consumers, 1)
cap = cv2.VideoCapture("short-%s.mp4" % file_name)
try:
    while cap.isOpened():
        _, frame = cap.read()

        if frame is None:
            break

        # Throttle on the threads that are actually alive: at most
        # `worker_limit` run at once, and a worker that died without updating
        # `consumers` cannot block the loop forever.
        workers = [t for t in workers if t.is_alive()]
        while len(workers) >= worker_limit:
            workers[0].join(timeout=0.5)
            workers = [t for t in workers if t.is_alive()]

        # face_detection decrements this counter when it finishes.
        with consumers_lock:
            consumers += 1

        consumer = threading.Thread(target=face_detection, args=(frame,frame_id,))
        frame_id += 1
        consumer.start()
        workers.append(consumer)
finally:
    cap.release()

# Wait for the remaining workers before leaving the cell.
for worker in workers:
    worker.join()

# Every worker has finished; make sure the shared counter starts clean for
# the next stage even if a worker failed to decrement it.
with consumers_lock:
    consumers = 0

## Index faces
- Here, each face will be sent for face indexing. If a given face was already indexed it will be found in the collection and its face_id will be returned. Otherwise, the face is then indexed (added to the collection) and a new face_id is generated

In [None]:
# Remove face crops left over from a previous run. Only the files this
# notebook writes (/tmp/face_<face_id>.jpg) are deleted, not every JPEG in /tmp.
!rm -f /tmp/face_*.jpg

In [None]:
# Lower the worker cap for the indexing stage (presumably so two workers do
# not search-then-index the same new face at the same time - TODO confirm).
max_consumers=1

In [None]:
%%time

import time

# Progress widgets for this cell (updated by the face_indexing workers)
bar = FloatProgress(min=0, max=100)
label = HTML("0%")
box = VBox(children=[label, bar])
display(box)

people=[]  # face ids newly added to the collection, filled by face_indexing
workers = []  # worker threads that may still be running
worker_limit = max(max_consumers, 1)
for frame_id in faces:
    for i in faces[frame_id]:

        # Throttle on the threads that are actually alive so that at most
        # `worker_limit` run at once; running more than that would let two
        # workers index the same unseen face before either is searchable.
        workers = [t for t in workers if t.is_alive()]
        while len(workers) >= worker_limit:
            workers[0].join(timeout=0.5)
            workers = [t for t in workers if t.is_alive()]

        # face_indexing decrements this counter when it finishes.
        with consumers_lock:
            consumers += 1

        consumer = threading.Thread(target=face_indexing, args=(i,))
        consumer.start()
        workers.append(consumer)

# Wait for the remaining workers: the next cells read `people` and the
# face ids stored in `faces`, which are only complete once all are done.
for worker in workers:
    worker.join()

# Every worker has finished; leave the shared counter in a clean state.
with consumers_lock:
    consumers = 0

## Face tagging
- The faces found in the indexing process will be rendered below. You need to edit the dict returned and then load it for the next step

In [None]:
import os

fig=plt.figure(figsize=(8, 8))

# Template dict to copy into the next cell: one 'Anon' entry per indexed face.
database = "people_name = {\n"
database += "".join("    '%s': 'Anon',\n" % i for i in people)
database += "}\n"

# Thumbnails that were actually saved during indexing, in `people` order.
thumbnails = [
    path
    for path in ('/tmp/face_%s.jpg' % i for i in people)
    if os.path.exists(path)
]

# Keep the 8x4 grid, but add rows when more than 32 faces were indexed:
# add_subplot rejects an index larger than rows * cols.
grid_cols = 4
grid_rows = max(8, -(-len(thumbnails) // grid_cols))

counter = 1
for path in thumbnails:
    img = cv2.imread(path)
    if img is None:
        # The file exists but could not be decoded; skip it.
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    fig.add_subplot(grid_rows, grid_cols, counter)
    plt.imshow(img)

    counter += 1

print(database)
plt.show()

### Paste the "people_name" structure printed above into the cell below, edit it with the correct names and run the cell

In [None]:
# people_name = {....} here

## Export parameters (optional)
- you can save the metadata for future use

In [None]:
import json
import numpy as np

# Serialize first so that a serialization error cannot leave a truncated
# faces.json behind, then write through a context manager so the file is
# always closed. Note that JSON stores the frame-id keys as strings.
faces_json = json.dumps(faces, cls=NumPyArangeEncoder)
with open("faces.json", "w") as f:
    f.write(faces_json)

In [None]:
import json

# Serialize first so that a serialization error cannot leave a truncated
# people_name.json behind, then write through a context manager so the file
# is always closed.
people_name_json = json.dumps(people_name)
with open("people_name.json", "w") as f:
    f.write(people_name_json)

## Import parameters (optional)
- you can import the previously saved data

In [None]:
import json

# Load the face-id -> name mapping saved earlier; the context manager closes
# the file as soon as it has been read.
with open("people_name.json", "r") as f:
    people_name = json.load(f)

In [None]:
# Load the detections saved earlier. JSON stores dict keys as strings, so the
# frame ids are converted back to integers - the rendering loop looks faces
# up by an integer frame counter. The face crops come back as nested lists
# rather than arrays.
with open("faces.json", "r") as f:
    faces = {int(frame_id): found for frame_id, found in json.load(f).items()}

## Process video
- Finally we will get the short version of the video and all the metadata and render a new video with the information on top of its frames

In [None]:
%%time

# Progress widgets for this cell
bar = FloatProgress(min=0, max=100)
label = HTML("0%")
box = VBox(children=[label, bar])
display(box)

fourcc = cv2.VideoWriter_fourcc(*'X264')

out = cv2.VideoWriter('short-processed-%s.mp4' % file_name, fourcc, new_fps, (target_width,target_height))

cap = cv2.VideoCapture('short-%s.mp4' % file_name)
print(cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT) )

# Progress is measured against the frames of the short video (falling back to
# the number of frames with faces, then to 1, so the division is always
# defined). A dedicated name is used so the global `max_frames` of the
# extraction step is not overwritten.
total_frames = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or len(faces) or 1
progress_step = 100 / total_frames

frame_counter = 0
try:
    while cap.isOpened():
        _, frame = cap.read()

        if frame is None:
            break

        h,w,_ = frame.shape
        # Line thickness scales with the frame size but is never below 1.
        thick = max(1, int((h + w) // 600))

        # Frame ids are integers in memory but strings when `faces` was
        # loaded from JSON without converting the keys; accept both.
        frame_faces = faces.get(frame_counter)
        if frame_faces is None:
            frame_faces = faces.get(str(frame_counter), [])

        for i in frame_faces:
            (x1,y1,x2,y2) = i['bbox']

            face_id = i.get('face_id')

            cv2.rectangle(frame, (x1, y1), (x2, y2), (255,255,255), thick)
            if face_id is not None:
                # Faces missing from `people_name` are labelled 'Anon' (the
                # same placeholder the tagging template uses) instead of
                # aborting the render with a KeyError.
                cv2.putText(frame, people_name.get(face_id, 'Anon'), (x1, y1), 0, 1, (255,255,0), thick)

        out.write(frame)

        frame_counter += 1

        bar.value += progress_step
        label.value = "{}%".format(int(bar.value))
finally:
    # Always release both handles so the output file is finalized even when
    # a frame fails to process.
    cap.release()
    out.release()

In [None]:
# Preview the annotated video written by the previous cell
render_video('short-processed-%s.mp4' % file_name)