In [1]:
import cv2
import math
import numpy as np
import matplotlib.pyplot as plt
import mediapipe as mp
import os
from sklearn.neighbors import KDTree
from PIL import Image
import tensorflow as tf
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp

mp_face_mesh = mp.solutions.face_mesh

# Load drawing_utils and drawing_styles
mp_drawing = mp.solutions.drawing_utils 
mp_drawing_styles = mp.solutions.drawing_styles

# matplotlib 3d plot jupyter notebook settings
%matplotlib inline
%matplotlib widget

# Face detection
from facenet_pytorch import MTCNN, InceptionResnetV1

folder = 'images/'


In [2]:
def landmarks_from_image(image, face_mesh=None):
  if face_mesh is None:
    face_mesh = mp_face_mesh.FaceMesh(
        static_image_mode=True,
        refine_landmarks=True,
        max_num_faces=1,
        min_detection_confidence=0.5)
  
  results = face_mesh.process(image)

  if results.multi_face_landmarks is None:
    return None, None, None, None, None, None, None

  landmarks = np.array([[lmk.x, lmk.y, lmk.z] for lmk in results.multi_face_landmarks[0].landmark])
  
  # normalize
  overall_vector = landmarks.copy()[:, :2]

  roll_vector = (landmarks[9] - landmarks[200])[:2]
  roll_vector /= np.linalg.norm(roll_vector)

  # rotate to align tilt vector with upwards
  roll_angle = math.atan2(roll_vector[1], roll_vector[0])+np.pi/2
  rotation = np.array([[math.cos(roll_angle), -math.sin(roll_angle)], [math.sin(roll_angle), math.cos(roll_angle)]])
  overall_vector = rotation.dot(overall_vector.T).T

  center = overall_vector.mean(axis=0)
  overall_vector -= center
  bounding_box = np.array([overall_vector.min(axis=0), overall_vector.max(axis=0)])
  scale = np.abs(bounding_box[0][:2] - bounding_box[1][:2]).max()
  overall_vector /= scale/2
  overall_vector = overall_vector.flatten()

  # yaw angle
  yaw_vector = (landmarks[50] - landmarks[280])[[0,2]]
  yaw_vector /= np.linalg.norm(yaw_vector)
  yaw_angle = math.atan2(yaw_vector[1], yaw_vector[0])

  # smile vector
  smile_vector = np.array([
    landmarks[0], landmarks[267], landmarks[269], landmarks[270], landmarks[409], 
    landmarks[375], landmarks[321], landmarks[405], landmarks[314], landmarks[17], 
    landmarks[84], landmarks[181], landmarks[91], landmarks[146], landmarks[185], 
    landmarks[40], landmarks[39], landmarks[37], landmarks[13], landmarks[312], 
    landmarks[311], landmarks[310], landmarks[318], landmarks[402], landmarks[317], 
    landmarks[14], landmarks[87], landmarks[178], landmarks[88], landmarks[95], 
    landmarks[191], landmarks[80], landmarks[81], landmarks[82]
  ]).flatten()

  # eye vector
  eye_vector = np.array([
    landmarks[475], landmarks[474], landmarks[477], landmarks[476], landmarks[473], 
    landmarks[386], landmarks[387], landmarks[388], landmarks[466], landmarks[263], 
    landmarks[249], landmarks[390], landmarks[373], landmarks[374], landmarks[380], 
    landmarks[381], landmarks[382], landmarks[362], landmarks[398], landmarks[384], 
    landmarks[385], landmarks[470], landmarks[469], landmarks[472], landmarks[471], 
    landmarks[468], landmarks[158], landmarks[157], landmarks[173], landmarks[133], 
    landmarks[155], landmarks[154], landmarks[153], landmarks[145], landmarks[144], 
    landmarks[163], landmarks[7], landmarks[33], landmarks[246], landmarks[161], 
    landmarks[160], landmarks[159]
  ]).flatten()

  # eyebrow vector
  eyebrow_vector = np.array([
    landmarks[336], landmarks[296], landmarks[334], landmarks[293], landmarks[300], 
    landmarks[285], landmarks[295], landmarks[282], landmarks[283], landmarks[276],
    landmarks[70], landmarks[63], landmarks[105], landmarks[66], landmarks[107], 
    landmarks[46], landmarks[53], landmarks[52], landmarks[65], landmarks[55]
  ]).flatten()
  
  return overall_vector, landmarks, roll_angle, yaw_angle, smile_vector, eye_vector, eyebrow_vector

In [3]:
dataset = []

with mp_face_mesh.FaceMesh(
    static_image_mode=True,
    refine_landmarks=True,
    max_num_faces=1,
    min_detection_confidence=0.5) as face_mesh:
  
  # for file in folder 'images'
  for file in sorted(os.listdir('images')):
    print(file)
    image = cv2.imread('images/' + file)
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    
    # Convert the BGR image to RGB and process it with MediaPipe Face Mesh.
    results = face_mesh.process(image)
    landmarks = np.array([[lmk.x, lmk.y, lmk.z] for lmk in results.multi_face_landmarks[0].landmark])
    
    # normalize
    center = landmarks.mean(axis=0)
    landmarks -= center
    bounding_box = np.array([landmarks.min(axis=0), landmarks.max(axis=0)])
    scale = np.abs(bounding_box[0][:2] - bounding_box[1][:2]).max()
    landmarks /= scale/2
    landmarks = landmarks.flatten()

    dataset.append(landmarks)

I0000 00:00:1722344852.065863       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


.DS_Store


error: OpenCV(4.10.0) /Users/xperience/GHA-Actions-OpenCV/_work/opencv-python/opencv-python/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


In [4]:
# Create KD Tree
tree = KDTree(dataset, metric='euclidean')

ValueError: Expected 2D array, got 1D array instead:
array=[].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [5]:
test_img = cv2.cvtColor(cv2.imread('open.jpg'), cv2.COLOR_RGB2BGR)

with mp_face_mesh.FaceMesh(
    static_image_mode=True,
    refine_landmarks=True,
    max_num_faces=1,
    min_detection_confidence=0.5) as face_mesh:
    
    results = face_mesh.process(test_img)
    landmarks = np.array([[lmk.x, lmk.y, lmk.z] for lmk in results.multi_face_landmarks[0].landmark])
    
    # normalize
    center = landmarks.mean(axis=0)
    landmarks -= center
    bounding_box = np.array([landmarks.min(axis=0), landmarks.max(axis=0)])
    scale = np.abs(bounding_box[0][:2] - bounding_box[1][:2]).max()
    landmarks /= scale/2
    landmarks = landmarks.flatten()
    
    dist, idx = tree.query(landmarks.reshape(1, -1), k=4)


I0000 00:00:1722344854.525819       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2


NameError: name 'tree' is not defined

In [6]:
dist, idx

NameError: name 'dist' is not defined

In [3]:
# Create a face detection pipeline using MTCNN:
mtcnn = MTCNN(image_size=160, keep_all=True)

# Create an inception resnet (in eval mode):
resnet = InceptionResnetV1(pretrained='vggface2').eval()

In [4]:
img = Image.open("baseline.jpg")

# Get cropped and prewhitened image tensor
img_cropped = mtcnn(img)

# Calculate embedding (unsqueeze to add batch dimension)
my_face_embeddings = resnet(img_cropped[0].unsqueeze(0))


In [5]:
# use resnet to find my face in all of the images in a folder and find face landmarks

matches = []
id = 0

with mp_face_mesh.FaceMesh(
        static_image_mode=True,
        refine_landmarks=True,
        max_num_faces=1,
        min_detection_confidence=0.5) as face_mesh:
    for file in sorted(os.listdir(folder)):
        if not file.endswith('.jpg') and not file.endswith('.jpeg'):
            continue
        img = Image.open(folder + file)
        
        # detect faces
        batch_boxes, batch_probs = mtcnn.detect(img, landmarks=False)
        faces = mtcnn.extract(img, batch_boxes, save_path=None)

        if faces is None:
            continue

        one_face = len(faces.shape) == 3
        if one_face:
            faces = [faces]
            
        for i, face in enumerate(faces):
            img_embedding = resnet(face.unsqueeze(0))
            dist = (img_embedding - my_face_embeddings).norm().item()
            print(file, dist)
            if dist < 1.0:
                print('found')

                # Find face landmarks

                # crop image to bounding box plus margin
                og_x1, og_y1, og_x2, og_y2 = batch_boxes[i].astype(int)
                margin = 0.5
                x1 = max(0, og_x1 - int((og_x2 - og_x1) * margin))
                x2 = min(img.size[0], og_x2 + int((og_x2 - og_x1) * margin))
                y1 = max(0, og_y1 - int((og_y2 - og_y1) * margin))
                y2 = min(img.size[1], og_y2 + int((og_y2 - og_y1) * margin))
                cropped_img = np.array(img)[y1:y2, x1:x2]

                # Convert the BGR imgage to RGB and process it with MediaPipe Face Mesh.
                overall_vector, landmarks, roll_angle, yaw_angle, smile_vector, eye_vector, eyebrow_vector = landmarks_from_image(cropped_img, face_mesh)
                if overall_vector is None:
                    continue

                matches.append({
                    'id': id,
                    'file': file, 
                    'bounding_box': batch_boxes[i], 
                    'embedding': img_embedding, 
                    'dist': dist, 
                    'overall_vector': overall_vector, 
                    'landmarks': landmarks,
                    'roll_angle': roll_angle,
                    'yaw_angle': yaw_angle,
                    'smile_vector': smile_vector,
                    'eye_vector': eye_vector,
                    'eyebrow_vector': eyebrow_vector,
                })

                id += 1

I0000 00:00:1722353655.865596       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M2
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


0044E96C-A3F9-4449-8C8D-16CF44AAB6DA_1_105_c.jpeg 1.206972360610962
009F0A96-6263-492E-829F-E95FF03B1D90_1_105_c.jpeg 0.955698549747467
found
00A629C4-179B-499D-A64B-688A003A9796_1_105_c.jpeg 0.7448868751525879
found
00A629C4-179B-499D-A64B-688A003A9796_1_105_c.jpeg 1.3083053827285767
00A629C4-179B-499D-A64B-688A003A9796_1_105_c.jpeg 1.2413995265960693
00A629C4-179B-499D-A64B-688A003A9796_1_105_c.jpeg 1.368999719619751
00A629C4-179B-499D-A64B-688A003A9796_1_105_c.jpeg 1.3231209516525269
00A629C4-179B-499D-A64B-688A003A9796_1_105_c.jpeg 1.3623707294464111
00A629C4-179B-499D-A64B-688A003A9796_1_105_c.jpeg 1.327272653579712
00C8FCE0-CBF2-48A9-B5FA-100F16F7A997_1_105_c.jpeg 1.249463438987732
00C8FCE0-CBF2-48A9-B5FA-100F16F7A997_1_105_c.jpeg 1.4111530780792236
00C8FCE0-CBF2-48A9-B5FA-100F16F7A997_1_105_c.jpeg 1.4128124713897705
00C8FCE0-CBF2-48A9-B5FA-100F16F7A997_1_105_c.jpeg 1.045100212097168
00C8FCE0-CBF2-48A9-B5FA-100F16F7A997_1_105_c.jpeg 1.3615145683288574
00C8FCE0-CBF2-48A9-B5FA-100F

In [6]:
# Create KD Tree from vectorized landmarks
overall_vector_database = np.array([match['overall_vector'] for match in matches])
overall_vector_tree = KDTree(overall_vector_database, metric='euclidean')

smile_vector_database = np.array([match['smile_vector'] for match in matches])
smile_vector_tree = KDTree(smile_vector_database, metric='euclidean')

eye_vector_database = np.array([match['eye_vector'] for match in matches])
eye_vector_tree = KDTree(eye_vector_database, metric='euclidean')

eyebrow_vector_database = np.array([match['eyebrow_vector'] for match in matches])
eyebrow_vector_tree = KDTree(eyebrow_vector_database, metric='euclidean')

yaw_angle_database = np.array([[match['yaw_angle']] for match in matches])
yaw_angle_tree = KDTree(yaw_angle_database, metric='euclidean')

In [21]:
# generate distance matrix
dist_matrix = np.zeros((len(matches), len(matches)))

for i, match in enumerate(matches):
    overall_vector_dist, overall_vector_idx = overall_vector_tree.query(match['overall_vector'].reshape(1, -1), k=len(matches))
    smile_vector_dist, smile_vector_idx = smile_vector_tree.query(match['smile_vector'].reshape(1, -1), k=len(matches))
    eye_vector_dist, eye_vector_idx = eye_vector_tree.query(match['eye_vector'].reshape(1, -1), k=len(matches))
    eyebrow_vector_dist, eyebrow_vector_idx = eyebrow_vector_tree.query(match['eyebrow_vector'].reshape(1, -1), k=len(matches))
    yaw_angle_dist, yaw_angle_idx = yaw_angle_tree.query(np.array(match['yaw_angle']).reshape(1, -1), k=len(matches))

    same_image = (overall_vector_dist <= 0.1) * 1.0


    dist_matrix[i, overall_vector_idx[0]] += 0.5 * overall_vector_dist[0]
    dist_matrix[i, overall_vector_idx[0]] += 0.0 * same_image[0]
    dist_matrix[i, smile_vector_idx[0]]   += 0.0 * smile_vector_dist[0]
    dist_matrix[i, eye_vector_idx[0]]     += 0.0 * eye_vector_dist[0]
    dist_matrix[i, eyebrow_vector_idx[0]] += 0.0 * eyebrow_vector_dist[0]
    dist_matrix[i, yaw_angle_idx[0]]      += 2.0 * yaw_angle_dist[0]

# make diagonal 0
np.fill_diagonal(dist_matrix, 0)

# exp formula
dist_matrix = np.exp(dist_matrix)

In [22]:
dist_matrix = np.floor(dist_matrix*10000).astype(int).tolist()


# Create the routing index manager.
manager = pywrapcp.RoutingIndexManager(len(matches),1,0)

# Create Routing Model.
routing = pywrapcp.RoutingModel(manager)

def distance_callback(from_index, to_index):
    """Returns the distance between the two nodes."""
    # Convert from routing variable Index to distance matrix NodeIndex.
    from_node = manager.IndexToNode(from_index)
    to_node = manager.IndexToNode(to_index)
    return dist_matrix[from_node][to_node]

transit_callback_index = routing.RegisterTransitCallback(distance_callback)

# Define cost of each arc.
routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

# Setting first solution heuristic.
search_parameters = pywrapcp.DefaultRoutingSearchParameters()
search_parameters.first_solution_strategy = (
    routing_enums_pb2.FirstSolutionStrategy.AUTOMATIC)

In [23]:
solution = routing.SolveWithParameters(search_parameters)
if solution:
    index = routing.Start(0)
    route = [manager.IndexToNode(index)]
    while not routing.IsEnd(index):
        index = solution.Value(routing.NextVar(index))
        route.append(manager.IndexToNode(index))

In [24]:
route

[0,
 882,
 91,
 854,
 707,
 806,
 863,
 663,
 38,
 88,
 693,
 724,
 32,
 282,
 245,
 403,
 603,
 267,
 313,
 659,
 242,
 627,
 779,
 259,
 142,
 486,
 67,
 896,
 340,
 575,
 419,
 490,
 61,
 427,
 642,
 775,
 624,
 263,
 118,
 73,
 102,
 423,
 375,
 162,
 855,
 353,
 401,
 189,
 903,
 471,
 76,
 845,
 216,
 238,
 789,
 496,
 728,
 886,
 883,
 119,
 415,
 664,
 477,
 888,
 626,
 563,
 131,
 518,
 633,
 41,
 341,
 137,
 44,
 335,
 514,
 566,
 316,
 580,
 240,
 372,
 507,
 746,
 860,
 729,
 895,
 640,
 205,
 761,
 763,
 754,
 468,
 138,
 892,
 371,
 658,
 576,
 294,
 781,
 136,
 591,
 117,
 766,
 757,
 79,
 323,
 516,
 752,
 755,
 657,
 101,
 130,
 405,
 595,
 461,
 20,
 671,
 662,
 367,
 743,
 179,
 594,
 92,
 78,
 867,
 200,
 269,
 13,
 853,
 153,
 55,
 904,
 289,
 66,
 140,
 497,
 491,
 276,
 77,
 29,
 299,
 887,
 643,
 161,
 417,
 4,
 278,
 540,
 677,
 249,
 129,
 365,
 801,
 598,
 336,
 320,
 239,
 333,
 810,
 106,
 799,
 501,
 152,
 641,
 470,
 25,
 700,
 338,
 582,
 46,
 28,
 751,


In [25]:
from math import degrees as rad_to_deg

# align images based on bounding box and show in the order given by "route"
for i, idx in enumerate(route):
    match = matches[idx]
    print(match['file'])
    img = cv2.imread(folder + match['file'])


    x1, y1, x2, y2 = match['bounding_box'].astype(int)
    center_x = (x1 + x2) // 2
    center_y = (y1 + y2) // 2
    delta_x = img.shape[1] // 2 - center_x
    delta_y = img.shape[0] // 2 - center_y

    translation_matrix = np.float32([[1, 0, delta_x], [0, 1, delta_y]])
    img = cv2.warpAffine(img, translation_matrix, (img.shape[1], img.shape[0]))

    # Make bounding box height 30% of image height by zooming in
    desired_scale = 0.15
    actual_scale = (y2 - y1) / img.shape[0]
    zoom = desired_scale / actual_scale
    # resize to 640 x 360
    width = 1920
    height = 1080
    border_v = 0
    border_h = 0
    if (height/width) >= (img.shape[0]/img.shape[1]):
        border_v = int((((height/width)*img.shape[1])-img.shape[0])/2)
    else:
        border_h = int((((width/height)*img.shape[0])-img.shape[1])/2)
    img = cv2.copyMakeBorder(img, border_v, border_v, border_h, border_h, cv2.BORDER_CONSTANT, 0)
    img = cv2.resize(img, (width, height))

    if img.size == 0:
        continue


    # rotate image
    cy, cx = [ i/2 for i in img.shape[:-1] ]
    rot_mat = cv2.getRotationMatrix2D((cx,cy), rad_to_deg(match['roll_angle']), zoom)
    img = cv2.warpAffine(img, rot_mat, img.shape[1::-1], flags=cv2.INTER_LINEAR)

    
    # save image to disk
    cv2.imwrite('out/' + str(i) + '.jpg', img)

009F0A96-6263-492E-829F-E95FF03B1D90_1_105_c.jpeg
F7F5542E-3481-4B5D-9E66-7F5CEDABB4B0_1_105_c.jpeg
1A3244B2-5A2C-49D8-8E2D-AF3FC5788CA1_1_105_c.jpeg
EDC5D868-36DC-4B65-B729-9AA43EBF55E0_4_5005_c.jpeg
C8D9BE0C-8FFD-466F-B7A4-B5FC4D352425_1_105_c.jpeg
E0F6D1D5-068B-4F23-AC86-F9448FD9A3CD_1_105_c.jpeg
F104ABC8-B12C-40C1-9FDB-7CF392BA0C08_1_105_c.jpeg
BB4EDD0E-FA04-4F72-BC30-7FBBBDF098CE_1_105_c.jpeg
0B07270A-2555-4497-B198-ADF741B0296A_1_105_c.jpeg
194F3811-6FB3-426C-8F7D-6F7D9A0ED8F8_1_105_c.jpeg
C4625BBB-DA2D-47E3-B6D0-5534B9F49095_1_105_c.jpeg
CE2F2B26-60B3-4210-A8AC-239DED58621C_1_105_c.jpeg
090CDED5-5B09-473D-A6BD-2801A152BE62_1_105_c.jpeg
54A54B5D-3851-4A49-BCE4-743DC9247A7A_1_105_c.jpeg
47F8445A-3634-4A61-8E16-4044E5D71287_1_105_c.jpeg
75A24332-0D48-4B41-B551-217579253D87_1_105_c.jpeg
AAFB6798-9166-471E-8CA4-C72941C3336A_1_105_c.jpeg
50F65DED-C630-443C-827B-E97495860E08_1_105_c.jpeg
5C845279-4A20-4098-BE42-9ABC93204137_1_105_c.jpeg
B9BFC46D-9638-4DDB-9DB2-E812EDF1B74E_1_105_c.jpeg

In [26]:
!ffmpeg -framerate 12 -pattern_type glob -i 'out/*.jpg' -c:v libx264 -pix_fmt yuv420p out.mp4 -y

ffmpeg version 7.0.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.3.9.4)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/7.0.1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --