In [1]:
import numpy as np
import pandas as pd
import cv2
import mediapipe as mp
import face_recognition_models
import dlib
import os
from mediapipe.python.solutions.drawing_utils import _normalized_to_pixel_coordinates
from time import time
import matplotlib.pyplot as plt
import imutils
from imutils import face_utils
import face_recognition
import shutil

In [2]:
face_recognition_model = face_recognition_models.face_recognition_model_location()
face_encoder = dlib.face_recognition_model_v1(face_recognition_model)

In [3]:
landmark_points_68 = [162,234,93,58,172,136,149,148,152,377,378,365,397,288,323,454,389,71,63,105,66,107,336,
                  296,334,293,301,168,197,5,4,75,97,2,326,305,33,160,158,133,153,144,362,385,387,263,373,
                  380,61,39,37,0,267,269,291,405,314,17,84,181,78,82,13,312,308,317,14,87]
    
landmark_points_5_1 = [ 2, #bottom of nose tip
                     362, #left eye towards centre
                     263, #left eye away from centre
                     33,  #right eye away from centre
                     133 #right eye towards centre 
                    ]
landmark_points_5_2 = [ 2, #bottom of nose tip
                     263, #left eye away from centre
                     362, #left eye towards centre
                     133, #right eye towards centre 
                     33  #right eye away from centre
                    ]


In [4]:
mp_drawing = mp.solutions.drawing_utils

mp_face_detection = mp.solutions.face_detection
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.7)

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1,min_detection_confidence=0.5)

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


### MP + Dlib utils


In [5]:
def landmark_pt_list(mesh_results,width,height):
    landmark_points_68 = [162,234,93,58,172,136,149,148,152,377,378,365,397,288,323,454,389,71,63,105,66,107,336,
                  296,334,293,301,168,197,5,4,75,97,2,326,305,33,160,158,133,153,144,362,385,387,263,373,
                  380,61,39,37,0,267,269,291,405,314,17,84,181,78,82,13,312,308,317,14,87]
    
    landmark_points_5_1 = [ 2, #bottom of nose tip
                     362, #left eye towards centre
                     263, #left eye away from centre
                     33,  #right eye away from centre
                     133 #right eye towards centre 
                    ]
    landmark_points_5_2 = [ 2, #bottom of nose tip
                     263, #left eye away from centre
                     362, #left eye towards centre
                     133, #right eye towards centre 
                     33  #right eye away from centre
                    ]

    if mesh_results.multi_face_landmarks:
        for i,face_landmarks in enumerate(mesh_results.multi_face_landmarks): 
            if i==0:
                raw_landmark_set = []
                for index in landmark_points_5_1:
                    x = int(face_landmarks.landmark[index].x * width)
                    y = int(face_landmarks.landmark[index].y * height)
                    landmark_point=dlib.point([x,y])
                    raw_landmark_set.append(landmark_point)
                    #display(landmark_point)
                all_points=dlib.points(raw_landmark_set)
#         return dlib.points([{
#             "nose_tip": [raw_landmark_set[0]],
#             "left_eye": raw_landmark_set[1:3],
#             "right_eye": raw_landmark_set[3:],
#         }])
        return all_points

def bounding_rect(detection_results,width,height):
    if detection_results.detections:
        for i,detection in enumerate(detection_results.detections):
            if i==0:
                # bbox data
                bbox = detection.location_data.relative_bounding_box
                xy_min = _normalized_to_pixel_coordinates(bbox.xmin, bbox.ymin, height,width)
                xy_max = _normalized_to_pixel_coordinates(bbox.xmin + bbox.width, bbox.ymin + bbox.height,height,width)
                if xy_min is None or xy_max is None:
                    #print("face out of frame")
                    return
                else:
                    xmin,ymin =xy_min
                    xmax,ymax = xy_max
                    #bbox_points = { "xmin" : xmin,"ymin" : ymin,"xmax" : xmax,"ymax" : ymax}
                    rectangle= dlib.rectangle(left=xmin, top=ymax, right=xmax, bottom=ymin)
                    return rectangle

def ret_encoding(filepath,num_jitters=1):
    #image_input = cv2.imread(filepath)
    #image_input = cv2.cvtColor(image_input, cv2.COLOR_BGR2RGB)
    image_input =face_recognition.load_image_file(filepath)
    width,height=image_input.shape[:-1]
    detection_results = face_detection.process(image_input)
    mesh_results = face_mesh.process(image_input)
    
    all_points=  landmark_pt_list(mesh_results,width,height)  
    b_box=bounding_rect(detection_results,width,height)
    if (all_points is None) or (b_box is None):
        return 
    raw_landmark_set=dlib.full_object_detection(b_box,all_points)
    #display(all_points)
    #display(b_box)
    encodings=face_encoder.compute_face_descriptor(image_input, raw_landmark_set, num_jitters)

    return encodings

### Linear sorting

In [6]:
def get_img_list(folder):
    img_list=[]
    for file in os.listdir(folder):
        if not file.startswith('.') and os.path.isfile(os.path.join(folder, file)):
            filepath = os.path.join(folder, file)
            filepath=filepath.replace('\\' , '/')
            img_list.append(file)
    return img_list        
    print("got image list")

In [7]:
def get_d(enc1, enc2):
    enc1=np.array(enc1)
    enc2=np.array(enc2)
    d=np.linalg.norm(enc1 - enc2, axis=0)
    return d

In [8]:
# not df
def encode_list(folder, img_list):
    enc_dict={}
    for img in img_list:
        img_path = os.path.join(folder,img)
        encoding=(ret_encoding(img_path))
        enc_dict[img]=encoding 
    return enc_dict

In [9]:
# not df
def get_closest(folder, start_img, img_list, enc_dict):
    img_list.remove(start_img)
#     enc1=(ret_encoding(os.path.join(folder,start_img)))
    enc1=enc_dict[start_img]
    dist=[]
    dist_dict={}
    for img in img_list:
#         file2=filebase+str(i)+".jpg".replace('\\' , '/')
#         test_img = os.path.join(folder,img)
#         enc2=(ret_encoding(test_img))
        enc2 = enc_dict[img]
        if (enc1 is not None) and (enc2 is not None):
#             print(file2)
            d = get_d(enc1, enc2)
#             print(d)
            dist.append(d)
            dist_dict[d]=img
    dist.sort()
    print(len(dist))
    return dist[0], dist_dict[dist[0]]

In [10]:
def save_sorted(folder, image, counter, dist):
    sorted_name = "linear_sort_"+str(counter)+"_"+str(round(dist, 2))+".jpg"
    sortfolder="sorted2"
    newfolder = os.path.join(folder,sortfolder)
    old_name=os.path.join(folder,image)
    new_name=os.path.join(newfolder,sorted_name)
    if not os.path.exists(newfolder):
        os.makedirs(newfolder)
    shutil.copy(old_name, new_name)
    print('saved, ',sorted_name)


### dataframe creation and sorting

In [25]:
def encode_list_df(folder, img_list):
#     enc_dict={}
    csv_name="face_encodings.csv"
    col1="file_name"
    col2="encoding"
    curr=0
    total = len(img_list)

    # encodings column list for splitting
    col_list=[]
    for i in range(128):
        col_list.append(col2+str(i))

    #initializing the dataframe
    image_data=pd.DataFrame(columns=[col1, col2])

    
    for img in img_list:
        if curr%10==0:print(curr,"/",total)
        curr+=1
        filepath = os.path.join(folder,img)        
        filepath=filepath.replace('\\' , '/')  ## cv2 accepts files with "/" instead of "\"
        encodings=ret_encoding(filepath)
        if encodings is not None:              ## checking if a face is found
            data=pd.DataFrame({col1:img,col2:[np.array(encodings)]})
            image_data = pd.concat([image_data,data],ignore_index=True)  

    #splitting the encodings column
    output_data = pd.DataFrame(image_data[col2].to_list(), columns=col_list)
    #adding the filename column and then puting it first
    output_data[[col1]]=pd.DataFrame(image_data[col1].tolist(),index=image_data.index)
    clms = output_data.columns.tolist()
    clms = clms[-1:] + clms[:-1]
    output_data=output_data[clms]
    # saving without index
    output_data.to_csv(csv_name, index=False)
    df = pd.read_csv(csv_name)
    return output_data

In [21]:
def get_closest_df(folder, start_img, df_enc):
    if start_img == "median":
        enc1 = df_enc.median().to_list()
#         print("in median")
    else:
#         enc1 = get 2-129 from df via stimg key
        enc1 = df_enc.loc[start_img].to_list()
        df_enc=df_enc.drop(start_img)
#         print("in new img",len(df_enc.index))
    
#     img_list.remove(start_img)
#     enc1=enc_dict[start_img]
    
    dist=[]
    dist_dict={}
    for index, row in df_enc.iterrows():
#         print(row['c1'], row['c2'])
#     for img in img_list:
        enc2 = row
        if (enc1 is not None) and (enc2 is not None):
            d = get_d(enc1, enc2)
            dist.append(d)
            dist_dict[d]=index
    dist.sort()
#     print(len(dist))
    return dist[0], dist_dict[dist[0]], df_enc

In [22]:
# test if new and old make a face
def is_face(image):
    # For static images:
    # I think this list is not used
    IMAGE_FILES = []
    with mp_face_detection.FaceDetection(model_selection=1, 
                                        min_detection_confidence=0.6
                                        ) as face_detection:
        # image = cv2.imread(file)
        # Convert the BGR image to RGB and process it with MediaPipe Face Detection.
#         detection_results = face_detection.process(image)

        results = face_detection.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        # Draw face detections of each face.
        if not results.detections:
            is_face = False
        else:
            is_face = True
        # annotated_image = image.copy()
        # for detection in results.detections:
        #     is_face = True
        #     print('Nose tip:')
        #     print(mp_face_detection.get_key_point(
        #       detection, mp_face_detection.FaceKeyPoint.NOSE_TIP))
        #     mp_drawing.draw_detection(annotated_image, detection)
        # cv2.imwrite('/tmp/annotated_image' + str(idx) + '.png', annotated_image)

        return is_face


In [23]:
# test if new and old make a face
def test_pair(last_file, new_file):
    try:
        img = cv2.imread(new_file)
        height, width, layers = img.shape
        size = (width, height)
        print('loaded img 1')
        
        last_img = cv2.imread(new_file)
        last_height, last_width, last_layers = last_img.shape
        last_size = (last_width, last_height)
        print('loaded img 2')
        
        # test to see if this is actually an face, to get rid of blank ones/bad ones
        if is_face(img):
            print('new file is face')
            # if not the first image
#             if i>0:
            # blend this image with the last image
            blend = cv2.addWeighted(img, 0.5, last_img, 0.5, 0.0)
            print('blended faces')
            blended_face = is_face(blend)
            print('is_face ',blended_face)
            # if blended image has a detectable face, append the img
            if blended_face:
#                     img_array.append(img)
                print('is a face! adding it')
                return True
            else:
                print('skipping this one')
                return False
            # for the first one, just add the image
            # this may need to be refactored in case the first one is bad?
#             else:
#                 print('this is maybe the first round?')
#                 img_array.append(img)
        else:
            print('new_file is not face: ',new_file)
            return False

#         i+=1

    except:
        print('failed:',new_file)
        return False


In [35]:
folder="/Users/michaelmandiberg/Documents/projects-active/facemap_production/images1674272442.9258912/test"
img_list = get_img_list(folder)

# start_img = img_list[1]
start_img = "median"

# enc_dict = encode_list(folder, img_list)
df_enc = encode_list_df(folder, img_list)

df_enc.set_index('file_name', inplace=True)


#with lists/dicts
# dist=0
# for i in range(len(img_list)-1):
#     save_sorted(folder, start_img, i, dist)
#     dist, start_img = get_closest(folder, start_img,img_list, enc_dict)
    
#     print(dist)
#     print (start_img)
    
#     if dist > .37: 
#         continue

#with df
dist=0
print(len(df_enc.index))


    
for i in range(len(df_enc.index)-2):
    dist, start_img, df_enc = get_closest_df(folder, start_img,df_enc)
#     print(folder, start_img, i, dist)
    save_sorted(folder, start_img, i, dist)
    print(i)
    print(len(df_enc.index))
    print(dist)
    print (start_img)
    
    if dist > .37: 
        break
    
    
# I don't know why, but this isn't working
# for i in range(len(df_enc.index)-2):
#     dist, start_img, df_enc = get_closest_df(folder, start_img,df_enc)
# #     print(folder, start_img, i, dist)
#     save_sorted(folder, start_img, i, dist)

#     if i>0:
#         #test blend
# #         last_file = os.path.join(folder,)
#         blend_is_face = (test_pair(os.path.join(folder,last_img), os.path.join(folder,start_img)))
#         print('blend_is_face ',blend_is_face)
#         if blend_is_face:
# #         print(test_pair(last_img,start_img))
#             save_sorted(folder, start_img, i, dist)
#             last_img = start_img
# #         else:
# #             start_img = last_img
            
#     print(i)
#     print(len(df_enc.index))
#     print(dist)
#     print (start_img)
    
#     if dist > .37: 
#         break
        
       

    

0 / 43
10 / 43
20 / 43
30 / 43
40 / 43
43
saved,  linear_sort_0_0.18.jpg
0
43
0.18447030753633525
faceimg_crop1_X-20toX1_Y-4toY4_Z-3toZ3_maxResize0.5_ct674_594.jpg
saved,  linear_sort_1_0.27.jpg
1
42
0.27107725459729737
faceimg_crop1_X-20toX1_Y-4toY4_Z-3toZ3_maxResize0.5_ct674_605.jpg
saved,  linear_sort_2_0.29.jpg
2
41
0.2924295106427055
faceimg_crop1_X-20toX1_Y-4toY4_Z-3toZ3_maxResize0.5_ct674_581.jpg
saved,  linear_sort_3_0.31.jpg
3
40
0.3130172183743331
faceimg_crop1_X-20toX1_Y-4toY4_Z-3toZ3_maxResize0.5_ct261_6.jpg
saved,  linear_sort_4_0.27.jpg
4
39
0.2719206930593076
faceimg_crop1_X-20toX1_Y-4toY4_Z-3toZ3_maxResize0.5_ct674_589.jpg
saved,  linear_sort_5_0.35.jpg
5
38
0.3477292769209953
faceimg_crop1_X-20toX1_Y-4toY4_Z-3toZ3_maxResize0.5_ct674_585.jpg
saved,  linear_sort_6_0.37.jpg
6
37
0.36954107793852337
faceimg_crop1_X-20toX1_Y-4toY4_Z-3toZ3_maxResize0.5_ct674_604.jpg
saved,  linear_sort_7_0.35.jpg
7
36
0.3461390179431415
faceimg_crop1_X-20toX1_Y-4toY4_Z-3toZ3_maxResize0.5_ct2