In [1]:
from deepface import DeepFace

In [293]:
import os
from pathlib import Path

def get_all_image_paths(path):
    for root, dirs, files in os.walk(path):
        for filename in files:
            if filename.endswith(".jpg") or filename.endswith(".png") or filename.endswith(".jpeg"):
                yield Path(root, filename)

image_paths = list(get_all_image_paths("../images"))

In [294]:
import pandas as pd

faces = pd.read_csv("faces.csv", index_col=0)

In [295]:
face = faces.iloc[0]

In [296]:
face

image_id    IMG_20220810_115949.jpg
x                              1102
y                              1754
w                               326
h                               367
Name: 0, dtype: object

In [297]:
x,y,w,h = face["x"], face["y"], face["w"], face["h"]

import cv2

img = cv2.imread("../images"+"/"+face["image_id"])

img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [300]:
represent = DeepFace.represent(img[y:y+h, x:x+w],detector_backend="skip", model_name="VGG-Face")

In [301]:
len(represent[0]["embedding"])

2622

In [302]:
from pycozo.client import Client

In [303]:
client = Client('sqlite', 'file.db')

In [304]:
client.run('''
:create face_embeddings {k:Int=> v: <F32; 2622>}
''')

Unnamed: 0,status
0,OK


In [305]:
res = client.run('''
?[k,v] <- [[$k,$v]]
:put face_embeddings {k:Int=> v: <F32; 2622>}
''',{'k':0, 'v':represent[0]["embedding"]})

In [311]:
def get_embeddings(faces):
    for face in faces.itertuples():
        face_id, image_id, x, y, w ,h = face
        img = cv2.imread("../images"+"/"+image_id)
        represent = DeepFace.represent(img[y:y+h, x:x+w],detector_backend="skip", model_name="VGG-Face")

        res = client.run('''
        ?[k,v] <- [[$k,$v]]
        :put face_embeddings {k:Int=> v: <F32; 2622>}
        ''',{'k':face_id, 'v':represent[0]["embedding"]})

        if res["status"].values[0] != "OK":
            print("Failed to insert face embedding for face id: {face_id}")
            break
            yield face_id, None
        else:
            yield face_id, represent[0]["embedding"]



In [312]:
import csv
with open('face_embeddings.csv', 'w', newline='') as csvfile:
    fieldnames = ['face_id', 'embedding']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for face_id, embedding in get_embeddings(faces):
        if embedding is not None:
            writer.writerow({'face_id': face_id, 'embedding': embedding})
            csvfile.flush()

In [313]:
client.run('''
::hnsw create face_embeddings:similarity{
    fields: [v],
    dim:2622,
    ef:16,
    m:32,
    distance: L2,
    ef_construction: 20,
    extend_candidates: false,
    keep_pruned_connections: false,
}
''')

Unnamed: 0,status
0,OK


In [314]:
client.run('''
?[dist, k, v] := ~face_embeddings:similarity{ k, v |
        query: q,
        k : 10,
        ef: 2000,
        bind_distance: dist
    }, q = vec($q) 

''', {'q':represent[0]["embedding"]})

Unnamed: 0,dist,k,v
0,1528.198975,0,"[1.5252106189727783, 1.450548529624939, 1.4506..."
1,3969.231201,25,"[0.3521648049354553, 2.4342830181121826, 3.080..."
2,4685.795898,38,"[3.0855047702789307, 5.179976940155029, 5.5852..."
3,4719.463867,23,"[2.3121466636657715, 3.348961353302002, 3.0003..."
4,4973.105469,3,"[2.9668283462524414, 3.074146032333374, 2.4727..."
5,4987.464844,19,"[0.5375211834907532, 0.9217845797538757, 3.179..."
6,5047.678711,109,"[1.7654213905334473, 2.0654611587524414, 1.800..."
7,5809.060547,21,"[1.359513282775879, 1.9702047109603882, 1.8589..."
8,5821.587891,5,"[1.6776843070983887, 3.6337873935699463, 3.315..."
9,5878.029785,32,"[2.8657302856445312, 3.3850526809692383, 3.248..."


In [324]:
result = client.run('''
similar[dist, k, v] := ~face_embeddings:similarity{ k, v |
        query: q,
        k : 1000,
        ef: 1000,
        bind_distance: dist
    }, q = vec($q)

?[k,image_id,dist] := similar[dist,k,v],*faces{id:k, image_id, x, y, w, h}
''', {'q':represent[0]["embedding"]})
result

Unnamed: 0,k,image_id,dist
0,0,IMG_20220810_115949.jpg,1528.198975
1,1,IMG_20220810_115949.jpg,15064.774414
2,2,IMG_20220810_115949.jpg,13248.799805
3,3,IMG_20220810_181806.jpg,4973.105469
4,4,IMG_20220810_181819.jpg,10923.759766
...,...,...,...
196,196,IMG_20221016_150842.jpg,17786.480469
197,197,IMG_20221016_150842.jpg,18619.082031
198,198,IMG_20221016_150842.jpg,15998.669922
199,199,IMG_20221016_151011.jpg,12039.833008


In [325]:
len(result)

201

In [None]:
def show_face(face_id):
    image_id, x, y, w ,h = faces.loc[face_id]

    print(face_id, image_id, x, y, w ,h)
    img = cv2.imread("../images"+"/"+image_id)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.rectangle(img, (x,y), (x+w,y+h), (255,0,0), 20)
    plt.imshow(img)
    plt.show()

show_face(15)

In [327]:
result_sorted = result.sort_values(by=["dist"])
result_sorted

Unnamed: 0,k,image_id,dist
0,0,IMG_20220810_115949.jpg,1528.198975
25,25,IMG_20220812_184338_1.jpg,3969.231201
38,38,IMG_20220815_093652.jpg,4685.795898
23,23,IMG_20220812_184338.jpg,4719.463867
3,3,IMG_20220810_181806.jpg,4973.105469
...,...,...,...
49,49,IMG_20220906_143019.jpg,26010.693359
13,13,IMG_20220811_193952.jpg,27991.179688
86,86,IMG_20220914_162935.jpg,29036.701172
178,178,IMG_20221016_130439.jpg,32977.593750


In [None]:
import matplotlib.pyplot as plt

for row in list(result_sorted.itertuples())[:20]:
    # show image
    print(row[3])
    face_id = row[0]
    show_face(face_id)


In [99]:
represent[0]["embedding"]

[-0.5354427099227905,
 0.30354341864585876,
 -0.535990834236145,
 0.8943162560462952,
 -0.20328429341316223,
 -0.010440249927341938,
 0.45285624265670776,
 -0.7727059125900269,
 -0.38045498728752136,
 -1.4848867654800415,
 -0.05742365121841431,
 -1.075291633605957,
 -0.5713295936584473,
 1.0852614641189575,
 -0.9145270586013794,
 -0.12647010385990143,
 0.7566992044448853,
 -0.23216578364372253,
 0.02286236733198166,
 -0.40827929973602295,
 -0.4207913875579834,
 -1.2406048774719238,
 1.1996772289276123,
 0.974852442741394,
 1.0779144763946533,
 0.8926209807395935,
 0.9780884981155396,
 0.45392897725105286,
 -0.8928558826446533,
 -0.9062016606330872,
 0.04169142246246338,
 0.6118016839027405,
 -0.39422792196273804,
 0.791703462600708,
 -0.15203681588172913,
 0.140337273478508,
 1.4239681959152222,
 -0.8483406901359558,
 0.7302632927894592,
 0.0885499119758606,
 -0.7255919575691223,
 -0.31754016876220703,
 -0.3119663596153259,
 -0.7852936387062073,
 -0.08914727717638016,
 1.17242503166198