In [1]:
import os
import os.path

# Directory holding the input images to scan for faces
IMAGE_DIR = 'images'
# Directory receiving one '<image filename>.ndjson' result file per processed image
FACE_DIR = 'faces'

# exist_ok avoids the check-then-create race and also creates missing parent
# directories, so FACE_DIR may safely be changed to a nested path.
os.makedirs(FACE_DIR, exist_ok=True)

In [2]:
import PIL
import numpy
import face_recognition

# Largest width/height (in pixels) an image may have before detect_faces()
# shrinks it with thumbnail((MAXDIM, MAXDIM)) prior to face detection.
MAXDIM = 2048

def detect_faces(filename):
    """Load one image from IMAGE_DIR and locate the faces in it.

    Args:
        filename: Name of the image file, relative to IMAGE_DIR.

    Returns:
        None if the file could not be opened or converted to RGB; otherwise a
        tuple ``(filename, image, face_locations, scale)`` where

        * ``image`` is the RGB pixel data as a numpy array (downscaled when
          either side of the original exceeded MAXDIM),
        * ``face_locations`` is the result of
          ``face_recognition.face_locations`` (HOG model) on ``image``,
        * ``scale`` is 0.0 when the image was not resized, else the factor
          that maps coordinates in ``image`` back to the original image.
    """
    # `import PIL` alone does not load the Image submodule; import it
    # explicitly instead of relying on another package having done so.
    from PIL import Image

    filepath = os.path.join(IMAGE_DIR, filename)
    try:
        # The context manager releases the file handle as soon as the pixel
        # data has been copied by convert().
        with Image.open(filepath) as source:
            im = source.convert('RGB')
    except Exception:
        # Best effort: unreadable or non-image files are skipped. Unlike a
        # bare `except:`, this lets KeyboardInterrupt/SystemExit propagate.
        return None
    scale = 0.0
    if im.height > MAXDIM or im.width > MAXDIM:
        # Derive the factor from the longest side: thumbnail() rounds the
        # shorter side, so the longest side gives the most accurate ratio.
        orig_longest = max(im.width, im.height)
        im.thumbnail((MAXDIM, MAXDIM))
        scale = orig_longest / max(im.width, im.height)
    image = numpy.array(im)
    face_locations = face_recognition.face_locations(image, model='hog')
    return (filename, image, face_locations, scale)

def get_encodings(filename, image, face_locations):
    """Compute one face encoding per detected face location.

    Args:
        filename: Name of the source image (unused here; kept so the
            signature stays compatible with existing callers).
        image: Image pixel data as accepted by
            ``face_recognition.face_encodings``.
        face_locations: Face bounding boxes found in ``image``.

    Returns:
        A list of encodings, one per entry of ``face_locations``, in the same
        order. An empty list when there are no face locations.
    """
    if len(face_locations) == 0:
        # Always return a single list; the previous `[], []` produced a
        # 2-tuple here, which disagreed with the non-empty return type.
        return []
    return face_recognition.face_encodings(image, face_locations)

In [3]:
%%time
import time

# Resume support: only images that do not yet have a '<name>.ndjson' result
# file in FACE_DIR are queued. os.listdir() returns entries in arbitrary
# order, so sort to make the batch indices in the log reproducible.
all_filenames = sorted(
    fn for fn in os.listdir(IMAGE_DIR)
    if not os.path.exists(os.path.join(FACE_DIR, fn + '.ndjson'))
)

# Number of images handed to process_batch() per iteration
BATCHSIZE = 1024

def process_batch(batch_filenames):
    """Detect, encode and persist the faces of one batch of images.

    Face detection is fanned out over a pool of 8 worker processes; the
    encodings are then computed in this process. For every image that could
    be read, a file ``<filename>.ndjson`` is written to FACE_DIR containing
    one JSON object per face with the keys ``id``, ``location`` and
    ``encoding``. An image without faces still gets an (empty) file.

    Args:
        batch_filenames: Image file names (relative to IMAGE_DIR) to process.
    """
    import json
    from multiprocessing import Pool

    with Pool(8) as workers:
        # detect_faces() yields None for unreadable images; drop those.
        detections = [result
                      for result in workers.map(detect_faces, batch_filenames)
                      if result]

    for filename, image, face_locations, scale in detections:
        # Encode using the coordinates of the (possibly downscaled) image ...
        face_encodings = get_encodings(filename, image, face_locations)
        if scale:
            # ... then map the boxes back to original-image coordinates.
            face_locations = [tuple(int(scale * coord) for coord in box)
                              for box in face_locations]
        outpath = os.path.join(FACE_DIR, filename + '.ndjson')
        with open(outpath, 'w') as facefile:
            for loc, enc in zip(face_locations, face_encodings):
                record = {'id': filename, 'location': loc, 'encoding': list(enc)}
                facefile.write(json.dumps(record) + '\n')

# Work through the pending images in slices of BATCHSIZE, logging how long
# each slice took.
for batch in range(0, len(all_filenames), BATCHSIZE):
    # monotonic() is unaffected by system clock adjustments, which matters
    # for a run that lasts many hours.
    starttime = time.monotonic()
    batch_filenames = all_filenames[batch:batch+BATCHSIZE]
    print("Starting batch of {} images from index {}".format(len(batch_filenames), batch))
    process_batch(batch_filenames)
    print("Batch completed in {:.02f} seconds".format(time.monotonic() - starttime))


Starting batch of 1024 images from index 0
Batch completed in 190.89 seconds
Starting batch of 1024 images from index 1024
Batch completed in 149.72 seconds
Starting batch of 1024 images from index 2048
Batch completed in 165.41 seconds
Starting batch of 1024 images from index 3072
Batch completed in 150.31 seconds
Starting batch of 1024 images from index 4096
Batch completed in 156.80 seconds
Starting batch of 1024 images from index 5120
Batch completed in 152.18 seconds
Starting batch of 1024 images from index 6144
Batch completed in 162.08 seconds
Starting batch of 1024 images from index 7168
Batch completed in 149.72 seconds
Starting batch of 1024 images from index 8192
Batch completed in 154.49 seconds
Starting batch of 1024 images from index 9216
Batch completed in 138.46 seconds
Starting batch of 1024 images from index 10240
Batch completed in 158.30 seconds
Starting batch of 1024 images from index 11264
Batch completed in 151.25 seconds
Starting batch of 1024 images from index 



Batch completed in 168.07 seconds
Starting batch of 1024 images from index 61440
Batch completed in 168.90 seconds
Starting batch of 1024 images from index 62464
Batch completed in 150.24 seconds
Starting batch of 1024 images from index 63488
Batch completed in 168.11 seconds
Starting batch of 1024 images from index 64512
Batch completed in 163.75 seconds
Starting batch of 1024 images from index 65536
Batch completed in 164.70 seconds
Starting batch of 1024 images from index 66560
Batch completed in 156.32 seconds
Starting batch of 1024 images from index 67584
Batch completed in 165.28 seconds
Starting batch of 1024 images from index 68608
Batch completed in 144.17 seconds
Starting batch of 1024 images from index 69632
Batch completed in 155.73 seconds
Starting batch of 1024 images from index 70656
Batch completed in 145.35 seconds
Starting batch of 1024 images from index 71680
Batch completed in 177.93 seconds
Starting batch of 1024 images from index 72704
Batch completed in 162.33 se

Batch completed in 154.39 seconds
Starting batch of 1024 images from index 164864
Batch completed in 151.96 seconds
Starting batch of 1024 images from index 165888
Batch completed in 164.80 seconds
Starting batch of 1024 images from index 166912
Batch completed in 160.54 seconds
Starting batch of 1024 images from index 167936
Batch completed in 165.63 seconds
Starting batch of 1024 images from index 168960
Batch completed in 157.47 seconds
Starting batch of 1024 images from index 169984
Batch completed in 157.08 seconds
Starting batch of 1024 images from index 171008
Batch completed in 156.53 seconds
Starting batch of 1024 images from index 172032
Batch completed in 164.91 seconds
Starting batch of 1024 images from index 173056
Batch completed in 166.36 seconds
Starting batch of 1024 images from index 174080
Batch completed in 160.63 seconds
Starting batch of 1024 images from index 175104
Batch completed in 146.20 seconds
Starting batch of 1024 images from index 176128
Batch completed 

In [102]:
# Encode the face in the query image so it can be compared with the index.
QUERY_IMAGE = 'osma-kuva.jpg'

query = face_recognition.load_image_file(QUERY_IMAGE)
locations = face_recognition.face_locations(query)
print(locations)
if not locations:
    # Fail with a clear message instead of an IndexError on `[0]` below.
    raise ValueError("No face found in query image {!r}".format(QUERY_IMAGE))
# Reuse the boxes found above: this avoids running detection a second time
# and encodes the query the same way get_encodings() encodes indexed faces.
face_encoding = face_recognition.face_encodings(query, locations)[0]
print(face_encoding)


[(82, 225, 211, 96)]
[-0.112992   -0.01856994 -0.0725853  -0.09649391 -0.16523461 -0.07953854
 -0.00780557 -0.05252969  0.1658026  -0.04653539  0.21516499 -0.00837498
 -0.21343192 -0.07469685  0.02241262  0.03791903 -0.23449044 -0.0604848
 -0.03576201 -0.07497106  0.05139757  0.08219927  0.05865814  0.10649666
 -0.07659778 -0.24931926 -0.18466085 -0.14079696  0.11696776 -0.14492443
  0.06956836  0.04394766 -0.12295131 -0.07461664  0.03020756  0.01194246
 -0.05413097 -0.14314765  0.20410654 -0.04227009 -0.13628951 -0.09877791
  0.04835489  0.2705321   0.17048754 -0.02328808 -0.00470283  0.00742704
  0.10339205 -0.24527058  0.10911448  0.09507908  0.1628359   0.00407935
  0.08549176 -0.08851896  0.01210636  0.24765711 -0.20378277  0.14176355
  0.03077297 -0.11151616 -0.05128714 -0.04091938  0.18774602  0.12404893
 -0.09956311 -0.19925664  0.18692112 -0.20792027 -0.15209027  0.18592787
 -0.09615736 -0.18231547 -0.22061628 -0.03854696  0.42444381  0.09479684
 -0.1213178   0.03775262 -0.072

In [101]:
%%time

# Rank every indexed face by its distance to the query encoding and show the
# ten closest matches.
# NOTE(review): `faces2` is not defined in the visible cells; from its use
# here each entry appears to be (id, location, encoding) -- confirm where it
# is loaded.
encodings = numpy.array([record[2] for record in faces2])
distances = face_recognition.face_distance(encodings, face_encoding)
ranking = numpy.argsort(distances)[:10]
print(ranking)
print(distances[ranking])
for idx in ranking:
    match = faces2[idx]
    print(match[0], match[1])

[10218   472  3159 11971  4706 11517  2478  3244 10645  9490]
[0.57957923 0.58041262 0.58547511 0.59846391 0.60128366 0.60553537
 0.60595661 0.60844668 0.60984907 0.61123429]
musketti.M012:HK19651016:1436 (142, 379, 409, 111)
musketti.M012:HK19670603:2116 (122, 73, 158, 37)
lusto.M011-11266 (106, 185, 196, 96)
muistaja_kerava.M011-95357 (170, 246, 232, 183)
musketti.M012:HK6001:60022 (171, 468, 439, 200)
musketti.M012:HK19321130:1586-1878 (211, 268, 340, 139)
ksm.urn:nbn:fi-ksmuseohttp%3A%2F%2Fwww.profium.com%2Farchive%2FArchivedObject-9D6F0AE6-AB49-6F18-5ED8-3508536AF8DA (219, 442, 374, 287)
muistaja_nurmijarvi.M011-95505 (128, 235, 235, 127)
ksm.urn:nbn:fi-ksmuseohttp%3A%2F%2Fwww.profium.com%2Farchive%2FArchivedObject-E27A5F34-E87E-7030-5C94-3C5ABF3E7082 (201, 448, 387, 262)
musketti.M012:HK19321130:440-1877 (182, 268, 311, 139)
CPU times: user 20 ms, sys: 0 ns, total: 20 ms
Wall time: 18.5 ms
