In [1]:
import os
from PIL import Image

In [2]:
# download datatset of faces
# if not os.path.exists('/content/ibug_300W_large_face_landmark_dataset'):
#     !wget http://dlib.net/files/data/ibug_300W_large_face_landmark_dataset.tar.gz

In [3]:
image_paths = os.listdir('testset')

train_image_paths = []
test_image_paths = []

i=0
while len(train_image_paths) < 150:
    i+=1
    # i into 3 digit number
    if i < 10:
        filename = f'image_000{i}'
    elif i < 100:
        filename = f'image_00{i}'
    else:
        filename = f'image_0{i}'

    if filename+'.png' not in image_paths:
        continue
    
    train_image_paths.append(os.path.join('testset', filename+'.png'))
    test_image_paths.append(os.path.join('testset', filename+'_mirror.jpg'))

In [4]:
len(train_image_paths), len(test_image_paths)

(150, 150)

In [5]:
train_image_paths[:5], test_image_paths[:5]

(['testset/image_0001.png',
  'testset/image_0002.png',
  'testset/image_0003.png',
  'testset/image_0004.png',
  'testset/image_0005.png'],
 ['testset/image_0001_mirror.jpg',
  'testset/image_0002_mirror.jpg',
  'testset/image_0003_mirror.jpg',
  'testset/image_0004_mirror.jpg',
  'testset/image_0005_mirror.jpg'])

In [6]:
from facial_detection.facial_detection import FacialDetection

facial_detection = FacialDetection()

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# facial embeddings -> image file name
from tqdm import tqdm


train_data = {}
ground_truth = {}
train_embeddings = []
test_embeddings = []

for train_path, test_path in tqdm(zip(train_image_paths[:10], test_image_paths[:10])):
    train_image = Image.open(train_path).convert('RGB')
    test_image = Image.open(test_path).convert('RGB')

    detected_face = facial_detection.detect_face(train_image)

    embedding = tuple(facial_detection.get_facial_embeddings(detected_face).numpy().reshape(-1))

    train_embeddings.append(embedding)
    train_data[embedding] = train_path.split('/')[-1]

    detected_face = facial_detection.detect_face(test_image)
    embedding = tuple(facial_detection.get_facial_embeddings(detected_face).numpy().reshape(-1))
    test_embeddings.append(embedding)
    ground_truth[embedding] = train_path.split('/')[-1]


10it [00:04,  2.39it/s]


In [None]:
ground_truth[test_embeddings[0]], train_data[train_embeddings[0]]

('image_0001.png', 'image_0001.png')

In [12]:
from database.db import Database
import time

# dist -> searchmethod -> (speed, accuracy)
collected_metrics = {}

# collect speed and accuracy for each search method
for dist_func in ['euclidean','cosine']:
    for search_method in ['lsh','vector_compression','linear']:
        db = Database(search_method, dist_func)

        for embedding in train_embeddings:
            db.add_entry(embedding, train_data[embedding])

        for bucket in db.lsh.:
            print("Bucket:")
            # Print all items in the bucket
            for item in bucket:
                print("   Embedding:", item[0])  # Embedding
                print("   Extra data:", item[1])  # Extra data (if provided during indexing)

        correct = 0
        start_time = time.time()
        for embedding in test_embeddings:
            result = db.query_entry(embedding)
            print(result)

            if result == ground_truth[embedding]:
                correct+=1

        end_time = time.time()
        collected_metrics[dist_func]={search_method: (len(test_embeddings)/(end_time-start_time), correct/len(test_embeddings))}


AttributeError: 'LSHash' object has no attribute 'buckets'

In [None]:
# make bar plots for each metric
import matplotlib.pyplot as plt

speeds = [collected_metrics[method][0] for method in collected_metrics]
accuracies = [collected_metrics[method][1] for method in collected_metrics]

fig, ax = plt.subplots(1,2, figsize=(10,5))
ax[0].bar(collected_metrics.keys(), speeds)
ax[0].set_title('Speeds')
ax[0].set_ylabel('Images per second')
ax[1].bar(collected_metrics.keys(), accuracies)
ax[1].set_title('Accuracies')
ax[1].set_ylabel('Accuracy')
plt.show()