In [None]:
import numpy as np
import glob
import sys
from tqdm.auto import tqdm
import os
import random

In [None]:
# Directory locations shared by the rest of the notebook
embeddings_real_images = './embeddings/real/'
scores_path = './similarity_scores/'

# Base names (directory part stripped) of every *.npy file that currently
# sits in the embeddings_real_images folder
npy_pattern = embeddings_real_images + '*.npy'
file_names = list(map(os.path.basename, glob.glob(npy_pattern)))

In [None]:
def similarity(embeddings1, embeddings2):
    """Return the cosine similarity between two embedding vectors.

    Both inputs are converted to float64 and flattened, so a single embedding
    stored as shape (D,), (1, D) or (D, 1) is handled the same way.

    Args:
        embeddings1: array-like holding the first embedding.
        embeddings2: array-like holding the second embedding; must contain
            the same number of elements as ``embeddings1``.

    Returns:
        float: cosine similarity clamped to [-1.0, 1.0]. ``nan`` is returned
        when the similarity is undefined (an embedding has zero norm or
        contains NaN), so callers can filter such pairs with ``np.isnan``.

    Raises:
        ValueError: if the two embeddings differ in number of elements.
    """
    vector1 = np.asarray(embeddings1, dtype=np.float64).ravel()
    vector2 = np.asarray(embeddings2, dtype=np.float64).ravel()
    if vector1.shape != vector2.shape:
        raise ValueError(
            f'Embedding sizes differ: {vector1.size} vs {vector2.size}')

    dot = np.sum(np.multiply(vector1, vector2))
    norm = np.linalg.norm(vector1) * np.linalg.norm(vector2)

    # A zero norm makes the ratio 0/0. The built-in min(1, nan) returns 1,
    # which would report a perfect match, so bail out explicitly instead.
    if norm == 0.0:
        return float('nan')

    # Rounding can push the ratio marginally outside [-1, 1]; clamp both
    # ends. np.clip propagates NaN rather than turning it into a bound.
    return float(np.clip(dot / norm, -1.0, 1.0))

In [None]:
# Real images
#
# For every identity one reference embedding is picked at random and compared
# against every other embedding in the folder:
#   * same identity, different file -> mated comparison
#   * different identity            -> non-mated comparison
#   * the reference itself          -> skipped (counted in equal_count)
# The identity of a file is the part of its base name before the first 'd'.

# Fixed seed so the choice of reference images (and therefore the scores)
# is reproducible from run to run.
REFERENCE_SEED = 42
reference_rng = random.Random(REFERENCE_SEED)

mated_comp_count = 0
non_mated_comp_count = 0
equal_count = 0

# Scores are gathered in plain lists and converted once at the end;
# np.append inside the loop would copy the whole array on every call.
mated_scores = []
non_mated_scores = []

# Scan the directory once. Sorting makes the processing order (and with it
# the sequence of random draws) independent of the file-system listing order.
embedding_files = sorted(glob.glob(embeddings_real_images + '*.npy'))
file_names = [os.path.basename(file) for file in embedding_files]
file_ids = [name.split('d')[0] for name in file_names]  # extract id
ids = sorted(set(file_ids))  # remove duplicates

# Group files by their parsed id. Globbing for `id + '*'` would be a prefix
# match, so id '1' would also pick up '10d...' or files that are not *.npy.
files_by_id = {}
for path, subject_id in zip(embedding_files, file_ids):
    files_by_id.setdefault(subject_id, []).append(path)

# Load every embedding a single time instead of once per identity.
# NOTE: this keeps all embeddings in memory at once.
embeddings = {path: np.load(path) for path in embedding_files}

for subject_id in tqdm(ids, file=sys.stdout, unit=' images'):
    reference_feature = reference_rng.choice(files_by_id[subject_id])
    reference_feature_embedding = embeddings[reference_feature]
    for file, file_id in zip(embedding_files, file_ids):
        if file_id == subject_id:  # Mated comparison
            if file == reference_feature:
                # Never compare the reference with itself.
                equal_count += 1
                continue
            mated_comp_count += 1
            mated_scores.append(
                similarity(reference_feature_embedding, embeddings[file]))
        else:  # Non-mated comparison
            non_mated_comp_count += 1
            non_mated_scores.append(
                similarity(reference_feature_embedding, embeddings[file]))

real_images_mated = np.array(mated_scores, dtype=float)
real_images_non_mated = np.array(non_mated_scores, dtype=float)

print(f'Real mated comparisons: {mated_comp_count}')
print(f'Real non-mated comparisons: {non_mated_comp_count}')
print(f'Equal comparisons skipped:  {equal_count}')

In [None]:
# Persist the similarity scores, provided at least one comparison was made

mated_total = len(real_images_mated)
non_mated_total = len(real_images_non_mated)

if mated_total == 0 and non_mated_total == 0:
    print(f'No embeddings found in the {embeddings_real_images} directory',
          file=sys.stderr)
else:
    # Create the output folder the first time scores are written.
    if not os.path.isdir(scores_path):
        os.makedirs(scores_path)

    # (scores, output file stem, message printed when there are no scores)
    score_outputs = (
        (real_images_mated, 'real_images_mated',
         'No mated comparisons for real images'),
        (real_images_non_mated, 'real_images_non-mated',
         'No non-mated comparisons for real images'),
    )
    for scores, file_stem, empty_message in score_outputs:
        if len(scores) == 0:
            print(empty_message,
                  file=sys.stderr)
        else:
            np.save((scores_path + file_stem), scores)