In [None]:
import numpy as np
import glob
import sys
from tqdm.auto import tqdm
import os
import random
import itertools

# Paths used in the script
# Directory that is globbed for the real-image embedding files (*.npy).
embeddings_real_images = './embeddings/real/'
# Directory that is globbed for the synthetic embedding files (*.npy); these
# are the files filtered by occlusion tag before scoring.
embeddings_synthetic_images = './embeddings/synthetic/'
# Output directory for the saved similarity-score arrays; the scoring cell
# creates it if it does not exist.
scores_path = './similarity_scores/'

In [None]:
def similarity(embeddings1, embeddings2):
    """Cosine similarity between two embeddings, capped at 1.

    The dot product and the norms are reduced over axis 0, so a pair of 1-D
    vectors yields a scalar and a pair of equally shaped 2-D arrays (one
    embedding per column) yields one score per column.

    Parameters
    ----------
    embeddings1, embeddings2 : array_like
        Embeddings of identical shape; they are converted to float64 before
        any arithmetic.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        ``dot / (|e1| * |e2|)`` with values above 1 (floating-point overshoot)
        capped at 1. Where either embedding has zero norm the cosine is
        undefined and 0.0 is returned.
    """
    embeddings1 = np.asarray(embeddings1, dtype=np.float64)
    embeddings2 = np.asarray(embeddings2, dtype=np.float64)

    dot = np.sum(embeddings1 * embeddings2, axis=0)
    norm = np.linalg.norm(embeddings1, axis=0) * np.linalg.norm(embeddings2, axis=0)

    # A zero norm would give 0/0 = NaN; the builtin min(1, NaN) then returned 1,
    # i.e. a "perfect match". Divide by a harmless placeholder instead and
    # report 0.0 for those entries.
    zero_norm = norm == 0
    cosine = np.where(zero_norm, 0.0, dot / np.where(zero_norm, 1.0, norm))

    # Element-wise cap, so it works for scalars and for batched results alike.
    return np.minimum(1.0, cosine)

In [None]:
# Seed for the random pick of each identity's reference image, so repeated
# runs of this cell compare against the same real images.
REFERENCE_SEED = 0


def _occlusion_filter(sunglass, glass, cap, mask):
    """Build the file-name filter for one occlusion combination.

    Returns ``(selected, excluded)``: ``selected`` is the concatenation of the
    tags of the active occlusions in the fixed order mask, glass, sunglass,
    cap; ``excluded`` lists the tags of the inactive occlusions.
    """
    selected = ''
    excluded = []
    for active, tag in ((mask, '_mask'), (glass, '_glass'),
                        (sunglass, '_sunglass'), (cap, '_cap')):
        if active:
            selected += tag
        else:
            excluded.append(tag)
    return selected, excluded


def _group_by_identity(paths):
    """Map each identity (file-name text before the first 'd') to its paths."""
    groups = {}
    for path in paths:
        groups.setdefault(os.path.basename(path).split('d')[0], []).append(path)
    return groups


# Real embeddings are listed once; sorting makes the run order (and therefore
# the order of the saved scores) independent of the file system.
real_paths = sorted(glob.glob(os.path.join(embeddings_real_images, '*.npy')))
file_names = [os.path.basename(path) for path in real_paths]
if not file_names:
    print(f'No embeddings found in {embeddings_real_images}', file=sys.stderr)
else:
    # Exact grouping by identity: a prefix glob such as '12*' would also pick
    # up files of identity '123', and files that are not .npy embeddings.
    real_paths_by_id = _group_by_identity(real_paths)
    ids = sorted(real_paths_by_id)

    # The synthetic listing does not depend on the occlusion combination.
    synthetic_file_names = sorted(
        os.path.basename(path)
        for path in glob.glob(os.path.join(embeddings_synthetic_images, '*.npy')))

    reference_rng = random.Random(REFERENCE_SEED)

    # 16 flag combinations minus "no occlusion" (1) and "sunglass and glass" (4) = 11.
    # The context manager closes the bar even if a comparison raises.
    with tqdm(total=11, file=sys.stdout, desc='OCC combos') as bar:
        for sunglass, glass, cap, mask in itertools.product([0, 1], repeat=4):
            # Ignore no occlusions, and ignore sunglass together with glass
            if (sunglass and glass) or not any([sunglass, glass, cap, mask]):
                continue

            # Creating filter based on selected occlusions
            select_occlusions, non_selected_occlusions = _occlusion_filter(
                sunglass, glass, cap, mask)

            # Keep the synthetic files that carry exactly the selected tags:
            # the combined tag must appear and no unselected tag may appear.
            filtered_file_names = [
                file for file in synthetic_file_names
                if select_occlusions in file
                and not any(tag in file for tag in non_selected_occlusions)
            ]

            if not filtered_file_names:
                bar.update(1)
                print(f'No embeddings for occlusions {select_occlusions} found in {embeddings_synthetic_images}',
                      file=sys.stderr)
                continue

            mated_comp_count = 0
            non_mated_comp_count = 0
            equal_count = 0

            # Scores are collected in lists and converted once at the end;
            # np.append inside the loop copies the whole array on every call.
            mated_scores = []
            non_mated_scores = []

            for subject_id in tqdm(ids, file=sys.stdout, colour='grey', unit=' images'):
                # One randomly chosen real image acts as the reference for this identity.
                reference_feature = reference_rng.choice(real_paths_by_id[subject_id])
                reference_feature_embedding = np.load(reference_feature)
                reference_feature_name = os.path.splitext(os.path.basename(reference_feature))[0]

                for file in filtered_file_names:
                    file_path = os.path.join(embeddings_synthetic_images, file)
                    if file.split('d')[0] == subject_id:  # Mated comparison
                        # The text before the first '_' names the image the
                        # synthetic file belongs to; skip it when that is the
                        # reference image itself.
                        if file.split('_', 1)[0] == reference_feature_name:
                            equal_count += 1
                            continue
                        mated_comp_count += 1
                        mated_scores.append(
                            similarity(reference_feature_embedding, np.load(file_path)))
                    else:  # Non-mated comparison
                        non_mated_comp_count += 1
                        non_mated_scores.append(
                            similarity(reference_feature_embedding, np.load(file_path)))

            real_vs_synthetic_images_mated = np.asarray(mated_scores, dtype=float)
            real_vs_synthetic_images_non_mated = np.asarray(non_mated_scores, dtype=float)

            print(f'Real vs synthetic mated comparisons: {mated_comp_count}')
            print(f'Real vs synthetic non-mated comparisons: {non_mated_comp_count}')
            print (f'Equal comparisons skipped:  {equal_count}')

            # Saving the similarity score if scores exist
            os.makedirs(scores_path, exist_ok=True)

            if len(real_vs_synthetic_images_mated) == 0:
                print(f'No mated comparisons for the occlusions {select_occlusions}',
                      file=sys.stderr)
            else:
                np.save(os.path.join(scores_path, 'real_vs_synthetic_images_mated'
                                     + select_occlusions + '.npy'),
                        real_vs_synthetic_images_mated)

            if len(real_vs_synthetic_images_non_mated) == 0:
                print(f'No non-mated comparisons for the occlusions {select_occlusions}',
                      file=sys.stderr)
            else:
                np.save(os.path.join(scores_path, 'real_vs_synthetic_images_non-mated'
                                     + select_occlusions + '.npy'),
                        real_vs_synthetic_images_non_mated)

            bar.update(1)