In [None]:
import numpy as np
import glob
import sys
from tqdm.auto import tqdm
import os
import random
import itertools

# Paths used in the script
# Keep the trailing '/' on the roots: file names are appended to them by
# plain string concatenation.
# Folder scanned for the '*.npy' feature files of the real images.
real_features_root = './features/real/'
# Folder scanned for the '*.npy' feature files of the synthetic (occluded) images.
synthetic_features_root = './features/synthetic/'
# Output folder for the saved similarity score arrays; created when missing.
scores_root = './similarity_scores/'

# Set delimiter in filename to split id and picture number
# Everything before the first occurrence of this delimiter in a file name is
# used as the subject id when deciding whether a comparison is mated.
delimiter = 'd'

# Cosine similarity
def similarity(feature_1, feature_2):
    """Return the cosine similarity of two 1-D feature vectors.

    Both inputs are converted to float64 before the computation. The vectors
    are reduced along axis 0, so they are expected to be 1-D and of equal
    length.

    Parameters
    ----------
    feature_1, feature_2 : array-like
        The feature vectors to compare.

    Returns
    -------
    float
        The cosine similarity clamped to [-1.0, 1.0]; rounding can push the
        raw quotient marginally outside that range. When either vector has
        zero norm the cosine is undefined and 0.0 is returned.
    """
    feature_1 = np.asarray(feature_1, dtype=np.float64)
    feature_2 = np.asarray(feature_2, dtype=np.float64)
    dot = np.sum(np.multiply(feature_1, feature_2), axis=0)
    norm = np.linalg.norm(feature_1, axis=0) * np.linalg.norm(feature_2, axis=0)
    # Guard the division: 0/0 yields NaN, and the built-in min(1, nan)
    # evaluates to 1, which would report a degenerate vector as a perfect match.
    if norm == 0:
        return 0.0
    # np.clip bounds both ends and, unlike min()/max(), propagates NaN inputs.
    return float(np.clip(dot / norm, -1.0, 1.0))

# Adding features to lists
# Only the file names (not the full paths) are kept. They are sorted because
# glob returns entries in arbitrary order, which would otherwise make the order
# of the computed scores differ between runs and machines.
real_features = sorted(
    os.path.basename(path) for path in glob.glob(os.path.join(real_features_root, '*.npy'))
)
synthetic_features = sorted(
    os.path.basename(path) for path in glob.glob(os.path.join(synthetic_features_root, '*.npy'))
)
# Stop early when a feature folder is empty; the real folder is reported first.
if not real_features:
    sys.exit(f'No features found in folder {real_features_root}')
if not synthetic_features:
    sys.exit(f'No features found in folder {synthetic_features_root}')

# Create scores root if not exist
# exist_ok=True replaces the separate isdir test and its check-then-create race.
os.makedirs(scores_root, exist_ok=True)

In [None]:
# Compare every real feature against every other real feature and split the
# cosine similarity scores into mated (same subject) and non-mated sets.
print('\n*** Real only comparisons ***\n')
subjects = list(set([file.split(delimiter)[0] for file in real_features]))
skipped = 0

# Scores are gathered in plain lists and converted once at the end;
# np.append copies the whole array on every call, which is quadratic overall.
mated_scores = []
non_mated_scores = []

# Read each feature file once instead of once per (probe, reference) pair.
real_feature_cache = {
    name: np.load(os.path.join(real_features_root, name)) for name in real_features
}

# Every ordered (probe, reference) pair is scored, so each unordered pair
# contributes two scores; only a file paired with itself is skipped.
for probe in tqdm(real_features, file=sys.stdout, unit=' probes'):
    probe_feature = real_feature_cache[probe]
    probe_subject = probe.split(delimiter)[0]
    for reference in real_features:
        if probe == reference:
            skipped += 1
            continue
        reference_subject = reference.split(delimiter)[0]
        similarity_score = similarity(probe_feature, real_feature_cache[reference])
        if probe_subject == reference_subject:  # Mated comparison
            mated_scores.append(similarity_score)
        else:  # non-mated
            non_mated_scores.append(similarity_score)

mated = np.array(mated_scores, dtype=float)
non_mated = np.array(non_mated_scores, dtype=float)

print(f'Number of subjects: {len(subjects)}')
print(f'Number of real features: {len(real_features)}')
print(f'Real only mated comparisons: {len(mated)}')
print(f'Real only non-mated comparisons: {len(non_mated)}')
print(f'Equal files skipped: {skipped}')

# Test for emptiness with .size: np.any() is False for an array that holds
# only zeros, which would wrongly discard a valid all-zero score set.
if mated.size:
    np.save(os.path.join(scores_root, 'real_mated.npy'), mated)
else:
    print('No mated comparisons for the real only', file=sys.stderr)

if non_mated.size:
    np.save(os.path.join(scores_root, 'real_non-mated.npy'), non_mated)
else:
    print('No non-mated comparisons for the real only', file=sys.stderr)

In [None]:
# For each occlusion combination present among the synthetic features, compare
# the matching synthetic probes against all real features and save the mated
# and non-mated cosine similarity scores separately.
print('\n*** Synthetic occluded combination vs real comparisons ***\n')
subjects = list(set([file.split(delimiter)[0] for file in real_features]))
# Extract the different occlusions combinations that exist
different_occlusions = ['cap', 'glass', 'sunglass', 'mask']
occlusion_combinations = ['cap', 'glass', 'glass_cap', 'sunglass', 'sunglass_cap', 'mask', 'mask_cap', 'mask_glass', 'mask_glass_cap', 'mask_sunglass', 'mask_sunglass_cap']
found_occlusions = set()
for feature in synthetic_features:
    # Tokens after the first '_' of the file stem describe the occlusions.
    tokens = os.path.splitext(feature)[0].split('_')[1:]
    combination = '_'.join(token for token in tokens if token in different_occlusions)
    if combination in occlusion_combinations:
        found_occlusions.add(combination)
# Sorted so the combinations are processed in the same order on every run.
occlusion_list = sorted(found_occlusions)

# Read each real feature file once instead of once per (probe, reference) pair.
real_feature_cache = {
    name: np.load(os.path.join(real_features_root, name)) for name in real_features
}

for occlusion in occlusion_list:
    print(f'\nSynthetic {occlusion} occluded vs real comparisons')
    # '[0-9]_' requires a digit right before the occlusion suffix, so e.g.
    # 'cap' does not also match files ending in '_glass_cap.npy'.
    filtered_synthetic_features = sorted(
        glob.glob(os.path.join(synthetic_features_root, '*[0-9]_' + occlusion + '.npy'))
    )
    # Scores are gathered in lists and converted once; np.append copies the
    # whole array on every call, which is quadratic overall.
    mated_scores = []
    non_mated_scores = []
    skipped = 0
    for probe in tqdm(filtered_synthetic_features, file=sys.stdout, unit=' probes', desc=occlusion):
        probe_name = os.path.splitext(os.path.basename(probe))[0]
        probe_image = probe_name.split('_')[0]
        probe_subject = probe_image.split(delimiter)[0]
        probe_feature = np.load(probe)
        for reference in real_features:
            reference_name = os.path.splitext(reference)[0]
            # Skip the real image this synthetic probe shares its name with.
            if probe_image == reference_name:
                skipped += 1
                continue
            reference_subject = reference_name.split(delimiter)[0]
            similarity_score = similarity(probe_feature, real_feature_cache[reference])
            if probe_subject == reference_subject:  # Mated comparison
                mated_scores.append(similarity_score)
            else:  # non-mated
                non_mated_scores.append(similarity_score)

    mated = np.array(mated_scores, dtype=float)
    non_mated = np.array(non_mated_scores, dtype=float)

    print(f'Number of subjects: {len(subjects)}')
    print(f'Number of real features: {len(real_features)}')
    print(f'Number of synthetic features: {len(synthetic_features)}')
    print(f'Synthetic {occlusion} vs real mated comparisons: {len(mated)}')
    print(f'Synthetic {occlusion} vs real non-mated comparisons: {len(non_mated)}')
    print(f'Equal files skipped: {skipped}')

    # Test for emptiness with .size: np.any() is False for an array that holds
    # only zeros, which would wrongly discard a valid all-zero score set.
    if mated.size:
        filename = f'real_vs_synthetic_{occlusion}_mated.npy'
        np.save(os.path.join(scores_root, filename), mated)
    else:
        print(f'No mated comparisons for the real vs synthetic {occlusion}', file=sys.stderr)

    if non_mated.size:
        filename = f'real_vs_synthetic_{occlusion}_non-mated.npy'
        np.save(os.path.join(scores_root, filename), non_mated)
    else:
        print(f'No non-mated comparisons for the real vs synthetic {occlusion}', file=sys.stderr)

In [None]:
# For every subject, pick one of its synthetic images at random and compare all
# occlusion variants of that image against all real features, splitting the
# cosine similarity scores into mated and non-mated sets.
print('\n*** Synthetic occluded all combinations vs real comparisons ***\n')
skipped = 0

# Group the synthetic files by the exact subject id. Matching by a
# 'subject*' prefix would also pick up other subjects whose id merely starts
# with the same characters (e.g. '1' and '10').
features_by_subject = {}
for feature in synthetic_features:
    features_by_subject.setdefault(feature.split(delimiter)[0], []).append(feature)
# Sorted so that seeding `random` beforehand gives a reproducible selection.
subjects = sorted(features_by_subject)

# Scores are gathered in lists and converted once; np.append copies the whole
# array on every call, which is quadratic overall.
mated_scores = []
non_mated_scores = []

# Read each real feature file once instead of once per (probe, reference) pair.
real_feature_cache = {
    name: np.load(os.path.join(real_features_root, name)) for name in real_features
}

for subject in tqdm(subjects, file=sys.stdout, unit=' subjects'):
    features_of_subject = features_by_subject[subject]
    # Image name = file stem up to the first '_' (the occlusion suffix is dropped).
    filtered_features_of_subject = sorted(
        {os.path.splitext(feature)[0].split('_')[0] for feature in features_of_subject}
    )
    selected_feature = random.choice(filtered_features_of_subject)
    # Exact match on the image name, so '1d2' does not also select '1d20'.
    occlusion_combinations_with_feature = [
        feature for feature in features_of_subject
        if os.path.splitext(feature)[0].split('_')[0] == selected_feature
    ]
    for probe in occlusion_combinations_with_feature:
        probe_name = os.path.splitext(probe)[0]
        probe_name_without_occlusion = probe_name.split('_')[0]
        probe_subject = probe_name.split(delimiter)[0]
        probe_feature = np.load(os.path.join(synthetic_features_root, probe))
        for reference in real_features:
            reference_name = os.path.splitext(reference)[0]
            # Skip the real image this synthetic probe shares its name with.
            if reference_name == probe_name_without_occlusion:
                skipped += 1
                continue
            reference_subject = reference_name.split(delimiter)[0]
            similarity_score = similarity(probe_feature, real_feature_cache[reference])
            if probe_subject == reference_subject:  # Mated comparison
                mated_scores.append(similarity_score)
            else:  # non-mated
                non_mated_scores.append(similarity_score)

mated = np.array(mated_scores, dtype=float)
non_mated = np.array(non_mated_scores, dtype=float)

print(f'Number of subjects: {len(subjects)}')
print(f'Number of real features: {len(real_features)}')
print(f'Number of synthetic features: {len(synthetic_features)}')
print(f'Synthetic occluded all combinations vs real mated comparisons: {len(mated)}')
print(f'Synthetic occluded all combinations vs real non-mated comparisons: {len(non_mated)}')
print (f'Equal files skipped:  {skipped}')

# Saving the similarity score if scores exist
# Test for emptiness with .size: np.any() is False for an array that holds
# only zeros, which would wrongly discard a valid all-zero score set.
if mated.size:
    np.save(os.path.join(scores_root, 'real_vs_synthetic_mated.npy'), mated)
else:
    print('No mated comparisons for the all synthetic occlusion combinations vs real', file=sys.stderr)

if non_mated.size:
    np.save(os.path.join(scores_root, 'real_vs_synthetic_non-mated.npy'), non_mated)
else:
    print('No non-mated comparisons for all synthetic occlusion combinations vs real', file=sys.stderr)