In [None]:
import numpy as np
import glob
import sys
from tqdm.auto import tqdm
import os
import random
import itertools

# Paths used in the script (all relative to the current working directory).
# The trailing slash matters wherever a root is concatenated directly with a
# file pattern instead of being joined with os.path.join.
real_features_root = './features/real/'            # searched for '*.npy' embeddings of real images
synthetic_features_root = './features/synthetic/'  # searched for '*.npy' embeddings of synthetic images
scores_root = './similarity_scores/'               # output folder for the saved score arrays

# Cosine similarity
def similarity(reference_1, reference_2):
    """Return the cosine similarity between two embedding vectors.

    Both inputs are converted to float64 and flattened, so ``(n,)``,
    ``(1, n)`` and ``(n, 1)`` arrays are all treated as one n-dimensional
    vector.

    Parameters
    ----------
    reference_1, reference_2 : array_like
        Embeddings holding the same number of elements.

    Returns
    -------
    float
        Cosine of the angle between the vectors, clamped to ``[-1, 1]`` to
        absorb floating-point round-off. ``0.0`` is returned when either
        vector has zero norm (the cosine is undefined there); NaN inputs
        propagate as NaN.

    Raises
    ------
    ValueError
        If the flattened inputs differ in length.
    """
    vector_1 = np.asarray(reference_1, dtype=np.float64).ravel()
    vector_2 = np.asarray(reference_2, dtype=np.float64).ravel()
    norm = np.linalg.norm(vector_1) * np.linalg.norm(vector_2)
    # A zero norm would give 0/0 = NaN, and the builtin min(1, nan) returns 1,
    # which would report a degenerate embedding as a perfect match.
    if norm == 0:
        return 0.0
    cosine = np.dot(vector_1, vector_2) / norm
    # np.clip (unlike the builtin min/max) keeps NaN as NaN.
    return float(np.clip(cosine, -1.0, 1.0))

# Adding embeddings to lists: file names only (no directory part).
# glob returns entries in arbitrary, OS-dependent order, so the lists are
# sorted to give every later loop a stable iteration order.
real_features = sorted(
    os.path.basename(x) for x in glob.glob(os.path.join(real_features_root, '*.npy')))
synthetic_features = sorted(
    os.path.basename(x) for x in glob.glob(os.path.join(synthetic_features_root, '*.npy')))

# Abort early when either folder holds no embeddings; nothing below can run without them.
if not real_features:
    sys.exit(f'No features found in folder {real_features_root}')
if not synthetic_features:
    sys.exit(f'No features found in folder {synthetic_features_root}')

# Create scores root if not exist
os.makedirs(scores_root, exist_ok=True)

In [None]:
print('\n*** Real only comparisons ***\n')

# For every subject, one randomly chosen real embedding is the reference and
# is compared against every other real embedding (mated = same subject).

# Scores are gathered in Python lists and converted to arrays once at the
# end; np.append copies the whole array on every call.
mated_scores = []
non_mated_scores = []
skipped = 0  # probes skipped because they are the reference file itself

# Group the files per subject; the subject ID is the part of the file name
# before the first 'd'. Grouping by the parsed ID gives an exact match,
# whereas a 'subject*' glob also matches longer IDs sharing the prefix.
features_by_subject = {}
for file in real_features:
    features_by_subject.setdefault(file.split('d')[0], []).append(file)
subjects = sorted(features_by_subject)

for subject in tqdm(subjects, file=sys.stdout, unit=' subjects'):
    features_of_subject = features_by_subject[subject]
    feature = os.path.join(real_features_root, random.choice(features_of_subject))
    feature_name = os.path.splitext(os.path.basename(feature))[0]
    reference_feature = np.load(feature)
    for probe in real_features:
        probe_name = os.path.splitext(os.path.basename(probe))[0]
        if feature_name == probe_name:
            skipped += 1
            continue
        probe_path = os.path.join(real_features_root, probe)
        probe_feature = np.load(probe_path)
        similarity_score = similarity(reference_feature, probe_feature)
        # np.ravel + extend flattens the score the same way np.append did
        if probe.split('d')[0] == subject: # Mated comparison
            mated_scores.extend(np.ravel(similarity_score))
        else: # non-mated
            non_mated_scores.extend(np.ravel(similarity_score))

mated = np.asarray(mated_scores, dtype=float)
non_mated = np.asarray(non_mated_scores, dtype=float)

print(f'Number of real features: {len(real_features)}')
print(f'Number of synthetic features: {len(synthetic_features)}')
print(f'Real only mated comparisons: {len(mated)}')
print(f'Real only non-mated comparisons: {len(non_mated)}')
print (f'Equal files skipped:  {skipped}')

# Save whenever scores exist; np.any would wrongly treat an all-zero score
# array as "no comparisons".
if mated.size:
    np.save(os.path.join(scores_root,'real_mated.npy'), mated)
else:
    print('No mated comparisons for the real only',file=sys.stderr)

if non_mated.size:
    np.save(os.path.join(scores_root,'real_non-mated.npy'), non_mated)
else:
    print('No non-mated comparisons for the real only',file=sys.stderr)

In [None]:
print('\n*** Real vs synthetic occluded combination comparisons ***\n')

# For every occlusion label and every subject, one randomly chosen real
# embedding is compared against all synthetic embeddings with that label.

# Group the real files per subject; the subject ID is the part of the file
# name before the first 'd'. Grouping by the parsed ID gives an exact match,
# whereas a 'subject*' glob also matches longer IDs sharing the prefix.
features_by_subject = {}
for file in real_features:
    features_by_subject.setdefault(file.split('d')[0], []).append(file)
subjects = sorted(features_by_subject)

# The occlusion label is everything after the first '_' of the file stem;
# sorted so the labels are processed in a stable order.
occlusion_list = sorted({
    '_'.join(os.path.splitext(feature)[0].split('_')[1:]) for feature in synthetic_features})

# NOTE(review): `sleep` is not used in this cell; the import is kept only in
# case other cells rely on the name.
from time import sleep
for occlusion in occlusion_list:
    print(f'\n*** Real vs synthetic {occlusion} occluded comparisons ***\n')
    # '[0-9]_' requires a digit right before '_<occlusion>', so a label does
    # not also match files whose longer label merely ends with it.
    filtered_synthetic_features = sorted(
        glob.glob(synthetic_features_root + '*[0-9]_' + occlusion + '.npy'))
    # Lists instead of np.append: the latter copies the whole array per call.
    mated_scores = []
    non_mated_scores = []
    skipped = 0  # probes derived from the same image as the reference
    for subject in tqdm(subjects, file=sys.stdout, unit=' subjects', desc=occlusion):
        features_of_subject = features_by_subject[subject]
        feature = os.path.join(real_features_root, random.choice(features_of_subject))
        feature_name = os.path.splitext(os.path.basename(feature))[0]
        reference_feature = np.load(feature)
        for probe in filtered_synthetic_features:
            probe_name = os.path.basename(probe).split('_', 1)[0]
            if feature_name == probe_name:
                skipped += 1
                continue
            probe_feature = np.load(probe)
            similarity_score = similarity(reference_feature, probe_feature)
            # np.ravel + extend flattens the score the same way np.append did
            if probe_name.split('d')[0] == subject: # Mated comparison
                mated_scores.extend(np.ravel(similarity_score))
            else: # non-mated
                non_mated_scores.extend(np.ravel(similarity_score))

    mated = np.asarray(mated_scores, dtype=float)
    non_mated = np.asarray(non_mated_scores, dtype=float)

    print(f'Number of real features: {len(real_features)}')
    print(f'Number of synthetic features: {len(synthetic_features)}')
    print(f'Real vs synthetic mated comparisons: {len(mated)}')
    print(f'Real vs synthetic non-mated comparisons: {len(non_mated)}')
    print (f'Equal files skipped:  {skipped}')

    # Save whenever scores exist; np.any would wrongly treat an all-zero
    # score array as "no comparisons".
    if mated.size:
        filename = f'real_vs_synthetic_mated_{occlusion}.npy'
        np.save(os.path.join(scores_root,filename), mated)
    else:
        print(f'No mated comparisons for the real vs synthetic {occlusion}',file=sys.stderr)

    if non_mated.size:
        filename = f'real_vs_synthetic_non-mated_{occlusion}.npy'
        np.save(os.path.join(scores_root,filename), non_mated)
    else:
        print(f'No non-mated comparisons for the real vs synthetic {occlusion}',file=sys.stderr)

In [None]:
print('\n*** Synthetic comparisons ***\n')

# For every subject, one randomly chosen synthetic embedding is the reference
# and is compared against all synthetic embeddings of other source images.

# Scores are gathered in Python lists and converted to arrays once at the
# end; np.append copies the whole array on every call.
mated_scores = []
non_mated_scores = []
skipped = 0  # probes derived from the same source image as the reference

# Group the files per subject; the subject ID is the part of the file name
# before the first 'd'. Grouping by the parsed ID gives an exact match,
# whereas a 'subject*' glob also matches longer IDs sharing the prefix.
features_by_subject = {}
for file in synthetic_features:
    features_by_subject.setdefault(file.split('d')[0], []).append(file)
subjects = sorted(features_by_subject)

for subject in tqdm(subjects, file=sys.stdout, unit=' subjects'):
    features_of_subject = features_by_subject[subject]
    feature = os.path.join(synthetic_features_root, random.choice(features_of_subject))
    # Image name = file stem up to the first '_'
    feature_name = os.path.splitext(os.path.basename(feature))[0].split('_')[0]
    reference_feature = np.load(feature)
    for probe in synthetic_features:
        probe_name = os.path.splitext(probe)[0].split('_', 1)[0]
        if feature_name == probe_name:
            skipped += 1
            continue
        probe_path = os.path.join(synthetic_features_root, probe)
        probe_feature = np.load(probe_path)
        similarity_score = similarity(reference_feature, probe_feature)
        # np.ravel + extend flattens the score the same way np.append did
        if probe.split('d')[0] == subject: # Mated comparison
            mated_scores.extend(np.ravel(similarity_score))
        else: # non-mated
            non_mated_scores.extend(np.ravel(similarity_score))

mated = np.asarray(mated_scores, dtype=float)
non_mated = np.asarray(non_mated_scores, dtype=float)

print(f'Number of real features: {len(real_features)}')
print(f'Number of synthetic features: {len(synthetic_features)}')
print(f'Synthetic only mated comparisons: {len(mated)}')
print(f'Synthetic only non-mated comparisons: {len(non_mated)}')
print (f'Equal files skipped:  {skipped}')

# Save whenever scores exist; np.any would wrongly treat an all-zero score
# array as "no comparisons".
if mated.size:
    np.save(os.path.join(scores_root,'synthetic_mated.npy'), mated)
else:
    print('No mated comparisons for synthetic only',file=sys.stderr)

if non_mated.size:
    np.save(os.path.join(scores_root,'synthetic_non-mated.npy'), non_mated)
else:
    print('No non-mated comparisons for synthetic only',file=sys.stderr)

In [None]:
print('\n*** Real versus all synthetic comparisons ***\n')

# For every subject, one randomly chosen real embedding is the reference and
# is compared against every synthetic embedding, whatever its occlusion.

# Scores are gathered in Python lists and converted to arrays once at the
# end; np.append copies the whole array on every call.
mated_scores = []
non_mated_scores = []
skipped = 0  # probes derived from the same image as the reference

# Group the real files per subject; the subject ID is the part of the file
# name before the first 'd'. Only names from real_features ('*.npy') are
# used, so stray non-.npy files or longer IDs sharing the prefix can no
# longer be picked as the reference.
features_by_subject = {}
for file in real_features:
    features_by_subject.setdefault(file.split('d')[0], []).append(file)
subjects = sorted(features_by_subject)

for subject in tqdm(subjects, file=sys.stdout, unit=' subjects'):
    features_of_subject = features_by_subject[subject]
    feature = os.path.join(real_features_root, random.choice(features_of_subject))
    feature_name = os.path.splitext(os.path.basename(feature))[0]
    reference_feature = np.load(feature)
    for probe in synthetic_features:
        # Image name = file stem up to the first '_' (extension stripped first)
        probe_name = os.path.splitext(probe)[0].split('_', 1)[0]
        if feature_name == probe_name:
            skipped += 1
            continue
        probe_path = os.path.join(synthetic_features_root, probe)
        probe_feature = np.load(probe_path)
        similarity_score = similarity(reference_feature, probe_feature)
        # np.ravel + extend flattens the score the same way np.append did
        if probe.split('d')[0] == subject: # Mated comparison
            mated_scores.extend(np.ravel(similarity_score))
        else: # non-mated
            non_mated_scores.extend(np.ravel(similarity_score))

mated = np.asarray(mated_scores, dtype=float)
non_mated = np.asarray(non_mated_scores, dtype=float)

print(f'Number of real features: {len(real_features)}')
print(f'Number of synthetic features: {len(synthetic_features)}')
print(f'Real vs synthetic mated comparisons: {len(mated)}')
print(f'Real vs synthetic non-mated comparisons: {len(non_mated)}')
print (f'Equal files skipped:  {skipped}')

# Saving the similarity score if scores exist (np.any would wrongly treat an
# all-zero score array as "no comparisons")
if mated.size:
    np.save(os.path.join(scores_root,'real_vs_synthetic_mated.npy'), mated)
else:
    print('No mated comparisons for the real vs all synthetic',file=sys.stderr)

if non_mated.size:
    np.save(os.path.join(scores_root,'real_vs_synthetic_non-mated.npy'), non_mated)
else:
    print('No non-mated comparisons for the real vs all synthetic',file=sys.stderr)