In [8]:
# exploring RFW data
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data
from torchvision import models,transforms
import matplotlib.pyplot as plt
import pickle
from collections import OrderedDict
import csv
import collections
from  PIL import Image
from tqdm.notebook import tqdm_notebook
from scipy.spatial import distance
import warnings
warnings.filterwarnings('ignore')

In [9]:
def corr2_coeff(A, B, eps=1e-8):
    """Pairwise Pearson correlation between the rows of ``A`` and the rows of ``B``.

    Args:
        A: 2-D tensor holding one vector per row.
        B: 2-D tensor whose rows have the same length as the rows of ``A``.
        eps: Lower bound applied to the denominator. A constant (zero-variance)
            row has a zero sum of squares, which would otherwise yield 0/0 = NaN;
            with the bound in place such rows get a correlation of 0 instead.

    Returns:
        Tensor of shape ``(A.shape[0], B.shape[0])`` whose ``[i, j]`` entry is the
        correlation between ``A[i]`` and ``B[j]``.
    """
    # Row-wise mean of the input arrays, subtracted from the arrays themselves.
    A_mA = A - A.mean(1)[:, None]
    B_mB = B - B.mean(1)[:, None]

    # Sum of squared deviations for every row.
    ssA = (A_mA**2).sum(1)
    ssB = (B_mB**2).sum(1)

    # Outer product of the per-row sums of squares gives the squared denominator
    # for every (row of A, row of B) pair.
    denom = torch.sqrt(torch.matmul(ssA[:, None], ssB[None]))

    # Flooring the denominator keeps the result finite; NaN scores would
    # otherwise poison any argmax taken over this matrix.
    return torch.matmul(A_mA, B_mB.T) / torch.clamp(denom, min=eps)
def cos_sim(a, b, eps=1e-8):
    """
    Cosine similarity between every row of `a` and every row of `b`.

    Each row is divided by its L2 norm, with the norm floored at `eps` for
    numerical stability, and the two normalised matrices are then multiplied.
    Entry [i, j] of the result compares a[i] with b[j].
    """
    def _unit_rows(mat):
        # Column vector of per-row norms, never allowed to drop below eps.
        lengths = mat.norm(dim=1)[:, None]
        floor = eps * torch.ones_like(lengths)
        return mat / torch.max(lengths, floor)

    a_unit = _unit_rows(a)
    b_unit = _unit_rows(b)
    return torch.mm(a_unit, b_unit.transpose(0, 1))


In [12]:
# Embeddings and labels saved under outputs/RFW/ft.
# NOTE: torch.load and np.load(allow_pickle=True) unpickle arbitrary Python
# objects, so only point these at files you produced yourself.
run_dir = "outputs/RFW/ft"

reference_outputs = torch.load(f"{run_dir}/reference_outputs.pt")
reference_identities = np.load(f"{run_dir}/reference_identities.npy", allow_pickle=True)
reference_ethnicities = np.load(f"{run_dir}/reference_ethnicities.npy", allow_pickle=True)
reference_faceIDs = np.load(f"{run_dir}/reference_faceIDs.npy", allow_pickle=True)

candidate_outputs = torch.load(f"{run_dir}/candidate_outputs.pt")
candidate_identities = np.load(f"{run_dir}/candidate_identities.npy", allow_pickle=True)
candidate_ethnicities = np.load(f"{run_dir}/candidate_ethnicities.npy", allow_pickle=True)
candidate_faceIDs = np.load(f"{run_dir}/candidate_faceIDs.npy", allow_pickle=True)

# Similarity matrices: one row per reference, one column per candidate.
cor = corr2_coeff(reference_outputs, candidate_outputs).cpu().detach().numpy()
cos = cos_sim(reference_outputs, candidate_outputs).cpu().detach().numpy()

# Rank-1 identification: every candidate takes the identity of its best-scoring
# reference. The argmax runs down each column, and each frame is built in one go;
# DataFrame.append was removed in pandas 2.0 and was quadratic when used per row.
cor_best_ref = cor.argmax(axis=0)
cor_identification = pd.DataFrame({
    'candidate_identity': candidate_identities,
    'candidate_ethnicity': candidate_ethnicities,
    'reference_identity': reference_identities[cor_best_ref],
})
cor_identification['match'] = (
    cor_identification['candidate_identity'] == cor_identification['reference_identity']
).astype(int)

cos_best_ref = cos.argmax(axis=0)
cos_identification = pd.DataFrame({
    'candidate_identity': candidate_identities,
    'candidate_ethnicity': candidate_ethnicities,
    'reference_identity': reference_identities[cos_best_ref],
})
cos_identification['match'] = (
    cos_identification['candidate_identity'] == cos_identification['reference_identity']
).astype(int)

# Accuracy per candidate ethnicity = mean of the 0/1 match flag.
# sort=False keeps the ethnicities in order of first appearance.
cos_id_acc = (
    cos_identification
    .groupby('candidate_ethnicity', sort=False)['match']
    .mean()
    .rename_axis('ethnicity')
    .reset_index(name='accuracy')
)
print(cos_id_acc)

cor_id_acc = (
    cor_identification
    .groupby('candidate_ethnicity', sort=False)['match']
    .mean()
    .rename_axis('ethnicity')
    .reset_index(name='accuracy')
)
cor_id_acc

  0%|          | 0/29117 [00:00<?, ?it/s]

  0%|          | 0/29117 [00:00<?, ?it/s]

   ethnicity  accuracy
0    African  0.385718
1      Asian  0.432865
2  Caucasian  0.532053
3     Indian  0.532201


Unnamed: 0,ethnicity,accuracy
0,African,0.384636
1,Asian,0.430639
2,Caucasian,0.532191
3,Indian,0.528775


In [13]:
# Embeddings and labels saved under outputs/RFW/scratch.
# NOTE: torch.load and np.load(allow_pickle=True) unpickle arbitrary Python
# objects, so only point these at files you produced yourself.
# NOTE(review): the output saved with this cell is digit-for-digit identical to
# the one saved for the outputs/RFW/ft cell — confirm the scratch files really
# hold a different model's embeddings and that the cell was re-run.
run_dir = "outputs/RFW/scratch"

reference_outputs = torch.load(f"{run_dir}/reference_outputs.pt")
reference_identities = np.load(f"{run_dir}/reference_identities.npy", allow_pickle=True)
reference_ethnicities = np.load(f"{run_dir}/reference_ethnicities.npy", allow_pickle=True)
reference_faceIDs = np.load(f"{run_dir}/reference_faceIDs.npy", allow_pickle=True)

candidate_outputs = torch.load(f"{run_dir}/candidate_outputs.pt")
candidate_identities = np.load(f"{run_dir}/candidate_identities.npy", allow_pickle=True)
candidate_ethnicities = np.load(f"{run_dir}/candidate_ethnicities.npy", allow_pickle=True)
candidate_faceIDs = np.load(f"{run_dir}/candidate_faceIDs.npy", allow_pickle=True)

# Similarity matrices: one row per reference, one column per candidate.
cor = corr2_coeff(reference_outputs, candidate_outputs).cpu().detach().numpy()
cos = cos_sim(reference_outputs, candidate_outputs).cpu().detach().numpy()

# Rank-1 identification: every candidate takes the identity of its best-scoring
# reference. The argmax runs down each column, and each frame is built in one go;
# DataFrame.append was removed in pandas 2.0 and was quadratic when used per row.
cor_best_ref = cor.argmax(axis=0)
cor_identification = pd.DataFrame({
    'candidate_identity': candidate_identities,
    'candidate_ethnicity': candidate_ethnicities,
    'reference_identity': reference_identities[cor_best_ref],
})
cor_identification['match'] = (
    cor_identification['candidate_identity'] == cor_identification['reference_identity']
).astype(int)

cos_best_ref = cos.argmax(axis=0)
cos_identification = pd.DataFrame({
    'candidate_identity': candidate_identities,
    'candidate_ethnicity': candidate_ethnicities,
    'reference_identity': reference_identities[cos_best_ref],
})
cos_identification['match'] = (
    cos_identification['candidate_identity'] == cos_identification['reference_identity']
).astype(int)

# Accuracy per candidate ethnicity = mean of the 0/1 match flag.
# sort=False keeps the ethnicities in order of first appearance.
cos_id_acc = (
    cos_identification
    .groupby('candidate_ethnicity', sort=False)['match']
    .mean()
    .rename_axis('ethnicity')
    .reset_index(name='accuracy')
)
print(cos_id_acc)

cor_id_acc = (
    cor_identification
    .groupby('candidate_ethnicity', sort=False)['match']
    .mean()
    .rename_axis('ethnicity')
    .reset_index(name='accuracy')
)
cor_id_acc

  0%|          | 0/29117 [00:00<?, ?it/s]

  0%|          | 0/29117 [00:00<?, ?it/s]

   ethnicity  accuracy
0    African  0.385718
1      Asian  0.432865
2  Caucasian  0.532053
3     Indian  0.532201


Unnamed: 0,ethnicity,accuracy
0,African,0.384636
1,Asian,0.430639
2,Caucasian,0.532191
3,Indian,0.528775


In [16]:
# Embeddings and labels saved under outputs/RFW/ft/model ("reference2" / "candidate2" files).
# NOTE: torch.load and np.load(allow_pickle=True) unpickle arbitrary Python
# objects, so only point these at files you produced yourself.
run_dir = "outputs/RFW/ft/model"

reference_outputs = torch.load(f"{run_dir}/reference2_outputs.pt")
reference_identities = np.load(f"{run_dir}/reference2_identities.npy", allow_pickle=True)
reference_ethnicities = np.load(f"{run_dir}/reference2_ethnicities.npy", allow_pickle=True)
reference_faceIDs = np.load(f"{run_dir}/reference2_faceIDs.npy", allow_pickle=True)

candidate_outputs = torch.load(f"{run_dir}/candidate2_outputs.pt")
candidate_identities = np.load(f"{run_dir}/candidate2_identities.npy", allow_pickle=True)
candidate_ethnicities = np.load(f"{run_dir}/candidate2_ethnicities.npy", allow_pickle=True)
candidate_faceIDs = np.load(f"{run_dir}/candidate2_faceIDs.npy", allow_pickle=True)

# Similarity matrices: one row per reference, one column per candidate.
cor = corr2_coeff(reference_outputs, candidate_outputs).cpu().detach().numpy()
cos = cos_sim(reference_outputs, candidate_outputs).cpu().detach().numpy()

# Rank-1 identification: every candidate takes the identity of its best-scoring
# reference. The argmax runs down each column, and each frame is built in one go;
# DataFrame.append was removed in pandas 2.0 and was quadratic when used per row.
cor_best_ref = cor.argmax(axis=0)
cor_identification = pd.DataFrame({
    'candidate_identity': candidate_identities,
    'candidate_ethnicity': candidate_ethnicities,
    'reference_identity': reference_identities[cor_best_ref],
})
cor_identification['match'] = (
    cor_identification['candidate_identity'] == cor_identification['reference_identity']
).astype(int)

cos_best_ref = cos.argmax(axis=0)
cos_identification = pd.DataFrame({
    'candidate_identity': candidate_identities,
    'candidate_ethnicity': candidate_ethnicities,
    'reference_identity': reference_identities[cos_best_ref],
})
cos_identification['match'] = (
    cos_identification['candidate_identity'] == cos_identification['reference_identity']
).astype(int)

# Accuracy per candidate ethnicity = mean of the 0/1 match flag.
# sort=False keeps the ethnicities in order of first appearance.
cos_id_acc = (
    cos_identification
    .groupby('candidate_ethnicity', sort=False)['match']
    .mean()
    .rename_axis('ethnicity')
    .reset_index(name='accuracy')
)
print(cos_id_acc)

cor_id_acc = (
    cor_identification
    .groupby('candidate_ethnicity', sort=False)['match']
    .mean()
    .rename_axis('ethnicity')
    .reset_index(name='accuracy')
)
cor_id_acc

  0%|          | 0/29117 [00:00<?, ?it/s]

  0%|          | 0/29117 [00:00<?, ?it/s]

   ethnicity  accuracy
0    African  0.496348
1      Asian  0.530124
2  Caucasian  0.633877
3     Indian  0.625377


Unnamed: 0,ethnicity,accuracy
0,African,0.494455
1,Asian,0.529428
2,Caucasian,0.632771
3,Indian,0.623184


In [17]:
# Embeddings and labels saved under outputs/RFW/scratch/model ("reference2" / "candidate2" files).
# NOTE: torch.load and np.load(allow_pickle=True) unpickle arbitrary Python
# objects, so only point these at files you produced yourself.
# NOTE(review): the output saved with this cell is identical to the outputs saved
# for the outputs/RFW/ft and outputs/RFW/scratch cells — confirm these files
# hold distinct embeddings and that the cell was re-run after they were written.
run_dir = "outputs/RFW/scratch/model"

reference_outputs = torch.load(f"{run_dir}/reference2_outputs.pt")
reference_identities = np.load(f"{run_dir}/reference2_identities.npy", allow_pickle=True)
reference_ethnicities = np.load(f"{run_dir}/reference2_ethnicities.npy", allow_pickle=True)
reference_faceIDs = np.load(f"{run_dir}/reference2_faceIDs.npy", allow_pickle=True)

candidate_outputs = torch.load(f"{run_dir}/candidate2_outputs.pt")
candidate_identities = np.load(f"{run_dir}/candidate2_identities.npy", allow_pickle=True)
candidate_ethnicities = np.load(f"{run_dir}/candidate2_ethnicities.npy", allow_pickle=True)
candidate_faceIDs = np.load(f"{run_dir}/candidate2_faceIDs.npy", allow_pickle=True)

# Similarity matrices: one row per reference, one column per candidate.
cor = corr2_coeff(reference_outputs, candidate_outputs).cpu().detach().numpy()
cos = cos_sim(reference_outputs, candidate_outputs).cpu().detach().numpy()

# Rank-1 identification: every candidate takes the identity of its best-scoring
# reference. The argmax runs down each column, and each frame is built in one go;
# DataFrame.append was removed in pandas 2.0 and was quadratic when used per row.
cor_best_ref = cor.argmax(axis=0)
cor_identification = pd.DataFrame({
    'candidate_identity': candidate_identities,
    'candidate_ethnicity': candidate_ethnicities,
    'reference_identity': reference_identities[cor_best_ref],
})
cor_identification['match'] = (
    cor_identification['candidate_identity'] == cor_identification['reference_identity']
).astype(int)

cos_best_ref = cos.argmax(axis=0)
cos_identification = pd.DataFrame({
    'candidate_identity': candidate_identities,
    'candidate_ethnicity': candidate_ethnicities,
    'reference_identity': reference_identities[cos_best_ref],
})
cos_identification['match'] = (
    cos_identification['candidate_identity'] == cos_identification['reference_identity']
).astype(int)

# Accuracy per candidate ethnicity = mean of the 0/1 match flag.
# sort=False keeps the ethnicities in order of first appearance.
cos_id_acc = (
    cos_identification
    .groupby('candidate_ethnicity', sort=False)['match']
    .mean()
    .rename_axis('ethnicity')
    .reset_index(name='accuracy')
)
print(cos_id_acc)

cor_id_acc = (
    cor_identification
    .groupby('candidate_ethnicity', sort=False)['match']
    .mean()
    .rename_axis('ethnicity')
    .reset_index(name='accuracy')
)
cor_id_acc

  0%|          | 0/29117 [00:00<?, ?it/s]

  0%|          | 0/29117 [00:00<?, ?it/s]

   ethnicity  accuracy
0    African  0.385718
1      Asian  0.432865
2  Caucasian  0.532053
3     Indian  0.532201


Unnamed: 0,ethnicity,accuracy
0,African,0.384636
1,Asian,0.430639
2,Caucasian,0.532191
3,Indian,0.528775
