In [1]:
# exploring RFW data
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data
from torchvision import models,transforms
import matplotlib.pyplot as plt
import pickle
from collections import OrderedDict
import csv
import collections
from  PIL import Image
from tqdm.notebook import tqdm_notebook
from scipy.spatial import distance
import warnings
warnings.filterwarnings('ignore')
import math

In [2]:
__all__ = ['ResNet', 'resnet50']

def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=8631, include_top=True):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.include_top = include_top
        
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        
        if not self.include_top:
            return x
        
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

def resnet50(**kwargs):
    """Constructs a ResNet-50 model.
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    return model

In [3]:
model = resnet50()
fname = 'weights/resnet50_ft_weight.pkl'

In [4]:

with open(fname, 'rb') as f:
    weights = pickle.load(f, encoding='latin1')

own_state = model.state_dict()
for name, param in weights.items():
    if name in own_state:
        try:
            own_state[name].copy_(torch.from_numpy(param))
        except Exception:
            raise RuntimeError('While copying the parameter named {}, whose dimensions in the model are {} and whose '\
                                'dimensions in the checkpoint are {}.'.format(name, own_state[name].size(), param.shape))
    else:
        raise KeyError('unexpected key "{}" in state_dict'.format(name))


In [5]:
# create df to contain all identities, their image file names, their ethnicities
path = "data/RFW/images/test/txts/"
img_path = 'data/RFW/images/test/data/'

# African images
african_images = pd.read_csv(path + 'African/African_images.txt', sep="\t", header=None)
african_images.columns = ['File', 'Label']
african_images['identityID'] = african_images['File'].str[:-9]
african_images['faceID'] = african_images['File'].str[-8:-4]
african_images['Ethnicity'] = 'African'
# Asian images
asian_images = pd.read_csv(path + 'Asian/Asian_images.txt', sep="\t", header=None)
asian_images.columns = ['File', 'Label']
asian_images['identityID'] = asian_images['File'].str[:-9]
asian_images['faceID'] = asian_images['File'].str[-8:-4]
asian_images['Ethnicity'] = 'Asian'
# Caucasian images
caucasian_images = pd.read_csv(path + 'Caucasian/Caucasian_images.txt', sep="\t", header=None)
caucasian_images.columns = ['File', 'Label']
caucasian_images['identityID'] = caucasian_images['File'].str[:-9]
caucasian_images['faceID'] = caucasian_images['File'].str[-8:-4]
caucasian_images['Ethnicity'] = 'Caucasian'
# Indian images
indian_images = pd.read_csv(path + 'Indian/Indian_images.txt', sep="\t", header=None)
indian_images.columns = ['File', 'Label']
indian_images['identityID'] = indian_images['File'].str[:-9]
indian_images['faceID'] = indian_images['File'].str[-8:-4]
indian_images['Ethnicity'] = 'Indian'
all_images = pd.concat([african_images,asian_images,caucasian_images,indian_images])
all_images

Unnamed: 0,File,Label,identityID,faceID,Ethnicity
0,m.0c7mh2_0003.jpg,0,m.0c7mh2,0003,African
1,m.0c7mh2_0001.jpg,0,m.0c7mh2,0001,African
2,m.0c7mh2_0002.jpg,0,m.0c7mh2,0002,African
3,m.026tq86_0003.jpg,1,m.026tq86,0003,African
4,m.026tq86_0001.jpg,1,m.026tq86,0001,African
...,...,...,...,...,...
10303,m.027nbyf_0002.jpg,2982,m.027nbyf,0002,Indian
10304,m.027nbyf_0001.jpg,2982,m.027nbyf,0001,Indian
10305,m.027nbyf_0005.jpg,2982,m.027nbyf,0005,Indian
10306,m.098d5s_0002.jpg,2983,m.098d5s,0002,Indian


In [6]:
# remove any duplicate identities
v = all_images.reset_index().groupby('identityID').Ethnicity.nunique()
dup = v[v>1].index.tolist()
all_images = all_images[~all_images['identityID'].isin(dup)]
all_images

Unnamed: 0,File,Label,identityID,faceID,Ethnicity
0,m.0c7mh2_0003.jpg,0,m.0c7mh2,0003,African
1,m.0c7mh2_0001.jpg,0,m.0c7mh2,0001,African
2,m.0c7mh2_0002.jpg,0,m.0c7mh2,0002,African
3,m.026tq86_0003.jpg,1,m.026tq86,0003,African
4,m.026tq86_0001.jpg,1,m.026tq86,0001,African
...,...,...,...,...,...
10303,m.027nbyf_0002.jpg,2982,m.027nbyf,0002,Indian
10304,m.027nbyf_0001.jpg,2982,m.027nbyf,0001,Indian
10305,m.027nbyf_0005.jpg,2982,m.027nbyf,0005,Indian
10306,m.098d5s_0002.jpg,2983,m.098d5s,0002,Indian


In [7]:
# get first image from each identity and use it as reference
identities = np.array(all_images.identityID.unique().tolist()).astype(object)
file_end =  np.array('_0001.jpg'.split()*len(identities)).astype(object)
first_images = identities + file_end
first_images

array(['m.0c7mh2_0001.jpg', 'm.026tq86_0001.jpg', 'm.02wz3nc_0001.jpg',
       ..., 'm.02793d7_0001.jpg', 'm.027nbyf_0001.jpg',
       'm.098d5s_0001.jpg'], dtype=object)

In [8]:
references = all_images[all_images['File'].isin(first_images)]
candidates = all_images[~all_images['File'].isin(first_images)]
print(len(references),len(candidates))

11403 29117


In [9]:
candidates

Unnamed: 0,File,Label,identityID,faceID,Ethnicity
0,m.0c7mh2_0003.jpg,0,m.0c7mh2,0003,African
2,m.0c7mh2_0002.jpg,0,m.0c7mh2,0002,African
3,m.026tq86_0003.jpg,1,m.026tq86,0003,African
5,m.026tq86_0002.jpg,1,m.026tq86,0002,African
6,m.02wz3nc_0002.jpg,2,m.02wz3nc,0002,African
...,...,...,...,...,...
10301,m.027nbyf_0004.jpg,2982,m.027nbyf,0004,Indian
10302,m.027nbyf_0003.jpg,2982,m.027nbyf,0003,Indian
10303,m.027nbyf_0002.jpg,2982,m.027nbyf,0002,Indian
10305,m.027nbyf_0005.jpg,2982,m.027nbyf,0005,Indian


In [10]:
# create dataset class for RFW
class resnetRFW(data.Dataset):
    
    '''
    This will be a class to load data from RFW for resnet50 model
    '''
     
    mean_bgr = np.array([91.4953, 103.8827, 131.0912])  # from resnet50_ft.prototxt

    def __init__(self,img_path,img_df):
        """
        :param img_path: dataset directory
        :param img_df: contains image file names and other information
        """
        assert os.path.exists(img_path), "root: {} not found.".format(img_path)
        self.img_path = img_path
        self.img_df = img_df
        self.img_info = []

        for i, row in self.img_df.iterrows():
            self.img_info.append({
                'img_file': row.Ethnicity + '/' + row.identityID + '/' + row.File,
                'identityID': row.identityID,
                'Ethnicity': row.Ethnicity,
                'faceID': row.faceID,
            })
            if i % 5000 == 0:
                print("processing: {} images".format(i))

    def __len__(self):
        return len(self.img_info)

    def __getitem__(self, index):
        info = self.img_info[index]
        img_file = info['img_file']
        img = Image.open(os.path.join(self.img_path, img_file))
        img = transforms.Resize(256)(img)
        img = transforms.CenterCrop(224)(img)
        img = np.array(img, dtype=np.uint8)
        assert len(img.shape) == 3  # assumes color images and no alpha channel

        Ethnicity = info['Ethnicity']
        identityID = info['identityID']
        faceID = info['faceID']
        return self.transform(img), identityID, Ethnicity, faceID
  

    def transform(self, img):
        img = img[:, :, ::-1]  # RGB -> BGR
        img = img.astype(np.float32)
        img -= self.mean_bgr
        img = img.transpose(2, 0, 1)  # C x H x W
        img = torch.from_numpy(img).float()
        return img

    def untransform(self, img, lbl):
        img = img.numpy()
        img = img.transpose(1, 2, 0)
        img += self.mean_bgr
        img = img.astype(np.uint8)
        img = img[:, :, ::-1]
        return img, lbl

def load_resnet50(weights="weights/resnet50_scratch_weight.pkl"):
    # load resnet50 model and modify it to match the one from the github to load the weights from the pkl
# resnet50 trained on VGGFace2
    resnet50 = models.resnet50(pretrained=False)
    resnet50.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    resnet50.layer2[0].conv1 = nn.Conv2d(256, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
    resnet50.layer2[0].conv2 = nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    resnet50.layer3[0].conv1 = nn.Conv2d(512, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
    resnet50.layer3[0].conv2 = nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    resnet50.layer4[0].conv1 = nn.Conv2d(1024, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
    resnet50.layer4[0].conv2 = nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    resnet50.avgpool = nn.AvgPool2d(kernel_size=7, stride=1, padding=0)
    resnet50.fc = nn.Linear(in_features=2048,out_features=8631)
    with open("weights/resnet50_scratch_weight.pkl", 'rb') as f:
        weights = pickle.load(f, encoding='latin1')
    weights = dict(map(lambda x: (x[0], torch.from_numpy(x[1])), weights.items()))
    weights = OrderedDict(weights)
    resnet50.load_state_dict(weights)
    resnet50 = torch.nn.Sequential(*(list(resnet50.children())[:-1]))
    return resnet50

In [15]:
device = torch.device("mps" if torch.has_mps else "cpu")
print(device)
kwargs = {'num_workers': 4, 'pin_memory': True} if torch.cuda.is_available() else {}
# load reference images
reference_dataset = resnetRFW(img_path,references.reset_index(drop=True))
reference_loader = torch.utils.data.DataLoader(reference_dataset, batch_size=32, shuffle=False, **kwargs)
# load candidate images
candidate_dataset = resnetRFW(img_path,candidates.reset_index(drop=True))
candidate_loader = torch.utils.data.DataLoader(candidate_dataset, batch_size=32, shuffle=False, **kwargs)


mps
processing: 0 images
processing: 5000 images
processing: 10000 images
processing: 0 images
processing: 5000 images
processing: 10000 images
processing: 15000 images
processing: 20000 images
processing: 25000 images


In [16]:
# load model and assign weights

# resnet50 = load_resnet50(weights="weights/resnet50_ft_weight.pkl")
print(device)
resnet50 = model.to(device=device)


mps


In [17]:
def apply_model(model,dataloader,file_prefix,device):
    model.eval()
    outputs = []
    identities = []
    ethnicities = []
    faceIDs = []
    with torch.no_grad():
        for i, (imgs, identityID, Ethnicity, faceID) in tqdm_notebook(enumerate(dataloader),total=len(dataloader)):
            imgs = imgs.to(device)
            x = model(imgs)
            out = x.view(x.size(0),-1)
            outputs.append(out)
            identities.append(np.array(identityID))
            ethnicities.append(np.array(Ethnicity))
            faceIDs.append(np.array(faceID))

    outputs=torch.cat(outputs)
    identities= np.concatenate(np.array(identities)).ravel()
    ethnicities= np.concatenate(np.array(ethnicities)).ravel()
    faceIDs= np.concatenate(np.array(faceIDs)).ravel()

    torch.save(outputs, file_prefix + '_outputs.pt')
    np.save(file_prefix + '_identities.npy', identities)
    np.save(file_prefix + '_ethnicities.npy', ethnicities)
    np.save(file_prefix + '_faceIDs.npy', faceIDs)
    return outputs, identities, ethnicities, faceIDs

In [18]:
# apply model to references

reference_outputs, reference_identities, reference_ethnicities, reference_faceIDs = apply_model(resnet50,reference_loader,'outputs/RFW/ft/reference',device)
""" outputs = []
identities = []
ethnicities = []
faceIDs = []
with torch.no_grad():
    for i, (imgs, identityID, Ethnicity, faceID) in tqdm_notebook(enumerate(reference_loader),total=len(reference_loader)):
        x = resnet50(imgs)
        out = x.view(x.size(0),-1)
        outputs.append(out)
        identities.append(np.array(identityID))
        ethnicities.append(np.array(Ethnicity))
        faceIDs.append(np.array(faceID))

outputs=torch.cat(outputs)
identities= np.array(identities)
ethnicities= np.array(ethnicities)
faceIDs= np.array(faceIDs)

torch.save(outputs, 'reference_outputs.pt')
np.save('reference_identities.npy', identities)
np.save('reference_ethnicities.npy', ethnicities)
np.save('reference_faceIDs.npy', faceIDs) """



  0%|          | 0/357 [00:00<?, ?it/s]

" outputs = []\nidentities = []\nethnicities = []\nfaceIDs = []\nwith torch.no_grad():\n    for i, (imgs, identityID, Ethnicity, faceID) in tqdm_notebook(enumerate(reference_loader),total=len(reference_loader)):\n        x = resnet50(imgs)\n        out = x.view(x.size(0),-1)\n        outputs.append(out)\n        identities.append(np.array(identityID))\n        ethnicities.append(np.array(Ethnicity))\n        faceIDs.append(np.array(faceID))\n\noutputs=torch.cat(outputs)\nidentities= np.array(identities)\nethnicities= np.array(ethnicities)\nfaceIDs= np.array(faceIDs)\n\ntorch.save(outputs, 'reference_outputs.pt')\nnp.save('reference_identities.npy', identities)\nnp.save('reference_ethnicities.npy', ethnicities)\nnp.save('reference_faceIDs.npy', faceIDs) "

In [19]:
# apply model to candidates

candidate_outputs, candidate_identities, candidate_ethnicities, candidate_faceIDs = apply_model(resnet50,candidate_loader,'outputs/RFW/ft/candidate',device)
""" with torch.no_grad():
    for i, (imgs, identityID, Ethnicity, faceID) in tqdm_notebook(enumerate(candidate_loader),total=len(candidate_loader)):
        x = resnet50(imgs)
        out = x.view(x.size(0),-1)
        outputs.append(out)
        identities.append(np.array(identityID))
        ethnicities.append(np.array(Ethnicity))
        faceIDs.append(np.array(faceID))

outputs=torch.cat(outputs)
identities= np.array(identities)
ethnicities= np.array(ethnicities)
faceIDs= np.array(faceIDs)

torch.save(outputs, 'candidate_outputs.pt')
np.save('candidate_identities.npy', identities)
np.save('candidate_ethnicities.npy', ethnicities)
np.save('candidate_faceIDs.npy', faceIDs) """

  0%|          | 0/910 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# face verification (1:1)

def face_verification(reference, candidate, metric, threshold=None):
    ''' 
    this function performs face verification given a reference face and a candidate face
    returns 0 if the faces do not match and 1 if they do
    '''
    if metric == 'correlation':
        if threshold is None:
            threshold = 0.8
        cor = np.abs(corr2_coeff(reference,candidate))
        return cor
        """if cor > threshold:
            return 1
        else:
            return 0"""
        
    elif metric == 'cosine':
        if threshold is None:
            threshold = 0.5
        cos = distance.cosine(reference,candidate)
        if cos <= threshold:
            return 1
        else:
            return 0
    else:
        return "Please use one of 'correlation', or 'cosine' as an input for metric"
        
def corr2_coeff(A, B):
    # Rowwise mean of input arrays & subtract from input arrays themeselves
    A_mA = A - A.mean(1)[:, None]
    B_mB = B - B.mean(1)[:, None]

    # Sum of squares across rows
    ssA = (A_mA**2).sum(1)
    ssB = (B_mB**2).sum(1)

    # Finally get corr coeff
    return torch.matmul(A_mA, B_mB.T) / torch.sqrt(torch.matmul(ssA[:, None],ssB[None]))
def cos_sim(a, b, eps=1e-8):
    """
    added eps for numerical stability
    """
    a_n, b_n = a.norm(dim=1)[:, None], b.norm(dim=1)[:, None]
    a_norm = a / torch.max(a_n, eps * torch.ones_like(a_n))
    b_norm = b / torch.max(b_n, eps * torch.ones_like(b_n))
    sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1))
    return sim_mt


In [None]:
# for now since i didnt update the files i have to ravel here otherwise just load normally

'''reference_outputs = torch.load("outputs/RFW/reference_outputs.pt")
reference_identities = np.concatenate(np.load('outputs/RFW/reference_identities.npy',allow_pickle=True)).ravel()
reference_ethnicities = np.concatenate(np.load('outputs/RFW/reference_ethnicities.npy',allow_pickle=True)).ravel()
reference_faceIDs = np.concatenate(np.load('outputs/RFW/reference_faceIDs.npy',allow_pickle=True)).ravel()
print(reference_outputs.shape,reference_identities.shape,reference_ethnicities.shape,reference_faceIDs.shape)

candidate_outputs = torch.load("outputs/RFW/candidate_outputs.pt")
candidate_identities = np.concatenate(np.load('outputs/RFW/candidate_identities.npy',allow_pickle=True)).ravel()
candidate_ethnicities = np.concatenate(np.load('outputs/RFW/candidate_ethnicities.npy',allow_pickle=True)).ravel()
candidate_faceIDs = np.concatenate(np.load('outputs/RFW/candidate_faceIDs.npy',allow_pickle=True)).ravel()
print(candidate_outputs.shape,candidate_identities.shape,candidate_ethnicities.shape,candidate_faceIDs.shape)

candidate_identities'''

torch.Size([11403, 2048]) (11403,) (11403,) (11403,)
torch.Size([29117, 2048]) (29117,) (29117,) (29117,)


array(['m.0c7mh2', 'm.0c7mh2', 'm.026tq86', ..., 'm.027nbyf', 'm.027nbyf',
       'm.098d5s'], dtype='<U10')

In [None]:
reference_outputs = torch.load("outputs/RFW/ft/reference_outputs.pt")
reference_identities = np.load('outputs/RFW/ft/reference_identities.npy',allow_pickle=True)
reference_ethnicities = np.load('outputs/RFW/ft/reference_ethnicities.npy',allow_pickle=True)
reference_faceIDs = np.load('outputs/RFW/ft/reference_faceIDs.npy',allow_pickle=True)
print(reference_outputs.shape,reference_identities.shape,reference_ethnicities.shape,reference_faceIDs.shape)

candidate_outputs = torch.load("outputs/RFW/ft/candidate_outputs.pt")
candidate_identities = np.load('outputs/RFW/ft/candidate_identities.npy',allow_pickle=True)
candidate_ethnicities = np.load('outputs/RFW/ft/candidate_ethnicities.npy',allow_pickle=True)
candidate_faceIDs = np.load('outputs/RFW/ft/candidate_faceIDs.npy',allow_pickle=True)
print(candidate_outputs.shape,candidate_identities.shape,candidate_ethnicities.shape,candidate_faceIDs.shape)

candidate_identities

torch.Size([11403, 2048]) (11403,) (11403,) (11403,)
torch.Size([29117, 2048]) (29117,) (29117,) (29117,)


array(['m.0c7mh2', 'm.0c7mh2', 'm.026tq86', ..., 'm.027nbyf', 'm.027nbyf',
       'm.098d5s'], dtype='<U10')

In [None]:
cor = corr2_coeff(reference_outputs,candidate_outputs).cpu().detach().numpy()
cos = cos_sim(reference_outputs,candidate_outputs).cpu().detach().numpy()

In [None]:
thresh = 0.7
verification = pd.DataFrame(columns=['reference_identity','reference_ethnicity', 'cor_matches', 'cos_matches'])
for i, (cor_row, cos_row) in tqdm_notebook(enumerate(zip(cor,cos)),total=len(cor)):
  identity = reference_identities[i]
  ethnicity = reference_ethnicities[i]
  match_cor = candidate_identities[cor_row>thresh]
  match_cos = candidate_identities[cos_row>thresh]  

  row = {'reference_identity': identity,
           'reference_ethnicity': ethnicity, 
           'cor_matches': match_cor, 
           'cos_matches': match_cos}
  verification = verification.append(row,ignore_index=True)

verification

In [None]:
thresh = 0.65
cos_verification = pd.DataFrame(columns=['reference_identity','reference_ethnicity','TP','TN','FP','FN','matches','not_matches'])
for i, cos_row in tqdm_notebook(enumerate(cos),total=len(cos)):
  identity = reference_identities[i]
  ethnicity = reference_ethnicities[i]
  matches = candidate_identities[cos_row>thresh]  
  not_matches = candidate_identities[cos_row<=thresh]
  TP = sum(matches == identity)
  FP = sum(matches != identity)
  TN = sum(not_matches != identity)
  FN = sum(not_matches == identity)
  

  row = {'reference_identity': identity,
           'reference_ethnicity': ethnicity, 
           'TP': TP,
           'TN': TN,
           'FP': FP,
           'FN': FN,
           'matches': matches, 
           'not_matches': not_matches}
  cos_verification = cos_verification.append(row,ignore_index=True)

cos_verification[cos_verification.FN == 0][cos_verification.FP == 0]

  0%|          | 0/11403 [00:00<?, ?it/s]

Unnamed: 0,reference_identity,reference_ethnicity,TP,TN,FP,FN,matches,not_matches
324,m.0974m7,African,2,29115,0,0,"[m.0974m7, m.0974m7]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
406,m.0k00nk,African,3,29114,0,0,"[m.0k00nk, m.0k00nk, m.0k00nk]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
441,m.01ky6fb,African,4,29113,0,0,"[m.01ky6fb, m.01ky6fb, m.01ky6fb, m.01ky6fb]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
542,m.05zl3c,African,2,29115,0,0,"[m.05zl3c, m.05zl3c]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
641,m.02z21c6,African,2,29115,0,0,"[m.02z21c6, m.02z21c6]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
...,...,...,...,...,...,...,...,...
11034,m.0h3ry9b,Indian,2,29115,0,0,"[m.0h3ry9b, m.0h3ry9b]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
11037,m.0593ll,Indian,2,29115,0,0,"[m.0593ll, m.0593ll]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
11195,m.04jb36f,Indian,2,29115,0,0,"[m.04jb36f, m.04jb36f]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
11200,m.01yv6p,Indian,2,29115,0,0,"[m.01yv6p, m.01yv6p]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."


In [None]:
African_cos = cos_verification.loc[cos_verification.reference_ethnicity == 'African']
print(African_cos.sum(axis=0))

reference_identity     m.0c7mh2m.026tq86m.02wz3ncm.0c012t4m.0p8s_gxm....
reference_ethnicity    AfricanAfricanAfricanAfricanAfricanAfricanAfri...
TP                                                                  4728
TN                                                              86693849
FP                                                                125651
FN                                                                  2666
dtype: object


In [None]:
Asian_cos = cos_verification.loc[cos_verification.reference_ethnicity == 'Asian']
Asian_cos.sum(axis=0)

reference_identity     m.0281bfhm.0421bfhm.02r80dqm.06w18lbm.02tcmtm....
reference_ethnicity    AsianAsianAsianAsianAsianAsianAsianAsianAsianA...
TP                                                                  4613
TN                                                              72371880
FP                                                                 93146
FN                                                                  2574
dtype: object

In [None]:
Indian_cos = cos_verification.loc[cos_verification.reference_ethnicity == 'Indian']
Indian_cos.sum(axis=0)

reference_identity     m.05zdk2m.04064_hm.06414fsm.0dtsglm.02ww2f6m.0...
reference_ethnicity    IndianIndianIndianIndianIndianIndianIndianIndi...
TP                                                                  4944
TN                                                              86531724
FP                                                                 54936
FN                                                                  2354
dtype: object

In [None]:
Caucasian_cos = cos_verification.loc[cos_verification.reference_ethnicity == 'Caucasian']
Caucasian_cos.sum(axis=0)

reference_identity     m.0cqh0qm.02r6ydbm.0415yw4m.049pq8m.03cdg6lm.0...
reference_ethnicity    CaucasianCaucasianCaucasianCaucasianCaucasianC...
TP                                                                  3914
TN                                                              86110744
FP                                                                 10104
FN                                                                  3324
dtype: object

In [None]:
thresh = 0.65
cor_verification = pd.DataFrame(columns=['reference_identity','reference_ethnicity','TP','TN','FP','FN','matches','not_matches'])
for i, cor_row in tqdm_notebook(enumerate(cor),total=len(cor)):
  identity = reference_identities[i]
  ethnicity = reference_ethnicities[i]
  matches = candidate_identities[cor_row>thresh]  
  not_matches = candidate_identities[cor_row<=thresh]
  TP = sum(matches == identity)
  FP = sum(matches != identity)
  TN = sum(not_matches != identity)
  FN = sum(not_matches == identity)
  

  row = {'reference_identity': identity,
           'reference_ethnicity': ethnicity, 
           'TP': TP,
           'TN': TN,
           'FP': FP,
           'FN': FN,
           'matches': matches, 
           'not_matches': not_matches}
  cor_verification = cor_verification.append(row,ignore_index=True)

cor_verification[cor_verification.FN == 0][cor_verification.FP == 0]

  0%|          | 0/11403 [00:00<?, ?it/s]

Unnamed: 0,reference_identity,reference_ethnicity,TP,TN,FP,FN,matches,not_matches
38,m.03dwmn,African,1,29116,0,0,[m.03dwmn],"[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
71,m.033mrs,African,3,29114,0,0,"[m.033mrs, m.033mrs, m.033mrs]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
74,m.04_zbx,African,2,29115,0,0,"[m.04_zbx, m.04_zbx]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
76,m.0g5576,African,2,29115,0,0,"[m.0g5576, m.0g5576]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
112,m.0ngtn1l,African,2,29115,0,0,"[m.0ngtn1l, m.0ngtn1l]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
...,...,...,...,...,...,...,...,...
11341,m.026n0ps,Indian,1,29116,0,0,[m.026n0ps],"[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
11353,m.0h19lp,Indian,2,29115,0,0,"[m.0h19lp, m.0h19lp]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
11354,m.0cz9878,Indian,1,29116,0,0,[m.0cz9878],"[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."
11374,m.03h654p,Indian,4,29113,0,0,"[m.03h654p, m.03h654p, m.03h654p, m.03h654p]","[m.0c7mh2, m.0c7mh2, m.026tq86, m.026tq86, m.0..."


In [None]:
African_cor = cor_verification.loc[cor_verification.reference_ethnicity == 'African']
African_cor.sum(axis=0)

In [None]:
Asian_cor = cor_verification.loc[cor_verification.reference_ethnicity == 'Asian']
Asian_cor.sum(axis=0)

In [None]:
Indian_cor = cor_verification.loc[cor_verification.reference_ethnicity == 'Indian']
Indian_cor.sum(axis=0)

In [None]:
Caucasian_cor = cor_verification.loc[cor_verification.reference_ethnicity == 'Caucasian']
Caucasian_cor.sum(axis=0)

In [None]:
# Face Identification

'''
here i want to do face identification
basically take the outputs from the candidates and check against all references
take the reference image that has highest correlation/similarity
if the identities match then correct otherwise wrong
'''

cor_identification = pd.DataFrame(columns=['candidate_identity','candidate_ethnicity','reference_identity'])
for i, cor_row in tqdm_notebook(enumerate(cor.T),total=len(cor.T)):
    identity = candidate_identities[i]
    ethnicity = candidate_ethnicities[i]
    max_ref = np.argmax(cor_row)
    reference_identity = reference_identities[max_ref]
    match = 1 if identity == reference_identity else  0
    row = {'candidate_identity': identity,
            'candidate_ethnicity': ethnicity, 
            'reference_identity': reference_identity,
            'match': match}

    cor_identification = cor_identification.append(row,ignore_index=True)

cos_identification = pd.DataFrame(columns=['candidate_identity','candidate_ethnicity','reference_identity'])
for i, cos_row in tqdm_notebook(enumerate(cos.T),total=len(cos.T)):
    identity = candidate_identities[i]
    ethnicity = candidate_ethnicities[i]
    max_ref = np.argmax(cos_row)
    reference_identity = reference_identities[max_ref]
    match = 1 if identity == reference_identity else  0
    row = {'candidate_identity': identity,
            'candidate_ethnicity': ethnicity, 
            'reference_identity': reference_identity,
            'match': match}

    cos_identification = cos_identification.append(row,ignore_index=True)

cos_id_acc = pd.DataFrame(columns=['ethnicity','accuracy'])
for ethnicity in cos_identification.candidate_ethnicity.unique():
    eth_cos = cos_identification.loc[cos_identification.candidate_ethnicity == ethnicity]
    accuracy = sum(eth_cos.match)/len(eth_cos)
    #print(len(eth_cos))
    row = {'ethnicity': ethnicity,
            'accuracy': accuracy}

    cos_id_acc = cos_id_acc.append(row,ignore_index=True)
cos_id_acc

  0%|          | 0/29117 [00:00<?, ?it/s]

  0%|          | 0/29117 [00:00<?, ?it/s]

Unnamed: 0,ethnicity,accuracy
0,African,0.385718
1,Asian,0.432865
2,Caucasian,0.532053
3,Indian,0.532201


In [None]:
cor_id_acc = pd.DataFrame(columns=['ethnicity','accuracy'])
for ethnicity in cor_identification.candidate_ethnicity.unique():
    eth_cor = cor_identification.loc[cor_identification.candidate_ethnicity == ethnicity]
    accuracy = sum(eth_cor.match)/len(eth_cor)
    #print(len(eth_cos))
    row = {'ethnicity': ethnicity,
            'accuracy': accuracy}

    cor_id_acc = cor_id_acc.append(row,ignore_index=True)
cor_id_acc

Unnamed: 0,ethnicity,accuracy
0,African,0.384636
1,Asian,0.430639
2,Caucasian,0.532191
3,Indian,0.528775


In [None]:
torch.max(cos_sim(candidate_outputs,reference_outputs),dim=1)

array([0.6257814 , 0.35835811, 0.47202954, ..., 0.2597217 , 0.2957376 ,
       0.3611128 ], dtype=float32)

In [None]:
cos_sim(candidate_outputs,reference_outputs)[0].cpu().detach().numpy()

array([0.6257814 , 0.35835811, 0.47202954, ..., 0.2597218 , 0.2957372 ,
       0.361113  ], dtype=float32)

In [None]:
cor_identification = pd.DataFrame(columns=['candidate_identity','candidate_ethnicity','reference_identity'])
for i, cor_row in tqdm_notebook(enumerate(cor.T),total=len(cor.T)):
    identity = candidate_identities[i]
    ethnicity = candidate_ethnicities[i]
    max_ref = np.argmax(cor_row)
    reference_identity = reference_identities[max_ref]
    match = 1 if identity == reference_identity else  0
    row = {'candidate_identity': identity,
            'candidate_ethnicity': ethnicity, 
            'reference_identity': reference_identity,
            'match': match}

    cor_identification = cor_identification.append(row,ignore_index=True)
cor_identification

  0%|          | 0/29117 [00:00<?, ?it/s]

Unnamed: 0,candidate_identity,candidate_ethnicity,reference_identity,match
0,m.0c7mh2,African,m.0n_gk9j,0.0
1,m.0c7mh2,African,m.0c7mh2,1.0
2,m.026tq86,African,m.026tq86,1.0
3,m.026tq86,African,m.02qvybd,0.0
4,m.02wz3nc,African,m.0dm44t,0.0
...,...,...,...,...
29112,m.027nbyf,Indian,m.0hrgxh3,0.0
29113,m.027nbyf,Indian,m.027nbyf,1.0
29114,m.027nbyf,Indian,m.06zrlyg,0.0
29115,m.027nbyf,Indian,m.027nbyf,1.0


In [None]:
cos_identification = pd.DataFrame(columns=['candidate_identity','candidate_ethnicity','reference_identity'])
for i, cos_row in tqdm_notebook(enumerate(cos.T),total=len(cos.T)):
    identity = candidate_identities[i]
    ethnicity = candidate_ethnicities[i]
    max_ref = np.argmax(cos_row)
    reference_identity = reference_identities[max_ref]
    match = 1 if identity == reference_identity else  0
    row = {'candidate_identity': identity,
            'candidate_ethnicity': ethnicity, 
            'reference_identity': reference_identity,
            'match': match}

    cos_identification = cos_identification.append(row,ignore_index=True)
cos_identification

  0%|          | 0/29117 [00:00<?, ?it/s]

Unnamed: 0,candidate_identity,candidate_ethnicity,reference_identity,match
0,m.0c7mh2,African,m.0n_gk9j,0.0
1,m.0c7mh2,African,m.0c7mh2,1.0
2,m.026tq86,African,m.026tq86,1.0
3,m.026tq86,African,m.02qvybd,0.0
4,m.02wz3nc,African,m.0dm44t,0.0
...,...,...,...,...
29112,m.027nbyf,Indian,m.0hrgxh3,0.0
29113,m.027nbyf,Indian,m.027nbyf,1.0
29114,m.027nbyf,Indian,m.06zrlyg,0.0
29115,m.027nbyf,Indian,m.027nbyf,1.0


In [None]:
cos_id_acc = pd.DataFrame(columns=['ethnicity','accuracy'])
for ethnicity in cos_identification.candidate_ethnicity.unique():
    eth_cos = cos_identification.loc[cos_identification.candidate_ethnicity == ethnicity]
    accuracy = sum(eth_cos.match)/len(eth_cos)
    #print(len(eth_cos))
    row = {'ethnicity': ethnicity,
            'accuracy': accuracy}

    cos_id_acc = cos_id_acc.append(row,ignore_index=True)
cos_id_acc

7394
7187
7238
7298


Unnamed: 0,ethnicity,accuracy
0,African,0.385718
1,Asian,0.432865
2,Caucasian,0.532053
3,Indian,0.532201


In [34]:
import platform
platform.platform()

'macOS-12.4-arm64-arm-64bit'

In [35]:
torch.has_mps

AttributeError: module 'torch' has no attribute 'has_mps'