In [1]:
import torch
import torchvision
import PIL
import numpy
import collections
import types
from scipy.spatial import distance
import os

In [2]:
def _forward_impl(self, x): # overwrite forward function from resnet-18
    x = self.conv1(x)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)
    
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)
    
    x = self.avgpool(x)
    x = torch.flatten(x,1)
    # x = self.fc(x) skip this step!!
    
    return x

# Pretrained ResNet-18, used here only as a fixed feature extractor.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument; kept as-is so older installs still work.
network = torchvision.models.resnet18(pretrained=True)
network.eval()  # switch the module to evaluation mode
# Bind the module-level _forward_impl onto this instance so that calling the
# network returns the pooled features instead of the fully connected output.
network._forward_impl = types.MethodType(_forward_impl, network)


def _replicate_channels(tensor):
    """Tile the tensor three times along its first dimension.

    The model expects three (RGB) channels; the images used here are
    grayscale, so the single channel is copied into all three.
    """
    return tensor.repeat(3, 1, 1)


def _add_batch_dimension(tensor):
    """Insert a leading dimension of size one (the batch dimension)."""
    return tensor.unsqueeze(0)


# Preprocessing steps, applied in list order to every image.
_preprocessing_steps = [
    torchvision.transforms.Resize(300),
    torchvision.transforms.CenterCrop(224),  # keep only the centre of the image
    torchvision.transforms.ToTensor(),  # PIL image -> tensor
    _replicate_channels,
    # Per-channel normalisation constants; presumably the statistics the
    # pretrained weights were trained with - TODO confirm.
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    ),
    _add_batch_dimension,
]
transform = torchvision.transforms.Compose(_preprocessing_steps)

def extract(path):
    """Compute the feature vector for a single image file.

    The image is opened with PIL, passed through the module-level
    ``transform`` pipeline and then through ``network`` with gradient
    tracking disabled.

    Args:
        path: Location of the image file to load.

    Returns:
        The network output as a flattened, 1-dimensional numpy array.

    Note:
        ``transform`` repeats the channel dimension three times, so the image
        is assumed to decode to a single channel.
    """
    with torch.no_grad():
        # PIL opens files lazily; the context manager makes sure the
        # underlying file handle is closed as soon as the pixel data has been
        # consumed by the transform, instead of lingering until garbage
        # collection while the whole dataset is being processed.
        with PIL.Image.open(path) as image:
            tensor = transform(image)  # preprocessing defined above
        feature = network(tensor)
        return feature.numpy().flatten()  # convert to a 1-dimensional vector
    
# Enroll the gallery: one reference feature vector per subject, taken from
# that subject's ".normal" image.
print("Enrolling Gallery...")
gallery = {}
for index in range(1,16):
    subject = F"subject{index:02d}" # subject01 ... subject15
    gallery[subject] = extract(F"yalefaces/{subject}.normal")


print("Scoring...")
# Maps variation -> [number of correct identifications, total number of identifications].
variations = collections.defaultdict(lambda: [0,0])
for filename in os.listdir("yalefaces"):
    path = os.path.join("yalefaces", filename) # create filepath

    if not os.path.isfile(path): # ignore directories
        continue

    # Split "<subject>.<variation>" into the subject and the (dot-prefixed) variation.
    probe_subject, variation = os.path.splitext(filename)
    probe_feature = extract(path)

    # Nearest gallery entry by Euclidean distance. min() always yields a
    # result for the current probe, so a stale match from a previous
    # iteration can never leak through (the former 1e8 sentinel allowed that
    # whenever no distance beat it). Ties resolve to the first gallery entry,
    # as before.
    best_subject = min(
        gallery,
        key=lambda gallery_subject: distance.euclidean(gallery[gallery_subject], probe_feature),
    )

    # NOTE(review): the ".normal" files in the directory are the gallery
    # images themselves, so those probes are compared against their own
    # features and are counted in the totals below.
    variations[variation][0] += int(best_subject == probe_subject) # +1 only on a correct identification
    variations[variation][1] += 1 # every probe counts towards the total

# print rates for each variation
for variation, results in variations.items():
    # variation[1:] drops the leading dot kept by os.path.splitext
    print(F"Recognized {results[0]:2} of {results[1]} faces ({100 * results[0] / results[1]:6.2f}%) with {variation[1:]}")

# print total accuracy: column-wise sum of the [correct, total] pairs
total = numpy.sum(list(variations.values()), axis=0)
print(F"\nAccuracy: {total[0]:3} of {total[1]} faces ({100 * total[0] / total[1]:6.2f}%)")


Enrolling Gallery...
Scoring...
Recognized  8 of 14 faces ( 57.14%) with centerlight
Recognized 10 of 15 faces ( 66.67%) with leftlight
Recognized 15 of 15 faces (100.00%) with normal
Recognized 13 of 15 faces ( 86.67%) with noglasses
Recognized 14 of 15 faces ( 93.33%) with happy
Recognized 14 of 15 faces ( 93.33%) with wink
Recognized 15 of 15 faces (100.00%) with sleepy
Recognized 15 of 15 faces (100.00%) with surprised
Recognized  8 of 15 faces ( 53.33%) with rightlight
Recognized 14 of 15 faces ( 93.33%) with sad
Recognized  9 of 15 faces ( 60.00%) with glasses

Accuracy: 135 of 164 faces ( 82.32%)
