# Classifier for People Wearing Glasses

The task is to implement a classifier for images containing a human face that separates people wearing glasses from everyone else. The approach should be developed with an emphasis on its later use in a `real-time mobile application`.

Limitations:
- common dependencies (opencv, dlib, numpy, scipy, tensorflow, ...) can be used, but they must be included in the installation instructions
- any pre-trained models or ready-made algorithms can be used
- any publicly available datasets for training and testing are allowed
- languages: C++, Python 

In [1]:
# Ensure edits to libraries are loaded and plotting is shown in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import os, sys
# Move the working directory one level up; the relative paths used later in the
# notebook ("./data/...", "./weights/...", "Pytorch_Retinaface") are resolved from there.
# NOTE(review): this is not idempotent — re-running the cell moves up one more
# level each time.
os.chdir("..")
os.getcwd()

'/work'

In [3]:
import os
import glob
import numpy as np
import cv2

# import matplotlib
# matplotlib.use('tkagg')
from matplotlib import pyplot as plt

In [4]:
import torch
torch.cuda.is_available()

True

In [5]:
import torch.nn as nn

# Dataset

A sample set of 40 images was provided by the team for testing (20 with glasses and 20 without). In addition, I considered three more datasets for this project. Details of all datasets are listed below:

1. Sample Test Set:
    - with_glasses = 20 jpeg images
    - without_glasses = 20 jpeg images

<br> 

2. SoF
    - Dataset page: https://sites.google.com/view/sof-dataset
    - Images download link: https://drive.google.com/file/d/1ufydwhMYtOhxgQuHs9SjERnkX0fXxorO/
    - Metadata download link: https://drive.google.com/file/d/0BwO0RMrZJCioaTVURnZoZG5jUVE/view?usp=sharing&resourcekey=0-F8-ejyF8NX4GC129ustqLg 

<br> 

3. Facial Landmark Detection by Deep Multi-task Learning:
    - Dataset Page: http://mmlab.ie.cuhk.edu.hk/projects/TCDCN.html
    - Images download link: http://mmlab.ie.cuhk.edu.hk/projects/TCDCN/data/MTFL.zip

<br> 

4. MeGlass:
    - Dataset Page: https://github.com/cleardusk/MeGlass/tree/master
    - Images download link: https://drive.google.com/file/d/1V0c8p6MOlSFY5R-Hu9LxYZYLXd8B8j9q/view?usp=sharing
    - Metadata download link: https://github.com/cleardusk/MeGlass/blob/master/meta.txt

## MTFL dataset

In [6]:
from pathlib import Path
import pandas as pd

In [None]:
# Root folder of the MTFL dataset, relative to the current working directory.
mtfl_dataset = Path("data") / "MTFL"
mtfl_dataset

In [None]:
# Load the MTFL training annotations: one space-separated row per image holding
# the image path, five landmark x-coordinates, five landmark y-coordinates and
# four attribute codes.
mtfl_columns = (
    ['Path']
    + ['x1', 'x2', 'x3', 'x4', 'x5', 'y1', 'y2', 'y3', 'y4', 'y5']
    + ['Gender', 'Smile', 'Glasses', 'Pose']
)
train = pd.read_csv(
    mtfl_dataset.joinpath('training.txt'),
    sep=' ',
    header=None,
    skipinitialspace=True,
    names=mtfl_columns,
)
# Turn the backslashes in the stored paths into forward slashes. regex=False
# makes the backslash a literal on every pandas version; as a regular
# expression a lone backslash is not a valid pattern.
train['Path'] = train['Path'].str.replace('\\', '/', regex=False)
print(train.shape)

In [None]:
# Legend for the MTFL annotation columns, kept as a bare string literal. It is
# not the last expression of the cell, so only `train.head()` is displayed.
'''
--x1...x5,y1...y5: the locations for left eye, right eye, nose, left mouth corner, right mouth corner.
--gender: 1 for male, 2 for female
--smile: 1 for smiling, 2 for not smiling
--glasses: 1 for wearing glasses, 2 for not wearing glasses.
--head pose: 1 for left profile, 2 for left, 3 for frontal, 4 for right, 5 for right profile
'''

train.head()

In [None]:
# Display names for the MTFL glasses codes, used in plot titles.
mtfl_glasses_map = dict(zip((1, 2), ('With Glasses', 'No Glasses')))
# Pose codes 1..5 rendered as signed angles (shown with a "deg" suffix in the plot title).
mtfl_pose_map = {
    code: angle
    for code, angle in enumerate(('-60', '-30', '0', '+30', '+60'), start=1)
}

In [None]:
# Keep only the rows whose glasses code is 1 (wearing glasses).
wearing_glasses = train['Glasses'] == 1
all_glass_images = train[wearing_glasses]

In [None]:
# Visualize one random "with glasses" image together with its five landmarks.
# The row index is drawn from the actual number of rows so it can never run
# past the end of `all_glass_images`.
n = np.random.randint(len(all_glass_images))
row = all_glass_images.iloc[n]

img_path = mtfl_dataset.joinpath(row['Path'])
img0 = cv2.imread(str(img_path))
if img0 is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Could not read image: {img_path}")
# OpenCV loads images as BGR; convert so matplotlib shows the right colours.
img0 = cv2.cvtColor(img0, cv2.COLOR_BGR2RGB)
print(img0.shape)

# One colour per landmark, in annotation order: left eye, right eye, nose,
# left mouth corner, right mouth corner.
for k, colour in enumerate('rbygw', start=1):
    plt.scatter(row[f'x{k}'], row[f'y{k}'], c=colour)
plt.imshow(img0)
plt.title(f"{mtfl_glasses_map[int(row['Glasses'])]} Pose:{mtfl_pose_map[int(row['Pose'])]} deg")
plt.show()

# Approach-2: Finetune Retinaface model

In [7]:
# Make the Pytorch_Retinaface folder importable (presumably the source of the
# `models` and `data` imports used further down — confirm). The guard keeps
# sys.path free of duplicate entries when the cell is re-run.
if "Pytorch_Retinaface" not in sys.path:
    sys.path.append("Pytorch_Retinaface")

## Step-1: MTFL Loader for Retinaface Model

In [8]:
import os
import torch
import pandas as pd
from PIL import Image

In [9]:
from torch.utils.data import Dataset
from torchvision.transforms import transforms
from torch.utils.data import DataLoader

In [None]:
# Preprocessing applied to every MTFL image: resize to a fixed 320x320 input,
# convert to a tensor, then normalize each channel with the given mean/std.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
mtfl_steps = [
    transforms.Resize((320, 320)),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
mtfl_transforms = transforms.Compose(mtfl_steps)

In [None]:
# Build the training and validation datasets. Both read images from the same
# folder and share one transform; validation uses the MTFL testing annotations.
mtfl_common = dict(img_dir='./data/MTFL', transform=mtfl_transforms)

train_dataset = MTFLDataset(annotations_file='./data/MTFL/training.txt', **mtfl_common)
val_dataset = MTFLDataset(annotations_file='./data/MTFL/testing.txt', **mtfl_common)

In [None]:
# Batch both datasets with identical settings; only the training data is shuffled.
loader_options = dict(batch_size=8, num_workers=1)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

In [None]:
# def imshow(img, title):
#     np_img = img.numpy().transpose((1, 2, 0))
#     plt.imshow(np_img)
#     plt.title(title)
#     plt.show()

# # Iterate through the data loader and display the first 10 images along with their labels
# num_images = 10
# for i, (images, labels) in enumerate(train_loader):
#     if i >= num_images:
#         break
#     imshow(images[0], f'Label: {labels.item()} (Glasses)' if labels.item() == 1 else f'Label: {labels.item()} (No glasses)')    

In [None]:
class MTFLDataset(Dataset):
    """MTFL images paired with a binary glasses label.

    Each row of the annotation file holds the image path (backslash-separated,
    relative to ``img_dir``) in the first column and the glasses code in the
    second-to-last column (1 = wearing glasses).

    Args:
        annotations_file: Path to the whitespace-separated annotation file.
        img_dir: Folder the image paths in the annotation file are relative to.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, annotations_file, img_dir, transform=None):
        # sep=r"\s+" is the documented replacement for the deprecated
        # delim_whitespace=True and splits on the same runs of whitespace.
        self.annotations = pd.read_csv(
            annotations_file, header=None, sep=r"\s+", skipinitialspace=True
        )
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``; label is 1 for glasses, else 0."""
        relative_path = self.annotations.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, *relative_path.split("\\"))

        # Open inside a context manager so the file handle is released, and
        # force three channels so grayscale files still fit transforms that
        # normalize with three per-channel statistics.
        with Image.open(img_path) as handle:
            image = handle.convert("RGB")

        glasses = self.annotations.iloc[idx, -2]
        label = torch.tensor(1 if glasses == 1 else 0, dtype=torch.long)

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [None]:
class MTFLDataset(Dataset):
    """MTFL samples as ``(image, target)`` pairs with a 15-value target row.

    Annotation rows are laid out as: path, x1..x5, y1..y5, gender, smile,
    glasses, pose. The target produced for each image is a ``(1, 15)`` array:

    * columns 0-3: bounding box ``x_min, y_min, x_max, y_max``
    * columns 4-13: the five landmarks as interleaved ``x, y`` pairs
    * column 14: ``1`` when the glasses code is 1, otherwise ``-1``

    NOTE(review): the annotation rows carry no face box, so the box is taken as
    the extent of the five landmarks clipped to the image — confirm this proxy
    suits the downstream loss.

    Args:
        annotations_file: Path to the whitespace-separated annotation file.
        img_dir: Folder the image paths in the annotation file are relative to.
        transform: Stored for interface compatibility; not applied by this class.
        preproc: Optional callable ``preproc(img, target) -> (img, target)``
            applied to the BGR image array and the target before returning.
    """

    def __init__(self, annotations_file, img_dir, transform=None, preproc=None):
        # sep=r"\s+" is the documented replacement for the deprecated
        # delim_whitespace=True and splits on the same runs of whitespace.
        self.annotations = pd.read_csv(
            annotations_file, header=None, sep=r"\s+", skipinitialspace=True
        )
        self.img_dir = img_dir
        self.transform = transform
        self.preproc = preproc

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.annotations)

    @staticmethod
    def _build_target(xs, ys, has_glasses, width, height):
        """Pack five landmarks and the glasses flag into a ``(1, 15)`` array."""
        xs = np.asarray(xs, dtype=float)
        ys = np.asarray(ys, dtype=float)

        target = np.zeros((1, 15))
        # Bounding box: landmark extent, kept inside the image.
        target[0, 0] = np.clip(xs.min(), 0, width)
        target[0, 1] = np.clip(ys.min(), 0, height)
        target[0, 2] = np.clip(xs.max(), 0, width)
        target[0, 3] = np.clip(ys.max(), 0, height)
        # Landmarks: x values land on columns 4, 6, ..., 12 and y values on 5, 7, ..., 13.
        target[0, 4:14:2] = xs
        target[0, 5:14:2] = ys
        target[0, 14] = 1 if has_glasses else -1
        return target

    def __getitem__(self, idx):
        """Return ``(image_tensor, target)`` for row ``idx``.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        row = self.annotations.iloc[idx]
        img_path = os.path.join(self.img_dir, *row.iloc[0].split("\\"))
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising on unreadable files.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        height, width, _ = img.shape

        # Columns 1-5 hold x1..x5 and columns 6-10 hold y1..y5.
        xs = row.iloc[1:6].to_numpy(dtype=float)
        ys = row.iloc[6:11].to_numpy(dtype=float)
        target = self._build_target(xs, ys, row.iloc[-2] == 1, width, height)

        if self.preproc is not None:
            img, target = self.preproc(img, target)

        return torch.from_numpy(img), target

In [10]:
annotation = np.zeros((1, 15))

In [11]:
annotation.shape

(1, 15)

## Step-2: Loading pretrained weights

In [None]:
import torch.optim as optim
from torch.nn import BCEWithLogitsLoss

In [None]:
from models.retinaface import RetinaFace
from data import WiderFaceDetection, detection_collate, preproc, cfg_mnet, cfg_re50
# from utils import decode

In [None]:
retinaface_weights = "./weights/mobilenet0.25_Final.pth"

In [None]:
cfg_mnet

In [None]:
# Modify the output channels in the model
num_classes = 2  # Glasses or no glasses
# NOTE(review): this only stores the class count in the config dict; whether
# RetinaFace reads cfg['num_classes'] is not visible here — confirm in the model code.
cfg_mnet['num_classes'] = num_classes

In [None]:
def check_keys(model, pretrained_state_dict):
    """Report how the checkpoint's keys overlap with the model's state dict.

    Prints three counts: model keys missing from the checkpoint, checkpoint
    keys the model does not have, and keys present in both.

    Returns:
        True when at least one key is shared.

    Raises:
        AssertionError: if the checkpoint shares no key with the model.
    """
    checkpoint_names = set(pretrained_state_dict.keys())
    model_names = set(model.state_dict().keys())

    shared = model_names.intersection(checkpoint_names)
    only_in_checkpoint = checkpoint_names.difference(model_names)
    only_in_model = model_names.difference(checkpoint_names)

    report = (
        ('Missing keys:{}', only_in_model),
        ('Unused checkpoint keys:{}', only_in_checkpoint),
        ('Used keys:{}', shared),
    )
    for template, names in report:
        print(template.format(len(names)))

    assert len(shared) > 0, 'load NONE from pretrained checkpoint'
    return True


def remove_prefix(state_dict, prefix):
    """Return a new dict whose keys have the leading ``prefix`` removed.

    Old-style checkpoints store every parameter name with a common prefix
    such as 'module.'. Keys that do not start with ``prefix`` are kept as-is,
    and only the first occurrence of the prefix is stripped.
    """
    print('remove prefix \'{}\''.format(prefix))
    stripped = {}
    for name, value in state_dict.items():
        if name.startswith(prefix):
            # The prefix sits at position 0, so the tail of the partition is the remainder.
            name = name.partition(prefix)[2]
        stripped[name] = value
    return stripped

def load_model(model, pretrained_path, load_to_cpu):
    """Load pretrained weights from ``pretrained_path`` into ``model``.

    Args:
        model: Module that receives the weights.
        pretrained_path: Checkpoint file to read.
        load_to_cpu: When true, tensors stay on the CPU; otherwise they are
            moved to the current CUDA device while loading.

    Returns:
        The same ``model`` with the matching weights loaded. Loading is
        non-strict, so keys that do not match are skipped.
    """
    print('Loading pretrained model from {}'.format(pretrained_path))

    if load_to_cpu:
        def place(storage, loc):
            return storage
    else:
        device = torch.cuda.current_device()

        def place(storage, loc):
            return storage.cuda(device)

    # NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
    checkpoint = torch.load(pretrained_path, map_location=place)

    # Checkpoints may wrap the weights under a "state_dict" entry.
    if "state_dict" in checkpoint.keys():
        weights = checkpoint['state_dict']
    else:
        weights = checkpoint
    weights = remove_prefix(weights, 'module.')

    check_keys(model, weights)
    model.load_state_dict(weights, strict=False)
    return model

In [None]:
# net and model
# Build RetinaFace from the cfg_mnet config and load the pretrained weights
# onto the CPU (third argument of load_model is load_to_cpu=True).
net = RetinaFace(cfg=cfg_mnet, phase = 'test')
net = load_model(net, retinaface_weights, True)
# NOTE(review): the network is created with phase='test' and switched to eval
# mode here, yet a training loop follows later in the notebook — confirm this
# is intended for fine-tuning.
net.eval()
print('Finished loading model!')
print(net)

In [None]:
# Fine-tune only the classification heads: turn gradients off for every
# parameter of the network, then back on for the parameters under ClassHead.
net.requires_grad_(False)
net.ClassHead.requires_grad_(True)

In [None]:
# Run on the GPU when one is available and fall back to the CPU otherwise,
# so this cell does not fail on CPU-only machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = net.to(device)

In [None]:
# Loss used for the glasses / no-glasses output.
classification_loss = BCEWithLogitsLoss()

# SGD over the parameters that still require gradients; frozen ones are left out.
trainable_params = [p for p in net.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable_params, lr=0.001, momentum=0.9, weight_decay=5e-4)

In [None]:
num_epochs = 10

In [None]:
# Training loop
# Iterates over train_loader for num_epochs epochs, updating only the
# parameters handed to the optimizer.
# NOTE(review): confirm the following before relying on this loop:
#   - `logits, _ = net(images)` assumes the model returns exactly two values.
#   - `logits` is reshaped to (batch, num_classes, -1) while the loss target is
#     `labels.view(-1, 1)`; BCEWithLogitsLoss is documented to expect input and
#     target of the same shape, with floating-point targets.
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        logits, _ = net(images)
        logits = logits.view(logits.size(0), num_classes, -1)

        # Calculate loss
        loss = classification_loss(logits, labels.view(-1, 1))

        # Backward pass and optimization
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()
        optimizer.step()

        # Print progress
        if (i + 1) % 10 == 0:  # report every 10th batch
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item()}')

In [12]:
import glob

In [21]:
# Collect every entry found one level below each category folder of the
# sample test set.
testset_folder = "./data/sample_test/"
all_files = [
    path
    for category_dir in glob.glob(testset_folder+"/*")
    for path in glob.glob(category_dir+"/*")
]

In [23]:
os.path.basename(all_files[0])

'11.jpg'

In [19]:
glob.glob(testset_folder+"/*")

['./data/sample_test/with_glasses', './data/sample_test/without_glasses']