<a href="https://colab.research.google.com/github/neelpawarcmu/deep-learning-course-projects/blob/main/Face_detection_and_verification_with_Resnet_50_design.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Summary

We build a ResNet-50 model from scratch, following the architecture and training methodology described in [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf), and train it for face classification on a dataset of roughly 400,000 images. We then use the embeddings produced by the trained model for a face verification task on a similarly sized dataset.

## Imports and configs

In [None]:
from google.colab import drive
from IPython.display import clear_output
drive.mount('/content/drive')
!mkdir /content/drive/MyDrive/IDL-Kaggle/hw2/trained_models

In [None]:
# Report the NVIDIA GPU status of this runtime.
!nvidia-smi

In [None]:
#from google.colab import drive
#drive.mount('/content/drive')
#%cd '/content/drive/MyDrive/'
#!mkdir IDL-Kaggle
#!mkdir IDL-Kaggle/hw2
#!mkdir IDL-Kaggle/hw2/face-detection
#!mkdir IDL-Kaggle/hw2/face-verification

## Kaggle download and unzip

In [None]:
#Intall Kaggle API and create kaggle directory
!pip3 install kaggle==1.5.6
!mkdir .kaggle

#This data is used to login  into your Kaggle account
import json
token = {"username":"neelpawarcmu","key":"5e1b7eca0660a26aeb64eee16d6e6b68"}
with open('/content/.kaggle/kaggle.json', 'w') as file:
    json.dump(token, file)


In [None]:
!chmod 600 /content/.kaggle/kaggle.json
!cp /content/.kaggle/kaggle.json /root/.kaggle/
!kaggle config set -n path -v /content

In [None]:
!chmod 600 /content/.kaggle/kaggle.json
!cp /content/.kaggle/kaggle.json /root/.kaggle/
!kaggle config set -n path -v /content

In [None]:
# download the main classification data and the face-verification data
!kaggle competitions download -c idl-fall21-hw2p2s1-face-classification --force
!kaggle competitions download -c idl-fall21-hw2p2s2-face-verification --force

In [None]:
# download toy classification data (a much smaller training set than the main
# competition: ~40k vs ~380k images, per the loadData summary further down)
!kaggle competitions download -c idl-fall21-hw2p2s1-face-classification-toy --force

In [None]:
# unzip main data: the classification archive goes into ./main and the
# verification archive into ./main/verification
from IPython.display import clear_output 
!unzip /content/competitions/idl-fall21-hw2p2s1-face-classification/idl-fall21-hw2p2s1-face-classification.zip -d main
!unzip competitions/idl-fall21-hw2p2s2-face-verification/idl-fall21-hw2p2s2-face-verification.zip -d main/verification

# drop the unzip output from the cell
clear_output()

In [None]:
# unzip toy data into ./toy
!unzip /content/competitions/idl-fall21-hw2p2s1-face-classification-toy/idl-fall21-hw2p2s1-face-classification-toy.zip -d toy

# drop the unzip output from the cell
# (clear_output is imported in the drive-mount and main-unzip cells)
clear_output()

## Imports

In [None]:
import os
import numpy as np
from PIL import Image

import torch
import torchvision   
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
!mkdir saved_datasets

## Torchvision DataSet and DataLoader

In [None]:
def loadData(nametag, filemap, load, save, batch_size, num_workers=8):
  """Build (or restore from cache) the train/validation datasets and dataloaders.

  Args:
    nametag: Label printed above the size summary.
    filemap: Dict with the keys 'save_path' (filename prefix of the cached
      objects), 'root_train_folder' and 'root_val_folder' (ImageFolder roots).
    load: If True, restore the four objects with torch.load instead of
      building them from the image folders.
    save: If True, cache freshly built objects with torch.save. Objects that
      were just loaded are not written back, since the files already hold them.
    batch_size: Batch size of both dataloaders.
    num_workers: Worker processes per dataloader (default 8, the value that
      was previously hard-coded).

  Returns:
    Dict with the keys 't_set', 't_loader', 'v_set' and 'v_loader'.
  """
  prefix = filemap['save_path']
  cache_files = {
      't_set': prefix + 'train_dataset.pt',
      't_loader': prefix + 'train_dataloader.pt',
      'v_set': prefix + 'val_dataset.pt',
      'v_loader': prefix + 'val_dataloader.pt',
  }

  if load:
    # NOTE: torch.load unpickles arbitrary Python objects; only point this at
    # cache files that this notebook produced itself.
    data = {key: torch.load(path) for key, path in cache_files.items()}
  else:
    transforms = torchvision.transforms
    # Both splits are normalised with the same per-channel mean/std.
    normalize = transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
    # Augmentation is applied to the training split only.
    train_transform = transforms.Compose([
        transforms.RandomRotation(20),
        transforms.RandomHorizontalFlip(0.5),
        transforms.ColorJitter(0.5, 0.5, 0.5),
        transforms.ToTensor(),
        normalize,
    ])
    val_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_dataset = torchvision.datasets.ImageFolder(root = filemap['root_train_folder'],
                                                     transform = train_transform)
    val_dataset = torchvision.datasets.ImageFolder(root = filemap['root_val_folder'],
                                                   transform = val_transform)
    data = {
        't_set': train_dataset,
        't_loader': torch.utils.data.DataLoader(train_dataset, batch_size = batch_size,
                                                shuffle = True, num_workers = num_workers),
        'v_set': val_dataset,
        'v_loader': torch.utils.data.DataLoader(val_dataset, batch_size = batch_size,
                                                shuffle = False, num_workers = num_workers),
    }

  if save and not load:
    for key, path in cache_files.items():
      torch.save(data[key], path)

  print('---------------------------------')
  print(nametag)
  print('---------------------------------')
  print('train data:', len(data['t_set']), len(data['t_set'].classes))
  print('val data:', len(data['v_set']), len(data['v_set'].classes))
  print('train dataloader:', len(data['t_loader']))
  print('val dataloader:', len(data['v_loader']))
  print('\n')

  return data

In [None]:
#hash maps that store specific path names for saving and loading datasets and dataloaders
!mkdir main
!mkdir toy
#old path: face_detection/toy_dataset/
toy_paths = {
    'save_path' : 'saved_datasets/toy_', 
    'root_train_folder' : 'toy/train_data',
    'root_val_folder' : 'toy/val_data',
}

main_paths = {
    'save_path' : 'saved_datasets/main_',
    'root_train_folder' : 'main/train_data',
    'root_val_folder' : 'main/val_data',
}

In [None]:
# create datasets and dataloaders by calling our loadData function
!mkdir saved_datasets
toymap = loadData('Toy classification data', toy_paths, load=False, save=True, batch_size=128)
mainmap = loadData('Main classification data', main_paths, load=False, save=True, batch_size=128)

mkdir: cannot create directory ‘saved_datasets’: File exists


  cpuset_checked))


---------------------------------
Toy classification data
---------------------------------
train data: 39841 4000
val data: 8000 4000
train dataloader: 312
val dataloader: 63


---------------------------------
Main classification data
---------------------------------
train data: 380638 4000
val data: 8000 4000
train dataloader: 2974
val dataloader: 63




In [None]:
# Choose which prepared data map ('toy' or 'main') the rest of the notebook uses.
dataset_in_use = 'main'
#dataset_in_use = 'toy'

active_map = toymap if dataset_in_use == 'toy' else mainmap
train_dataset, train_dataloader = active_map['t_set'], active_map['t_loader']
val_dataset, val_dataloader = active_map['v_set'], active_map['v_loader']

In [None]:
# Print the sizes of the active splits and loaders as a sanity check.
print('--------------Active data---------------')
for split_name, split_set in (('train', train_dataset), ('val', val_dataset)):
    print(split_name + ' data:', len(split_set), len(split_set.classes))
for split_name, split_loader in (('train', train_dataloader), ('val', val_dataloader)):
    print(split_name + ' dataloader:', len(split_loader))
print('----------------------------------------')

--------------Active data---------------
train data: 380638 4000
val data: 8000 4000
train dataloader: 2974
val dataloader: 63
----------------------------------------


## Residual Block

Resnet: https://arxiv.org/pdf/1512.03385.pdf

The block below shows the basic use of a shortcut (skip) connection in ResNet.

### ResNet-50

In [None]:
class Bottleneck(nn.Sequential):
  """Bottleneck residual block: 1x1 conv -> 3x3 conv -> 1x1 conv, plus a shortcut.

  The last 1x1 convolution widens the block to `output_channel_size * 4`
  channels. The shortcut is the identity when the input already has that
  shape; otherwise it is a strided 1x1 convolution followed by batch norm so
  that the two branches can be added.

  Args:
    input_channel_size: Channels of the incoming feature map.
    output_channel_size: Width of the inner convolutions; the block outputs
      `output_channel_size * 4` channels.
    kernel_size: Unused; kept only so existing positional callers keep working.
    stride: Stride of the 3x3 convolution (and of the projection shortcut).
  """
  def __init__(self, input_channel_size, output_channel_size, kernel_size=1, stride=1):
    super().__init__()
    self.expansion = expansion = 4
    expanded_channels = output_channel_size * expansion

    # A 1x1 convolution must not be padded, otherwise every block would grow
    # the feature map by two pixels per side pair.
    self.conv1 = nn.Conv2d(input_channel_size, output_channel_size, kernel_size=1, stride=1, bias=False)
    self.bn1 = nn.BatchNorm2d(output_channel_size)
    self.conv2 = nn.Conv2d(output_channel_size, output_channel_size, kernel_size=3, stride=stride, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(output_channel_size)
    self.relu = nn.ReLU()
    self.conv3 = nn.Conv2d(output_channel_size, expanded_channels, kernel_size=1, stride=1, bias=False)
    self.bn3 = nn.BatchNorm2d(expanded_channels)

    # The identity shortcut is only valid when BOTH the spatial size (stride 1)
    # and the channel count are unchanged; any other case needs a projection.
    if stride == 1 and input_channel_size == expanded_channels:
      self.shortcut = nn.Identity()
    else:
      self.shortcut = nn.Sequential(nn.Conv2d(input_channel_size, expanded_channels, kernel_size=1, stride=stride),
                                    nn.BatchNorm2d(expanded_channels))

  def forward(self, x):
    """Run the three convolutions in sequence and add the shortcut branch."""
    out = self.relu(self.bn1(self.conv1(x)))
    # Each stage consumes the previous stage's output, not the block input.
    out = self.relu(self.bn2(self.conv2(out)))
    out = self.bn3(self.conv3(out))
    return self.relu(out + self.shortcut(x))


class Resnet50(nn.Module):
    """ResNet-style classifier built from four stages of bottleneck blocks.

    The stem is a single 3x3, stride-1 convolution with batch norm and ReLU.
    It is followed by four stages of blocks, global average pooling and a
    linear classifier.

    Args:
        num_blocks: Number of blocks in each of the four stages.
        hiddens: Inner channel width of each of the four stages.
        num_features: Number of input channels.
        num_classes: Size of the classifier output.
        feat_dim: Currently unused; kept for interface compatibility.
    """
    def __init__(self, num_blocks, hiddens, num_features, num_classes, feat_dim=512):
        super().__init__()
        self.expansion = expansion = 4
        self.in_channel = 64

        self.conv1 = nn.Conv2d(num_features, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        self.block1 = self.stackBlocks(Bottleneck, hiddens[0], num_blocks[0], stride = 1)
        self.block2 = self.stackBlocks(Bottleneck, hiddens[1], num_blocks[1], stride = 2)
        self.block3 = self.stackBlocks(Bottleneck, hiddens[2], num_blocks[2], stride = 2)
        self.block4 = self.stackBlocks(Bottleneck, hiddens[3], num_blocks[3], stride = 2)

        self.relu = nn.ReLU()
        self.mean_pool = nn.AdaptiveAvgPool2d((1,1))
        self.flatten = nn.Flatten()
        # The classifier input width follows the last stage instead of a
        # hard-coded 512, so non-default `hiddens` also work.
        self.linear = nn.Linear(hiddens[3] * expansion, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        out = self.relu(self.bn1(self.conv1(x)))
        for stage in (self.block1, self.block2, self.block3, self.block4):
            out = stage(out)

        out = self.mean_pool(out)
        out = self.flatten(out)
        return self.linear(out)

    def stackBlocks(self, block, out_channel, num_blocks, stride):
        """Chain `num_blocks` blocks; only the first one uses `stride`.

        Updates `self.in_channel` to the output width of the stage so that the
        next stage is wired to the right number of input channels.
        """
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for block_stride in strides:
            # Pass the stride by keyword: the block's third positional
            # parameter is `kernel_size`, so a positional stride was silently
            # dropped and no stage ever downsampled.
            layers.append(block(self.in_channel, out_channel, stride=block_stride))
            self.in_channel = out_channel * self.expansion
        return nn.Sequential(*layers)

In [None]:
# Hyper-parameters, model, loss, optimizer and LR scheduler.
numEpochs = 200
in_features = 3 # RGB channels

learningRate = 0.1
weightDecay = 5e-5

num_classes = len(train_dataset.classes)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The model is sized from the values configured above rather than repeating
# the literals 3 and 4000, so switching datasets cannot desynchronise them.
network = Resnet50(num_blocks = [3, 4, 6, 3], 
                   hiddens = [64,128,256,512], 
                   num_features = in_features,
                   num_classes = num_classes)

network = network.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(network.parameters(), 
                            lr=learningRate, 
                            weight_decay=weightDecay, 
                            momentum=0.9
                            )
# The training loop steps this scheduler with the summed validation loss
# ("min" mode); the LR is multiplied by 0.9 when that loss stops improving.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 
                                                       mode="min", 
                                                       factor=0.9, 
                                                       patience=1, 
                                                       verbose=True
                                                       )
network

In [None]:
# Optionally resume from a saved checkpoint (set loadModel = True).
modelPath = '/content/drive/MyDrive/IDL-Kaggle/hw2/trained_models/'
epochToLoad = 'epoch-86.pt'#0-indexed
loadModel = False
if loadModel:
  # map_location lets a checkpoint that was saved on a GPU be restored on
  # whatever device this runtime actually has (including CPU-only).
  network.load_state_dict(torch.load(modelPath+epochToLoad, map_location=device))
  print("keys matched")

keys matched


In [None]:
# Train!

# Train for numEpochs epochs; after each epoch run validation, step the LR
# scheduler on the validation loss and save a checkpoint.
logInterval = 80  # number of training batches between loss reports
modelPath = '/content/drive/MyDrive/IDL-Kaggle/hw2/trained_models'

for epoch in range(numEpochs):

    # Train
    network.train()
    running_loss = 0.0
    for batch_num, (x, y) in enumerate(train_dataloader):
        optimizer.zero_grad()

        x, y = x.to(device), y.to(device)

        outputs = network(x)

        loss = criterion(outputs, y.long())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if batch_num % logInterval == logInterval - 1:
            # Average over the batches actually accumulated since the last
            # report (the divisor used to be 400 for an 80-batch window).
            print('Epoch: {}\tBatch: {}\tAvg-Loss: {:.4f}'.format(epoch, batch_num+1, running_loss/logInterval))
            running_loss = 0.0

    # Validate
    network.eval()
    num_correct = 0
    val_loss = 0.0
    # No gradients are needed for validation; skipping them saves memory and time.
    with torch.no_grad():
        for batch_num, (x, y) in enumerate(val_dataloader):
            x, y = x.to(device), y.to(device)
            outputs = network(x)
            num_correct += (torch.argmax(outputs, dim=1) == y).sum().item()

            val_loss_batch = criterion(outputs, y.long())
            val_loss += val_loss_batch.item()
    scheduler.step(val_loss)

    print('LR = ', optimizer.param_groups[0]['lr'])

    torch.save(network.state_dict(), os.path.join(modelPath, f'epoch-{epoch}.pt'))

    print('Epoch: {}, Validation Accuracy: {:.4f}'.format(epoch, num_correct / len(val_dataset)))

  cpuset_checked))
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch: 0	Batch: 80	Avg-Loss: 1.2482
Epoch: 0	Batch: 160	Avg-Loss: 1.4926
Epoch: 0	Batch: 240	Avg-Loss: 1.3845
Epoch: 0	Batch: 320	Avg-Loss: 1.3095
Epoch: 0	Batch: 400	Avg-Loss: 1.2611
Epoch: 0	Batch: 480	Avg-Loss: 1.2137
Epoch: 0	Batch: 560	Avg-Loss: 1.3385
Epoch: 0	Batch: 640	Avg-Loss: 1.2227
Epoch: 0	Batch: 720	Avg-Loss: 1.1676
Epoch: 0	Batch: 800	Avg-Loss: 1.1343
Epoch: 0	Batch: 880	Avg-Loss: 1.1028
Epoch: 0	Batch: 960	Avg-Loss: 1.0637
Epoch: 0	Batch: 1040	Avg-Loss: 1.0447
Epoch: 0	Batch: 1120	Avg-Loss: 1.0112
Epoch: 0	Batch: 1200	Avg-Loss: 0.9953
Epoch: 0	Batch: 1280	Avg-Loss: 0.9806
Epoch: 0	Batch: 1360	Avg-Loss: 0.9552
Epoch: 0	Batch: 1440	Avg-Loss: 0.9312
Epoch: 0	Batch: 1520	Avg-Loss: 0.9086
Epoch: 0	Batch: 1600	Avg-Loss: 0.8936
Epoch: 0	Batch: 1680	Avg-Loss: 0.8787
Epoch: 0	Batch: 1760	Avg-Loss: 0.8545
Epoch: 0	Batch: 1840	Avg-Loss: 0.8534
Epoch: 0	Batch: 1920	Avg-Loss: 0.8326
Epoch: 0	Batch: 2000	Avg-Loss: 0.8196
Epoch: 0	Batch: 2080	Avg-Loss: 0.8056
Epoch: 0	Batch: 2160	Avg-

In [None]:
class TestDataset(Dataset):
    """Unlabelled image dataset backed by a list of file paths.

    Args:
        file_list: Image paths; item `i` of the dataset is the image at
            `file_list[i]`.
    """
    def __init__(self, file_list):
        # Test images carry no labels, so only the paths are stored. The
        # previous version read an undefined `target_list` here and raised
        # NameError on construction.
        self.file_list = file_list

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        """Load image `index`, convert it to a tensor and normalise it."""
        img = Image.open(self.file_list[index])
        img = torchvision.transforms.ToTensor()(img)
        img = torchvision.transforms.Normalize(mean= (0.485, 0.456, 0.406), std= (0.229, 0.224, 0.225))(img)
        return img

In [None]:
def parse_data(datadir):
    """Collect the '<number>.jpg' files under `datadir`, ordered by number.

    The directory tree is walked recursively. In the returned list, position
    `i` holds the path of the file whose name is the integer `i`; later cells
    rely on this to map list positions back to file ids.

    Args:
        datadir: Root directory to search.

    Returns:
        List of file paths; empty if no .jpg file is found.

    Raises:
        ValueError: If a .jpg file name does not start with an integer, if
            two files share the same number, or if the numbers found are not
            exactly 0..n-1 (position `i` could then not mean file `i`).
    """
    path_by_number = {}
    for root, _directories, filenames in os.walk(datadir):
        for filename in filenames:
            if not filename.endswith('.jpg'):
                continue
            number = int(filename.split('.')[0])
            if number in path_by_number:
                raise ValueError('duplicate image number {} under {}'.format(number, datadir))
            path_by_number[number] = os.path.join(root, filename)

    numbers = sorted(path_by_number)
    if numbers != list(range(len(numbers))):
        raise ValueError('image numbers under {} are not contiguous from 0'.format(datadir))
    return [path_by_number[number] for number in numbers]

In [None]:
# ImageFolder view of the training data without augmentation; its
# class_to_idx mapping is used further down to turn predicted indices back
# into class-folder names.
eval_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
])
imageFolder_dataset = torchvision.datasets.ImageFolder(
    root='/content/main/train_data',
    transform=eval_transform,
)

In [None]:
# Unlabelled classification test images, served in file-list order (no shuffling).
test_dir = '/content/main/test_data'
file_list = parse_data(test_dir)
test_dataset = TestDataset(file_list)
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=128,
                         num_workers=8,
                         shuffle=False)

In [None]:
# Predict a class label for every test image.
network.eval()

# class_to_idx maps class-folder name -> index; invert it so predicted
# indices can be translated back to folder names.
reverse_dict = {
    value : key for key, value in imageFolder_dataset.class_to_idx.items()
}

predicted_labels = []
# Inference only: no gradients needed.
with torch.no_grad():
    # `test_loader` is the name the loader is created under in the cell above.
    for batch_num, x in enumerate(test_loader):
        x = x.to(device)
        outputs = network(x)

        pred_indices = torch.argmax(outputs, dim=1).view(-1).cpu().tolist()
        # The folder names are converted with int() because the submission
        # cell stores the labels as integers.
        predicted_labels.extend(int(reverse_dict[idx]) for idx in pred_indices)

# The submission cell calls predictions.astype(int), so keep this an ndarray.
predictions = np.array(predicted_labels, dtype=int)

In [None]:
# Write the classification submission: one row per test image, indexed by
# '<position>.jpg' under the header 'id'.
df = pd.DataFrame({'label': predictions.astype(int)})
df.index = df.index.astype(str) + '.jpg'
df.index.name = 'id'
df.to_csv("/content/drive/MyDrive/IDL-Kaggle/hw2/submission.csv")

In [None]:
class TestDataset(Dataset):
    """Unlabelled image dataset: item `i` is the normalised tensor of `file_list[i]`."""

    def __init__(self, file_list):
        self.file_list = file_list

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        """Open the image at `index`, convert it to a tensor and normalise it."""
        image = Image.open(self.file_list[index])
        to_tensor = torchvision.transforms.ToTensor()
        normalize = torchvision.transforms.Normalize(mean= (0.485, 0.456, 0.406), std= (0.229, 0.224, 0.225))
        return normalize(to_tensor(image))

In [None]:
def parse_data(datadir):
    """Collect the '<number>.jpg' files under `datadir`, ordered by number.

    The directory tree is walked recursively. In the returned list, position
    `i` holds the path of the file whose name is the integer `i`; the
    verification cells rely on this when they index embeddings by file number.

    Args:
        datadir: Root directory to search.

    Returns:
        List of file paths; empty if no .jpg file is found.

    Raises:
        ValueError: If a .jpg file name does not start with an integer, if
            two files share the same number, or if the numbers found are not
            exactly 0..n-1 (position `i` could then not mean file `i`).
    """
    path_by_number = {}
    for root, _directories, filenames in os.walk(datadir):
        for filename in filenames:
            if not filename.endswith('.jpg'):
                continue
            number = int(filename.split('.')[0])
            if number in path_by_number:
                raise ValueError('duplicate image number {} under {}'.format(number, datadir))
            path_by_number[number] = os.path.join(root, filename)

    numbers = sorted(path_by_number)
    if numbers != list(range(len(numbers))):
        raise ValueError('image numbers under {} are not contiguous from 0'.format(datadir))
    return [path_by_number[number] for number in numbers]

In [None]:
# Loader over every verification image, in file-list order (no shuffling).
# Note: this rebinds file_list and test_dataset from the classification step.
verification_dir = '/content/main/verification/verification_data'
file_list = parse_data(verification_dir)
test_dataset = TestDataset(file_list)
verification_loader = DataLoader(dataset=test_dataset,
                                 batch_size=128,
                                 num_workers=8,
                                 shuffle=False)

In [None]:
# Run every verification image through the network and keep one vector per
# image, in loader order.
# NOTE(review): network(...) returns the classifier outputs, so these vectors
# are logits rather than the pooled pre-classifier features; confirm this is
# the intended embedding.
network.eval()
network = network.to(device)  # moved out of the loop: once is enough
embeddings = []
# Inference only: no gradients needed.
with torch.no_grad():
    for batch_num, (feats) in enumerate(verification_loader):
        feature = network(feats.to(device)).cpu().numpy()
        embeddings.extend(feature)  # one row per image

In [None]:
# Create Validation ROC
compute_sim = nn.CosineSimilarity(0)
verf_val_predictions = []
true_labels = []

# print(len(embeddings))

# Now we need to use these features to make predictions
# open verification_pairs_val.txt
f = open('/content/main/verification/verification_pairs_val.txt', 'r')
for line in f:
    line = line.strip().split(' ')
    image1, image2, label = line.split(' ')
    true_labels.append(int(label))
    embedding1 = torch.tensor(embeddings[int(images[0].split('/')[1].split('.')[0])])
    embedding2 = torch.tensor(embeddings[int(images[1].split('/')[1].split('.')[0])])
    verf_val_predictions.append(compute_sim(embedding1, embedding2).item())


In [None]:
# Score the unlabelled test pairs: one cosine similarity per line of
# verification_pairs_test.txt, keyed by the (stripped) line itself.
network.eval()
compute_sim = nn.CosineSimilarity(0)
verf_predictions = []
with open('/content/main/verification/verification_pairs_test.txt', 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines
        images = line.split(' ')
        image1, image2 = images[0], images[1]
        # The number in an image's file name is used as its index into `embeddings`.
        embedding1 = torch.tensor(embeddings[int(image1.split('/')[1].split('.')[0])])
        embedding2 = torch.tensor(embeddings[int(image2.split('/')[1].split('.')[0])])
        output = compute_sim(embedding1, embedding2).item()
        verf_predictions.append((line, output))

In [None]:
# Write the verification submission (columns: id, Category).
import os

submission_path = "/content/drive/MyDrive/IDL-Kaggle/hw2/face_verification_test_results/submission_verif.csv"
# No earlier cell creates this results folder; without it to_csv fails.
os.makedirs(os.path.dirname(submission_path), exist_ok=True)

df = pd.DataFrame(verf_predictions, columns = ['id','Category'])
df.to_csv(submission_path, index=False)