For this I used a ResNet-34 trained on the classification data to get embeddings of faces, and used those embeddings to measure how far apart the two pictures in each pair were. I did not use center loss or other clustering-style loss functions (due to time constraints), but I expect my model would have performed much better on verification if I had.

In [None]:
import numpy as np
import torch 
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision   
import torch.nn as nn
import torch.nn.functional as F
import json
import pandas as pd
from datetime import datetime
import pdb 
from google.colab import drive
import pandas as pd
from psutil import virtual_memory
import os
from zipfile import ZipFile
import matplotlib.pyplot as plt
from PIL import Image
import random
import shutil
from PIL import Image
from scipy.spatial import distance 
#!pip install wandb --upgrade
#import wandb


In [None]:
class SetUpColab():

  def __init__(self):
    pass
  
  #Determines how much ram the runtime has
  @staticmethod
  def runtime_info():
    ram_gb = virtual_memory().total / 1e9
    print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))
    if ram_gb < 20:
      print('Not using a high-RAM runtime')
    else:
      print('You are using a high-RAM runtime!')
    !nvidia-smi
    
  @staticmethod
  def mount_google_drive():
    drive.mount('/content/gdrive')
  
  #Sets up environement for use with kaggle api
  @staticmethod
  def set_up_kaggle():
    !pip uninstall -y kaggle
    !pip install --upgrade pip
    !pip install kaggle==1.5.6
    !mkdir .kaggle

    token = {"username":"nicholasmagal","key":"9bf671834d75b58fac2b037da15f4cf0"}
    with open('/content/.kaggle/kaggle.json', 'w') as file:
      json.dump(token, file)
    
    for i in range(2):
      !chmod 600 /content/.kaggle/kaggle.json
      !cp /content/.kaggle/kaggle.json /root/.kaggle/
      !kaggle config set -n path -v /content
  
  @staticmethod
  def change_dir(path):
    os.chdir(path)
  
  @staticmethod
  def setup_wandb():
    wandb.login()

  
  #Calls above methods to do a complete Collab setup, ready to run ml models :D Note may want to change this per competition
  @staticmethod
  def complete_set_up():
    SetUpColab.runtime_info()
    SetUpColab.mount_google_drive()
    SetUpColab.set_up_kaggle()
    #SetUpColab.setup_wandb()
    


In [None]:
# Prepare the runtime: RAM/GPU report, Google Drive mount and Kaggle CLI setup.
SetUpColab.complete_set_up()
# Kaggle competition slug, also used as the name of the download directory.
data_url = 'idl-fall21-hw2p2s2-face-verification'
# Directory the archive is expected in (set_up_kaggle points the CLI's
# download path at /content).
data_path = '/content/competitions/' + data_url
# NOTE(review): the slug is repeated literally in the two shell commands below
# instead of reusing `data_url`; keep all three in sync when changing it.
!kaggle competitions download -c idl-fall21-hw2p2s2-face-verification
# Work from inside the competition directory before extracting the archive.
SetUpColab.change_dir(data_path)
!unzip idl-fall21-hw2p2s2-face-verification.zip

Streaming output truncated to the last 5000 lines.
  inflating: verification_data/verification_data/00064100.jpg  
  inflating: verification_data/verification_data/00064101.jpg  
  inflating: verification_data/verification_data/00064102.jpg  
  inflating: verification_data/verification_data/00064103.jpg  
  inflating: verification_data/verification_data/00064104.jpg  
  inflating: verification_data/verification_data/00064105.jpg  
  inflating: verification_data/verification_data/00064106.jpg  
  inflating: verification_data/verification_data/00064107.jpg  
  inflating: verification_data/verification_data/00064108.jpg  
  inflating: verification_data/verification_data/00064109.jpg  
  inflating: verification_data/verification_data/00064110.jpg  
  inflating: verification_data/verification_data/00064111.jpg  
  inflating: verification_data/verification_data/00064112.jpg  
  inflating: verification_data/verification_data/00064113.jpg  
  inflating: verification_data/verifica

#Model

In [None]:
class ResidualBlockResnet34(nn.Module):
  """Residual block made of two 3x3 conv + batch-norm + ReLU stages.

  Args:
    input_channel_size: number of channels of the incoming feature map.
    keep_dim: when true the block outputs `input_channel_size` channels,
      otherwise it outputs twice as many.
    stride: stride of the first convolution and of the projection shortcut.

  The shortcut is the identity when neither the channel count nor the
  resolution changes; otherwise it is a conv + batch-norm projection so that
  both branches have the same shape when they are added.
  """
  def __init__(self, input_channel_size, keep_dim = True, stride=1):
    super().__init__()

    output_size = input_channel_size if keep_dim else int(input_channel_size*2)

    if stride > 1:
      # Both branches use an unpadded 3x3 convolution with the same stride so
      # their output resolutions match. The layout is kept as-is so existing
      # checkpoints still load.
      self.shortcut_x = nn.Sequential(
          nn.Conv2d(in_channels=input_channel_size, out_channels=output_size, kernel_size=3, stride=stride, bias=False),
          nn.BatchNorm2d(output_size))
      self.conv0 = nn.Conv2d(in_channels=input_channel_size, out_channels=output_size, kernel_size=3, stride=stride, bias=False)

    else:
      if output_size != input_channel_size:
        # Channel count changes at full resolution: an identity shortcut
        # could not be added to the residual branch, so project with 1x1.
        self.shortcut_x = nn.Sequential(
            nn.Conv2d(in_channels=input_channel_size, out_channels=output_size, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(output_size))
      else:
        self.shortcut_x = nn.Identity()
      self.conv0 = nn.Conv2d(in_channels=input_channel_size, out_channels=output_size, kernel_size=3, stride=stride, padding='same', bias=False)

    self.bn_0 = nn.BatchNorm2d(output_size)
    self.reLU_0 = nn.ReLU()

    self.conv1 = nn.Conv2d(in_channels=output_size, out_channels=output_size, kernel_size=3, stride=1, padding='same', bias=False)
    self.bn_1 = nn.BatchNorm2d(output_size)
    self.reLU_1 = nn.ReLU()

  def forward(self, x):
    """Return `residual(x) + shortcut(x)`.

    The second ReLU is applied to the residual branch before the shortcut is
    added, so the sum itself is not passed through an activation.
    """
    shortcut = self.shortcut_x(x)

    out = self.reLU_0(self.bn_0(self.conv0(x)))
    out = self.reLU_1(self.bn_1(self.conv1(out)))

    return out + shortcut

In [None]:
class Resnet34(nn.Module):
  """ResNet-style classifier built from `ResidualBlockResnet34` blocks.

  A 3x3 stem convolution is followed by four stages of residual blocks
  (64, 128, 256 and 512 channels), global average pooling, dropout and a
  linear layer producing 4000 logits.

  Args:
    in_channels: number of channels of the input images.
  """
  def __init__(self, in_channels):
    super().__init__()

    # (channel width, number of shape-preserving blocks) for every stage.
    stage_plan = [(64, 3), (128, 3), (256, 5), (512, 2)]
    final_stage = len(stage_plan) - 1

    modules = [
        nn.Conv2d(in_channels=in_channels, out_channels=64, kernel_size=3, stride=1, padding='same', bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
    ]
    for stage_index, (width, repeats) in enumerate(stage_plan):
      for _ in range(repeats):
        modules.append(ResidualBlockResnet34(input_channel_size=width))
      if stage_index < final_stage:
        # Transition block: doubles the channels and halves the resolution.
        modules.append(ResidualBlockResnet34(input_channel_size=width, keep_dim=False, stride=2))

    # Pool to one value per channel, flatten, then regularise with dropout.
    modules.extend([
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
        nn.Dropout(p=0.2),
    ])

    self.cnn_layers = nn.Sequential(*modules)
    self.linear_layer = nn.Linear(512, 4000)

  def forward(self, x, return_embedding=False):
    """Return the logits, or `(logits, embedding)` if `return_embedding` is True.

    The embedding is the 512-wide output of `cnn_layers`, i.e. the input of
    the final linear layer.
    """
    embedding = self.cnn_layers(x)
    logits = self.linear_layer(embedding)

    if return_embedding == True:
      return (logits, embedding)
    return logits


In [None]:
class CenterLoss(nn.Module):
    """Center loss: mean squared distance of each feature to its class centre.

    Args:
        num_classes (int): number of classes.
        feat_dim (int): feature dimension.
        device (torch.device): device on which the centres are created.

    The class centres are a learnable parameter of shape
    (num_classes, feat_dim), initialised from a standard normal distribution.
    """
    def __init__(self, num_classes, feat_dim, device=torch.device('cpu')):
        super(CenterLoss, self).__init__()
        self.num_classes = num_classes
        self.feat_dim = feat_dim
        self.device = device

        self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim).to(self.device))

    def forward(self, x, labels):
        """
        Args:
            x: feature matrix with shape (batch_size, feat_dim).
            labels: ground truth labels with shape (batch_size).

        Returns:
            Scalar tensor: the mean over the batch of the squared Euclidean
            distance between each feature and the centre of its own class.
        """
        # Look up each sample's own centre directly. This avoids building the
        # full (batch_size, num_classes) distance matrix, the deprecated
        # positional `addmm_(beta, alpha, ...)` call and the per-sample loop.
        # Indexing on the parameter's current device keeps the module correct
        # after `.to(...)` / `.cuda()`, when `self.device` may be stale.
        own_centers = self.centers[labels.long().to(self.centers.device)]

        dist = (x - own_centers).pow(2).sum(dim=1)
        dist = dist.clamp(min=1e-12, max=1e+12)  # for numerical stability

        return dist.mean()

In [None]:
class Resnet34_modified_closeness(nn.Module):
  """ResNet-style classifier with an extra "closeness" head for center loss.

  The backbone (`cnn_layers`) and classifier (`linear_layer`) have the same
  layout as `Resnet34`. A second linear layer followed by ReLU maps the
  512-wide embedding to the features handed to the center-loss criterion.

  Args:
    in_channels: number of channels of the input images.
    num_classes: width of the classification output (default 4000).
    feat_dim: width of the closeness output (default 4000).
  """
  def __init__(self, in_channels, num_classes=4000, feat_dim=4000):
    super().__init__()

    self.cnn_layers = nn.Sequential(
        nn.Conv2d(in_channels=in_channels,out_channels=64,kernel_size=3, stride =1, padding='same',bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        ResidualBlockResnet34(input_channel_size = 64),
        ResidualBlockResnet34(input_channel_size = 64),
        ResidualBlockResnet34(input_channel_size = 64),
        ResidualBlockResnet34(input_channel_size = 64, keep_dim = False, stride = 2),
        ResidualBlockResnet34(input_channel_size = 128),
        ResidualBlockResnet34(input_channel_size = 128),
        ResidualBlockResnet34(input_channel_size = 128),
        ResidualBlockResnet34(input_channel_size = 128, keep_dim = False, stride = 2),
        ResidualBlockResnet34(input_channel_size = 256),
        ResidualBlockResnet34(input_channel_size = 256),
        ResidualBlockResnet34(input_channel_size = 256),
        ResidualBlockResnet34(input_channel_size = 256),
        ResidualBlockResnet34(input_channel_size = 256),
        ResidualBlockResnet34(input_channel_size = 256, keep_dim = False, stride = 2),
        ResidualBlockResnet34(input_channel_size = 512),
        ResidualBlockResnet34(input_channel_size = 512),
        nn.AdaptiveAvgPool2d((1,1)),
        nn.Flatten(),
        nn.Dropout(p=0.2)  # dropout on the pooled embedding to combat overfitting
    )

    self.linear_layer = nn.Linear(512, num_classes)
    # Attribute names (including the spelling) are kept for compatibility.
    self.clossness_layer = nn.Linear(512, feat_dim)
    self.close_ReLU = nn.ReLU(inplace=True)

  def forward(self, x, return_embedding=False):
    """Run the network.

    Returns:
      `(logits, embedding)` when `return_embedding` is True, otherwise
      `(logits, closeness_output)` where `closeness_output` is the ReLU of
      the closeness head applied to the embedding.
    """
    embedding = self.cnn_layers(x)
    output = self.linear_layer(embedding)

    if return_embedding == True:
      return(output, embedding)

    # The closeness head consumes the embedding, not the raw image tensor.
    closeness_output = self.close_ReLU(self.clossness_layer(embedding))
    return(output, closeness_output)


In [None]:
class ModelComponents():
  """Training, checkpointing and face-verification helpers for the models."""

  # Number of batches between two running-loss reports during training.
  LOG_INTERVAL = 10

  def __init__(self):
    pass

  @staticmethod
  def load_model_for_inference(save_path, model, map_location=None):
    """Load the weights stored under 'model_state_dict' into `model`.

    Args:
      save_path: path of a checkpoint written by `save_model`.
      model: module whose parameters are overwritten.
      map_location: forwarded to `torch.load`; pass e.g. 'cpu' to read a
        checkpoint that was saved on a GPU from a CPU-only runtime.

    Returns:
      The same `model` instance.
    """
    checkpoint = torch.load(save_path, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    return model

  @staticmethod
  def save_model(model, optimizer, save_path, scheduler=None):
    """Write a checkpoint readable by `load_model_for_inference`.

    The checkpoint is a dict with 'model_state_dict', 'optimizer_state_dict'
    and, when a scheduler is given, 'scheduler_state_dict'.
    """
    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }
    if scheduler is not None:
      checkpoint['scheduler_state_dict'] = scheduler.state_dict()
    torch.save(checkpoint, save_path)

  @staticmethod
  def create_device():
    """Return the first CUDA device when available, otherwise the CPU."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device is on",device)
    return(device)

  def train(self, model, train_loader, dev_dataloader, optimizer, optimizer_close, epochs, loss_function, loss_function_close, path, device, length_val, scheduler, closs_weight=1, log_fn=None):
    """Train `model` with a classification loss plus a weighted center loss.

    After every epoch the model is validated, the scheduler is stepped with
    the validation accuracy and a checkpoint is written to
    `path + str(epoch) + '.pt'`.

    Args:
      model: module whose forward returns `(logits, closeness_features)`.
      train_loader: iterable of `(features, labels)` training batches.
      dev_dataloader: iterable of `(features, labels)` validation batches.
      optimizer: optimizer of the model parameters.
      optimizer_close: optimizer of the parameters of `loss_function_close`.
      epochs: number of passes over `train_loader`.
      loss_function: classification criterion called as `(logits, labels)`.
      loss_function_close: center-loss criterion called as
        `(closeness_features, labels)`.
      path: checkpoint path prefix.
      device: device the batches are moved to.
      length_val: number of validation samples (accuracy denominator).
      scheduler: scheduler whose `step` accepts the validation accuracy.
      closs_weight: weight of the center loss in the total loss.
      log_fn: optional callable receiving a metrics dict (for example
        `wandb.log`); nothing is logged when it is None.
    """
    interval = self.LOG_INTERVAL

    for epoch in range(epochs):
      avg_loss = 0.0

      model.train()
      for batch_num, (feats, labels) in enumerate(train_loader):
        feats, labels = feats.to(device), labels.to(device)
        optimizer.zero_grad()
        optimizer_close.zero_grad()

        output, closeness_output = model(feats)
        loss = loss_function(output, labels.long())
        c_loss = loss_function_close(closeness_output, labels.long())

        total_loss = loss + closs_weight * c_loss
        total_loss.backward()
        optimizer.step()

        # Undo the loss weight on the centre gradients so the centres are
        # updated at the rate given by their own optimizer.
        if closs_weight != 0:
          for param in loss_function_close.parameters():
            if param.grad is not None:
              param.grad.data *= (1. / closs_weight)
        optimizer_close.step()

        avg_loss += loss.item()

        if batch_num % interval == interval - 1:
          running_loss = avg_loss / interval
          print('Epoch: {}\tBatch: {}\tAvg-Loss: {:.4f}'.format(epoch, batch_num+1, running_loss))
          if log_fn is not None:
            log_fn({"average loss" : running_loss, "epoch" : epoch})
          avg_loss = 0.0

      # Release cached GPU memory once per epoch rather than once per batch.
      if torch.cuda.is_available():
        torch.cuda.empty_cache()

      val_acc = self._validation_accuracy(model, dev_dataloader, device, length_val)
      print('Epoch: {}, Validation Accuracy: {:.2f}'.format(epoch, val_acc))
      if log_fn is not None:
        log_fn({"Validation Accuracy" : val_acc})

      scheduler.step(val_acc)

      to_save_path = path + str(epoch) + '.pt'
      self.save_model(model, optimizer, to_save_path, scheduler)

  @staticmethod
  def _validation_accuracy(model, dev_dataloader, device, length_val):
    """Return the fraction of `length_val` samples classified correctly."""
    model.eval()
    num_correct = 0
    with torch.no_grad():
      for x, y in dev_dataloader:
        x, y = x.to(device), y.to(device)
        outputs = model(x)
        # Models with an auxiliary head return a tuple; logits come first.
        if isinstance(outputs, tuple):
          outputs = outputs[0]
        num_correct += (torch.argmax(outputs, dim=1) == y).sum().item()
    return num_correct / length_val

  def face_verification(self, comparison_list, model, device):
    """Score every image pair with the cosine similarity of its embeddings.

    Args:
      comparison_list: sequence of `[path_a, path_b]` pairs.
      model: module called as `model(img, return_embedding=True)` and
        returning `(logits, embedding)`.
      device: device the image tensors are moved to.

    Returns:
      `(image_pair_list, cosine_similarity)`: the pairs in input order and
      one float similarity per pair.
    """
    cosine_similarity = []
    image_pair_list = []
    compute_sim = nn.CosineSimilarity(dim=0)

    model.eval()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
      for image_pair in comparison_list:
        img0, img1 = self.return_image_pair(image_pair)

        _, img0_embedding = model(img0.to(device), return_embedding=True)
        _, img1_embedding = model(img1.to(device), return_embedding=True)

        similarity = compute_sim(img0_embedding.squeeze(), img1_embedding.squeeze()).item()

        image_pair_list.append(image_pair)
        cosine_similarity.append(similarity)
    return(image_pair_list, cosine_similarity)

  def get_image_pair_list(self, pair_file_path):
    """Read a pairs file and return one list of whitespace-split fields per line.

    Blank lines are skipped so a trailing newline does not produce an empty
    pair.
    """
    with open(pair_file_path) as f:
      return [line.split() for line in f if line.strip()]

  def return_image_pair(self, image_tuple_paths):
    """Load the two images of a pair as normalised (1, 3, H, W) tensors."""
    img1 = self._load_image(image_tuple_paths[0])
    img2 = self._load_image(image_tuple_paths[1])
    return img1, img2

  @staticmethod
  def _load_image(image_path):
    """Open an image, force RGB, convert to a tensor, normalise, add a batch axis."""
    # The context manager closes the file handle once the pixels are read.
    with Image.open(image_path) as img:
      tensor = torchvision.transforms.ToTensor()(img.convert('RGB'))
    tensor = torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(tensor)
    return tensor.unsqueeze(0)

#Training Model

In [None]:
# Hyper-parameters of the classification + center-loss training run.
numEpochs = 10
num_feats = 3        # channels of the input images
closs_weight = 1     # weight of the center loss in the total loss
lr_cent = 0.5        # learning rate of the class centres
# NOTE(review): the notebook never defined `learningRate`; 1e-2 is a
# placeholder chosen by the reviewer — confirm the intended value.
learningRate = 1e-2
weightDecay = 5e-5

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The constructor takes the input channel count; the output widths are fixed
# by the model and are read back from its layers below.
network = Resnet34_modified_closeness(num_feats)
network = network.to(device)

# Derive the sizes from the model so the criteria always match its outputs
# (a hand-set feat_dim of 10 did not match the closeness layer's width).
num_classes = network.linear_layer.out_features
feat_dim = network.clossness_layer.out_features

criterion_label = nn.CrossEntropyLoss()
criterion_closs = CenterLoss(num_classes, feat_dim, device)
optimizer_label = torch.optim.SGD(network.parameters(), lr=learningRate, weight_decay=weightDecay, momentum=0.9)
optimizer_closs = torch.optim.SGD(criterion_closs.parameters(), lr=lr_cent)

#Evaluating Similarity

In [None]:
# IPython automagic `cd`: switch into the extracted verification_data folder
# (presumably so the relative image paths in the pairs file resolve — confirm).
cd verification_data

/content/competitions/idl-fall21-hw2p2s2-face-verification/verification_data


In [None]:
# Build the helper object and pick the device to run on.
component_builder = ModelComponents()
device = component_builder.create_device()

# Restore the trained classifier: create it on the device, then load weights.
model_path = '/content/gdrive/MyDrive/IDL/HW/HW2/P1/saves_run_1/23.pt'
inference_model = component_builder.load_model_for_inference(
    model_path, Resnet34(3).to(device))

# Read the list of image pairs that have to be scored.
test_data_set_path = '/content/competitions/idl-fall21-hw2p2s2-face-verification/verification_pairs_test.txt'
test_image_pairs_list = component_builder.get_image_pair_list(test_data_set_path)

# Compute one cosine similarity per pair.
image_pairs, similarity = component_builder.face_verification(
    test_image_pairs_list, inference_model, device)

Device is on cuda:0


In [None]:
# Row labels of the submission: the two image paths joined by a space.
concat_image_pair = [" ".join(pair) for pair in image_pairs]

# One 'Category' column holding the similarity score of each pair.
df = pd.DataFrame(similarity)
df.columns = ['Category']

# Put the pair label in front and use it as the index, named 'Id'.
df.insert(0, 'Id', concat_image_pair)
df = df.set_index('Id')

df.to_csv('/content/gdrive/MyDrive/IDL/HW/HW2/P2/saves_run_0/result1.csv')