In [1]:
import torch 
import cv2
import os
import torchvision
import random as rand
import numpy as np
from PIL import Image
from torch import nn, optim, utils
from torch.utils import data, tensorboard
from matplotlib import pyplot as plt
from torchvision import models
from torchvision.transforms import v2
from glob import glob
from sklearn.metrics import accuracy_score
torch.cuda.is_available()



True

In [2]:
imgs_dir = glob('oneshot/Face Dataset/**/*.jpg')
val_imgs_dir = imgs_dir[0:int(0.3*len(imgs_dir))]
train_imgs_dir = imgs_dir[int(0.3*len(imgs_dir)):len(imgs_dir)]

In [3]:
class FaceDataset(data.Dataset):
    def __init__(self, imgs_dir):
        self.img_dict = {}
        self.T = v2.Compose([
            v2.ToTensor(),
            v2.Resize(256),
            v2.CenterCrop(224),
            v2.Normalize(mean = [0.48235, 0.45882, 0.40784], std=[0.00392156862745098, 0.00392156862745098, 0.00392156862745098]),
            v2.RandomHorizontalFlip(),
            v2.RandomInvert(),
            v2.RandomRotation(degrees=127),
            v2.RandomVerticalFlip()
        ])
        for img_path in imgs_dir:
            img_data = img_path.split('/')[-2::]
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#             img = self.T(img)
            if img_data[0] not in self.img_dict.keys():
                self.img_dict[img_data[0]] = [img]
            else:
                self.img_dict[img_data[0]].append(img)
                
    def __len__(self):
        return len(self.img_dict)
    
    def __getitem__(self, index):
        face_anchor = rand.choice(list(self.img_dict.keys()))
        while True:
            face_imgs = rand.choices(self.img_dict[face_anchor], k=2)
            if ~np.array_equal(face_imgs[0],face_imgs[1]):
                anc = self.T(face_imgs[0])
                pos = self.T(face_imgs[1])
                break
        while True:
            face_neg = rand.choice(list(self.img_dict.keys()))
            if face_neg!=face_anchor:
                neg = self.T(rand.choice(self.img_dict[face_neg]))
                break
        del face_imgs
        return anc, pos, neg

In [4]:
# class EncoderCNN(nn.Module):
#     def __init__(self, ):
#         super().__init__()
#         self.vgg = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
#         self.vgg = nn.Sequential(*list(self.vgg.children())[0:-1])
#         self.drop = nn.Dropout(p=0.4)
#         self.linear = nn.Linear(in_features=25088, out_features=4096)
#         self.relu = nn.LeakyReLU()
        
#     def forward(self, img):
#         features = self.vgg(img)
#         bs, ch, hi, wd = features.shape
#         features = torch.reshape(features, [bs,-1])
#         features = self.drop(features)
#         out_lin = self.linear(features)
#         return self.relu(out_lin)
class ResLink(nn.Module):
    def __init__(self, in_ch) -> None:
        super(ResLink, self).__init__()
        self.con1 = nn.Conv2d(in_ch, in_ch*2, kernel_size=(3,3), stride=(1,1), padding=(1,1), bias=True)
        self.btn = nn.BatchNorm2d(2*in_ch)
    
    def forward(self, x):
        x = self.con1(x)
        x = self.btn(x)
        return x
        
class CNNBlock(nn.Module):
    def __init__(self, in_ch) -> None:
        super(CNNBlock, self).__init__()
        self.con1_1 = nn.Conv2d(in_ch, in_ch*2, kernel_size=(3,3), stride=(1,1), padding=(1,1), bias=False)
        self.btn1_1 = nn.BatchNorm2d(in_ch*2)
        self.rel1_1 = nn.ReLU()
        self.con2_1 = nn.Conv2d(in_ch*2, in_ch*2, kernel_size=(3,3), stride=(1,1), padding=(1,1), bias=False)
        self.btn2_1 = nn.BatchNorm2d(in_ch*2)
        self.rel2_1 = nn.ReLU()
        self.res_link = ResLink(in_ch)
        self.downsample = nn.Sequential(
            nn.Conv2d(in_ch*4, out_channels=in_ch*4, kernel_size=(3,3), stride=(2,2)),
            nn.BatchNorm2d(in_ch*4),
        )

    def forward(self, x):
        x1 = self.con1_1(x)
        x1 = self.btn1_1(x1)
        x1 = self.rel1_1(x1)
        x1 = self.con2_1(x1)
        x1 = self.btn2_1(x1)
        x1 = self.rel2_1(x1)
        x2 = self.res_link(x)
        x = torch.cat([x1, x2], dim=1)
        x = self.downsample(x)
        return x

class EncoderCNN(nn.Module):
    def __init__(self, in_ch, out_in_ch) -> None:
        super(EncoderCNN, self).__init__()
        self.con_in = nn.Conv2d(in_ch, out_channels=out_in_ch, kernel_size=(3,3), stride=(1,1))
        self.btn_in = nn.BatchNorm2d(32)
        self.rel_in = nn.ReLU()
        self.cnn1 = CNNBlock(out_in_ch)
        self.cnn2 = CNNBlock(out_in_ch*4)
        # self.cnn3 = CNNBlock(out_in_ch*16)
        self.avg = nn.AdaptiveAvgPool2d((1,1))
        self.out = nn.Sequential(
            nn.Dropout(),
            nn.Linear(in_features=out_in_ch*64, out_features=out_in_ch*16),
        )
    
    def forward(self, img):
        x = self.con_in(img)
        x = self.btn_in(x)
        x = self.rel_in(x)
        x = self.cnn1(x)
        x = self.cnn2(x)
        # x = self.cnn3(x)
        x = self.avg(x)
        x = x.squeeze()
        print(x.shape)
        x = self.out(x)
        return x

In [5]:
class Model(nn.Module):
    def __init__(self, ):
        super().__init__()
        self.pos = EncoderCNN(3, 32)
        self.neg = EncoderCNN(3,32)
    def forward(self, anc, pos, neg):
        out_pos = self.pos(pos)
        out_anc = self.pos(anc)
        out_neg = self.pos(neg)
        return out_pos, out_anc, out_neg

In [7]:
train_face_data = FaceDataset(train_imgs_dir)
train_face_data = data.DataLoader(train_face_data, batch_size = 4)
val_face_data = FaceDataset(val_imgs_dir)
val_face_data = data.DataLoader(val_face_data, batch_size = 4)
writer = tensorboard.SummaryWriter(log_dir='log')
n_epochs = 50
thresh = 0.9
step=0
criterion = nn.TripletMarginWithDistanceLoss()
model = Model().to('cuda')
adam = optim.Adam(model.parameters(), lr=3e-5)



In [8]:
for epoch in range(n_epochs):
    print(f"Epoch: {epoch} ||", end=' ')
    model.train()
    tot_loss = 0
    n=0
    j=0
    for anc, pos, neg in train_face_data:
        adam.zero_grad()
        out_pos, out_anc, out_neg = model(anc.to('cuda'), pos.to('cuda'), neg.to('cuda'))
        loss = criterion(
            out_anc, out_pos, out_neg
        )
        tot_loss+=loss.item()
        writer.add_scalar('Training Loss', loss.item(), global_step = step)
        step+=1
        loss.backward()
        adam.step()
        n+=1
    model.eval()
    same_dists = []
    not_same_dists = []
    print(f"Loss: {tot_loss/n}", end = " ")
    with torch.no_grad():
        for anc, pos, neg in val_face_data:
            out_pos, out_anc, out_neg = model(anc.to('cuda'), pos.to('cuda'), neg.to('cuda'))
            same_dist = nn.CosineSimilarity()(out_anc, out_pos)
            not_same_dist = nn.CosineSimilarity()(out_pos, out_neg)
            same_dists.extend(same_dist.cpu().numpy())
            not_same_dists.extend(not_same_dist.cpu().numpy())
    avg_same_dist = sum(same_dists) / len(same_dists)
    avg_not_same_dist = sum(not_same_dists) / len(not_same_dists)
    print(f"|| Validation Cosine Similarity - Same: {avg_same_dist} || Not Same: {avg_not_same_dist}")

Epoch: 0 || 



torch.Size([4, 512])
torch.Size([4, 512])


OutOfMemoryError: CUDA out of memory. Tried to allocate 48.00 MiB (GPU 0; 3.81 GiB total capacity; 2.22 GiB already allocated; 42.94 MiB free; 2.34 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [30]:
model.pos

EncoderCNN(
  (con_in): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (btn_in): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (rel_in): ReLU()
  (cnn1): CNNBlock(
    (con1_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (btn1_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (rel1_1): ReLU()
    (con2_1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (btn2_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (rel2_1): ReLU()
    (res_link): ResLink(
      (con1): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (btn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (downsample): Sequential(
      (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats

In [13]:
len(val_imgs_dir), len(train_imgs_dir), len(imgs_dir)

(2461, 5743, 8204)

In [7]:
img_dict = {}
for img_path in imgs_dir:
    img_data = img_path.split('/')[-2::]
    # img = cv2.imread(img_path)
    # img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    if img_data[0] not in img_dict.keys():
        img_dict[img_data[0]] = [img_path]
    else:
        img_dict[img_data[0]].append(img_path)

In [8]:
T = v2.Compose([
            v2.ToTensor(),
            v2.Resize(256),
            v2.CenterCrop(224),
            v2.Normalize(mean = [0.48235, 0.45882, 0.40784], std=[0.00392156862745098, 0.00392156862745098, 0.00392156862745098])
        ])



In [9]:
similarity = nn.PairwiseDistance()

In [10]:
def validation(img_dict):
    while True:
        img_1 = rand.choice(list(img_dict.keys()))
        img_2 = rand.choice(list(img_dict.keys()))
        if img_1!=img_2:
            img_1_list = rand.choices(img_dict[img_1], k=2)
            img_2_list = rand.choices(img_dict[img_2], k=2)
            if img_1_list[0]!=img_1_list[1] or img_2_list[0]!=img_2_list[1]:
                break;
    img_1 = [model.pos(torch.unsqueeze(T(cv2.imread(img)).to('cuda'), dim=0)) for img in img_1_list]
    img_2 = [model.pos(torch.unsqueeze(T(cv2.imread(img)).to('cuda'), dim=0)) for img in img_2_list]
    return img_1, img_2

In [17]:
img1, img2 = validation(img_dict)

torch.Size([512])
torch.Size([512])
torch.Size([512])
torch.Size([512])


In [18]:
img1_1, img1_2 = img1
img2_1, img2_2 = img2

In [19]:
similarity(img1_1, img1_2)

tensor(3.1944, device='cuda:0', grad_fn=<NormBackward1>)

In [20]:
similarity(img1_1, img2_1)

tensor(5.8567, device='cuda:0', grad_fn=<NormBackward1>)

In [21]:
similarity(img2_1, img2_2)

tensor(4.6384, device='cuda:0', grad_fn=<NormBackward1>)

In [22]:
similarity(img2_2, img1_2)

tensor(1.9392, device='cuda:0', grad_fn=<NormBackward1>)