In [26]:
import torch, os

In [37]:
# NOTE(review): the wildcard import hides which names this notebook depends on.
# EfficientNetV2B0_128 presumably comes from here - confirm and consider importing it by name.
from models.descriptors import *
# Build the descriptor network, passing pretrained=True to its constructor.
model = EfficientNetV2B0_128(pretrained=True)

In [38]:
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [39]:
from models.detectors import Yolov5Detector
# Detector instance with default constructor arguments; its `detect(frame)` result
# is consumed by `extract_embedding` as per-detection box coordinates.
detector = Yolov5Detector()

In [40]:
# Move the model onto the selected device and switch it to evaluation mode.
# Both calls hand back the module itself, so the cell still displays its repr.
model.to(device).eval()

EfficientNetV2B0_128(
  (backbone): EfficientNet(
    (conv_stem): Conv2dSame(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (act1): Swish()
    (blocks): Sequential(
      (0): Sequential(
        (0): ConvBnAct(
          (conv): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
          (act1): Swish()
        )
      )
      (1): Sequential(
        (0): EdgeResidual(
          (conv_exp): Conv2dSame(16, 64, kernel_size=(3, 3), stride=(2, 2), bias=False)
          (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
          (act1): Swish()
          (se): Identity()
          (conv_pwl): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, trac

In [41]:
import torchvision.transforms as T
# Preprocessing applied to every crop tensor before it is fed to the model,
# in this order: resize to 224x224, divide by 255, normalise each channel.
preprocess_steps = [
    T.Resize((224, 224)),
    lambda x: x / 255.0,
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = T.Compose(preprocess_steps)

In [42]:
import timm
import cv2
import numpy as np
def extract_embedding(imgpath, show=True):
  """Detect objects in an image and compute one embedding per detection.

  The image is read with OpenCV, passed to the module-level ``detector``, and
  every detected box is cropped, converted from BGR/HxWxC to RGB/CxHxW,
  preprocessed with ``transform`` and run through ``model``.

  Args:
    imgpath: Path of the image file to read.
    show: When True (the default, matching the previous behaviour) the image
      with the detection boxes drawn on it is displayed in an OpenCV window
      and the call blocks until a key is pressed. Pass False on headless
      kernels or when only the embeddings are needed.

  Returns:
    A list with one model output per usable detection, in detector order.
    The tensors are produced under ``torch.no_grad()`` and therefore carry
    no autograd graph.

  Raises:
    OSError: If OpenCV cannot load ``imgpath``.
  """
  frame = cv2.imread(imgpath)
  if frame is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise OSError(f"cv2.imread could not load image: {imgpath!r}")

  height, width = frame.shape[:2]
  # Boxes are drawn on a separate copy so that the outline of one detection
  # can never end up inside the crop of a later, overlapping detection.
  canvas = frame.copy()

  embs = []
  # Inference only: without no_grad every embedding would keep its full graph alive.
  with torch.no_grad():
    for det in detector.detect(frame).int():
      x1, y1, x2, y2 = (int(v) for v in det[:4].tolist())
      # Clamp to the image: a negative coordinate would otherwise be read as
      # "count from the end" by NumPy slicing and select the wrong region.
      x1, x2 = max(x1, 0), min(x2, width)
      y1, y2 = max(y1, 0), min(y2, height)
      if x2 <= x1 or y2 <= y1:
        continue  # nothing left to crop for this box

      face = frame[y1:y2, x1:x2, :]
      face = np.ascontiguousarray(face[:, :, ::-1].transpose(2, 0, 1)) # to 3xHxW, BGR to RGB
      facetensor = transform(torch.tensor(face, device=device))
      embs.append(model(torch.unsqueeze(facetensor, 0)))

      cv2.rectangle(canvas, (x1, y1), (x2, y2), (0, 0, 255), 4)

  if show:
    cv2.imshow('mat', canvas)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
  return embs

In [43]:
# Run detection + embedding on a sample image; `out` is the returned list of embeddings.
# Note: the call opens an OpenCV window and blocks until a key is pressed.
out = extract_embedding('./data/cats.jpg')

In [44]:
# Number of embeddings returned, i.e. how many detections were embedded.
len(out)

2

In [45]:
def L2(a, b):
    """Return the Euclidean (L2) distance between tensors ``a`` and ``b``.

    The element-wise difference is reduced over all of its entries, so the
    result is a single scalar tensor regardless of the input shape.
    """
    difference = a - b
    return torch.linalg.norm(difference)

In [46]:
# Distance between the embeddings of the first two detections
# (requires that at least two embeddings were returned).
L2(out[0], out[1])

tensor(1.4341, device='cuda:0', grad_fn=<CopyBackwards>)