In [1]:
from face2embeddings.data.dataset import Arc2FaceDataset
from pathlib import Path
from torch.utils.data import DataLoader
from tqdm import tqdm 
import torch
import os
import time
from torchvision.models import Swin_V2_S_Weights, swin_v2_s
from torchinfo import summary
from face2embeddings.model import FaceSwin

In [21]:
# Resolve the training-set root from the FACE_AUTH_DATASET_DIR environment
# variable so the notebook is not tied to one machine; when the variable is
# unset, fall back to the original local path so existing runs keep working.
dataset_root = Path(
    os.environ.get(
        "FACE_AUTH_DATASET_DIR",
        r"C:\Users\emely\OneDrive\Desktop\face-auth-dataset\train",
    )
)
dataset = Arc2FaceDataset(path_to_dataset=dataset_root)

In [22]:
# Batched, shuffled loader over the training dataset.
# os.cpu_count() returns None when the CPU count cannot be determined, and
# DataLoader needs an integer worker count, so fall back to 0 (load batches
# in the main process) in that case.
train_dataloader = DataLoader(
    dataset,
    batch_size=3,
    shuffle=True,
    num_workers=os.cpu_count() or 0,
    pin_memory=True,
)

In [13]:
# Instantiate torchvision's Swin-V2-S using the weights selected by the
# Swin_V2_S_Weights.DEFAULT enum member.
model = swin_v2_s(
    weights=Swin_V2_S_Weights.DEFAULT,
)

In [19]:
# Map every submodule's dotted name to the module object so layers can be
# looked up by name; the root module itself appears under the empty-string key.
named_layers = {
    layer_name: layer for layer_name, layer in model.named_modules()
}
named_layers

{'': SwinTransformer(
   (features): Sequential(
     (0): Sequential(
       (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
       (1): Permute()
       (2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
     )
     (1): Sequential(
       (0): SwinTransformerBlockV2(
         (norm1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
         (attn): ShiftedWindowAttentionV2(
           (qkv): Linear(in_features=96, out_features=288, bias=True)
           (proj): Linear(in_features=96, out_features=96, bias=True)
           (cpb_mlp): Sequential(
             (0): Linear(in_features=2, out_features=512, bias=True)
             (1): ReLU(inplace=True)
             (2): Linear(in_features=512, out_features=3, bias=False)
           )
         )
         (stochastic_depth): StochasticDepth(p=0.0, mode=row)
         (norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
         (mlp): MLP(
           (0): Linear(in_features=96, out_features=384, bias=True)
      

In [22]:
# Display the module tree (repr) of the Swin-V2-S model for inspection.
model

SwinTransformer(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (1): Permute()
      (2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
    )
    (1): Sequential(
      (0): SwinTransformerBlockV2(
        (norm1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
        (attn): ShiftedWindowAttentionV2(
          (qkv): Linear(in_features=96, out_features=288, bias=True)
          (proj): Linear(in_features=96, out_features=96, bias=True)
          (cpb_mlp): Sequential(
            (0): Linear(in_features=2, out_features=512, bias=True)
            (1): ReLU(inplace=True)
            (2): Linear(in_features=512, out_features=3, bias=False)
          )
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
        (norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (0): Linear(in_features=96, out_features=384, bias=True)
          (1): GELU(approximate='

In [12]:
# Print a torchinfo table for one 3x256x256 input, expanding nested modules
# up to 7 levels deep and listing shapes, parameter counts and trainability.
summary(
    model,
    input_size=(1, 3, 256, 256),
    depth=7,
    col_names=["input_size", "output_size", "num_params", "trainable"],
)

Layer (type:depth-idx)                                  Input Shape               Output Shape              Param #                   Trainable
SwinTransformer                                         [1, 3, 256, 256]          [1, 1000]                 --                        True
├─Sequential: 1-1                                       [1, 3, 256, 256]          [1, 8, 8, 768]            --                        True
│    └─Sequential: 2-1                                  [1, 3, 256, 256]          [1, 64, 64, 96]           --                        True
│    │    └─Conv2d: 3-1                                 [1, 3, 256, 256]          [1, 96, 64, 64]           4,704                     True
│    │    └─Permute: 3-2                                [1, 96, 64, 64]           [1, 64, 64, 96]           --                        --
│    │    └─LayerNorm: 3-3                              [1, 64, 64, 96]           [1, 64, 64, 96]           192                       True
│    └─Sequential: 2-2  

In [9]:
# Display the model's module tree again (same expression as an earlier cell).
model

SwinTransformer(
  (features): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (1): Permute()
      (2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
    )
    (1): Sequential(
      (0): SwinTransformerBlockV2(
        (norm1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
        (attn): ShiftedWindowAttentionV2(
          (qkv): Linear(in_features=96, out_features=288, bias=True)
          (proj): Linear(in_features=96, out_features=96, bias=True)
          (cpb_mlp): Sequential(
            (0): Linear(in_features=2, out_features=512, bias=True)
            (1): ReLU(inplace=True)
            (2): Linear(in_features=512, out_features=3, bias=False)
          )
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
        (norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (0): Linear(in_features=96, out_features=384, bias=True)
          (1): GELU(approximate='

In [None]:
# Bare reference to the torchvision model builder; evaluating this cell only
# displays the function object. NOTE(review): cell was never executed and
# looks like leftover scratch work - confirm whether it can be removed.
swin_v2_s

In [2]:
# Build the project's FaceSwin model.
# NOTE(review): train_from_default=True presumably starts from the default
# pretrained Swin weights - confirm against face2embeddings.model.
face_swin = FaceSwin(
    train_from_default=True,
)

In [3]:
# Move the model to the GPU when one is available and fall back to the CPU
# otherwise, so this cell does not crash on machines without CUDA.
# The call is the cell's last expression, so the model repr is still shown.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
face_swin.to(device)

FaceSwin(
  (features_extractor): Sequential(
    (0): Sequential(
      (0): Sequential(
        (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
        (1): Permute()
        (2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
      )
      (1): Sequential(
        (0): SwinTransformerBlockV2(
          (norm1): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
          (attn): ShiftedWindowAttentionV2(
            (qkv): Linear(in_features=96, out_features=288, bias=True)
            (proj): Linear(in_features=96, out_features=96, bias=True)
            (cpb_mlp): Sequential(
              (0): Linear(in_features=2, out_features=512, bias=True)
              (1): ReLU(inplace=True)
              (2): Linear(in_features=512, out_features=3, bias=False)
            )
          )
          (stochastic_depth): StochasticDepth(p=0.0, mode=row)
          (norm2): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
          (mlp): MLP(
            (0): Linear(in_features=

In [5]:
x1 = torch.randn(2, 3, 256, 256).to("cuda")
x2 = torch.randn(2, 3, 256, 256).to("cuda")
face_swin.forward(x1=x1, x2=x2)

tensor([[0.4903],
        [0.4875]], device='cuda:0', grad_fn=<SigmoidBackward0>)