In [1]:
import sys

from torch import nn
from torch.utils.data import DataLoader
from torchvision.models import mobilenet_v3_large
from torchvision.transforms import Normalize, RandomHorizontalFlip, Resize
import torch

# Put the parent directory first on the module search path; presumably this is what
# lets the `scripts.*` imports further down resolve when the notebook is run from
# its own folder (the relative '..' depends on the kernel's working directory).
sys.path.insert(0, '..')

## View MobileNetV3 Architecture for Reference

In [2]:
# Load MobileNetV3-Large with pretrained weights and print its module tree for reference.
# `pretrained=True` is the legacy torchvision flag; torchvision >= 0.13 replaces it with
# the `weights=` argument, e.g. the enum kept below for when the environment is upgraded:
# weights = MobileNet_V3_Large_Weights.IMAGENET1K_V2
model = mobilenet_v3_large(pretrained=True)

print(model)

MobileNetV3(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False

## Remove Last Layer

In [3]:
# Re-wrap every top-level child of the MobileNetV3 except the last one in a
# Sequential, built in a single expression so the name is only ever bound to a module.
model_submodule_excl_last = nn.Sequential(
    *list(model.children())[:-1]
)

model_submodule_excl_last

Sequential(
  (0): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      

In [4]:
# Shape probe of the truncated backbone on a random batch of ten 224x224 RGB inputs.
# eval() stops this throwaway pass from updating the pretrained BatchNorm running
# statistics with noise, and no_grad() avoids building an autograd graph for it.
model_submodule_excl_last.eval()
with torch.no_grad():
    backbone_features = model_submodule_excl_last(torch.rand(10, 3, 224, 224))
backbone_features.shape

torch.Size([10, 960, 1, 1])

In [5]:
# Take the last top-level child of the MobileNetV3 and keep all of its layers
# except the final one, wrapped in a Sequential.
model_last_submodule = nn.Sequential(
    *list(list(model.children())[-1].children())[:-1]
)

model_last_submodule

Sequential(
  (0): Linear(in_features=960, out_features=1280, bias=True)
  (1): Hardswish()
  (2): Dropout(p=0.2, inplace=True)
)

In [6]:
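# Kept for reference (presumably disabled because it fails): the backbone output is
# still (10, 960, 1, 1) at this point, so it has to be flattened to (10, 960) before
# it can be fed to the Linear(in_features=960, ...) head, as done in the next cell.
# output = model_submodule_excl_last(torch.rand(10, 3, 224, 224))
# model_last_submodule(output)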

In [7]:
# Shape probe of backbone + truncated classifier head on a random batch.
# eval() keeps the pretrained BatchNorm running statistics untouched and turns
# Dropout off; no_grad() avoids building an autograd graph for a throwaway pass.
model_submodule_excl_last.eval()
model_last_submodule.eval()
with torch.no_grad():
    output = model_submodule_excl_last(torch.rand(10, 3, 224, 224))
    # Collapse (N, 960, 1, 1) to (N, 960) so it matches the head's first Linear layer.
    output = torch.flatten(output, 1)
    embedding_shape = model_last_submodule(output).shape
embedding_shape

torch.Size([10, 1280])

## Import `SiameseDuplicateImageNetwork` for Testing

In [8]:
from scripts.dataset_duplicate_image import DuplicateImageDataset
from scripts.model import SiameseDuplicateImageNetwork

In [9]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [10]:
# Instantiate the Siamese network, then move it to the selected device.
# Note: this rebinds `model`, which until now referred to the plain MobileNetV3.
model = SiameseDuplicateImageNetwork()
model = model.to(device)

In [11]:
# Print the module tree of the Siamese network (`model` was rebound in the previous cell).
print(model)

SiameseDuplicateImageNetwork(
  (model): Sequential(
    (0): SiameseNetwork(
      (net1): Sequential(
        (0): Sequential(
          (0): Sequential(
            (0): ConvNormActivation(
              (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
              (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
              (2): Hardswish()
            )
            (1): InvertedResidual(
              (block): Sequential(
                (0): ConvNormActivation(
                  (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
                  (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
                  (2): ReLU(inplace=True)
                )
                (1): ConvNormActivation(
                  (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
                  (1): BatchNorm2d(16, eps=0.001, mom

In [12]:
# Training dataset. The transforms handed to it resize to 256x256 and normalise with
# the ImageNet channel mean/std; an always-on horizontal flip (p=1) is registered
# under the 'hflip' key, presumably for the dataset's upsampling (confirm in
# DuplicateImageDataset).
# NOTE(review): the batch recorded below contains values such as 3.80 in the third
# channel, above the (1 - 0.406) / 0.225 ~= 2.64 maximum that a single Normalize of
# [0, 1] data can produce. That would be consistent with Normalize being applied
# twice or with inputs not scaled to [0, 1] -- verify inside DuplicateImageDataset.
train_dataset = DuplicateImageDataset(
    '../data/Training Data',
    transforms=[
        Resize((256, 256), antialias=True),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ],
    upsample_transforms_dict={'hflip': RandomHorizontalFlip(p=1)},
)

# Shuffled mini-batches of 64 samples.
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=64)

In [13]:
# Pull one batch from the loader; each batch unpacks into three items, of which the
# third is discarded here (presumably the label -- confirm in DuplicateImageDataset).
feature1, feature2, _ = next(iter(train_dataloader))
# Displays both full tensors; `feature1.shape, feature2.shape` would be a lighter check.
feature1, feature2

(tensor([[[[1.6014, 1.5712, 1.5250,  ..., 1.3397, 1.3493, 1.3130],
           [1.6053, 1.5875, 1.5598,  ..., 1.3740, 1.4374, 1.4089],
           [1.6527, 1.6226, 1.5948,  ..., 1.3976, 1.4341, 1.4253],
           ...,
           [1.5830, 1.6181, 1.6046,  ..., 1.2952, 1.2813, 1.2762],
           [1.6546, 1.6580, 1.6571,  ..., 1.2383, 1.2804, 1.2342],
           [1.6334, 1.6578, 1.6285,  ..., 1.2132, 1.2192, 1.2272]],
 
          [[2.4698, 2.4389, 2.3946,  ..., 2.2104, 2.2203, 2.1831],
           [2.4820, 2.4638, 2.4367,  ..., 2.2455, 2.3103, 2.2812],
           [2.5305, 2.4996, 2.4714,  ..., 2.2696, 2.3069, 2.2980],
           ...,
           [1.4263, 1.4621, 1.4484,  ..., 1.2335, 1.2229, 1.2177],
           [1.4995, 1.5030, 1.5020,  ..., 1.1755, 1.2220, 1.1747],
           [1.4778, 1.5027, 1.4727,  ..., 1.1498, 1.1594, 1.1676]],
 
          [[3.8007, 3.7699, 3.7248,  ..., 3.4564, 3.4662, 3.4292],
           [3.7826, 3.7644, 3.7372,  ..., 3.4913, 3.5558, 3.5268],
           [3.7978, 3.76

In [14]:
# Move both halves of the batch onto the same device as the model.
feature1 = feature1.to(device)
feature2 = feature2.to(device)

# Smoke-test forward pass only: run it under no_grad so that no autograd graph (and
# the activations it pins in device memory) stays alive through the global `output`.
with torch.no_grad():
    output = model(feature1, feature2)
print(output)

tensor([[0.0021],
        [0.0035],
        [0.0054],
        [0.0018],
        [0.0061],
        [0.0032],
        [0.0056],
        [0.0012],
        [0.0012],
        [0.0034],
        [0.0039],
        [0.0049],
        [0.0024],
        [0.0040],
        [0.0029],
        [0.0020],
        [0.0005],
        [0.0044],
        [0.0035],
        [0.0048],
        [0.0033],
        [0.0014],
        [0.0055],
        [0.0041],
        [0.0032],
        [0.0031],
        [0.0025],
        [0.0049],
        [0.0035],
        [0.0033],
        [0.0028],
        [0.0025],
        [0.0018],
        [0.0033],
        [0.0031],
        [0.0026],
        [0.0054],
        [0.0038],
        [0.0057],
        [0.0026],
        [0.0047],
        [0.0024],
        [0.0040],
        [0.0022],
        [0.0061],
        [0.0034],
        [0.0039],
        [0.0055],
        [0.0045],
        [0.0024],
        [0.0037],
        [0.0046],
        [0.0036],
        [0.0038],
        [0.0046],
        [0

In [15]:
# Confirm the output shape: one value per input pair, i.e. (batch_size, 1).
output.shape

torch.Size([64, 1])