In [1]:
import sys

from torch import nn
from torch.utils.data import DataLoader
from torchvision.models import mobilenet_v3_large
from torchvision.transforms import Normalize, RandomHorizontalFlip, Resize
import torch

# Put the parent directory first on the module search path so that the
# project-local `scripts` package imported further down can be found.
# Note: '..' is resolved relative to the kernel's working directory.
sys.path.insert(0, '..')

## View MobileNetV3 Architecture for Reference

In [2]:
# weights = MobileNet_V3_Large_Weights.IMAGENET1K_V2
# NOTE(review): `pretrained=True` looks like the older torchvision flag, while the
# commented line above looks like the newer `weights=` enum style (which would also
# need `MobileNet_V3_Large_Weights` imported) — confirm which API the installed
# torchvision version supports before switching.
model = mobilenet_v3_large(pretrained=True)

# Print the full module tree so the layer layout can be referenced in the cells below.
print(model)

MobileNetV3(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False

## Remove Last Layer

In [3]:
# Collect every top-level child module of the network except the final one,
# then chain the kept children into a single sequential module.
backbone_children = tuple(model.children())[:-1]
model_submodule_excl_last = nn.Sequential(*backbone_children)

# Display the resulting module tree.
model_submodule_excl_last

Sequential(
  (0): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      

In [4]:
# Shape probe only. nn.Module instances start in training mode, so a plain forward
# pass here would update the BatchNorm running statistics (track_running_stats=True
# in the printed architecture) with random-noise inputs and would also record an
# autograd graph that is never used. Switch to eval mode and disable gradient
# tracking for the probe.
model_submodule_excl_last.eval()
with torch.no_grad():
    backbone_features = model_submodule_excl_last(torch.rand(10, 3, 224, 224))

# One 960-channel 1x1 feature map per input image.
backbone_features.shape

torch.Size([10, 960, 1, 1])

In [5]:
# Take the network's final top-level child and keep all of its layers except
# the last one (i.e. remove the last layer from the last submodule).
last_child = list(model.children())[-1]
last_child_layers_excl_final = list(last_child.children())[:-1]
model_last_submodule = nn.Sequential(*last_child_layers_excl_final)

# Display the trimmed head.
model_last_submodule

Sequential(
  (0): Linear(in_features=960, out_features=1280, bias=True)
  (1): Hardswish()
  (2): Dropout(p=0.2, inplace=True)
)

In [6]:
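# NOTE(review): kept only for reference — consider deleting this cell.
# Feeding the backbone output directly into the head does not work: the backbone
# returns a 4-D tensor of shape (10, 960, 1, 1), while the head starts with
# Linear(in_features=960, ...), which expects the last dimension to be 960.
# The next cell flattens the tensor first.
# output = model_submodule_excl_last(torch.rand(10, 3, 224, 224))
# model_last_submodule(output)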

In [7]:
# Shape probe for the full embedding path (backbone -> flatten -> trimmed head).
# Both modules are put in eval mode and gradient tracking is disabled so that the
# probe does not update BatchNorm running statistics, does not apply the head's
# in-place Dropout, and does not record an unused autograd graph.
model_submodule_excl_last.eval()
model_last_submodule.eval()
with torch.no_grad():
    output = model_submodule_excl_last(torch.rand(10, 3, 224, 224))
    # The backbone yields (N, 960, 1, 1); the head's Linear layer needs (N, 960).
    output = torch.flatten(output, 1)
    embedding = model_last_submodule(output)

# One 1280-dimensional embedding per input image.
embedding.shape

torch.Size([10, 1280])

## Import `SiameseDuplicateImageNetwork` for Testing

In [8]:
from scripts.dataset_duplicate_image import DuplicateImageDataset
from scripts.model import SiameseDuplicateImageNetwork

In [9]:
# Choose the compute device: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Report the selection so the run is self-describing.
print(f"Using {device} device")

Using cuda device


In [10]:
# Instantiate the project's Siamese network and move it to the selected device.
# NOTE(review): this rebinds `model`, which until now referred to the MobileNetV3
# instance created above; re-running the earlier cells after this one would make
# them operate on the Siamese network instead.
model = SiameseDuplicateImageNetwork().to(device)

In [11]:
# Print the module tree of the Siamese network to inspect how it is composed.
print(model)

SiameseDuplicateImageNetwork(
  (model): Sequential(
    (0): SiameseNetwork(
      (net1): Sequential(
        (0): Sequential(
          (0): Sequential(
            (0): ConvNormActivation(
              (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
              (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
              (2): Hardswish()
            )
            (1): InvertedResidual(
              (block): Sequential(
                (0): ConvNormActivation(
                  (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
                  (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
                  (2): ReLU(inplace=True)
                )
                (1): ConvNormActivation(
                  (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
                  (1): BatchNorm2d(16, eps=0.001, mom

In [12]:
# Transforms passed to the dataset as `transforms`: resize to 256x256, then
# normalise each of the three channels.
# NOTE(review): the mean/std values are presumably the ImageNet statistics that match
# the pretrained backbone — confirm.
base_transforms = [
    Resize((256, 256), antialias=True),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]

# Named transforms passed as `upsample_transforms_dict`; p=1 means the horizontal
# flip is always applied. How the dataset uses them is defined in
# DuplicateImageDataset — presumably to synthesise extra samples; confirm there.
upsample_transforms = {'hflip': RandomHorizontalFlip(p=1)}

train_dataset = DuplicateImageDataset(
    '../data/Airbnb Data/Training Data',
    transforms=base_transforms,
    upsample_transforms_dict=upsample_transforms,
)

# Shuffled mini-batches of 64 samples.
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

In [13]:
# Pull a single batch from the loader; each batch unpacks into exactly three items.
# The third item is discarded here (presumably the pair label — confirm against
# DuplicateImageDataset).
feature1, feature2, _ = next(iter(train_dataloader))
# NOTE(review): echoing both tensors prints their (elided) contents; inspecting
# `.shape` would usually be enough for a sanity check.
feature1, feature2

(tensor([[[[2.3035, 2.3108, 2.3531,  ..., 2.4115, 2.3236, 2.2198],
           [2.3041, 2.3135, 2.3596,  ..., 2.4503, 2.3625, 2.2585],
           [2.3114, 2.3454, 2.3804,  ..., 2.4720, 2.3870, 2.2828],
           ...,
           [2.0305, 1.8679, 2.1974,  ..., 1.6205, 1.3971, 1.3049],
           [1.8629, 1.8382, 2.1913,  ..., 1.6188, 1.3863, 1.2880],
           [2.1591, 2.1694, 1.8395,  ..., 1.6048, 1.3298, 1.2494]],
 
          [[2.1804, 2.1878, 2.2311,  ..., 2.4658, 2.3760, 2.2698],
           [2.1810, 2.1905, 2.2377,  ..., 2.5055, 2.4157, 2.3095],
           [2.1884, 2.2232, 2.2590,  ..., 2.5277, 2.4408, 2.3342],
           ...,
           [1.9187, 1.7525, 2.1044,  ..., 1.6276, 0.9703, 0.5646],
           [1.7474, 1.7222, 2.0982,  ..., 1.6259, 0.9593, 0.5473],
           [2.0503, 2.0608, 1.7385,  ..., 1.6117, 0.9015, 0.5078]],
 
          [[1.9973, 2.0047, 2.0477,  ..., 2.6300, 2.5406, 2.4349],
           [1.9979, 2.0074, 2.0543,  ..., 2.6695, 2.5802, 2.4744],
           [2.0053, 2.03

In [14]:
# Move both halves of each image pair to the same device as the model.
feature1 = feature1.to(device)
feature2 = feature2.to(device)

# Smoke-test forward pass. Run it in eval mode with gradient tracking disabled so
# that it does not update the BatchNorm running statistics (BatchNorm layers are
# visible in the printed module tree) and does not keep an autograd graph for the
# whole batch in device memory. The previous train/eval mode is restored afterwards
# so any later training code sees the model unchanged.
was_training = model.training
model.eval()
try:
    with torch.no_grad():
        output = model(feature1, feature2)
finally:
    model.train(was_training)

print(output)

tensor([[-0.1264],
        [-0.1258],
        [-0.1269],
        [-0.1245],
        [-0.1290],
        [-0.1276],
        [-0.1266],
        [-0.1262],
        [-0.1264],
        [-0.1277],
        [-0.1240],
        [-0.1270],
        [-0.1239],
        [-0.1263],
        [-0.1279],
        [-0.1252],
        [-0.1273],
        [-0.1250],
        [-0.1278],
        [-0.1252],
        [-0.1274],
        [-0.1248],
        [-0.1227],
        [-0.1262],
        [-0.1271],
        [-0.1247],
        [-0.1298],
        [-0.1269],
        [-0.1266],
        [-0.1285],
        [-0.1255],
        [-0.1271],
        [-0.1271],
        [-0.1272],
        [-0.1268],
        [-0.1279],
        [-0.1279],
        [-0.1265],
        [-0.1233],
        [-0.1258],
        [-0.1233],
        [-0.1248],
        [-0.1239],
        [-0.1222],
        [-0.1268],
        [-0.1269],
        [-0.1271],
        [-0.1250],
        [-0.1276],
        [-0.1275],
        [-0.1265],
        [-0.1281],
        [-0.

In [15]:
# One value per input pair: the loader uses batch_size=64, so the result is (64, 1).
output.shape

torch.Size([64, 1])