In [1]:
!pip install efficientnet_pytorch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: efficientnet_pytorch
  Building wheel for efficientnet_pytorch (setup.py) ... [?25l[?25hdone
  Created wheel for efficientnet_pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16444 sha256=adf9d5651975b4607e18d977639798684b77d170e58dd68edd26d452470f6617
  Stored in directory: /root/.cache/pip/wheels/29/16/24/752e89d88d333af39a288421e64d613b5f652918e39ef1f8e3
Successfully built efficientnet_pytorch
Installing collected packages: efficientnet_pytorch
Successfully installed efficientnet_pytorch-0.7.1


In [2]:
import os
import random
from torchvision import transforms
from PIL import Image
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from tqdm.auto import tqdm as tqdm_auto
from sklearn.metrics import roc_auc_score, roc_curve
from efficientnet_pytorch import EfficientNet
from collections import Counter
import itertools
import copy

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Dataset-related

In [5]:
class BalancedTripletDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.subjects = [subj for subj in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, subj))]
        self.subject_to_images = {subject: os.listdir(os.path.join(root_dir, subject)) for subject in self.subjects}
        self.triplets = self.generate_triplets()

    def generate_triplets(self):
        triplets = []
        for subject in self.subjects:
            subject_images = self.subject_to_images[subject]
            positive_pairs = list(itertools.combinations(subject_images, 2))
            for anchor_img, positive_img in positive_pairs:
                anchor_image_path = os.path.join(self.root_dir, subject, anchor_img)
                positive_image_path = os.path.join(self.root_dir, subject, positive_img)
                triplets.append((anchor_image_path, positive_image_path, subject))
        return triplets

    def __len__(self):
        return len(self.triplets)

    def __getitem__(self, index):
        anchor_image_path, positive_image_path, anchor_subject = self.triplets[index]
        negative_image_path = self.get_negative_image(anchor_subject)

        anchor_image = Image.open(anchor_image_path).convert("RGB")
        positive_image = Image.open(positive_image_path).convert("RGB")
        negative_image = Image.open(negative_image_path).convert("RGB")

        if self.transform:
            anchor_image = self.transform(anchor_image)
            positive_image = self.transform(positive_image)
            negative_image = self.transform(negative_image)

        return anchor_image, positive_image, negative_image

    def get_negative_image(self, anchor_subject):
        negative_subject = random.choice([subj for subj in self.subjects if subj != anchor_subject])
        negative_image_name = random.choice(self.subject_to_images[negative_subject])
        negative_image_path = os.path.join(self.root_dir, negative_subject, negative_image_name)
        return negative_image_path

In [6]:
img_size = 224

train_imgs_path = "/content/drive/MyDrive/ULTRADATA/Motos_reencuadradas/Train"
test_imgs_path  = "/content/drive/MyDrive/ULTRADATA/Motos_reencuadradas/Test"

transform_train = transforms.Compose([
  transforms.Resize((img_size, img_size)),
  transforms.RandomHorizontalFlip(),
  transforms.RandomRotation(15),
  transforms.ToTensor(),
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
  # , transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1)
])

transform_valid = transforms.Compose([
    transforms.Resize((img_size, img_size)), # transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


siamese_train_dataset = BalancedTripletDataset(root_dir=train_imgs_path, transform=transform_train)
siamese_test_dataset = BalancedTripletDataset(root_dir=test_imgs_path, transform=transform_valid)

In [7]:
batch_size = 32
shuffle = True
num_workers = 2

siamese_train_dataloader = DataLoader(siamese_train_dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
siamese_val_dataloader   = DataLoader(siamese_test_dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

## Model-related

In [8]:
def count_trainable_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def unfreeze_layers(model, num_layers_to_unfreeze):
    ct = 0
    children_list = list(model.children())
    for child in reversed(children_list):
        ct += 1
        if ct <= num_layers_to_unfreeze:
            for param in child.parameters():
                param.requires_grad = True
        else:
            break

In [9]:
class EfficientNetFeatureExtractor(nn.Module):
    def __init__(self, model_name='efficientnet-b0', pretrained=True):
        super(EfficientNetFeatureExtractor, self).__init__()
        self.efficient_net = EfficientNet.from_pretrained(model_name) if pretrained else EfficientNet.from_name(model_name)

        # Get the number of output features from the EfficientNet model
        num_output_features = self.efficient_net._fc.in_features

        # Remove the classification head to use it as a feature extractor
        self.efficient_net._fc = nn.Identity()

        # Define the fully connected layer with the appropriate input size
        self.fc = nn.Linear(num_output_features, 256)  # 128

        # Add a normalization layer
        self.norm = nn.BatchNorm1d(256)  # 128

        # Freeze the pre-trained EfficientNet model parameters
        for param in self.efficient_net.parameters():
            param.requires_grad = False

    def forward(self, x):
        x = self.efficient_net(x)
        x = self.fc(x)
        x = self.norm(x)
        return x

class SiameseNetwork(nn.Module):
    def __init__(self, feature_extractor, model_name, init_method, batch_norm, learning_rate, epochs, dataset, loss_function, accuracy_threshold, img_size):
        super(SiameseNetwork, self).__init__()
        self.feature_extractor = feature_extractor

        # Define parameters for saving:
        self.name = model_name
        self.init_method = init_method
        self.batch_norm = batch_norm
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.dataset = dataset
        self.loss_function = loss_function
        self.img_size = img_size

    def forward(self, anchor, positive, negative):
        output1 = self.feature_extractor(anchor)
        output2 = self.feature_extractor(positive)
        output3 = self.feature_extractor(negative)
        return output1, output2, output3

    def compare_images(self, preprocessed_image1, preprocessed_image2, threshold):
        self.eval()  # Set the model to evaluation mode

        with torch.no_grad():
            # Extract the feature vectors for the two images
            feature_vector1 = self.feature_extractor(preprocessed_image1)
            feature_vector2 = self.feature_extractor(preprocessed_image2)

            # Calculate the similarity between the feature vectors
            similarity = torch.norm(feature_vector1 - feature_vector2).item()

        # Compare the similarity to the threshold and return the result
        return similarity <= threshold # <= threshold   # Add for returning a True/False

feature_extractor = EfficientNetFeatureExtractor(model_name='efficientnet-b0', pretrained=True)

siamese_network = SiameseNetwork(
    feature_extractor=feature_extractor, 
    model_name="EfficientNet", 
    init_method=None, 
    batch_norm=None, 
    learning_rate=1e-4, 
    epochs=20, 
    dataset="Motos_recortes", 
    loss_function="triplet_loss", 
    accuracy_threshold=0.5, 
    img_size=img_size
)

siamese_network.to(device)

n_params = count_trainable_parameters(siamese_network)
print(f"Model's trainable parameters: {n_params}")

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth
100%|██████████| 20.4M/20.4M [00:00<00:00, 158MB/s]


Loaded pretrained weights for efficientnet-b0
Model's trainable parameters: 328448


## Selecting best accuracy

In [10]:
best_model = "/content/drive/MyDrive/ULTRADATA/trained_models/name-EfficientNet_lr-0.0001_valid-acc-92,0_train-acc-96,0_epochs-20_dataset-Motos_recortes_loss-fn-triplet_loss.pth"

best_model_state_dict = torch.load(best_model, map_location=torch.device('cpu'))

siamese_network.load_state_dict(best_model_state_dict)

<All keys matched successfully>

In [11]:
def calculate_best_accuracy_threshold(model, dataloader):
    # Set the model to evaluation mode
    model.eval()

    # Initialize two lists to store distances
    positive_distances = []
    negative_distances = []

    # Calculate distances
    with torch.inference_mode():
        for idx, batch in enumerate(dataloader):
            anchor, positive, negative = batch
            anchor, positive, negative = anchor.to(device), positive.to(device), negative.to(device)  # Move tensors to GPU if necessary

            # Get embeddings
            anchor_embedding, positive_embedding, negative_embedding = model(anchor, positive, negative)

            # Calculate positive and negative distances
            positive_distance = torch.norm(anchor_embedding - positive_embedding, dim=1)
            negative_distance = torch.norm(anchor_embedding - negative_embedding, dim=1)

            positive_distances.extend(positive_distance.cpu().numpy())
            negative_distances.extend(negative_distance.cpu().numpy())

            print(idx)

    # Create true labels and predicted distances
    true_labels = [1] * len(positive_distances) + [0] * len(negative_distances)
    predicted_distances = positive_distances + negative_distances

    # Calculate ROC curve
    fpr, tpr, thresholds = roc_curve(true_labels, [-dist for dist in predicted_distances])

    # Find the optimal threshold
    optimal_idx = np.argmax(tpr - fpr)
    optimal_threshold = thresholds[optimal_idx]

    return optimal_threshold, fpr, tpr, thresholds, true_labels, predicted_distances

# Use the function with your model and dataloader
optimal_threshold, fpr, tpr, thresholds, true_labels, predicted_distances = calculate_best_accuracy_threshold(siamese_network, siamese_train_dataloader)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65


In [12]:
optimal_threshold

-17.108862

In [13]:
predictions = predicted_distances < -optimal_threshold
predictions.astype(np.int)

(predictions == true_labels).mean()

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  predictions.astype(np.int)


0.8955861414333175

In [14]:
df = pd.DataFrame({"Predicted Distances": predicted_distances, "Labels": true_labels, "predictions": predictions.astype(int)})

df[df["Labels"] != df["predictions"]]

Unnamed: 0,Predicted Distances,Labels,predictions
32,18.595259,1,0
40,18.192444,1,0
50,17.348272,1,0
68,17.656647,1,0
70,17.674101,1,0
...,...,...,...
4189,16.602791,0,1
4204,11.893538,0,1
4207,16.542631,0,1
4210,15.125884,0,1


In [15]:
df[df["Labels"] == df["predictions"]]

Unnamed: 0,Predicted Distances,Labels,predictions
0,15.209055,1,1
1,16.455830,1,1
2,11.008506,1,1
3,13.422953,1,1
4,13.665083,1,1
...,...,...,...
4206,18.952648,0,0
4208,20.835539,0,0
4209,21.954981,0,0
4211,27.520887,0,0


In [16]:
1 - 447 / 3767

0.8813379346960446