In [1]:
import os
import random
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models
import cv2
import numpy as np
from PIL import Image
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

In [2]:
# Path to the image dataset directory
root_dir = "/data/NNDL/data/image"

# Create a dictionary to group images by car model
car_model_dict = {}

for dirpath, _, filenames in os.walk(root_dir):
    for file in filenames:
        if file.endswith(".jpg"):
            # Extract car model from the path (2nd-level directory)
            parts = dirpath.split(os.sep)
            car_model = parts[-2]  # The 2nd-level directory is the car model
            
            if car_model not in car_model_dict:
                car_model_dict[car_model] = []
            car_model_dict[car_model].append(os.path.join(dirpath, file))

# Function to create positive and negative pairs for car model verification
def create_pairs(car_model_dict, num_pairs=10000):
    pairs = []
    models = list(car_model_dict.keys())
    
    for _ in range(num_pairs):
        # Positive pair (same car model)
        model = random.choice(models)
        if len(car_model_dict[model]) >= 2:
            img1, img2 = random.sample(car_model_dict[model], 2)
            pairs.append((img1, img2, 0))  # label 0 for similar
        
        # Negative pair (different car models)
        model1, model2 = random.sample(models, 2)
        img1 = random.choice(car_model_dict[model1])
        img2 = random.choice(car_model_dict[model2])
        pairs.append((img1, img2, 1))  # label 1 for dissimilar

    return pairs

# Generate 10,000 pairs for car model verification
pairs = create_pairs(car_model_dict, num_pairs=10000)

# Convert to a DataFrame for easier processing
pairs_df = pd.DataFrame(pairs, columns=["img1", "img2", "label"])

# Save the pairs to a CSV file
pairs_df.to_csv("car_model_pairs.csv", index=False)
print("Dataset pairs created and saved as car_model_pairs.csv!")


Dataset pairs created and saved as car_model_pairs.csv!


In [3]:
# Data augmentation and normalization for training and validation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.RandomGrayscale(p=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Custom dataset class for loading and preprocessing image pairs
class CarPairDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.pairs_df = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.pairs_df)

    def __getitem__(self, idx):
        row = self.pairs_df.iloc[idx]
        img1_path = row["img1"]
        img2_path = row["img2"]
        label = torch.tensor(row["label"], dtype=torch.float32)

        img1 = self.load_image(img1_path)
        img2 = self.load_image(img2_path)

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        return (img1, img2), label

    def load_image(self, image_path):
        img = cv2.imread(image_path)
        if img is None:
            print(f"Warning: Could not read {image_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return Image.fromarray(img)

In [8]:
# Load the dataset
csv_file = "car_model_pairs.csv"
car_dataset = CarPairDataset(csv_file=csv_file, transform=transform)

# Split dataset into training and validation sets
total_size = len(car_dataset)
train_size = int(0.8 * total_size)
val_size = total_size - train_size

train_dataset, val_dataset = random_split(car_dataset, [train_size, val_size])

# Create DataLoaders
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=16, pin_memory=True, prefetch_factor=2, persistent_workers=True)
val_dataloader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=16, pin_memory=True, prefetch_factor=2, persistent_workers=True)

print(f"Train set size: {len(train_dataset)}, Validation set size: {len(val_dataset)}")

Train set size: 15970, Validation set size: 3993


In [9]:
# Define Siamese Network
class SiameseNetwork(nn.Module):
    def __init__(self):
        super(SiameseNetwork, self).__init__()
        resnet50 = models.resnet50(pretrained=True)
        self.feature_extractor = nn.Sequential(*list(resnet50.children())[:-1])

        self.fc = nn.Sequential(
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Linear(512, 128)
        )

    def forward_once(self, x):
        x = self.feature_extractor(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

    def forward(self, input1, input2):
        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)
        return output1, output2

# Contrastive loss function
class ContrastiveLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        euclidean_distance = torch.nn.functional.pairwise_distance(output1, output2)
        loss = torch.mean((1 - label) * torch.pow(euclidean_distance, 2) +
                          label * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2))
        return loss

In [10]:


# Training setup
model = SiameseNetwork().cuda()
criterion = ContrastiveLoss(margin=1.0)
optimizer = optim.Adam(model.parameters(), lr=0.0001)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

num_epochs = 50
best_val_loss = float('inf')
early_stop_counter = 0
patience = 5

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    # Training phase
    for (img1, img2), labels in tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        img1, img2, labels = img1.cuda(), img2.cuda(), labels.cuda()
        
        optimizer.zero_grad()
        output1, output2 = model(img1, img2)
        
        loss = criterion(output1, output2, labels)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()

        # Compute accuracy
        similarity_scores = torch.nn.functional.pairwise_distance(output1, output2)
        predictions = (similarity_scores > 0.5).float()  # Assuming 0.5 as the threshold
        correct_predictions += (predictions == labels).sum().item()
        total_samples += labels.size(0)
    
    avg_train_loss = total_loss / len(train_dataloader)
    train_accuracy = correct_predictions / total_samples
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}, Training Accuracy: {train_accuracy:.4f}")

    # Validation phase
    model.eval()
    val_loss = 0.0
    correct_val_predictions = 0
    total_val_samples = 0

    with torch.no_grad():
        for (val_img1, val_img2), val_labels in val_dataloader:
            val_img1, val_img2, val_labels = val_img1.cuda(), val_img2.cuda(), val_labels.cuda()
            val_output1, val_output2 = model(val_img1, val_img2)
            val_loss += criterion(val_output1, val_output2, val_labels).item()

            # Compute validation accuracy
            val_similarity_scores = torch.nn.functional.pairwise_distance(val_output1, val_output2)
            val_predictions = (val_similarity_scores > 0.5).float()
            correct_val_predictions += (val_predictions == val_labels).sum().item()
            total_val_samples += val_labels.size(0)

    avg_val_loss = val_loss / len(val_dataloader)
    val_accuracy = correct_val_predictions / total_val_samples
    print(f"Validation Loss: {avg_val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

    # Early stopping and model saving
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        early_stop_counter = 0
        torch.save(model.state_dict(), "make_verification_resnet50.pth")
        print("Best model saved!")
    else:
        early_stop_counter += 1
        print(f"Early stopping counter: {early_stop_counter}/{patience}")

    # Reduce learning rate if validation loss doesn't improve
    scheduler.step(avg_val_loss)
    
    if early_stop_counter >= patience:
        print("Early stopping triggered. Training stopped.")
        break



Epoch 1/50: 100%|██████████| 125/125 [01:31<00:00,  1.36it/s]

Epoch [1/50], Training Loss: 0.2008, Training Accuracy: 0.6885





Validation Loss: 0.1646, Validation Accuracy: 0.7571
Best model saved!


Epoch 2/50: 100%|██████████| 125/125 [01:32<00:00,  1.36it/s]


Epoch [2/50], Training Loss: 0.1512, Training Accuracy: 0.7915
Validation Loss: 0.1464, Validation Accuracy: 0.8022
Best model saved!


Epoch 3/50: 100%|██████████| 125/125 [01:32<00:00,  1.36it/s]


Epoch [3/50], Training Loss: 0.1300, Training Accuracy: 0.8297
Validation Loss: 0.1445, Validation Accuracy: 0.8014
Best model saved!


Epoch 4/50: 100%|██████████| 125/125 [01:32<00:00,  1.36it/s]


Epoch [4/50], Training Loss: 0.1179, Training Accuracy: 0.8512
Validation Loss: 0.1324, Validation Accuracy: 0.8157
Best model saved!


Epoch 5/50: 100%|██████████| 125/125 [01:31<00:00,  1.37it/s]


Epoch [5/50], Training Loss: 0.1088, Training Accuracy: 0.8734
Validation Loss: 0.1250, Validation Accuracy: 0.8332
Best model saved!


Epoch 6/50: 100%|██████████| 125/125 [01:31<00:00,  1.37it/s]


Epoch [6/50], Training Loss: 0.0988, Training Accuracy: 0.8925
Validation Loss: 0.1224, Validation Accuracy: 0.8427
Best model saved!


Epoch 7/50: 100%|██████████| 125/125 [01:32<00:00,  1.35it/s]


Epoch [7/50], Training Loss: 0.0922, Training Accuracy: 0.9003
Validation Loss: 0.1248, Validation Accuracy: 0.8352
Early stopping counter: 1/5


Epoch 8/50: 100%|██████████| 125/125 [01:32<00:00,  1.35it/s]


Epoch [8/50], Training Loss: 0.0871, Training Accuracy: 0.9089
Validation Loss: 0.1213, Validation Accuracy: 0.8342
Best model saved!


Epoch 9/50: 100%|██████████| 125/125 [01:31<00:00,  1.36it/s]


Epoch [9/50], Training Loss: 0.0807, Training Accuracy: 0.9207
Validation Loss: 0.1156, Validation Accuracy: 0.8510
Best model saved!


Epoch 10/50: 100%|██████████| 125/125 [01:31<00:00,  1.36it/s]


Epoch [10/50], Training Loss: 0.0758, Training Accuracy: 0.9272
Validation Loss: 0.1222, Validation Accuracy: 0.8315
Early stopping counter: 1/5


Epoch 11/50: 100%|██████████| 125/125 [01:31<00:00,  1.36it/s]


Epoch [11/50], Training Loss: 0.0717, Training Accuracy: 0.9349
Validation Loss: 0.1182, Validation Accuracy: 0.8417
Early stopping counter: 2/5


Epoch 12/50: 100%|██████████| 125/125 [01:31<00:00,  1.36it/s]


Epoch [12/50], Training Loss: 0.0678, Training Accuracy: 0.9411
Validation Loss: 0.1187, Validation Accuracy: 0.8445
Early stopping counter: 3/5


Epoch 13/50: 100%|██████████| 125/125 [01:32<00:00,  1.35it/s]


Epoch [13/50], Training Loss: 0.0628, Training Accuracy: 0.9482
Validation Loss: 0.1180, Validation Accuracy: 0.8347
Early stopping counter: 4/5


Epoch 14/50: 100%|██████████| 125/125 [01:31<00:00,  1.37it/s]


Epoch [14/50], Training Loss: 0.0524, Training Accuracy: 0.9625
Validation Loss: 0.1105, Validation Accuracy: 0.8545
Best model saved!


Epoch 15/50: 100%|██████████| 125/125 [01:32<00:00,  1.36it/s]


Epoch [15/50], Training Loss: 0.0455, Training Accuracy: 0.9739
Validation Loss: 0.1112, Validation Accuracy: 0.8500
Early stopping counter: 1/5


Epoch 16/50: 100%|██████████| 125/125 [01:32<00:00,  1.35it/s]


Epoch [16/50], Training Loss: 0.0436, Training Accuracy: 0.9744
Validation Loss: 0.1116, Validation Accuracy: 0.8537
Early stopping counter: 2/5


Epoch 17/50: 100%|██████████| 125/125 [01:31<00:00,  1.37it/s]


Epoch [17/50], Training Loss: 0.0413, Training Accuracy: 0.9781
Validation Loss: 0.1093, Validation Accuracy: 0.8583
Best model saved!


Epoch 18/50: 100%|██████████| 125/125 [01:32<00:00,  1.36it/s]


Epoch [18/50], Training Loss: 0.0398, Training Accuracy: 0.9805
Validation Loss: 0.1116, Validation Accuracy: 0.8512
Early stopping counter: 1/5


Epoch 19/50: 100%|██████████| 125/125 [01:32<00:00,  1.35it/s]


Epoch [19/50], Training Loss: 0.0377, Training Accuracy: 0.9825
Validation Loss: 0.1113, Validation Accuracy: 0.8512
Early stopping counter: 2/5


Epoch 20/50: 100%|██████████| 125/125 [01:32<00:00,  1.36it/s]


Epoch [20/50], Training Loss: 0.0362, Training Accuracy: 0.9833
Validation Loss: 0.1163, Validation Accuracy: 0.8425
Early stopping counter: 3/5


Epoch 21/50: 100%|██████████| 125/125 [01:31<00:00,  1.36it/s]


Epoch [21/50], Training Loss: 0.0356, Training Accuracy: 0.9831
Validation Loss: 0.1140, Validation Accuracy: 0.8507
Early stopping counter: 4/5


Epoch 22/50: 100%|██████████| 125/125 [01:32<00:00,  1.36it/s]


Epoch [22/50], Training Loss: 0.0317, Training Accuracy: 0.9880
Validation Loss: 0.1077, Validation Accuracy: 0.8610
Best model saved!


Epoch 23/50: 100%|██████████| 125/125 [01:32<00:00,  1.35it/s]


Epoch [23/50], Training Loss: 0.0285, Training Accuracy: 0.9910
Validation Loss: 0.1087, Validation Accuracy: 0.8580
Early stopping counter: 1/5


Epoch 24/50: 100%|██████████| 125/125 [01:32<00:00,  1.35it/s]


Epoch [24/50], Training Loss: 0.0278, Training Accuracy: 0.9914
Validation Loss: 0.1125, Validation Accuracy: 0.8487
Early stopping counter: 2/5


Epoch 25/50: 100%|██████████| 125/125 [01:31<00:00,  1.36it/s]


Epoch [25/50], Training Loss: 0.0278, Training Accuracy: 0.9912
Validation Loss: 0.1116, Validation Accuracy: 0.8562
Early stopping counter: 3/5


Epoch 26/50: 100%|██████████| 125/125 [01:31<00:00,  1.36it/s]


Epoch [26/50], Training Loss: 0.0262, Training Accuracy: 0.9918
Validation Loss: 0.1063, Validation Accuracy: 0.8615
Best model saved!


Epoch 27/50: 100%|██████████| 125/125 [01:31<00:00,  1.37it/s]


Epoch [27/50], Training Loss: 0.0252, Training Accuracy: 0.9921
Validation Loss: 0.1107, Validation Accuracy: 0.8547
Early stopping counter: 1/5


Epoch 28/50: 100%|██████████| 125/125 [01:31<00:00,  1.36it/s]


Epoch [28/50], Training Loss: 0.0253, Training Accuracy: 0.9923
Validation Loss: 0.1100, Validation Accuracy: 0.8552
Early stopping counter: 2/5


Epoch 29/50: 100%|██████████| 125/125 [01:31<00:00,  1.36it/s]


Epoch [29/50], Training Loss: 0.0249, Training Accuracy: 0.9930
Validation Loss: 0.1110, Validation Accuracy: 0.8573
Early stopping counter: 3/5


Epoch 30/50: 100%|██████████| 125/125 [01:32<00:00,  1.35it/s]


Epoch [30/50], Training Loss: 0.0238, Training Accuracy: 0.9932
Validation Loss: 0.1126, Validation Accuracy: 0.8540
Early stopping counter: 4/5


Epoch 31/50: 100%|██████████| 125/125 [01:32<00:00,  1.35it/s]


Epoch [31/50], Training Loss: 0.0224, Training Accuracy: 0.9945
Validation Loss: 0.1095, Validation Accuracy: 0.8550
Early stopping counter: 5/5
Early stopping triggered. Training stopped.


In [11]:
# Define the test dataset class
class TestCarPairDataset(Dataset):
    def __init__(self, txt_file, root_dir, transform=None):
        self.pairs = []
        self.root_dir = root_dir
        self.transform = transform
        
        # Read the text file and parse image pairs
        with open(txt_file, "r") as file:
            lines = file.readlines()
            
        for line in lines:
            img1_rel, img2_rel, label = line.strip().split()
            img1_path = os.path.join(root_dir, img1_rel)
            img2_path = os.path.join(root_dir, img2_rel)
            label = 1 - int(label)  # Swap labels (1 -> 0, 0 -> 1)
            self.pairs.append((img1_path, img2_path, label))

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        img1_path, img2_path, label = self.pairs[idx]
        label = torch.tensor(label, dtype=torch.float32)
        
        img1 = self.load_image(img1_path)
        img2 = self.load_image(img2_path)
        
        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)
        
        return (img1, img2), label

    def load_image(self, image_path):
        """Load an image from the given path and convert it to PIL format."""
        img = cv2.imread(image_path)
        if img is None:
            print(f"Warning: Could not read {image_path}")
            img = np.zeros((224, 224, 3), dtype=np.uint8)  # Return a black image if loading fails
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
        return Image.fromarray(img)

# Define the same transformation used during training
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [12]:
# Load the trained model
model = SiameseNetwork().cuda()
model.load_state_dict(torch.load("make_verification_resnet50.pth"))
model.eval()


  model.load_state_dict(torch.load("make_verification_resnet50.pth"))


SiameseNetwork(
  (feature_extractor): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
         

In [13]:
# Path to the test dataset text file
test_txt_file = "/data/NNDL/data/train_test_split/verification/verification_pairs_easy.txt"
test_root_dir = "/data/NNDL/data/image"

# Create the test dataset and DataLoader
test_dataset = TestCarPairDataset(txt_file=test_txt_file, root_dir=test_root_dir, transform=test_transform)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=8, pin_memory=True)

### Easy

In [14]:
# Test the model
correct_predictions = 0
total_samples = 0

with torch.no_grad():
    for (img1, img2), labels in test_dataloader:
        img1, img2, labels = img1.cuda(), img2.cuda(), labels.cuda()
        output1, output2 = model(img1, img2)
        
        similarity_scores = torch.nn.functional.pairwise_distance(output1, output2)
        predictions = (similarity_scores > 0.5).float()  # Using 0.5 as the threshold
        
        correct_predictions += (predictions == labels).sum().item()
        total_samples += labels.size(0)

# Calculate and print accuracy
test_accuracy = correct_predictions / total_samples
print(f"Test Accuracy: {test_accuracy:.4f}")

Test Accuracy: 0.8864


### Medium

In [15]:
# Path to the test dataset text file
test_txt_file = "/data/NNDL/data/train_test_split/verification/verification_pairs_medium.txt"
test_root_dir = "/data/NNDL/data/image"

# Create the test dataset and DataLoader
test_dataset = TestCarPairDataset(txt_file=test_txt_file, root_dir=test_root_dir, transform=test_transform)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=8, pin_memory=True)

In [16]:
# Test the model
correct_predictions = 0
total_samples = 0

with torch.no_grad():
    for (img1, img2), labels in test_dataloader:
        img1, img2, labels = img1.cuda(), img2.cuda(), labels.cuda()
        output1, output2 = model(img1, img2)
        
        similarity_scores = torch.nn.functional.pairwise_distance(output1, output2)
        predictions = (similarity_scores > 0.5).float()  # Using 0.5 as the threshold
        
        correct_predictions += (predictions == labels).sum().item()
        total_samples += labels.size(0)

# Calculate and print accuracy
test_accuracy = correct_predictions / total_samples
print(f"Test Accuracy: {test_accuracy:.4f}")

Test Accuracy: 0.8772


### Hard

In [17]:
# Path to the test dataset text file
test_txt_file = "/data/NNDL/data/train_test_split/verification/verification_pairs_hard.txt"
test_root_dir = "/data/NNDL/data/image"

# Create the test dataset and DataLoader
test_dataset = TestCarPairDataset(txt_file=test_txt_file, root_dir=test_root_dir, transform=test_transform)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=8, pin_memory=True)

In [None]:
# Test the model
correct_predictions = 0
total_samples = 0

with torch.no_grad():
    for (img1, img2), labels in test_dataloader:
        img1, img2, labels = img1.cuda(), img2.cuda(), labels.cuda()
        output1, output2 = model(img1, img2)
        
        similarity_scores = torch.nn.functional.pairwise_distance(output1, output2)
        predictions = (similarity_scores > 0.5).float()  # Using 0.5 as the threshold
        
        correct_predictions += (predictions == labels).sum().item()
        total_samples += labels.size(0)

# Calculate and print accuracy
test_accuracy = correct_predictions / total_samples
print(f"Test Accuracy: {test_accuracy:.4f}")

Test Accuracy: 0.8261


: 