# Task 3
This serves as a template which will guide you through the implementation of this task. It is advised to first read the whole template and get a sense of the overall structure of the code before trying to fill in any of the TODO gaps.
This is the jupyter notebook version of the template. For the python file version, please refer to the file `template_solution.py`.

First, we import necessary libraries:

In [38]:
import numpy as np
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset
import os
import torch
from torchvision import transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from sklearn.model_selection import KFold
# Add any other imports you need here
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [39]:
# The device is automatically set to GPU if available, otherwise CPU
# If you want to force the device to CPU, you can change the line to
# device = torch.device("cpu")
# When using the GPU, it is important that your model and all data are on the
# same device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [40]:
"""
Transform, resize and normalize the images and then use a pretrained model to extract
the embeddings.
"""
# TODO: define a transform to pre-process the images
# The required pre-processing depends on the pre-trained model you choose
# below.
# See https://pytorch.org/vision/stable/models.html#using-the-pre-trained-models
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to (224, 224)
    transforms.ToTensor(),  # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize images
])

train_dataset = datasets.ImageFolder(root="/content/drive/MyDrive/Colab Notebooks/IML FS24/task3/dataset/", transform=train_transforms)
# Hint: adjust batch_size and num_workers to your PC configuration, so that you don't
# run out of memory (VRAM if on GPU, RAM if on CPU)
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=64,
                          shuffle=False,
                          pin_memory=True, num_workers=16)

# TODO: define a model for extraction of the embeddings (Hint: load a pretrained model,
# more info here: https://pytorch.org/vision/stable/models.html)
model = models.resnet18(pretrained=True)
model.to(device)
embedding_size = 512
model.fc = nn.Linear(model.fc.in_features, embedding_size)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# pick your model
num_images = len(train_dataset)
embeddings = np.zeros((num_images, embedding_size))

# TODO: Use the model to extract the embeddings. Hint: remove the last layers of the
# model to access the embeddings the model generates.
# Loop through the dataset and extract embeddings
#for i, (image, _) in enumerate(train_loader):
#    image = image.to(device)
#    with torch.no_grad():
#        features = model(image)
#    embeddings[i * train_loader.batch_size: (i + 1) * train_loader.batch_size] = features.cpu().numpy()
with torch.no_grad():
        start_idx = 0
        for inputs, _ in train_loader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            batch_size = outputs.shape[0]
            embeddings[start_idx:start_idx+batch_size] = outputs.cpu().numpy()
            start_idx += batch_size
np.save('/content/drive/MyDrive/Colab Notebooks/IML FS24/task3/dataset/embeddings_2.npy', embeddings)

In [41]:
def get_data(file, train=True):
    """
    Load the triplets from the file and generate the features and labels.

    input: file: string, the path to the file containing the triplets
          train: boolean, whether the data is for training or testing

    output: X: numpy array, the features
            y: numpy array, the labels
    """
    triplets = []
    with open(file) as f:
        for line in f:
            triplets.append(line)

    # generate training data from triplets
    train_dataset = datasets.ImageFolder(root="/content/drive/MyDrive/Colab Notebooks/IML FS24/task3/dataset/",
                                         transform=None)
    filenames = [s[0].split('/')[-1].replace('.jpg', '') for s in train_dataset.samples]
    embeddings = np.load('/content/drive/MyDrive/Colab Notebooks/IML FS24/task3/dataset/embeddings_2.npy')
    # TODO: Normalize the embeddings
    embeddings /= np.linalg.norm(embeddings, axis=1, keepdims=True)

    #file_to_embedding = {filename: emb for filename, emb in zip(filenames, embeddings)}
    file_to_embedding = {}
    for i in range(len(filenames)):
        file_to_embedding[filenames[i]] = embeddings[i]
    X = []
    y = []
    for t in triplets:
        emb = [file_to_embedding[a] for a in t.split()]
        X.append(np.hstack([emb[0], emb[1], emb[2]]))
        y.append(1)
        if train:
            X.append(np.hstack([emb[0], emb[2], emb[1]]))
            y.append(0)
    X = np.vstack(X)
    y = np.hstack(y)
    return X, y

Hint: adjust batch_size and num_workers to your PC configuration, so that you don't run out of memory (VRAM if on GPU, RAM if on CPU)

In [51]:
def create_loader_from_np(X, y=None, train=True, batch_size=64, shuffle=True, num_workers=4):
    if train:
        dataset = TensorDataset(torch.from_numpy(X).type(torch.float),
                                torch.from_numpy(y).type(torch.long))
    else:
        if y is not None:
            dataset = TensorDataset(torch.from_numpy(X).type(torch.float),
                                    torch.from_numpy(y).type(torch.long))
        else:
            dataset = TensorDataset(torch.from_numpy(X).type(torch.float))
    loader = DataLoader(dataset=dataset,
                        batch_size=batch_size,
                        shuffle=shuffle,
                        pin_memory=True, num_workers=num_workers)
    return loader


TODO: define a model. Here, the basic structure is defined, but you need to fill in the details

In [52]:
class Net(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.fc = nn.Linear(input_size, 1)

    def forward(self, x):
        x = self.fc(x)
        #x = F.relu(x)
        x = torch.sigmoid(x)
        return x

In [53]:
TRAIN_TRIPLETS = '/content/drive/MyDrive/Colab Notebooks/IML FS24/task3/train_triplets.txt'

# load the training data
X, y = get_data(TRAIN_TRIPLETS)
# Create data loaders for the training data
train_loader = create_loader_from_np(X, y, train = True, batch_size=64)
# delete the loaded training data to save memory, as the data loader copies
#del X
#del y

In [54]:
TEST_TRIPLETS = '/content/drive/MyDrive/Colab Notebooks/IML FS24/task3/test_triplets.txt'

# repeat for testing data
X_test, y_test = get_data(TEST_TRIPLETS, train=False)
test_loader = create_loader_from_np(X_test, train = False, batch_size=2048, shuffle=False)
#del X_test
#del y_test

In [55]:
def evaluate_model(model, loader):
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = F.binary_cross_entropy(outputs.squeeze(), labels.float(), reduction='sum')
            total_loss += loss.item()
            predicted = (outputs >= 0.5).int()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    loss = total_loss / total
    acc = correct / total
    return loss, acc

In [56]:
def train_model(train_loader, val_loader):
    model = Net(3 * 512)
    model.to(device)
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    n_epochs = 10
    for epoch in range(n_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels.float())
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch {epoch+1}/{n_epochs}, Train Loss: {epoch_loss:.4f}")

        # Validate the model
        val_loss, val_acc = evaluate_model(model, val_loader)
        print(f"Epoch {epoch+1}/{n_epochs}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

    return model

In [57]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)

val_losses = []
val_accuracies = []

for fold, (train_index, val_index) in enumerate(kf.split(X)):
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    train_loader = create_loader_from_np(X_train, y_train, train=True, batch_size=64)
    val_loader = create_loader_from_np(X_val, y_val, train=False, batch_size=64, shuffle=False)

    model = train_model(train_loader, val_loader)

    val_loss, val_acc = evaluate_model(model, val_loader)
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)

    print(f"Fold {fold+1}: Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}")

print(f"Average Validation Loss: {np.mean(val_losses):.4f}, Average Validation Accuracy: {np.mean(val_accuracies):.4f}")

model.eval()
predictions = []
# Iterate over the test data
with torch.no_grad(): # We don't need to compute gradients for testing
    for [x_batch] in test_loader:
        x_batch= x_batch.to(device)
        predicted = model(x_batch)
        predicted = predicted.cpu().numpy()
        # Rounding the predictions to 0 or 1
        predicted[predicted >= 0.5] = 1
        predicted[predicted < 0.5] = 0
        predictions.append(predicted)
    predictions = np.vstack(predictions)
np.savetxt("/content/drive/MyDrive/Colab Notebooks/IML FS24/task3/results_task3_2.txt", predictions, fmt='%i')

Epoch 1/10, Train Loss: 0.6864
Epoch 1/10, Val Loss: 0.6826, Val Acc: 32.0728
Epoch 2/10, Train Loss: 0.6811
Epoch 2/10, Val Loss: 0.6812, Val Acc: 32.0651
Epoch 3/10, Train Loss: 0.6793
Epoch 3/10, Val Loss: 0.6805, Val Acc: 32.0704
Epoch 4/10, Train Loss: 0.6780
Epoch 4/10, Val Loss: 0.6801, Val Acc: 32.0755
Epoch 5/10, Train Loss: 0.6775
Epoch 5/10, Val Loss: 0.6797, Val Acc: 32.0675
Epoch 6/10, Train Loss: 0.6768
Epoch 6/10, Val Loss: 0.6801, Val Acc: 32.0791
Epoch 7/10, Train Loss: 0.6763
Epoch 7/10, Val Loss: 0.6823, Val Acc: 32.0454
Epoch 8/10, Train Loss: 0.6759
Epoch 8/10, Val Loss: 0.6793, Val Acc: 32.0853
Epoch 9/10, Train Loss: 0.6755
Epoch 9/10, Val Loss: 0.6808, Val Acc: 32.0738
Epoch 10/10, Train Loss: 0.6751
Epoch 10/10, Val Loss: 0.6789, Val Acc: 32.0753
Fold 1: Validation Loss: 0.6789, Validation Accuracy: 32.0753
Epoch 1/10, Train Loss: 0.6862
Epoch 1/10, Val Loss: 0.6829, Val Acc: 32.0688
Epoch 2/10, Train Loss: 0.6809
Epoch 2/10, Val Loss: 0.6819, Val Acc: 32.0780
