# Validation Set Approach

First, we import necessary libraries:

In [1]:
# import libraries
from __future__ import print_function, division

# torch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
# torchvision
from torchvision import datasets, transforms
from torchvision.models import resnet50, ResNet50_Weights
# data handling
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
import time
import os
import copy
from tqdm import tqdm

In [2]:
# The device is automatically set to GPU if available, otherwise CPU
# If you want to force the device to CPU, you can change the line to
# device = torch.device("cpu")
# When using the GPU, it is important that your model and all data are on the 
# same device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

no


In [8]:
"""
Transform, resize and normalize the images and then use a pretrained model to extract 
the embeddings.
"""
if not all(os.path.exists(path) for path in ["dataset/train_embeddings.npy", 
                                             "dataset/valid_embeddings.npy"]):

    # See https://pytorch.org/vision/stable/models.html#using-the-pre-trained-models
    
    # data transforms as found in: https://www.analyticsvidhya.com/blog/2023/02/fast-food-classification-using-transfer-learning-with-pytorch/
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'valid': transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }

    # define the model to embed into from <https://pytorch.org/vision/stable/models.html)
    model = resnet50(weights="DEFAULT") 
    # The dimensionality of a vector embedding is equivalent to the size of the second-to-last layer in the model and, 
    # thus, interchangeable with the vector’s size or length.
    embedding_size = model.fc.in_features
    # remove the last layer (fully connected layer)
    model = nn.Sequential(*(list(model.children())[:-1]))
    # rove the model to device
    model = model.to(device)
    # set the model to evaluation mode
    model.eval()

    # freeze the parameters of the model
    for param in model.parameters(): param.requires_grad = False

    # prepare embeddings for train and validation set
    for setname in ["train", "valid"]:
        dataset = datasets.ImageFolder(root="dataset/", transform=data_transforms[setname])
        # Hint: adjust batch_size and num_workers to your PC configuration, so that you don't 
        # run out of memory (VRAM if on GPU, RAM if on CPU)
        loader = DataLoader(dataset=dataset,
                            batch_size=64,
                            shuffle=False,
                            pin_memory=True, 
                            num_workers=0)


        num_images = len(dataset)

         # extract embeddings from second to last layer of model
        embeddings = []
        with torch.no_grad():
            for img, _ in loader:
                img = img.to(device)
                output = model(img)
                output = output.view(output.size(0), -1)  # Flatten the output
                embeddings.append(output.cpu().numpy())

        # concatenate all embeddings
        embeddings = np.concatenate(embeddings, axis=0) 

        assert embeddings.shape == (num_images, embedding_size)

        # save and notify
        filename = f'dataset/{setname}_embeddings.npy'
        np.save(filename, embeddings)
        print("File <" + filename + "> saved!")

        # rm
        del filename, embeddings

File <dataset/train_embeddings.npy> saved!
File <dataset/valid_embeddings.npy> saved!


In [4]:
def get_data(file, train=True):
    """
    Load the triplets from the file and generate the features and labels.

    input: file: string, the path to the file containing the triplets
          train: boolean, whether the data is for training or testing

    output: X: numpy array, the features
            y: numpy array, the labels
    """
    triplets = []
    with open(file) as f:
        for line in f:
            triplets.append(line.strip())


    # generate training data from triplets
    train_dataset = datasets.ImageFolder(root="dataset/",
                                        transform=None)
    filenames = [s[0].split('/')[-1].split("\\")[-1].replace('.jpg', '') for s in train_dataset.samples]
    embeddings = np.load('dataset/embeddings.npy')
    # Normalize the embeddings
    embeddings = (embeddings - embeddings.mean(axis = 1)[:, np.newaxis]) / embeddings.std(axis = 1)[:, np.newaxis]

    file_to_embedding = {}
    for i in range(len(filenames)):
        file_to_embedding[filenames[i]] = embeddings[i]
    X = []
    y = []
    # use the individual embeddings to generate the features and labels for triplets
    for t in triplets:
        emb = [file_to_embedding[a] for a in t.split()]
        X.append(np.hstack([emb[0], emb[1], emb[2]]))
        y.append(1)
        # Generating negative samples (data augmentation)
        if train:
            X.append(np.hstack([emb[0], emb[2], emb[1]]))
            y.append(0)
    X = np.vstack(X)
    y = np.hstack(y)

    return X, y

Hint: adjust batch_size and num_workers to your PC configuration, so that you don't run out of memory (VRAM if on GPU, RAM if on CPU)

In [5]:
def create_loader_from_np(X, y = None, train = True, batch_size=50, shuffle=True, num_workers = 4):
    """
    Create a torch.utils.data.DataLoader object from numpy arrays containing the data.

    input: X: numpy array, the features
           y: numpy array, the labels
    
    output: loader: torch.data.util.DataLoader, the object containing the data
    """
    if train:
        dataset = TensorDataset(torch.from_numpy(X).type(torch.float), 
                                torch.from_numpy(y).type(torch.float))
    else:
        dataset = TensorDataset(torch.from_numpy(X).type(torch.float))
    loader = DataLoader(dataset=dataset,
                        batch_size=batch_size,
                        shuffle=shuffle,
                        pin_memory=True, num_workers=num_workers)
    return loader

TODO: define a model. Here, the basic structure is defined, but you need to fill in the details

In [6]:
class Net(nn.Module):
    """
    The model class, which defines our classifier.
    """
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(6144, 1024)
        self.fc2 = nn.Linear(1024, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, x):
        """
        The forward pass of the model.

        input: x: torch.Tensor, the input to the model

        output: x: torch.Tensor, the output of the model
        """
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.relu(x)
        x = self.fc4(x)
        return x
    
class Net2(nn.Module):
    """
    The model class, which defines our classifier.
    """
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(6144, 1024)
        self.bn1 = nn.BatchNorm1d(1024)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(1024, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)
        self.dropout3 = nn.Dropout(0.5)
        self.fc4 = nn.Linear(32, 1)

        # Initialize weights using Xavier initialization
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.xavier_uniform_(self.fc3.weight)
        nn.init.xavier_uniform_(self.fc4.weight)

    def forward(self, x):
        """
        The forward pass of the model.

        input: x: torch.Tensor, the input to the model

        output: x: torch.Tensor, the output of the model
        """
        x = self.fc1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.dropout1(x)
        
        x = self.fc2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.dropout2(x)
        
        x = self.fc3(x)
        x = self.bn3(x)
        x = F.relu(x)
        x = self.dropout3(x)
        
        x = self.fc4(x)
        return x
    

    
# define hyperparameters
input_size = int(6144) # 3 * embedding size (2048)
hidden_layer = int(512)
dropout_proba = float(0.5)
output_size = int(1)

In [7]:
from sklearn.model_selection import KFold


TRAIN_TRIPLETS = 'train_triplets.txt'

# load the training data
X, y = get_data(TRAIN_TRIPLETS)

# simple train loader
train_loader_simple = create_loader_from_np(X, y, train = True, batch_size=64)
# create KFold object
n_splits = 10
kfold = KFold(n_splits=n_splits, shuffle = True, random_state=42)
# Create data loaders for the training data
train_loader = {fold: create_loader_from_np(X = X[train_indices], y = y[train_indices], train = True, batch_size=64)
                       for fold, (train_indices, _) in enumerate(kfold.split(X))}
val_loader = {fold: create_loader_from_np(X = X[val_indices], y = y[val_indices], train = True, batch_size=64)
                       for fold, (_, val_indices) in enumerate(kfold.split(X))}
# create the data loader
data_loader = {"Train": copy.deepcopy(train_loader),
               "Valid": copy.deepcopy(val_loader)}

dataset_size = X.shape[0]
# delete the loaded training data to save memory, as the data loader copies
del X, y


In [None]:
TEST_TRIPLETS = 'test_triplets.txt'

# repeat for testing data
X_test, y_test = get_data(TEST_TRIPLETS, train=False)
test_loader = create_loader_from_np(X_test, train = False, batch_size=2048, shuffle=False)
del X_test
del y_test

In [None]:
"""
The training procedure of the model; it accepts the training data, defines the model 
and then trains it.

input: train_loader: torch.data.util.DataLoader, the object containing the training data
    
compute: model: torch.nn.Module, the trained model
"""

model = Net()
model.train()
model.to(device)
n_epochs = 10
# TODO: define a loss function, optimizer and proceed with training. Hint: use the part 
# of the training data as a validation split. After each epoch, compute the loss on the 
# validation split and print it out. This enables you to see how your model is performing 
# on the validation data before submitting the results on the server. After choosing the 
# best model, train it on the whole training data.

# define the loss criterion
criterion_CE = nn.CrossEntropyLoss()
criterion_MSE = nn.MSELoss()

# define possible optimizers
optimizer_sgd = optim.SGD(model.parameters(), lr=0.01, momentum=0.0)
optimizer_adam = optim.Adam(model.parameters(), lr=0.001)

# scheduler to adjust learning rate over epoch iterations
sgd_lr_scheduler = lr_scheduler.StepLR(optimizer_sgd, step_size=7, gamma=0.1)
adam_lr_scheduler = lr_scheduler.StepLR(optimizer_adam, step_size=7, gamma=0.1)


# optimizer = optim.SGD(model.parameters(), lr=0.01)
# scheduler = lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.7, total_iters=n_epochs)
# criterion = nn.MSELoss()
n_epochs = 10
criterion = criterion_MSE
optimizer = optimizer_sgd
scheduler = sgd_lr_scheduler
prediction_threshold = 0.5

# for epoch in range(n_epochs):        
#     print(f'\nepoch={epoch}')
#     running_loss = 0.0      
#     running_corrects = 0

#     for i, [X, y] in enumerate(train_loader_simple):
#         optimizer.zero_grad()
#         output = model(X).squeeze(1)
#         preds = np.where(F.sigmoid(output).detach().numpy() >= prediction_threshold, 1, 0)
#         # print(F.sigmoid(output))
#         # print((preds == y.T.numpy()))
#         loss = criterion(output, y)
#         loss.backward()
#         optimizer.step()
#         running_loss += loss.item()
#         running_corrects += np.sum(preds == y.numpy().T).item()
#         # print(running_corrects)

#         if i % 500 == 499:
#             print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 500:.3f}')
#             print(f'[{epoch + 1}, {i + 1:5d}] acc: {running_corrects / (500*len(y)):.3f}')
#             running_loss = 0.0
#             running_corrects = 0

#     scheduler.step()

In [None]:
since = time.time()

def train_model(model:nn.Sequential, 
                n_epochs:int,
                criterion:nn,
                optimizer:torch.optim,
                scheduler:lr_scheduler,
                data_loader:DataLoader,
                prediction_threshold:float = 0.5):
    """
    This f

    Args:
        model (nn.Sequential): CNN model class
        n_epochs (int): number of epochs to train
        criterion (nn): loss function
        optimizer (nn.optim): optimization  algorithm
        scheduler (lr_scheduler): learning rate adaption over epochs
        data_loader (DataLoader): data loader object holding training and validation data
        prediction_threshold (float, optional): decision boundary for binary classification. Defaults to 0.5.

    Returns:
        dict: best model weigths found
    """

    best_model_wts = copy.deepcopy(model.state_dict())
    best_accuracy = 0.0

    for epoch in range(n_epochs): 
        # info
        print(53 * "#")
        print(20 * "~", f' EPOCH {epoch + 1}/{n_epochs}', 20 * "~")
        print(53 * "#")
        # start time of epoch
        epoch_since = time.time()
        # decide which fold to use
        if epoch % n_epochs == 0: fold = 0

        # train and validation phase looping   
        for train in [True, False]:
            # set phase string
            phase = "Train" if train else "Valid"

            # set model mode
            if train: 
                model.train()
            else:
                model.eval()
            
            # init data loader
            data_loader_phase = copy.deepcopy(data_loader[phase][fold])

            # init epoch loss and corrects
            running_loss = 0.0
            running_corrects = 0
            i = 1

            with tqdm(data_loader_phase, unit = "batch") as tepoch:
            # iterate data loader
                for inputs, labels in tepoch: # tepoch: 
                    # set progressbar description and postfix
                    tepoch.set_description(f"{phase} phase")
                    tepoch.set_postfix({"LOSS": running_loss/i, "ACC": running_corrects/ (i*len(labels))})
                    # copy to device
                    inputs.to(device)
                    labels.to(device)

                    # set the gradient to zero
                    optimizer.zero_grad()

                    # step forward, track only if in train
                    with torch.set_grad_enabled(train):
                        outputs = model(inputs).squeeze(1)# .round().to(torch.long) # removing singleton dimension at axis 1
                        preds = np.where(F.sigmoid(outputs).detach().numpy() >= prediction_threshold, 1, 0)
                        loss = criterion(outputs, labels)

                        # propagate backwards if in train phase
                        if train:
                            loss.backward()
                            optimizer.step()
                

                    # calculate stats
                    running_loss += loss.item()
                    running_corrects += np.sum(preds == labels.numpy()).item()
                    i += 1

                if train: 
                    scheduler.step()

                epoch_loss = running_loss / i
                epoch_accuracy = running_corrects / (i*len(labels))
                # epoch_time = time.time() - epoch_since

                # print(f'Final {phase}: Loss = {epoch_loss:.4f}, Acc = {epoch_accuracy:.4f}, Time = {epoch_time:.0f}s')

                if not train and epoch_accuracy > best_accuracy:
                        best_accuracy = epoch_accuracy
                        best_model_wts = copy.deepcopy(model.state_dict())

        fold += 1

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best Valid Acc: {best_accuracy:4f}')

    # load best model weights
    # model.load_state_dict(best_model_wts)

    return best_model_wts

In [None]:
_ = train_model(model=model,
                n_epochs=10,
                criterion=criterion_MSE, 
                optimizer=optimizer_sgd, 
                scheduler=sgd_lr_scheduler,
                data_loader=data_loader,
                prediction_threshold=0.5
                )

###################################################
~~~~~~~~~~~~~~~~~~~~  EPOCH 1/10 ~~~~~~~~~~~~~~~~~~~~
###################################################


Train phase: 100%|██████████| 1674/1674 [00:41<00:00, 40.83batch/s, LOSS=0.0247, ACC=0.799]
Valid phase: 100%|██████████| 186/186 [00:06<00:00, 30.78batch/s, LOSS=0.023, ACC=0.699] 


###################################################
~~~~~~~~~~~~~~~~~~~~  EPOCH 2/10 ~~~~~~~~~~~~~~~~~~~~
###################################################


Train phase: 100%|██████████| 1674/1674 [00:48<00:00, 34.76batch/s, LOSS=0.0228, ACC=0.802]
Valid phase: 100%|██████████| 186/186 [00:05<00:00, 36.00batch/s, LOSS=0.0196, ACC=0.717] 


###################################################
~~~~~~~~~~~~~~~~~~~~  EPOCH 3/10 ~~~~~~~~~~~~~~~~~~~~
###################################################


Train phase: 100%|██████████| 1674/1674 [00:44<00:00, 37.95batch/s, LOSS=0.0209, ACC=0.805]
Valid phase: 100%|██████████| 186/186 [00:05<00:00, 36.88batch/s, LOSS=0.019, ACC=0.681] 


###################################################
~~~~~~~~~~~~~~~~~~~~  EPOCH 4/10 ~~~~~~~~~~~~~~~~~~~~
###################################################


Train phase: 100%|██████████| 1674/1674 [00:43<00:00, 38.53batch/s, LOSS=0.019, ACC=0.807]
Valid phase: 100%|██████████| 186/186 [00:05<00:00, 35.44batch/s, LOSS=0.0184, ACC=0.705] 


###################################################
~~~~~~~~~~~~~~~~~~~~  EPOCH 5/10 ~~~~~~~~~~~~~~~~~~~~
###################################################


Train phase: 100%|██████████| 1674/1674 [00:44<00:00, 37.72batch/s, LOSS=0.0187, ACC=0.808]
Valid phase: 100%|██████████| 186/186 [00:05<00:00, 37.17batch/s, LOSS=0.0191, ACC=0.695] 


###################################################
~~~~~~~~~~~~~~~~~~~~  EPOCH 6/10 ~~~~~~~~~~~~~~~~~~~~
###################################################


Train phase: 100%|██████████| 1674/1674 [00:52<00:00, 31.83batch/s, LOSS=0.0183, ACC=0.809]
Valid phase: 100%|██████████| 186/186 [00:05<00:00, 33.79batch/s, LOSS=0.0209, ACC=0.693]


###################################################
~~~~~~~~~~~~~~~~~~~~  EPOCH 7/10 ~~~~~~~~~~~~~~~~~~~~
###################################################


Train phase: 100%|██████████| 1674/1674 [00:46<00:00, 35.83batch/s, LOSS=0.0178, ACC=0.811]
Valid phase: 100%|██████████| 186/186 [00:05<00:00, 37.15batch/s, LOSS=0.0244, ACC=0.685] 


###################################################
~~~~~~~~~~~~~~~~~~~~  EPOCH 8/10 ~~~~~~~~~~~~~~~~~~~~
###################################################


Train phase: 100%|██████████| 1674/1674 [00:45<00:00, 36.80batch/s, LOSS=0.0186, ACC=0.807]
Valid phase: 100%|██████████| 186/186 [00:05<00:00, 35.50batch/s, LOSS=0.0159, ACC=0.718] 


###################################################
~~~~~~~~~~~~~~~~~~~~  EPOCH 9/10 ~~~~~~~~~~~~~~~~~~~~
###################################################


Train phase: 100%|██████████| 1674/1674 [00:51<00:00, 32.63batch/s, LOSS=0.0184, ACC=0.808]
Valid phase: 100%|██████████| 186/186 [00:05<00:00, 35.27batch/s, LOSS=0.0161, ACC=0.712] 


###################################################
~~~~~~~~~~~~~~~~~~~~  EPOCH 10/10 ~~~~~~~~~~~~~~~~~~~~
###################################################


Train phase: 100%|██████████| 1674/1674 [00:51<00:00, 32.65batch/s, LOSS=0.0184, ACC=0.809]
Valid phase: 100%|██████████| 186/186 [00:05<00:00, 34.04batch/s, LOSS=0.0152, ACC=0.716] 

Training complete in 8m 59s
Best Valid Acc: 0.717851





OrderedDict([('fc1.weight',
              tensor([[-0.0083, -0.0088,  0.0073,  ...,  0.0126,  0.0089,  0.0012],
                      [-0.0073, -0.0019,  0.0068,  ..., -0.0107, -0.0019, -0.0087],
                      [-0.0121, -0.0036, -0.0009,  ..., -0.0035, -0.0114, -0.0089],
                      ...,
                      [-0.0021,  0.0019,  0.0097,  ..., -0.0054, -0.0083,  0.0122],
                      [ 0.0115, -0.0023, -0.0041,  ..., -0.0013, -0.0029, -0.0019],
                      [ 0.0007, -0.0016,  0.0130,  ...,  0.0094, -0.0063, -0.0009]])),
             ('fc1.bias',
              tensor([ 0.0118, -0.0090, -0.0089,  ..., -0.0056, -0.0100, -0.0016])),
             ('fc2.weight',
              tensor([[-0.0252, -0.0211,  0.0151,  ...,  0.0205,  0.0430, -0.0071],
                      [-0.0205,  0.0068,  0.0029,  ..., -0.0097,  0.0278,  0.0069],
                      [-0.0273, -0.0071,  0.0171,  ..., -0.0232,  0.0262,  0.0302],
                      ...,
                    

In [None]:
train_loader

{0: <torch.utils.data.dataloader.DataLoader at 0x1efe29cb620>,
 1: <torch.utils.data.dataloader.DataLoader at 0x1efe2ea4350>,
 2: <torch.utils.data.dataloader.DataLoader at 0x1efe2ea7e30>,
 3: <torch.utils.data.dataloader.DataLoader at 0x1efe2ea7d70>,
 4: <torch.utils.data.dataloader.DataLoader at 0x1efe2ea7cb0>,
 5: <torch.utils.data.dataloader.DataLoader at 0x1efe2ea7bf0>,
 6: <torch.utils.data.dataloader.DataLoader at 0x1efe2ea7b30>,
 7: <torch.utils.data.dataloader.DataLoader at 0x1efe2ea7a70>,
 8: <torch.utils.data.dataloader.DataLoader at 0x1efe2ea79b0>,
 9: <torch.utils.data.dataloader.DataLoader at 0x1efe2ea78f0>}

In [None]:
"""
The testing procedure of the model; it accepts the testing data and the trained model and 
then tests the model on it.

input: model: torch.nn.Module, the trained model
       loader: torch.data.util.DataLoader, the object containing the testing data
        
compute: None, the function saves the predictions to a results.txt file
"""
model.eval()
predictions = []
# Iterate over the test data
with torch.no_grad(): # We don't need to compute gradients for testing
    for [x_batch] in test_loader:
        x_batch= x_batch.to(device)
        predicted = model(x_batch)
        predicted = predicted.cpu().numpy()
        # Rounding the predictions to 0 or 1
        predicted[predicted >= 0.5] = 1
        predicted[predicted < 0.5] = 0
        predictions.append(predicted)
    predictions = np.vstack(predictions)
np.savetxt("results.txt", predictions, fmt='%i')
print("Results saved to results.txt")

Python(38373) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(38374) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(38375) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(38376) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Results saved to results.txt


In [None]:
import tqdm

for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    progress_bar = tqdm(train_loader)
    for data, target in progress_bar:
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        progress_bar.set_description(f'Loss: {loss.item():.4f}')

print("Training finished.")