In [1]:
import numpy as np
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset
import os
import torch
import torchvision
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tqdm.auto import tqdm


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
from torchvision.models import efficientnet_b3 as ENB3
from torchvision.models import efficientnet_v2_s as ENV2S
from torchvision.models import regnet_y_128gf as RNY128
from torchvision.models import regnet_y_16gf as RNY16

In [3]:
embeddings_file_name = 'dataset/embeddings-RNY16-new.npy'
model_file = 'RNY16-6layer-relu.pth'
filename = 'results-RNY16-6layers-relu-reverse.txt'

In [4]:
def generate_embeddings(embeddings_file_name):
    """
    Transform, resize and normalize the images and then use a pretrained model to extract 
    the embeddings.
    """
    # TODO: define a transform to pre-process the images
    train_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    weights = torchvision.models.RegNet_Y_16GF_Weights.IMAGENET1K_SWAG_E2E_V1
    # train_dataset = datasets.ImageFolder(root="./dataset/", transform=weights.transforms())
    train_dataset = datasets.ImageFolder(root="./dataset/", transform=train_transforms)
    # Hint: adjust batch_size and num_workers to your PC configuration, so that you don't 
    # run out of memory
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=1,
                              shuffle=False,
                              pin_memory=True, num_workers=6)

    # TODO: define a model for extraction of the embeddings (Hint: load a pretrained model,
    #  more info here: https://pytorch.org/vision/stable/models.html)
    # model = nn.Module()
    model = RNY16(weights=weights)
    embeddings = []
    embedding_size = model.fc.in_features  # Dummy variable, replace with the actual embedding size once you pick your model

    num_images = len(train_dataset)
    embeddings = np.zeros((num_images, embedding_size))
    # TODO: Use the model to extract the embeddings. Hint: remove the last layers of the 
    # model to access the embeddings the model generates.

    # remove last layer
    model = nn.Sequential(*list(model.children())[:-1])
    
    model.eval()
    model.to(device)

    # extract embeddings
    for i, (images, _) in enumerate(train_loader):
        embeddings[i] = model(images.to(device)).flatten().cpu().detach().numpy()

    np.save(embeddings_file_name, embeddings)

In [5]:

# generate embedding for each image in the dataset
if(os.path.exists(embeddings_file_name) == False):
    generate_embeddings(embeddings_file_name)

In [6]:
def get_data(file, train=True):
    """
    Load the triplets from the file and generate the features and labels.

    input: file: string, the path to the file containing the triplets
          train: boolean, whether the data is for training or testing

    output: X: numpy array, the features
            y: numpy array, the labels
    """
    triplets = []
    with open(file) as f:
        for line in f:
            triplets.append(line)

    # generate training data from triplets
    train_dataset = datasets.ImageFolder(root="dataset/",
                                         transform=None)
    filenames = [s[0].split('\\')[-1].replace('.jpg', '') for s in train_dataset.samples]
    embeddings = np.load(embeddings_file_name)
    # TODO: Normalize the embeddings across the dataset
    embeddings = StandardScaler().fit_transform(embeddings)

    file_to_embedding = {}
    for i in range(len(filenames)):
        file_to_embedding[filenames[i]] = embeddings[i]
    X = []
    y = []
    test_reverse = []
    # use the individual embeddings to generate the features and labels for triplets
    for t in triplets:
        emb = [file_to_embedding[a] for a in t.split()]
        X.append(np.hstack([emb[0], emb[1], emb[2]]))
        y.append(1)
        # Generating negative samples (data augmentation)
        if train:
            X.append(np.hstack([emb[0], emb[2], emb[1]]))
            y.append(0)
        else:
            test_reverse.append(np.hstack([emb[0], emb[2], emb[1]]))
    X = np.vstack(X)
    y = np.hstack(y)
    if train:
        return X, y
    else:
        test_reverse = np.vstack(test_reverse)
        return X, test_reverse
    

In [7]:
TRAIN_TRIPLETS = 'train_triplets.txt'

# load the training and testing data
X, y = get_data(TRAIN_TRIPLETS)

In [8]:
X.shape[1]

9072

In [9]:
# Hint: adjust batch_size and num_workers to your PC configuration, so that you don't run out of memory
def create_loader_from_np(X, y = None, train = True, batch_size=64, shuffle=True, num_workers = 4):
    """
    Create a torch.utils.data.DataLoader object from numpy arrays containing the data.

    input: X: numpy array, the features
           y: numpy array, the labels
    
    output: loader: torch.data.util.DataLoader, the object containing the data
    """
    if train:
        dataset = TensorDataset(torch.from_numpy(X).type(torch.float), 
                                torch.from_numpy(y).type(torch.long))
    else:
        dataset = TensorDataset(torch.from_numpy(X).type(torch.float))
    loader = DataLoader(dataset=dataset,
                        batch_size=batch_size,
                        shuffle=shuffle,
                        pin_memory=True, num_workers=num_workers)
    return loader

In [10]:
# Create data loaders for the training and testing data
train_loader = create_loader_from_np(X, y, train = True, batch_size=64)

In [11]:
# TODO: define a model. Here, the basic structure is defined, but you need to fill in the details
class Net(nn.Module):
    """
    The model class, which defines our classifier.
    """
    def __init__(self):
        """
        The constructor of the model.
        """
        super().__init__()
        self.fc1 = nn.Linear(X.shape[1], 4096)
        self.fc2 = nn.Linear(4096, 256)
        self.fc3 = nn.Linear(256, 64)
        self.fc4 = nn.Linear(64, 1)

        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.xavier_uniform_(self.fc3.weight)
        nn.init.xavier_uniform_(self.fc4.weight)


    def forward(self, x):
        """
        The forward pass of the model.

        input: x: torch.Tensor, the input to the model

        output: x: torch.Tensor, the output of the model
        """
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=0.3)
        x = F.relu(self.fc2(x))
        x = F.dropout(x, p=0.4)
        x = F.relu(self.fc3(x))
        x = F.dropout(x, p=0.5)
        x = F.sigmoid(self.fc4(x))
        
        return x


In [12]:
def train_model(train_loader):
    """
    The training procedure of the model; it accepts the training data, defines the model 
    and then trains it.

    input: train_loader: torch.data.util.DataLoader, the object containing the training data
    
    output: model: torch.nn.Module, the trained model
    """
    model = Net()
    model.train()
    model.to(device)
    n_epochs = 100
    old_valid_loss = [1, 1, 1, 1, 1]
    # TODO: define a loss function, optimizer and proceed with training. Hint: use the part 
    # of the training data as a validation split. After each epoch, compute the loss on the 
    # validation split and print it out. This enables you to see how your model is performing 
    # on the validation data before submitting the results on the server. After choosing the 
    # best model, train it on the whole training data.
    loss_fun = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
    # optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.3, patience=5, verbose=True)
    train_size = len(train_loader.dataset)
    valid_size = int(train_size * 0.1)
    train_size = train_size - valid_size
    epoch_train_data, epoch_valid_data = torch.utils.data.random_split(train_loader.dataset, [train_size, valid_size])
    epoch_train_loader = create_loader_from_np(epoch_train_data[:][0].numpy(), epoch_train_data[:][1].numpy(), train = True, batch_size=64)
    epoch_valid_loader = create_loader_from_np(epoch_valid_data[:][0].numpy(), epoch_valid_data[:][1].numpy(), train = True, batch_size=64)
    for epoch in tqdm(range(n_epochs)):
        train_loss = 0
        valid_loss = 0
        number_of_correct_train = 0
        number_of_correct_valid = 0
        for [x, t] in epoch_train_loader:
            optimizer.zero_grad()
            output = model(x.to(device)).flatten()
            prediction = (output >= 0.5).type(torch.int)
            t = t.to(device)
            number_of_correct_train += (prediction == t).sum().item()
            loss = loss_fun(output, t.type(torch.float))
            loss.backward()
            optimizer.step()
            train_loss += loss.item() 
        train_loss /= len(epoch_train_loader)
        with torch.no_grad():
            for [x, t] in epoch_valid_loader:
                output = model(x.to(device)).flatten()
                prediction = (output >= 0.5).type(torch.int)
                t = t.to(device)
                number_of_correct_valid += (prediction == t).sum().item()
                loss = loss_fun(output, t.to(device).type(torch.float))
                valid_loss += loss.item()
            valid_loss /= len(epoch_valid_loader)
        scheduler.step(number_of_correct_valid / valid_size)
        print('Epoch: {}, Training Loss: {:.4f}, Validation Loss: {:.4f}'.format(epoch+1, train_loss, valid_loss))
        print('Training Accuracy: {:.4f}, Validation Accuracy: {:.4f}'.format(number_of_correct_train / train_size, number_of_correct_valid / valid_size))
        if (optimizer.param_groups[0]['lr'] < 1e-5):
            break
        old_valid_loss[epoch % 5] = valid_loss
        early_stop_count = 0
        for i in range(5):
            if (old_valid_loss[i] > old_valid_loss[(epoch + 1) % 5]):
                early_stop_count += 1
        if (early_stop_count >= 4):
            print(f'Early stop at {epoch+1}')
            break
    return model

In [13]:
if os.path.isfile(model_file):
    # load the model from the file
    model = Net()
    model.load_state_dict(torch.load(model_file))
    model = model.to(device)
else:
    # define a model and train it
    model = train_model(train_loader)
    torch.save(model.state_dict(), model_file)

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch: 1, Training Loss: 0.5849, Validation Loss: 0.5208
Training Accuracy: 0.7025, Validation Accuracy: 0.7455
Epoch: 2, Training Loss: 0.5041, Validation Loss: 0.5018
Training Accuracy: 0.7596, Validation Accuracy: 0.7582
Epoch: 3, Training Loss: 0.4962, Validation Loss: 0.4947
Training Accuracy: 0.7631, Validation Accuracy: 0.7640
Epoch: 4, Training Loss: 0.4854, Validation Loss: 0.4859
Training Accuracy: 0.7715, Validation Accuracy: 0.7706
Epoch: 5, Training Loss: 0.4694, Validation Loss: 0.4774
Training Accuracy: 0.7834, Validation Accuracy: 0.7784
Epoch: 6, Training Loss: 0.4445, Validation Loss: 0.4536
Training Accuracy: 0.7969, Validation Accuracy: 0.7895
Epoch: 7, Training Loss: 0.4250, Validation Loss: 0.4541
Training Accuracy: 0.8082, Validation Accuracy: 0.7950
Epoch: 8, Training Loss: 0.4069, Validation Loss: 0.4405
Training Accuracy: 0.8172, Validation Accuracy: 0.8011
Epoch: 9, Training Loss: 0.3897, Validation Loss: 0.4284
Training Accuracy: 0.8265, Validation Accuracy:

In [14]:
def test_model(model, loader, reverse_loader, filename='results.txt'):
    """
    The testing procedure of the model; it accepts the testing data and the trained model and 
    then tests the model on it.

    input: model: torch.nn.Module, the trained model
           loader: torch.data.util.DataLoader, the object containing the testing data
        
    output: None, the function saves the predictions to a results.txt file
    """
    model.eval()
    predictions = []
    reverse_predictions = []
    # Iterate over the test data
    with torch.no_grad(): # We don't need to compute gradients for testing
        for [x_batch] in tqdm(loader):
            x_batch= x_batch.to(device)
            predicted = model(x_batch)
            predicted = predicted.cpu().numpy()
            # # Rounding the predictions to 0 or 1
            # predicted[predicted >= 0.5] = 1
            # predicted[predicted < 0.5] = 0
            predictions.append(predicted)
        predictions = np.vstack(predictions)
        for [x_batch] in tqdm(reverse_loader):
            x_batch= x_batch.to(device)
            predicted = model(x_batch)
            predicted = predicted.cpu().numpy()
            # # Rounding the predictions to 0 or 1
            # predicted[predicted >= 0.5] = 0
            # predicted[predicted < 0.5] = 1
            reverse_predictions.append(predicted)
        reverse_predictions = np.vstack(reverse_predictions)
        
    # count how many different bewteen predictions and reverse_predictions
    count = 0
    for i in range(len(predictions)):
        if predictions[i] < 0.5 and reverse_predictions[i] < 0.5:
            count += 1
        elif predictions[i] >= 0.5 and reverse_predictions[i] >= 0.5:
            count += 1
    print(f'There are {count} predisctions are not same! Too sad!')

    # which is more likely to be the correct prediction
    # if it's predictions more like to be 1, then we have 1
    # if it's reverse_predictions more like to be 1, then we have 0
    # if it's predictions more like to be 0, then we have 0
    # if it's reverse_predictions more like to be 0, then we have 1
    final_predictions = np.where(predictions > reverse_predictions, 1, 0)

    np.savetxt(filename, final_predictions, fmt='%i')

In [15]:
# test the model on the test data
TEST_TRIPLETS = 'test_triplets.txt'
X_test, X_reverse = get_data(TEST_TRIPLETS, train=False)
test_loader = create_loader_from_np(X_test, train = False, batch_size=2048, shuffle=False)
test_reverse_loader = create_loader_from_np(X_reverse, train = False, batch_size=2048, shuffle=False)


test_model(model, test_loader, test_reverse_loader, filename)
print("Results saved to", filename)

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

There are 13949 predisctions are not same! Too sad!
Results saved to results-RNY16-6layers-relu-reverse.txt
