In [1]:
# Imports
import numpy as np
from numpy import random as ran
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
from scipy.spatial import distance
import time
from PIL import Image
import random 

In [2]:
# fix seeds
torch.manual_seed(13)
random.seed(13)
np.random.seed(13)

In [3]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True

In [4]:
# mount drive to access data
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
from zipfile import ZipFile
with ZipFile('drive/MyDrive/Data.zip','r') as zipObj:
  zipObj.extractall('.')

with ZipFile('drive/MyDrive/tensorset_covnext_block5.zip','r') as zipObj:
  zipObj.extractall('.')

In [6]:
# dataset to load the feature tensors (storing them as numpy arrays takes much less memory)

class ImgDataset(Dataset):

    def __init__(self, data):
        self.data = data

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):
        filename = 'tensorset/' + str(self.data[idx][0]) + '.npy'
        im1 = np.load(filename)
        filename = 'tensorset/' + str(self.data[idx][1]) + '.npy'
        im2 = np.load(filename)
        filename = 'tensorset/' + str(self.data[idx][2]) + '.npy'
        im3 = np.load(filename)

        im1 = torch.from_numpy(im1)
        im2 = torch.from_numpy(im2)
        im3 = torch.from_numpy(im3)

        return im1,im2,im3

In [7]:
# loading pretrained model and selecting the blocks used for training

model = models.convnext_tiny(pretrained=True)
model.features = nn.Sequential(*[model.features[i] for i in range(6,8)])
model.classifier = nn.Sequential(*[model.classifier[i] for i in range(2)])
model = model.to(device)

In [8]:
fname = 'Data/'
food = fname + 'food/'
train = np.loadtxt(fname + "train_triplets.txt", dtype=str)

In [9]:
# create set of training triplets and set of validation triplets,
# such that both sets do not share any images
# goal: get a precise validation score

unique_images_train = set()
for i in range(train.shape[0]):
    for j in range(train.shape[1]):
        unique_images_train.add(train[i][j])

k = random.sample(unique_images_train, 1500)
train_triplets = list()
val_triplets = list()

for i in train:
    if (i[0] not in k) and (i[1] not in k) and (i[2] not in k): 
        train_triplets.append(i)
    elif (i[0] in k) and (i[1] in k) and (i[2] in k):
        val_triplets.append(i)

print("Triplets in train_set: ",len(train_triplets))
print("Triplets in val_set: ",len(val_triplets))
print("Discarded triplets: ",train.shape[0]-len(train_triplets)-len(val_triplets))

train = np.array(train_triplets)
val = np.array(val_triplets)

Triplets in train_set:  20181
Triplets in val_set:  1652
Discarded triplets:  37682


In [10]:
# triplet loss with cosine similarity as distance function

loss_fn = nn.TripletMarginWithDistanceLoss(distance_function=lambda x, y: 1.0 - nn.functional.cosine_similarity(x, y))

In [11]:
# create datasets and dataloader

train_dataset = ImgDataset(train)
val_dataset = ImgDataset(val)

trainloader = DataLoader(train_dataset, batch_size=16,
                        shuffle=True, num_workers=0, pin_memory=True)

valloader = DataLoader(val_dataset, batch_size=16,
                        shuffle=True, num_workers=0, pin_memory=True)

In [12]:
# define utility functions to compute classification accuracy and
# perform evaluation / testing
cos = nn.CosineSimilarity(dim=1, eps=1e-6)

def accuracy(x,y,z):
    dist1 = cos(x,y)
    dist2 = cos(x,z)
    res = torch.gt(dist1,dist2)
    return torch.sum(res) / x.shape[0]

def evaluate(model: torch.nn.Module) -> torch.Tensor:
    # goes through the test dataset and computes the validation accuracy
    model.eval()  # bring the model into eval mode
    with torch.no_grad():
        acc_cum = 0.0
        num_eval_samples = 0
        for x_batch, y_batch, z_batch in valloader:

            # move data to GPU
            x_batch, y_batch, z_batch = x_batch.to(device), y_batch.to(device), z_batch.to(device)

            # forward pass
            x = model(x_batch)
            y = model(y_batch)
            z = model(z_batch)

            # calculate accuracy
            batch_size = x_batch.shape[0]
            num_eval_samples += batch_size
            acc_cum += accuracy(x,y,z) * batch_size
          
        avg_acc = acc_cum / num_eval_samples
        avg_acc = torch.tensor(avg_acc)
        return avg_acc

In [13]:
# Setup the optimizer (adaptive learning rate method)
optim = torch.optim.Adam(model.parameters(), lr=1e-4)


for epoch in range(10):
    # reset statistics trackers
    train_loss_cum = 0.0
    acc_cum = 0.0
    num_samples_epoch = 0
    t = time.time()
    # Go once through the training dataset (-> epoch)
    count = 0
    for x_batch,y_batch,z_batch in trainloader:
        # zero grads and put model into train mode
        optim.zero_grad()
        model.train()

        # move data to GPU
        x_batch, y_batch, z_batch = x_batch.to(device), y_batch.to(device), z_batch.to(device)
    
        # forward pass
        x = model(x_batch)
        y = model(y_batch)
        z = model(z_batch)
        
        # loss
        loss = loss_fn(x, y, z)
        
        # backward pass and gradient step
        loss.backward()
        optim.step()
        
        # keep track of train stats
        num_samples_batch = x_batch.shape[0]
        num_samples_epoch += num_samples_batch
        train_loss_cum += loss * num_samples_batch
        
        acc_cum += accuracy(x,y,z) * num_samples_batch

        # end epoch after 100 batches
        count += 1
        if count == 100:
            break

    # average the accumulated statistics
    avg_train_loss = train_loss_cum / num_samples_epoch
    avg_acc = acc_cum / num_samples_epoch
    test_acc = evaluate(model)
    epoch_duration = time.time() - t

    # print some infos
    print(f'Epoch {epoch} | Train loss: {train_loss_cum:.4f} | '
          f' Train accuracy: {avg_acc:.4f} | Test accuracy: {test_acc.item():.4f} |'
          f' Duration {epoch_duration:.2f} sec')

    # save checkpoint of model
    if (epoch % 5 == 0 or epoch % 4 == 0 or epoch % 3 == 0 or epoch % 2 == 0) and epoch > 0:
        save_path = f'model_epoch_{epoch}.pt'
        torch.save(model,
                   save_path)
        print(f'Saved model checkpoint to {save_path}')



Epoch 0 | Train loss: 1263.7588 |  Train accuracy: 0.6962 | Test accuracy: 0.7125 | Duration 47.51 sec
Epoch 1 | Train loss: 1046.9452 |  Train accuracy: 0.7344 | Test accuracy: 0.7167 | Duration 44.54 sec
Epoch 2 | Train loss: 1006.9693 |  Train accuracy: 0.7431 | Test accuracy: 0.7161 | Duration 44.44 sec
Saved model checkpoint to model_epoch_2.pt
Epoch 3 | Train loss: 971.3525 |  Train accuracy: 0.7550 | Test accuracy: 0.7143 | Duration 44.42 sec
Saved model checkpoint to model_epoch_3.pt
Epoch 4 | Train loss: 979.2999 |  Train accuracy: 0.7519 | Test accuracy: 0.7258 | Duration 44.42 sec
Saved model checkpoint to model_epoch_4.pt
Epoch 5 | Train loss: 941.6495 |  Train accuracy: 0.7575 | Test accuracy: 0.7282 | Duration 44.44 sec
Saved model checkpoint to model_epoch_5.pt
Epoch 6 | Train loss: 930.0148 |  Train accuracy: 0.7731 | Test accuracy: 0.7306 | Duration 44.48 sec
Saved model checkpoint to model_epoch_6.pt
Epoch 7 | Train loss: 951.3055 |  Train accuracy: 0.7781 | Test accu

In [14]:
# store best performing epoch in drive
!cp "model_epoch_5.pt" "drive/My Drive/covnext_epoch_5.pt"