**Hello all! This is a baseline model posted by the hosts for the competition. The model performs image retrieval, returning the k most similar images, using deep metric learning. We used the pytorch-metric-learning library for this model (https://github.com/KevinMusgrave/pytorch-metric-learning). Competitors can use this model as a baseline and improve on it using the different mining functions available in the pytorch-metric-learning library.**

In [None]:
# Install the pytorch-metric-learning library from the attached Kaggle input folder.
# --no-index together with --find-links makes pip resolve packages only from that local folder.
!ls ../input/pytorchmetriclearning
!pip install pytorch_metric_learning --no-index --find-links=file:///kaggle/input/pytorchmetriclearning/ 

In [None]:
# Install the faiss library (faiss-gpu package) from the attached Kaggle input folder, without using a package index.
!pip install faiss-gpu --no-index --find-links=file:///kaggle/input/faiss-gpu/ 

In [None]:
import random
import os
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from torchvision import models , datasets
import torch.optim as optim
import faiss
from torchvision.datasets.folder import default_loader
import torchvision.transforms as transforms
from pytorch_metric_learning import miners, losses, samplers , distances
from pytorch_metric_learning.utils import accuracy_calculator
from PIL import ImageFile
import copy
import csv

In [None]:
# Copy the ResNet-50 weights into the torch hub checkpoint cache for offline use.
# NOTE(review): the destination file name is presumably the one models.resnet50(pretrained=True)
# looks for in this torchvision version, so that no download is attempted - confirm.
!mkdir -p /root/.cache/torch/hub/checkpoints
!cp ../input/resnet50/resnet50.pth /root/.cache/torch/hub/checkpoints/resnet50-19c8e357.pth

In [None]:
#Arguments
BATCH_SIZE = 128  # batch size used by the sampler and by the train/test DataLoaders
WORKERS=15  # number of DataLoader worker processes
OUTPUT_SIZE=256  # dimensionality of the embedding produced by Model
num_epochs = 20  # NOTE(review): not referenced anywhere else in this notebook
trainfile_path = '/kaggle/input/hotel-id-2021-fgvc8/train.csv'  # CSV read by LoadDataset ('image' and 'hotel_id' columns)
lr = 0.00001  # learning rate passed to the Adam optimizer
random.seed(224)  # seeds Python's `random` module only; NOTE(review): torch and numpy RNGs are not seeded here
output_file = 'submission.csv'   # file written by getOutput
model_path = '/kaggle/input/batchallmodel/model_batchall_best.pth'  # state dict loaded into saved_model

In [None]:
#Function for reading the images from train folder
def load_images(data_path):
    image_set = []
    
    for path, subdirs, files in os.walk(data_path):
        for name in files:
            if name.endswith('.jpg'):
                image_set.append(os.path.join(path,name))

    print('Total images', len(image_set))
    return image_set

# Collect the path of every training image.
train_image_set = load_images('/kaggle/input/hotel-id-2021-fgvc8/train_images')

# Positional split: the first 90% of the listed paths are used for training and
# the trailing 10% are held out for validation (no shuffling happens here).
split_index = int(len(train_image_set) * 0.9)
train_images, val_images = train_image_set[:split_index], train_image_set[split_index:]

print('Training images -', len(train_images), 'validation images -', len(val_images))

**Custom Dataloader for PyTorch**  
 The custom LoadDataset class subclasses the PyTorch Dataset, since our training metadata requires extracting the image ids and their labels.  
 The class takes the training-set CSV file and builds the dataset as a mapping from images to labels. A custom collate function is used because some of the images (being crowdsourced) do not load properly; such images are dropped from the batch during training.  
 Alternatively, the __getitem__ function could replace an image that fails to load with a different image by using a random value for "index". We have not done that here.

In [None]:

def _extract_ids(im_path):
    """Return the last ``os.sep``-separated component of ``im_path`` (the image file name)."""
    return im_path.rsplit(os.sep, 1)[-1]

# Custom collate function for PyTorch DataLoaders that tolerates images which failed to load.
# Samples that the dataset returned as None are dropped; the remaining ones are batched by the default collate.
def collate_fn(batch):
    loaded = [sample for sample in batch if sample is not None]
    return torch.utils.data.dataloader.default_collate(loaded)

class LoadDataset(torch.utils.data.Dataset):
    """Dataset of image paths, optionally labelled through a CSV mapping file.

    With a ``csv_file`` (columns 'image' and 'hotel_id') every sample is a
    ``(path, class_index)`` pair. Without one (e.g. an unlabelled test set)
    every sample is just a path and its position in the dataset stands in for
    the target.

    Args:
        paths: Image file paths to include.
        csv_file: Path of the CSV that maps image file names to hotel ids, or
            None for unlabelled data.
        classes: Optional ordered list of hotel ids; the position of an id is
            its class index. When omitted, the sorted unique hotel ids of the
            CSV are used.
        transform: Optional callable applied to every loaded RGB image.
    """

    def __init__(self, paths, csv_file, classes=None, transform=None):
        self.paths = paths
        self.mapping_file = pd.read_csv(csv_file) if csv_file is not None else None
        if classes is None:
            self.classes, self.class_to_idx = self._find_classes()
        else:
            self.classes = classes
            self.class_to_idx = {hotel_id: idx for idx, hotel_id in enumerate(classes)}

        self.samples = self._make_dataset()
        if self.mapping_file is not None:
            self.targets = [target for _, target in self.samples]
        else:
            # Unlabelled samples are bare paths; mirror __getitem__, which uses
            # the sample index as the target in that case.
            self.targets = list(range(len(self.samples)))

        self.transform = transform

    #function will create a class to index mapping for the classes in the training set. The validation and test sets will use the same mappings for their images.
    def _find_classes(self):
        """Return ``(classes, class_to_idx)`` built from the CSV's 'hotel_id' column.

        Raises:
            ValueError: If no CSV mapping file was given, so classes cannot be derived.
        """
        if self.mapping_file is None:
            raise ValueError("classes must be provided when csv_file is None")
        classes = sorted(set(self.mapping_file['hotel_id'].tolist()))
        class_to_idx = {hotel_id: idx for idx, hotel_id in enumerate(classes)}
        return classes, class_to_idx

    def _make_dataset(self):
        """Build the sample list for ``self.paths``.

        Returns:
            A list of ``(path, class_index)`` tuples when a mapping file is
            available, otherwise a list of paths. Paths whose file name is not
            in the CSV, or whose hotel id is not a known class, are skipped.
        """
        if self.mapping_file is None:
            return list(self.paths)

        # Build the file-name -> hotel-id table once instead of scanning the
        # whole DataFrame for every path.
        image_to_hotel = {}
        images = self.mapping_file['image'].tolist()
        hotel_ids = self.mapping_file['hotel_id'].tolist()
        for image, hotel_id in zip(images, hotel_ids):
            # setdefault keeps the first row for a duplicated image name.
            image_to_hotel.setdefault(image, hotel_id)

        samples = []
        for path in self.paths:
            # basename is the stdlib way to take the file-name component.
            img_id = os.path.basename(path)
            if img_id not in image_to_hotel:
                continue
            hotel_id = image_to_hotel[img_id]
            if hotel_id in self.class_to_idx:
                samples.append((path, self.class_to_idx[hotel_id]))

        return samples

    #if image cannot be loaded, set it to None
    def _default_loader(self, path):
        """Open ``path`` and return it as an RGB image, or None if it cannot be loaded."""
        try:
            with open(path, 'rb') as f:
                with ImageFile.Image.open(f) as img:
                    return img.convert('RGB')
        except Exception:
            # Best effort by design: any unreadable image is reported as None.
            # Exception (not a bare except) keeps KeyboardInterrupt working.
            return None

    def __getitem__(self, index):
        """Return ``(image, target, path)`` for ``index``, or None if the image failed to load.

        The target is the class index for labelled data and ``index`` itself
        for unlabelled data. A None result has to be filtered out by the
        DataLoader's collate function.
        """
        if self.mapping_file is not None:
            path, target = self.samples[index]
        else:
            path = self.samples[index]
            target = index
        image = self._default_loader(path)
        if image is None:
            return None
        if self.transform is not None:
            image = self.transform(image)
        return image, target, path

    def __len__(self):
        """Return the number of samples."""
        return len(self.samples)

*For our data augmentation process*, we use PyTorch data transforms to randomly apply color jitter, rotation and horizontal flips to the images, and to take random resized crops of size 224x224. For the test set we only take center crops of size 224 to match the training size.
The normalization uses the ImageNet mean and standard-deviation values. We use these values since the CNN model we are using is a ResNet-50 pre-trained on ImageNet.


In [None]:
# Channel statistics used for normalisation in both the train and the test pipeline.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

#Data augmentation for training data
# With probability 0.2 the colour-jitter / rotation / flip group is applied as a whole;
# every image then gets a random resized crop of 224, tensor conversion and normalisation.
random_augmentation = transforms.RandomApply(
    [
        transforms.ColorJitter(brightness=0.5, contrast=0.3, saturation=0.4, hue=0.2),
        transforms.RandomRotation(30),
        transforms.RandomHorizontalFlip(p=0.3),
    ],
    p=0.2,
)
train_transforms = [
    random_augmentation,
    transforms.RandomResizedCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
]

#No augmentation for test set, center crop of 224 to match the training data
test_transforms = [
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
]

# Labelled training dataset; its class list is reused later for the test dataset.
train_folder = LoadDataset(
    train_images,
    trainfile_path,
    transform=transforms.Compose(train_transforms),
)

#val_query_folder = LoadDataset(val_images, trainfile_path,classes=train_folder.classes,
#                                              transform=transforms.Compose(test_transforms))

**Creating batch sampler with 8 image samples per class**  
The pytorch-metric-learning MPerClassSampler creates a batch sampling with 8 images per class in each batch.
The PyTorch DataLoader divides the training set into batches of size 128 using this sampler. Since the sampler itself takes care of shuffling the images, shuffle is set to False for the DataLoader.  
For training purposes we have set aside 10% of the images as the validation set.


In [None]:

# Sampler configured with m=8 samples per class; one iteration covers len(train_folder) indices.
sampler = samplers.MPerClassSampler(
    train_folder.targets,
    m=8,
    batch_size=BATCH_SIZE,
    length_before_new_iter=len(train_folder),
)
# shuffle stays False because ordering is delegated to the sampler;
# collate_fn drops samples whose image could not be loaded.
train_loader = torch.utils.data.DataLoader(
    train_folder,
    batch_size=BATCH_SIZE,
    shuffle=False,
    sampler=sampler,
    num_workers=WORKERS,
    pin_memory=True,
    collate_fn=collate_fn,
)
#val_query_loader = torch.utils.data.DataLoader(val_query_folder, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=True,collate_fn=collate_fn)


**#Model for training using pretrained Resnet-50**  
A pre-trained ResNet-50 model is used for training the baseline model for the competition. The final layer of the ResNet-50 model is replaced by a fully connected layer that outputs 256-dimensional embeddings.

In [None]:

class Model(nn.Module):
    """Pre-trained ResNet-50 feature extractor followed by a linear embedding layer.

    Args:
        output_dim: Size of the embedding vector returned for each input image.
    """

    def __init__(self, output_dim):
        super().__init__()
        self.resnet_model = models.resnet50(pretrained=True)
        # Remember the width of the original classifier input and keep its pooling layer.
        self.num_features = self.resnet_model.fc.in_features
        self.pool = self.resnet_model.avgpool
        # Drop the last two child modules of the network; pooling is re-applied
        # explicitly in forward() and the classifier is replaced by final_embedding.
        self.resnet_model = nn.Sequential(*list(self.resnet_model.children())[:-2])
        self.final_embedding = nn.Linear(self.num_features, output_dim)

    def forward(self, x):
        """Return one embedding of size ``output_dim`` per input image.

        The result always keeps the batch dimension, also for a batch of one.
        """
        ft = self.resnet_model(x)
        # flatten(..., 1) collapses everything after the batch dimension.
        # squeeze() would also remove the batch dimension when it is 1 and
        # hand a 1-D vector to the linear layer.
        embedding = torch.flatten(self.pool(ft), 1)
        output = self.final_embedding(embedding)
        return output

# Custom pytorch metric learning accuracy calculation for retrieving k nearest embeddings and knn_labels.
# The original Pytorch metric learning library does not provide an accuracy measure to calculate the accuracy of the embeddings retrieved 
# by our Image Retrieval method. So here we write a custom class to override the Pytorch Metric learning library "AccuracyCalculator" class
# to return the retrieval accuracies for the 1,10,100 nearest(most similar) embeddings found for the query images in the validation set. 
# The calculate_knn_labels function gives us the class labels of the nearest embeddings.
# NOTE(review): the instance created later in this notebook uses k=15 and include=("knn_labels",), so only
# the knn labels are requested there - confirm whether the retrieval@10/@100 metrics are still intended.

class AccCalculator(accuracy_calculator.AccuracyCalculator):
    """AccuracyCalculator extended with a "knn_labels" result and retrieval@k metrics.

    NOTE(review): the ``calculate_<name>`` methods are presumably picked up by name
    by the base class - confirm against the installed pytorch-metric-learning version.
    """
   
    def calculate_knn_labels(self, knn_labels, query_labels, **kwargs):
        """Return ``knn_labels`` unchanged so callers can read them from the accuracy result."""
        return knn_labels
    
    def retrieval_at_k(self, k, knn_labels, query_labels):
        """Score each query by whether its label occurs among its first ``k`` neighbour labels.

        The per-query hit flags are combined by ``maybe_get_avg_of_avgs`` according
        to ``self.avg_of_avgs``.
        """
        curr_knn_labels = knn_labels[:, :k]
        # One boolean per query row: True if any of the k neighbour labels equals the query label.
        accuracy_per_sample = np.apply_along_axis(any, axis=1, arr=(curr_knn_labels == query_labels[:, None]))
        return accuracy_calculator.maybe_get_avg_of_avgs(accuracy_per_sample, query_labels, self.avg_of_avgs)
    
    def calculate_retrieval_at_1(self, knn_labels, query_labels, **kwargs):
        """Retrieval accuracy using only the nearest neighbour."""
        return self.retrieval_at_k(1, knn_labels, query_labels)
    
    def calculate_retrieval_at_10(self, knn_labels, query_labels, **kwargs):
        """Retrieval accuracy using the 10 nearest neighbours."""
        return self.retrieval_at_k(10, knn_labels, query_labels)
    
    def calculate_retrieval_at_100(self, knn_labels, query_labels, **kwargs):
        """Retrieval accuracy using the 100 nearest neighbours."""
        return self.retrieval_at_k(100, knn_labels, query_labels)

    def requires_knn(self):
        """Add "knn_labels" to the base class's list of metrics that need a kNN search."""
        return super().requires_knn() + ["knn_labels"]

# Pytorch-metric-learning batch-all functions.  
This is the part which uses Deep Metric Learning methods to learn the relative distance between image pairs. For this, the model uses a mining function - the Triplet Miner - which finds triplets, i.e. sets of 3 images:  
1. a sample anchor image (an image in the batch)  
2. a positive image which belongs to the same class and has features similar to the anchor.  
3. a negative image from a different class which is most dissimilar to the anchor.  

Using these three images, the Triplet Loss function then optimizes the model to maximize the distance between dissimilar images and minimize the distance between similar images from the same class. The margin value defines the required difference between the anchor-positive distance and the anchor-negative distance.  
The "all" in the function parameters indicates that the functions use all of the mined triplets. 

In [None]:

# Use the GPU when one is available, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#Define our model for training
model = Model(OUTPUT_SIZE).to(device)

optimizer = optim.Adam(model.parameters(), lr=lr)

# The miner and the loss share one cosine-similarity distance and the same margin;
# "all" makes both of them work with every triplet rather than a subset.
distance = distances.CosineSimilarity()
loss = losses.TripletMarginLoss(
    margin=0.2,
    distance=distance,
    triplets_per_anchor="all",
)
miner = miners.TripletMarginMiner(
    margin=0.2,
    distance=distance,
    type_of_triplets="all",
)
# Only the "knn_labels" result is requested, for k=15 neighbours.
acc_calculator = AccCalculator(include=("knn_labels",), k=15)

In [None]:
def train(model,loss,miner,device,train_loader,optimizer,epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Network that maps a batch of images to embeddings.
        loss: Callable ``loss(embeddings, labels, mined_triplets)`` returning a scalar tensor.
        miner: Callable ``miner(embeddings, labels)`` returning the mined triplets;
            its ``num_triplets`` attribute is read for logging.
        device: Device the batches are moved to.
        train_loader: Iterable yielding ``(data, labels, paths)`` batches.
        optimizer: Optimizer updating the parameters of ``model``.
        epoch: Epoch number, used for logging only.

    Returns:
        The mean loss over the batches that completed, or None if no batch did.
    """
    model.train()                                                      #set the model to training mode
    running_loss = 0.0
    completed = 0
    for batch_idx , (data,labels, _) in enumerate(train_loader):
        try:
            data , labels = data.to(device) , labels.to(device)
            optimizer.zero_grad()                                     # gradients accumulate across backward() calls, so clear them first
            embeddings = model(data)
            miner_pairs = miner(embeddings, labels)                    #the miner uses the embeddings to find the triplets
            loss_value = loss(embeddings,labels,miner_pairs)           #the triplet loss is computed on the mined triplets
            loss_value.backward()                                      #perform backprop
            optimizer.step()
            # Accumulate a detached tensor so no host synchronisation is forced per batch.
            running_loss = running_loss + loss_value.detach()
            completed += 1
            if batch_idx %100 ==0:
               print("Epoch {} Iteration {}: Loss = {}, Number of triplets = {}".format(epoch, batch_idx, loss_value.item(), miner.num_triplets))
        except Exception as e:
            # Deliberate best effort: report the failing batch and keep training.
            # Only the shape is printed; dumping the whole tensor floods the log.
            print(e)
            print("batch_idx=",batch_idx,"data shape=",getattr(data, "shape", None))

    if completed == 0:
        return None
    return float(running_loss / completed)

# The embed function is for evaluating and retrieving the embeddings for the validation/test set.
# These embeddings are then used to find the most similar embeddings in the training set to determine the class label.
def embed(data_loader, model):
    """Compute the embeddings of every batch produced by ``data_loader``.

    Args:
        data_loader: Iterable yielding ``(inputs, labels, paths)`` batches.
        model: Network mapping a batch of inputs to embeddings; it is switched
            to evaluation mode.

    Returns:
        Dict with NumPy arrays under 'embeddings' and 'labels', holding the
        batches concatenated in iteration order. Both are empty if the loader
        yields nothing.
    """
    # Run on the device the model lives on, so the function does not depend on
    # a module-level ``device`` variable.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    embedding_chunks = []
    label_chunks = []

    model.eval()                      #set the trained model to evaluation mode.
    with torch.no_grad():
        for i, (inputs, labels, _) in enumerate(data_loader):
            embeddings = model(inputs.to(device))
            embedding_chunks.append(embeddings.detach().cpu())
            label_chunks.append(labels.detach().cpu())
            if i%20 ==0:
                print("Completed: ",i)

    # Concatenate once at the end instead of growing the tensors batch by batch.
    # The leading empty float tensor keeps the previous result for an empty
    # loader and the previous dtype handling of the labels.
    empty = torch.Tensor([])
    outputs = {
        'embeddings': torch.cat([empty] + embedding_chunks),
        'labels': torch.cat([empty] + label_chunks),
    }
    return {key: value.numpy() for key, value in outputs.items()}

#Calculate the retrieval accuracies of the validation embeddings using the training embeddings.
def get_accuracies(acc_calculator, ref_embeddings, query_embeddings, ref_labels, query_labels,
                   embeddings_come_from_same_source=False):
    """Return the 'knn_labels' entry computed by ``acc_calculator`` for the query embeddings.

    Both embedding arrays are passed through ``faiss.normalize_L2`` first; its
    return value is not used, so the caller's arrays are the ones that get normalised.
    """
    #normalizing the embeddings
    for embedding_matrix in (ref_embeddings, query_embeddings):
        faiss.normalize_L2(embedding_matrix)

    # The calculator takes the query set first, then the reference set.
    results = acc_calculator.get_accuracy(
        query_embeddings,
        ref_embeddings,
        query_labels,
        ref_labels,
        embeddings_come_from_same_source,
    )
    return results['knn_labels']


def test(train_loader, query_loader,model,device,outputs,acc_calculator, training=True):
    """Return the kNN labels of the validation queries against the training embeddings.

    Args:
        train_loader: Loader over the reference (training) set.
        query_loader: Loader over the query (validation) set.
        model: Network used to compute the embeddings.
        device: Kept for interface compatibility; not used by this function.
        outputs: Dict that receives (or already holds) the 'train' and
            'validation' embedding dicts.
        acc_calculator: Calculator handed to ``get_accuracies``.
        training: When True (the default) both embedding sets are recomputed
            and stored in ``outputs``. When False the entries already present
            in ``outputs`` are reused. This used to be read from an undefined
            global name.

    Returns:
        The 'knn_labels' result of ``get_accuracies``.
    """
    if training:
        outputs['train']= embed(train_loader,model)
        outputs['validation'] = embed(query_loader,model)

    return get_accuracies(
                acc_calculator,
                ref_embeddings=outputs['train']['embeddings'],
                query_embeddings=outputs['validation']['embeddings'],
                ref_labels=outputs['train']['labels'],
                query_labels=outputs['validation']['labels']
            )

# Function which calculates the map@k which is similar to the metrics used for this competition.
# The y_true is the labels in the validation and the y_pred are the labels returned by the  "get_accuracies" function above.
# Used against the validation set only.
def map_at_k(y_true, y_pred, k):
    """Return the mean over all samples of 1/rank of the first correct prediction.

    For every sample the first ``k`` predictions are scanned in order; the
    sample scores ``1 / (position + 1)`` for the first prediction equal to the
    true label and 0 if none of them matches. Labels are compared after
    conversion to ``int``.

    Args:
        y_true: Sequence of ground-truth labels.
        y_pred: Sequence of per-sample prediction sequences, best first.
        k: Number of leading predictions to consider per sample.

    Returns:
        The average score, or 0.0 when there are no samples.

    Raises:
        ValueError: If a prediction sequence holds fewer than ``k`` entries.
    """
    scores = []
    for true, pred in zip(y_true, y_pred):
        if len(pred) < k:
            raise ValueError(f"Length of each prediction must be equal or greater than {k}!")
        true_label = int(true)
        # 1-based rank of the first hit within the top k, or None if there is none.
        rank = next((i + 1 for i in range(k) if int(pred[i]) == true_label), None)
        scores.append(1 / rank if rank is not None else 0)

    if not scores:
        return 0.0
    return sum(scores) / len(scores)



In [None]:
#For submission to the competition we have used a model trained on our local machine for about 40 epochs
#load the model trained on local machine to evaluate
saved_model = Model(OUTPUT_SIZE)
# map_location places the stored tensors on the device selected for this session.
state_dict = torch.load(model_path, map_location=device)
saved_model.load_state_dict(state_dict)
saved_model = saved_model.to(device)
saved_model.eval()

In [None]:
#loading the test images
test_image_set = load_images('/kaggle/input/hotel-id-2021-fgvc8/test_images')

# No CSV is passed for the test set, so samples are bare paths; the class list
# of the training dataset is reused for the index <-> hotel id mapping.
test_query_folder = LoadDataset(
    test_image_set,
    None,
    classes=train_folder.classes,
    transform=transforms.Compose(test_transforms),
)
# NOTE(review): no collate_fn is passed here, so a test image that fails to load
# (the dataset returns None for it) is not filtered out of the batch.
test_query_loader = torch.utils.data.DataLoader(
    test_query_folder,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=WORKERS,
    pin_memory=True,
)

# Holders for the reference (train) and query (test) embeddings.
outputs = {'train': {}, 'test': {}}

In [None]:
#evaluate the saved model to get the test embeddings 
# The training embeddings are read with torch.load even though the file is named '.csv';
# the cells below index the result with 'embeddings' and 'labels'.
# NOTE(review): torch.load unpickles the file - only load embeddings from a trusted source.
outputs['train']= torch.load('../input/embeddings/embeddings.csv')
outputs['test'] = embed(test_query_loader,saved_model)

In [None]:
#Retrieve the class labels for the k nearest embeddings
# The training embeddings act as the reference set and the test embeddings as the queries.
# The test "labels" are the sample indices assigned by LoadDataset for unlabelled data.
test_knn_labels = get_accuracies(
                acc_calculator,
                ref_embeddings=outputs['train']['embeddings'],
                query_embeddings=outputs['test']['embeddings'],
                ref_labels=outputs['train']['labels'],
                query_labels=outputs['test']['labels']
            )

In [None]:
#function to write the submission file: for every test image, the hotel ids of its nearest training embeddings.
def getOutput(query_data,knn_labels,filename, top_k=5):
    """Write a CSV with the predicted hotel ids of every query image.

    Args:
        query_data: Dataset object providing ``paths`` (image paths, in the same
            order as ``knn_labels``) and ``class_to_idx`` (hotel id -> class index).
        knn_labels: For each image, the class indices of its nearest
            neighbours, nearest first.
        filename: Destination CSV path; columns are 'image' and 'hotel_id'.
        top_k: Maximum number of distinct hotel ids written per image.
            Defaults to 5, the value that was previously hard-coded.
    """
    # Invert the mapping: class index -> hotel id.
    idx_to_hotel_id = {idx: hotel_id for hotel_id, idx in query_data.class_to_idx.items()}

    rows = []
    for img, labels in zip(query_data.paths, knn_labels):
        img_name = img.split(os.sep)[-1]
        # dict.fromkeys removes repeated classes while keeping nearest-first order.
        unique_classes = list(dict.fromkeys(labels))
        hotel_ids = ' '.join('%s' % idx_to_hotel_id[c] for c in unique_classes[:top_k])
        rows.append([img_name, hotel_ids])

    df = pd.DataFrame(rows, columns=['image', 'hotel_id'])
    df.to_csv(filename, index=False)


# Write the submission CSV for the test set.
# NOTE(review): .numpy() assumes test_knn_labels is a CPU torch tensor - confirm for the
# installed pytorch-metric-learning version.
getOutput(test_query_folder,test_knn_labels.numpy(),output_file)