# Intro
Inference notebook for [Hotel-ID starter - classification - traning](https://www.kaggle.com/code/michaln/hotel-id-starter-classification-traning)



# Setup

In [1]:
# let's import the sys module 
import sys

# let's append the path to the timm library
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

# Imports

In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random # for generating random numbers
import os # for accessing directory structure
import math # for mathematical operations

In [3]:
from PIL import Image as pil_image # for loading images
from tqdm import tqdm # for progress bars

In [4]:
import torch # for deep learning
import torch.nn as nn # for neural networks
from torch.utils.data import DataLoader # for creating data loaders

import timm # for vision models

# Global

In [5]:
SEED = 42 # we se the seed to a constant value so that we can reproduce the results
IMG_SIZE = 256 # this is the size of the image. As we are using a ResNet-50 model, we need to resize the images to 256x256

PROJECT_FOLDER = "../input/hotel-id-to-combat-human-trafficking-2022-fgvc9/" # path to the project folder
TEST_DATA_FOLDER = PROJECT_FOLDER + "test_images/" # path to the test data folder

In [6]:
print(os.listdir(PROJECT_FOLDER)) # let's see what files are available in the project folder

['sample_submission.csv', 'train_images', 'train_masks', 'test_images']


In [7]:
def seed_everything(seed):# this function sets the seed for all the libraries
    random.seed(seed) # sets the seed for python built-in pseudo-random generator
    os.environ['PYTHONHASHSEED'] = str(seed) # sets the seed for hashing objects
    np.random.seed(seed) # sets the seed for numpy pseudo-random generator
    torch.manual_seed(seed) # this is used to set the seed for numpy pseudio-random generator
    torch.cuda.manual_seed(seed) # this is used to set the seed for numpy pseudio-random generator
    torch.backends.cudnn.deterministic = True # this is a flag that allows us to enable the inbuilt 
    # cudnn auto-tuner to find the fastest convolution algorithm. If set to true, the selection of
    # convolution algorithm will be deterministic. This will slow down the execution.But give us better results 

# Dataset and transformations

In [8]:
import albumentations as A # for image augmentations
import albumentations.pytorch as APT # for converting the images to tensors
import cv2  # for image processing

base_transform = A.Compose([ # this is the base transform that we will use for training
    A.ToFloat(), # converts the image to float
    APT.transforms.ToTensorV2(), # converts the image to tensor
])

In [9]:
"""
This function takes an image as input and pads it to make it a square image
get the width, height and the channel of the image. If the width is greater than the height then create a
new image with the width as the new height, and the height as the new width  
"""
def pad_image(img): # this function pads the image to make it a square image
    w, h, c = np.shape(img) # get the width, height and the channel of the image
    if w > h: # if the width is greater than the height then create a new image with the width as the new height, and the height as the new width
        pad = int((w - h) / 2) # get the padding
        img = cv2.copyMakeBorder(img, 0, 0, pad, pad, cv2.BORDER_CONSTANT, value=0) 
    else: # if the height is greater than the width then create a new image with the height as the new height, and the width as the new width
        pad = int((h - w) / 2) # get the padding
        img = cv2.copyMakeBorder(img, pad, pad, 0, 0, cv2.BORDER_CONSTANT, value=0)
        
    return img # return the padded image

"""
This function takes an image path as input and opens the image, converts it to RGB, pads it and resizes it to 256x256
"""
def open_and_preprocess_image(image_path): # this function opens the image, converts it to RGB, pads it and resizes it to 256x256
    img = cv2.imread(image_path) # open the image
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # convert the image to RGB
    img = pad_image(img) # pad the image
    return cv2.resize(img, (IMG_SIZE, IMG_SIZE)) # resize the image to 256x256

In [10]:
"""
This function takes an image path as input and returns the image after applying the base transform
"""
class HotelImageDataset: # this is the dataset class that we will use to load the data
    def __init__(self, data, transform=None, data_folder="train_images/"): # this is the constructor
        self.data = data # this is the dataframe that contains the data
        self.data_folder = data_folder # this is the folder where the images are stored
        self.transform = transform # this is the transform that we will apply to the images

    def __len__(self): # this function returns the length of the dataset
        return len(self.data) # returns the length of the dataset
    
    def __getitem__(self, idx): # this function returns the item at the given index
        record = self.data.iloc[idx] # get the record at the given index
        image_path = self.data_folder + record["image_id"] # get the image path
        
        image = np.array(open_and_preprocess_image(image_path)).astype(np.uint8) # open the image and preprocess it

        if self.transform: # if the transform is not None then apply the transform
            transformed = self.transform(image=image) # apply the transform
            image = transformed["image"] # get the image from the transformed object
        
        return { # return the image
            "image" : image, # return the image
        }

# Model

In [11]:
"""
this function takes a model, a data loader and a device as input and returns the predictions
"""
class HotelIdModel(nn.Module): # this is the model class that we will use to create the model
    def __init__(self, n_classes=100, backbone_name="efficientnet_b0"): # this is the constructor
        super(HotelIdModel, self).__init__() # call the constructor of the parent class
        
        self.backbone = timm.create_model(backbone_name, num_classes=n_classes, pretrained=False) # create the backbone model

    def forward(self, x): # this function defines the forward pass
        return self.backbone(x) # return the output of the backbone model

# Model helper functions

In [12]:
"""
we define a function called predict that takes three arguments: a data loader, a model and the number of matches.
we initialize an empty list called preds to store the predictions.
we iterate throguh the test loader and get the input and the outputs from the models  
we take the sigmoid of the outputs and convert them to numpy arrays. We then append these predictions to the list preds.
we sort the predictions in descending order and take the top 5 predictions, we then return these predictions.
"""

def predict(loader, model, n_matches=5): 
    preds = [] # initialize an empty list called preds to store the predictions
    with torch.no_grad(): # we do not need to calculate the gradients for the predictions
        t = tqdm(loader) # create a tqdm object
        for i, sample in enumerate(t): # iterate through the test loader
            input = sample['image'].to(args.device) # get the input
            outputs = model(input) # get the outputs from the model
            outputs = torch.sigmoid(outputs).detach().cpu().numpy() # take the sigmoid of the outputs and convert them to numpy arrays
            preds.extend(outputs) # append these predictions to the list preds
    
    # get 5 top predictions
    preds = np.argsort(-np.array(preds), axis=1)[:, :5] # sort the predictions in descending order and take the top 5 predictions
    return preds # return these predictions

# Prepare data

In [13]:
"""
we define a function called get_predictions that takes two arguments: a data loader and a model.
"""
test_df = pd.DataFrame(data={"image_id": os.listdir(TEST_DATA_FOLDER), "hotel_id": ""}).sort_values(by="image_id")

In [14]:
# code hotel_id mapping created in training notebook by encoding hotel_ids
hotel_id_code_df = pd.read_csv('../input/hotel-id-starter-classification-traning/hotel_id_code_mapping.csv')
hotel_id_code_map = hotel_id_code_df.set_index('hotel_id_code').to_dict()["hotel_id"]

# Prepare model

In [15]:
"""
1. we define a function that takes 4 arguments: model_type, backbone_name, checkpoint_path and args.
2. we create an object of the HotelIdModel class and pass the number of classes and the backbone name as arguments.
3. we load the checkpoint and pass it to the model.
4. we load the checkpoint's model state dict to the model.
4. we return the model.
"""
def get_model(model_type, backbone_name, checkpoint_path, args):
    model = HotelIdModel(args.n_classes, backbone_name) # create an object of the HotelIdModel class and pass the number of classes and the backbone name as arguments
        
    checkpoint = torch.load(checkpoint_path) # load the checkpoint
    model.load_state_dict(checkpoint["model"]) # load the checkpoint's model state dict to the model
    model = model.to(args.device) # send the model to the device
    
    return model # return the model

In [16]:
"""
1. Create a dataset object, passing in the dataframe and the base_transform function. 
2. Create a dataloader, passing in the dataset and some configuration parameters:
"""
class args: # this is the class that contains the arguments
    batch_size = 64 # batch size
    num_workers = 2 # number of workers
    n_classes = hotel_id_code_df["hotel_id"].nunique() # number of classes
    """
    import the torch library. Check if a GPU is available. If so use the GPU. If not Use the CPU
    store the result of the check in the variable device. 
    """
    device = ('cuda' if torch.cuda.is_available() else 'cpu') 
    
    
seed_everything(seed=SEED) # seed everything

test_dataset = HotelImageDataset(test_df, base_transform, data_folder=TEST_DATA_FOLDER) # create a dataset object, passing in the dataframe and the base_transform function
test_loader = DataLoader(test_dataset, num_workers=args.num_workers, batch_size=args.batch_size, shuffle=False) # create a dataloader, passing in the dataset and some configuration parameters

In [17]:
model = get_model("classification", "efficientnet_b0",
                  "../input/hotel-id-starter-classification-traning/checkpoint-classification-model-efficientnet_b0-256x256.pt", 
                  args) # get the model

# Submission

In [18]:
%%time

preds = predict(test_loader, model)
# replace classes with hotel_id using mapping created in trainig notebook
preds = [[hotel_id_code_map[b] for b in a] for a in preds]
# transform array of hotel_ids into string
test_df["hotel_id"] = [str(list(l)).strip("[]").replace(",", "") for l in preds]

test_df.to_csv("submission.csv", index=False)
test_df.head()

100%|██████████| 1/1 [00:06<00:00,  6.07s/it]

CPU times: user 834 ms, sys: 780 ms, total: 1.61 s
Wall time: 6.08 s





Unnamed: 0,image_id,hotel_id
0,abc.jpg,197510 73224 83464 23668 49472
