<a href="https://www.kaggle.com/code/rohithbehera/whichone?scriptVersionId=133216295" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [13]:
!pip install timm
import numpy as np
import pandas as pd
import cv2
import torch 
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import timm
import torch.nn as nn
from tqdm import tqdm

[0m

In [14]:
# Root folder of the Kaggle input dataset (presumably the 256x256 resized
# HotelID 2022 training images, judging by the path name).
data_dir = '/kaggle/input/hotelid-2022-train-images-256x256/'
# The image folder and the CSV index both sit directly under the dataset root.
train_dir = data_dir + 'images/'
df = pd.read_csv(data_dir + 'train.csv')

In [15]:
class Customdataset(Dataset):
    """Map-style dataset yielding one image and its class code per table row.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with an ``image_id`` column (file name appended to
        ``image_path``) and a ``hotel_id_code`` column (the target label).
    image_path : str
        Prefix that is concatenated directly with ``image_id``; it must
        therefore already end with a path separator.
    """

    def __init__(self, data, image_path):
        self.data = data
        self.image_path = image_path

    def __len__(self):
        """Return the number of rows in the backing table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the sample at positional row ``idx``.

        Returns
        -------
        dict
            ``"image"``: uint8 array of shape (H, W, 3);
            ``"target"``: the row's ``hotel_id_code`` value.
        """
        item = self.data.iloc[idx]
        item_path = self.image_path + item["image_id"]
        # The context manager releases the file handle deterministically.
        # Forcing RGB guarantees a 3-channel array, so grayscale/RGBA/palette
        # files cannot produce a shape that breaks batching downstream.
        with Image.open(item_path) as img:
            image = np.array(img.convert("RGB"), dtype=np.uint8)
        return {
            "image": image,
            "target": item["hotel_id_code"],
        }

In [16]:
# Map every hotel_id to its categorical code and store it as an int64 column;
# this column is what the dataset returns as the training target.
df["hotel_id_code"] = df["hotel_id"].astype("category").cat.codes.to_numpy(dtype=np.int64)

In [17]:
# Lookup table of the distinct rows that remain once the per-image column is
# removed, re-indexed from 0.
hotel_id_code_df = (
    df.drop(columns=["image_id"])
    .drop_duplicates()
    .reset_index(drop=True)
)

In [18]:
# Dataset over the full training table, reading images from train_dir.
data = Customdataset(data=df, image_path=train_dir)

In [19]:
class EmbeddingModel(nn.Module):
    """timm backbone followed by an embedding layer and a classification layer.

    Parameters
    ----------
    number_of_classes : int
        Output size of the final classification layer.
    embedding_size : int
        Size of the embedding vector returned by ``forward``.
    backbone_name : str
        Model name passed to ``timm.create_model`` (pretrained weights are
        requested).
    """

    def __init__(self, number_of_classes, embedding_size, backbone_name):
        super().__init__()
        self.backbone = timm.create_model(
            backbone_name, num_classes=number_of_classes, pretrained=True
        )
        # Read the feature width from the backbone's own head before it is removed.
        output_nodes = self.backbone.get_classifier().in_features
        # Use timm's API to strip the head instead of assigning to
        # ``backbone.classifier``: the head attribute has different names
        # across architectures (e.g. ``fc``, ``head``), and overwriting the
        # wrong attribute would silently leave the original head in place.
        self.backbone.reset_classifier(0)
        self.embedding = nn.Linear(output_nodes, embedding_size)
        self.classifier = nn.Linear(embedding_size, number_of_classes)

    def embed_and_classify(self, x):
        """Return ``(embedding, logits)`` for the input batch ``x``."""
        x = self.forward(x)
        return x, self.classifier(x)

    def forward(self, x):
        """Return the embedding for the input batch ``x``."""
        x = self.backbone(x)
        # Flatten everything after the batch dimension before the linear layer.
        x = x.view(x.size(0), -1)
        x = self.embedding(x)
        return x

In [20]:
def step(args,model,loader,optimizer,criterion,scheduler,epoch):
    """Train ``model`` for one pass over ``loader``.

    Parameters
    ----------
    args : object
        Must expose ``device``, the device batches are moved to.
    model : object
        Must expose ``train()`` and ``embed_and_classify(inputs)`` returning
        ``(embedding, logits)``.
    loader : iterable
        Yields dicts with an ``'image'`` tensor in channel-last layout
        (N, H, W, C) and a ``'target'`` tensor of class indices.
    optimizer, criterion
        Optimizer stepped once per batch and loss applied to ``(logits, targets)``.
    scheduler
        Optional learning-rate scheduler, stepped once per batch when truthy.
    epoch
        Value used only in the printed progress message.

    Returns
    -------
    tuple
        ``(mean batch loss, fraction of samples whose arg-max class equals the target)``.

    Raises
    ------
    ValueError
        If ``loader`` yields no batches.
    """
    batch_losses = []
    batch_targets = []
    batch_predictions = []
    model.train()
    for batch in tqdm(loader):
        optimizer.zero_grad()
        # Move to the device, then reorder NHWC -> NCHW and cast to float.
        inputs = batch['image'].to(args.device).permute(0, 3, 1, 2).float()
        targets = batch['target'].to(args.device)
        _, output = model.embed_and_classify(inputs)
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()
        if scheduler:
            scheduler.step()
        batch_losses.append(loss.item())
        batch_targets.append(targets.detach().cpu().numpy())
        # Arg-max is taken on the raw logits: a sigmoid saturates to 1.0 for
        # large logits and creates ties that can select the wrong class.
        # Keeping only the predicted index also avoids holding an N x C score
        # matrix in memory for the whole epoch.
        batch_predictions.append(output.detach().argmax(dim=1).cpu().numpy())
    if not batch_losses:
        raise ValueError("loader produced no batches; cannot compute loss or score")
    net_loss = np.mean(batch_losses)
    score = np.mean(np.concatenate(batch_targets) == np.concatenate(batch_predictions))
    print(f'at {epoch} the loss , score are {net_loss} , {score}')
    return net_loss , score

In [21]:
def save_checkpoint(model,scheduler,optimizer,start,model_name,loss,score,output_folder=None):
    """Serialize the training state to ``{output_folder}checkpoint-{model_name}.pt``.

    Parameters
    ----------
    model, optimizer
        Objects whose ``state_dict()`` is stored.
    scheduler
        Scheduler whose ``state_dict()`` is stored, or ``None`` (stored as ``None``).
    start
        Epoch number recorded under the ``"epoch"`` key.
    model_name : str
        Used to build the checkpoint file name.
    loss, score
        Metrics stored alongside the state dicts.
    output_folder : str, optional
        Prefix prepended verbatim to the file name (include a trailing
        separator). Defaults to the module-level ``OUTPUT_FOLDER`` when it
        exists, otherwise to the current working directory.
    """
    if output_folder is None:
        # Fall back to the notebook-level constant when it has been defined.
        output_folder = globals().get("OUTPUT_FOLDER", "")
    checkpoint = {
        # The epoch arrives as ``start``; no ``epoch`` name exists in this scope.
        "epoch": start,
        "model": model.state_dict(),
        "scheduler": scheduler.state_dict() if scheduler is not None else None,
        "optimizer": optimizer.state_dict(),
        "loss": loss,
        "score": score,
    }
    torch.save(checkpoint, f"{output_folder}checkpoint-{model_name}.pt")

In [22]:
def train(args,df):
    """Fine-tune an ``EmbeddingModel`` on ``df`` and checkpoint after the last epoch.

    ``args`` must provide ``backbone``, ``epochs``, ``n_classes``,
    ``embedding_size``, ``device``, ``workers``, ``batch_size`` and ``lr``.
    ``df`` is the table handed to ``Customdataset`` together with the
    module-level ``train_dir``.
    """
    model_name = f"fine tuned {args.backbone}"
    total_epochs = args.epochs
    criterion = nn.CrossEntropyLoss()

    model = EmbeddingModel(args.n_classes, args.embedding_size, args.backbone).to(args.device)

    loader = DataLoader(
        Customdataset(df, train_dir),
        num_workers=args.workers,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
    )

    optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr)
    # One-cycle schedule sized for total_epochs * len(loader) steps.
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=args.lr,
        epochs=total_epochs,
        steps_per_epoch=len(loader),
        div_factor=10,
        final_div_factor=1,
        pct_start=0.1,
        anneal_strategy="cos",
    )

    for current_epoch in range(1, total_epochs + 1):
        loss, score = step(args, model, loader, optimizer, criterion, scheduler, current_epoch)
        if current_epoch != total_epochs:
            continue
        # Only the state reached after the final epoch is written to disk.
        save_checkpoint(model, scheduler, optimizer, current_epoch, model_name, loss, score)