# BEiT + CutMix + CosineEmbeddingLoss

In [None]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# %matplotlib inline

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl

from PIL import Image
from torchvision.transforms import v2
import torchvision.models as models

import cv2
import albumentations as A

import transformers
from transformers import AutoImageProcessor, AutoModel

from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

from argparse import ArgumentParser

# Experiment configuration, expressed as CLI options so the same cell can be
# lifted into a script. Each entry is (flag, keyword arguments for add_argument).
parser = ArgumentParser(description="lowreso_imgclf")
_cli_options = (
    ('--image_pretrained_model', {'default': "beit-base-patch16-224-pt22k-ft22k", 'type': str}),
    ('--image_size', {'default': 224, 'type': int}),
    ('--aug_p', {'default': 1, 'type': float}),
    ('--optimizer', {'default': "adamw", 'type': str}),
    ('--learning_rate', {'default': 0.00003, 'type': float}),
    ('--scheduler', {'default': "cosine", 'type': str}),
    ('--batch_size', {'default': 64, 'type': int}),
    ('--epochs', {'default': 10, 'type': int}),
    ('--cv', {'default': 5, 'type': int}),
    ('--seed', {'default': 826, 'type': int}),
    ('--mixed_precision', {'default': 16, 'type': int}),
    ('--device', {'nargs': '+', 'default': [0], 'type': int}),
    ('--num_workers', {'default': 0, 'type': int}),
)
for _flag, _spec in _cli_options:
    parser.add_argument(_flag, **_spec)

# An explicit empty argument sequence makes argparse use only the defaults
# instead of reading sys.argv.
args = parser.parse_args('')

# Prefer the GPU when one is visible to PyTorch.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Short aliases for the options used throughout the notebook.
SEED = args.seed
CV = args.cv
EPOCHS = args.epochs
BATCH_SIZE = args.batch_size
aug_p = args.aug_p
image_size = args.image_size

def set_seeds(seed=SEED):
    """Seed every random number generator the notebook relies on.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy, PyTorch
            (CPU and every CUDA device) and PyTorch Lightning.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all seeds all GPUs, not only the current one; it is a
    # no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    # Trade cuDNN auto-tuning for repeatable kernel selection.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Pass the argument (not the module-level SEED) so a caller-supplied
    # seed is honoured by Lightning as well.
    pl.seed_everything(seed)

# Seed all RNGs once, before any split, dataset or model is created.
set_seeds()

# Run identifier, used below in the checkpoint and submission file names.
idx = str(args.image_pretrained_model)
idx

## config.py

In [None]:
# BEiT, DINOv2

# Supported backbones: option value -> (Hugging Face hub id, width of the
# pooled feature vector fed to the classifier head).
_IMAGE_BACKBONES = {
    "beit-base-patch16-224-pt22k-ft22k": ("microsoft/beit-base-patch16-224-pt22k-ft22k", 768),  # acc@1 : 85.2
    "dinov2-large": ("facebook/dinov2-large", 1024),
}

try:
    img_model_name, latent_dim = _IMAGE_BACKBONES[args.image_pretrained_model]
except KeyError:
    # Fail here with a clear message instead of a NameError further down.
    raise ValueError(
        f"Unsupported --image_pretrained_model {args.image_pretrained_model!r}; "
        f"expected one of {sorted(_IMAGE_BACKBONES)}"
    ) from None

processor = AutoImageProcessor.from_pretrained(img_model_name)
# img_model = AutoModel.from_pretrained(img_model_name)

In [None]:
train_df = pd.read_csv('data/train.csv')
test_df = pd.read_csv('data/test.csv')

# Re-root every stored path under the local data directory: the first
# character of each path is dropped and "data" is put in its place.
for _frame, _columns in ((train_df, ("img_path", "upscale_img_path")), (test_df, ("img_path",))):
    for _column in _columns:
        _frame[_column] = _frame[_column].map(lambda p: "data" + p[1:])

train_df.head()

In [None]:
# Number of training samples per label, listed in label order.
label_counts = train_df["label"].value_counts()
label_counts.sort_index()

In [None]:
train_labels = train_df["label"]

# Give each distinct label a contiguous integer id, following sorted order.
label_unique = {
    name: class_id
    for class_id, name in enumerate(sorted(np.unique(train_labels)))
}

label_unique

In [None]:
# Replace each label with its integer class id; a label missing from
# label_unique raises KeyError.
train_df["label"] = train_df["label"].map(label_unique.__getitem__)

train_df["label"].head()

## data_loader.py

In [None]:
## torchvision v2

# Batch-level CutMix transform, configured with the number of target classes.
num_classes = len(label_unique)
cutmix = v2.CutMix(num_classes=num_classes)

In [None]:
class ImageDataset(Dataset):
    """Dataset that reads an image from disk and runs it through the image processor.

    Args:
        df: DataFrame with one row per sample. It must contain the column
            named by ``img_path`` and, unless ``is_test`` is true, a
            ``label`` column.
        img_path: Name of the column holding the image file path.
        is_test: When true, no ``labels`` entry is added to the sample.
        transform: Stored on the instance; not applied by ``__getitem__``.
    """

    def __init__(self, df, img_path, is_test=False, transform=None):
        self.df = df
        self.processor = processor  # module-level Hugging Face image processor
        self.img_path = img_path
        self.is_test = is_test
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load and preprocess sample ``idx``.

        Returns:
            The processor output with the leading batch axis removed from
            every tensor, plus a ``labels`` tensor (``torch.long``) when the
            dataset is not in test mode.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        row = self.df.iloc[idx]
        path = row[self.img_path]

        # cv2.imread signals failure by returning None rather than raising.
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        # OpenCV decodes to BGR; convert to RGB before preprocessing.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        encoding = self.processor(images=image, return_tensors="pt")

        # The processor returns a batch of one; drop only that leading axis
        # so the DataLoader can stack samples itself.
        for key in list(encoding.keys()):
            encoding[key] = encoding[key].squeeze(0)

        if not self.is_test:
            encoding["labels"] = torch.tensor(row['label'], dtype=torch.long)

        return encoding

## model.py

In [None]:
class ImageModel(nn.Module):
    """Pretrained image backbone followed by a single linear classification layer."""

    def __init__(self):
        super().__init__()
        # Backbone weights come from the hub id chosen in the config cell.
        self.model = AutoModel.from_pretrained(img_model_name)
        # Linear head: pooled backbone features -> one logit per label.
        self.clf = nn.Linear(in_features=latent_dim, out_features=len(label_unique))

    def forward(self, inputs):
        """Return class logits computed from the backbone's ``pooler_output``."""
        pooled = self.model(inputs).pooler_output
        return self.clf(pooled)

In [None]:
class ImageClassifier(pl.LightningModule):
    """Lightning module training a backbone with cross-entropy plus cosine-embedding loss.

    Args:
        backbone: Module mapping a ``pixel_values`` batch to class logits.
        args: Parsed options; ``optimizer``, ``learning_rate``, ``scheduler``
            and ``epochs`` are read in ``configure_optimizers``.
    """

    def __init__(self, backbone, args):
        super().__init__()
        self.backbone = backbone
        # Keep the options that were passed in so the optimizer setup does
        # not silently depend on a module-level global.
        self.args = args

    def forward(self, x):
        """Return the backbone's logits for ``x``."""
        return self.backbone(x)

    def step(self, batch):
        """Run a forward pass and compute the cross-entropy loss.

        Returns:
            Tuple ``(loss, y, y_hat)`` of the cross-entropy loss, the labels
            taken from ``batch["labels"]`` and the predicted logits.
        """
        x = batch["pixel_values"]
        y = batch["labels"]
        y_hat = self.forward(x)
        loss = F.cross_entropy(y_hat, y)
        return loss, y, y_hat

    @staticmethod
    def _cosine_loss(y_hat, target):
        """Cosine-embedding loss pulling logits towards per-class target vectors."""
        # One "similar pair" flag (+1) per sample, matching the batch size.
        flags = torch.ones(y_hat.size(0), device=y_hat.device)
        return F.cosine_embedding_loss(y_hat, target.float(), flags)

    @staticmethod
    def _macro_f1(y_true, y_hat):
        """Macro F1 between integer class ids ``y_true`` and the argmax of ``y_hat``."""
        # scikit-learn expects (y_true, y_pred) in that order.
        return f1_score(
            y_true.cpu().numpy(), y_hat.argmax(dim=1).cpu().numpy(), average='macro'
        )

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss and F1 for one batch.

        Labels are treated as per-class score vectors here (they are reduced
        with an argmax over dim 1 for the F1 computation).
        """
        loss_ce, y, y_hat = self.step(batch)
        loss = loss_ce + self._cosine_loss(y_hat, y)
        # Score against the highest-weighted class of each label vector.
        f1 = self._macro_f1(y.argmax(dim=1), y_hat)
        self.log('train_loss', loss, on_step=False, on_epoch=True, prog_bar=True)
        self.log("train_f1", f1, on_step=False, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss and F1 for one batch of integer labels."""
        loss_ce, y, y_hat = self.step(batch)
        # Integer labels are one-hot encoded so they can be compared with logits.
        loss = loss_ce + self._cosine_loss(y_hat, F.one_hot(y.long(), len(label_unique)))
        # NOTE(review): F1 is computed per batch and the logged per-batch
        # values are aggregated by Lightning, which is not the same as F1
        # over the whole validation set.
        f1 = self._macro_f1(y, y_hat)
        self.log('val_loss', loss, on_epoch=True, prog_bar=True)
        self.log("val_f1", f1, on_epoch=True, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        """Log macro F1 for one labelled test batch."""
        _, y, y_hat = self.step(batch)
        self.log("test_f1", self._macro_f1(y, y_hat))

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        """Return raw logits for an unlabelled batch."""
        return self.forward(batch["pixel_values"])

    def configure_optimizers(self):
        """Build the optimizer and, optionally, the LR scheduler from ``self.args``.

        Returns:
            The optimizer alone when the scheduler option is ``"none"``,
            otherwise ``([optimizer], [scheduler])``.

        Raises:
            ValueError: If the optimizer or scheduler option is not supported.
        """
        args = self.args
        lr = args.learning_rate

        if args.optimizer == "sgd":
            optimizer = torch.optim.SGD(self.parameters(), lr=lr, momentum=0.9)
        elif args.optimizer == "adam":
            optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        elif args.optimizer == "adamw":
            optimizer = torch.optim.AdamW(self.parameters(), lr=lr)
        else:
            raise ValueError(f"Unsupported optimizer: {args.optimizer!r}")

        if args.scheduler == "none":
            return optimizer
        if args.scheduler == "cosine":
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer=optimizer,
                # Half the run per annealing phase; at least 1 so a
                # one-epoch run does not divide by zero.
                T_max=max(1, args.epochs // 2),
                # True division: floor division of a small float LR gives 0.0.
                eta_min=lr / 10,
            )
            return [optimizer], [scheduler]
        raise ValueError(f"Unsupported scheduler: {args.scheduler!r}")

## main.py

In [None]:
from torch.utils.data import default_collate

def collate_fn(batch):
    """Collate samples into a batch and apply CutMix to it.

    Args:
        batch: List of samples, each providing ``pixel_values`` and ``labels``.

    Returns:
        Dict with the ``pixel_values`` and ``labels`` returned by ``cutmix``.
    """
    stacked = default_collate(batch)
    mixed_images, mixed_labels = cutmix(stacked['pixel_values'], stacked['labels'])
    return {'pixel_values': mixed_images, 'labels': mixed_labels}

In [None]:
## preprocessing.py

val_f1_list = []  # best-checkpoint validation F1, one entry per fold
preds_list = []   # test-set logits, one array per fold

skf = StratifiedKFold(n_splits=CV, shuffle=True, random_state=SEED)

# The test set does not depend on the fold, so build its loader once.
test_ds = ImageDataset(test_df, "img_path", is_test=True)
test_dataloader = DataLoader(
    test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=args.num_workers
)

for i, (train_index, val_index) in enumerate(skf.split(train_df, train_df["label"])):

    temp_df = train_df.iloc[train_index]
    val_df = train_df.iloc[val_index]

    ## data_loaders.py

    # Train on both the original and the upscaled version of every image;
    # validate on the original images only.
    train_ds_low = ImageDataset(temp_df, "img_path", is_test=False)
    train_ds_high = ImageDataset(temp_df, "upscale_img_path", is_test=False)
    train_ds = train_ds_low + train_ds_high
    val_ds = ImageDataset(val_df, "img_path", is_test=False)

    train_dataloader = DataLoader(
        train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=args.num_workers,
        ## torchvision v2
        collate_fn=collate_fn
    )
    val_dataloader = DataLoader(
        val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=args.num_workers
    )

    ## train.py

    model = ImageClassifier(ImageModel(), args)

    checkpoint_cb = pl.callbacks.ModelCheckpoint(
        dirpath="saved/", filename=f"{idx}_{i}",
        monitor="val_f1", mode="max"
    )
    callbacks = [
        # pl.callbacks.EarlyStopping(
        #     monitor="val_f1", patience=5, mode="max"
        # ),
        checkpoint_cb,
    ]

    trainer = pl.Trainer(
        max_epochs=EPOCHS, accelerator="auto", callbacks=callbacks,
        precision=args.mixed_precision, #logger=wandb_logger,
        devices=args.device, #strategy='ddp_find_unused_parameters_true'
    )

    trainer.fit(model, train_dataloader, val_dataloader)

    # Load the file the callback actually wrote. Rebuilding the path by hand
    # can pick up a stale checkpoint from an earlier run when the callback
    # had to choose a different file name.
    ckpt = torch.load(checkpoint_cb.best_model_path, map_location=device)
    model.load_state_dict(ckpt['state_dict'])

    ## test.py

    eval_dict = trainer.validate(model, dataloaders=val_dataloader)[0]
    val_f1_list.append(eval_dict["val_f1"])

    y_preds = trainer.predict(model, dataloaders=test_dataloader)

    # Join the per-batch logits explicitly and store them as float32 on the
    # CPU, so averaging over folds does not happen in half precision.
    y_pred = torch.cat(y_preds).float().cpu().numpy()
    # np.save(f'saved/{idx}_{i}', y_pred)

    preds_list.append(y_pred)

val_f1_mean = np.mean(val_f1_list)
print(f"val_f1_mean: {val_f1_mean}")

In [None]:
# Ensemble the folds: average their test-set outputs element-wise, then pick
# the highest-scoring class for each test sample.
y_pred = np.stack(preds_list).mean(axis=0)
# np.save(f'saved/{idx}_ensemble', y_pred)
preds = np.argmax(y_pred, axis=1)

preds.shape

## Submission

In [None]:
# Invert the label -> id mapping and translate predicted ids back to labels.
label_decoder = dict(zip(label_unique.values(), label_unique.keys()))
result = [label_decoder[class_id] for class_id in preds]

In [None]:
# Fill the sample submission with the decoded predictions and write it out
# under the run identifier.
submit = pd.read_csv('data/sample_submission.csv').assign(label=result)
submit.to_csv(f'{idx}.csv', index=False)

submit.head()