In [23]:
import os
import gc
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import pytorch_lightning as L
import albumentations as albu
from albumentations.pytorch.transforms import ToTensorV2
import cv2

from glob import glob
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from torchvision.io import read_image
from torchvision.transforms import v2 as  transforms
from torch.utils.data import Dataset, DataLoader
from transformers import Swinv2Config, Swinv2Model, AutoImageProcessor, AutoModelForImageClassification
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

from pytorch_lightning_sam_callback import SAM

In [24]:
class CustomDataset(Dataset):
    def __init__(self, df, path_col, mode='train'):
        self.df = df
        self.path_col = path_col
        self.mode = mode

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        # OpenCV를 사용하여 이미지를 NumPy 배열로 로드
        image = cv2.imread(row[self.path_col])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR에서 RGB로 변환
        image = image / 256.0  # 정규화

        if self.mode in ['train', 'val']:
            label = row['class']
            data = {
                'image': image,
                'label': label
            }
            return data
        elif self.mode == 'inference':
            data = {
                'image': image
            }
            return data

In [25]:
class CustomCollateFn:
    def __init__(self, transform, mode):
        self.mode = mode
        self.transform = transform

    def __call__(self, batch):
        # 변환된 이미지만을 리스트에 저장
        images = [self.transform(image=data['image'])['image'] for data in batch]
        pixel_values = torch.stack(images)

        if self.mode in ['train', 'val']:
            labels = torch.LongTensor([data['label'] for data in batch])
            return {
                'pixel_values': pixel_values,
                'label': labels,
            }
        elif self.mode == 'inference':
            return {
                'pixel_values': pixel_values,
            }

In [26]:
class CustomModel(nn.Module):
    def __init__(self, model):
        super(CustomModel, self).__init__()
        self.model = model
        self.clf = nn.Sequential(
            nn.Tanh(),
            nn.LazyLinear(25),
        )

#     @torch.compile
    def forward(self, x, label=None):
        x = self.model(x).pooler_output
        x = self.clf(x)
        loss = None
        if label is not None:
            loss = nn.CrossEntropyLoss()(x, label)
        probs = nn.LogSoftmax(dim=-1)(x)
        return probs, loss

class LitCustomModel(L.LightningModule):
    def __init__(self, model):
        super().__init__()
        self.model = CustomModel(model)
        self.validation_step_output = []

    def configure_optimizers(self):
        opt = torch.optim.AdamW(self.parameters(), lr=1e-5)
        return opt

    def training_step(self, batch, batch_idx=None):
        x = batch['pixel_values']
        label = batch['label']
        probs, loss = self.model(x, label)
        self.log(f"train_loss", loss, on_step=True, on_epoch=False)
        return loss

    def validation_step(self, batch, batch_idx=None):
        x = batch['pixel_values']
        label = batch['label']
        probs, loss = self.model(x, label)
        self.validation_step_output.append([probs,label])
        return loss

    def predict_step(self, batch, batch_idx=None):
        x = batch['pixel_values']
        probs, _ = self.model(x)
        return probs

    def on_validation_epoch_end(self):
        pred = torch.cat([x for x, _ in self.validation_step_output]).cpu().detach().numpy().argmax(1)
        label = torch.cat([label for _, label in self.validation_step_output]).cpu().detach().numpy()
        score = f1_score(label, pred, average='macro')
        self.log("val_score", score)
        self.validation_step_output.clear()
        return score

In [27]:
SEED = 42
N_SPLIT = 5
BATCH_SIZE = 12
L.seed_everything(SEED)

Seed set to 42


42

In [28]:
train_df = pd.read_csv('./datas/train.csv')
train_df['img_path'] = train_df['img_path'].apply(lambda x: os.path.join('./datas', x))
train_df['upscale_img_path'] = train_df['upscale_img_path'].apply(lambda x: os.path.join('./datas', x))
le = LabelEncoder()
train_df['class'] = le.fit_transform(train_df['label'])
if not len(train_df) == len(os.listdir('./datas/train')):
    raise ValueError()

In [29]:
skf = StratifiedKFold(n_splits=N_SPLIT, random_state=SEED, shuffle=True)

In [30]:
train_transform = albu.Compose([
                             albu.Resize(256, 256, interpolation=cv2.INTER_CUBIC), 
                             albu.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0),
                             albu.OneOf([
                                         albu.HorizontalFlip(p=1),
                                         albu.RandomRotate90(p=1),
                                         albu.VerticalFlip(p=1),
                                         ], p=1),
                             ToTensorV2()
                            ])
val_transform = albu.Compose([
                             albu.Resize(256, 256, interpolation=cv2.INTER_CUBIC),
                             albu.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0),
                             ToTensorV2()
                            ])

train_collate_fn = CustomCollateFn(train_transform, 'train')
val_collate_fn = CustomCollateFn(val_transform, 'val')

In [31]:
for fold_idx, (train_index, val_index) in enumerate(skf.split(train_df, train_df['class'])):
    train_fold_df = train_df.loc[train_index,:]
    val_fold_df = train_df.loc[val_index,:]

    train_dataset = CustomDataset(train_fold_df, 'img_path', mode='train')
    val_dataset = CustomDataset(val_fold_df, 'img_path', mode='val')

    train_dataloader = DataLoader(train_dataset, collate_fn=train_collate_fn, batch_size=BATCH_SIZE)
    val_dataloader = DataLoader(val_dataset, collate_fn=val_collate_fn, batch_size=BATCH_SIZE*2)

    model = Swinv2Model.from_pretrained("microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft")
    lit_model = LitCustomModel(model)

    checkpoint_callback = ModelCheckpoint(
        monitor='val_score',
        mode='max',
        dirpath='./checkpoints/',
        filename=f'swinv2-SAM-fold_idx={fold_idx}'+'-{epoch:02d}-{train_loss:.4f}-{val_score:.4f}',
        save_top_k=1,
        save_weights_only=True,
        verbose=True
    )
    earlystopping_callback = EarlyStopping(monitor="val_score", mode="max", patience=3)
    trainer = L.Trainer(max_epochs=100, accelerator='auto', precision=32, callbacks=[checkpoint_callback, earlystopping_callback, SAM()], val_check_interval=0.5)
    trainer.fit(lit_model, train_dataloader, val_dataloader)

    model.cpu()
    lit_model.cpu()
    del model, lit_model, checkpoint_callback, earlystopping_callback, trainer
    gc.collect()
    torch.cuda.empty_cache()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
g:\내 드라이브\DACON\DACON-Bird_Image_Classification\bird\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:653: Checkpoint directory G:\내 드라이브\DACON\DACON-Bird_Image_Classification\checkpoints exists and is not empty.

  | Name  | Type        | Params
--------------------------------------
0 | model | CustomModel | 195 M 
--------------------------------------
195 M     Trainable params
0         Non-trainable params
195 M     Total params
780.812   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

g:\내 드라이브\DACON\DACON-Bird_Image_Classification\bird\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
g:\내 드라이브\DACON\DACON-Bird_Image_Classification\bird\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

g:\내 드라이브\DACON\DACON-Bird_Image_Classification\bird\lib\site-packages\pytorch_lightning\trainer\call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
g:\내 드라이브\DACON\DACON-Bird_Image_Classification\bird\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:653: Checkpoint directory G:\내 드라이브\DACON\DACON-Bird_Image_Classification\checkpoints exists and is not empty.

  | Name  | Type        | Params
--------------------------------------
0 | model | CustomModel | 195 M 
--------------------------------------
195 M     Trainable params
0         Non-trainable params
195 M     Total params
780.812   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

g:\내 드라이브\DACON\DACON-Bird_Image_Classification\bird\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
g:\내 드라이브\DACON\DACON-Bird_Image_Classification\bird\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

In [None]:
test_df = pd.read_csv('./datas/test.csv')
test_df['img_path'] = test_df['img_path'].apply(lambda x: os.path.join('./datas', x))
if not len(test_df) == len(os.listdir('./datas/test')):
    raise ValueError()

In [None]:
test_transform = albu.Compose([
                             albu.Resize(256, 256, interpolation=cv2.INTER_CUBIC),
                             albu.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, p=1.0),
                             ToTensorV2()
                            ])

test_collate_fn = CustomCollateFn(test_transform, 'inference')
test_dataset = CustomDataset(test_df, 'img_path', mode='inference')
test_dataloader = DataLoader(test_dataset, collate_fn=test_collate_fn, batch_size=BATCH_SIZE*2)

In [None]:
fold_preds = []
for checkpoint_path in glob('./checkpoints/swinv2-large-resize*.ckpt'):
    model = Swinv2Model.from_pretrained("microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft")
    lit_model = LitCustomModel.load_from_checkpoint(checkpoint_path, model=model)
    trainer = L.Trainer( accelerator='auto', precision=32)
    preds = trainer.predict(lit_model, test_dataloader)
    preds = torch.cat(preds,dim=0).detach().cpu().numpy().argmax(1)
    fold_preds.append(preds)
pred_ensemble = list(map(lambda x: np.bincount(x).argmax(),np.stack(fold_preds,axis=1)))

In [None]:
submission = pd.read_csv('./datas/sample_submission.csv')
submission['label'] = le.inverse_transform(pred_ensemble)
submission.to_csv('./submissions/swinv2_large_resize.csv',index=False)