In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
cd /content/drive/MyDrive/dacon/lowresol/

In [None]:
# !unzip -qn open.zip -d ./open/

In [1]:
!pip install --quiet timm pytorch_lightning==1.7.7 torchmetrics==0.11.1

DEPRECATION: pytorch-lightning 1.7.7 has a non-standard dependency specifier torch>=1.9.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063


In [1]:
import os
import gc
import warnings
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import pytorch_lightning as L

from torchinfo import summary
from glob import glob
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from torchvision.io import read_image
from torchvision.transforms import v2 as  transforms
from torch.utils.data import Dataset, DataLoader
from transformers import Swinv2Config, Swinv2Model, AutoImageProcessor, AutoModelForImageClassification
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

from pytorch_lightning.loggers import WandbLogger  # wandb logger를 임포트


  from .autonotebook import tqdm as notebook_tqdm


In [28]:
class CFG:
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    NUM_DEVICES = torch.cuda.device_count()
    NUM_WORKERS = os.cpu_count()
    #NUM_CLASSES = 4
    NUM_CLASSES = 25
    EPOCHS = 16
    BATCH_SIZE = (
        32 if torch.cuda.device_count() < 2 
        else (32 * torch.cuda.device_count())
    )
    LR = 0.001
    APPLY_SHUFFLE = True
    SEED = 768
    #HEIGHT = 224
    #WIDTH = 224
    HEIGHT = 224
    WIDTH = 224
    CHANNELS = 3
    #IMAGE_SIZE = (224, 224, 3)
    IMAGE_SIZE = (224, 224, 3)
    
    # Define paths
    #DATASET_PATH = "/content/drive/MyDrive/Colab Notebooks/dataset"
    #TRAIN_PATH = '/content/drive/MyDrive/Colab Notebooks/dataset/train/'
    #TEST_PATH = '/content/drive/MyDrive/Colab Notebooks/dataset/test'
    
# Mute warnings
warnings.filterwarnings("ignore", "is_categorical_dtype")
warnings.filterwarnings("ignore", "use_inf_as_na")

In [3]:
class CustomDataset(Dataset):
    def __init__(self, df, path_col,  mode='train'):
        self.df = df
        self.path_col = path_col
        self.mode = mode

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if self.mode == 'train':
            row = self.df.iloc[idx]
            image = read_image(row[self.path_col])/256.
            label = row['class']
            data = {
                'image':image,
                'label':label
            }
            return data
        elif self.mode == 'val':
            row = self.df.iloc[idx]
            image = read_image(row[self.path_col])/256.
            label = row['class']
            data = {
                'image':image,
                'label':label
            }
            return data
        elif self.mode == 'inference':
            row = self.df.iloc[idx]
            image = read_image(row[self.path_col])/256.
            data = {
                'image':image,
            }
            return data

    def train_transform(self, image):
        pass

In [4]:
class CustomCollateFn:
    def __init__(self, transform, mode):
        self.mode = mode
        self.transform = transform

    def __call__(self, batch):
        if self.mode=='train':
            pixel_values = torch.stack([self.transform(data['image']) for data in batch])
            label = torch.LongTensor([data['label'] for data in batch])
            return {
                'pixel_values':pixel_values,
                'label':label,
            }
        elif self.mode=='val':
            pixel_values = torch.stack([self.transform(data['image']) for data in batch])
            label = torch.LongTensor([data['label'] for data in batch])
            return {
                'pixel_values':pixel_values,
                'label':label,
            }
        elif self.mode=='inference':
            pixel_values = torch.stack([self.transform(data['image']) for data in batch])
            return {
                'pixel_values':pixel_values,
            }

In [18]:
class CustomModel(nn.Module):
    def __init__(self, model):
        super(CustomModel, self).__init__()
        self.model = model
        self.clf = nn.Sequential(
            nn.Tanh(),
            nn.LazyLinear(25),
        )

#     @torch.compile
    def forward(self, x, label=None):
        # original
        # x = self.model(x).pooler_output
        x = self.model(x)
        # pooler_output 대신에 last_hidden_state 사용
        #x = outputs.last_hidden_state[:, 0]  # [CLS] 토큰에 해당하는 벡터 추출
        #x = self.clf(x)
        loss = None
        if label is not None:
            loss = nn.CrossEntropyLoss()(x, label)
        probs = nn.LogSoftmax(dim=-1)(x)
        return probs, loss

class LitCustomModel(L.LightningModule):
    def __init__(self, model):
        super().__init__()
        self.model = CustomModel(model)
        self.validation_step_output = []

    def configure_optimizers(self):
        opt = torch.optim.AdamW(self.parameters(), lr=1e-5)
        return opt

    def training_step(self, batch, batch_idx=None):
        x = batch['pixel_values']
        label = batch['label']
        probs, loss = self.model(x, label)
        self.log(f"train_loss", loss, on_step=True, on_epoch=False)
        return loss

    def validation_step(self, batch, batch_idx=None):
        x = batch['pixel_values']
        label = batch['label']
        probs, loss = self.model(x, label)
        self.validation_step_output.append([probs,label])
        return loss

    def predict_step(self, batch, batch_idx=None):
        x = batch['pixel_values']
        probs, _ = self.model(x)
        return probs

    def validation_epoch_end(self, step_output):
        pred = torch.cat([x for x, _ in self.validation_step_output]).cpu().detach().numpy().argmax(1)
        label = torch.cat([label for _, label in self.validation_step_output]).cpu().detach().numpy()
        score = f1_score(label,pred, average='macro')
        self.log("val_score", score)
        self.validation_step_output.clear()
        return score

In [19]:
SEED = 42
N_SPLIT = 5
BATCH_SIZE = 12

In [20]:
L.seed_everything(SEED)

Global seed set to 42


42

In [8]:
train_df = pd.read_csv('./open/train.csv')
train_df['img_path'] = train_df['img_path'].apply(lambda x: os.path.join('./open', x))
train_df['upscale_img_path'] = train_df['upscale_img_path'].apply(lambda x: os.path.join('./open', x))
le = LabelEncoder()
train_df['class'] = le.fit_transform(train_df['label'])

In [9]:
if not len(train_df) == len(os.listdir('./open/train')):
    raise ValueError()

In [10]:
skf = StratifiedKFold(n_splits=N_SPLIT, random_state=SEED, shuffle=True)

In [29]:
#train_transform = transforms.Compose([
#    transforms.Resize(size=(256,256), interpolation=transforms.InterpolationMode.BICUBIC),
#    transforms.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
#])
#val_transform = transforms.Compose([
#    transforms.Resize(size=(256,256), interpolation=transforms.InterpolationMode.BICUBIC),
#    transforms.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
#])
train_transform = transforms.Compose([
    transforms.Resize(size=(224,224), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
])
val_transform = transforms.Compose([
    transforms.Resize(size=(224,224), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
])

train_collate_fn = CustomCollateFn(train_transform, 'train')
val_collate_fn = CustomCollateFn(val_transform, 'val')

In [30]:
class VisionTransformerModel(nn.Module):
    def __init__(self, backbone_model, name='vision-transformer', 
                 num_classes=CFG.NUM_CLASSES, device=CFG.DEVICE):
        super(VisionTransformerModel, self).__init__()
        
        self.backbone_model = backbone_model
        self.device = device
        self.num_classes = num_classes
        self.name = name
        
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(p=0.2, inplace=True), 
            nn.Linear(in_features=1000, out_features=256, bias=True),
            nn.GELU(),
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(in_features=256, out_features=num_classes, bias=False)
        ).to(device)
        
    def forward(self, image):
        vit_output = self.backbone_model(image)
        return self.classifier(vit_output)

In [31]:
def get_vit_b32_model(
    device: torch.device=CFG.NUM_CLASSES) -> nn.Module:
    # Set the manual seeds
    torch.manual_seed(CFG.SEED)
    torch.cuda.manual_seed(CFG.SEED)

    # Get model weights
    model_weights = (
        torchvision
        .models
        .ViT_L_32_Weights
        .DEFAULT
    )
    
    # Get model and push to device
    model = (
        torchvision.models.vit_l_32(
            weights=model_weights
        )
    ).to(device) 
    
    # Freeze Model Parameters
    for param in model.parameters():
        param.requires_grad = False
        
    return model

In [32]:
# Get ViT model
vit_backbone = get_vit_b32_model(CFG.DEVICE)

In [33]:
vit_params = {
    'backbone_model'    : vit_backbone,
    'name'              : 'ViT-L-B32',
    'device'            : CFG.DEVICE
}

# Generate Model
vit_model = VisionTransformerModel(**vit_params)

# If using GPU T4 x2 setup, use this:
if CFG.NUM_DEVICES > 1:
    vit_model = nn.DataParallel(vit_model)

In [34]:
# View model summary
summary(
    model=vit_model, 
    input_size=(CFG.BATCH_SIZE, CFG.CHANNELS, CFG.WIDTH, CFG.HEIGHT),
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"]
)

Layer (type (var_name))                                           Input Shape          Output Shape         Param #              Trainable
VisionTransformerModel (VisionTransformerModel)                   [32, 3, 224, 224]    [32, 25]             --                   Partial
├─VisionTransformer (backbone_model)                              [32, 3, 224, 224]    [32, 1000]           1,024                False
│    └─Conv2d (conv_proj)                                         [32, 3, 224, 224]    [32, 1024, 7, 7]     (3,146,752)          False
│    └─Encoder (encoder)                                          [32, 50, 1024]       [32, 50, 1024]       51,200               False
│    │    └─Dropout (dropout)                                     [32, 50, 1024]       [32, 50, 1024]       --                   --
│    │    └─Sequential (layers)                                   [32, 50, 1024]       [32, 50, 1024]       (302,309,376)        False
│    │    └─LayerNorm (ln)                          

In [35]:
for fold_idx, (train_index, val_index) in enumerate(skf.split(train_df, train_df['class'])):
    train_fold_df = train_df.loc[train_index,:]
    val_fold_df = train_df.loc[val_index,:]

    train_dataset = CustomDataset(train_fold_df, 'img_path', mode='train')
    val_dataset = CustomDataset(val_fold_df, 'img_path', mode='val')

    train_dataloader = DataLoader(train_dataset, collate_fn=train_collate_fn, batch_size=BATCH_SIZE)
    val_dataloader = DataLoader(val_dataset, collate_fn=val_collate_fn, batch_size=BATCH_SIZE*2)

    #model = Swinv2Model.from_pretrained("microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft")
    model = vit_model
    lit_model = LitCustomModel(model)

    checkpoint_callback = ModelCheckpoint(
        monitor='val_score',
        mode='max',
        dirpath='./checkpoints/',
        #filename=f'swinv2-large-resize-fold_idx={fold_idx}'+'-{epoch:02d}-{train_loss:.4f}-{val_score:.4f}',
        filename=f'Transformer={fold_idx}'+'-{epoch:02d}-{train_loss:.4f}-{val_score:.4f}',
        save_top_k=1,
        save_weights_only=True,
        verbose=True
    )

    # wandb logger 초기화
    wandb_logger = WandbLogger(name=f"EfficientNetV2Model_Fold{fold_idx}", project="Bird_Competition", log_model="all")

    earlystopping_callback = EarlyStopping(monitor="val_score", mode="max", patience=3)
    trainer = L.Trainer(max_epochs=100, accelerator='auto', precision=32, callbacks=[checkpoint_callback, earlystopping_callback], val_check_interval=0.5, logger=wandb_logger)
    trainer.fit(lit_model, train_dataloader, val_dataloader)

    model.cpu()
    lit_model.cpu()
    del model, lit_model, checkpoint_callback, earlystopping_callback, trainer
    #wandb_logger.experiment.finish()
    gc.collect()
    torch.cuda.empty_cache()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mtjwjddn15584[0m ([33mtjwjddn980117[0m). Use [1m`wandb login --relogin`[0m to force relogin


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type        | Params
--------------------------------------
0 | model | CustomModel | 306 M 
--------------------------------------
262 K     Trainable params
306 M     Non-trainable params
306 M     Total params
1,227.192 Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0:  50%|█████     | 660/1320 [01:38<01:38,  6.72it/s, loss=3.03, v_num=jwns]

Epoch 0, global step 528: 'val_score' reached 0.26265 (best 0.26265), saving model to './checkpoints/Transformer=0-epoch=00-train_loss=3.1153-val_score=0.2627.ckpt' as top 1


Epoch 0: 100%|██████████| 1320/1320 [03:13<00:00,  6.80it/s, loss=2.79, v_num=jwns]

Epoch 0, global step 1056: 'val_score' reached 0.48045 (best 0.48045), saving model to './checkpoints/Transformer=0-epoch=00-train_loss=2.9215-val_score=0.4805.ckpt' as top 1


Epoch 1:  50%|█████     | 660/1320 [01:06<01:06,  9.98it/s, loss=2.5, v_num=jwns]  

Epoch 1, global step 1584: 'val_score' reached 0.57235 (best 0.57235), saving model to './checkpoints/Transformer=0-epoch=01-train_loss=2.5842-val_score=0.5724.ckpt' as top 1


Epoch 1: 100%|██████████| 1320/1320 [02:18<00:00,  9.50it/s, loss=2.24, v_num=jwns]

Epoch 1, global step 2112: 'val_score' reached 0.61519 (best 0.61519), saving model to './checkpoints/Transformer=0-epoch=01-train_loss=2.4402-val_score=0.6152.ckpt' as top 1


Epoch 2:  50%|█████     | 660/1320 [01:05<01:05, 10.11it/s, loss=2, v_num=jwns]    

Epoch 2, global step 2640: 'val_score' reached 0.64790 (best 0.64790), saving model to './checkpoints/Transformer=0-epoch=02-train_loss=2.0942-val_score=0.6479.ckpt' as top 1


Epoch 2: 100%|██████████| 1320/1320 [02:20<00:00,  9.39it/s, loss=1.78, v_num=jwns]

Epoch 2, global step 3168: 'val_score' reached 0.66560 (best 0.66560), saving model to './checkpoints/Transformer=0-epoch=02-train_loss=1.8639-val_score=0.6656.ckpt' as top 1


Epoch 3:  50%|█████     | 660/1320 [01:06<01:06,  9.95it/s, loss=1.67, v_num=jwns] 

Epoch 3, global step 3696: 'val_score' reached 0.69242 (best 0.69242), saving model to './checkpoints/Transformer=0-epoch=03-train_loss=1.6439-val_score=0.6924.ckpt' as top 1


Epoch 3: 100%|██████████| 1320/1320 [02:20<00:00,  9.42it/s, loss=1.51, v_num=jwns]

Epoch 3, global step 4224: 'val_score' reached 0.70323 (best 0.70323), saving model to './checkpoints/Transformer=0-epoch=03-train_loss=1.5581-val_score=0.7032.ckpt' as top 1


Epoch 4:  50%|█████     | 660/1320 [01:06<01:06,  9.88it/s, loss=1.45, v_num=jwns] 

Epoch 4, global step 4752: 'val_score' reached 0.72425 (best 0.72425), saving model to './checkpoints/Transformer=0-epoch=04-train_loss=1.6350-val_score=0.7243.ckpt' as top 1


Epoch 4: 100%|██████████| 1320/1320 [02:20<00:00,  9.37it/s, loss=1.39, v_num=jwns]

Epoch 4, global step 5280: 'val_score' reached 0.73408 (best 0.73408), saving model to './checkpoints/Transformer=0-epoch=04-train_loss=1.2467-val_score=0.7341.ckpt' as top 1


Epoch 5:  50%|█████     | 660/1320 [01:06<01:06,  9.89it/s, loss=1.27, v_num=jwns] 

Epoch 5, global step 5808: 'val_score' reached 0.74676 (best 0.74676), saving model to './checkpoints/Transformer=0-epoch=05-train_loss=1.2535-val_score=0.7468.ckpt' as top 1


Epoch 5: 100%|██████████| 1320/1320 [02:20<00:00,  9.39it/s, loss=1.24, v_num=jwns]

Epoch 5, global step 6336: 'val_score' reached 0.75580 (best 0.75580), saving model to './checkpoints/Transformer=0-epoch=05-train_loss=1.4246-val_score=0.7558.ckpt' as top 1


Epoch 6:  50%|█████     | 660/1320 [00:58<00:58, 11.21it/s, loss=1.17, v_num=jwns] 

Epoch 6, global step 6864: 'val_score' reached 0.76518 (best 0.76518), saving model to './checkpoints/Transformer=0-epoch=06-train_loss=1.4462-val_score=0.7652.ckpt' as top 1


Epoch 6: 100%|██████████| 1320/1320 [02:11<00:00, 10.01it/s, loss=1.08, v_num=jwns]

Epoch 6, global step 7392: 'val_score' reached 0.76960 (best 0.76960), saving model to './checkpoints/Transformer=0-epoch=06-train_loss=1.0843-val_score=0.7696.ckpt' as top 1


Epoch 7:  50%|█████     | 660/1320 [01:06<01:06,  9.92it/s, loss=1.04, v_num=jwns] 

Epoch 7, global step 7920: 'val_score' reached 0.78052 (best 0.78052), saving model to './checkpoints/Transformer=0-epoch=07-train_loss=1.3421-val_score=0.7805.ckpt' as top 1


Epoch 7: 100%|██████████| 1320/1320 [02:19<00:00,  9.45it/s, loss=1.03, v_num=jwns] 

Epoch 7, global step 8448: 'val_score' reached 0.78645 (best 0.78645), saving model to './checkpoints/Transformer=0-epoch=07-train_loss=1.3039-val_score=0.7864.ckpt' as top 1


Epoch 8:  50%|█████     | 660/1320 [01:07<01:07,  9.85it/s, loss=0.98, v_num=jwns] 

Epoch 8, global step 8976: 'val_score' reached 0.78909 (best 0.78909), saving model to './checkpoints/Transformer=0-epoch=08-train_loss=1.0182-val_score=0.7891.ckpt' as top 1


Epoch 8: 100%|██████████| 1320/1320 [02:20<00:00,  9.39it/s, loss=1, v_num=jwns]    

Epoch 8, global step 9504: 'val_score' reached 0.79277 (best 0.79277), saving model to './checkpoints/Transformer=0-epoch=08-train_loss=1.3331-val_score=0.7928.ckpt' as top 1


Epoch 9:  50%|█████     | 660/1320 [01:09<01:09,  9.49it/s, loss=0.888, v_num=jwns]

Epoch 9, global step 10032: 'val_score' reached 0.79892 (best 0.79892), saving model to './checkpoints/Transformer=0-epoch=09-train_loss=0.9305-val_score=0.7989.ckpt' as top 1


Epoch 9: 100%|██████████| 1320/1320 [02:17<00:00,  9.63it/s, loss=0.933, v_num=jwns]

Epoch 9, global step 10560: 'val_score' reached 0.80089 (best 0.80089), saving model to './checkpoints/Transformer=0-epoch=09-train_loss=1.1694-val_score=0.8009.ckpt' as top 1


Epoch 10:  50%|█████     | 660/1320 [01:00<01:00, 10.85it/s, loss=0.889, v_num=jwns]

Epoch 10, global step 11088: 'val_score' reached 0.80320 (best 0.80320), saving model to './checkpoints/Transformer=0-epoch=10-train_loss=0.9527-val_score=0.8032.ckpt' as top 1


Epoch 10: 100%|██████████| 1320/1320 [02:09<00:00, 10.22it/s, loss=0.861, v_num=jwns]

Epoch 10, global step 11616: 'val_score' reached 0.80878 (best 0.80878), saving model to './checkpoints/Transformer=0-epoch=10-train_loss=1.1398-val_score=0.8088.ckpt' as top 1


Epoch 11:  50%|█████     | 660/1320 [01:06<01:06,  9.93it/s, loss=0.846, v_num=jwns] 

Epoch 11, global step 12144: 'val_score' reached 0.81069 (best 0.81069), saving model to './checkpoints/Transformer=0-epoch=11-train_loss=0.9367-val_score=0.8107.ckpt' as top 1


Epoch 11: 100%|██████████| 1320/1320 [02:15<00:00,  9.73it/s, loss=0.879, v_num=jwns]

Epoch 11, global step 12672: 'val_score' reached 0.81576 (best 0.81576), saving model to './checkpoints/Transformer=0-epoch=11-train_loss=1.0849-val_score=0.8158.ckpt' as top 1


Epoch 12:  50%|█████     | 660/1320 [01:02<01:02, 10.48it/s, loss=0.789, v_num=jwns] 

Epoch 12, global step 13200: 'val_score' reached 0.81709 (best 0.81709), saving model to './checkpoints/Transformer=0-epoch=12-train_loss=0.9641-val_score=0.8171.ckpt' as top 1


Epoch 12: 100%|██████████| 1320/1320 [02:13<00:00,  9.90it/s, loss=0.829, v_num=jwns]

Epoch 12, global step 13728: 'val_score' reached 0.82133 (best 0.82133), saving model to './checkpoints/Transformer=0-epoch=12-train_loss=0.8986-val_score=0.8213.ckpt' as top 1


Epoch 13:  50%|█████     | 660/1320 [01:01<01:01, 10.76it/s, loss=0.77, v_num=jwns]  

Epoch 13, global step 14256: 'val_score' reached 0.82283 (best 0.82283), saving model to './checkpoints/Transformer=0-epoch=13-train_loss=0.8635-val_score=0.8228.ckpt' as top 1


Epoch 13: 100%|██████████| 1320/1320 [02:13<00:00,  9.91it/s, loss=0.799, v_num=jwns]

Epoch 13, global step 14784: 'val_score' reached 0.82648 (best 0.82648), saving model to './checkpoints/Transformer=0-epoch=13-train_loss=1.1389-val_score=0.8265.ckpt' as top 1


Epoch 14:  50%|█████     | 660/1320 [01:00<01:00, 10.93it/s, loss=0.709, v_num=jwns] 

Epoch 14, global step 15312: 'val_score' reached 0.82904 (best 0.82904), saving model to './checkpoints/Transformer=0-epoch=14-train_loss=1.1153-val_score=0.8290.ckpt' as top 1


Epoch 14: 100%|██████████| 1320/1320 [02:12<00:00,  9.96it/s, loss=0.775, v_num=jwns]

Epoch 14, global step 15840: 'val_score' reached 0.83266 (best 0.83266), saving model to './checkpoints/Transformer=0-epoch=14-train_loss=1.0793-val_score=0.8327.ckpt' as top 1


Epoch 15:  50%|█████     | 660/1320 [01:05<01:05, 10.13it/s, loss=0.704, v_num=jwns] 

Epoch 15, global step 16368: 'val_score' was not in top 1


Epoch 15: 100%|██████████| 1320/1320 [02:06<00:00, 10.44it/s, loss=0.77, v_num=jwns] 

Epoch 15, global step 16896: 'val_score' reached 0.83392 (best 0.83392), saving model to './checkpoints/Transformer=0-epoch=15-train_loss=1.1470-val_score=0.8339.ckpt' as top 1


Epoch 16:  50%|█████     | 660/1320 [01:01<01:01, 10.70it/s, loss=0.697, v_num=jwns]

Epoch 16, global step 17424: 'val_score' reached 0.83626 (best 0.83626), saving model to './checkpoints/Transformer=0-epoch=16-train_loss=0.7678-val_score=0.8363.ckpt' as top 1


Epoch 16: 100%|██████████| 1320/1320 [02:08<00:00, 10.27it/s, loss=0.708, v_num=jwns]

Epoch 16, global step 17952: 'val_score' reached 0.83706 (best 0.83706), saving model to './checkpoints/Transformer=0-epoch=16-train_loss=0.8483-val_score=0.8371.ckpt' as top 1


Epoch 17:  50%|█████     | 660/1320 [01:01<01:01, 10.80it/s, loss=0.644, v_num=jwns] 

Epoch 17, global step 18480: 'val_score' was not in top 1


Epoch 17: 100%|██████████| 1320/1320 [02:00<00:00, 10.92it/s, loss=0.68, v_num=jwns] 

Epoch 17, global step 19008: 'val_score' reached 0.83974 (best 0.83974), saving model to './checkpoints/Transformer=0-epoch=17-train_loss=0.8557-val_score=0.8397.ckpt' as top 1


Epoch 18:  50%|█████     | 660/1320 [01:00<01:00, 10.97it/s, loss=0.623, v_num=jwns]

Epoch 18, global step 19536: 'val_score' reached 0.84052 (best 0.84052), saving model to './checkpoints/Transformer=0-epoch=18-train_loss=0.6777-val_score=0.8405.ckpt' as top 1


Epoch 18: 100%|██████████| 1320/1320 [02:15<00:00,  9.73it/s, loss=0.666, v_num=jwns]

Epoch 18, global step 20064: 'val_score' reached 0.84162 (best 0.84162), saving model to './checkpoints/Transformer=0-epoch=18-train_loss=0.9737-val_score=0.8416.ckpt' as top 1


Epoch 19:  50%|█████     | 660/1320 [01:05<01:05, 10.05it/s, loss=0.636, v_num=jwns] 

Epoch 19, global step 20592: 'val_score' reached 0.84434 (best 0.84434), saving model to './checkpoints/Transformer=0-epoch=19-train_loss=0.8988-val_score=0.8443.ckpt' as top 1


Epoch 19: 100%|██████████| 1320/1320 [02:19<00:00,  9.47it/s, loss=0.668, v_num=jwns]

Epoch 19, global step 21120: 'val_score' reached 0.84488 (best 0.84488), saving model to './checkpoints/Transformer=0-epoch=19-train_loss=0.9820-val_score=0.8449.ckpt' as top 1


Epoch 20:  50%|█████     | 660/1320 [01:08<01:08,  9.58it/s, loss=0.606, v_num=jwns] 

Epoch 20, global step 21648: 'val_score' reached 0.84759 (best 0.84759), saving model to './checkpoints/Transformer=0-epoch=20-train_loss=0.6361-val_score=0.8476.ckpt' as top 1


Epoch 20: 100%|██████████| 1320/1320 [02:23<00:00,  9.19it/s, loss=0.644, v_num=jwns]

Epoch 20, global step 22176: 'val_score' reached 0.84897 (best 0.84897), saving model to './checkpoints/Transformer=0-epoch=20-train_loss=0.7566-val_score=0.8490.ckpt' as top 1


Epoch 21:  50%|█████     | 660/1320 [01:06<01:06,  9.87it/s, loss=0.618, v_num=jwns] 

Epoch 21, global step 22704: 'val_score' was not in top 1


Epoch 21: 100%|██████████| 1320/1320 [02:12<00:00,  9.94it/s, loss=0.64, v_num=jwns] 

Epoch 21, global step 23232: 'val_score' reached 0.84934 (best 0.84934), saving model to './checkpoints/Transformer=0-epoch=21-train_loss=0.7933-val_score=0.8493.ckpt' as top 1


Epoch 22:  50%|█████     | 660/1320 [01:06<01:06,  9.92it/s, loss=0.602, v_num=jwns]

Epoch 22, global step 23760: 'val_score' reached 0.84981 (best 0.84981), saving model to './checkpoints/Transformer=0-epoch=22-train_loss=0.7207-val_score=0.8498.ckpt' as top 1


Epoch 22: 100%|██████████| 1320/1320 [02:20<00:00,  9.42it/s, loss=0.581, v_num=jwns]

Epoch 22, global step 24288: 'val_score' reached 0.85332 (best 0.85332), saving model to './checkpoints/Transformer=0-epoch=22-train_loss=0.9194-val_score=0.8533.ckpt' as top 1


Epoch 23:  50%|█████     | 660/1320 [01:06<01:06,  9.94it/s, loss=0.579, v_num=jwns] 

Epoch 23, global step 24816: 'val_score' was not in top 1


Epoch 23: 100%|██████████| 1320/1320 [02:12<00:00,  9.97it/s, loss=0.633, v_num=jwns]

Epoch 23, global step 25344: 'val_score' reached 0.85558 (best 0.85558), saving model to './checkpoints/Transformer=0-epoch=23-train_loss=0.6737-val_score=0.8556.ckpt' as top 1


Epoch 24:  50%|█████     | 660/1320 [01:06<01:06,  9.90it/s, loss=0.523, v_num=jwns] 

Epoch 24, global step 25872: 'val_score' was not in top 1


Epoch 24: 100%|██████████| 1320/1320 [02:12<00:00,  9.95it/s, loss=0.591, v_num=jwns]

Epoch 24, global step 26400: 'val_score' reached 0.85811 (best 0.85811), saving model to './checkpoints/Transformer=0-epoch=24-train_loss=0.8673-val_score=0.8581.ckpt' as top 1


Epoch 25:  50%|█████     | 660/1320 [01:06<01:06,  9.90it/s, loss=0.559, v_num=jwns] 

Epoch 25, global step 26928: 'val_score' was not in top 1


Epoch 25: 100%|██████████| 1320/1320 [02:12<00:00,  9.95it/s, loss=0.58, v_num=jwns] 

Epoch 25, global step 27456: 'val_score' reached 0.85965 (best 0.85965), saving model to './checkpoints/Transformer=0-epoch=25-train_loss=1.1102-val_score=0.8597.ckpt' as top 1


Epoch 26:  50%|█████     | 660/1320 [01:06<01:06,  9.87it/s, loss=0.535, v_num=jwns]

Epoch 26, global step 27984: 'val_score' was not in top 1


Epoch 26: 100%|██████████| 1320/1320 [02:13<00:00,  9.92it/s, loss=0.633, v_num=jwns]

Epoch 26, global step 28512: 'val_score' reached 0.86025 (best 0.86025), saving model to './checkpoints/Transformer=0-epoch=26-train_loss=0.8569-val_score=0.8602.ckpt' as top 1


Epoch 27:  50%|█████     | 660/1320 [01:06<01:06,  9.89it/s, loss=0.549, v_num=jwns] 

Epoch 27, global step 29040: 'val_score' was not in top 1


Epoch 27: 100%|██████████| 1320/1320 [02:13<00:00,  9.91it/s, loss=0.531, v_num=jwns]

Epoch 27, global step 29568: 'val_score' reached 0.86194 (best 0.86194), saving model to './checkpoints/Transformer=0-epoch=27-train_loss=0.6313-val_score=0.8619.ckpt' as top 1


Epoch 28:  50%|█████     | 660/1320 [01:06<01:06,  9.94it/s, loss=0.529, v_num=jwns] 

Epoch 28, global step 30096: 'val_score' was not in top 1


Epoch 28: 100%|██████████| 1320/1320 [02:12<00:00,  9.96it/s, loss=0.571, v_num=jwns]

Epoch 28, global step 30624: 'val_score' was not in top 1


Epoch 29:  50%|█████     | 660/1320 [01:06<01:06, 10.00it/s, loss=0.491, v_num=jwns] 

Epoch 29, global step 31152: 'val_score' was not in top 1


Epoch 29:  50%|█████     | 660/1320 [01:06<01:06, 10.00it/s, loss=0.491, v_num=jwns]


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type        | Params
--------------------------------------
0 | model | CustomModel | 306 M 
--------------------------------------
262 K     Trainable params
306 M     Non-trainable params
306 M     Total params
1,227.192 Total estimated model params size (MB)


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 23.79it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0:  50%|█████     | 660/1320 [01:06<01:06,  9.99it/s, loss=0.531, v_num=jwns]

Epoch 0, global step 528: 'val_score' reached 0.87360 (best 0.87360), saving model to './checkpoints/Transformer=1-epoch=00-train_loss=0.3367-val_score=0.8736.ckpt' as top 1


Epoch 0: 100%|██████████| 1320/1320 [02:19<00:00,  9.45it/s, loss=0.513, v_num=jwns]

Epoch 0, global step 1056: 'val_score' was not in top 1


Epoch 1:  50%|█████     | 660/1320 [01:06<01:06,  9.98it/s, loss=0.553, v_num=jwns] 

Epoch 1, global step 1584: 'val_score' was not in top 1


Epoch 1: 100%|██████████| 1320/1320 [02:12<00:00,  9.99it/s, loss=0.557, v_num=jwns]

Epoch 1, global step 2112: 'val_score' was not in top 1


Epoch 1: 100%|██████████| 1320/1320 [02:12<00:00,  9.99it/s, loss=0.557, v_num=jwns]


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type        | Params
--------------------------------------
0 | model | CustomModel | 306 M 
--------------------------------------
262 K     Trainable params
306 M     Non-trainable params
306 M     Total params
1,227.192 Total estimated model params size (MB)


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 22.97it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0:  50%|█████     | 660/1320 [01:06<01:06,  9.97it/s, loss=0.555, v_num=jwns]

Epoch 0, global step 528: 'val_score' reached 0.89342 (best 0.89342), saving model to './checkpoints/Transformer=2-epoch=00-train_loss=0.4912-val_score=0.8934.ckpt' as top 1


Epoch 0: 100%|██████████| 1320/1320 [02:19<00:00,  9.44it/s, loss=0.537, v_num=jwns]

Epoch 0, global step 1056: 'val_score' reached 0.89359 (best 0.89359), saving model to './checkpoints/Transformer=2-epoch=00-train_loss=0.7243-val_score=0.8936.ckpt' as top 1


Epoch 1:  50%|█████     | 660/1320 [01:06<01:06,  9.88it/s, loss=0.555, v_num=jwns] 

Epoch 1, global step 1584: 'val_score' reached 0.89441 (best 0.89441), saving model to './checkpoints/Transformer=2-epoch=01-train_loss=0.5249-val_score=0.8944.ckpt' as top 1


Epoch 1: 100%|██████████| 1320/1320 [02:20<00:00,  9.40it/s, loss=0.517, v_num=jwns]

Epoch 1, global step 2112: 'val_score' reached 0.89482 (best 0.89482), saving model to './checkpoints/Transformer=2-epoch=01-train_loss=0.8859-val_score=0.8948.ckpt' as top 1


Epoch 2:  50%|█████     | 660/1320 [01:06<01:06,  9.90it/s, loss=0.538, v_num=jwns] 

Epoch 2, global step 2640: 'val_score' was not in top 1


Epoch 2: 100%|██████████| 1320/1320 [02:13<00:00,  9.90it/s, loss=0.479, v_num=jwns]

Epoch 2, global step 3168: 'val_score' was not in top 1


Epoch 3:  50%|█████     | 660/1320 [01:06<01:06,  9.99it/s, loss=0.536, v_num=jwns] 

Epoch 3, global step 3696: 'val_score' was not in top 1


Epoch 3:  50%|█████     | 660/1320 [01:06<01:06,  9.99it/s, loss=0.536, v_num=jwns]


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type        | Params
--------------------------------------
0 | model | CustomModel | 306 M 
--------------------------------------
262 K     Trainable params
306 M     Non-trainable params
306 M     Total params
1,227.192 Total estimated model params size (MB)


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 22.97it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0:  50%|█████     | 660/1320 [01:06<01:06,  9.97it/s, loss=0.484, v_num=jwns]

Epoch 0, global step 528: 'val_score' reached 0.89505 (best 0.89505), saving model to './checkpoints/Transformer=3-epoch=00-train_loss=0.3972-val_score=0.8951.ckpt' as top 1


Epoch 0: 100%|██████████| 1320/1320 [02:19<00:00,  9.44it/s, loss=0.542, v_num=jwns]

Epoch 0, global step 1056: 'val_score' reached 0.89550 (best 0.89550), saving model to './checkpoints/Transformer=3-epoch=00-train_loss=0.4742-val_score=0.8955.ckpt' as top 1


Epoch 1:  50%|█████     | 660/1320 [01:06<01:06,  9.90it/s, loss=0.449, v_num=jwns] 

Epoch 1, global step 1584: 'val_score' was not in top 1


Epoch 1: 100%|██████████| 1320/1320 [02:12<00:00,  9.94it/s, loss=0.485, v_num=jwns]

Epoch 1, global step 2112: 'val_score' was not in top 1


Epoch 2:  50%|█████     | 660/1320 [01:06<01:06,  9.98it/s, loss=0.421, v_num=jwns] 

Epoch 2, global step 2640: 'val_score' was not in top 1


Epoch 2:  50%|█████     | 660/1320 [01:06<01:06,  9.98it/s, loss=0.421, v_num=jwns]


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type        | Params
--------------------------------------
0 | model | CustomModel | 306 M 
--------------------------------------
262 K     Trainable params
306 M     Non-trainable params
306 M     Total params
1,227.192 Total estimated model params size (MB)


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 23.79it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0:  50%|█████     | 660/1320 [01:06<01:06,  9.96it/s, loss=0.505, v_num=jwns]

Epoch 0, global step 528: 'val_score' reached 0.90259 (best 0.90259), saving model to './checkpoints/Transformer=4-epoch=00-train_loss=0.5481-val_score=0.9026.ckpt' as top 1


Epoch 0: 100%|██████████| 1320/1320 [02:19<00:00,  9.43it/s, loss=0.464, v_num=jwns]

Epoch 0, global step 1056: 'val_score' was not in top 1


Epoch 1:  50%|█████     | 660/1320 [01:06<01:06,  9.97it/s, loss=0.511, v_num=jwns] 

Epoch 1, global step 1584: 'val_score' was not in top 1


Epoch 1:  96%|█████████▌| 1269/1320 [02:12<00:05,  9.59it/s, loss=0.462, v_num=jwns]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [None]:
test_df = pd.read_csv('./open/test.csv')
test_df['img_path'] = test_df['img_path'].apply(lambda x: os.path.join('./open', x))

In [None]:
if not len(test_df) == len(os.listdir('./open/test')):
    raise ValueError()

In [None]:
test_transform = transforms.Compose([
    transforms.Resize(size=(256,256), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
])

test_collate_fn = CustomCollateFn(test_transform, 'inference')
test_dataset = CustomDataset(test_df, 'img_path', mode='inference')
test_dataloader = DataLoader(test_dataset, collate_fn=test_collate_fn, batch_size=BATCH_SIZE*2)

In [None]:
fold_preds = []
for checkpoint_path in glob('./checkpoints/swinv2-large-resize*.ckpt'):
    model = Swinv2Model.from_pretrained("microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft")
    lit_model = LitCustomModel.load_from_checkpoint(checkpoint_path, model=model)
    trainer = L.Trainer( accelerator='auto', precision=32)
    preds = trainer.predict(lit_model, test_dataloader)
    preds = torch.cat(preds,dim=0).detach().cpu().numpy().argmax(1)
    fold_preds.append(preds)
pred_ensemble = list(map(lambda x: np.bincount(x).argmax(),np.stack(fold_preds,axis=1)))

In [None]:
submission = pd.read_csv('./open/sample_submission.csv')

In [None]:
submission['label'] = le.inverse_transform(pred_ensemble)

In [None]:
submission.to_csv('./submissions/swinv2_large_resize.csv',index=False)