In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
cd /content/drive/MyDrive/dacon/lowresol/

In [None]:
# !unzip -qn open.zip -d ./open/

In [1]:
!pip install --quiet timm pytorch_lightning==1.7.7 torchmetrics==0.11.1

DEPRECATION: pytorch-lightning 1.7.7 has a non-standard dependency specifier torch>=1.9.*. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063


In [3]:
import os
import gc
import warnings
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import pytorch_lightning as L

from torchinfo import summary
from glob import glob
from tqdm.auto import tqdm
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from torchvision.io import read_image
from torchvision.transforms import v2 as  transforms
from torch.utils.data import Dataset, DataLoader
from transformers import Swinv2Config, Swinv2Model, AutoImageProcessor, AutoModelForImageClassification
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

from pytorch_lightning.loggers import WandbLogger  # wandb logger를 임포트


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
class CFG:
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    NUM_DEVICES = torch.cuda.device_count()
    NUM_WORKERS = os.cpu_count()
    #NUM_CLASSES = 4
    NUM_CLASSES = 25
    EPOCHS = 16
    BATCH_SIZE = (
        32 if torch.cuda.device_count() < 2 
        else (32 * torch.cuda.device_count())
    )
    LR = 0.001
    APPLY_SHUFFLE = True
    SEED = 768
    #HEIGHT = 224
    #WIDTH = 224
    HEIGHT = 64
    WIDTH = 64
    CHANNELS = 3
    #IMAGE_SIZE = (224, 224, 3)
    IMAGE_SIZE = (64, 64, 3)
    
    # Define paths
    #DATASET_PATH = "/content/drive/MyDrive/Colab Notebooks/dataset"
    #TRAIN_PATH = '/content/drive/MyDrive/Colab Notebooks/dataset/train/'
    #TEST_PATH = '/content/drive/MyDrive/Colab Notebooks/dataset/test'
    
# Mute warnings
warnings.filterwarnings("ignore", "is_categorical_dtype")
warnings.filterwarnings("ignore", "use_inf_as_na")

In [5]:
class CustomDataset(Dataset):
    def __init__(self, df, path_col,  mode='train'):
        self.df = df
        self.path_col = path_col
        self.mode = mode

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if self.mode == 'train':
            row = self.df.iloc[idx]
            image = read_image(row[self.path_col])/256.
            label = row['class']
            data = {
                'image':image,
                'label':label
            }
            return data
        elif self.mode == 'val':
            row = self.df.iloc[idx]
            image = read_image(row[self.path_col])/256.
            label = row['class']
            data = {
                'image':image,
                'label':label
            }
            return data
        elif self.mode == 'inference':
            row = self.df.iloc[idx]
            image = read_image(row[self.path_col])/256.
            data = {
                'image':image,
            }
            return data

    def train_transform(self, image):
        pass

In [6]:
class CustomCollateFn:
    def __init__(self, transform, mode):
        self.mode = mode
        self.transform = transform

    def __call__(self, batch):
        if self.mode=='train':
            pixel_values = torch.stack([self.transform(data['image']) for data in batch])
            label = torch.LongTensor([data['label'] for data in batch])
            return {
                'pixel_values':pixel_values,
                'label':label,
            }
        elif self.mode=='val':
            pixel_values = torch.stack([self.transform(data['image']) for data in batch])
            label = torch.LongTensor([data['label'] for data in batch])
            return {
                'pixel_values':pixel_values,
                'label':label,
            }
        elif self.mode=='inference':
            pixel_values = torch.stack([self.transform(data['image']) for data in batch])
            return {
                'pixel_values':pixel_values,
            }

In [7]:
class CustomModel(nn.Module):
    def __init__(self, model):
        super(CustomModel, self).__init__()
        self.model = model
        self.clf = nn.Sequential(
            nn.Tanh(),
            nn.LazyLinear(25),
        )

#     @torch.compile
    def forward(self, x, label=None):
        # original
        # x = self.model(x).pooler_output
        x = self.model(x)
        # pooler_output 대신에 last_hidden_state 사용
        #x = outputs.last_hidden_state[:, 0]  # [CLS] 토큰에 해당하는 벡터 추출
        #x = self.clf(x)
        loss = None
        if label is not None:
            loss = nn.CrossEntropyLoss()(x, label)
        probs = nn.LogSoftmax(dim=-1)(x)
        return probs, loss

class LitCustomModel(L.LightningModule):
    def __init__(self, model):
        super().__init__()
        self.model = CustomModel(model)
        self.validation_step_output = []

    def configure_optimizers(self):
        opt = torch.optim.AdamW(self.parameters(), lr=1e-5)
        return opt

    def training_step(self, batch, batch_idx=None):
        x = batch['pixel_values']
        label = batch['label']
        probs, loss = self.model(x, label)
        self.log(f"train_loss", loss, on_step=True, on_epoch=False)
        return loss

    def validation_step(self, batch, batch_idx=None):
        x = batch['pixel_values']
        label = batch['label']
        probs, loss = self.model(x, label)
        self.validation_step_output.append([probs,label])
        return loss

    def predict_step(self, batch, batch_idx=None):
        x = batch['pixel_values']
        probs, _ = self.model(x)
        return probs

    def validation_epoch_end(self, step_output):
        pred = torch.cat([x for x, _ in self.validation_step_output]).cpu().detach().numpy().argmax(1)
        label = torch.cat([label for _, label in self.validation_step_output]).cpu().detach().numpy()
        score = f1_score(label,pred, average='macro')
        self.log("val_score", score)
        self.validation_step_output.clear()
        return score

In [8]:
SEED = 42
N_SPLIT = 5
BATCH_SIZE = 12

In [9]:
L.seed_everything(SEED)

Global seed set to 42


42

In [10]:
train_df = pd.read_csv('./open/train.csv')
train_df['img_path'] = train_df['img_path'].apply(lambda x: os.path.join('./open', x))
train_df['upscale_img_path'] = train_df['upscale_img_path'].apply(lambda x: os.path.join('./open', x))
le = LabelEncoder()
train_df['class'] = le.fit_transform(train_df['label'])

In [11]:
if not len(train_df) == len(os.listdir('./open/train')):
    raise ValueError()

In [12]:
skf = StratifiedKFold(n_splits=N_SPLIT, random_state=SEED, shuffle=True)

In [13]:
train_transform = transforms.Compose([
    transforms.Resize(size=(256,256), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
])
val_transform = transforms.Compose([
    transforms.Resize(size=(256,256), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
])

train_collate_fn = CustomCollateFn(train_transform, 'train')
val_collate_fn = CustomCollateFn(val_transform, 'val')

In [14]:
class EfficientNetV2Model(nn.Module):
    def __init__(self, backbone_model, name='efficientnet-v2-large', 
                 num_classes=CFG.NUM_CLASSES, device=CFG.DEVICE):
        super(EfficientNetV2Model, self).__init__()
        
        self.backbone_model = backbone_model
        self.device = device
        self.num_classes = num_classes
        self.name = name
        
        classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(p=0.2, inplace=True), 
            nn.Linear(in_features=1280, out_features=256, bias=True),
            nn.GELU(),
            nn.Dropout(p=0.2, inplace=True),
            nn.Linear(in_features=256, out_features=num_classes, bias=False)
        ).to(device)
        
        self._set_classifier(classifier)
        
    def _set_classifier(self, classifier:nn.Module) -> None:
        self.backbone_model.classifier = classifier
    
    def forward(self, image):
        return self.backbone_model(image)

In [15]:
def get_effiecientnetv2_model(
    device: torch.device=CFG.NUM_CLASSES) -> nn.Module:
    # Set the manual seeds
    torch.manual_seed(CFG.SEED)
    torch.cuda.manual_seed(CFG.SEED)

    # Get model weights
    model_weights = (
        torchvision
        .models
        .EfficientNet_V2_L_Weights
        .DEFAULT
    )
    
    # Get model and push to device
    model = (
        torchvision.models.efficientnet_v2_l(
            weights=model_weights
        )
    ).to(device) 
    
    # Freeze Model Parameters
    for param in model.features.parameters():
        param.requires_grad = False
        
    return model

In [16]:
# Get EfficientNet v2 model
backbone_model = get_effiecientnetv2_model(CFG.DEVICE)

In [17]:
efficientnetv2_params = {
    'backbone_model'    : backbone_model,
    'name'              : 'efficientnet-v2-large',
    'device'            : CFG.DEVICE
}

# Generate Model
efficientnet_model = EfficientNetV2Model(**efficientnetv2_params)

# If using GPU T4 x2 setup, use this:
if CFG.NUM_DEVICES > 1:
    efficientnet_model = nn.DataParallel(efficientnet_model)

In [18]:
# View model summary
summary(
    model=efficientnet_model, 
    input_size=(CFG.BATCH_SIZE, CFG.CHANNELS, CFG.WIDTH, CFG.HEIGHT),
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"]
)

Layer (type (var_name))                                           Input Shape          Output Shape         Param #              Trainable
EfficientNetV2Model (EfficientNetV2Model)                         [32, 3, 64, 64]      [32, 25]             --                   Partial
├─EfficientNet (backbone_model)                                   [32, 3, 64, 64]      [32, 25]             --                   Partial
│    └─Sequential (features)                                      [32, 3, 64, 64]      [32, 1280, 2, 2]     --                   False
│    │    └─Conv2dNormActivation (0)                              [32, 3, 64, 64]      [32, 32, 32, 32]     (928)                False
│    │    └─Sequential (1)                                        [32, 32, 32, 32]     [32, 32, 32, 32]     (37,120)             False
│    │    └─Sequential (2)                                        [32, 32, 32, 32]     [32, 64, 16, 16]     (1,032,320)          False
│    │    └─Sequential (3)                     

In [20]:
for fold_idx, (train_index, val_index) in enumerate(skf.split(train_df, train_df['class'])):
    train_fold_df = train_df.loc[train_index,:]
    val_fold_df = train_df.loc[val_index,:]

    train_dataset = CustomDataset(train_fold_df, 'img_path', mode='train')
    val_dataset = CustomDataset(val_fold_df, 'img_path', mode='val')

    train_dataloader = DataLoader(train_dataset, collate_fn=train_collate_fn, batch_size=BATCH_SIZE)
    val_dataloader = DataLoader(val_dataset, collate_fn=val_collate_fn, batch_size=BATCH_SIZE*2)

    #model = Swinv2Model.from_pretrained("microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft")
    model = efficientnet_model
    lit_model = LitCustomModel(model)

    checkpoint_callback = ModelCheckpoint(
        monitor='val_score',
        mode='max',
        dirpath='./checkpoints/',
        #filename=f'swinv2-large-resize-fold_idx={fold_idx}'+'-{epoch:02d}-{train_loss:.4f}-{val_score:.4f}',
        filename=f'EfficientNetV2Model={fold_idx}'+'-{epoch:02d}-{train_loss:.4f}-{val_score:.4f}',
        save_top_k=1,
        save_weights_only=True,
        verbose=True
    )

    # wandb logger 초기화
    wandb_logger = WandbLogger(name=f"EfficientNetV2Model_Fold{fold_idx}", project="Bird_Competition", log_model="all")

    earlystopping_callback = EarlyStopping(monitor="val_score", mode="max", patience=3)
    trainer = L.Trainer(max_epochs=100, accelerator='auto', precision=32, callbacks=[checkpoint_callback, earlystopping_callback], val_check_interval=0.5, logger=wandb_logger)
    trainer.fit(lit_model, train_dataloader, val_dataloader)

    model.cpu()
    lit_model.cpu()
    del model, lit_model, checkpoint_callback, earlystopping_callback, trainer
    #wandb_logger.experiment.finish()
    gc.collect()
    torch.cuda.empty_cache()



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type        | Params
--------------------------------------
0 | model | CustomModel | 117 M 
--------------------------------------
334 K     Trainable params
117 M     Non-trainable params
117 M     Total params
470.274   Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0:  50%|█████     | 660/1320 [00:53<00:53, 12.32it/s, loss=3.17, v_num=8410]

Epoch 0, global step 528: 'val_score' reached 0.11129 (best 0.11129), saving model to './checkpoints/EfficientNetV2Model=0-epoch=00-train_loss=3.2195-val_score=0.1113.ckpt' as top 1


Epoch 0: 100%|██████████| 1320/1320 [01:49<00:00, 12.03it/s, loss=3.1, v_num=8410] 

Epoch 0, global step 1056: 'val_score' reached 0.22404 (best 0.22404), saving model to './checkpoints/EfficientNetV2Model=0-epoch=00-train_loss=3.0847-val_score=0.2240.ckpt' as top 1


Epoch 1:  50%|█████     | 660/1320 [00:53<00:53, 12.39it/s, loss=3.01, v_num=8410]

Epoch 1, global step 1584: 'val_score' reached 0.32537 (best 0.32537), saving model to './checkpoints/EfficientNetV2Model=0-epoch=01-train_loss=3.2110-val_score=0.3254.ckpt' as top 1


Epoch 1: 100%|██████████| 1320/1320 [01:49<00:00, 12.07it/s, loss=2.9, v_num=8410] 

Epoch 1, global step 2112: 'val_score' reached 0.38765 (best 0.38765), saving model to './checkpoints/EfficientNetV2Model=0-epoch=01-train_loss=3.0144-val_score=0.3877.ckpt' as top 1


Epoch 2:  50%|█████     | 660/1320 [00:54<00:54, 12.00it/s, loss=2.81, v_num=8410]

Epoch 2, global step 2640: 'val_score' reached 0.43483 (best 0.43483), saving model to './checkpoints/EfficientNetV2Model=0-epoch=02-train_loss=3.1499-val_score=0.4348.ckpt' as top 1


Epoch 2: 100%|██████████| 1320/1320 [01:51<00:00, 11.81it/s, loss=2.64, v_num=8410]

Epoch 2, global step 3168: 'val_score' reached 0.45753 (best 0.45753), saving model to './checkpoints/EfficientNetV2Model=0-epoch=02-train_loss=2.6362-val_score=0.4575.ckpt' as top 1


Epoch 3:  50%|█████     | 660/1320 [00:56<00:56, 11.61it/s, loss=2.59, v_num=8410] 

Epoch 3, global step 3696: 'val_score' reached 0.49128 (best 0.49128), saving model to './checkpoints/EfficientNetV2Model=0-epoch=03-train_loss=3.0535-val_score=0.4913.ckpt' as top 1


Epoch 3: 100%|██████████| 1320/1320 [01:56<00:00, 11.38it/s, loss=2.48, v_num=8410]

Epoch 3, global step 4224: 'val_score' reached 0.49893 (best 0.49893), saving model to './checkpoints/EfficientNetV2Model=0-epoch=03-train_loss=2.6457-val_score=0.4989.ckpt' as top 1


Epoch 4:  50%|█████     | 660/1320 [00:54<00:54, 12.07it/s, loss=2.44, v_num=8410] 

Epoch 4, global step 4752: 'val_score' reached 0.51517 (best 0.51517), saving model to './checkpoints/EfficientNetV2Model=0-epoch=04-train_loss=2.7717-val_score=0.5152.ckpt' as top 1


Epoch 4: 100%|██████████| 1320/1320 [01:52<00:00, 11.70it/s, loss=2.29, v_num=8410]

Epoch 4, global step 5280: 'val_score' reached 0.53495 (best 0.53495), saving model to './checkpoints/EfficientNetV2Model=0-epoch=04-train_loss=2.1351-val_score=0.5349.ckpt' as top 1


Epoch 5:  50%|█████     | 660/1320 [00:54<00:54, 12.18it/s, loss=2.32, v_num=8410] 

Epoch 5, global step 5808: 'val_score' reached 0.54902 (best 0.54902), saving model to './checkpoints/EfficientNetV2Model=0-epoch=05-train_loss=2.7520-val_score=0.5490.ckpt' as top 1


Epoch 5: 100%|██████████| 1320/1320 [01:48<00:00, 12.15it/s, loss=2.15, v_num=8410]

Epoch 5, global step 6336: 'val_score' reached 0.55111 (best 0.55111), saving model to './checkpoints/EfficientNetV2Model=0-epoch=05-train_loss=2.0395-val_score=0.5511.ckpt' as top 1


Epoch 6:  50%|█████     | 660/1320 [00:50<00:50, 13.14it/s, loss=2.23, v_num=8410] 

Epoch 6, global step 6864: 'val_score' reached 0.57394 (best 0.57394), saving model to './checkpoints/EfficientNetV2Model=0-epoch=06-train_loss=2.5497-val_score=0.5739.ckpt' as top 1


Epoch 6: 100%|██████████| 1320/1320 [01:44<00:00, 12.61it/s, loss=2.14, v_num=8410]

Epoch 6, global step 7392: 'val_score' reached 0.57591 (best 0.57591), saving model to './checkpoints/EfficientNetV2Model=0-epoch=06-train_loss=1.9681-val_score=0.5759.ckpt' as top 1


Epoch 7:  50%|█████     | 660/1320 [00:50<00:50, 13.15it/s, loss=2.13, v_num=8410] 

Epoch 7, global step 7920: 'val_score' reached 0.58693 (best 0.58693), saving model to './checkpoints/EfficientNetV2Model=0-epoch=07-train_loss=2.7626-val_score=0.5869.ckpt' as top 1


Epoch 7: 100%|██████████| 1320/1320 [01:43<00:00, 12.73it/s, loss=2.02, v_num=8410]

Epoch 7, global step 8448: 'val_score' reached 0.59448 (best 0.59448), saving model to './checkpoints/EfficientNetV2Model=0-epoch=07-train_loss=1.7839-val_score=0.5945.ckpt' as top 1


Epoch 8:  50%|█████     | 660/1320 [00:51<00:51, 12.92it/s, loss=2.1, v_num=8410]  

Epoch 8, global step 8976: 'val_score' reached 0.59799 (best 0.59799), saving model to './checkpoints/EfficientNetV2Model=0-epoch=08-train_loss=2.5345-val_score=0.5980.ckpt' as top 1


Epoch 8: 100%|██████████| 1320/1320 [01:48<00:00, 12.16it/s, loss=1.94, v_num=8410]

Epoch 8, global step 9504: 'val_score' reached 0.60762 (best 0.60762), saving model to './checkpoints/EfficientNetV2Model=0-epoch=08-train_loss=1.7615-val_score=0.6076.ckpt' as top 1


Epoch 9:  50%|█████     | 660/1320 [00:51<00:51, 12.90it/s, loss=1.98, v_num=8410] 

Epoch 9, global step 10032: 'val_score' reached 0.61009 (best 0.61009), saving model to './checkpoints/EfficientNetV2Model=0-epoch=09-train_loss=2.4853-val_score=0.6101.ckpt' as top 1


Epoch 9: 100%|██████████| 1320/1320 [01:44<00:00, 12.62it/s, loss=1.86, v_num=8410]

Epoch 9, global step 10560: 'val_score' reached 0.61800 (best 0.61800), saving model to './checkpoints/EfficientNetV2Model=0-epoch=09-train_loss=1.6300-val_score=0.6180.ckpt' as top 1


Epoch 10:  50%|█████     | 660/1320 [00:49<00:49, 13.43it/s, loss=1.93, v_num=8410]

Epoch 10, global step 11088: 'val_score' reached 0.61867 (best 0.61867), saving model to './checkpoints/EfficientNetV2Model=0-epoch=10-train_loss=2.5606-val_score=0.6187.ckpt' as top 1


Epoch 10: 100%|██████████| 1320/1320 [01:40<00:00, 13.19it/s, loss=1.83, v_num=8410]

Epoch 10, global step 11616: 'val_score' reached 0.62775 (best 0.62775), saving model to './checkpoints/EfficientNetV2Model=0-epoch=10-train_loss=1.8013-val_score=0.6278.ckpt' as top 1


Epoch 11:  50%|█████     | 660/1320 [00:48<00:48, 13.70it/s, loss=1.83, v_num=8410] 

Epoch 11, global step 12144: 'val_score' reached 0.63841 (best 0.63841), saving model to './checkpoints/EfficientNetV2Model=0-epoch=11-train_loss=2.2874-val_score=0.6384.ckpt' as top 1


Epoch 11: 100%|██████████| 1320/1320 [01:39<00:00, 13.28it/s, loss=1.71, v_num=8410]

Epoch 11, global step 12672: 'val_score' was not in top 1


Epoch 12:  50%|█████     | 660/1320 [00:47<00:47, 13.80it/s, loss=1.82, v_num=8410] 

Epoch 12, global step 13200: 'val_score' reached 0.64368 (best 0.64368), saving model to './checkpoints/EfficientNetV2Model=0-epoch=12-train_loss=2.3063-val_score=0.6437.ckpt' as top 1


Epoch 12: 100%|██████████| 1320/1320 [01:38<00:00, 13.36it/s, loss=1.75, v_num=8410]

Epoch 12, global step 13728: 'val_score' was not in top 1


Epoch 13:  50%|█████     | 660/1320 [00:47<00:47, 13.85it/s, loss=1.78, v_num=8410] 

Epoch 13, global step 14256: 'val_score' reached 0.64494 (best 0.64494), saving model to './checkpoints/EfficientNetV2Model=0-epoch=13-train_loss=2.2937-val_score=0.6449.ckpt' as top 1


Epoch 13: 100%|██████████| 1320/1320 [01:38<00:00, 13.41it/s, loss=1.67, v_num=8410]

Epoch 13, global step 14784: 'val_score' was not in top 1


Epoch 14:  50%|█████     | 660/1320 [00:47<00:47, 13.85it/s, loss=1.76, v_num=8410] 

Epoch 14, global step 15312: 'val_score' reached 0.65375 (best 0.65375), saving model to './checkpoints/EfficientNetV2Model=0-epoch=14-train_loss=2.0746-val_score=0.6538.ckpt' as top 1


Epoch 14: 100%|██████████| 1320/1320 [01:38<00:00, 13.37it/s, loss=1.63, v_num=8410]

Epoch 14, global step 15840: 'val_score' was not in top 1


Epoch 15:  50%|█████     | 660/1320 [00:47<00:47, 13.83it/s, loss=1.74, v_num=8410] 

Epoch 15, global step 16368: 'val_score' reached 0.66269 (best 0.66269), saving model to './checkpoints/EfficientNetV2Model=0-epoch=15-train_loss=2.4559-val_score=0.6627.ckpt' as top 1


Epoch 15: 100%|██████████| 1320/1320 [01:38<00:00, 13.36it/s, loss=1.62, v_num=8410]

Epoch 15, global step 16896: 'val_score' was not in top 1


Epoch 16:  50%|█████     | 660/1320 [00:47<00:47, 13.86it/s, loss=1.68, v_num=8410] 

Epoch 16, global step 17424: 'val_score' reached 0.66377 (best 0.66377), saving model to './checkpoints/EfficientNetV2Model=0-epoch=16-train_loss=2.1925-val_score=0.6638.ckpt' as top 1


Epoch 16: 100%|██████████| 1320/1320 [01:38<00:00, 13.36it/s, loss=1.57, v_num=8410]

Epoch 16, global step 17952: 'val_score' was not in top 1


Epoch 17:  50%|█████     | 660/1320 [00:47<00:47, 13.83it/s, loss=1.65, v_num=8410] 

Epoch 17, global step 18480: 'val_score' reached 0.67115 (best 0.67115), saving model to './checkpoints/EfficientNetV2Model=0-epoch=17-train_loss=2.3881-val_score=0.6711.ckpt' as top 1


Epoch 17: 100%|██████████| 1320/1320 [01:38<00:00, 13.35it/s, loss=1.55, v_num=8410]

Epoch 17, global step 19008: 'val_score' was not in top 1


Epoch 18:  50%|█████     | 660/1320 [00:47<00:47, 13.83it/s, loss=1.67, v_num=8410] 

Epoch 18, global step 19536: 'val_score' reached 0.67615 (best 0.67615), saving model to './checkpoints/EfficientNetV2Model=0-epoch=18-train_loss=2.1377-val_score=0.6761.ckpt' as top 1


Epoch 18: 100%|██████████| 1320/1320 [01:38<00:00, 13.34it/s, loss=1.55, v_num=8410]

Epoch 18, global step 20064: 'val_score' was not in top 1


Epoch 19:  50%|█████     | 660/1320 [00:47<00:47, 13.83it/s, loss=1.61, v_num=8410] 

Epoch 19, global step 20592: 'val_score' reached 0.67912 (best 0.67912), saving model to './checkpoints/EfficientNetV2Model=0-epoch=19-train_loss=1.9359-val_score=0.6791.ckpt' as top 1


Epoch 19: 100%|██████████| 1320/1320 [01:38<00:00, 13.36it/s, loss=1.5, v_num=8410] 

Epoch 19, global step 21120: 'val_score' was not in top 1


Epoch 20:  50%|█████     | 660/1320 [00:47<00:47, 13.84it/s, loss=1.54, v_num=8410]

Epoch 20, global step 21648: 'val_score' reached 0.68174 (best 0.68174), saving model to './checkpoints/EfficientNetV2Model=0-epoch=20-train_loss=1.8784-val_score=0.6817.ckpt' as top 1


Epoch 20: 100%|██████████| 1320/1320 [01:38<00:00, 13.37it/s, loss=1.5, v_num=8410] 

Epoch 20, global step 22176: 'val_score' was not in top 1


Epoch 21:  50%|█████     | 660/1320 [00:47<00:47, 13.84it/s, loss=1.53, v_num=8410]

Epoch 21, global step 22704: 'val_score' reached 0.68982 (best 0.68982), saving model to './checkpoints/EfficientNetV2Model=0-epoch=21-train_loss=2.0252-val_score=0.6898.ckpt' as top 1


Epoch 21: 100%|██████████| 1320/1320 [01:38<00:00, 13.37it/s, loss=1.43, v_num=8410]

Epoch 21, global step 23232: 'val_score' was not in top 1


Epoch 22:  50%|█████     | 660/1320 [00:47<00:47, 13.82it/s, loss=1.56, v_num=8410] 

Epoch 22, global step 23760: 'val_score' was not in top 1


Epoch 22: 100%|██████████| 1320/1320 [01:35<00:00, 13.80it/s, loss=1.49, v_num=8410]

Epoch 22, global step 24288: 'val_score' was not in top 1


Epoch 22: 100%|██████████| 1320/1320 [01:35<00:00, 13.80it/s, loss=1.49, v_num=8410]


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type        | Params
--------------------------------------
0 | model | CustomModel | 117 M 
--------------------------------------
334 K     Trainable params
117 M     Non-trainable params
117 M     Total params
470.274   Total estimated model params size (MB)


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 15.14it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0:  50%|█████     | 660/1320 [00:47<00:47, 13.82it/s, loss=1.46, v_num=8410]

Epoch 0, global step 528: 'val_score' reached 0.70086 (best 0.70086), saving model to './checkpoints/EfficientNetV2Model=1-epoch=00-train_loss=1.4945-val_score=0.7009.ckpt' as top 1


Epoch 0: 100%|██████████| 1320/1320 [01:38<00:00, 13.36it/s, loss=1.4, v_num=8410] 

Epoch 0, global step 1056: 'val_score' reached 0.70746 (best 0.70746), saving model to './checkpoints/EfficientNetV2Model=1-epoch=00-train_loss=1.3499-val_score=0.7075.ckpt' as top 1


Epoch 1:  50%|█████     | 660/1320 [00:48<00:48, 13.67it/s, loss=1.49, v_num=8410]

Epoch 1, global step 1584: 'val_score' reached 0.70897 (best 0.70897), saving model to './checkpoints/EfficientNetV2Model=1-epoch=01-train_loss=1.4109-val_score=0.7090.ckpt' as top 1


Epoch 1: 100%|██████████| 1320/1320 [01:39<00:00, 13.30it/s, loss=1.45, v_num=8410]

Epoch 1, global step 2112: 'val_score' reached 0.71291 (best 0.71291), saving model to './checkpoints/EfficientNetV2Model=1-epoch=01-train_loss=1.4718-val_score=0.7129.ckpt' as top 1


Epoch 2:  50%|█████     | 660/1320 [00:48<00:48, 13.65it/s, loss=1.44, v_num=8410] 

Epoch 2, global step 2640: 'val_score' reached 0.71436 (best 0.71436), saving model to './checkpoints/EfficientNetV2Model=1-epoch=02-train_loss=1.7251-val_score=0.7144.ckpt' as top 1


Epoch 2: 100%|██████████| 1320/1320 [01:39<00:00, 13.26it/s, loss=1.41, v_num=8410]

Epoch 2, global step 3168: 'val_score' was not in top 1


Epoch 3:  50%|█████     | 660/1320 [00:47<00:47, 13.79it/s, loss=1.42, v_num=8410] 

Epoch 3, global step 3696: 'val_score' was not in top 1


Epoch 3: 100%|██████████| 1320/1320 [01:35<00:00, 13.81it/s, loss=1.35, v_num=8410]

Epoch 3, global step 4224: 'val_score' was not in top 1


Epoch 3: 100%|██████████| 1320/1320 [01:35<00:00, 13.81it/s, loss=1.35, v_num=8410]


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type        | Params
--------------------------------------
0 | model | CustomModel | 117 M 
--------------------------------------
334 K     Trainable params
117 M     Non-trainable params
117 M     Total params
470.274   Total estimated model params size (MB)


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 15.14it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0:  50%|█████     | 660/1320 [00:47<00:47, 13.80it/s, loss=1.51, v_num=8410]

Epoch 0, global step 528: 'val_score' reached 0.73062 (best 0.73062), saving model to './checkpoints/EfficientNetV2Model=2-epoch=00-train_loss=1.4427-val_score=0.7306.ckpt' as top 1


Epoch 0: 100%|██████████| 1320/1320 [01:38<00:00, 13.36it/s, loss=1.34, v_num=8410]

Epoch 0, global step 1056: 'val_score' was not in top 1


Epoch 1:  50%|█████     | 660/1320 [00:47<00:47, 13.82it/s, loss=1.51, v_num=8410] 

Epoch 1, global step 1584: 'val_score' was not in top 1


Epoch 1: 100%|██████████| 1320/1320 [01:35<00:00, 13.79it/s, loss=1.33, v_num=8410]

Epoch 1, global step 2112: 'val_score' was not in top 1


Epoch 1: 100%|██████████| 1320/1320 [01:35<00:00, 13.79it/s, loss=1.33, v_num=8410]


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type        | Params
--------------------------------------
0 | model | CustomModel | 117 M 
--------------------------------------
334 K     Trainable params
117 M     Non-trainable params
117 M     Total params
470.274   Total estimated model params size (MB)


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 24.37it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0:  50%|█████     | 660/1320 [00:47<00:47, 13.83it/s, loss=1.45, v_num=8410]

Epoch 0, global step 528: 'val_score' reached 0.72926 (best 0.72926), saving model to './checkpoints/EfficientNetV2Model=3-epoch=00-train_loss=1.3520-val_score=0.7293.ckpt' as top 1


Epoch 0: 100%|██████████| 1320/1320 [01:38<00:00, 13.35it/s, loss=1.34, v_num=8410]

Epoch 0, global step 1056: 'val_score' was not in top 1


Epoch 1:  50%|█████     | 660/1320 [00:47<00:47, 13.80it/s, loss=1.49, v_num=8410] 

Epoch 1, global step 1584: 'val_score' reached 0.73262 (best 0.73262), saving model to './checkpoints/EfficientNetV2Model=3-epoch=01-train_loss=1.6014-val_score=0.7326.ckpt' as top 1


Epoch 1: 100%|██████████| 1320/1320 [01:38<00:00, 13.34it/s, loss=1.37, v_num=8410]

Epoch 1, global step 2112: 'val_score' was not in top 1


Epoch 2:  50%|█████     | 660/1320 [00:47<00:47, 13.83it/s, loss=1.44, v_num=8410] 

Epoch 2, global step 2640: 'val_score' reached 0.73351 (best 0.73351), saving model to './checkpoints/EfficientNetV2Model=3-epoch=02-train_loss=1.3833-val_score=0.7335.ckpt' as top 1


Epoch 2: 100%|██████████| 1320/1320 [01:39<00:00, 13.32it/s, loss=1.39, v_num=8410]

Epoch 2, global step 3168: 'val_score' was not in top 1


Epoch 3:  50%|█████     | 660/1320 [00:47<00:47, 13.78it/s, loss=1.44, v_num=8410] 

Epoch 3, global step 3696: 'val_score' was not in top 1


Epoch 3: 100%|██████████| 1320/1320 [01:35<00:00, 13.75it/s, loss=1.33, v_num=8410]

Epoch 3, global step 4224: 'val_score' was not in top 1


Epoch 3: 100%|██████████| 1320/1320 [01:35<00:00, 13.75it/s, loss=1.33, v_num=8410]


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type        | Params
--------------------------------------
0 | model | CustomModel | 117 M 
--------------------------------------
334 K     Trainable params
117 M     Non-trainable params
117 M     Total params
470.274   Total estimated model params size (MB)


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 19.98it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0:  50%|█████     | 660/1320 [00:47<00:47, 13.76it/s, loss=1.56, v_num=8410]

Epoch 0, global step 528: 'val_score' reached 0.73142 (best 0.73142), saving model to './checkpoints/EfficientNetV2Model=4-epoch=00-train_loss=1.9169-val_score=0.7314.ckpt' as top 1


Epoch 0: 100%|██████████| 1320/1320 [01:39<00:00, 13.32it/s, loss=1.36, v_num=8410]

Epoch 0, global step 1056: 'val_score' was not in top 1


Epoch 1:  50%|█████     | 660/1320 [00:47<00:47, 13.82it/s, loss=1.45, v_num=8410] 

Epoch 1, global step 1584: 'val_score' was not in top 1


Epoch 1: 100%|██████████| 1320/1320 [01:35<00:00, 13.83it/s, loss=1.34, v_num=8410]

Epoch 1, global step 2112: 'val_score' was not in top 1


Epoch 1: 100%|██████████| 1320/1320 [01:35<00:00, 13.83it/s, loss=1.34, v_num=8410]


In [None]:
test_df = pd.read_csv('./open/test.csv')
test_df['img_path'] = test_df['img_path'].apply(lambda x: os.path.join('./open', x))

In [None]:
if not len(test_df) == len(os.listdir('./open/test')):
    raise ValueError()

In [None]:
test_transform = transforms.Compose([
    transforms.Resize(size=(256,256), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
])

test_collate_fn = CustomCollateFn(test_transform, 'inference')
test_dataset = CustomDataset(test_df, 'img_path', mode='inference')
test_dataloader = DataLoader(test_dataset, collate_fn=test_collate_fn, batch_size=BATCH_SIZE*2)

In [None]:
fold_preds = []
for checkpoint_path in glob('./checkpoints/swinv2-large-resize*.ckpt'):
    model = Swinv2Model.from_pretrained("microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft")
    lit_model = LitCustomModel.load_from_checkpoint(checkpoint_path, model=model)
    trainer = L.Trainer( accelerator='auto', precision=32)
    preds = trainer.predict(lit_model, test_dataloader)
    preds = torch.cat(preds,dim=0).detach().cpu().numpy().argmax(1)
    fold_preds.append(preds)
pred_ensemble = list(map(lambda x: np.bincount(x).argmax(),np.stack(fold_preds,axis=1)))

In [None]:
submission = pd.read_csv('./open/sample_submission.csv')

In [None]:
submission['label'] = le.inverse_transform(pred_ensemble)

In [None]:
submission.to_csv('./submissions/swinv2_large_resize.csv',index=False)