In [1]:
# Module import
import os
import re
# import gc # garbage collection module
import glob
import numpy as np
import pandas as pd
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
import pytorch_lightning as L
from transformers import Swinv2Config, Swinv2Model, AutoImageProcessor, AutoModelForImageClassification
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

from tqdm import tqdm
from sklearn import preprocessing
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedGroupKFold

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Parent directory of the notebook's working directory.
# Built with os.path.join/os.pardir instead of the literal '\..', which relied on
# an invalid escape sequence ('\.') and only formed a valid path on Windows.
basedir = os.path.join(os.getcwd(), os.pardir)

In [3]:
# Point at the dataset folder under basedir and load the training metadata table.
data_dir = f"{basedir}/Bird_image_data"
df = pd.read_csv(f"{data_dir}/train.csv")
df.head()
# train, val, _, _ = train_test_split(df, df['label'], test_size=0.3, stratify=df['label'], random_state=CFG['SEED'])

Unnamed: 0,img_path,upscale_img_path,label
0,./train/TRAIN_00000.jpg,./upscale_train/TRAIN_00000.png,Ruddy Shelduck
1,./train/TRAIN_00001.jpg,./upscale_train/TRAIN_00001.png,Gray Wagtail
2,./train/TRAIN_00002.jpg,./upscale_train/TRAIN_00002.png,Indian Peacock
3,./train/TRAIN_00003.jpg,./upscale_train/TRAIN_00003.png,Common Kingfisher
4,./train/TRAIN_00004.jpg,./upscale_train/TRAIN_00004.png,Common Kingfisher


In [None]:
# CustomDataset: serves (image, label) samples from the image-path list and
# label list taken from the DataFrame.
class CustomDataset(Dataset):
    """Map-style dataset over a list of image paths and an optional label list.

    Args:
        img_path_list: Indexable collection of image file paths.
        label_list: Indexable collection of labels aligned with
            ``img_path_list``, or ``None`` when no labels are available.
        transforms: Optional callable applied to each loaded image.
    """

    def __init__(self, img_path_list, label_list, transforms=None):
        # Keep the path list, label list and transform handed over by the caller.
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.img_path_list)

    def __getitem__(self, index):
        """Load the sample at ``index``.

        Returns:
            ``(image, label)`` when a label list was given, otherwise ``image``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_path = self.img_path_list[index]
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV decodes to BGR channel order; convert to RGB.
        # NOTE(review): assumes downstream transforms/models expect RGB — confirm.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms is not None:
            # NOTE(review): assumes the transform is called positionally with the
            # image; adjust if it needs keyword arguments — TODO confirm.
            image = self.transforms(image)

        if self.label_list is None:
            return image
        return image, self.label_list[index]

In [None]:
class CustomModel(nn.Module):
    """Backbone followed by a Tanh + linear classification head.

    Args:
        model: Module whose forward output exposes a ``pooler_output`` attribute.
        num_classes: Number of output classes of the linear head (default 25).
    """

    def __init__(self, model, num_classes=25):
        super().__init__()
        self.model = model
        # LazyLinear infers its input width from the first batch it sees.
        self.clf = nn.Sequential(
            nn.Tanh(),
            nn.LazyLinear(num_classes),
        )

#     @torch.compile
    def forward(self, x, label=None):
        """Run the backbone and head.

        Args:
            x: Input passed straight to the backbone.
            label: Optional targets; when given, the cross-entropy loss is computed.

        Returns:
            Tuple ``(log_probs, loss)`` where ``log_probs`` is the log-softmax of
            the head's logits over the last dimension and ``loss`` is ``None``
            when ``label`` is not provided.
        """
        pooled = self.model(x).pooler_output
        logits = self.clf(pooled)
        # Cross-entropy is taken on the raw logits, not on the log-softmax output.
        loss = F.cross_entropy(logits, label) if label is not None else None
        log_probs = F.log_softmax(logits, dim=-1)
        return log_probs, loss

class LitCustomModel(L.LightningModule):
    """Lightning wrapper around ``CustomModel``: training, validation and prediction.

    Args:
        model: Backbone handed to ``CustomModel``.
        lr: Learning rate for the AdamW optimizer (default ``1e-5``).
    """

    def __init__(self, model, lr=1e-5):
        super().__init__()
        self.model = CustomModel(model)
        self.lr = lr
        # Per-batch [log_probs, label] pairs gathered during one validation epoch.
        self.validation_step_output = []

    def configure_optimizers(self):
        """Return an AdamW optimizer over all parameters."""
        return torch.optim.AdamW(self.parameters(), lr=self.lr)

    def training_step(self, batch, batch_idx=None):
        """Compute and log the training loss for one batch."""
        x = batch['pixel_values']
        label = batch['label']
        _, loss = self.model(x, label)
        self.log("train_loss", loss, on_step=True, on_epoch=False)
        return loss

    def validation_step(self, batch, batch_idx=None):
        """Compute the validation loss and stash outputs for the epoch-level score."""
        x = batch['pixel_values']
        label = batch['label']
        log_probs, loss = self.model(x, label)
        # Store detached CPU copies so the buffer does not keep device memory
        # (or the autograd graph) alive for the whole validation epoch.
        self.validation_step_output.append([log_probs.detach().cpu(), label.detach().cpu()])
        return loss

    def predict_step(self, batch, batch_idx=None):
        """Return the model's log-probabilities for one batch."""
        x = batch['pixel_values']
        log_probs, _ = self.model(x)
        return log_probs

    def on_validation_epoch_end(self):
        """Log the macro F1 score over all validation batches of the epoch.

        This hook replaces ``validation_epoch_end``, which Lightning 2.0 removed;
        outputs are read from ``self.validation_step_output``.
        """
        if not self.validation_step_output:
            # Nothing was collected; torch.cat would fail on an empty list.
            return
        log_probs = torch.cat([p for p, _ in self.validation_step_output])
        labels = torch.cat([y for _, y in self.validation_step_output])
        self.validation_step_output.clear()

        pred = log_probs.argmax(dim=1).numpy()
        score = f1_score(labels.numpy(), pred, average='macro')
        self.log("val_score", score)

In [None]:
# Hyperparameter settings, gathered in a single dict.
CFG = dict(
    IMG_SIZE=224,
    EPOCHS=5,
    LEARNING_RATE=3e-4,
    BATCH_SIZE=32,
    SEED=41,
)

In [None]:
# # Fix the random seed (manual variant, kept commented out for reference)
# def seed_everything(seed):
#     random.seed(seed) # fix the seed of Python's random module
#     os.environ['PYTHONHASHSEED'] = str(seed) # control the randomness of hashing
#     np.random.seed(seed) # fix NumPy's random numbers
#     torch.manual_seed(seed) # fix the random seed for CPU tensor generation in torch
#     torch.cuda.manual_seed(seed) # fix the seed for CUDA (GPU) tensors
#     torch.backends.cudnn.deterministic = True # restrict the backend to deterministic algorithms only
#     torch.backends.cudnn.benchmark = True # let cuDNN dynamically pick the fastest algorithm using internal heuristics
#     # NOTE(review): benchmark = True looks at odds with the determinism goal above — confirm before re-enabling.

L.seed_everything(CFG['SEED']) # fix the seed via Lightning, using the value from CFG