## 12.3 베이스라인 모델
### 12.3.1 시드값 고정 및 GPU 장비 설정

In [None]:
# 시드값 고정
import torch
import random
import numpy as np
import os

seed = 50
os.environ['PYTHONHASHSEED'] = str(seed)

# Apply the same seed to Python's `random`, NumPy, and the torch CPU/CUDA seeders.
for seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    seed_fn(seed)

# cuDNN flags: cuDNN is switched off, with benchmark mode off and
# deterministic mode on.
torch.backends.cudnn.enabled = False
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [None]:
# Device selection: use CUDA when it is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

### 12.3.2 데이터 준비

In [None]:
import pandas as pd

data_path = '../../data/12_plant/'

# Read the three CSV files in order: train, test, sample submission.
train, test, submission = (
    pd.read_csv(data_path + csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

In [None]:
# 훈련 데이터, 검증 데이터 분리
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for validation, stratified on the four target columns.
target_columns = ['healthy', 'multiple_diseases', 'rust', 'scab']
train, valid = train_test_split(
    train,
    test_size=0.1,
    random_state=50,
    stratify=train[target_columns],
)

In [None]:
# 데이터 셋 클래스 정의
import cv2
from torch.utils.data import Dataset
import numpy as np

class ImageDataSet(Dataset):
    """Dataset that loads ``<img_dir><image id>.jpg`` files listed in a DataFrame.

    Args:
        df: DataFrame whose first column holds the image id. For non-test
            data, columns 1 through 4 hold the per-class target values.
        img_dir: Path prefix that is concatenated with the image id and
            ``'.jpg'`` to build the file path.
        transform: Callable invoked as ``transform(image=...)`` that returns
            a mapping with an ``'image'`` entry. When omitted, the image is
            returned unchanged.
        is_test: When True, ``__getitem__`` returns only the image;
            otherwise it returns ``(image, label)``.
    """

    def __init__(self, df, img_dir=data_path+'images/', transform=None, is_test=False):
        super().__init__()
        self.df = df
        self.img_dir = img_dir
        # __getitem__ always calls transform(image=...)['image'], so the no-op
        # fallback has to follow the same keyword-in / mapping-out convention.
        self.transform = transform if transform else self._identity
        self.is_test = is_test

    @staticmethod
    def _identity(image):
        """No-op transform used when no transform is supplied."""
        return {'image': image}

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, convert (BGR -> RGB) and transform the image at row ``idx``.

        Returns:
            The transformed image when ``is_test`` is True, otherwise a tuple
            ``(image, label)`` where ``label`` is the position of the largest
            value among columns 1 through 4 of the row.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_id = self.df.iloc[idx, 0]
        img_path = self.img_dir + img_id + '.jpg'
        image = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f'Image is missing or unreadable: {img_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = self.transform(image=image)['image']
        if self.is_test:
            return image
        # Use the raw ndarray so argmax is positional regardless of how the
        # installed pandas version dispatches np.argmax on a Series.
        label = np.argmax(self.df.iloc[idx, 1:5].to_numpy())
        return image, label

In [None]:
# 이미지 변환기 정의
import albumentations as A
from albumentations.pytorch import ToTensorV2

# One of emboss / sharpen / blur, applied as a group with probability 0.3.
texture_stage = A.OneOf(
    [
        A.Emboss(p=1),   # emboss
        A.Sharpen(p=1),  # sharpen
        A.Blur(p=1),     # blur
    ],
    p=0.3,
)

# Training pipeline: resize, random augmentations, then normalize and convert to tensor.
transform_train = A.Compose([
    A.Resize(height=450, width=650),  # resize
    A.RandomBrightnessContrast(
        brightness_limit=0.2,
        contrast_limit=0.2,
        p=0.3,
    ),  # brightness / contrast jitter
    A.VerticalFlip(p=0.2),    # vertical flip
    A.HorizontalFlip(p=0.5),  # horizontal flip
    A.ShiftScaleRotate(
        shift_limit=0.1,
        scale_limit=0.2,
        rotate_limit=30,
        p=0.3,
    ),  # shift, scale, rotate
    texture_stage,
    A.PiecewiseAffine(p=0.3),  # piecewise affine warp
    A.Normalize(),
    ToTensorV2(),
])

# Validation / test pipeline: resize, normalize and convert to tensor only.
transform_test = A.Compose([
    A.Resize(height=450, width=650),
    A.Normalize(),
    ToTensorV2(),
])

In [None]:
# Build the training and validation datasets
# (training uses the augmenting pipeline, validation the plain one).
img_dir = data_path + 'images/'
dataset_train, dataset_valid = (
    ImageDataSet(frame, img_dir=img_dir, transform=pipeline)
    for frame, pipeline in ((train, transform_train), (valid, transform_test))
)

In [None]:
# Seed fixing for DataLoader workers (multiprocessing)
def seed_worker(worker_id):
    """Seed ``random`` and NumPy from torch's initial seed, reduced modulo 2**32.

    ``worker_id`` is accepted but not used.
    """
    derived_seed = torch.initial_seed() % (1 << 32)
    random.seed(derived_seed)
    np.random.seed(derived_seed)


# Generator seeded with 0, handed to the data loaders.
g = torch.Generator().manual_seed(0)

In [None]:
from torch.utils.data import DataLoader

batch_size = 4

# Options shared by the training and validation loaders.
loader_options = dict(
    batch_size=batch_size,
    worker_init_fn=seed_worker,
    generator=g,
    num_workers=2,
)

# Only the training loader shuffles.
loader_train = DataLoader(dataset_train, shuffle=True, **loader_options)
loader_valid = DataLoader(dataset_valid, shuffle=False, **loader_options)

### 12.3.3 모델 생성

In [None]:
# !pip install -qqq timm torchmetrics torchinfo

In [None]:
import timm
from timm import create_model

# Create a pretrained EfficientNet-B4 with a 4-class head, then move it to the selected device.
model = timm.create_model(
    'efficientnet_b4',
    pretrained=True,
    num_classes=4,
)
model = model.to(device)

### 12.3.4 모델 훈련 및 성능 검증

In [None]:
# 손실 함수와 옵티마이저
import torch.nn as nn
# Cross-entropy loss for the 4-class output.
criterion = nn.CrossEntropyLoss()

# AdamW over all model parameters.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=6e-5,
    weight_decay=1e-4,
)

In [None]:
# from tqdm.auto import tqdm
# # 훈련 및 성능 검증
# epochs =
# for epoch in range(epochs):
#
#     # [훈련]
#     # 모델 훈련 상태로 설정
#     # 에폭별 손실값 초기화(훈련데이터용)
#     for idx, (images, labels) in tqdm(enumerate(loader_train)):
#         # 기울기 초기화
#         # 순전파
#         # 손실값 계산(훈련 데이터용)
#         # 역전파
#         # 가중치 갱신
#
#     # [검증]
#     # 모델 평가 상태로 설정
#     with torch.no_grad():
#         for idx, (images, labels) in tqdm(enumerate(loader_valid)):
#             # 순전파
#             # 손실값 계산(검증 데이터용)
#             # 예측값 및 실제값 계산
#     # 검증 데이터 손실 값 및 ROC AUC 점수 출력

In [None]:
from tqdm.auto import tqdm
from sklearn.metrics import roc_auc_score

# Training and validation
epochs = 5
for epoch in range(epochs):

    # [Train]
    # Put the model in training mode
    model.train()
    # Per-batch training losses for this epoch
    train_iter_loss = []
    for images, labels in tqdm(loader_train):
        images = images.to(device)
        labels = labels.to(device)
        # Reset gradients
        optimizer.zero_grad()
        # Forward pass
        outputs = model(images)
        # Training loss
        loss = criterion(outputs, labels)
        train_iter_loss.append(loss.item())
        # Backward pass
        loss.backward()
        # Update weights
        optimizer.step()
    print(f'Epoch [{epoch+1}/{epochs}] - train loss : {sum(train_iter_loss)/len(train_iter_loss):.4f}')

    # [Validation]
    # Put the model in evaluation mode
    model.eval()
    valid_iter_loss = []
    preds_list = []
    true_onehot_list = []
    with torch.no_grad():
        # Iterate the validation loader here: iterating loader_train would
        # report metrics on augmented training data and make the average
        # below divide by the wrong number of batches.
        for images, labels in tqdm(loader_valid):
            images = images.to(device)
            labels = labels.to(device)
            # Forward pass
            outputs = model(images)
            # Validation loss
            loss = criterion(outputs, labels)
            valid_iter_loss.append(loss.item())
            # Class probabilities
            preds = torch.softmax(outputs.cpu(), dim=1).numpy()

            # Targets as one-hot rows: index the 4x4 identity matrix by label
            true_onehot = torch.eye(4)[labels.cpu()].numpy()

            # Accumulate predictions and targets row by row
            preds_list.extend(preds)
            true_onehot_list.extend(true_onehot)
    # Report validation loss and ROC AUC
    print(f'Epoch [{epoch+1}/{epochs}] - valid loss : {sum(valid_iter_loss)/len(valid_iter_loss):.4f}')
    print(f'Epoch [{epoch+1}/{epochs}] - valid roc auc : {roc_auc_score(true_onehot_list, preds_list):.4f}')

In [None]:
# Demo: indexing the 4x4 identity matrix with [1, 2, 3] selects rows 1-3,
# i.e. the one-hot rows for those indices (the same indexing the validation loop uses).
torch.eye(4)[[1,2,3]]

### 12.3.5 예측 및 결과 제출

In [None]:
# Test dataset: images only (is_test=True), using the non-augmenting pipeline.
dataset_test = ImageDataSet(
    test,
    img_dir=data_path + 'images/',
    transform=transform_test,
    is_test=True,
)

# Test loader: unshuffled, so batch order follows the rows of `test`.
loader_test = DataLoader(
    dataset_test,
    batch_size=batch_size,
    shuffle=False,
    worker_init_fn=seed_worker,
    generator=g,
    num_workers=2,
)

In [None]:
# Prediction: fill one row of class probabilities per test image.
model.eval()
preds = np.zeros((len(test), 4))
with torch.no_grad():
    offset = 0  # index of the first row to be written by the current batch
    for images in loader_test:
        images = images.to(device)
        outputs = model(images)
        # Softmax over the class dimension. No squeeze is applied, so a final
        # batch with a single image keeps its 2-D (1, 4) shape.
        preds_part = torch.softmax(outputs.cpu(), dim=1).numpy()
        preds[offset:offset + len(preds_part)] = preds_part
        offset += len(preds_part)

In [None]:
# Submission: write the predicted probabilities into the four target columns and save as CSV.
target_columns = ['healthy', 'multiple_diseases', 'rust', 'scab']
submission[target_columns] = preds
submission.to_csv(data_path + 'submission.csv', index=False)