<a href="https://colab.research.google.com/github/rbdus0715/project/blob/main/plant_pathology_identification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **plant-pathology-identification**
- 다중 분류
- 교차 엔트로피 오차

In [36]:
import pandas as pd

# Read the three competition tables in one pass: the labelled training rows,
# the test image ids, and the sample submission template.
train, test, submission = (
    pd.read_csv(csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)
# Print the three shapes on a single line as a quick sanity check.
print(*(frame.shape for frame in (train, test, submission)))

(1821, 5) (1821, 1) (1821, 5)


In [37]:
# Preview the first rows of the training table (image id plus the four label columns).
train.head()

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Train_0,0,0,0,1
1,Train_1,0,1,0,0
2,Train_2,1,0,0,0
3,Train_3,0,0,1,0
4,Train_4,1,0,0,0


In [38]:
# Preview the first rows of the test table (image ids only, no labels).
test.head()

Unnamed: 0,image_id
0,Test_0
1,Test_1
2,Test_2
3,Test_3
4,Test_4


In [39]:
# Preview the sample submission: one probability column per class for each test image.
submission.head()

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Test_0,0.25,0.25,0.25,0.25
1,Test_1,0.25,0.25,0.25,0.25
2,Test_2,0.25,0.25,0.25,0.25
3,Test_3,0.25,0.25,0.25,0.25
4,Test_4,0.25,0.25,0.25,0.25


**seed 값 고정**

In [40]:
import torch
import random
import numpy as np
import os

# Fix every random number generator this notebook relies on so that a
# Restart-and-Run-All reproduces the same split, augmentations and weights.
seed = 50
random.seed(seed)         # Python's built-in RNG
np.random.seed(seed)      # NumPy's global RNG (previously imported but never seeded)
torch.manual_seed(seed)   # PyTorch RNG
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.cuda.manual_seed(seed)      # current CUDA device
torch.cuda.manual_seed_all(seed)  # every visible CUDA device
# Request deterministic cuDNN kernels and turn off the autotuner, which can
# otherwise select different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

**데이터 준비**

In [41]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the labelled rows for validation, stratified on the four
# one-hot label columns so both splits keep the same class proportions.
# NOTE: `train` is rebound to the 90% split here, so re-running this cell
# without reloading the CSV splits the already-reduced frame again.
train, valid = train_test_split(
    train,
    test_size=0.1,
    stratify=train.loc[:, ['healthy', 'multiple_diseases', 'rust', 'scab']],
    random_state=50,
)

In [42]:
import cv2
from torch.utils.data import Dataset

class ImageDataset(Dataset):
    """Dataset that loads ``<img_dir>/<image_id>.jpg`` for each row of a DataFrame.

    Column 0 of ``df`` is read as the image id. For labelled data, columns 1-4
    are read as the label columns and collapsed to a class index with argmax.
    """

    def __init__(self, df, img_dir='./', transform=None, is_test=False):
        """Store the table and loading options.

        Args:
            df: DataFrame whose first column holds the image id.
            img_dir: Directory containing the ``.jpg`` files (a trailing
                separator is optional).
            transform: Optional callable invoked as ``transform(image=...)``
                that returns a mapping with an ``'image'`` entry
                (the albumentations calling convention).
            is_test: When True, ``__getitem__`` returns only the image.
        """
        super().__init__()
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        """Return the number of rows (images) in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at positional row ``idx``.

        Returns:
            ``image`` when ``is_test`` is True, otherwise ``(image, label)``
            where ``label`` is the argmax over columns 1-4 of the row.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        img_id = self.df.iloc[idx, 0]
        # os.path.join tolerates img_dir with or without a trailing separator.
        img_path = os.path.join(self.img_dir, f'{img_id}.jpg')
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None;
            # fail here with the path instead of a cryptic cvtColor error.
            raise FileNotFoundError(f'Could not read image file: {img_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
        if self.transform is not None:
            # Use the albumentations-style transformer.
            image = self.transform(image=image)['image']
        if self.is_test:
            return image
        # Take argmax on a plain ndarray so the result is always a position,
        # independent of how pandas implements Series.argmax.
        label = np.argmax(self.df.iloc[idx, 1:5].to_numpy())
        return image, label

**이미지 변환기 및 데이터 로더** 

In [43]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Training pipeline: resize, then a chain of randomly applied augmentations,
# then normalisation and conversion to a tensor.
transform_train = A.Compose([
    A.Resize(height=450, width=650),
    A.RandomBrightnessContrast(
        brightness_limit=0.2,
        contrast_limit=0.2,
        p=0.3,
    ),
    A.VerticalFlip(p=0.2),
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(
        shift_limit=0.1,
        scale_limit=0.2,
        rotate_limit=30,
        p=0.3,
    ),
    # With probability 0.3, apply exactly one of emboss / sharpen / blur.
    A.OneOf(
        [A.Emboss(p=1), A.Sharpen(p=1), A.Blur(p=1)],
        p=0.3,
    ),
    A.PiecewiseAffine(p=0.3),
    A.Normalize(),
    ToTensorV2(),
])

# Evaluation pipeline: same resize and normalisation, no random augmentation.
transform_test = A.Compose([
    A.Resize(height=450, width=650),
    A.Normalize(),
    ToTensorV2(),
])

In [44]:
# Both splits read from the same image folder; only the training split goes
# through the augmenting transform.
img_dir = 'images/'
dataset_train = ImageDataset(df=train, img_dir=img_dir, transform=transform_train)
dataset_valid = ImageDataset(df=valid, img_dir=img_dir, transform=transform_test)

In [45]:
# 멀티 프로세싱
def seed_worker(worker_id):
    """Seed NumPy's and Python's RNGs from torch's initial seed in this process.

    Intended for use as a ``DataLoader`` ``worker_init_fn``; ``worker_id`` is
    accepted to satisfy that signature but is not used.
    """
    derived_seed = torch.initial_seed() % (1 << 32)  # reduce modulo 2**32
    for reseed in (np.random.seed, random.seed):
        reseed(derived_seed)

# Dedicated generator handed to the DataLoaders. manual_seed returns the
# generator itself, so chaining keeps the same state while avoiding a stray
# Generator repr in the cell output.
g = torch.Generator().manual_seed(0)

<torch._C.Generator at 0x7f61a0a03f90>

In [55]:
from torch.utils.data import DataLoader

batch_size = 4

# Options shared by both loaders: same batch size, two worker processes, and
# the seeded generator / worker initialiser.
_loader_kwargs = dict(
    batch_size=batch_size,
    worker_init_fn=seed_worker,
    generator=g,
    num_workers=2,
)

# Only the training loader shuffles; validation keeps the DataFrame order.
loader_train = DataLoader(dataset_train, shuffle=True, **_loader_kwargs)
loader_valid = DataLoader(dataset_valid, shuffle=False, **_loader_kwargs)

**모델 생성(전이 학습)**
- 구현 참고: EfficientNet-PyTorch 저장소 (https://github.com/lukemelas/EfficientNet-PyTorch) — 구글에서 "EfficientNet github pytorch"로 검색하면 찾을 수 있음

In [47]:
!pip install efficientnet-pytorch==0.7.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [48]:
from efficientnet_pytorch import EfficientNet

In [49]:
# Load the pretrained EfficientNet-B7 weights with a 4-way output head
# (one output per target class) and move the model to the selected device.
model = EfficientNet.from_pretrained('efficientnet-b7', num_classes=4).to(device)

Loaded pretrained weights for efficientnet-b7


**모델 훈련 및 성능 검증**

In [50]:
import torch.nn as nn
# Cross-entropy loss over the class logits.
criterion = nn.CrossEntropyLoss()
# Adamax optimiser; weight_decay is the weight-decay coefficient.
optimizer = torch.optim.Adamax(
    params=model.parameters(),
    lr=6e-5,
    weight_decay=1e-4,
)

In [None]:
# 매 에폭마다 검증하기
from sklearn.metrics import roc_auc_score
from tqdm.notebook import tqdm
epochs = 5

for epoch in range(epochs):

    # ---- Training phase ----
    model.train()
    epoch_train_loss = 0  # running sum of per-batch mean losses

    for images, labels in tqdm(loader_train):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        epoch_train_loss += loss.item()  # accumulate this batch's loss
        loss.backward()                  # backpropagation
        optimizer.step()
    print(f'에폭 :[{epoch+1}/{epochs}] - 훈련 데이터 손실값: {epoch_train_loss/len(loader_train):.4f}')

    # ---- Validation phase (no gradients, no weight updates) ----
    model.eval()
    epoch_valid_loss = 0
    preds_list = []        # per-sample predicted class probabilities
    true_onehot_list = []  # per-sample one-hot ground truth

    with torch.no_grad():
        for images, labels in tqdm(loader_valid):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            epoch_valid_loss += loss.item()  # accumulate this batch's loss

            preds = torch.softmax(outputs.cpu(), dim=1).numpy()  # predicted probabilities
            # Build one-hot targets on the CPU: torch.eye() lives on the CPU, so
            # indexing it with labels still on the GPU may raise a device mismatch.
            # The class count is taken from the model output instead of a literal.
            true_onehot = torch.eye(outputs.shape[1])[labels.cpu()].numpy()
            preds_list.extend(preds)
            true_onehot_list.extend(true_onehot)
    print(f'에폭 :[{epoch+1}/{epochs}] - 검증 데이터손실값: {epoch_valid_loss/len(loader_valid):.4f} / 검증 데이터 ROC AUC : {roc_auc_score(true_onehot_list, preds_list):.4f}')