## Fix random seeds

In [None]:
import torch
import random
import numpy as np
import os

# One seed value shared by every random number generator seeded below.
seed = 50

# Python-level sources of randomness. Note that PYTHONHASHSEED set at runtime
# is only picked up by interpreters started after this point.
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)

# PyTorch generators: default generator, current CUDA device, all CUDA devices.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# cuDNN is switched off entirely, and its autotuner / non-deterministic
# kernels are disabled as well.
torch.backends.cudnn.enabled = False
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

## Device

In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

## Train/Valid split

In [None]:
import pandas as pd

# Directory holding the competition CSV files; file names are appended
# directly, so the trailing slash is required.
data_path = '/kaggle/input/cassava-leaf-disease-classification/'

train = pd.read_csv(f'{data_path}train.csv')
submission = pd.read_csv(f'{data_path}sample_submission.csv')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for validation, stratified on the 'label' column.
# NOTE: this rebinds `train`, so re-running the cell splits the already-split frame.
train, valid = train_test_split(
    train,
    test_size=0.1,
    stratify=train['label'],
    random_state=50,
)

In [None]:
# reset_index returns a new DataFrame rather than modifying in place, so the
# result must be bound back to the name for the reset to take effect.
train = train.reset_index(drop=True)
valid = valid.reset_index(drop=True)
valid.head()

## Dataset


In [None]:
import cv2
from torch.utils.data import Dataset
import numpy as np

class LeafDataset(Dataset):
    """Dataset that loads images listed in a DataFrame with OpenCV.

    Column 0 of ``df`` is read as the image file name and, unless ``is_test``
    is set, column 1 is read as the label. Rows are accessed positionally
    (``iloc``), so the DataFrame index does not matter.

    Args:
        df: table whose first column holds file names (and second the label).
        img_dir: prefix concatenated directly with the file name, so it is
            expected to end with a path separator.
        transform: optional callable invoked as ``transform(image=image)``;
            the ``'image'`` entry of its result replaces the image.
        is_test: when true, ``__getitem__`` returns only the image.
    """

    def __init__(self, df, img_dir='./', transform=None, is_test=False):
        super().__init__()
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return len(self.df)

    def _read_bgr(self, idx):
        """Read the image for row ``idx`` exactly as ``cv2.imread`` returns it.

        Raises:
            FileNotFoundError: if ``cv2.imread`` returns ``None`` (missing or
                undecodable file), instead of failing later with an opaque error.
        """
        img_path = self.img_dir + self.df.iloc[idx, 0]
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')
        return image

    def __getitem__(self, idx):
        """Return the (optionally transformed) RGB image, plus its label unless ``is_test``."""
        image = cv2.cvtColor(self._read_bgr(idx), cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image=image)['image']

        if self.is_test:
            return image
        label = self.df.iloc[idx, 1]
        return image, label

    def image_size(self, idx):
        """Return ``(height, width)`` of the untransformed image at row ``idx``."""
        height, width = self._read_bgr(idx).shape[:2]
        return height, width

In [None]:
import numpy as np
import cv2
import os

In [None]:
def calc_avg_mean_std(img_names, img_root, size):
    """Average the per-image channel mean and standard deviation.

    Each image is read, resized to ``size``, converted from BGR to RGB and
    passed to ``cv2.meanStdDev``; the per-image results are summed and divided
    by the number of images. The returned std is therefore the mean of the
    per-image stds, not the std of all pixels pooled together.

    Args:
        img_names: sized collection of image file names.
        img_root: prefix concatenated directly with each file name.
        size: target size forwarded to ``cv2.resize``.

    Returns:
        Tuple ``(mean, std)`` of length-3 arrays in RGB channel order, on the
        same scale as the loaded pixel values (no division by 255 is applied).

    Raises:
        ValueError: if ``img_names`` is empty (the averages would be NaN).
        FileNotFoundError: if an image cannot be read by ``cv2.imread``.
    """
    n_images = len(img_names)
    if n_images == 0:
        raise ValueError('img_names is empty; cannot compute mean/std')

    mean_sum = np.zeros(3)
    std_sum = np.zeros(3)
    for img_name in img_names:
        img_path = img_root + img_name
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure with None rather than an exception.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        img = cv2.resize(img, size)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        mean, std = cv2.meanStdDev(img)
        mean_sum += np.squeeze(mean)
        std_sum += np.squeeze(std)
    return (mean_sum / n_images, std_sum / n_images)

In [None]:
# Compute channel statistics over every file in the training image directory,
# with each image resized to 512x512 first.
train_img_root = '../input/cassava-leaf-disease-classification/train_images/'
train_img_names = os.listdir(train_img_root)
train_mean, train_std = calc_avg_mean_std(
    img_names=train_img_names,
    img_root=train_img_root,
    size=(512, 512),
)
(train_mean, train_std)

In [None]:
# Show the statistics divided by 255 (presumably to bring 8-bit pixel values
# onto a 0-1 scale -- confirm against how the images are loaded).
print(*(stat / 255.0 for stat in (train_mean, train_std)))

## Transform

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [None]:
# Training pipeline: resize, random flips / brightness-contrast / CLAHE,
# one randomly chosen noise op (or none), then normalization and tensor conversion.
transform_train = A.Compose([
    A.Resize(512, 512),
    A.HorizontalFlip(p=0.3),
    A.VerticalFlip(p=0.3),
    A.RandomBrightnessContrast(p=0.3),
    A.CLAHE(p=0.5, clip_limit=(1, 14), tile_grid_size=(8, 8)),
    A.OneOf([
        A.NoOp(),
        A.MultiplicativeNoise(),
        A.GaussNoise(),
        A.ISONoise(),
    ]),
    A.Normalize(
        mean=[0.42984136, 0.49624753, 0.3129598],
        std=[0.21417203, 0.21910103, 0.19542212],
    ),
    ToTensorV2(),
])

In [None]:
# Evaluation pipeline: resize and normalize only.
# Normalization was changed to use the training-data distribution.
transform_test = A.Compose([
    A.Resize(512, 512),
    A.Normalize(
        mean=[0.42984136, 0.49624753, 0.3129598],
        std=[0.21417203, 0.21910103, 0.19542212],
    ),
    ToTensorV2(),
])

In [None]:
# Both splits read from the same image directory; only the transform differs.
img_dir = '/kaggle/input/cassava-leaf-disease-classification/train_images/'

dataset_train = LeafDataset(
    df=train,
    img_dir=img_dir,
    transform=transform_train,
)
dataset_valid = LeafDataset(
    df=valid,
    img_dir=img_dir,
    transform=transform_test,
)

## DataLoader

In [None]:
def seed_worker(worker_id):
    """Seed ``random`` and NumPy from the calling process's torch seed.

    Used as the DataLoader ``worker_init_fn``. ``worker_id`` is accepted to
    match that callback signature but is not used.
    """
    derived_seed = torch.initial_seed() % (1 << 32)  # reduce modulo 2**32
    random.seed(derived_seed)
    np.random.seed(derived_seed)


# Generator passed to the DataLoaders, seeded with 0.
g = torch.Generator().manual_seed(0)


In [None]:
from torch.utils.data import DataLoader

batch_size = 12

# Only the training loader shuffles; both share the seeded generator `g`
# and re-seed their workers through `seed_worker`.
loader_train = DataLoader(
    dataset_train,
    batch_size=batch_size,
    shuffle=True,
    worker_init_fn=seed_worker,
    generator=g,
    num_workers=4,
)
loader_valid = DataLoader(
    dataset_valid,
    batch_size=batch_size,
    shuffle=False,
    worker_init_fn=seed_worker,
    generator=g,
    num_workers=4,
)

## Model

In [None]:
!pip install efficientnet-pytorch

In [None]:
from efficientnet_pytorch import EfficientNet

In [None]:
import torch
from torch import nn
class CNN_Model(nn.Module):
    """Pretrained EfficientNet-B6 followed by dropout and a linear layer.

    Args:
        class_n: number of outputs of the final linear layer.
        rate: dropout probability applied to the backbone output.
    """

    def __init__(self, class_n, rate=0.2):
        super().__init__()
        self.model = EfficientNet.from_pretrained('efficientnet-b6')
        self.dropout = nn.Dropout(p=rate)
        # The linear layer consumes the 1000 values emitted by the backbone.
        self.output_layer = nn.Linear(1000, class_n, bias=True)

    def forward(self, inputs):
        """Run backbone -> dropout -> linear layer and return the result."""
        features = self.model(inputs)
        features = self.dropout(features)
        return self.output_layer(features)
# Build the five-output model and move it to the selected device in one step.
model = CNN_Model(class_n=5).to(device)
device

## Loss

In [None]:
import torch.nn as nn

# Cross-entropy loss used by both the training and the validation loop.
criterion=nn.CrossEntropyLoss()

## Optimizer

In [None]:
# RAdam over every model parameter, with weight decay.
optimizer = torch.optim.RAdam(
    model.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
)

## Scheduler

In [None]:
from transformers import get_cosine_schedule_with_warmup
epochs = 16

# The schedule is expressed in batches: warm up for two epochs' worth of
# batches, then cosine-decay until the last batch of the last epoch.
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=2 * len(loader_train),
    num_training_steps=epochs * len(loader_train),
)

In [None]:
import gc
# Run a garbage-collection pass first, then ask PyTorch to release its cached
# CUDA memory -- presumably to free as much GPU memory as possible before training.
gc.collect()
torch.cuda.empty_cache()

## Train

In [None]:
from tqdm import tqdm
import torch
import torch.cuda.amp as amp
import gc
from torchvision.transforms import v2

scaler = amp.GradScaler()
cutmix = v2.CutMix(num_classes=5)

train_loss = []  # one entry per training batch
valid_acc = []   # one entry per epoch
for epoch in range(epochs):
    # ---- Train ----
    model.train()
    epoch_train_loss = 0
    for images, labels in tqdm(loader_train):
        # Apply CutMix to roughly half of the batches.
        if np.random.random() <= 0.5:
            images, labels = cutmix(images, labels)
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        # Forward pass and loss under automatic mixed precision.
        with amp.autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)

        # Read the scalar loss once and reuse it for both running statistics.
        batch_loss = loss.item()
        epoch_train_loss += batch_loss
        train_loss.append(batch_loss)

        # Scaled backward pass, optimizer step, then scale-factor update.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # The learning-rate schedule advances once per batch.
        scheduler.step()
    print(f'에폭 [{epoch+1}/{epochs}] - 훈련 데이터 손실값 : {epoch_train_loss/len(loader_train):.4f}')
    # Save a checkpoint after every epoch (file name uses the zero-based epoch).
    torch.save(model.state_dict(), f'epoch_{epoch}.pth')

    # ---- Validate ----
    model.eval()
    epoch_valid_loss = 0
    counts = 0  # number of correctly classified validation samples
    with torch.no_grad():
        for images, labels in loader_valid:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            epoch_valid_loss += loss.item()

            # Predicted class = index of the largest output per sample.
            preds = outputs.cpu().argmax(dim=1).numpy()
            true_labels = labels.cpu().numpy()
            counts += np.sum(preds == true_labels)
    print(f'에폭 [{epoch+1}/{epochs}] - 검증 데이터 손실값 : {epoch_valid_loss/len(loader_valid):.4f} / 검증 데이터 정확도 : {counts/len(dataset_valid):.4f}')
    valid_acc.append(counts / len(dataset_valid))

## Analysis