In [1]:
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [2]:
!unzip -qq "/content/gdrive/MyDrive/archive (1).zip"

In [17]:
import os
import shutil

# 디렉토리 경로 지정
dir_path = "/content/train/freshbanana"

# 디렉토리가 존재하는지 확인 후 삭제
if os.path.exists(dir_path):
    shutil.rmtree(dir_path)
    print("Directory and its contents deleted successfully.")
else:
    print("Directory not found.")


Directory and its contents deleted successfully.


In [19]:
import os
import shutil

# 디렉토리 경로 지정
dir_path = "/content/train/rottenoranges"

# 디렉토리가 존재하는지 확인 후 삭제
if os.path.exists(dir_path):
    shutil.rmtree(dir_path)
    print("Directory and its contents deleted successfully.")
else:
    print("Directory not found.")

Directory and its contents deleted successfully.


In [18]:
import os
import shutil

# 디렉토리 경로 지정
dir_path = "/content/train/freshoranges"

# 디렉토리가 존재하는지 확인 후 삭제
if os.path.exists(dir_path):
    shutil.rmtree(dir_path)
    print("Directory and its contents deleted successfully.")
else:
    print("Directory not found.")

Directory and its contents deleted successfully.


## Import

In [20]:
import random
import pandas as pd
import numpy as np
import os
import re
import glob
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
#WeightedRandomSampler는 PyTorch에서 제공하는 샘플링 방식 중 하나로, 클래스 불균형이 있는 데이터셋에서 사용할 수 있습니다. 클래스별로 다른 가중치를 부여하여 데이터를 샘플링하는 방식
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from tqdm.auto import tqdm

import warnings
warnings.filterwarnings(action='ignore') 

In [21]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Hyperparameter Setting

In [22]:
CFG = {
    'IMG_SIZE':224,
    'EPOCHS':10,
    'LEARNING_RATE':3e-4,
    'BATCH_SIZE':32,
    'SEED':41
}

## Fixed RandomSeed

In [23]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [None]:
all_img_list = glob.glob('./train/*/*')

In [27]:
df = pd.DataFrame(columns=['img_path', 'label'])
df['img_path'] = all_img_list
df['label'] = df['img_path'].apply(lambda x : str(x).split('/')[2])

In [28]:

df

Unnamed: 0,img_path,label
0,./train/freshapples/rotated_by_60_Screen Shot ...,freshapples
1,./train/freshapples/vertical_flip_Screen Shot ...,freshapples
2,./train/freshapples/vertical_flip_Screen Shot ...,freshapples
3,./train/freshapples/Screen Shot 2018-06-08 at ...,freshapples
4,./train/freshapples/translation_Screen Shot 20...,freshapples
...,...,...
4030,./train/rottenapples/rotated_by_45_Screen Shot...,rottenapples
4031,./train/rottenapples/rotated_by_30_Screen Shot...,rottenapples
4032,./train/rottenapples/rotated_by_75_Screen Shot...,rottenapples
4033,./train/rottenapples/rotated_by_60_Screen Shot...,rottenapples


In [29]:

counts = df['label'].value_counts()

In [30]:
counts

rottenapples    2342
freshapples     1693
Name: label, dtype: int64

In [31]:

train, val, _, _ = train_test_split(df, df['label'], test_size=0.3, stratify=df['label'], random_state=CFG['SEED'])

## Label-Encoding

In [32]:
le = preprocessing.LabelEncoder()
train['label'] = le.fit_transform(train['label'])
val['label'] = le.transform(val['label'])

## CustomDataset

In [33]:

class CustomDataset(Dataset):
    def __init__(self, img_path_list, label_list, transforms=None):
        self.img_path_list = img_path_list
        self.label_list = label_list
        self.transforms = transforms
        
    def __getitem__(self, index):
        img_path = self.img_path_list[index]
        
        image = cv2.imread(img_path)
        
        if self.transforms is not None:
            image = self.transforms(image=image)['image']
        
        if self.label_list is not None:
            label = self.label_list[index]
            return image, label
        else:
            return image
        
    def __len__(self):
        return len(self.img_path_list)

In [34]:

train_transform = A.Compose([
    A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE']),
    A.Flip(p=0.4),
    #A.GaussNoise(p=0.1),
    #A.ElasticTransform(p=0.1),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), always_apply=True),
    ToTensorV2(),
])

test_transform = A.Compose([
    A.Resize(CFG['IMG_SIZE'], CFG['IMG_SIZE']),
    A.Flip(p=0.4),
    #A.GaussNoise(p=0.1),
    #A.ElasticTransform(p=0.1),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), always_apply=True),
    ToTensorV2(),
])


In [35]:
train_dataset = CustomDataset(train['img_path'].values, train['label'].values, train_transform)
train_loader = DataLoader(train_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

val_dataset = CustomDataset(val['img_path'].values, val['label'].values, test_transform)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

## Model Define

In [36]:

#efficientnet_b0
# class BaseModel(nn.Module):
#     def __init__(self, num_classes=len(le.classes_)):
#         super(BaseModel, self).__init__()
#         self.backbone = models.efficientnet_b1(pretrained=True)
#         self.classifier = nn.Linear(1000, num_classes)
        
#     def forward(self, x):
#         x = self.backbone(x)
#         x = self.classifier(x)
#         return x

In [37]:

#efficientnet_b1
# class BaseModel(nn.Module):
#     def __init__(self, num_classes=len(le.classes_)):
#         super(BaseModel, self).__init__()
#         self.backbone = models.efficientnet_b1(pretrained=True)
#         self.classifier = nn.Linear(1000, num_classes)
        
#     def forward(self, x):
#         x = self.backbone(x)
#         x = self.classifier(x)
#         return x

In [38]:

# class BaseModel(nn.Module):
#     def __init__(self, num_classes=len(le.classes_)):
#         super(BaseModel, self).__init__()
#         self.backbone = models.resnet18(pretrained=True)
#         self.classifier = nn.Linear(1000, num_classes)
        
#     def forward(self, x):
#         x = self.backbone(x)
#         x = self.classifier(x)
#         return x

In [39]:
from torchvision.models import efficientnet_b6 as efficientnet_b6

In [40]:
from torchvision.models import densenet201 as densenet201

In [41]:
class BaseModel(nn.Module):
    def __init__(self, num_classes=len(le.classes_)):
        super(BaseModel, self).__init__()
        self.backbone = models.efficientnet_b6(pretrained=True)
        self.classifier = nn.Linear(1000, num_classes)
        
    def forward(self, x):
        x = self.backbone(x)
        x = self.classifier(x)
        return x

## Train

In [42]:
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    
    best_score = 0
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for imgs, labels in tqdm(iter(train_loader)):
            imgs = imgs.float().to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            output = model(imgs)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val Weighted F1 Score : [{_val_score:.5f}]')
       
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_score < _val_score:
            best_score = _val_score
            best_model = model
    
    return best_model

In [43]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, true_labels = [], []

    with torch.no_grad():
        for imgs, labels in tqdm(iter(val_loader)):
            imgs = imgs.float().to(device)
            labels = labels.to(device)
            
            pred = model(imgs)
            
            loss = criterion(pred, labels)
            
            preds += pred.argmax(1).detach().cpu().numpy().tolist()
            true_labels += labels.detach().cpu().numpy().tolist()
            
            val_loss.append(loss.item())
        
        _val_loss = np.mean(val_loss)
        _val_score = f1_score(true_labels, preds, average='weighted')
    
    return _val_loss, _val_score

## Run!!

In [44]:
model = BaseModel()
model.eval()
#optimizer = torch.optim.RMSprop(model.parameters(), lr=CFG["LEARNING_RATE"])
optimizer = optim.Adagrad(model.parameters(), lr=0.01)
optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=2, threshold_mode='abs', min_lr=1e-8, verbose=True)
#infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)
infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

Downloading: "https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b6_lukemelas-c76e70fd.pth
100%|██████████| 165M/165M [00:06<00:00, 26.9MB/s]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

Epoch [1], Train Loss : [0.11410] Val Loss : [0.01889] Val Weighted F1 Score : [0.99505]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.01434] Val Loss : [0.00051] Val Weighted F1 Score : [1.00000]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.00654] Val Loss : [0.00939] Val Weighted F1 Score : [0.99670]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.00471] Val Loss : [0.01318] Val Weighted F1 Score : [0.99421]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.02236] Val Loss : [0.01918] Val Weighted F1 Score : [0.99339]
Epoch 00005: reducing learning rate of group 0 to 1.5000e-04.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.00436] Val Loss : [0.00940] Val Weighted F1 Score : [0.99752]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.00479] Val Loss : [0.00613] Val Weighted F1 Score : [0.99917]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.00076] Val Loss : [0.00014] Val Weighted F1 Score : [1.00000]
Epoch 00008: reducing learning rate of group 0 to 7.5000e-05.


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.00192] Val Loss : [0.00023] Val Weighted F1 Score : [1.00000]


  0%|          | 0/89 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.00022] Val Loss : [0.00143] Val Weighted F1 Score : [0.99917]
