# 1. DataSplit

In [3]:
# Create an empty directory tree for the split:
#   ./splitted/{train,val,test}/<class name>/

import os
import shutil

original_dataset_dir = './dataset'

# Treat only visible sub-directories as classes. This skips the macOS
# ".DS_Store" file (and any other stray or hidden entry) without failing when
# no such entry exists, which list.remove(".DS_Store") would do.
classes_list = [
    entry for entry in os.listdir(original_dataset_dir)
    if not entry.startswith('.')
    and os.path.isdir(os.path.join(original_dataset_dir, entry))
]

base_dir = './splitted'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

for split_dir in (train_dir, validation_dir, test_dir):
    # Re-running the cell must not crash on existing folders, and files left
    # over from a previous split must not leak into the new one, so each
    # generated split folder is rebuilt from scratch.
    if os.path.isdir(split_dir):
        shutil.rmtree(split_dir)
    os.makedirs(split_dir)  # also creates base_dir when it is missing
    for cls in classes_list:
        os.mkdir(os.path.join(split_dir, cls))
    


In [4]:

# Show the class folder names that were found under the dataset directory.
print(classes_list)

['Strawberry___healthy', 'Grape___Black_rot', 'Potato___Early_blight', 'Cherry___Powdery_mildew', 'Tomato___Target_Spot', 'Peach___healthy', 'Potato___Late_blight', 'Tomato___Late_blight', 'Tomato___Tomato_mosaic_virus', 'Pepper,_bell___healthy', 'Tomato___Leaf_Mold', 'Grape___Leaf_blight_(Isariopsis_Leaf_Spot)', 'Apple___Cedar_apple_rust', 'Tomato___Bacterial_spot', 'Grape___healthy', 'Corn___Cercospora_leaf_spot Gray_leaf_spot', 'Tomato___Early_blight', 'Grape___Esca_(Black_Measles)', 'Tomato___healthy', 'Corn___Northern_Leaf_Blight', 'Tomato___Tomato_Yellow_Leaf_Curl_Virus', 'Cherry___healthy', 'Apple___Apple_scab', 'Tomato___Spider_mites Two-spotted_spider_mite', 'Corn___Common_rust', 'Peach___Bacterial_spot', 'Pepper,_bell___Bacterial_spot', 'Tomato___Septoria_leaf_spot', 'Corn___healthy', 'Apple___Black_rot', 'Apple___healthy', 'Strawberry___Leaf_scorch', 'Potato___healthy']


- ### Dividing data and checking the number of data per class

In [5]:
import math
import random

# Fixed seed so that the same files land in the same split on every run.
SPLIT_SEED = 42


def _copy_files(fnames, src_dir, dst_dir):
    """Copy every file named in ``fnames`` from ``src_dir`` into ``dst_dir``."""
    for fname in fnames:
        shutil.copyfile(os.path.join(src_dir, fname), os.path.join(dst_dir, fname))


for cls in classes_list:
    path = os.path.join(original_dataset_dir, cls)

    # List the image files of this class. os.listdir() returns entries in
    # arbitrary order, so sort first and then shuffle with a fixed seed: the
    # split becomes both random and reproducible. Hidden files such as
    # ".DS_Store" and sub-directories are not images and are skipped.
    fnames = sorted(
        fname for fname in os.listdir(path)
        if not fname.startswith('.') and os.path.isfile(os.path.join(path, fname))
    )
    random.Random(SPLIT_SEED).shuffle(fnames)

    # 60% / 20% / 20% split. Because every size is rounded down, up to two
    # files per class can remain unassigned.
    train_size = math.floor(len(fnames) * 0.6)
    validation_size = math.floor(len(fnames) * 0.2)
    test_size = math.floor(len(fnames) * 0.2)

    validation_end = train_size + validation_size
    test_end = validation_end + test_size

    train_fnames = fnames[:train_size]
    print("Train size(",cls,"): ", len(train_fnames))
    _copy_files(train_fnames, path, os.path.join(train_dir, cls))

    validation_fnames = fnames[train_size:validation_end]
    print("Validation size(",cls,"): ", len(validation_fnames))
    _copy_files(validation_fnames, path, os.path.join(validation_dir, cls))

    test_fnames = fnames[validation_end:test_end]
    print("Test size(",cls,"): ", len(test_fnames))
    _copy_files(test_fnames, path, os.path.join(test_dir, cls))

Train size( Strawberry___healthy ):  273
Validation size( Strawberry___healthy ):  91
Test size( Strawberry___healthy ):  91
Train size( Grape___Black_rot ):  708
Validation size( Grape___Black_rot ):  236
Test size( Grape___Black_rot ):  236
Train size( Potato___Early_blight ):  600
Validation size( Potato___Early_blight ):  200
Test size( Potato___Early_blight ):  200
Train size( Cherry___Powdery_mildew ):  631
Validation size( Cherry___Powdery_mildew ):  210
Test size( Cherry___Powdery_mildew ):  210
Train size( Tomato___Target_Spot ):  842
Validation size( Tomato___Target_Spot ):  280
Test size( Tomato___Target_Spot ):  280
Train size( Peach___healthy ):  216
Validation size( Peach___healthy ):  72
Test size( Peach___healthy ):  72
Train size( Potato___Late_blight ):  600
Validation size( Potato___Late_blight ):  200
Test size( Potato___Late_blight ):  200
Train size( Tomato___Late_blight ):  1145
Validation size( Tomato___Late_blight ):  381
Test size( Tomato___Late_blight ):  381

# 2. 베이스라인 모델학습

- ### 베이스라인 모델학습을 위한 준비
- PyTorch를 사용하여 이미지 분류 작업을 위한 데이터셋을 준비

In [7]:
import os
import torch

# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

print("CUDA 사용 가능:", USE_CUDA)
print("현재 사용 중인 장치:", DEVICE)

# Training hyper-parameters.
BATCH_SIZE = 256  # images per mini-batch
EPOCH = 30        # passes over the training set

CUDA 사용 가능: False
현재 사용 중인 장치: cpu


- GPU 사용할 수 있는지 여부
- BATCH_SIZE는 한 번에 네트워크에 전달될 이미지의 수를, EPOCH은 전체 데이터셋을 몇 번 학습에 사용할지를 설정

In [8]:
import torch
import torchvision

# Print the torch and torchvision versions (one per line) used for this run.
for library in (torch, torchvision):
    print(library.__version__)

2.1.2
0.16.2


In [9]:
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

# Baseline preprocessing: resize every image to 64x64, then convert it to a tensor.
transform_base = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

# ImageFolder reads one sub-folder per class from the given root.
val_dataset = ImageFolder(transform=transform_base, root='./splitted/val')
train_dataset = ImageFolder(transform=transform_base, root='./splitted/train')

In [11]:
from torch.utils.data import DataLoader

# Shuffle only the training data. The validation metrics are sums over the
# whole set, so shuffling there buys nothing and only makes the batch order
# non-deterministic.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

- ### Designing a baseline model

- Use PyTorch to define and initialize deep neural networks

In [19]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    """Small three-stage CNN classifier for 3-channel 64x64 images.

    Every stage is conv(3x3, padding=1) -> ReLU -> 2x2 max-pool -> dropout, so
    the spatial size is halved three times (64 -> 32 -> 16 -> 8). The resulting
    64 x 8 x 8 = 4096 features go through two fully connected layers.

    Args:
        num_classes: Size of the output layer. Defaults to 33.
    """

    def __init__(self, num_classes=33):
        super().__init__()
        # nn.Conv2d(input channels (RGB = 3), output channels, kernel size, padding)
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 64, 3, padding=1)

        self.fc1 = nn.Linear(4096, 512)  # 8 * 8 * 64 = 4096
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, num_classes).

        Args:
            x: Image batch of shape (batch, 3, 64, 64).

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce 4096
                features per sample.
        """
        # Dropout is only active in training mode (training=self.training).
        for conv in (self.conv1, self.conv2, self.conv3):
            x = conv(x)
            x = F.relu(x)
            x = self.pool(x)  # halves height and width
            x = F.dropout(x, p=0.25, training=self.training)

        # Flatten per sample and keep the batch dimension fixed. A
        # view(-1, 4096) would silently fold surplus features into the batch
        # dimension for inputs that are not 64x64; this way fc1 reports the
        # shape mismatch instead.
        x = x.flatten(1)
        x = self.fc1(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.fc2(x)

        return F.log_softmax(x, dim=1)

# Build the baseline network on the selected device and optimise it with Adam.
model_base = Net()
model_base = model_base.to(DEVICE)
optimizer = optim.Adam(model_base.parameters(), lr=1e-3)

- 컨볼루셔널 신경망(CNN)을 기반으로 한 이미지 분류 모델을 구축
- 신경망의 각 레이어는 이미지에서 중요한 특징을 추출하고, 이를 바탕으로 이미지를 분류

## Model Learning

In [20]:
def train(model, train_loader, optimizer):
    """Run one optimisation pass over every batch of ``train_loader``.

    Puts ``model`` in training mode, moves each batch to ``DEVICE``, and takes
    one optimizer step per batch on the cross-entropy loss. Returns nothing;
    the model and optimizer are updated in place.
    """
    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)

        optimizer.zero_grad()
        batch_loss = F.cross_entropy(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

## Model Evaluation

In [21]:
def evaluate(model, test_loader):
    """Compute the mean loss and the accuracy of ``model`` over ``test_loader``.

    The model is switched to evaluation mode and gradients are disabled.

    Args:
        model: Network whose output has one score per class along dim 1.
        test_loader: Iterable yielding ``(data, target)`` batches.

    Returns:
        Tuple ``(loss, accuracy)``: the mean cross-entropy per sample and the
        percentage of correctly classified samples.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no samples.
    """
    model.eval()
    test_loss = 0
    correct = 0
    seen = 0  # number of samples actually evaluated

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)

            # Sum (not average) per batch so that the final division yields a
            # true per-sample mean even when the last batch is smaller.
            test_loss += F.cross_entropy(output, target, reduction='sum').item()

            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
            seen += target.size(0)

    # Normalise by the samples that were really processed. Using
    # len(test_loader.dataset) is wrong whenever the loader does not yield the
    # whole dataset (drop_last=True, a subset sampler, ...).
    test_loss /= seen
    test_accuracy = 100. * correct / seen
    return test_loss, test_accuracy

## Run Learning Function & Save Model

In [22]:
import time
import copy

def train_baseline(model, train_loader, val_loader, optimizer, num_epochs=30):
    """Train ``model`` for ``num_epochs`` epochs and keep the best weights.

    After each epoch the model is evaluated on both loaders and a short report
    is printed. The weights with the highest validation accuracy seen so far
    are remembered and loaded back into ``model`` before it is returned.
    """
    top_val_acc = 0.0
    top_weights = copy.deepcopy(model.state_dict())

    for epoch in range(1, num_epochs + 1):
        started = time.time()

        train(model, train_loader, optimizer)
        train_loss, train_acc = evaluate(model, train_loader)
        val_loss, val_acc = evaluate(model, val_loader)

        # Snapshot the weights whenever validation accuracy improves.
        if val_acc > top_val_acc:
            top_val_acc, top_weights = val_acc, copy.deepcopy(model.state_dict())

        minutes, seconds = divmod(time.time() - started, 60)
        report = (
            '-------------- epoch {} ----------------'.format(epoch),
            'train Loss: {:.4f}, Accuracy: {:.2f}%'.format(train_loss, train_acc),
            'val Loss: {:.4f}, Accuracy: {:.2f}%'.format(val_loss, val_acc),
            'Completed in {:.0f}m {:.0f}s'.format(minutes, seconds),
        )
        for line in report:
            print(line)

    model.load_state_dict(top_weights)
    return model
 

# Train the baseline model and keep the weights with the best validation accuracy.
base = train_baseline(
    model=model_base,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    num_epochs=EPOCH,
)
# NOTE: this pickles the whole module object, so loading 'baseline.pt' later
# requires the Net class definition to be available.
torch.save(base, 'baseline.pt')

-------------- epoch 1 ----------------
train Loss: 1.9276, Accuracy: 44.29%
val Loss: 1.9246, Accuracy: 43.99%
Completed in 2m 25s
-------------- epoch 2 ----------------
train Loss: 1.2227, Accuracy: 64.13%
val Loss: 1.2380, Accuracy: 63.57%
Completed in 2m 24s
-------------- epoch 3 ----------------
train Loss: 0.8881, Accuracy: 73.10%
val Loss: 0.9198, Accuracy: 71.69%
Completed in 2m 25s
-------------- epoch 4 ----------------
train Loss: 0.6760, Accuracy: 79.93%
val Loss: 0.7133, Accuracy: 78.80%
Completed in 2m 25s
-------------- epoch 5 ----------------
train Loss: 0.5893, Accuracy: 82.37%
val Loss: 0.6363, Accuracy: 80.66%
Completed in 2m 25s
-------------- epoch 6 ----------------
train Loss: 0.5669, Accuracy: 82.28%
val Loss: 0.6170, Accuracy: 80.56%
Completed in 2m 24s
-------------- epoch 7 ----------------
train Loss: 0.4219, Accuracy: 87.06%
val Loss: 0.4902, Accuracy: 85.09%
Completed in 2m 25s
-------------- epoch 8 ----------------
train Loss: 0.3876, Accuracy: 88.43%