In [1]:
!unzip -qq '/content/drive/MyDrive/dataset.zip' -d './dataset'

In [2]:
import os

original_dataset_dir = './dataset'
classes_list = os.listdir(original_dataset_dir)

base_dir = './splitted'
os.mkdir(base_dir)

## 데이터 정리를 위한 목록 및 폴더 생성

In [3]:
import shutil

train_dir = os.path.join(base_dir, 'train')
os.mkdir(train_dir)
validation_dir = os.path.join(base_dir, 'val')
os.mkdir(validation_dir)
test_dir = os.path.join(base_dir, 'test')
os.mkdir(test_dir)

for cls in classes_list:
    os.mkdir(os.path.join(train_dir, cls))
    os.mkdir(os.path.join(validation_dir, cls))
    os.mkdir(os.path.join(test_dir, cls))

## 데이터 현황 확인

In [4]:
import math

for cls in classes_list:
    path = os.path.join(original_dataset_dir, cls)
    fnames = os.listdir(path)

    train_size = math.floor(len(fnames) * 0.6)
    validation_size = math.floor(len(fnames) * 0.2)
    test_size = math.floor(len(fnames) * 0.2)

    train_fnames = fnames[:train_size]
    print('Train size(',cls,'): ', len(train_fnames))
    for fname in train_fnames:
        src = os.path.join(path, fname)
        dst = os.path.join(os.path.join(train_dir, cls), fname)
        shutil.copyfile(src, dst)

    validation_fnames = fnames[train_size:(validation_size + train_size)]
    print('Validation size(',cls,'): ', len(validation_fnames))
    for fname in validation_fnames:
        src = os.path.join(path, fname)
        dst = os.path.join(os.path.join(validation_dir, cls), fname)
        shutil.copyfile(src, dst)
    
    test_fnames = fnames[(train_size + validation_size):(validation_size + train_size + test_size)]
    print('Test size(',cls,'): ', len(test_fnames))
    for fname in test_fnames:
        src = os.path.join(path, fname)
        dst = os.path.join(os.path.join(test_dir, cls), fname)
        shutil.copyfile(src, dst)

Train size( Tomato___Spider_mites Two-spotted_spider_mite ):  1005
Validation size( Tomato___Spider_mites Two-spotted_spider_mite ):  335
Test size( Tomato___Spider_mites Two-spotted_spider_mite ):  335
Train size( Tomato___Tomato_Yellow_Leaf_Curl_Virus ):  3214
Validation size( Tomato___Tomato_Yellow_Leaf_Curl_Virus ):  1071
Test size( Tomato___Tomato_Yellow_Leaf_Curl_Virus ):  1071
Train size( Cherry___Powdery_mildew ):  631
Validation size( Cherry___Powdery_mildew ):  210
Test size( Cherry___Powdery_mildew ):  210
Train size( Cherry___healthy ):  512
Validation size( Cherry___healthy ):  170
Test size( Cherry___healthy ):  170
Train size( Grape___Black_rot ):  708
Validation size( Grape___Black_rot ):  236
Test size( Grape___Black_rot ):  236
Train size( Grape___Esca_(Black_Measles) ):  829
Validation size( Grape___Esca_(Black_Measles) ):  276
Test size( Grape___Esca_(Black_Measles) ):  276
Train size( Tomato___healthy ):  954
Validation size( Tomato___healthy ):  318
Test size( Tom

## 학습 준비

In [5]:
import torch
import os

USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device('cuda' if USE_CUDA else 'cpu')
BATCH_SIZE = 256 # 크게 할수록 학습이 빨리 되나, GPU 메모리 한계가 있음
EPOCH = 30

In [6]:
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

transform_base = transforms.Compose([transforms.Resize((64, 64)), transforms.ToTensor()])
train_dataset = ImageFolder(root='./splitted/train', transform=transform_base) # 폴더 하나를 하나의 라벨로
val_dataset = ImageFolder(root='./splitted/val', transform=transform_base)

In [7]:
from torch.utils.data import DataLoader

train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True,
                                           num_workers=4)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=BATCH_SIZE,
                                         shuffle=True,
                                         num_workers=4)

  cpuset_checked))


## 모델링 

In [8]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):

    def __init__(self):
        
        super(Net, self).__init__()

        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 64, 3, padding=1)

        self.fc1 = nn.Linear(4096, 512)
        self.fc2 = nn.Linear(512, 33)

    def forward(self, x):

        x = self.conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training) # train_set만 사용

        x = self.conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training)

        x = self.conv3(x)
        x = F.relu(x)
        x = self.pool(x)
        x = F.dropout(x, p=0.25, training=self.training)

        x = x.view(-1, 4096)
        x = self.fc1(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.fc2(x)

        return F.log_softmax(x, dim=1)

model_base = Net().to(DEVICE)
optimizer = optim.Adam(model_base.parameters(), lr=0.001)

In [9]:
def train(model, train_loader, optimizer):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

In [10]:
def evaluate(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0

    with torch.no_grad(): 
        for data, target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)

            test_loss += F.cross_entropy(output, target, reduction='sum').item()

            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, test_accuracy

## 학습 시작

In [12]:
import time
import copy

def train_baseline(model, train_loader, val_loader, optimizer, num_epochs=30):
    best_acc = 0.0 # best_acc 저장용
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(1, num_epochs + 1):
        since = time.time()
        train(model, train_loader, optimizer)
        train_loss, train_acc = evaluate(model, train_loader)
        val_loss, val_acc = evaluate(model, val_loader)

        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())

        time_elapsed = time.time() - since
        print('-------------- epoch {} --------------'.format(epoch))
        print('train Loss: {:.4f}, Accuracy: {:.2f}%'.format(train_loss, train_acc))
        print('val Loss: {:.4f}, Accuracy: {:.2f}%'.format(val_loss, val_acc))
        print('Completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    model.load_state_dict(best_model_wts)
    return model

base = train_baseline(model_base, train_loader, val_loader, optimizer, EPOCH) #(16)
torch.save(base, 'baseline.pt')

  cpuset_checked))


-------------- epoch 1 --------------
train Loss: 1.5911, Accuracy: 54.54%
val Loss: 1.6074, Accuracy: 54.60%
Completed in 1m 30s
-------------- epoch 2 --------------
train Loss: 0.9751, Accuracy: 70.91%
val Loss: 1.0037, Accuracy: 69.81%
Completed in 1m 33s
-------------- epoch 3 --------------
train Loss: 0.6962, Accuracy: 78.75%
val Loss: 0.7279, Accuracy: 77.61%
Completed in 1m 33s
-------------- epoch 4 --------------
train Loss: 0.6115, Accuracy: 81.11%
val Loss: 0.6528, Accuracy: 79.65%
Completed in 1m 32s
-------------- epoch 5 --------------
train Loss: 0.4787, Accuracy: 85.32%
val Loss: 0.5288, Accuracy: 83.55%
Completed in 1m 33s
-------------- epoch 6 --------------
train Loss: 0.4232, Accuracy: 86.96%
val Loss: 0.4819, Accuracy: 84.84%
Completed in 1m 32s
-------------- epoch 7 --------------
train Loss: 0.3844, Accuracy: 88.10%
val Loss: 0.4467, Accuracy: 85.87%
Completed in 1m 33s
-------------- epoch 8 --------------
train Loss: 0.3427, Accuracy: 89.41%
val Loss: 0.414