<a href="https://colab.research.google.com/github/tmd03/DL_study/blob/main/Leaf_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-only: mount Google Drive at /content/drive so the dataset archive
# stored under MyDrive can be read by the following cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
! unzip -qq '/content/drive/MyDrive/dataset (1).zip' -d './dataset'

In [None]:
## data ready ##
import os

# Directory produced by the unzip step; each entry in it is treated as one class.
original_dataset_dir = './dataset'
classes_list = os.listdir(original_dataset_dir)

# Root of the train/val/test copy of the dataset.
# makedirs(..., exist_ok=True) instead of mkdir so the cell can be re-run
# without raising FileExistsError.
base_dir = './splitted'
os.makedirs(base_dir, exist_ok=True)

In [None]:
import  shutil

# One sub-tree per split: <base_dir>/{train,val,test}/<class name>/
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'val')
test_dir = os.path.join(base_dir, 'test')

# makedirs creates the split directory and the class directory in one call,
# and exist_ok=True keeps the cell idempotent when it is re-run.
for split_dir in (train_dir, validation_dir, test_dir):
    for cls in classes_list:
        os.makedirs(os.path.join(split_dir, cls), exist_ok=True)

In [None]:
import math
import random

# Dedicated, fixed-seed RNG so every run assigns the same files to the same split.
split_rng = random.Random(42)

for cls in classes_list:
    path = os.path.join(original_dataset_dir, cls)

    # os.listdir returns names in arbitrary order. Sort first so the seeded
    # shuffle is reproducible, then shuffle so the 60/20/20 split is not
    # biased by file naming or on-disk order.
    fnames = sorted(os.listdir(path))
    split_rng.shuffle(fnames)

    # Each size is floored independently, so up to a couple of files per
    # class can be left out of all three splits.
    train_size = math.floor(len(fnames) * 0.6)
    validation_size = math.floor(len(fnames) * 0.2)
    test_size = math.floor(len(fnames) * 0.2)

    val_end = train_size + validation_size
    train_fnames = fnames[:train_size]
    validation_fnames = fnames[train_size:val_end]
    test_fnames = fnames[val_end:val_end + test_size]

    # Same report-and-copy routine for each of the three splits.
    for label, split_fnames, split_dir in (
        ("Train", train_fnames, train_dir),
        ("Validation", validation_fnames, validation_dir),
        ("Test", test_fnames, test_dir),
    ):
        print(label + " size(", cls, "): ", len(split_fnames))
        for fname in split_fnames:
            src = os.path.join(path, fname)
            dst = os.path.join(os.path.join(split_dir, cls), fname)
            shutil.copyfile(src, dst)

Train size( Grape___Leaf_blight_(Isariopsis_Leaf_Spot) ):  645
Validation size( Grape___Leaf_blight_(Isariopsis_Leaf_Spot) ):  215
Test size( Grape___Leaf_blight_(Isariopsis_Leaf_Spot) ):  215
Train size( Tomato___Early_blight ):  600
Validation size( Tomato___Early_blight ):  200
Test size( Tomato___Early_blight ):  200
Train size( Tomato___Septoria_leaf_spot ):  1062
Validation size( Tomato___Septoria_leaf_spot ):  354
Test size( Tomato___Septoria_leaf_spot ):  354
Train size( Tomato___Tomato_mosaic_virus ):  223
Validation size( Tomato___Tomato_mosaic_virus ):  74
Test size( Tomato___Tomato_mosaic_virus ):  74
Train size( Potato___Early_blight ):  600
Validation size( Potato___Early_blight ):  200
Test size( Potato___Early_blight ):  200
Train size( Pepper,_bell___Bacterial_spot ):  598
Validation size( Pepper,_bell___Bacterial_spot ):  199
Test size( Pepper,_bell___Bacterial_spot ):  199
Train size( Tomato___healthy ):  954
Validation size( Tomato___healthy ):  318
Test size( Tomat

In [None]:
## ready for training ##

import torch
import os

# Train on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Fix PyTorch's global RNG seed so weight initialisation, dropout masks and
# loader shuffling are repeatable from run to run.
# NOTE(review): some GPU kernels may still be non-deterministic.
seed = 42
torch.manual_seed(seed)

# Hyper-parameters shared by the baseline CNN and the fine-tuned ResNet.
batch_size = 256
epochs = 30

In [None]:
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

# Baseline preprocessing: resize every image to 64x64 and convert it to a tensor.
transform_base = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

# ImageFolder derives the labels from the per-class sub-directories of each split.
train_dataset, val_dataset = (
    ImageFolder(root=split_root, transform=transform_base)
    for split_root in ('./splitted/train', './splitted/val')
)

In [None]:
from torch.utils.data import DataLoader

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True, num_workers=4)

# Validation only aggregates loss and accuracy over the whole set, so the
# sample order is irrelevant: do not shuffle, which keeps evaluation from
# consuming random state between runs.
val_loader = DataLoader(val_dataset,
                        batch_size=batch_size,
                        shuffle=False, num_workers=4)



In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    """Small three-stage CNN classifier for 3x64x64 images.

    Each stage is conv(3x3, padding=1) -> ReLU -> 2x2 max-pool -> dropout,
    followed by two fully connected layers.

    Args:
        num_classes: size of the output layer (defaults to 33, the value the
            notebook was written for).
    """

    def __init__(self, num_classes=33):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 64, 3, padding=1)

        # 4096 = 64 channels * 8 * 8: a 64x64 input halved by three poolings.
        self.fc1 = nn.Linear(4096, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, num_classes)."""
        # F.dropout is gated on self.training, so it is a no-op in eval mode.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))
            x = F.dropout(x, p=0.25, training=self.training)

        # Flatten per sample. Keeping the batch dimension explicit (instead of
        # view(-1, 4096)) means an input of the wrong spatial size fails loudly
        # in fc1 rather than being silently regrouped across samples.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.fc2(x)

        return F.log_softmax(x, dim=1)

# Build the baseline CNN, move it to the selected device, and optimise all of
# its parameters with Adam.
model_base = Net()
model_base = model_base.to(device)
optimizer = optim.Adam(params=model_base.parameters(), lr=0.001)

In [None]:
def train(model, train_loader, optimizer):
    """Run one optimisation pass over ``train_loader``.

    Puts ``model`` in training mode and, for every batch, moves the tensors to
    the module-level ``device``, computes the cross-entropy loss,
    back-propagates it and applies one optimizer step. Returns nothing.
    """
    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous batch before the new backward pass.
        optimizer.zero_grad()
        batch_loss = F.cross_entropy(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

def evaluate(model, test_loader):
    """Compute the mean cross-entropy loss and accuracy over ``test_loader``.

    The model is switched to eval mode and run without gradient tracking.

    Returns:
        (loss, accuracy): loss averaged per sample, and accuracy as a
        percentage in [0, 100], both relative to ``len(test_loader.dataset)``.
    """
    model.eval()
    loss_sum = 0
    n_correct = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            scores = model(inputs)

            # reduction='sum' so that dividing by the dataset size below gives
            # a true per-sample mean even when the last batch is smaller.
            loss_sum += F.cross_entropy(scores, labels, reduction='sum').item()

            # The index of the highest score is the predicted class.
            _, predicted = scores.max(dim=1)
            n_correct += (predicted == labels).sum().item()

    n_samples = len(test_loader.dataset)
    return loss_sum / n_samples, 100. * n_correct / n_samples

In [None]:
import time
import copy

def train_baseline(model, train_loader, val_loader, optimizer, num_epochs=30):
    """Train ``model`` for ``num_epochs`` epochs and keep the best weights.

    After every epoch the model is evaluated on both loaders; the weights with
    the highest validation accuracy seen so far are snapshotted and loaded
    back into ``model`` before it is returned.

    Args:
        model: network to train (modified in place).
        train_loader: loader used for optimisation and for the train metrics.
        val_loader: loader used for model selection.
        optimizer: optimizer already bound to ``model``'s parameters.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        ``model`` carrying the weights of its best validation epoch.
    """
    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(1, num_epochs + 1):
        since = time.time()
        train(model, train_loader, optimizer)
        # Train metrics are measured in a separate eval-mode pass over the train set.
        train_loss, train_acc = evaluate(model, train_loader)
        val_loss, val_acc = evaluate(model, val_loader)

        if val_acc > best_acc:
            best_acc = val_acc
            # deepcopy: state_dict() returns references to the live tensors.
            best_model_wts = copy.deepcopy(model.state_dict())

        time_elapsed = time.time() - since
        print('-------------- epoch {} --------------'.format(epoch))
        # '.4f' / '.2f' set the precision; the former '4f' / '2f' only set a
        # minimum field width and therefore printed six decimals.
        print('train loss : {:.4f}, train acc : {:.2f}'.format(train_loss, train_acc))
        print('val loss : {:.4f}, val acc : {:.2f}'.format(val_loss, val_acc))
        print('Completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    model.load_state_dict(best_model_wts)
    return model

# Train the baseline CNN, then persist the returned model object to baseline.pt.
base = train_baseline(
    model=model_base,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    num_epochs=epochs,
)
torch.save(base, 'baseline.pt')

-------------- epoch 1 --------------
train loss : 1.747927, train acc : 51.248489
val loss : 1.787474, val acc : 50.231568
Completed in 1m 8s
-------------- epoch 2 --------------
train loss : 1.105430, train acc : 66.051107
val loss : 1.160378, val acc : 64.376017
Completed in 1m 6s
-------------- epoch 3 --------------
train loss : 0.818230, train acc : 75.338697
val loss : 0.876839, val acc : 73.563650
Completed in 1m 8s
-------------- epoch 4 --------------
train loss : 0.629337, train acc : 80.399350
val loss : 0.692149, val acc : 78.220053
Completed in 1m 11s
-------------- epoch 5 --------------
train loss : 0.523273, train acc : 83.792572
val loss : 0.587022, val acc : 81.549631
Completed in 1m 5s
-------------- epoch 6 --------------
train loss : 0.430138, train acc : 86.635541
val loss : 0.500912, val acc : 84.203280
Completed in 1m 7s
-------------- epoch 7 --------------
train loss : 0.382040, train acc : 87.848597
val loss : 0.464119, val acc : 85.079484
Completed in 1m 6

In [None]:
# Preprocessing for the fine-tuned ResNet. Both pipelines resize to 64x64,
# crop to 52x52 and normalise with the given per-channel mean / std.
data_transforms = {
    # Training: random flips and a random crop act as data augmentation.
    'train': transforms.Compose([
        transforms.Resize([64,64]),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomCrop(52),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]),

    # Validation: a centre crop of the same size keeps the input shape
    # identical to training while making the metrics deterministic. A random
    # crop here made the validation accuracy, and so the "best model"
    # selection, vary from run to run.
    'val': transforms.Compose([
        transforms.Resize([64,64]),
        transforms.CenterCrop(52),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ])
}

In [None]:
data_dir = './splitted'

# One ImageFolder per split, each with its own transform pipeline.
image_datasets = {x: ImageFolder(root=os.path.join(data_dir, x),
                                  transform=data_transforms[x]) for x in ['train', 'val']}

# Only the training loader is shuffled; validation just aggregates metrics
# over the whole split, so its order does not matter.
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                               batch_size=batch_size,
                                               shuffle=(x == 'train'),
                                               num_workers=4) for x in ['train', 'val']}

# Sample counts, used to turn running sums into per-sample averages.
dataset_sizes = {x: len(image_datasets[x]) for x in ['train','val']}

class_names = image_datasets['train'].classes



In [None]:
from torchvision import models

# pretrained=True: also fetch the already-trained weights, not just the architecture.
resnet = models.resnet50(pretrained=True)

# Replace the final fully connected layer with a fresh one that has 33 outputs.
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(in_features=num_ftrs, out_features=33)
resnet = resnet.to(device)

criterion = nn.CrossEntropyLoss()

# Hand Adam only the parameters that currently require gradients.
# NOTE(review): no parameter has been frozen at this point (the freezing loop
# runs in a later cell), so this filter presently passes every parameter.
optimizer_ft = optim.Adam(
    (p for p in resnet.parameters() if p.requires_grad),
    lr=0.001,
)

from torch.optim import lr_scheduler
# Multiply the learning rate by 0.1 after every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 141MB/s]


In [None]:
# Freeze the first five top-level child modules of the ResNet so that only the
# remaining ones are updated during fine-tuning.
# NOTE(review): for torchvision's ResNet these are presumably the stem
# (conv/bn/relu/maxpool) and layer1 - confirm against resnet.children().
for ct, child in enumerate(resnet.children(), start=1):
    if ct >= 6:
        continue
    for param in child.parameters():
        param.requires_grad = False


def train_resnet(model, criterion, optimizer, scheduler, num_epochs=25):
    """Fine-tune ``model`` and return it with its best validation weights.

    Each epoch runs a 'train' phase and a 'val' phase over the module-level
    ``dataloaders`` dict, moving batches to the module-level ``device``. The
    scheduler is stepped once per epoch, after the train phase. The weights
    with the highest validation accuracy are snapshotted and restored before
    returning.

    Args:
        model: network to fine-tune (modified in place).
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer: optimizer bound to the trainable parameters of ``model``.
        scheduler: learning-rate scheduler wrapping ``optimizer``.
        num_epochs: number of train + val epochs to run.

    Returns:
        ``model`` loaded with the weights of its best validation epoch.
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('-------------- epoch {} --------------'.format(epoch+1))
        since = time.time()
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # Track gradients only in the train phase.
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # criterion returns the batch mean; weight it by the batch size
                # so the epoch value is a per-sample mean.
                running_loss += loss.item() * inputs.size(0)
                # Accumulate as a Python int (and compare against `labels`
                # directly rather than the deprecated `.data`), so the accuracy
                # below is a plain float.
                running_corrects += torch.sum(preds == labels).item()
            if phase == 'train' :
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                # deepcopy: state_dict() returns references to the live tensors.
                best_model_wts = copy.deepcopy(model.state_dict())

        time_elapsed = time.time() - since
        print('Completed in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # '.4f' sets the precision; the former '4f' only set a minimum field width.
    print('Best val Acc: {:.4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)

    return model

In [None]:
# Fine-tune the ResNet, then persist the returned model object to resnet50.pt.
model_resnet50 = train_resnet(
    model=resnet,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=epochs,
)
torch.save(model_resnet50, 'resnet50.pt')

-------------- epoch 1 --------------




train Loss: 0.5768 Acc: 0.8232
val Loss: 0.2768 Acc: 0.9099
Completed in 0m 52s
-------------- epoch 2 --------------
train Loss: 0.2351 Acc: 0.9259
val Loss: 0.2103 Acc: 0.9344
Completed in 0m 52s
-------------- epoch 3 --------------
train Loss: 0.1596 Acc: 0.9501
val Loss: 0.1816 Acc: 0.9412
Completed in 0m 51s
-------------- epoch 4 --------------
train Loss: 0.1427 Acc: 0.9531
val Loss: 0.2535 Acc: 0.9249
Completed in 0m 51s
-------------- epoch 5 --------------
train Loss: 0.1135 Acc: 0.9634
val Loss: 0.1336 Acc: 0.9588
Completed in 0m 53s
-------------- epoch 6 --------------
train Loss: 0.1037 Acc: 0.9665
val Loss: 0.1346 Acc: 0.9584
Completed in 0m 51s
-------------- epoch 7 --------------
train Loss: 0.0936 Acc: 0.9694
val Loss: 0.1212 Acc: 0.9586
Completed in 0m 51s
-------------- epoch 8 --------------
train Loss: 0.0493 Acc: 0.9837
val Loss: 0.0465 Acc: 0.9847
Completed in 0m 51s
-------------- epoch 9 --------------
train Loss: 0.0305 Acc: 0.9901
val Loss: 0.0408 Acc: 0.9

In [None]:
# Test-time preprocessing: resize and normalise only, with no augmentation.
# NOTE(review): training and validation crop the 64x64 image to 52x52, while
# the test images are fed at the full 64x64 - confirm this mismatch is intended.
transform_resNet = transforms.Compose([
    transforms.Resize([64,64]),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

test_resNet = ImageFolder(root='./splitted/test', transform=transform_resNet)
# Evaluation aggregates over the whole split, so shuffling is unnecessary.
test_loader_resNet = torch.utils.data.DataLoader(test_resNet, batch_size=batch_size, shuffle=False, num_workers=4)

# resnet50.pt holds a fully pickled model object written by this notebook, not
# a bare state_dict. Recent PyTorch releases default torch.load to
# weights_only=True, which refuses such files, so request full unpickling
# explicitly; older releases do not know the keyword and raise TypeError.
# Only do this for checkpoints you created yourself: unpickling can execute
# arbitrary code.
try:
    resnet50 = torch.load('resnet50.pt', map_location=device, weights_only=False)
except TypeError:
    resnet50 = torch.load('resnet50.pt', map_location=device)
resnet50.eval()
test_loss, test_accuracy = evaluate(resnet50, test_loader_resNet)
print('ResNet test acc :', test_accuracy)




ResNet test acc : 98.73576167229942
