In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torchsummary import summary
from tensorboardX import SummaryWriter
from torch import optim
from torch.optim.lr_scheduler import StepLR

import os
from torchvision import datasets
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import time
import copy
from tqdm.notebook import tqdm
import torch.backends.cudnn as cudnn
import torchvision.models as models
import cv2
import glob

In [2]:
# Run-wide configuration for the training notebook.
# Limit the process to the listed GPU ids. This is assigned before any CUDA
# call is made in the cells below; the order matters because the variable is
# only consulted when CUDA is first initialised.
os.environ["CUDA_VISIBLE_DEVICES"] = '1,2,3,4,5'
# Wall-clock reference; the training cell subtracts it to report elapsed time.
start_time = time.time()
# Mini-batch size shared by the train and test DataLoaders.
batch_size = 64
# Learning rate handed to the SGD optimizer.
learning_rate = 0.003
# Directory in which checkpoints are written and looked up.
default_directory = './save_models'
# TensorBoard event writer used by train()/test() for scalar logging.
writer = SummaryWriter('./log/vgg16')

In [3]:
# Training pipeline: resize the shorter side to 128 px, take a random 64x64
# crop (light augmentation), then convert to a float tensor scaled to [0, 1].
train_transform = transforms.Compose([
    transforms.Resize(128),
    transforms.RandomCrop(64),
    transforms.ToTensor(),
])

# Evaluation pipeline: same resize, but a deterministic centre crop so that the
# reported test loss/accuracy do not change from run to run for the same weights.
test_transform = transforms.Compose([
    transforms.Resize(128),
    transforms.CenterCrop(64),
    transforms.ToTensor(),
])

# ImageFolder derives the class label of each image from its sub-directory name.
train_imgs = datasets.ImageFolder("./data/train", transform=train_transform)
test_imgs = datasets.ImageFolder("./data/test", transform=test_transform)

# Only the training set is shuffled; evaluation order does not affect the
# metrics, so the test loader iterates in a fixed order.
train_loader = DataLoader(train_imgs, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_imgs, batch_size=batch_size, shuffle=False)

In [4]:
# VGG-16 built with torchvision's default arguments (no weights are requested here).
# NOTE(review): the first logged training loss (~6.91, i.e. about ln 1000) is
# consistent with a 1000-way output head, while the loss later plateaus near
# 1.40 (about ln 4), which suggests the dataset has far fewer classes. Consider
# models.vgg16(num_classes=len(train_imgs.classes)) — TODO confirm against ./data/train.
vgg16 = models.vgg16()

In [5]:
# Choose where the network lives. `model` is the handle used by train()/test()
# and the checkpoint code, so it must be bound on BOTH branches (previously the
# CPU branch left it undefined and the summary call below raised NameError).
if torch.cuda.device_count() > 0:
    print("USE", torch.cuda.device_count(), "GPUs!")
    # Replicate the network across every visible GPU and split each batch.
    model = nn.DataParallel(vgg16).cuda()
    # Let cuDNN auto-tune convolution algorithms for the fixed 64x64 input size.
    cudnn.benchmark = True
    summary_device = "cuda"
else:
    print("USE ONLY CPU!")
    model = vgg16
    summary_device = "cpu"

# torchsummary assumes CUDA unless told otherwise, so pass the device explicitly.
summary(model, (3, 64, 64), device=summary_device)

USE 5 GPUs!
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 64, 64]           1,792
              ReLU-2           [-1, 64, 64, 64]               0
            Conv2d-3           [-1, 64, 64, 64]          36,928
              ReLU-4           [-1, 64, 64, 64]               0
         MaxPool2d-5           [-1, 64, 32, 32]               0
            Conv2d-6          [-1, 128, 32, 32]          73,856
              ReLU-7          [-1, 128, 32, 32]               0
            Conv2d-8          [-1, 128, 32, 32]         147,584
              ReLU-9          [-1, 128, 32, 32]               0
        MaxPool2d-10          [-1, 128, 16, 16]               0
           Conv2d-11          [-1, 256, 16, 16]         295,168
             ReLU-12          [-1, 256, 16, 16]               0
           Conv2d-13          [-1, 256, 16, 16]         590,080
             ReLU-14       

In [6]:
# Loss: cross-entropy computed from the raw network outputs and integer class labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with Nesterov momentum and L2 weight decay over the VGG-16
# parameters (the same tensors the DataParallel wrapper trains).
optimizer = optim.SGD(
    vgg16.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=1e-4,
    nesterov=True,
)

# Disabled experiment, kept for reference: cosine annealing with warm restarts.
# scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=50, T_mult=3, eta_min=0.001)

In [7]:
def train(epoch):
    """Run one training epoch over ``train_loader`` and log running metrics.

    Uses the notebook-level ``model``, ``optimizer``, ``criterion``,
    ``train_loader`` and ``writer``.

    Args:
        epoch: Zero-based epoch index; used in the progress printout and to
            compute the global step for the TensorBoard scalars.

    Side effects:
        Updates the model weights, prints progress every 10 batches and writes
        'training loss', 'training accuracy' and 'lr' scalars on every batch.
    """
    model.train()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    num_batches = len(train_loader)

    train_loss = 0.0  # sum of per-batch losses seen so far in this epoch
    total = 0         # number of samples seen so far
    correct = 0       # number of correct top-1 predictions so far (plain int)

    for batch_idx, (data, target) in enumerate(train_loader):
        # Tensors take part in autograd directly; the Variable wrapper is a no-op.
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # scheduler.step(epoch + batch_idx / num_batches)

        train_loss += loss.item()
        predicted = output.argmax(dim=1)
        total += target.size(0)
        # .item() keeps the counter a Python int instead of a growing tensor expression.
        correct += (predicted == target).sum().item()

        running_loss = train_loss / (batch_idx + 1)
        running_acc = 100. * correct / total

        if batch_idx % 10 == 0:
            print('Epoch: {} | Batch_idx: {} |  Loss_1: ({:.4f}) | Acc_1: ({:.2f}%) ({}/{})'
                  .format(epoch, batch_idx, running_loss, running_acc, correct, total))

        # One global step per optimizer update, continuous across epochs.
        global_step = epoch * num_batches + batch_idx
        writer.add_scalar('training loss', running_loss, global_step)
        writer.add_scalar('training accuracy', running_acc, global_step)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step)

In [8]:
def test(epoch):
    """Evaluate the model on ``test_loader`` and log running metrics.

    Uses the notebook-level ``model``, ``criterion``, ``test_loader`` and
    ``writer``.

    Args:
        epoch: Zero-based epoch index; used to compute the global step for the
            TensorBoard scalars.

    Side effects:
        Writes 'test loss' and 'test accuracy' scalars on every batch and prints
        a one-line summary. Prints a notice and returns early when the loader
        yields no samples.
    """
    model.eval()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    num_batches = len(test_loader)

    test_loss = 0.0  # sum of per-batch losses
    correct = 0      # correct top-1 predictions (plain int)
    total = 0        # samples evaluated

    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time.
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)

            outputs = model(data)
            loss = criterion(outputs, target)

            test_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

            global_step = epoch * num_batches + batch_idx
            writer.add_scalar('test loss', test_loss / (batch_idx + 1), global_step)
            writer.add_scalar('test accuracy', 100. * correct / total, global_step)

    if total == 0:
        # Avoid dividing by zero / reading an unbound loop index on an empty loader.
        print('# TEST : no samples found in test_loader')
        return

    print('# TEST : Loss: ({:.4f}) | Acc: ({:.2f}%) ({}/{})'
          .format(test_loss / num_batches, 100. * correct / total, correct, total))

        

In [9]:
def save_checkpoint(directory, state, filename='latest_1.tar.gz'):
    """Serialize ``state`` with ``torch.save`` to ``directory/filename``.

    Args:
        directory: Target directory; created (including parents) when missing.
            An empty string means the current working directory.
        state: Any object ``torch.save`` can serialize (here a dict holding the
            epoch index and state dicts).
        filename: File name inside ``directory``.
    """
    if directory:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(directory, exist_ok=True)

    model_filename = os.path.join(directory, filename)
    torch.save(state, model_filename)
    print("=> saving checkpoint")

def load_checkpoint(directory, filename='latest_1.tar.gz', map_location='cpu'):
    """Load a checkpoint written by ``torch.save`` from ``directory/filename``.

    Args:
        directory: Directory that holds the checkpoint file.
        filename: File name inside ``directory``.
        map_location: Forwarded to ``torch.load``. Defaults to ``'cpu'`` so a
            checkpoint saved on a GPU can also be opened on a machine without
            that GPU; ``load_state_dict`` copies values onto whatever device
            the receiving parameters live on.

    Returns:
        The deserialized object, or ``None`` when the file does not exist.
    """
    model_filename = os.path.join(directory, filename)
    if not os.path.exists(model_filename):
        return None

    print("=> loading checkpoint")
    # NOTE: torch.load unpickles the file; only open checkpoints you trust.
    return torch.load(model_filename, map_location=map_location)

In [10]:
# Total number of epochs to train for (resumed runs continue up to this count).
NUM_EPOCHS = 50
start_epoch = 0

# Resume from the last checkpoint when one exists.
checkpoint = load_checkpoint(default_directory, filename='vgg16.tar.gz')

if checkpoint:
    start_epoch = checkpoint['epoch'] + 1
    model.load_state_dict(checkpoint['state_dict'])
    # Restore momentum buffers as well; otherwise a resumed run restarts SGD
    # momentum from zero even though the state was saved.
    if 'optimizer' in checkpoint:
        optimizer.load_state_dict(checkpoint['optimizer'])

for epoch in range(start_epoch, NUM_EPOCHS):

    train(epoch)

    # Only state dicts are stored. Pickling the whole `model` object would
    # duplicate every weight in the file and tie the checkpoint to the exact
    # class/module layout at save time.
    save_checkpoint(default_directory, {
        'epoch': epoch,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, filename='vgg16.tar.gz')
    test(epoch)

# divmod keeps the hour count correct past 24 h (gmtime().tm_hour wraps to 0).
elapsed = int(time.time() - start_time)
hours, remainder = divmod(elapsed, 3600)
mins, secs = divmod(remainder, 60)
print('{} hours {} mins {} secs for training'.format(hours, mins, secs))

Epoch: 0 | Batch_idx: 0 |  Loss_1: (6.9111) | Acc_1: (0.00%) (0/64)
Epoch: 0 | Batch_idx: 10 |  Loss_1: (6.8679) | Acc_1: (12.22%) (86/704)
Epoch: 0 | Batch_idx: 20 |  Loss_1: (6.2132) | Acc_1: (18.30%) (246/1344)
Epoch: 0 | Batch_idx: 30 |  Loss_1: (6.0392) | Acc_1: (19.27%) (374/1941)
=> saving checkpoint
# TEST : Loss: (3.3431) | Acc: (30.10%) (31/103)
Epoch: 1 | Batch_idx: 0 |  Loss_1: (3.1338) | Acc_1: (23.44%) (15/64)
Epoch: 1 | Batch_idx: 10 |  Loss_1: (2.3618) | Acc_1: (24.01%) (169/704)
Epoch: 1 | Batch_idx: 20 |  Loss_1: (1.9313) | Acc_1: (24.78%) (333/1344)
Epoch: 1 | Batch_idx: 30 |  Loss_1: (1.7570) | Acc_1: (26.22%) (509/1941)
=> saving checkpoint
# TEST : Loss: (1.4166) | Acc: (19.42%) (20/103)
Epoch: 2 | Batch_idx: 0 |  Loss_1: (1.4060) | Acc_1: (26.56%) (17/64)
Epoch: 2 | Batch_idx: 10 |  Loss_1: (1.4136) | Acc_1: (26.85%) (189/704)
Epoch: 2 | Batch_idx: 20 |  Loss_1: (1.4090) | Acc_1: (28.57%) (384/1344)
Epoch: 2 | Batch_idx: 30 |  Loss_1: (1.4020) | Acc_1: (28.65%) (

In [None]:
# Reload the most recent training checkpoint into the live model.
checkpoint = load_checkpoint(default_directory, filename='vgg16.tar.gz')
if checkpoint is None:
    # load_checkpoint returns None for a missing file; fail with a clear message
    # instead of "'NoneType' object is not subscriptable".
    raise FileNotFoundError(
        'checkpoint not found: {}'.format(os.path.join(default_directory, 'vgg16.tar.gz')))
model.load_state_dict(checkpoint['state_dict'])



In [4]:
def classification_img(img_path, model):
    """Classify a single image file with ``model``.

    The image is read with OpenCV, converted to RGB, resized to 64x64, scaled
    to [0, 1] and fed through the network as a batch of one.

    Args:
        img_path: Path to an image file readable by ``cv2.imread``.
        model: A ``torch.nn.Module`` producing class logits for a
            ``(1, 3, 64, 64)`` float input. It is moved to the selected device
            and switched to eval mode.

    Returns:
        ``(prob, label_idx)``: the softmax probabilities with shape
        ``(1, num_classes)`` and the arg-max class index with shape ``(1,)``.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError('could not read image: {}'.format(img_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV reads images in BGR order.
    # NOTE(review): training resizes the short side to 128 and crops 64x64,
    # whereas this squashes the whole image to 64x64 — confirm this is intended.
    img = cv2.resize(img, (64, 64))

    # Scale uint8 pixels to [0, 1] to match the ToTensor() preprocessing used
    # for training; feeding raw 0-255 values would be far outside that range.
    img = torch.from_numpy(img).float().div(255.0)
    # (H, W, C) -> (C, H, W) -> (1, C, H, W)
    img = img.permute(2, 0, 1).unsqueeze(0)

    model = model.to(device)
    img = img.to(device)

    model.eval()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        out = model(img)
        label_idx = torch.argmax(out, dim=1)
        # Explicit class dimension; a dim-less Softmax relies on deprecated inference.
        prob = F.softmax(out, dim=1)

    return prob, label_idx

In [5]:
# Directory in which training checkpoints are stored.
default_directory = './save_models'

def load_checkpoint(directory, filename='latest_1.tar.gz', map_location='cpu'):
    """Load a checkpoint written by ``torch.save`` from ``directory/filename``.

    Args:
        directory: Directory that holds the checkpoint file.
        filename: File name inside ``directory``.
        map_location: Forwarded to ``torch.load``. Defaults to ``'cpu'`` so a
            checkpoint saved on a GPU can also be opened on a machine without
            that GPU; ``load_state_dict`` copies values onto whatever device
            the receiving parameters live on.

    Returns:
        The deserialized object, or ``None`` when the file does not exist.
    """
    model_filename = os.path.join(directory, filename)
    if not os.path.exists(model_filename):
        return None

    print("=> loading checkpoint")
    # NOTE: torch.load unpickles the file; only open checkpoints you trust.
    return torch.load(model_filename, map_location=map_location)

checkpoint = load_checkpoint(default_directory, filename='vgg16.tar.gz')
if checkpoint is None:
    raise FileNotFoundError(
        'checkpoint not found: {}'.format(os.path.join(default_directory, 'vgg16.tar.gz')))

# The training cell saves the state dict of an nn.DataParallel wrapper when GPUs
# are used, so every key is prefixed with "module.". A bare vgg16 has no such
# prefix; with strict=False the mismatch was silently ignored and NO weights
# were loaded. Strip the prefix and load strictly so any mismatch is an error.
state_dict = {
    (key[len('module.'):] if key.startswith('module.') else key): value
    for key, value in checkpoint['state_dict'].items()
}

# Rebuild the same architecture used for training; the head size is read from
# the checkpoint so the two always agree.
model = models.vgg16(num_classes=state_dict['classifier.6.weight'].shape[0])
model.load_state_dict(state_dict)

start = time.time()
# Predicted class index per image path, kept so the results can be inspected.
predictions = {}
for img in glob.iglob('./data/test/**/*.jpg', recursive=True):
    pred, label_idx = classification_img(img, model)
    predictions[img] = label_idx.item()

now = time.gmtime(time.time() - start)
# This cell times inference, not training.
print('{} hours {} mins {} secs for inference'.format(now.tm_hour, now.tm_min, now.tm_sec))

=> loading checkpoint




0 hours 0 mins 8 secs for training
