In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torchsummary import summary
from tensorboardX import SummaryWriter
from torch import optim
from torch.optim.lr_scheduler import StepLR

import os
from torchvision import datasets
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import time
import copy
from tqdm.notebook import tqdm
import torch.backends.cudnn as cudnn
import torchvision.models as models

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = '1,2,3,4,5'
start_time = time.time()
batch_size = 64
learning_rate = 0.005
default_directory = './save_models'
writer = SummaryWriter('./log/cus_cnn')

In [3]:
train_imgs = datasets.ImageFolder("./data/train",
                         transform=transforms.Compose([transforms.Resize(64),
                                                       transforms.RandomCrop(45),
                                                       transforms.ToTensor()]))

test_imgs = datasets.ImageFolder("./data/test",
                        transform=transforms.Compose([transforms.Resize(64),
                                                      transforms.RandomCrop(45),
                                                      transforms.ToTensor()]))

train_loader = DataLoader(train_imgs, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_imgs, batch_size=batch_size, shuffle=True)

In [4]:
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.layer1_conv = nn.Conv2d(3, 64, 3, 1) # 입력 1개, 출력 6개, 필터 크기는 5x5, 1칸 단위로 이동하면서 필터를 씌운다
        self.layer1_relu = nn.ReLU()             # 활성화 함수. ReLU(x) 는 max(x, 0)과 같다
        self.layer1_pool = nn.MaxPool2d(2)       # 각 2x2 칸마다 최대값 하나씩만을 남긴다
        self.layer2_conv = nn.Conv2d(64, 64, 3, 1)
        self.layer2_relu = nn.ReLU()
        self.layer2_pool = nn.MaxPool2d(2)
        self.layer3_conv = nn.Conv2d(64, 32, 3, 1)
        self.layer3_relu = nn.ReLU()
        self.layer3_pool = nn.MaxPool2d(2)
        self.fc = nn.Linear(32*3*3, 4)
    
    def forward(self, x):
        x1 = self.layer1_conv(x)  # 1x28x28 형식의 데이터가 6x24x24 형식으로 변환된다
        x2 = self.layer1_relu(x1) # 
        x3 = self.layer1_pool(x2) # 6x24x24 형식의 데이터가 6x12x12 형식으로 변환된다
        x4 = self.layer2_conv(x3) # 6x12x12 형식의 데이터가 16x8x8 형식으로 변환된다
        x5 = self.layer2_relu(x4) # 
        x6 = self.layer2_pool(x5) # 16x8x8 형식의 데이터가 16x4x4 형식으로 변환된다
        x7 = self.layer3_conv(x6) # 6x12x12 형식의 데이터가 16x8x8 형식으로 변환된다
        x8 = self.layer3_relu(x7) # 
        x9 = self.layer3_pool(x8)
        #print(x9.size())
        x10 = x9.view(-1, 288)     # 16x4x4 형식의 데이터가 256-벡터로 변환된다
        x11 = self.fc(x10)          # 256-벡터가 10-벡터로 변환된다
        return x11

In [5]:
cus_cnn = MyModel()

In [6]:
#if torch.cuda.device_count() > 0:
#    print("USE", torch.cuda.device_count(), "GPUs!")
#    model = nn.DataParallel(cus_cnn).cuda()
#    cudnn.benchmark = True
#else:
#    print("USE ONLY CPU!")
model = cus_cnn.cuda()

summary(model,(3, 45,45))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 43, 43]           1,792
              ReLU-2           [-1, 64, 43, 43]               0
         MaxPool2d-3           [-1, 64, 21, 21]               0
            Conv2d-4           [-1, 64, 19, 19]          36,928
              ReLU-5           [-1, 64, 19, 19]               0
         MaxPool2d-6             [-1, 64, 9, 9]               0
            Conv2d-7             [-1, 32, 7, 7]          18,464
              ReLU-8             [-1, 32, 7, 7]               0
         MaxPool2d-9             [-1, 32, 3, 3]               0
           Linear-10                    [-1, 4]           1,156
Total params: 58,340
Trainable params: 58,340
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.02
Forward/backward pass size (MB): 2.44
Params size (MB): 0.22
Estimated Tot

In [7]:
optimizer = optim.SGD(cus_cnn.parameters(), learning_rate,
                                momentum=0.9,
                                weight_decay=1e-4,
                                nesterov=True)             
#scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=50, T_mult=3, eta_min=0.001)
criterion = nn.CrossEntropyLoss()

In [8]:
def train(epoch):
    model.train()
    train_loss = 0 
    total = 0
    correct = 0
    iters = len(train_loader)
    for batch_idx, (data, target) in enumerate(train_loader):
        if torch.cuda.is_available():
            data, target = Variable(data.cuda()), Variable(target.cuda())
        else:
            data, target = Variable(data), Variable(target)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        #scheduler.step(epoch + batch_idx / iters)
        train_loss += loss.item()
        _, predicted = torch.max(output.data, 1)

        total += target.size(0)
        correct += predicted.eq(target.data).cpu().sum()
        if batch_idx % 10 == 0:
            print('Epoch: {} | Batch_idx: {} |  Loss_1: ({:.4f}) | Acc_1: ({:.2f}%) ({}/{})'
                  .format(epoch, batch_idx, train_loss / (batch_idx + 1), 100. * correct / total, correct, total))

        writer.add_scalar('training loss', (train_loss / (batch_idx + 1)) , epoch * len(train_loader) + batch_idx) #!#
        writer.add_scalar('training accuracy', (100. * correct / total), epoch * len(train_loader) + batch_idx) #!#
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch * len(train_loader) + batch_idx) #!#

In [9]:
def test(epoch):
    model.eval()

    test_loss = 0
    correct = 0
    total = 0
    for batch_idx, (data, target) in enumerate(test_loader):
        if torch.cuda.is_available():
            data, target = Variable(data.cuda()), Variable(target.cuda())
        else:
            data, target = Variable(data), Variable(target)
        print(data.shape)
        outputs = model(data)
        loss = criterion(outputs, target)

        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += target.size(0)
        correct += predicted.eq(target.data).cpu().sum()

        writer.add_scalar('test loss', test_loss / (batch_idx + 1), epoch * len(test_loader)+ batch_idx) #!#
        writer.add_scalar('test accuracy', 100. * correct / total, epoch * len(test_loader)+ batch_idx) #!#

    print('# TEST : Loss: ({:.4f}) | Acc: ({:.2f}%) ({}/{})'
          .format(test_loss / (batch_idx + 1), 100. * correct / total, correct, total))

        

In [10]:
def save_checkpoint(directory, state, filename='latest_1.tar.gz'):
    
    if not os.path.exists(directory):
        os.makedirs(directory)

    model_filename = os.path.join(directory, filename)
    torch.save(state, model_filename)
    print("=> saving checkpoint")

def load_checkpoint(directory, filename='latest_1.tar.gz'):

    model_filename = os.path.join(directory, filename)
    if os.path.exists(model_filename):
        print("=> loading checkpoint")
        state = torch.load(model_filename)
        return state
    else:
        return None

In [11]:
start_epoch = 0

checkpoint = load_checkpoint(default_directory, filename='cus_cnn.tar.gz')

if not checkpoint:
    pass
else:
    start_epoch = checkpoint['epoch'] + 1
    model.load_state_dict(checkpoint['state_dict'])

for epoch in range(start_epoch, 50):

    train(epoch)
    
    save_checkpoint(default_directory, {
        'epoch': epoch,
        'model': model,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, filename='cus_cnn.tar.gz')
    test(epoch)  
    
now = time.gmtime(time.time() - start_time)
print('{} hours {} mins {} secs for training'.format(now.tm_hour, now.tm_min, now.tm_sec))

Epoch: 0 | Batch_idx: 0 |  Loss_1: (1.3823) | Acc_1: (29.69%) (19/64)
Epoch: 0 | Batch_idx: 10 |  Loss_1: (1.3842) | Acc_1: (29.12%) (205/704)
Epoch: 0 | Batch_idx: 20 |  Loss_1: (1.3824) | Acc_1: (30.43%) (409/1344)
Epoch: 0 | Batch_idx: 30 |  Loss_1: (1.3801) | Acc_1: (30.45%) (591/1941)
=> saving checkpoint
torch.Size([64, 3, 45, 45])
torch.Size([39, 3, 45, 45])
# TEST : Loss: (1.3780) | Acc: (30.10%) (31/103)
Epoch: 1 | Batch_idx: 0 |  Loss_1: (1.3792) | Acc_1: (23.44%) (15/64)
Epoch: 1 | Batch_idx: 10 |  Loss_1: (1.3721) | Acc_1: (28.41%) (200/704)
Epoch: 1 | Batch_idx: 20 |  Loss_1: (1.3685) | Acc_1: (28.42%) (382/1344)
Epoch: 1 | Batch_idx: 30 |  Loss_1: (1.3615) | Acc_1: (30.09%) (584/1941)
=> saving checkpoint
torch.Size([64, 3, 45, 45])
torch.Size([39, 3, 45, 45])
# TEST : Loss: (1.3508) | Acc: (31.07%) (32/103)
Epoch: 2 | Batch_idx: 0 |  Loss_1: (1.2965) | Acc_1: (42.19%) (27/64)
Epoch: 2 | Batch_idx: 10 |  Loss_1: (1.3351) | Acc_1: (33.66%) (237/704)
Epoch: 2 | Batch_idx: 2

KeyboardInterrupt: 