-----

# Import

In [1]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision
import torch.nn.functional as F

from torchsummary import summary



from torch.utils.data import DataLoader, SubsetRandomSampler
from torch.nn import CrossEntropyLoss

import cv2
import numpy as np
import matplotlib.pyplot as plt

from torch.utils.data import DataLoader, SubsetRandomSampler
from torch.nn import CrossEntropyLoss
from torchvision.datasets import CIFAR10

-----

# Load and split data

In [2]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)


classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [3]:
validation_ratio=0.1
random_seed= 17

num_train = len(train_dataset)
indices = list(range(num_train))
split = int(np.floor(validation_ratio * num_train))

np.random.seed(random_seed)
np.random.shuffle(indices)

train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

trainloader = DataLoader(train_dataset, batch_size=4,
                                          sampler=train_sampler, num_workers=2)
validloader = DataLoader(train_dataset, batch_size=4,
                                          sampler=valid_sampler, num_workers=2)
testloader = DataLoader(test_dataset, batch_size=4,
                                         shuffle=False, num_workers=2)

# week 9 에서는 batch_size = 1000, num_workers = int(cpu_count() / 2)

In [4]:
print("train set : ",len(train_sampler))
print("validation set : ",len(valid_sampler))
print("test set : ",len(test_dataset))

train set :  45000
validation set :  5000
test set :  10000


-----

# Make module



In [5]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [6]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.stride = stride

        # residualblock has two convolutional layer.

        self.identity = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
          self.identity = nn.Sequential(
              nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
              nn.BatchNorm2d(out_channels)
          )


    def forward(self, x):

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        return self.relu(out + self.identity(x))

# I made residualblock with lecture contents. but i realized, in H(x) = F(x)+ x , they are different tensor. so i added code which matching tensor .

class ResidualBlock2(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.residualfunc = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),

            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels)
        )

        # residualblock has two convolutional layer.

        self.identity = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
          self.identity = nn.Sequential(
              nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
              nn.BatchNorm2d(out_channels)
          )

        # F(x) + x(identity) before activate fucntion, should same tensor F(x) and identity 

    def forward(self, x):
        return nn.ReLU(inplace=True)(self.residualfunc(x) + self.identity(x))

In [7]:
class ResNet(nn.Module):

    def __init__(self, block, num_block, num_classes=10):
        super(ResNet, self).__init__() 

        self.in_channels = 64

        self.conv1x = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1, bias=False), 
            nn.BatchNorm2d(64),                                     # out_channels = 64.
            nn.ReLU(inplace=True))
        
        self.conv2x = self.get_layers(block, 64, num_block[0], 1)   # 3*3, 64
        self.conv3x = self.get_layers(block, 128, num_block[1], 2)  # 3*3, 128
        self.conv4x = self.get_layers(block, 256, num_block[2], 2)  # 3*3, 256
        self.conv5x = self.get_layers(block, 512, num_block[3], 2)  # 3*3, 512
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def get_layers(self, block, out_channels, num_blocks, stride):
      
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels

        return nn.Sequential(*layers)

    def forward(self, x):

        output = self.conv1x(x)

        output = self.conv2x(output)
        output = self.conv3x(output)
        output = self.conv4x(output)
        output = self.conv5x(output)
        output = self.avg_pool(output)

        output = output.view(output.size(0), -1)

        output = self.fc(output)

        return output

In [8]:
#model = ResNet(ResidualBlock, [2, 2, 2, 2]).to('cuda')

model = ResNet(ResidualBlock2, [2, 2, 2, 2]).to('cuda') 
# use ResidualBlock2 block, and [2, 2, 2, 2] => resnet-18 ( each conv2_x~conv5_x are *2)

summary(model, (3,32,32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,728
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
            Conv2d-4           [-1, 64, 32, 32]          36,864
       BatchNorm2d-5           [-1, 64, 32, 32]             128
              ReLU-6           [-1, 64, 32, 32]               0
            Conv2d-7           [-1, 64, 32, 32]          36,864
       BatchNorm2d-8           [-1, 64, 32, 32]             128
    ResidualBlock2-9           [-1, 64, 32, 32]               0
           Conv2d-10           [-1, 64, 32, 32]          36,864
      BatchNorm2d-11           [-1, 64, 32, 32]             128
             ReLU-12           [-1, 64, 32, 32]               0
           Conv2d-13           [-1, 64, 32, 32]          36,864
      BatchNorm2d-14           [-1, 64,

----
# Training model

In [9]:
criterion = CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.1, weight_decay=0.0001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, patience=3, verbose=True)

In [10]:
train_total = len(train_idx)
valid_total = len(valid_idx)

train_batches = len(trainloader)
valid_batches = len(validloader)

# Variables for lr scheduling and early stopping
best_valid_loss = 1024    # Any large number will suffice
patience = 0    # Bad epoch counter

In [11]:
for epoch in range(50):
    # Train
    model.train()
    
    train_loss = 0
    train_correct = 0
    
    for x, y in trainloader:
        x = x.to(device)
        y = y.to(device)
        outputs = model(x)
        loss = criterion(outputs, y)
        
        optimizer.zero_grad()
        
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        train_correct += predicted.eq(y).sum().item()
        
    train_loss = train_loss / train_batches
    train_acc = train_correct / train_total
    
    # Validate
    model.eval()
    
    valid_loss = 0
    valid_correct = 0
    
    with torch.no_grad():
        for x, y in validloader:
            x = x.to(device)
            y = y.to(device)
            outputs = model(x)
            loss = criterion(outputs, y)
            
            valid_loss += loss.item()
            _, predicted = outputs.max(1)
            valid_correct += predicted.eq(y).sum().item()
            
    valid_loss = valid_loss / valid_batches
    valid_acc = valid_correct / valid_total
    
    if best_valid_loss > valid_loss:
        torch.save(model.state_dict(), './best_resnet.pth')
        best_valid_loss = valid_loss
        patience = 0
        
    print('[%2d] TRAIN loss: %.3f, acc: %.3f, lr: %f .... VALID loss: %.3F, acc: %.3f, best_loss: %.3f .... PATIENCE %d' 
          % (epoch+1, train_loss, train_acc, optimizer.param_groups[0]['lr'], valid_loss, valid_acc, best_valid_loss, patience))
    
    scheduler.step(metrics=valid_loss)
    
    if patience == 5:
        break
        
    patience += 1

[ 1] TRAIN loss: 2.385, acc: 0.100, lr: 0.100000 .... VALID loss: 2.336, acc: 0.102, best_loss: 2.336 .... PATIENCE 0
[ 2] TRAIN loss: 2.398, acc: 0.101, lr: 0.100000 .... VALID loss: 5.123, acc: 0.102, best_loss: 2.336 .... PATIENCE 1
[ 3] TRAIN loss: 2.409, acc: 0.102, lr: 0.100000 .... VALID loss: 18.018, acc: 0.098, best_loss: 2.336 .... PATIENCE 2
[ 4] TRAIN loss: 2.409, acc: 0.098, lr: 0.100000 .... VALID loss: 2.522, acc: 0.102, best_loss: 2.336 .... PATIENCE 3
[ 5] TRAIN loss: 2.409, acc: 0.102, lr: 0.100000 .... VALID loss: 29.246, acc: 0.062, best_loss: 2.336 .... PATIENCE 4
Epoch     5: reducing learning rate of group 0 to 1.0000e-02.
[ 6] TRAIN loss: 1.938, acc: 0.259, lr: 0.010000 .... VALID loss: 1.730, acc: 0.320, best_loss: 1.730 .... PATIENCE 0
[ 7] TRAIN loss: 1.744, acc: 0.337, lr: 0.010000 .... VALID loss: 1.716, acc: 0.353, best_loss: 1.716 .... PATIENCE 0
[ 8] TRAIN loss: 1.646, acc: 0.381, lr: 0.010000 .... VALID loss: 1.604, acc: 0.389, best_loss: 1.604 .... PAT

KeyboardInterrupt: ignored

------

# Test model

In [12]:
loaded = ResNet(ResidualBlock2, [2, 2, 2, 2]).to(device)
loaded.load_state_dict(torch.load('./best_resnet.pth'))

<All keys matched successfully>

In [13]:
# Test
loaded.eval()

test_loss = 0
test_correct = 0

with torch.no_grad():
    for i, (x, y) in enumerate(testloader):
        x = x.to(device)
        y = y.to(device)
        outputs = loaded(x)
        loss = criterion(outputs, y)
        
        test_loss += loss.item()
        _, predicted = outputs.max(1)
        test_correct += predicted.eq(y).sum().item()
        
        if i == 0:
            test_preds = predicted
        else:
            test_preds = torch.cat((test_preds, predicted), dim=0)
            
test_preds = test_preds.cpu()

print('TEST loss: %.4f, acc: %.4f' % (test_loss/len(testloader), test_correct/len(test_dataset)))

TEST loss: 0.6909, acc: 0.7606


my test acc is 55% -> 76%