# 10-5 VGG for CIFAR-10

In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from tqdm.notebook import tqdm

# Seed PyTorch's global random number generator so repeated runs draw the
# same random numbers.
SEED = 777
torch.manual_seed(SEED)

<torch._C.Generator at 0x1b4dd5f5530>

In [4]:
# Preprocessing: convert each image to a tensor, then normalize every one of
# the three colour channels with mean 0.5 and std 0.5 (one entry per channel).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training split: fetched into ./data if missing, served in shuffled batches of 512.
train_set = torchvision.datasets.CIFAR10(
    root='data', train=True, download=True, transform=transform
)
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=512, shuffle=True
)

# Test split: kept in dataset order and served four images at a time.
test_set = torchvision.datasets.CIFAR10(
    root='data', train=False, download=True, transform=transform
)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=4, shuffle=False
)

# Human-readable class names, indexed by the integer label.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data\cifar-10-python.tar.gz


HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

Extracting data\cifar-10-python.tar.gz to data
Files already downloaded and verified


In [5]:
import torchvision.models.vgg as vgg

# Layer specifications keyed by variant name. In each list an integer is the
# output-channel count of one convolution and 'M' marks a max-pooling layer
# (see make_layers). The counts below are conv layers + 3 fully connected layers.
cfg = {
    # 8 conv + 3 FC = VGG11
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    # 10 conv + 3 FC = VGG13
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    # 13 conv + 3 FC = VGG16
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
          512, 512, 512, 'M', 512, 512, 512, 'M'],
    # 16 conv + 3 FC = VGG19
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M',
          512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
    # Shallower variant: 9 conv layers and only three pooling stages
    'custom': [64, 64, 64, 'M', 128, 128, 128, 'M', 256, 256, 256, 'M'],
}

In [8]:
# Configuration actually used below; it replaces the dict of variants.
# 13 conv layers + 3 FC layers = VGG16 depth, with three pooling stages.
cfg = [
    32, 32, 'M',
    64, 64, 128, 128, 128, 'M',
    256, 256, 256, 512, 512, 512, 'M',
]

In [10]:
class VGG(nn.Module):
    """VGG-style classifier: a convolutional feature extractor followed by a
    three-layer fully connected head.

    Args:
        features: Module that maps an image batch to feature maps, e.g. the
            ``nn.Sequential`` returned by ``make_layers``.
        num_classes: Width of the final output layer.
        init_weights: When true, re-initialise every Conv2d / BatchNorm2d /
            Linear parameter through ``_initialize_weights``.
        classifier_in_features: Number of values per sample that ``features``
            produces once flattened. The default ``512 * 4 * 4`` fits a
            feature extractor ending in 512 channels at 4x4 resolution; pass
            another value for configurations with a different output size.
    """

    def __init__(self, features, num_classes=10, init_weights=True,
                 classifier_in_features=512 * 4 * 4):
        super().__init__()
        self.features = features

        # Fully connected head
        self.classifier = nn.Sequential(
            nn.Linear(classifier_in_features, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Run ``features``, flatten everything but the batch axis, and
        return the classifier output."""
        x = self.features(x)
        # flatten(1) keeps the batch dimension and, unlike view(), also
        # accepts non-contiguous feature maps.
        x = x.flatten(1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Initialise all sub-module parameters in place.

        Conv2d weights use Kaiming-normal (fan_out, ReLU), BatchNorm2d is set
        to weight 1 / bias 0, and Linear weights are drawn from N(0, 0.01).
        All biases are zeroed.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
                
def make_layers(cfg, batch_norm=False):
    """Build the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: Sequence whose entries are either ``'M'`` (insert a 2x2,
            stride-2 max-pool) or an output-channel count (insert a 3x3,
            padding-1 convolution followed by an in-place ReLU).
        batch_norm: If true, place a ``BatchNorm2d`` between each convolution
            and its ReLU.

    Returns:
        An ``nn.Sequential`` of the generated layers; the first convolution
        takes 3 input channels.
    """
    modules = []
    channels = 3

    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        modules.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(spec))
        modules.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        channels = spec

    return nn.Sequential(*modules)

In [13]:
vgg16 = VGG(make_layers(cfg), 10, True)

# Smoke test: push one CIFAR-sized batch (1 image, 3 channels, 32x32) through
# the network and check the output shape. torch.randn gives well-defined
# values; the legacy torch.Tensor(sizes) constructor returns uninitialized
# memory that may contain NaN/inf.
a = torch.randn(1, 3, 32, 32)
vgg16(a).shape

torch.Size([1, 10])

In [15]:
# Loss function for classification
criterion = nn.CrossEntropyLoss()

# SGD with momentum over all parameters of the network
optimizer = optim.SGD(
    vgg16.parameters(),
    lr=0.005,
    momentum=0.9,
)

# Multiply the learning rate by `gamma` once every `step_size` scheduler steps
lr_sche = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=5,
    gamma=0.9,
)

### Training

In [24]:
epochs = 1

for epoch in range(epochs):
    # Put Dropout (and BatchNorm, if enabled) in training mode, in case an
    # evaluation cell switched the model to eval mode before this cell runs.
    vgg16.train()
    running_loss = 0.0
    # Wrap the loader itself rather than enumerate() so tqdm can read its
    # length and show real progress.
    for idx, data in enumerate(tqdm(train_loader), 0):
        inputs, labels = data

        optimizer.zero_grad()
        outputs = vgg16(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean loss over every 30 mini-batches.
        running_loss += loss.item()
        if idx % 30 == 29:
            print('[%d, %4d] loss : %.4f' %(epoch+1, idx+1, running_loss/30))
            running_loss = 0.0

    # Advance the LR schedule once per epoch, after the optimizer updates.
    # Calling it before optimizer.step() skips the first value of the
    # schedule and raises a warning on PyTorch >= 1.1.
    lr_sche.step()

HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

[1,   30] loss : 2.2976
[1,   60] loss : 2.2908
[1,   90] loss : 2.2715



In [25]:
dataiter = iter(test_loader)
# Use the built-in next(): DataLoader iterators no longer expose a .next()
# method on current PyTorch releases.
images, labels = next(dataiter)

# Print the true class name of every image in this first test batch
print('GroundTruth : ', ' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))

GroundTruth :    cat  ship  ship plane


In [28]:
# Switch to inference mode so Dropout is disabled and predictions are
# deterministic; no_grad() skips building the autograd graph.
# (Call vgg16.train() again before any further training.)
vgg16.eval()
with torch.no_grad():
    outputs = vgg16(images)

# Index of the highest score per image is the predicted class
_, predicted = torch.max(outputs, 1)

print('Predicted : ', ' '.join('%5s' % classes[predicted[j]] for j in range(len(predicted))))

Predicted :  horse  deer   dog  deer


In [27]:
correct = 0
total = 0

# Evaluate in inference mode: without eval() the Dropout layers stay active
# and randomly zero activations, which lowers and randomizes the accuracy.
# (Call vgg16.train() again before any further training.)
vgg16.eval()
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = vgg16(images)

        # Predicted class = index of the largest score in each row
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy : %d %%' %(100 * correct / total))

Accuracy : 18 %
