In [10]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt

In [11]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
batch_size = 4
num_workers = 2
epochs = 4
learning_rate = 0.001
betas = (0.9, 0.999)
PATH = '../data/saved_model/Resnet.pth'

In [12]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

trainset = torchvision.datasets.CIFAR10(root = '../data', train=True,
                                       download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                         shuffle=True, num_workers=num_workers)

testset = torchvision.datasets.CIFAR10(root = '../data', train=False,
                                       download=True, transform=transform)

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                        shuffle=False, num_workers=num_workers)

classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress',
          'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot')

def imshow(img):
    img = img / 2 + 0.5                        # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show


Files already downloaded and verified
Files already downloaded and verified


In [13]:
# Block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        
        self.block = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels)
        )
        self.downsample = downsample
        self.relu = nn.ReLU(inplace=True)
        
    def forward(self, x):
        residual = x
        out = self.block(x)
        # match size & channel
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        
        return out
            
        

In [18]:
# 11-layer ResNet
class Resnet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(Resnet, self).__init__()
        self.in_channels = 16
        self.conv = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        self.layer3 = self.make_layer(block, 64, layers[2], 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)
        
        
    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
                nn.BatchNorm2d(out_channels)
            )
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)
    
    def forward(self, x):
        # x : 28 * 28 * 1 (FashionMnist)
        # https://github.com/zalandoresearch/fashion-mnist
        out = self.conv(x)
        out = self.bn(out)
        # out : 28 * 28 * 16
        out = self.layer1(out)
        # out : 28 * 28 * 16
        out = self.layer2(out)
        # out : 14 * 14 * 32
        out = self.layer3(out)
        # out : 7 * 7 * 64
        out = self.avg_pool(out)
        # out : 1 * 1 * 64
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        
        return out
        
        

In [19]:
net_args = {
    "block": ResidualBlock,
    "layers": [2, 2, 2]
}

net = Resnet(**net_args)
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    net = nn.DataParallel(net)
net.to(device)

Resnet(
  (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): ResidualBlock(
      (block): Sequential(
        (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (relu): ReLU(inplace=True)
    )
    (1): ResidualBlock(
      (block): Sequential(
        (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3)

In [20]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate, betas=betas)

In [21]:
total_steps = len(trainloader)
for epoch in range(epochs):
    running_loss = 0.
    for i, (images, labels) in enumerate(trainloader, 0):
        images = images.to(device)
        labels = labels.to(device)
        
        out = net(images)
        loss = criterion(out, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if i % 1000 == 999:
            print("[{epoch}, {index}] loss: {loss}".format(epoch = epoch + 1, index = i + 1, loss = running_loss / i + 1))
    print("epoch: {epoch}, loss: {loss}".format(epoch = epoch, loss = running_loss / total_steps))
print("Finished Training")

[1, 1000] loss: 3.034723558702746
[1, 2000] loss: 2.9278693132068945
[1, 3000] loss: 2.8552026555275036
[1, 4000] loss: 2.7928992556941363
[1, 5000] loss: 2.734620675012383
[1, 6000] loss: 2.680020374757168
[1, 7000] loss: 2.634004911885532
[1, 8000] loss: 2.5932287976009754
[1, 9000] loss: 2.5587432008443214
[1, 10000] loss: 2.5225535728732567
[1, 11000] loss: 2.4904545149008506
[1, 12000] loss: 2.4643235638450807
epoch: 0, loss: 1.4498807907271385
[2, 1000] loss: 2.0900396839515105
[2, 2000] loss: 2.0746196331144153
[2, 3000] loss: 2.0628886450207524
[2, 4000] loss: 2.0502628137891548
[2, 5000] loss: 2.04182123974529
[2, 6000] loss: 2.0326274996112637
[2, 7000] loss: 2.0234256775008896
[2, 8000] loss: 2.015110206190072
[2, 9000] loss: 2.005532307629586
[2, 10000] loss: 1.9957177316991075
[2, 11000] loss: 1.9891684749785612
[2, 12000] loss: 1.9822336860843692
epoch: 1, loss: 0.979180535480976
[3, 1000] loss: 1.850831242503824
[3, 2000] loss: 1.848712152841987
[3, 3000] loss: 1.8383352

In [9]:
net.eval()

correct = 0
total = 0

with torch.no_grad():
    for i, (images, labels) in enumerate(testloader, 0):
        images = images.to(device)
        labels = labels.to(device)
        
        out = net(images)
        
        _, predicted = torch.max(out.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: {}'.format(
    100 * correct / total))

Accuracy of the network on the 10000 test images: 92.43


In [89]:
torch.save(net.state_dict(), PATH)