# ResNet Implementation with PyTorch


In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch import optim
from torchsummary import summary

import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch.utils.data import random_split

In [2]:
# Run on the GPU when CUDA is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cpu


## Download CIFAR10 & Define DataLoader

- `ToTensor()` converts each image into a `torch.Tensor` (channel-first, `C x H x W`) and rescales the pixel values from `[0, 255]` to the `[0, 1]` range.

In [3]:
# ToTensor is the only preprocessing step; both splits share this pipeline.
transform = transforms.Compose([
    transforms.ToTensor(),
])

# Training split (later divided into train/validation) and the test split.
dataset = CIFAR10(
    root='',
    train=True,
    download=True,
    transform=transform,
)
testset = CIFAR10(
    root='',
    train=False,
    download=True,
    transform=transform,
)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
dataset.data.shape

(50000, 32, 32, 3)

In [5]:
testset.data.shape

(10000, 32, 32, 3)

In [6]:
dataset.classes

['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

In [7]:
# Keep 90% of the training images for optimisation and hold out the rest
# for validation.
TRAIN_FRACTION = 0.9
SPLIT_SEED = 42

train_num = int(len(dataset) * TRAIN_FRACTION)
valid_num = len(dataset) - train_num

# A dedicated, seeded generator makes the split identical on every run, so
# validation numbers stay comparable across kernel restarts.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
trainset, validset = random_split(
    dataset,
    [train_num, valid_num],
    generator=split_generator,
)

In [8]:
batch_size = 128

# Only the training loader shuffles; the evaluation loaders keep dataset order.
trainloader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
validloader = DataLoader(
    validset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)
testloader = DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

## Construct Model

In [51]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a shortcut.

    The residual branch is conv3x3 -> BN -> ReLU -> conv3x3 -> BN, where only
    the first convolution uses ``stride``. The shortcut is an empty
    ``nn.Sequential`` (identity) unless the stride differs from 1 or the
    channel count changes; in that case it holds one biased 1x1 convolution
    registered under the name ``'conv'``.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Stride of the first convolution and of the projection shortcut.
    """

    def __init__(self, in_planes, out_planes, stride=1):
        super().__init__()

        branch_layers = [
            nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_planes),
            nn.ReLU(),
            nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_planes),
        ]
        self.residual = nn.Sequential(*branch_layers)

        self.shortcut = nn.Sequential()
        self.relu = nn.ReLU()

        # A projection is only required when the residual branch changes the
        # spatial size or the number of channels.
        shape_preserved = stride == 1 and in_planes == out_planes
        if not shape_preserved:
            projection = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=True)
            self.shortcut.add_module('conv', projection)

    def forward(self, x):
        """Return ``relu(residual(x) + shortcut(x))``."""
        summed = self.residual(x) + self.shortcut(x)
        return self.relu(summed)
    

In [46]:
'''
숙제!
'''

class BottleNeck(nn.Module):
    """Bottleneck residual block: 1x1 reduce -> 3x3 -> 1x1 expand.

    The two inner convolutions work on ``out_planes // 4`` channels. The first
    1x1 convolution carries ``stride``. The shortcut is an empty
    ``nn.Sequential`` (identity) unless the stride differs from 1 or the
    channel count changes; in that case it holds one biased 1x1 convolution
    registered under the name ``'conv'``.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Stride of the first convolution and of the projection shortcut.
    """

    def __init__(self, in_planes, out_planes, stride=1):
        super().__init__()

        mid_planes = out_planes // 4
        branch_layers = [
            # 1x1 convolution that reduces the width (and applies the stride).
            nn.Conv2d(in_planes, mid_planes, kernel_size=1, stride=stride, padding=0, bias=False),
            nn.BatchNorm2d(mid_planes),
            nn.ReLU(),
            # 3x3 convolution at the reduced width.
            nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(mid_planes),
            nn.ReLU(),
            # 1x1 convolution that expands back to the output width.
            nn.Conv2d(mid_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(out_planes),
        ]
        self.residual = nn.Sequential(*branch_layers)

        self.shortcut = nn.Sequential()
        self.relu = nn.ReLU()

        shape_preserved = stride == 1 and in_planes == out_planes
        if not shape_preserved:
            projection = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=True)
            self.shortcut.add_module('conv', projection)

    def forward(self, x):
        """Return ``relu(residual(x) + shortcut(x))``."""
        summed = self.residual(x) + self.shortcut(x)
        return self.relu(summed)

In [61]:
class ResNet(nn.Module):
    """Small ResNet-style classifier producing 10 logits.

    Layout: 7x7 stem convolution -> BN -> ReLU, six ``BasicBlock`` stages
    (three of them with stride 2), global average pooling, and a head of
    Linear -> BatchNorm1d -> ReLU -> Linear.

    Args:
        channels: Sequence with at least five channel widths. ``channels[0]``
            is the width of the stem and first stage; ``channels[4]`` is the
            width of the final feature vector fed to the head.
        init_weights: When True, ``initialize_weights`` is run at the end of
            construction.
    """

    def __init__(self, channels, init_weights=True):
        super(ResNet, self).__init__()

        # Stem: stride 1 keeps the input resolution.
        self.conv = nn.Conv2d(3, channels[0], kernel_size=7, stride=1, padding=3, bias=False)
        self.bn = nn.BatchNorm2d(channels[0])
        self.relu = nn.ReLU()

        # Residual stages; blocks 2, 4 and 5 each halve the resolution.
        self.block1 = BasicBlock(channels[0], channels[0], stride=1)
        self.block2 = BasicBlock(channels[0], channels[1], stride=2)
        self.block3 = BasicBlock(channels[1], channels[2], stride=1)
        self.block4 = BasicBlock(channels[2], channels[3], stride=2)
        self.block5 = BasicBlock(channels[3], channels[4], stride=2)
        self.block6 = BasicBlock(channels[4], channels[4], stride=1)

        # Global pooling down to 1x1 regardless of the incoming feature-map
        # size. For 32x32 inputs the final maps are 4x4, so this gives the
        # same result as a fixed 4x4 average pool, while other input
        # resolutions no longer break the shape expected by ``fc1``.
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        # fc1 has no bias because the following BatchNorm1d supplies one.
        self.fc1 = nn.Linear(channels[4], channels[4]//4, bias=False)
        self.bn1 = nn.BatchNorm1d(channels[4]//4)
        self.fc2 = nn.Linear(channels[4]//4, 10, bias=True)

        if init_weights:
            self.initialize_weights()

    def forward(self, x):
        """Map a batch of 3-channel images ``(N, 3, H, W)`` to ``(N, 10)`` logits."""
        x = self.relu(self.bn(self.conv(x)))

        for stage in (self.block1, self.block2, self.block3,
                      self.block4, self.block5, self.block6):
            x = stage(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)

        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        return self.fc2(x)

    def initialize_weights(self):
        """Re-initialise every convolution, linear and batch-norm layer.

        Convolution and linear weights get Kaiming-normal init (``fan_out``,
        ReLU gain) and zero biases; batch-norm layers get weight 1 and bias 0.
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

In [62]:
# Build the network from its five channel widths, then move it to `device`.
model = ResNet(channels=[16, 16, 32, 64, 128])
model = model.to(device)

In [63]:
# Print a layer-by-layer summary for a single (3, 32, 32) input.
summary(model, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]           2,352
       BatchNorm2d-2           [-1, 16, 32, 32]              32
              ReLU-3           [-1, 16, 32, 32]               0
            Conv2d-4           [-1, 16, 32, 32]           2,304
       BatchNorm2d-5           [-1, 16, 32, 32]              32
              ReLU-6           [-1, 16, 32, 32]               0
            Conv2d-7           [-1, 16, 32, 32]           2,304
       BatchNorm2d-8           [-1, 16, 32, 32]              32
              ReLU-9           [-1, 16, 32, 32]               0
       BasicBlock-10           [-1, 16, 32, 32]               0
           Conv2d-11           [-1, 16, 16, 16]           2,304
      BatchNorm2d-12           [-1, 16, 16, 16]              32
             ReLU-13           [-1, 16, 16, 16]               0
           Conv2d-14           [-1, 16,

In [56]:
# Classification loss and optimiser for the model defined above.
loss_ftn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# The learning rate is the base rate times 0.95 ** epoch; verbose=True prints
# the rate every time it is adjusted.
scheduler = optim.lr_scheduler.LambdaLR(
    optimizer=optimizer,
    lr_lambda=lambda epoch: 0.95 ** epoch,
    last_epoch=-1,
    verbose=True,
)

Adjusting learning rate of group 0 to 1.0000e-02.


In [57]:
def train(net, trainloader, validloader, epochs, device, loss_ftn, optimizer, scheduler=None, temperature=None):
    """Train ``net`` and evaluate it on a validation set after every epoch.

    Each epoch runs one optimisation pass over ``trainloader`` with the
    network in training mode, followed by one gradient-free pass over
    ``validloader`` with the network in evaluation mode. Evaluation mode makes
    layers such as BatchNorm use their running statistics and stops them from
    being updated by validation data. The network is left in evaluation mode
    when the function returns.

    Args:
        net: Model to optimise; called as ``net(inputs)`` and expected to
            return per-class scores of shape ``(batch, num_classes)``.
        trainloader: Iterable of ``(inputs, labels)`` training batches.
        validloader: Iterable of ``(inputs, labels)`` validation batches.
        epochs: Number of epochs to run.
        device: Device every batch is moved to before the forward pass.
        loss_ftn: Callable ``loss_ftn(outputs, labels)`` returning a scalar loss.
        optimizer: Optimiser that updates the parameters of ``net``.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.
        temperature: Accepted for interface compatibility; not used here.

    Returns:
        Tuple ``(losses, accs, valid_losses, valid_accs)`` of per-epoch lists:
        mean training loss, training accuracy in percent, mean validation
        loss, and validation accuracy in percent.

    Raises:
        ZeroDivisionError: If either loader yields no batches.
    """
    losses = []
    accs = []
    valid_losses = []
    valid_accs = []

    for epoch in range(epochs):
        # ---- training pass -------------------------------------------------
        net.train()
        running_loss = 0
        correct = 0
        total = 0
        num_iter = 0

        for inputs, labels in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = net(inputs)
            loss = loss_ftn(outputs, labels)
            loss.backward()
            optimizer.step()

            num_iter += 1
            running_loss += loss.item()

            # Bookkeeping only: detach so no graph is kept for the metrics.
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        accuracy = (correct/total)*100
        losses.append(running_loss/num_iter)
        accs.append(accuracy)

        # ---- validation pass -----------------------------------------------
        net.eval()
        valid_loss = 0
        valid_correct = 0
        valid_total = 0
        valid_num_iter = 0

        with torch.no_grad():
            for images, labels in validloader:
                images = images.to(device)
                labels = labels.to(device)

                outputs = net(images)
                loss = loss_ftn(outputs, labels)

                valid_num_iter += 1
                valid_loss += loss.item()

                # The class with the highest score is the prediction.
                predicted = outputs.argmax(dim=1)
                valid_total += labels.size(0)
                valid_correct += (predicted == labels).sum().item()

        valid_accuracy = (valid_correct/valid_total)*100
        valid_losses.append(valid_loss/valid_num_iter)
        valid_accs.append(valid_accuracy)

        print('EPOCH %d Completed. \n Training Loss: %.3f, Training Accuracy: %.2f, Validation Loss: %.3f, Validation Accuracy: %.2f \n' 
              %(epoch+1, running_loss/num_iter, accuracy, valid_loss/valid_num_iter, valid_accuracy))

        if scheduler is not None:
            scheduler.step()

    return losses, accs, valid_losses, valid_accs

In [58]:
def test(net, testloader, device):
    correct = 0
    total = 0
    
    with torch.no_grad():
        for data in testloader:
            images, labels = data

            images = images.to(device)
            labels = labels.to(device)

            # calculate outputs by running images through the network
            outputs = net(images)

            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on 10000 test images: {100 * correct / total} %')

In [59]:
EPOCHS = 30

# Run the full training schedule and keep the per-epoch curves.
losses, accs, valid_losses, valid_accs = train(
    net=model,
    trainloader=trainloader,
    validloader=validloader,
    epochs=EPOCHS,
    device=device,
    loss_ftn=loss_ftn,
    optimizer=optimizer,
    scheduler=scheduler,
)

EPOCH 1 Completed. 
 Training Loss: 1.706, Training Accuracy: 36.55, Validation Loss: 1.494, Validation Accuracy: 45.50 

Adjusting learning rate of group 0 to 9.5000e-03.
EPOCH 2 Completed. 
 Training Loss: 1.316, Training Accuracy: 52.24, Validation Loss: 1.193, Validation Accuracy: 58.50 

Adjusting learning rate of group 0 to 9.0250e-03.
EPOCH 3 Completed. 
 Training Loss: 1.061, Training Accuracy: 62.54, Validation Loss: 1.057, Validation Accuracy: 63.40 

Adjusting learning rate of group 0 to 8.5737e-03.
EPOCH 4 Completed. 
 Training Loss: 0.902, Training Accuracy: 67.98, Validation Loss: 0.919, Validation Accuracy: 68.86 

Adjusting learning rate of group 0 to 8.1451e-03.
EPOCH 5 Completed. 
 Training Loss: 0.774, Training Accuracy: 72.74, Validation Loss: 0.813, Validation Accuracy: 72.22 

Adjusting learning rate of group 0 to 7.7378e-03.
EPOCH 6 Completed. 
 Training Loss: 0.666, Training Accuracy: 76.80, Validation Loss: 0.798, Validation Accuracy: 72.74 

Adjusting learning

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fcafb761820>
Traceback (most recent call last):
  File "/opt/anaconda3/envs/mathdl/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1203, in __del__
    self._shutdown_workers()
  File "/opt/anaconda3/envs/mathdl/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1177, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/opt/anaconda3/envs/mathdl/lib/python3.8/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/opt/anaconda3/envs/mathdl/lib/python3.8/multiprocessing/popen_fork.py", line 44, in wait
    if not wait([self.sentinel], timeout):
  File "/opt/anaconda3/envs/mathdl/lib/python3.8/multiprocessing/connection.py", line 931, in wait
    ready = selector.select(timeout)
  File "/opt/anaconda3/envs/mathdl/lib/python3.8/selectors.py", line 415, in select
    fd_event_list = self._selector.poll(timeout)
Keyboa

KeyboardInterrupt: 

In [65]:
# Report the accuracy of the current model on the CIFAR10 test split.
test(net=model, testloader=testloader, device=device)

Accuracy of the network on the 10000 test images: 8.78 %


In [64]:
# Plot the per-epoch training and validation loss curves.
# The x-axis is derived from the recorded history rather than from EPOCHS, so
# the x and y arrays passed to plot() always have matching lengths. Epochs are
# numbered from 1 to match the training log.
epoch_axis = np.arange(1, len(losses) + 1)

plt.figure()
plt.plot(epoch_axis, losses, label='Training loss')
plt.plot(epoch_axis, valid_losses, label='Validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training vs. validation loss')
plt.legend()
plt.show()

NameError: name 'losses' is not defined

<Figure size 640x480 with 0 Axes>