In [2]:
# loading and normalizing cifar10
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np

import torch.nn as nn
import torch.nn.functional as F

from torchsummary import summary

In [3]:
# Alternative pipeline with padding / flip / random-crop augmentation (disabled):
#transform = transforms.Compose(
#    [transforms.ToTensor(),
#    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
#    transforms.Pad(4), 
#    transforms.RandomHorizontalFlip(),
#    transforms.RandomCrop(32)])

# Active pipeline: convert each image to a tensor, then normalize every RGB
# channel as (value - 0.5) / 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])



In [4]:
# CIFAR-10 training split, served in shuffled mini-batches of 64.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset, batch_size=64, shuffle=True, num_workers=2)

# CIFAR-10 test split, served in a fixed order with the same batch size.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    dataset=testset, batch_size=64, shuffle=False, num_workers=2)


Files already downloaded and verified
Files already downloaded and verified


In [5]:
## creating a function that initializes weights
# new init weight        
def init_weight(m):
    """Initialise the parameters of a single module in place.

    Intended to be passed to ``nn.Module.apply`` so that it is called once
    for every submodule of a network.

    * ``nn.Conv2d`` / ``nn.Linear``: weights are drawn with He (Kaiming)
      normal initialisation; biases, if present, keep their default values.
    * ``nn.BatchNorm2d``: scale is set to 1 and shift to 0, so the layer
      starts as a pure normalisation with no learned offset.
    * Any other module is left untouched.

    Args:
        m: the module to initialise.
    """
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_normal_(m.weight)  # He initialisation; xavier is an alternative
        #nn.init.constant_(m.bias, 0.001) # optional bias
    elif isinstance(m, nn.BatchNorm2d):
        # BatchNorm2d(affine=False) has no learnable scale/shift to set.
        if m.weight is not None:
            nn.init.constant_(m.weight, 1)
        if m.bias is not None:
            # A shift of 0 (not 1) keeps the normalised activations zero-centred.
            nn.init.constant_(m.bias, 0)

In [31]:
# darknet block
class darknet_block(nn.Module):
    """Residual unit made of a 1x1 convolution followed by a 3x3 convolution.

    The 1x1 convolution maps ``in_channels`` to ``out_channels``; the 3x3
    convolution (which carries ``stride``) maps ``out_channels`` to
    ``out_channels * expansion``. The block input, optionally passed through
    ``downsample``, is added to that result before the final activation.

    Args:
        in_channels: number of channels of the block input.
        out_channels: number of channels produced by the 1x1 convolution.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to produce the
            shortcut; when ``None`` the input itself is used, so it must
            already have the shape of the 3x3 convolution's output.
    """
    expansion = 2  # channel multiplier of the 3x3 convolution

    def __init__(self, in_channels, out_channels, stride = 1, downsample = None):
        super().__init__()
        expanded_channels = out_channels * self.expansion

        # 1x1 convolution (always stride 1)
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # 3x3 convolution; this is the layer that applies `stride`
        self.conv2 = nn.Conv2d(out_channels, expanded_channels, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(expanded_channels)
        self.leakyrelu = nn.LeakyReLU(negative_slope=0.1, inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # main path: conv -> bn -> activation, then conv -> bn
        squeezed = self.leakyrelu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(squeezed))

        # shortcut path: identity unless a projection module was supplied
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.leakyrelu(out)
        

                        

In [35]:
# DarkNet-style classifier assembled from residual blocks
n_classes = 10 

class DarkNet(nn.Module):
    """Image classifier: a two-convolution stem, five residual stages and a linear head.

    Stages 1-4 are each followed by a stride-2 3x3 convolution that doubles
    the channel count (64 -> 128 -> 256 -> 512 -> 1024). After stage 5 the
    feature map is average-pooled to 1x1, flattened and fed to the linear
    output layer.

    Args:
        block: residual block class. It is constructed as
            ``block(in_channels, out_channels, stride, downsample)`` and must
            expose a class attribute ``expansion``.
        layers: sequence whose first five entries give the number of blocks
            in stages 1-5.
        num_classes: number of outputs of the final linear layer.
    """

    def __init__(self, block, layers, num_classes = n_classes): # layer is a list
        super().__init__()

        # stem: 3 -> 32 channels at full resolution ...
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.batchnorm1 = nn.BatchNorm2d(32)
        self.leakyrelu = nn.LeakyReLU(negative_slope=0.1, inplace=True)

        # ... then 32 -> 64 channels with stride 2
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.batchnorm2 = nn.BatchNorm2d(64)

        # stage 1 and its transition convolution (64 -> 128)
        self.layer1 = self.make_layer(block, 64, 32, layers[0])
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1, bias=False)
        self.batchnorm3 = nn.BatchNorm2d(128)

        # stage 2 and its transition convolution (128 -> 256)
        self.layer2 = self.make_layer(block, 128, 64, layers[1], stride=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1, bias=False)
        self.batchnorm4 = nn.BatchNorm2d(256)

        # stage 3 and its transition convolution (256 -> 512)
        self.layer3 = self.make_layer(block, 256, 128, layers[2], stride=1)
        self.conv5 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1, bias=False)
        self.batchnorm5 = nn.BatchNorm2d(512)

        # stage 4 and its transition convolution (512 -> 1024)
        self.layer4 = self.make_layer(block, 512, 256, layers[3], stride=1)
        self.conv6 = nn.Conv2d(512, 1024, kernel_size=3, stride=2, padding=1, bias=False)
        self.batchnorm6 = nn.BatchNorm2d(1024)

        # stage 5 has no transition convolution after it
        self.layer5 = self.make_layer(block, 1024, 512, layers[4], stride=1)

        # head: global average pool, then a linear layer whose input width is
        # the channel count produced by stage 5
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def make_layer(self, block, in_channels, out_channels, blocks, stride=1):
        """Build one stage as an ``nn.Sequential`` of ``block`` instances.

        Args:
            block: residual block class (see the class docstring).
            in_channels: channels entering the first block of the stage.
            out_channels: inner width passed to every block; each block is
                expected to emit ``out_channels * block.expansion`` channels.
            blocks: number of blocks in the stage (at least one is always built).
            stride: stride given to the first block only.

        Returns:
            The stage as an ``nn.Sequential``.
        """
        expanded = out_channels * block.expansion

        # A projection shortcut is only needed when the first block changes
        # the resolution or the channel count.
        downsample = None
        if not (stride == 1 and in_channels == expanded):
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, expanded, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(expanded),
            )

        # First block may stride/project; the remaining ones keep the shape.
        stage = [block(in_channels, out_channels, stride, downsample)]
        stage.extend(block(expanded, out_channels) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        # stem
        out = self.leakyrelu(self.batchnorm1(self.conv1(x)))
        out = self.leakyrelu(self.batchnorm2(self.conv2(out)))

        # stages 1-4, each followed by its transition conv -> bn -> activation
        stages_with_transition = (
            (self.layer1, self.conv3, self.batchnorm3),
            (self.layer2, self.conv4, self.batchnorm4),
            (self.layer3, self.conv5, self.batchnorm5),
            (self.layer4, self.conv6, self.batchnorm6),
        )
        for stage, conv, norm in stages_with_transition:
            out = self.leakyrelu(norm(conv(stage(out))))

        out = self.layer5(out)

        # pool to (batch, channels, 1, 1), flatten to (batch, channels), classify
        pooled = self.avg_pool(out)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)





In [36]:
# Network used below: one residual block in each of the five stages.
net = DarkNet(block=darknet_block, layers=[1] * 5)

# Deeper variant with more blocks per stage (disabled):
#darknet53 = DarkNet(darknet_block, layers = [1,2,8,8,4])

In [37]:
# visualize network
# Alternative inspections (disabled): dump every parameter tensor's size, or
# print the module tree.
#for i, weights in enumerate(list(net.parameters())):
#    print('i:',i,'weights:',weights.size())

#print(net)

# Per-layer output shapes and parameter counts for a 3 x 256 x 256 input.
# NOTE(review): the CIFAR-10 images loaded above are 3 x 32 x 32; the network
# accepts both sizes because of the adaptive average pool before the linear
# layer, but the shapes printed here are for 256 x 256 - confirm this is the
# size you want to inspect.
summary(net, (3, 256, 256))

#summary(darknet53, (3, 256, 256))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 256, 256]             864
       BatchNorm2d-2         [-1, 32, 256, 256]              64
         LeakyReLU-3         [-1, 32, 256, 256]               0
            Conv2d-4         [-1, 64, 128, 128]          18,432
       BatchNorm2d-5         [-1, 64, 128, 128]             128
         LeakyReLU-6         [-1, 64, 128, 128]               0
            Conv2d-7         [-1, 32, 128, 128]           2,048
       BatchNorm2d-8         [-1, 32, 128, 128]              64
         LeakyReLU-9         [-1, 32, 128, 128]               0
           Conv2d-10         [-1, 64, 128, 128]          18,432
      BatchNorm2d-11         [-1, 64, 128, 128]             128
        LeakyReLU-12         [-1, 64, 128, 128]               0
    darknet_block-13         [-1, 64, 128, 128]               0
           Conv2d-14          [-1, 128,

In [None]:
# apply initializers
# Module.apply calls init_weight on every submodule of net and on net itself,
# then returns net (so the cell displays the module tree).
net.apply(init_weight)

In [None]:
# Loss function and optimizer used by the training loop.
import torch.optim as optim

learning_rate = 1e-3
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=learning_rate)

# SGD with Nesterov momentum as an alternative (disabled):
#optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, nesterov=True)

In [None]:
# Train for n_epochs. The running training loss is printed every 50
# mini-batches; after each epoch the average train loss, average test loss and
# test accuracy are printed.
n_epochs = 2

for epoch in range(n_epochs):
    # Training mode: BatchNorm layers normalize with the statistics of the
    # current batch and update their running estimates.
    net.train()
    running_loss = 0.0      # reset after every progress print
    total_train_loss = 0.0  # accumulated over the whole epoch
    for i, (inputs, labels) in enumerate(trainloader):
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print loss per n minibatches
        batch_loss = loss.item()
        running_loss += batch_loss
        total_train_loss += batch_loss
        if i % 50 == 49:    # print every 50 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 50))
            running_loss = 0.0

    # Evaluation mode: BatchNorm layers use their running estimates and are
    # not updated by the test batches. Without this switch the test data
    # would leak into the running statistics and the reported loss/accuracy
    # would depend on how the test set is batched. The network is left in
    # evaluation mode after the final epoch.
    net.eval()
    correct = 0
    total = 0
    total_test_loss = 0.0
    with torch.no_grad():
        for test_images, test_labels in testloader:
            test_outputs = net(test_images)
            test_loss = criterion(test_outputs, test_labels)
            total_test_loss += test_loss.item()
            # the index of the largest output is the predicted class
            _, predicted = torch.max(test_outputs, 1)
            total += test_labels.size(0)
            correct += (predicted == test_labels).sum().item()

    # for printing average loss every epoch (averaged over mini-batches)
    print("===> Epoch {} Complete: Train Avg. Loss: {:.4f}".format(epoch+1, total_train_loss / len(trainloader)))
    print("===> Epoch {} Complete: Test Avg. Loss: {:.4f}".format(epoch+1, total_test_loss / len(testloader)))
    print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))
print('Finished Training')