Import data and apply normalization

In [44]:
%matplotlib inline
import torch
import torch.nn as nn
from matplotlib import pyplot as plt
import numpy as np
import torchvision
import torchvision.datasets as datasets
from torchvision import transforms
import torch.optim as optim
import time

batch_size = 128
num_epochs = 20
transform = transforms.Compose([transforms.ToTensor()])

trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)


Files already downloaded and verified
Files already downloaded and verified


1. Without Batch-Normalization

In [45]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 10, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(10, 16, kernel_size=5)
        self.fc1 = nn.Linear(16*6*6, 100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 10)
        self.sigmoid = nn.Sigmoid()
        self.avgpool = nn.AvgPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        # Feature extraction using to convolutional layers.
        x = self.avgpool(self.sigmoid(self.conv1(x)))
        x = self.avgpool(self.sigmoid(self.conv2(x)))
        #reshape the tensor to 1-d to fit the FC layer input
        x = x.view(x.shape[0], -1)
        # Classifier using three fully connected layers.
        x = self.sigmoid(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        x = self.fc3(x)
        return x

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.normal_(0.0, 0.01)
    elif classname.find('Linear') != -1:
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.normal_(0.0, 0.01)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.01)
        m.bias.data.fill_(0)

def evaluate_accuracy(data_iter, net):
    acc_sum,n = 0,0
    for (imgs, labels) in data_iter:
        # send data to the GPU if cuda is available
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            labels = labels.cuda()
        net.eval()
        with torch.no_grad():
            labels = labels.long()
            acc_sum += torch.sum((torch.argmax(net(imgs), dim=1) == labels)).float()
            n += labels.shape[0]
    return acc_sum.item()/n


# Loading training set and test set using DataLoader.
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
    shuffle=True, num_workers=0)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
    shuffle=True, num_workers=0)

if torch.cuda.is_available():
    print('Training using GPU.')
    net = LeNet().cuda()
else:
    print('Training using CPU.')
    net = LeNet()

#Initialize network parameters.
net.apply(weights_init)

#Loss function
if torch.cuda.is_available():
    loss = nn.CrossEntropyLoss().cuda()
else:
    loss = nn.CrossEntropyLoss()

# Train using SGD optimizer
lr= 0.41 # not fine-tuned lr.
opt_n = optim.SGD(net.parameters(), lr=lr)

# Training stage
for epoch in range(1, num_epochs+1):
    train_loader_iter = iter(train_loader)
    train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()

    for (imgs, labels) in train_loader_iter:
        net.train()
        opt_n.zero_grad()
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            labels = labels.cuda()
        # Label prediction from LeNet
        y_hat = net(imgs)
        l = loss(y_hat, labels)
        # Backprobagation
        l.backward()
        opt_n.step()

        # Calculate tarining error
        with torch.no_grad():
            labels = labels.long()
            train_l_sum += l.item()
            train_acc_sum += (torch.sum(torch.argmax(y_hat, dim=1) == labels)).float().item()
            n += labels.shape[0]
    # calculate testing error every epoch.
    test_acc = evaluate_accuracy(iter(test_loader), net)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
          % (epoch, train_l_sum/n, train_acc_sum/n, test_acc,
            time.time() - start))



Training using CPU.
epoch 1, loss 0.0181, train acc 0.102, test acc 0.100, time 24.1 sec
epoch 2, loss 0.0180, train acc 0.101, test acc 0.102, time 24.1 sec
epoch 3, loss 0.0172, train acc 0.168, test acc 0.229, time 24.2 sec
epoch 4, loss 0.0159, train acc 0.241, test acc 0.269, time 24.1 sec
epoch 5, loss 0.0155, train acc 0.264, test acc 0.286, time 24.4 sec
epoch 6, loss 0.0152, train acc 0.276, test acc 0.252, time 24.4 sec
epoch 7, loss 0.0148, train acc 0.306, test acc 0.350, time 24.1 sec
epoch 8, loss 0.0140, train acc 0.352, test acc 0.306, time 24.2 sec
epoch 9, loss 0.0135, train acc 0.374, test acc 0.345, time 24.0 sec
epoch 10, loss 0.0130, train acc 0.394, test acc 0.403, time 24.0 sec
epoch 11, loss 0.0127, train acc 0.409, test acc 0.427, time 24.2 sec
epoch 12, loss 0.0124, train acc 0.424, test acc 0.444, time 24.0 sec
epoch 13, loss 0.0120, train acc 0.439, test acc 0.462, time 23.9 sec
epoch 14, loss 0.0117, train acc 0.458, test acc 0.475, time 24.0 sec
epoch 15,

2. with an additional droppout layer

In [46]:
class LeNetDropout(nn.Module):
    def __init__(self):
        super(LeNetDropout, self).__init__()
        self.conv1 = nn.Conv2d(3, 10, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(10, 16, kernel_size=5)
        self.fc1 = nn.Linear(16*6*6, 100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 10)
        self.sigmoid = nn.Sigmoid()
        self.avgpool = nn.AvgPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)  # 50% dropout

    def forward(self, x):
        x = self.avgpool(self.sigmoid(self.conv1(x)))
        x = self.avgpool(self.sigmoid(self.conv2(x)))
        x = x.view(x.shape[0], -1)
        x = self.dropout(x)  # Apply dropout
        x = self.sigmoid(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        x = self.fc3(x)
        return x

In [47]:
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.normal_(0.0, 0.01)
    elif classname.find('Linear') != -1:
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.normal_(0.0, 0.01)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.01)
        m.bias.data.fill_(0)

def evaluate_accuracy(data_iter, net):
    """Evaluate accuracy of a model on the given data set."""
    acc_sum,n = 0,0
    for (imgs, labels) in data_iter:
        # send data to the GPU if cuda is availabel
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            labels = labels.cuda()
        net.eval()
        with torch.no_grad():
            labels = labels.long()
            acc_sum += torch.sum((torch.argmax(net(imgs), dim=1) == labels)).float()
            n += labels.shape[0]
    return acc_sum.item()/n


# Loading training set and test set using DataLoader.
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
    shuffle=True, num_workers=0)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
    shuffle=True, num_workers=0)

if torch.cuda.is_available():
    print('Training using GPU.')
    net = LeNet().cuda()
else:
    print('Training using CPU.')
    net = LeNet()

#Initialize network parameters.
net.apply(weights_init)

#Loss function
if torch.cuda.is_available():
    loss = nn.CrossEntropyLoss().cuda()
else:
    loss = nn.CrossEntropyLoss()

# Train using SGD optimizer
lr= 0.4 # not fine-tuned lr.
opt_n = optim.SGD(net.parameters(), lr=lr)

# Training stage
for epoch in range(1, num_epochs+1):
    train_loader_iter = iter(train_loader)
    train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()

    for (imgs, labels) in train_loader_iter:
        net.train()
        opt_n.zero_grad()
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            labels = labels.cuda()
        # Label prediction from LeNet
        y_hat = net(imgs)
        l = loss(y_hat, labels)
        # Backprobagation
        l.backward()
        opt_n.step()

        # Calculate tarining error
        with torch.no_grad():
            labels = labels.long()
            train_l_sum += l.item()
            train_acc_sum += (torch.sum(torch.argmax(y_hat, dim=1) == labels)).float().item()
            n += labels.shape[0]
    # calculate testing error every epoch.
    test_acc = evaluate_accuracy(iter(test_loader), net)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
          % (epoch, train_l_sum/n, train_acc_sum/n, test_acc,
            time.time() - start))

Training using CPU.
epoch 1, loss 0.0181, train acc 0.099, test acc 0.100, time 24.4 sec
epoch 2, loss 0.0180, train acc 0.104, test acc 0.100, time 24.6 sec
epoch 3, loss 0.0171, train acc 0.174, test acc 0.209, time 25.8 sec
epoch 4, loss 0.0159, train acc 0.242, test acc 0.223, time 25.1 sec
epoch 5, loss 0.0155, train acc 0.264, test acc 0.282, time 24.7 sec
epoch 6, loss 0.0151, train acc 0.282, test acc 0.300, time 24.1 sec
epoch 7, loss 0.0147, train acc 0.313, test acc 0.337, time 24.2 sec
epoch 8, loss 0.0140, train acc 0.350, test acc 0.354, time 24.1 sec
epoch 9, loss 0.0135, train acc 0.375, test acc 0.390, time 24.1 sec
epoch 10, loss 0.0131, train acc 0.394, test acc 0.423, time 24.0 sec
epoch 11, loss 0.0128, train acc 0.405, test acc 0.385, time 24.3 sec
epoch 12, loss 0.0125, train acc 0.422, test acc 0.435, time 24.1 sec
epoch 13, loss 0.0122, train acc 0.436, test acc 0.444, time 24.1 sec
epoch 14, loss 0.0119, train acc 0.448, test acc 0.447, time 24.1 sec
epoch 15,

With one additional batch normalization

In [48]:
class LeNetBatchNorm(nn.Module):
    def __init__(self):
        super(LeNetBatchNorm, self).__init__()
        self.conv1 = nn.Conv2d(3, 10, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm2d(10)  # Batch normalization for 6 channels
        self.conv2 = nn.Conv2d(10, 16, kernel_size=5)
        self.bn2 = nn.BatchNorm2d(16)  # Batch normalization for 16 channels
        self.fc1 = nn.Linear(16*6*6, 100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 10)
        self.sigmoid = nn.Sigmoid()
        self.avgpool = nn.AvgPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        x = self.avgpool(self.sigmoid(self.bn1(self.conv1(x))))
        x = self.avgpool(self.sigmoid(self.bn2(self.conv2(x))))
        x = x.view(x.shape[0], -1)
        x = self.sigmoid(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        x = self.fc3(x)
        return x


In [49]:
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.normal_(0.0, 0.01)
    elif classname.find('Linear') != -1:
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.normal_(0.0, 0.01)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.01)
        m.bias.data.fill_(0)

def evaluate_accuracy(data_iter, net):
    """Evaluate accuracy of a model on the given data set."""
    acc_sum,n = 0,0
    for (imgs, labels) in data_iter:
        # send data to the GPU if cuda is availabel
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            labels = labels.cuda()
        net.eval()
        with torch.no_grad():
            labels = labels.long()
            acc_sum += torch.sum((torch.argmax(net(imgs), dim=1) == labels)).float()
            n += labels.shape[0]
    return acc_sum.item()/n


# Loading training set and test set using DataLoader.
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
    shuffle=True, num_workers=0)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
    shuffle=True, num_workers=0)

if torch.cuda.is_available():
    print('Training using GPU.')
    net = LeNet().cuda()
else:
    print('Training using CPU.')
    net = LeNet()

#Initialize network parameters.
net.apply(weights_init)

#Loss function
if torch.cuda.is_available():
    loss = nn.CrossEntropyLoss().cuda()
else:
    loss = nn.CrossEntropyLoss()

# Train using SGD optimizer
lr= 0.4 # not fine-tuned lr.
opt_n = optim.SGD(net.parameters(), lr=lr)

# Training stage
for epoch in range(1, num_epochs+1):
    train_loader_iter = iter(train_loader)
    train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()

    for (imgs, labels) in train_loader_iter:
        net.train()
        opt_n.zero_grad()
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            labels = labels.cuda()
        # Label prediction from LeNet
        y_hat = net(imgs)
        l = loss(y_hat, labels)
        # Backprobagation
        l.backward()
        opt_n.step()

        # Calculate tarining error
        with torch.no_grad():
            labels = labels.long()
            train_l_sum += l.item()
            train_acc_sum += (torch.sum(torch.argmax(y_hat, dim=1) == labels)).float().item()
            n += labels.shape[0]
    # calculate testing error every epoch.
    test_acc = evaluate_accuracy(iter(test_loader), net)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
          % (epoch, train_l_sum/n, train_acc_sum/n, test_acc,
            time.time() - start))

Training using CPU.
epoch 1, loss 0.0181, train acc 0.098, test acc 0.100, time 24.2 sec
epoch 2, loss 0.0180, train acc 0.115, test acc 0.128, time 24.1 sec
epoch 3, loss 0.0165, train acc 0.203, test acc 0.220, time 24.0 sec
epoch 4, loss 0.0158, train acc 0.248, test acc 0.237, time 24.1 sec
epoch 5, loss 0.0154, train acc 0.267, test acc 0.260, time 24.1 sec
epoch 6, loss 0.0151, train acc 0.281, test acc 0.297, time 24.5 sec
epoch 7, loss 0.0149, train acc 0.297, test acc 0.315, time 26.3 sec
epoch 8, loss 0.0146, train acc 0.320, test acc 0.325, time 24.6 sec
epoch 9, loss 0.0141, train acc 0.345, test acc 0.323, time 24.1 sec
epoch 10, loss 0.0135, train acc 0.373, test acc 0.400, time 24.0 sec
epoch 11, loss 0.0130, train acc 0.396, test acc 0.421, time 24.3 sec
epoch 12, loss 0.0126, train acc 0.414, test acc 0.422, time 24.0 sec
epoch 13, loss 0.0122, train acc 0.433, test acc 0.428, time 24.0 sec
epoch 14, loss 0.0119, train acc 0.448, test acc 0.452, time 23.9 sec
epoch 15,