In [2]:
import torch

import torchvision
import torchvision.transforms as transforms
from torchvision import models

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
import matplotlib.pyplot as plt

from torch.nn import Parameter

## Model

In [3]:
# CIFAR10 images are 3x32x32, 3-channel 32x32 images

class Net(nn.Module):
    def __init__(self, train_drop1, train_drop2, train_drop3, train_drop4, train_drop5):    
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 12, 3, padding=1)   # (in-channels, out-channels, kernel size)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(train_drop1)
        self.conv2 = nn.Conv2d(12, 32, 3, padding=1)
        self.dropout2 = nn.Dropout(train_drop2)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        self.dropout3 = nn.Dropout(train_drop3)
        self.fc1 = nn.Linear(64 * 4 * 4, 128)
        self.dropout4 = nn.Dropout(train_drop4)
        self.fc2 = nn.Linear(128, 128)
        self.dropout5 = nn.Dropout(train_drop5)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x, bool, drop1, drop2, drop3, drop4, drop5):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.dropout1(x)        
        x = F.dropout(x, drop1, bool)

        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout2(x)
        x = F.dropout(x, drop2, bool)
        
        x = self.pool(F.relu(self.conv3(x)))
        x = x.reshape(-1, 64 * 4 * 4)
        x = self.dropout3(x)
        x = F.dropout(x, drop3, bool)
        
        x = F.relu(self.fc1(x))
        x = self.dropout4(x)
        x = F.dropout(x, drop4, bool)
        
        x = F.relu(self.fc2(x))
        x = self.dropout5(x)
        x = F.dropout(x, drop5, bool)
    
        x = self.fc3(x)
        
        return x

## CIFAR-10

In [5]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# functions to show an image
def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

Files already downloaded and verified
Files already downloaded and verified


## Create network instance

In [9]:
net = Net(train_drop1=0, train_drop2=0, train_drop3=0, train_drop4=0, train_drop5=0)

## Optimizer + Loss (Used Adam weight_decay param as L2 regularization)

In [10]:
criterion = nn.CrossEntropyLoss()

# optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.Adam(net.parameters(), lr=0.001, weight_decay=1e-5)

## Training loop without dropout

In [11]:
dataiter = iter(testloader)
images, labels = dataiter.next()

for epoch in range(10):  # loop over the dataset multiple times

    print('Epoch:', (epoch+1))
    
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs, False, drop1=0, drop2=0, drop3=0, drop4=0, drop5=0)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
            
    '''
    # Save versions of the network
    PATH = './cifar_net_qat' + str(epoch) + '.pth'
    torch.save(net.state_dict(), PATH)
    '''
    
    correct = 0
    total = 0  
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            outputs = net(images, True, drop1=0, drop2=0, drop3=0, drop4=0, drop5=0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy: %.2f %%' % (100 * correct / float(total)))
    
    print("")

print('Finished Training')

Epoch: 1
[1,  2000] loss: 1.844
[1,  4000] loss: 1.525
[1,  6000] loss: 1.374
[1,  8000] loss: 1.305
[1, 10000] loss: 1.257
[1, 12000] loss: 1.165
Accuracy: 59.05 %

Epoch: 2
[2,  2000] loss: 1.089
[2,  4000] loss: 1.069
[2,  6000] loss: 1.050
[2,  8000] loss: 1.025
[2, 10000] loss: 1.021
[2, 12000] loss: 1.019
Accuracy: 66.01 %

Epoch: 3
[3,  2000] loss: 0.909
[3,  4000] loss: 0.916
[3,  6000] loss: 0.922
[3,  8000] loss: 0.907
[3, 10000] loss: 0.901
[3, 12000] loss: 0.915
Accuracy: 67.61 %

Epoch: 4
[4,  2000] loss: 0.815
[4,  4000] loss: 0.841
[4,  6000] loss: 0.822
[4,  8000] loss: 0.855
[4, 10000] loss: 0.859
[4, 12000] loss: 0.861
Accuracy: 67.31 %

Epoch: 5
[5,  2000] loss: 0.771
[5,  4000] loss: 0.774
[5,  6000] loss: 0.783
[5,  8000] loss: 0.794
[5, 10000] loss: 0.794
[5, 12000] loss: 0.797
Accuracy: 69.73 %

Epoch: 6
[6,  2000] loss: 0.680
[6,  4000] loss: 0.736
[6,  6000] loss: 0.724
[6,  8000] loss: 0.768
[6, 10000] loss: 0.765
[6, 12000] loss: 0.775
Accuracy: 69.26 %

Epoc

## Iterate and train with different dropouts range 0.1-0.5

In [6]:
# Iterate over dropout over range
drop_r = [0.1, 0.2, 0.3, 0.4, 0.5]
text = ['1', '2', '3', '4', '5']

for j in range(len(drop_r)):
    print("drop rate", drop_r[j])
    print("")
    net = Net(train_drop1=0, train_drop2=0, train_drop3=drop_r[j], train_drop4=drop_r[j], train_drop5=drop_r[j])
    
    criterion = nn.CrossEntropyLoss()

    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    
    dataiter = iter(testloader)
    images, labels = dataiter.next()

    for epoch in range(10):  # loop over the dataset multiple times

        print('Epoch:', (epoch+1))

        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs, False, drop1=0, drop2=0, drop3=0, drop4=0, drop5=0)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0

        correct = 0
        total = 0  
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                outputs = net(images, True, drop1=0, drop2=0, drop3=0, drop4=0, drop5=0)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        print('Accuracy: %.2f %%' % (100 * correct / float(total)))

        print("")

    print('Finished Training')
    
    PATH = './cifar_drop_' + text[j] + '.pth'
    torch.save(net.state_dict(), PATH)
    print(PATH)
    
    print("")
    print("")



drop rate 0.1

Epoch: 1
[1,  2000] loss: 2.271
[1,  4000] loss: 2.012
[1,  6000] loss: 1.761
[1,  8000] loss: 1.603
[1, 10000] loss: 1.517
[1, 12000] loss: 1.446
Accuracy: 49.41 %

Epoch: 2
[2,  2000] loss: 1.361
[2,  4000] loss: 1.320
[2,  6000] loss: 1.241
[2,  8000] loss: 1.212
[2, 10000] loss: 1.175
[2, 12000] loss: 1.167
Accuracy: 56.49 %

Epoch: 3
[3,  2000] loss: 1.100
[3,  4000] loss: 1.060
[3,  6000] loss: 1.070
[3,  8000] loss: 1.063
[3, 10000] loss: 1.032
[3, 12000] loss: 1.013
Accuracy: 65.42 %

Epoch: 4
[4,  2000] loss: 0.930
[4,  4000] loss: 0.949
[4,  6000] loss: 0.951
[4,  8000] loss: 0.950
[4, 10000] loss: 0.935
[4, 12000] loss: 0.917
Accuracy: 64.59 %

Epoch: 5
[5,  2000] loss: 0.854
[5,  4000] loss: 0.837
[5,  6000] loss: 0.852
[5,  8000] loss: 0.869
[5, 10000] loss: 0.853
[5, 12000] loss: 0.853
Accuracy: 66.68 %

Epoch: 6
[6,  2000] loss: 0.780
[6,  4000] loss: 0.787
[6,  6000] loss: 0.765
[6,  8000] loss: 0.799
[6, 10000] loss: 0.799
[6, 12000] loss: 0.791
Accuracy

## Testing with some dropout

In [None]:
PATHS = ['./sample_models/cifar_weight_2_1e-3.pth', './sample_models/cifar_weight_2_5e-3.pth', './sample_models/cifar_net_sgd_72_41.pth']
drops = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6]

for i in range(len(PATHS)):
    print(PATHS[i])
    net = Net(train_drop1=0, train_drop2=0, train_drop3=0, train_drop4=0, train_drop5=0)
    net.load_state_dict(torch.load(PATHS[i]))

    for j in range(len(drops)):
        correct = 0
        total = 0 
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                outputs = net(images, True, drop1=0, drop2=0, drop3=drops[j], drop4=drops[j], drop5=drops[j])
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        print('Drop:', drops[j], 'Accuracy: %.2f %%' % (100 * correct / float(total)))
                
    print("")