In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import cm
import torchvision
from torch.utils import data
from torchvision import transforms
from time import time
import multiprocessing as mp

In [2]:
class BasicBlock(nn.Module):

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out



class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def project1_model():
    return ResNet(BasicBlock, [2, 2, 2, 2])

# def test():
#     net = ResNet18()
#     y = net(torch.randn(1, 3, 32, 32))
#     print(y.size())


In [3]:
transform_train = torchvision.transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=512, shuffle=True, num_workers=1)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=200, shuffle=False, num_workers=1)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')


Files already downloaded and verified
Files already downloaded and verified


In [4]:
net = project1_model()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

net.to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

train_loss_list = []
train_acc_list = []
#train_dur_list = []
test_loss_list = []
test_acc_list = []
#test_dur_list = []

In [5]:
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    batch_index = 0
    t0 = time()
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        # loss = nll(outputs, targets)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        batch_index = batch_idx
        # train_loss += loss.item()
        train_loss = loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        #print(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     #% (train_loss/(batch_idx+1), 100.*correct/total, correct, total))
        #print("\n")
    train_loss_list.append(train_loss/(batch_index+1))
    train_acc_list.append(100.*correct/total)
    print(' Train: Loss: %.6f | Acc: %.3f%% | Dur: %.2fS' 
          % (train_loss/(batch_index+1), 100.*correct/total, time() - t0))

def test(epoch):
#     global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    batch_index = 0
    t0 = time()
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            # loss = nll(outputs, targets)
            batch_index = batch_idx
            # test_loss += loss.item()
            test_loss = loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            #print(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         #% (test_loss/(batch_idx+1), 100.*correct/total, correct, total))
            #print("\n")
    test_loss_list.append(test_loss/(batch_index+1))
    test_acc_list.append(100.*correct/total)
    print(' Test:  Loss: %.6f | Acc: %.3f%% | Dur: %.2fS' 
          % (test_loss/(batch_index+1), 100.*correct/total, time() - t0))

    # Save checkpoint.
#     acc = 100.*correct/total
#     if acc > best_acc:
#         print('Saving..')
#         state = {
#             'net': net.state_dict(),
#             'acc': acc,
#             'epoch': epoch,
#             'train_history': (train_loss_list, train_acc_list, test_loss_list, test_acc_list)
#         }
#         if not os.path.isdir('checkpoint'):
#             os.mkdir('checkpoint')
#         torch.save(state, './checkpoint/ckpt_rmsprop.pth')
#         best_acc = acc


def plot_model(train_loss, train_acc, test_loss, test_acc):
    length = len(train_acc)
    plt.subplot(2, 2, 1)
    plt.plot(range(length), train_acc)
    plt.title('Train accuracy vs. epoches')
    plt.ylabel('Train accuracy')
    plt.xlabel('epoch')
    plt.subplot(2, 2, 2)
    plt.plot(range(length), train_loss)
    plt.title('Train loss vs. epoches')
    plt.ylabel('Train loss')
    plt.xlabel('epoch')
    plt.subplot(2, 2, 3)
    plt.plot(range(length), test_acc)
    plt.title('Test accuracy vs. epoches')
    plt.ylabel('Test accuracy')
    plt.xlabel('epoch')
    plt.subplot(2, 2, 4)
    plt.plot(range(length), test_loss)
    plt.title('Test loss vs. epoches')
    plt.ylabel('Test loss')
    plt.xlabel('epoch')

In [None]:
for epoch in range(100):
    train(epoch)
    test(epoch)
    scheduler.step()


Epoch: 0


In [7]:
print(train_acc_list)
print(train_loss_list)

[23.0, 32.062, 35.88, 38.674, 40.974, 43.374, 45.082, 46.856, 48.698, 50.268, 51.932, 52.678, 54.838, 55.492, 56.464, 57.178, 58.19, 59.394, 59.654, 61.142, 61.326, 62.036, 62.26, 63.474, 64.452, 64.942, 65.542, 66.142, 66.562, 67.372, 67.508, 68.338, 68.474, 69.294, 69.736, 70.222, 70.702, 71.172, 71.332, 72.04, 72.244, 72.746, 73.222, 73.11, 73.974, 74.826, 74.744, 75.014, 75.358, 75.64, 76.028, 76.328, 76.434, 77.178, 77.054, 77.986, 77.746, 78.894, 78.536, 79.12, 79.098, 79.412, 79.684, 79.976, 80.204, 80.548, 80.826, 80.964, 81.266, 81.096, 81.564, 81.84, 82.026, 82.514, 82.078, 82.594, 82.852, 83.144, 83.36, 83.274, 83.338, 83.618, 83.926, 84.078, 84.47, 84.716, 84.288, 84.81, 85.1, 85.092, 85.136, 85.774, 85.67, 85.574, 85.768, 85.956, 86.322, 86.272, 86.396, 86.358]
[0.07863185882568359, 0.0707058334350586, 0.06798649787902832, 0.06558572769165039, 0.06468032360076904, 0.061038212776184084, 0.057265644073486326, 0.056644749641418454, 0.05576033592224121, 0.05498651027679444, 0.

In [8]:
print(test_acc_list)
print(test_loss_list)

[10.47, 33.97, 33.71, 40.18, 41.94, 39.81, 45.23, 45.07, 48.13, 49.38, 51.01, 43.99, 51.95, 54.53, 55.06, 53.9, 57.84, 55.98, 57.95, 56.88, 60.61, 61.95, 61.24, 61.19, 62.33, 63.5, 65.14, 61.19, 65.77, 65.59, 65.18, 64.24, 59.65, 61.63, 63.67, 66.98, 66.23, 67.73, 67.45, 66.38, 68.32, 67.76, 69.04, 71.43, 70.55, 71.78, 67.69, 68.69, 71.08, 70.94, 73.65, 72.5, 69.46, 73.72, 73.45, 73.61, 72.89, 72.87, 76.42, 74.02, 76.48, 75.72, 75.01, 76.05, 77.22, 77.23, 76.04, 76.19, 75.19, 76.65, 77.29, 77.49, 78.73, 76.86, 77.47, 78.62, 77.57, 78.64, 79.18, 79.42, 79.63, 78.95, 78.14, 77.99, 79.66, 80.11, 79.83, 78.99, 78.55, 79.58, 81.01, 79.06, 76.25, 80.91, 79.46, 80.71, 79.1, 79.75, 80.85, 80.27]
[0.24788956642150878, 0.17773879766464235, 0.1862744927406311, 0.16267482042312623, 0.15411751270294188, 0.17147183418273926, 0.14832090139389037, 0.15638885498046876, 0.1472830057144165, 0.13727262020111083, 0.13453842401504518, 0.17226266860961914, 0.13430585861206054, 0.13010144233703613, 0.12478911

In [None]:
plot_model(train_loss_list,train_acc_list,test_loss_list,test_acc_list)