In [13]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import tensorboard
import os
import matplotlib.pyplot as plt
import time
import copy
import numpy as np

# Downloading  and  preparing  CIFAR-10  dataset 

In [14]:
transform = transforms.Compose(
    [transforms.Resize(224),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

#images, labels = next(iter(trainloader))

Files already downloaded and verified
Files already downloaded and verified


# Downloading AlexNet and adding fully connected layer

In [3]:
model = torch.hub.load('pytorch/vision:v0.9.0', 'alexnet', pretrained=True)
model.classifier[6] = nn.Linear(4096,10)
model.eval()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

Using cache found in C:\Users\andre/.cache\torch\hub\pytorch_vision_v0.9.0
Downloading: "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth" to C:\Users\andre/.cache\torch\hub\checkpoints\alexnet-owt-4df8aa71.pth


  0%|          | 0.00/233M [00:00<?, ?B/s]

In [15]:
model_2 = torch.hub.load('pytorch/vision:v0.9.0', 'alexnet', pretrained=True)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_2.parameters(), lr=0.0001, momentum=0.9)

Using cache found in C:\Users\andre/.cache\torch\hub\pytorch_vision_v0.9.0


In [24]:
for param in model_2.parameters():
    param.requeires_grad = False
model_2.classifier[6] = nn.Linear(4096,10)

model_2.eval()
model_2.to(device)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

# Finetuning / Training

In [8]:
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    total = 0
    correct = 0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)
        #inputs, labels = data
        
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs).to(device)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # print statistics
        running_loss += loss.item()
#         writer.add_scalar('Loss/train', loss.item(), i)
#         writer.add_scalar('Accuracy/train', correct/total, i)
       
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
            print("Accuracy: ", correct/total)

print('Finished Training')

[1,  2000] loss: 2.302
Accuracy:  0.108125
[1,  4000] loss: 2.302
Accuracy:  0.104625
[1,  6000] loss: 2.302
Accuracy:  0.10845833333333334
[1,  8000] loss: 2.301
Accuracy:  0.1110625
[1, 10000] loss: 2.300
Accuracy:  0.124225
[1, 12000] loss: 2.297
Accuracy:  0.1338125
[2,  2000] loss: 2.277
Accuracy:  0.17375
[2,  4000] loss: 2.072
Accuracy:  0.2135625
[2,  6000] loss: 1.919
Accuracy:  0.24270833333333333
[2,  8000] loss: 1.810
Accuracy:  0.269375
[2, 10000] loss: 1.719
Accuracy:  0.2903
[2, 12000] loss: 1.651
Accuracy:  0.3071875
[3,  2000] loss: 1.592
Accuracy:  0.419375
[3,  4000] loss: 1.559
Accuracy:  0.4241875
[3,  6000] loss: 1.525
Accuracy:  0.43083333333333335
[3,  8000] loss: 1.499
Accuracy:  0.43484375
[3, 10000] loss: 1.464
Accuracy:  0.4422
[3, 12000] loss: 1.434
Accuracy:  0.4488333333333333
[4,  2000] loss: 1.401
Accuracy:  0.48975
[4,  4000] loss: 1.369
Accuracy:  0.4965625
[4,  6000] loss: 1.354
Accuracy:  0.5040833333333333
[4,  8000] loss: 1.352
Accuracy:  0.505812

# Training (Feature Extraction, only training last layer)

In [25]:
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    total = 0
    correct = 0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)
        #inputs, labels = data
        
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model_2(inputs).to(device)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # print statistics
        running_loss += loss.item()
#         writer.add_scalar('Loss/train', loss.item(), i)
#         writer.add_scalar('Accuracy/train', correct/total, i)
       
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
            print("Accuracy: ", correct/total)

print('Finished Training')

[1,  2000] loss: 1.267
Accuracy:  0.552625
[1,  4000] loss: 0.763
Accuracy:  0.6420625
[1,  6000] loss: 0.632
Accuracy:  0.6890416666666667
[1,  8000] loss: 0.578
Accuracy:  0.7175625
[1, 10000] loss: 0.520
Accuracy:  0.7381
[1, 12000] loss: 0.493
Accuracy:  0.7528541666666667
[2,  2000] loss: 0.427
Accuracy:  0.849625
[2,  4000] loss: 0.398
Accuracy:  0.855125
[2,  6000] loss: 0.385
Accuracy:  0.8594583333333333
[2,  8000] loss: 0.397
Accuracy:  0.86040625
[2, 10000] loss: 0.376
Accuracy:  0.8623
[2, 12000] loss: 0.364
Accuracy:  0.8644791666666667
[3,  2000] loss: 0.306
Accuracy:  0.8955
[3,  4000] loss: 0.292
Accuracy:  0.89575
[3,  6000] loss: 0.307
Accuracy:  0.8959583333333333
[3,  8000] loss: 0.294
Accuracy:  0.89690625
[3, 10000] loss: 0.288
Accuracy:  0.8975
[3, 12000] loss: 0.289
Accuracy:  0.8976666666666666
[4,  2000] loss: 0.230
Accuracy:  0.923875
[4,  4000] loss: 0.223
Accuracy:  0.92475
[4,  6000] loss: 0.225
Accuracy:  0.9236666666666666
[4,  8000] loss: 0.231
Accuracy

# Testing (on finetuned model)

In [5]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        #images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 72 %


# Testing (AlexNet as Feature Extraction)

In [26]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        #images, labels = data
        outputs = model_2(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 89 %


In [21]:
torch.save(model.state_dict(), "model.pt")

In [4]:
model.load_state_dict(torch.load("model.pt"))

<All keys matched successfully>

# Transfer Learning from MNIST

In [None]:
model_2 = torch.hub.load('pytorch/vision:v0.9.0', 'alexnet', pretrained=True)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_2.parameters(), lr=0.0001, momentum=0.9)