
**Install requirements**

In [0]:
!pip3 install 'torch==1.3.1'
!pip3 install 'torchvision==0.5.0'
!pip3 install 'Pillow-SIMD'
!pip3 install 'tqdm'

Collecting torch==1.3.1
  Using cached https://files.pythonhosted.org/packages/88/95/90e8c4c31cfc67248bf944ba42029295b77159982f532c5689bcfe4e9108/torch-1.3.1-cp36-cp36m-manylinux1_x86_64.whl
[31mERROR: torchvision 0.5.0 has requirement torch==1.4.0, but you'll have torch 1.3.1 which is incompatible.[0m
Installing collected packages: torch
  Found existing installation: torch 1.4.0
    Uninstalling torch-1.4.0:
      Successfully uninstalled torch-1.4.0
Successfully installed torch-1.3.1
Collecting torch==1.4.0
  Using cached https://files.pythonhosted.org/packages/24/19/4804aea17cd136f1705a5e98a00618cb8f6ccc375ad8bfa437408e09d058/torch-1.4.0-cp36-cp36m-manylinux1_x86_64.whl
Installing collected packages: torch
  Found existing installation: torch 1.3.1
    Uninstalling torch-1.3.1:
      Successfully uninstalled torch-1.3.1
Successfully installed torch-1.4.0


**Import libraries**

In [0]:
import os
import logging

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset, DataLoader, ConcatDataset
from torch.backends import cudnn

import torchvision
from torchvision import transforms
from torchvision.models import alexnet
from torchvision.models import vgg16

from PIL import Image
from tqdm import tqdm

from torchvision.datasets import VisionDataset
import os.path
import sys

from sklearn.model_selection import train_test_split
import numpy as np
import copy

import matplotlib.pyplot as plt

**Set Arguments**

In [0]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 101 # 101 + 1: There is am extra Background class that should be removed 

# DEFAULT VALUES

#BATCH_SIZE = 256     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                     # the batch size, learning rate should change by the same factor to have comparable results

LR = 0.001            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

#NUM_EPOCHS = 30      # Total number of training epochs (iterations over dataset)
STEP_SIZE = 20       # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 10

# SECOND CONFIGURATION
#LR = 0.01           
#NUM_EPOCHS = 40
#GAMMA = 0.1

# THIRD CONFIGURATION
#LR = 0.01           
#NUM_EPOCHS = 30
#GAMMA = 0.2

# FOURTH CONFIGURATION
#LR = 0.01           
#NUM_EPOCHS = 30
#GAMMA = 0.8

# THIRD CONFIGURATION - Transfer Learning
#LR = 0.01           
#NUM_EPOCHS = 30
#STEP_SIZE = 10
#GAMMA = 0.8

transferLearning = True #True if we use Transfer Learning, False otherwise

train_freeze = "all" 
# "all" train all layers
# "fc" train only fully connected layers and freeze other layers
# "cl" train only convolutional layers and freeze other layers

#### VGGNet ####

network = "vgg"
# "alex" if AlexNet is the network that we want to use
# "vgg" if VGGs is the network that we want to use

BATCH_SIZE = 16
NUM_EPOCHS = 10

**Define Data Preprocessing**

In [0]:
if transferLearning: # ImageNet
  mean = (0.485, 0.456, 0.406)
  std = (0.229, 0.224, 0.225)
else:
  mean = (0.5, 0.5, 0.5)
  std = (0.5, 0.5, 0.5)


# Define transforms for training phase
train_transform = transforms.Compose([transforms.Resize(256),      # Resizes short size of the PIL image to 256
                                      transforms.CenterCrop(224),  # Crops a central square patch of the image
                                                                   # 224 because torchvision's AlexNet needs a 224x224 input!
                                                                   # Remember this when applying different transformations, otherwise you get an error
                                      transforms.ToTensor(), # Turn PIL Image to torch.Tensor
                                      transforms.Normalize(mean, std) # Normalizes tensor with mean and standard deviation
])
# Define transforms for the evaluation phase
eval_transform = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean, std)                                    
])


augmentation_transform1 = transforms.Compose([transforms.Resize(256),      
                                      transforms.RandomCrop(224),                      
                                      transforms.ToTensor(), 
                                      transforms.Normalize(mean, std)
])

augmentation_transform2 = transforms.Compose([transforms.Resize(256),      
                                      transforms.RandomHorizontalFlip(),
                                      transforms.CenterCrop(224),                  
                                      transforms.ToTensor(), 
                                      transforms.Normalize(mean, std)
])

augmentation_transform3 = transforms.Compose([transforms.Resize(256),      
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ColorJitter(brightness=20, contrast=10, saturation=30),
                                      transforms.CenterCrop(224),                  
                                      transforms.ToTensor(), 
                                      transforms.Normalize(mean, std)
])

**Prepare Dataset**

In [0]:
def pil_loader(path):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')

def getDataset(dir, indexClass, split="train"):
    dataset = []

    path = os.path.join(dir, split+".txt")
    with open(path, "r") as f:
        imgs = f.read().splitlines()

    for img in sorted(imgs):
        label = img.split("/")[0]

        if label != "BACKGROUND_Google":
            image = pil_loader(dir+"101_ObjectCategories/"+img)
            item = (image, indexClass[label])
            dataset.append(item)
  
    return dataset # tuple (image, index)



class Caltech(VisionDataset):
    def __init__(self, root, split='train', transform=None, target_transform=None):
        super(Caltech, self).__init__(root, transform=transform, target_transform=target_transform)

        self.split = split # This defines the split you are going to use
                           # (split files are called 'train.txt' and 'test.txt')

        '''
        - Here you should implement the logic for reading the splits files and accessing elements
        - If the RAM size allows it, it is faster to store all data in memory
        - PyTorch Dataset classes use indexes to read elements
        - You should provide a way for the __getitem__ method to access the image-label pair
          through the index
        - Labels should start from 0, so for Caltech you will have lables 0...100 (excluding the background class) 
        '''
        classes, indexClass = self._getClasses(self.root)
        dataset = getDataset(self.root, indexClass, split=self.split)

        self.classes = classes
        self.indexClass = indexClass
        self.dataset = dataset

    def _getClasses(self, dir):
        classes = os.listdir(dir+"101_ObjectCategories")
        classes.remove("BACKGROUND_Google")
        classes.sort()
        indexClass = {classes[i]: i for i in range(len(classes))}

        return classes, indexClass

    def __getitem__(self, index):
        '''
        __getitem__ should access an element through its index
        Args:
            index (int): Index
        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        '''

        image, label = self.dataset[index] # Provide a way to access image and label via index
                           # Image should be a PIL Image
                           # label can be int

        # Applies preprocessing when accessing the image
        if self.transform is not None:
            image = self.transform(image)

        return image, label

    def __len__(self):
        '''
        The __len__ method returns the length of the dataset
        It is mandatory, as this is used by several other components
        '''
        length = len(self.dataset) # Provide a way to get the length (number of elements) of the dataset
        return length

In [5]:
# Clone github repository with data
if not os.path.isdir('./Caltech101'):
  !git clone https://github.com/MachineLearning2020/Homework2-Caltech101.git
  !mv 'Homework2-Caltech101' 'Caltech101'

DATA_DIR = 'Caltech101/'

# Prepare Pytorch train/test Datasets
train_dataset = Caltech(DATA_DIR, split='train',  transform=train_transform) # we will use this dataset to not apply data augmentation to the validation set

#train_dataset_transf = Caltech(DATA_DIR, split='train',  transform=augmentation_transform1) # we will need this for data augmentation
test_dataset = Caltech(DATA_DIR, split='test', transform=eval_transform)


#train_indexes = # split the indices for your train split
#val_indexes = # split the indices for your val split

#train_dataset = Subset(train_dataset, train_indexes)
#val_dataset = Subset(train_dataset, val_indexes)

# Check dataset sizes
print('Train Dataset: {}'.format(len(train_dataset)))
#print('Valid Dataset: {}'.format(len(val_dataset)))
print('Test Dataset: {}'.format(len(test_dataset)))

Train Dataset: 5784
Test Dataset: 2893


In [6]:
targets = [train_dataset[i][1] for i in range(len(train_dataset))]

train_indexes, val_indexes = train_test_split(np.arange(len(targets)), test_size = 0.5, shuffle=True, stratify=targets, random_state=42)
print(sorted(train_indexes))
print(train_indexes)
print()
print(sorted(val_indexes))
print(val_indexes)

[3, 5, 6, 7, 9, 10, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 30, 31, 33, 37, 39, 42, 45, 46, 48, 54, 55, 56, 57, 59, 60, 63, 66, 67, 68, 69, 72, 73, 75, 76, 77, 78, 79, 81, 82, 84, 86, 90, 92, 93, 94, 96, 97, 99, 101, 104, 106, 108, 109, 110, 111, 113, 114, 116, 118, 119, 121, 124, 125, 126, 132, 137, 139, 140, 143, 144, 145, 146, 148, 149, 152, 153, 155, 157, 158, 163, 164, 165, 166, 167, 172, 173, 175, 176, 177, 179, 181, 182, 183, 185, 188, 191, 193, 194, 195, 199, 202, 203, 204, 208, 209, 210, 218, 219, 220, 221, 222, 225, 226, 227, 229, 231, 237, 243, 244, 248, 250, 253, 254, 257, 260, 261, 262, 266, 270, 273, 274, 275, 276, 277, 278, 279, 287, 288, 292, 296, 298, 300, 303, 306, 307, 310, 314, 316, 317, 319, 320, 321, 324, 326, 327, 329, 330, 331, 332, 333, 334, 335, 339, 345, 346, 348, 350, 353, 355, 357, 360, 361, 362, 364, 366, 367, 368, 370, 371, 372, 376, 377, 380, 384, 386, 387, 391, 394, 396, 399, 400, 401, 402, 404, 408, 410, 412, 419, 420, 421, 422, 423, 424, 425, 428,

In [7]:
print(f"Train Dataset: {len(train_indexes)}")
print(f"Valid Dataset: {len(val_indexes)}")

Train Dataset: 2892
Valid Dataset: 2892


In [0]:
val_dataset = Subset(train_dataset, val_indexes) # in order to not apply data augmentation to the validation
#train_dataset_1 = Subset(train_dataset_transf, train_indexes) # transformed
#train_dataset_2 = Subset(train_dataset, train_indexes)
train_dataset = Subset(train_dataset, train_indexes)

In [0]:
#DATA AUGMENTATION -> we concatenate the original dataset with the transformed one
#sets = [train_dataset_1, train_dataset_2]
#train_dataset = ConcatDataset(sets)

In [10]:
len(train_dataset)

2892

**Prepare Dataloaders**

In [0]:
# Dataloaders iterate over pytorch datasets and transparently provide useful functions (e.g. parallelization and shuffling)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, drop_last=True)

test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

**Prepare Network**

In [12]:
if network == "alex":
  net = alexnet(pretrained = transferLearning) # Loading AlexNet model
  # if transferLearning = True, we use the pretrained net

  # AlexNet has 1000 output neurons, corresponding to the 1000 ImageNet's classes
  # We need 101 outputs for Caltech-101
  net.classifier[6] = nn.Linear(4096, NUM_CLASSES) # nn.Linear in pytorch is a fully connected layer
                                                  # The convolutional layer is nn.Conv2d

# We just changed the last layer of AlexNet with a new fully connected layer with 101 outputs
# It is strongly suggested to study torchvision.models.alexnet source code

if network == "vgg":
  net = vgg16(pretrained="imagenet")
  net.classifier[6] = nn.Linear(4096, NUM_CLASSES)


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/checkpoints/vgg16-397923af.pth


HBox(children=(FloatProgress(value=0.0, max=553433881.0), HTML(value='')))




**Prepare Training**

In [0]:
# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

# Choose parameters to optimize
# To access a different set of parameters, you have to access submodules of AlexNet
# (nn.Module objects, like AlexNet, implement the Composite Pattern)
# e.g.: parameters of the fully connected layers: net.classifier.parameters()
# e.g.: parameters of the convolutional layers: look at alexnet's source code ;) 

if train_freeze == "all":
  parameters_to_optimize = net.parameters() # In this case we optimize over all the parameters of AlexNet
elif train_freeze == "fc":
  parameters_to_optimize = net.classifier.parameters()
elif train_freeze == "cl":
  parameters_to_optimize = net.features.parameters()

# Define optimizer
# An optimizer updates the weights based on loss
# We use SGD with momentum
optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# Define scheduler
# A scheduler dynamically changes learning rate
# The most common schedule is the step(-down), which multiplies learning rate by gamma every STEP_SIZE epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

**Training and Validation**

In [14]:
# By default, everything is loaded to cpu
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda

cudnn.benchmark # Calling this optimizes runtime

current_step = 0

#losses = []
#counterLosses = 0

accuracy_train = []
accuracy_val = []
loss_train = []
loss_val = []

#best_accuracy = 0
best_loss = 1000
best_epoch = 0
best_net = 0
acc = 0

# Start iterating over the epochs
for epoch in range(NUM_EPOCHS):
  counterLosses = 0
  print()
  print('\nStarting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))
  running_corrects = 0

  # Iterate over the dataset
  for images, labels in train_dataloader:
    # Bring data over the device of choice
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    net.train() # Sets module in training mode

    # PyTorch, by default, accumulates gradients after each backward pass
    # We need to manually set the gradients to zero before starting a new iteration
    optimizer.zero_grad() # Zero-ing the gradients

    # Forward pass to the network
    outputs_train = net(images)

    _, preds = torch.max(outputs_train.data, 1)

    running_corrects += torch.sum(preds == labels.data).data.item()

    # Compute loss based on output and ground truth
    loss = criterion(outputs_train, labels)

    #counterLosses += loss.item()*images.size(0)

    # Log loss
    if current_step % LOG_FREQUENCY == 0:
      print('Step {}, Loss {}'.format(current_step, loss.item()))

    # Compute gradients for each layer and update weights
    loss.backward()  # backward pass: computes gradients
    optimizer.step() # update weights based on accumulated gradients

    current_step += 1

  loss_train.append(loss.item())
  accuracy = running_corrects/float(len(train_dataset))
  accuracy_train.append(accuracy)

  net.train(False)

  running_correct_val = 0
  for images, labels in tqdm(val_dataloader):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    outputs_val = net(images)

    _, preds = torch.max(outputs_val.data, 1)

    running_correct_val += torch.sum(preds == labels.data).data.item()

    loss = criterion(outputs_val, labels)

  loss_val.append(loss.item())
  accuracy = running_correct_val/ float(len(val_dataset))
  accuracy_val.append(accuracy)

  """
  if accuracy > best_accuracy:
    best_accuracy = accuracy
    best_epoch = epoch
    best_net = copy.deepcopy(net.state_dict())
  """

  if loss.item() < best_loss:
    best_loss = loss.item()
    acc = accuracy
    best_epoch = epoch
    best_net = copy.deepcopy(net.state_dict())

  # Step the scheduler
  scheduler.step()

  #losses.append(counterLosses/float(len(train_dataset)))

print(f"\nThe best epoch is {best_epoch+1} with loss = {best_loss} and accuracy = {acc}")




Starting epoch 1/10, LR = [0.001]




Step 0, Loss 4.6649932861328125
Step 10, Loss 4.249819755554199
Step 20, Loss 2.9293761253356934
Step 30, Loss 3.2396597862243652
Step 40, Loss 2.745976209640503
Step 50, Loss 1.7849349975585938
Step 60, Loss 1.6708509922027588
Step 70, Loss 1.8259751796722412
Step 80, Loss 1.2945725917816162
Step 90, Loss 1.527274250984192
Step 100, Loss 1.4919296503067017
Step 110, Loss 0.8681522011756897
Step 120, Loss 0.7028505802154541
Step 130, Loss 0.8059684634208679
Step 140, Loss 1.1641367673873901
Step 150, Loss 0.4521486461162567
Step 160, Loss 1.0729159116744995
Step 170, Loss 0.8568102121353149


100%|██████████| 180/180 [00:21<00:00,  8.27it/s]



Starting epoch 2/10, LR = [0.001]





Step 180, Loss 1.0127592086791992
Step 190, Loss 0.9333365559577942
Step 200, Loss 0.8596938252449036
Step 210, Loss 0.15394443273544312
Step 220, Loss 0.17981335520744324
Step 230, Loss 0.9784526824951172
Step 240, Loss 0.24571780860424042
Step 250, Loss 0.7120425701141357
Step 260, Loss 0.08158481121063232
Step 270, Loss 0.47768986225128174
Step 280, Loss 0.05839258432388306
Step 290, Loss 0.9902570247650146
Step 300, Loss 0.22150373458862305
Step 310, Loss 0.4780900478363037
Step 320, Loss 0.6098006963729858
Step 330, Loss 0.7462977170944214
Step 340, Loss 0.1017712950706482
Step 350, Loss 0.5927731394767761


100%|██████████| 180/180 [00:22<00:00,  8.12it/s]



Starting epoch 3/10, LR = [0.001]





Step 360, Loss 0.07698240876197815
Step 370, Loss 0.2770034074783325
Step 380, Loss 0.009000122547149658
Step 390, Loss 0.19619977474212646
Step 400, Loss 0.31239795684814453
Step 410, Loss 0.7995076775550842
Step 420, Loss 0.38357022404670715
Step 430, Loss 0.0859115719795227
Step 440, Loss 0.05072373151779175
Step 450, Loss 0.012138724327087402
Step 460, Loss 0.07114112377166748
Step 470, Loss 0.12251773476600647
Step 480, Loss 0.2863577604293823
Step 490, Loss 0.3360494375228882
Step 500, Loss 0.08682775497436523
Step 510, Loss 0.09223681688308716
Step 520, Loss 0.38098645210266113
Step 530, Loss 0.07107070088386536


100%|██████████| 180/180 [00:22<00:00,  8.06it/s]



Starting epoch 4/10, LR = [0.001]





Step 540, Loss 0.112641841173172
Step 550, Loss 0.10449367761611938
Step 560, Loss 0.015279769897460938
Step 570, Loss 0.1339322328567505
Step 580, Loss 0.23402217030525208
Step 590, Loss 0.11479628086090088
Step 600, Loss 0.0031758546829223633
Step 610, Loss 0.32953083515167236
Step 620, Loss 0.07395172119140625
Step 630, Loss 0.012108862400054932
Step 640, Loss 0.01886773109436035
Step 650, Loss 0.00897127389907837
Step 660, Loss 0.060418665409088135
Step 670, Loss 0.0030364990234375
Step 680, Loss 0.029888033866882324
Step 690, Loss 0.08647799491882324
Step 700, Loss 0.012089073657989502
Step 710, Loss 0.0016959905624389648


100%|██████████| 180/180 [00:22<00:00,  8.16it/s]



Starting epoch 5/10, LR = [0.001]





Step 720, Loss 0.04614332318305969
Step 730, Loss 0.014392733573913574
Step 740, Loss 0.003977775573730469
Step 750, Loss 0.3953900933265686
Step 760, Loss 0.1375279426574707
Step 770, Loss 0.3605579733848572
Step 780, Loss 0.10149145126342773
Step 790, Loss 0.09541186690330505
Step 800, Loss 0.008935868740081787
Step 810, Loss 0.11811429262161255
Step 820, Loss 0.019345462322235107
Step 830, Loss 0.1713206171989441
Step 840, Loss 0.014815688133239746
Step 850, Loss 0.038175493478775024
Step 860, Loss 0.19523203372955322
Step 870, Loss 0.06894084811210632
Step 880, Loss 0.007835149765014648
Step 890, Loss 0.020886778831481934


100%|██████████| 180/180 [00:22<00:00,  8.18it/s]



Starting epoch 6/10, LR = [0.001]





Step 900, Loss 0.04618269205093384
Step 910, Loss 0.06250226497650146
Step 920, Loss 0.1369454264640808
Step 930, Loss 0.9065480828285217
Step 940, Loss 0.015567302703857422
Step 950, Loss 0.005316793918609619
Step 960, Loss 0.11915081739425659
Step 970, Loss 0.12338441610336304
Step 980, Loss 0.03182533383369446
Step 990, Loss 0.002554595470428467
Step 1000, Loss 0.11852139234542847
Step 1010, Loss 0.31057798862457275
Step 1020, Loss 0.009486794471740723
Step 1030, Loss 0.007878780364990234
Step 1040, Loss 0.12616699934005737
Step 1050, Loss 0.018029332160949707
Step 1060, Loss 0.0037181973457336426
Step 1070, Loss 0.014783620834350586


100%|██████████| 180/180 [00:22<00:00,  8.06it/s]



Starting epoch 7/10, LR = [0.001]





Step 1080, Loss 0.0026586055755615234
Step 1090, Loss 0.012469470500946045
Step 1100, Loss 0.5536125898361206
Step 1110, Loss 0.0031786561012268066
Step 1120, Loss 0.027273118495941162
Step 1130, Loss 0.12987017631530762
Step 1140, Loss 0.01489037275314331
Step 1150, Loss 0.00015395879745483398
Step 1160, Loss 0.006038546562194824
Step 1170, Loss 0.004154384136199951
Step 1180, Loss 2.5272369384765625e-05
Step 1190, Loss 0.001378178596496582
Step 1200, Loss 0.12137877941131592
Step 1210, Loss 0.0004042387008666992
Step 1220, Loss 0.03431808948516846
Step 1230, Loss 0.2778434455394745
Step 1240, Loss 0.004923820495605469
Step 1250, Loss 0.05562698841094971


100%|██████████| 180/180 [00:21<00:00,  8.18it/s]



Starting epoch 8/10, LR = [0.001]





Step 1260, Loss 0.013600051403045654
Step 1270, Loss 0.01584005355834961
Step 1280, Loss 0.01599675416946411
Step 1290, Loss 0.02076125144958496
Step 1300, Loss 0.052708059549331665
Step 1310, Loss 0.023739874362945557
Step 1320, Loss 0.008678197860717773
Step 1330, Loss 0.005270421504974365
Step 1340, Loss 0.053936779499053955
Step 1350, Loss 0.02830129861831665
Step 1360, Loss 0.006143927574157715
Step 1370, Loss 0.0021263957023620605
Step 1380, Loss 0.007113277912139893
Step 1390, Loss 0.035734474658966064
Step 1400, Loss 0.00427323579788208
Step 1410, Loss 0.003599226474761963
Step 1420, Loss 0.0009480118751525879
Step 1430, Loss 2.6106834411621094e-05


100%|██████████| 180/180 [00:22<00:00,  8.18it/s]



Starting epoch 9/10, LR = [0.001]





Step 1440, Loss 0.01810544729232788
Step 1450, Loss 0.006396055221557617
Step 1460, Loss 0.0015486478805541992
Step 1470, Loss 0.004211902618408203
Step 1480, Loss 0.0008026361465454102
Step 1490, Loss 0.010561048984527588
Step 1500, Loss 0.0006518959999084473
Step 1510, Loss 0.0004096031188964844
Step 1520, Loss 5.6803226470947266e-05
Step 1530, Loss 6.401538848876953e-05
Step 1540, Loss 0.0019676685333251953
Step 1550, Loss 0.003679633140563965
Step 1560, Loss 3.594160079956055e-05
Step 1570, Loss 0.4339160919189453
Step 1580, Loss 1.0609626770019531e-05
Step 1590, Loss 0.0031328201293945312
Step 1600, Loss 0.036397457122802734
Step 1610, Loss 0.00029355287551879883


100%|██████████| 180/180 [00:21<00:00,  8.26it/s]



Starting epoch 10/10, LR = [0.001]





Step 1620, Loss 0.009793341159820557
Step 1630, Loss 0.0003210306167602539
Step 1640, Loss 0.0308687686920166
Step 1650, Loss 0.0030673742294311523
Step 1660, Loss 0.0007050037384033203
Step 1670, Loss 0.0016117095947265625
Step 1680, Loss 0.011597812175750732
Step 1690, Loss 0.00010269880294799805
Step 1700, Loss 5.21540641784668e-05
Step 1710, Loss 0.14564916491508484
Step 1720, Loss 0.0005182027816772461
Step 1730, Loss 5.072355270385742e-05
Step 1740, Loss 0.0013156533241271973
Step 1750, Loss 0.005546450614929199
Step 1760, Loss 0.003642439842224121
Step 1770, Loss 0.0005130767822265625
Step 1780, Loss 0.0005648136138916016
Step 1790, Loss 0.0001811981201171875


100%|██████████| 180/180 [00:22<00:00,  8.18it/s]


The best epoch is 8 with loss = 0.03835397958755493 and accuracy = 0.9014522821576764





In [0]:
best_net

In [0]:
# PLOT LOSSES
plt.plot(loss_train, label="Training")
plt.plot(loss_val, "r", label="Validation")
plt.legend(loc="upper right")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training and Validation losses")
plt.grid(axis="y")
plt.show()

In [16]:
loss_val

[0.5640168786048889,
 0.11916041374206543,
 0.37789058685302734,
 0.11801713705062866,
 0.30325281620025635,
 0.12105575203895569,
 0.09973520040512085,
 0.03835397958755493,
 0.3649991750717163,
 0.03836846351623535]

In [0]:
# PLOT ACCURACIES
plt.plot(accuracy_train, label="Training")
plt.plot(accuracy_val, "r", label="Validation")
plt.legend(loc="lower right")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Training and Validation accuracies")
plt.grid(axis="y")
plt.show()

**Test**

In [18]:
net.load_state_dict(best_net)
net = net.to(DEVICE) # this will bring the network to GPU if DEVICE is cuda
net.train(False) # Set Network to evaluation mode

running_corrects = 0
for images, labels in tqdm(test_dataloader):
  images = images.to(DEVICE)
  labels = labels.to(DEVICE)

  # Forward Pass
  outputs = net(images)

  # Get predictions
  _, preds = torch.max(outputs.data, 1)

  # Update Corrects
  running_corrects += torch.sum(preds == labels.data).data.item()

# Calculate Accuracy
accuracy = running_corrects / float(len(test_dataset))

print('\nTest Accuracy = {}'.format(accuracy))

100%|██████████| 181/181 [00:21<00:00,  8.48it/s]


Test Accuracy = 0.9101278949187694



