In [1]:
import copy

import torch
import torch.nn as nn
import torch.optim as optim
from torch.backends import cudnn

from torchvision.models import resnet18

from Cifar100.Cifar100 import Cifar100

**Set Arguments**

In [2]:
DEVICE = 'cuda' # 'cuda' or 'cpu'

NUM_CLASSES = 100
CLASSES_EACH_TRAIN = 10

BATCH_SIZE = 256     # Higher batch sizes allows for larger learning rates. An empirical heuristic suggests that, when changing
                    # the batch size, learning rate should change by the same factor to have comparable results

LR = 1e-3            # The initial Learning Rate
MOMENTUM = 0.9       # Hyperparameter for SGD, keep this at 0.9 when using SGD
WEIGHT_DECAY = 5e-5  # Regularization, you can keep this at the default

NUM_EPOCHS = 5  #30    # Total number of training epochs (iterations over dataset)
STEP_SIZE = 2  #30    # How many epochs before decreasing learning rate (if using a step-down policy)
GAMMA = 0.1          # Multiplicative factor for learning rate step-down

LOG_FREQUENCY = 100


**Prepare Network**

In [3]:
net = resnet18()
best_net = resnet18()

net.fc = nn.Linear(net.fc.in_features, NUM_CLASSES)

**Prepare Training**

In [4]:
# Define loss function
criterion = nn.CrossEntropyLoss() # for classification, we use Cross Entropy

parameters_to_optimize = net.parameters()

optimizer = optim.SGD(parameters_to_optimize, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
# optimizer = optim.Adam(parameters_to_optimize, lr=LR, weight_decay=WEIGHT_DECAY)

# Define scheduler
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

**Train and Test**

In [5]:
net = net.to(DEVICE)
cudnn.benchmark = True # Calling this optimizes runtime

current_step = 0
best_accuracy = 0
loss = 0
index = 0
accuracy_train = []
accuracy_test = []
loss_train = []
loss_test = []

In [6]:
#New variable from our class Cifar100
cifar100 = Cifar100(BATCH_SIZE, NUM_EPOCHS, DEVICE, LR, STEP_SIZE, GAMMA)

for index in range(0, int(NUM_CLASSES/CLASSES_EACH_TRAIN)):

  print('index', index)
  #Load data from Cifar100
  train_dataloader = cifar100.load('train', index=index)
  test_dataloader = cifar100.load('test', index=index)

  # Start iterating over the epochs
  for epoch in range(NUM_EPOCHS):

    print('Starting epoch {}/{}, LR = {}'.format(epoch+1, NUM_EPOCHS, scheduler.get_lr()))
    running_correct=0

    # Iterate over the dataset
    for images, labels in train_dataloader:

      # Bring data over the device of choice
      images = images.to(DEVICE)
      labels = labels.to(DEVICE)

      net.train().to(DEVICE)
      optimizer.zero_grad()
      outputs = net(images)

      #Calc the correct for the graph
      _, preds = torch.max(outputs.data, 1)
      running_correct += torch.sum(preds == labels.data).data.item()

      # Compute loss based on output and ground truth
      loss = criterion(outputs, labels)

      # Log loss
      if current_step % LOG_FREQUENCY == 0:
        print('Step {}, Loss {}'.format(current_step, loss.item()))

      # Compute gradients for each layer and update weights
      loss.backward()  # backward pass: computes gradients
      optimizer.step() # update weights based on accumulated gradients

      current_step += 1

    loss_train.append(loss.item())
    accuracy_train.append(running_correct / float(len(train_dataloader)))

    # Step the scheduler
    scheduler.step()

  accuracy, loss = cifar100.test(net, test_dataloader, criterion)

  loss_test.append(loss.item())
  accuracy_test.append(accuracy)
  print('Test Accuracy: {}'.format(accuracy))


  if accuracy > best_accuracy:
    best_net = copy.deepcopy(net)
    best_accuracy = accuracy


Files already downloaded and verified
Files already downloaded and verified
Cifar100 - DATASET CREATED
Train Dataset: 50000
Test Dataset: 10000
range(0, 10)
index 0
Starting epoch 1/5, LR = [0.001]
Step 0, Loss 4.778038024902344
Step 100, Loss 1.8260149955749512
Starting epoch 2/5, LR = [0.001]
Step 200, Loss 1.3069546222686768
Step 300, Loss 1.4522117376327515
Starting epoch 3/5, LR = [1e-05]
Step 400, Loss 1.2341827154159546
Step 500, Loss 1.2403239011764526
Starting epoch 4/5, LR = [0.0001]
Step 600, Loss 1.2569968700408936
Step 700, Loss 1.232317566871643
Starting epoch 5/5, LR = [1.0000000000000002e-06]
Step 800, Loss 1.2365448474884033
Step 900, Loss 1.176631212234497
Test Accuracy: 149.75
index 1


100%|██████████| 40/40 [00:08<00:00,  4.71it/s]


ValueError: num_samples should be a positive integer value, but got num_samples=0

**Plots**

In [None]:
cifar100.plot(accuracy_train, accuracy_test, loss_train, loss_test)
print('Best accuracy', best_accuracy)
