## Convolutional Networks

We'll look at how to build a **convolutional network** to classify CIFAR-10 images. By using weight sharing - multiple units with the same weights - convolutional layers are able to learn patterns that repeat across your data. For example, a unit could learn the pattern for an eye or a face, or lower-level features such as edges.


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import time

import torch
from torch import optim, nn
import torch.nn.functional as F

%matplotlib inline 
import matplotlib.pyplot as plt

from torchsummary import summary  # install with 'pip install torchsummary'
from tensorboardX import SummaryWriter

from models import ConvNet
from utils import plot_images, get_train_valid_loader, plot_weights, plot_gradient_flow

In [None]:
# build the data loaders: the helper returns the training loader first and the
# validation loader second
(trainloader, valloader) = get_train_valid_loader()

In [None]:
# instantiate the convolutional network with its default arguments
net = ConvNet()

# print a per-layer summary of the network for inputs of size 3 x 32 x 32
summary(net, (3, 32, 32))

In [None]:
# visualize init weights of the first convolutional layer
# detach() (rather than .data) keeps autograd's in-place-modification checks,
# and cpu() makes the numpy conversion work even if the net lives on a GPU
w = net.conv1.weight.detach().cpu().numpy()
plot_weights(w, scaling=True)

In [None]:
################################################################################
# TODO:                                                                        #
# Choose an Optimizer that will be used to minimize the loss function.         #
# Choose a criterion that measures the loss                                    #
################################################################################


def _evaluate(model, loader, loss_fn):
    """Compute the mean batch loss and the accuracy of ``model`` on ``loader``.

    The caller is responsible for switching the model to evaluation mode.
    Gradient tracking is disabled for the whole pass, so no autograd graph is
    built for the evaluation batches.

    Args:
        model: network that maps a batch of images to per-class scores.
        loader: iterable yielding ``(images, labels)`` batches.
        loss_fn: callable ``loss_fn(output, labels)`` returning a scalar tensor.

    Returns:
        Tuple ``(loss_avg, accuracy)``: the per-batch losses averaged over the
        number of batches, and the fraction of correctly classified samples.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    loss_accum = 0.
    num_batches = 0
    count_total = 0
    count_correct = 0

    with torch.no_grad():
        for images, labels in loader:
            output = model(images)
            loss_accum += loss_fn(output, labels).item()
            num_batches += 1

            # softmax is monotonic, so the argmax of the raw scores already
            # gives the predicted class
            prediction = torch.argmax(output, dim=1)
            count_total += prediction.size(0)
            count_correct += (prediction == labels).sum().item()

    if num_batches == 0:
        raise ValueError('cannot evaluate on an empty data loader')

    return loss_accum / num_batches, count_correct / count_total


learning_rate = 1e-2
decayRate = 0.2
l2_reg = 0.01
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=l2_reg)

# multiply the learning rate by `decayRate` when the validation loss plateaus
# https://discuss.pytorch.org/t/how-to-do-exponential-learning-rate-decay-in-pytorch/63146/3
# https://pytorch.org/docs/stable/optim.html#torch.optim.lr_scheduler.ReduceLROnPlateau
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, factor=decayRate, patience=3, threshold=1e-2, verbose = True)


epochs = 30
running_loss = 0  # not used in this cell; kept in case other cells read it
print_every = 200
training_steps = 0

# init tensorboard writer
writer = SummaryWriter()

val_acc_best = 0

# try/finally makes sure the tensorboard writer is closed even if training is
# interrupted or fails part-way through
try:
    for e in range(epochs):

        print('\n--- EPOCH %03d / %03d STARTED ---' % (e+1, epochs))

        # set net to train mode
        net.train()

        # log current learning rate
        writer.add_scalar('optimizer/learning_rate', optimizer.param_groups[0]['lr'], e+1)

        # train over all batches
        start = time.time()  # not used in this cell; kept in case other cells read it
        for idx_batch, (images, labels) in enumerate(trainloader):

            training_steps += 1

            if idx_batch % print_every == 0:
                print('---> train on batch %03d' % idx_batch)

            optimizer.zero_grad()
            output = net(images)
            loss = criterion(output, labels)
            loss.backward()
            # NOTE(review): this runs on every batch; if the helper draws a
            # matplotlib figure this may be slow -- confirm it is intended
            plot_gradient_flow(net.named_parameters())

            optimizer.step()

            writer.add_scalar('train/loss_training_step', loss.item(), training_steps)

        # set net to evaluation mode
        net.eval()

        # evaluate on training data
        train_loss_avg, train_acc = _evaluate(net, trainloader, criterion)
        writer.add_scalar('train/loss_epoch', train_loss_avg, e+1)
        writer.add_scalar('train/accuracy', train_acc, e+1)

        # evaluate on validation data
        val_loss_avg, val_acc = _evaluate(net, valloader, criterion)
        writer.add_scalar('val/loss_epoch', val_loss_avg, e+1)
        writer.add_scalar('val/accuracy', val_acc, e+1)

        print('\nMetrics: ')
        print('---> train loss / accuracy: %.03f / %.03f' % (train_loss_avg, train_acc))
        print('---> val loss / accuracy:   %.03f / %.03f' % (val_loss_avg, val_acc))

        # let the scheduler decide, based on the validation loss, whether to
        # reduce the learning rate
        scheduler.step(val_loss_avg)

        # print('filters conv1')
        # w1 = net.conv1.weight.data.numpy()
        # plot_weights(w1, scaling=True)
        # plt.show()

        # keep a checkpoint whenever the validation accuracy improves
        if val_acc > val_acc_best:
            val_acc_best = val_acc
            print('---> save new best checkpoint')
            torch.save(net.state_dict(), 'model_%.03f.ckpt' % val_acc)
finally:
    # close tensorboard logging
    writer.close()

Save best trained model.