In [0]:
################################################################################
# Title= CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features 
# Author= Yun, Sangdoo and Han, Dongyoon and Oh, Seong Joon and Chun, Sanghyuk and Choe, Junsuk and Yoo, Youngjoon,
# Booktitle = International Conference on Computer Vision (ICCV)
# Year=2019
# Availability: https://github.com/clovaai/CutMix-PyTorch
################################################################################

###############################################################
#Title: mixup: Beyond Empirical Risk Minimization
#Author: Hongyi Zhang, Moustapha Cisse, Yann N. Dauphin, David Lopez-Paz
#Journal: International Conference on Learning Representations
#Date: 2018
#Availability: https://github.com/facebookresearch/mixup-cifar10
###############################################################


import argparse
import os
import shutil
import time

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import numpy as np
import math

In [0]:
import torch.nn.functional as F
class CNN(nn.Module):
    """Small three-conv / three-linear classifier producing 10 log-probabilities.

    The fully connected head is sized for 3x32x32 inputs: each 3x3 convolution
    (no padding) followed by 2x2 max-pooling takes the spatial size
    32 -> 30 -> 15 -> 13 -> 6 -> 4 -> 2, leaving 256 channels of 2x2 = 1024
    features per sample.
    """

    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3,   64,  3)
        self.conv2 = nn.Conv2d(64,  128, 3)
        self.conv3 = nn.Conv2d(128, 256, 3)
        self.pool = nn.MaxPool2d(2, 2)
        # 256 channels * 2 * 2 spatial positions (same 1024 as before, but
        # written in terms of what conv3 + pooling actually emit).
        self.fc1 = nn.Linear(256 * 2 * 2, 128)
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 10).

        Raises:
            RuntimeError: if the spatial size of ``x`` does not reduce to the
                1024 features ``fc1`` expects (i.e. the input is not 32x32).
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Flatten per sample.  A `view(-1, 1024)` would silently fold surplus
        # spatial elements into the batch dimension for other input sizes.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        # log_softmax is idempotent, so feeding this into a loss that applies
        # log_softmax again (e.g. nn.CrossEntropyLoss) gives the same value.
        return F.log_softmax(x, dim=1)
        

In [0]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one epoch of mixup training and return the average training loss.

    Every batch is blended with a shuffled copy of itself by ``mixup_data`` and
    the loss is the matching blend of the two target losses computed by
    ``mixup_criterion``.  Reads the notebook-level globals ``alpha`` (Beta
    parameter handed to ``mixup_data``) and ``epochs`` (only for the summary
    line).

    Args:
        train_loader: iterable yielding ``(input, target)`` batches.
        model: network to optimise; batches are moved to the device of its
            first parameter.
        criterion: loss callable invoked as ``criterion(output, target)``.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch index, used only in the printed summary.

    Returns:
        The sample-weighted mean of the mixup loss over the epoch.
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    # Follow the model's device instead of calling .cuda() unconditionally.
    device = next(model.parameters()).device

    for input, target in train_loader:
        input = input.to(device)
        target = target.to(device)

        # Apply Mixup to the input data
        input, targets_a, targets_b, lam_mu = mixup_data(
            input, target, alpha, use_cuda=input.is_cuda)

        output = model(input)
        loss = mixup_criterion(criterion, output, targets_a, targets_b, lam_mu)

        # Error rates are measured against the original (un-mixed) labels.
        err1, err5 = accuracy(output.detach(), target, topk=(1, 5))

        losses.update(loss.item(), input.size(0))
        top1.update(err1.item(), input.size(0))
        top5.update(err5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print('* Epoch: [{0}/{1}]\t Top 1-err {top1.avg:.3f}  Top 5-err {top5.avg:.3f}\t Train Loss {loss.avg:.3f}'.format(
        epoch, epochs, top1=top1, top5=top5, loss=losses))

    return losses.avg

def mixup_data(x, y, alpha=1.0, use_cuda=True):
    '''Returns mixed inputs, pairs of targets, and lambda.

    Args:
        x: batch tensor; it is mixed along its first dimension.
        y: targets indexed the same way as ``x`` along the first dimension.
        alpha: parameter of the Beta(alpha, alpha) distribution the mixing
            weight is drawn from; ``alpha <= 0`` disables mixing (lambda = 1).
        use_cuda: kept for backward compatibility only.  The permutation index
            is now always placed on ``x.device``, so CPU batches work even
            when this flag is left at its default.

    Returns:
        ``(mixed_x, y_a, y_b, lam)`` where
        ``mixed_x = lam * x + (1 - lam) * x[perm]``, ``y_a = y`` and
        ``y_b = y[perm]`` for one random permutation ``perm`` of the batch.
    '''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size(0)
    # Draw on the CPU generator (as before) and then follow the data's device.
    index = torch.randperm(batch_size).to(x.device)

    mixed_x = lam * x + (1 - lam) * x[index]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Blend the losses against both mixup targets, weighting ``y_a`` by ``lam``."""
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b


def rand_bbox(size, lam):
    """Sample a random box whose area is roughly ``(1 - lam)`` of the image.

    Args:
        size: 4-element shape; ``size[2]`` and ``size[3]`` are taken as the two
            box extents (named ``W`` and ``H`` below).
        lam: mixing weight; each box side is ``sqrt(1 - lam)`` of the extent.

    Returns:
        ``(bbx1, bby1, bbx2, bby2)``: box corners clipped to ``[0, W]`` and
        ``[0, H]``.  The box can be smaller than requested when its centre
        falls near a border.
    """
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    # Built-in int: the `np.int` alias was removed in NumPy 1.24.
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # uniform
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2


def validate(val_loader, model, criterion, epoch):
    """Evaluate ``model`` on ``val_loader`` and print/return the error rates.

    Reads the notebook-level global ``epochs`` for the summary line.

    Args:
        val_loader: iterable yielding ``(input, target)`` batches.
        model: network to evaluate; batches are moved to the device of its
            first parameter.
        criterion: loss callable invoked as ``criterion(output, target)``.
        epoch: current epoch index, used only in the printed summary.

    Returns:
        ``(top1_err, top5_err, loss)``: sample-weighted averages over the set.
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    device = next(model.parameters()).device

    # No gradients are needed for evaluation; skipping graph construction
    # saves memory and time.
    with torch.no_grad():
        for input, target in val_loader:
            input = input.to(device)
            target = target.to(device)

            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            err1, err5 = accuracy(output, target, topk=(1, 5))

            losses.update(loss.item(), input.size(0))
            top1.update(err1.item(), input.size(0))
            top5.update(err5.item(), input.size(0))

    print('* Epoch: [{0}/{1}]\t Top 1-err {top1.avg:.3f}  Top 5-err {top5.avg:.3f}\t Test Loss {loss.avg:.3f}'.format(
        epoch, epochs, top1=top1, top5=top5, loss=losses))
    return top1.avg, top5.avg, losses.avg

class AverageMeter(object):
    """Keeps the most recent value together with a running weighted mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        # Zero every statistic in one go.
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        # `val` is remembered as the latest value and added to the total with
        # weight `n`.
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def adjust_learning_rate(optimizer, epoch):
    """Set the step-decayed learning rate for ``epoch`` on every param group.

    The rate is the group's base LR for the first half of training, base x 0.1
    from 50% of ``epochs`` (notebook-level global) and base x 0.01 from 75%.

    The base LR of each group is remembered under the ``'initial_lr'`` key the
    first time the group is seen, and the decay is always applied to that base.
    Multiplying the previously decayed rate again on every call would shrink it
    by another 10x per epoch once the first milestone is passed.

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts with an
            ``'lr'`` entry).
        epoch: zero-based epoch index.
    """
    decay = (0.1 ** (epoch // (epochs * 0.5))) * (0.1 ** (epoch // (epochs * 0.75)))
    for param_group in optimizer.param_groups:
        base_lr = param_group.setdefault('initial_lr', param_group['lr'])
        param_group['lr'] = base_lr * decay


def get_learning_rate(optimizer):
    """Return the current ``'lr'`` of every parameter group, in group order."""
    return [group['lr'] for group in optimizer.param_groups]


def accuracy(output, target, topk=(1,)):
    """Computes the top-k error rate (in percent) for the specified values of k.

    Despite the name, each returned value is ``100 * (1 - top-k accuracy)``.

    Args:
        output: ``(batch, classes)`` score tensor.
        target: ``(batch,)`` tensor of class indices.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per entry of ``topk``.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # reshape rather than view: `correct` comes from a transposed tensor,
        # so its first k rows are not contiguous and view(-1) raises for k > 1.
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        wrong_k = batch_size - correct_k
        res.append(wrong_k.mul_(100.0 / batch_size))

    return res

In [0]:
# ---- Run configuration -------------------------------------------------------
batch_size = 100
numberofclass = 10
depth = 200
# NOTE(review): `alpha` is handed to mixup_data() as the Beta(alpha, alpha)
# parameter.  At 240 the sampled mixing weight is concentrated tightly around
# 0.5 (mixup_data's own default is 1.0) — confirm this value is intended.
alpha = 240
lr = 0.1
epochs = 300
beta = 1.0
print_freq = 10
verbose = True
workers = 4
momentum = 0.9
weight_decay = 1e-4
best_err1 = 100
best_err5 = 100
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')

# ---- Data --------------------------------------------------------------------
normalize = transforms.Normalize(mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
                                 std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
transform_test = transforms.Compose([
    transforms.ToTensor(),
    normalize,
])

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('../data', train=True, download=True, transform=transform_train),
    batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=use_cuda)
# Evaluation averages do not depend on sample order, so no shuffling is needed.
val_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('../data', train=False, transform=transform_test),
    batch_size=batch_size, shuffle=False, num_workers=workers, pin_memory=use_cuda)

# ---- Model / optimiser -------------------------------------------------------
model = CNN()
if use_cuda:
    model = torch.nn.DataParallel(model)
model = model.to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr, momentum=momentum, weight_decay=weight_decay, nesterov=True)
cudnn.benchmark = True

# ---- Training loop -----------------------------------------------------------
# Iterate over `epochs` rather than a hard-coded 300 so the schedule in
# adjust_learning_rate() and the progress lines stay in sync with the loop.
for epoch in range(epochs):

    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    train_loss = train(train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    err1, err5, val_loss = validate(val_loader, model, criterion, epoch)

    # remember the best top-1 error and the top-5 error from the same epoch
    is_best = err1 <= best_err1
    best_err1 = min(err1, best_err1)
    if is_best:
        best_err5 = err5

    print('Current best accuracy (top-1 and 5 error):', best_err1, best_err5)
print('Best accuracy (top-1 and 5 error):', best_err1, best_err5)

Files already downloaded and verified
* Epoch: [0/300]	 Top 1-err 77.826  Top 5-err 27.372	 Train Loss 2.098
* Epoch: [0/300]	 Top 1-err 58.190  Top 5-err 12.020	 Test Loss 1.599
Current best accuracy (top-1 and 5 error): 58.19 12.02
* Epoch: [1/300]	 Top 1-err 71.348  Top 5-err 20.540	 Train Loss 1.971
* Epoch: [1/300]	 Top 1-err 52.830  Top 5-err 8.500	 Test Loss 1.463
Current best accuracy (top-1 and 5 error): 52.83 8.5
* Epoch: [2/300]	 Top 1-err 67.748  Top 5-err 18.246	 Train Loss 1.903
* Epoch: [2/300]	 Top 1-err 43.270  Top 5-err 4.870	 Test Loss 1.253
Current best accuracy (top-1 and 5 error): 43.27 4.87
* Epoch: [3/300]	 Top 1-err 66.156  Top 5-err 16.874	 Train Loss 1.863
* Epoch: [3/300]	 Top 1-err 40.760  Top 5-err 5.930	 Test Loss 1.263
Current best accuracy (top-1 and 5 error): 40.76 5.93
* Epoch: [4/300]	 Top 1-err 65.424  Top 5-err 16.046	 Train Loss 1.838
* Epoch: [4/300]	 Top 1-err 36.790  Top 5-err 4.020	 Test Loss 1.111
Current best accuracy (top-1 and 5 error): 36

KeyboardInterrupt: ignored