In [0]:
################################################################################
# Title= CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features 
# Author= Yun, Sangdoo and Han, Dongyoon and Oh, Seong Joon and Chun, Sanghyuk and Choe, Junsuk and Yoo, Youngjoon,
# Booktitle = International Conference on Computer Vision (ICCV)
# Year=2019
# Availability: https://github.com/clovaai/CutMix-PyTorch
################################################################################



import os
import shutil
import time

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import numpy as np
import math
import torch.nn.functional as F

In [0]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch and return the average training loss.

    Args:
        train_loader: iterable yielding ``(input, target)`` batches.
        model: network to optimise; it is switched to training mode.
        criterion: callable ``criterion(output, target)`` returning the loss.
        optimizer: optimizer whose ``param_groups`` hold the learning rate.
        epoch: index of the current epoch (used only for logging).

    Returns:
        The sample-weighted average loss over the epoch.

    Notes:
        Batches are moved to the GPU with ``.cuda()``, so CUDA is required.
        Reads the module-level names ``print_freq``, ``verbose`` and ``epochs``.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    # The learning rate is read once; it is not changed inside this function.
    current_LR = get_learning_rate(optimizer)[0]
    for i, (images, target) in enumerate(train_loader):
        # time spent waiting for the loader to deliver this batch
        data_time.update(time.time() - end)
        images = images.cuda()
        target = target.cuda()

        # Plain tensors are passed straight to the model: the Variable wrapper
        # is deprecated, and the gradient w.r.t. the input is never used here,
        # so the input does not need requires_grad.
        output = model(images)
        loss = criterion(output, target)

        # measure error rates (detached from the graph) and record loss
        err1, err5 = accuracy(output.detach(), target, topk=(1, 5))

        num_samples = images.size(0)
        losses.update(loss.item(), num_samples)
        top1.update(err1.item(), num_samples)
        top5.update(err5.item(), num_samples)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0 and verbose:
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'LR: {LR:.6f}\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Top 1-err {top1.val:.4f} ({top1.avg:.4f})\t'.format(
                epoch, epochs, i, len(train_loader), LR=current_LR, batch_time=batch_time,
                loss=losses, top1=top1))
    print('* Epoch: [{0}/{1}]\t Top 1-err {top1.avg:.3f}  Top 5-err {top5.avg:.3f}\t Train Loss {loss.avg:.3f}'.format(
        epoch, epochs, top1=top1, top5=top5, loss=losses))

    return losses.avg


def custom_criterion(criterion, pred, labels, split_ratio):
    """Sum the loss on a labeled head and an "unlabeled" tail of one batch.

    The first ``int(split_ratio * N)`` predictions (``N`` being the number of
    rows in ``pred``) are scored against the matching entries of ``labels``;
    the remaining predictions are scored against a constant target of class 1.

    Args:
        criterion: callable ``criterion(pred, target)`` returning a loss.
        pred: model outputs, indexed by sample along dimension 0.
        labels: targets; only the first ``int(split_ratio * N)`` are used.
        split_ratio: fraction of the batch treated as labeled.

    Returns:
        ``criterion(labeled part) + criterion(unlabeled part)``.
    """
    # Use the real batch length rather than the configured batch size, so a
    # shorter final batch still yields targets that match the predictions.
    num_samples = pred.size(0)
    ls = int(split_ratio * num_samples)

    pred_labeled = pred[:ls]
    pred_unlabeled = pred[ls:]

    # Targets follow the predictions' device instead of assuming CUDA.
    labels_l = labels[:ls].to(pred.device)
    labels_ul = torch.ones(num_samples - ls, dtype=torch.long, device=pred.device)

    return criterion(pred_labeled, labels_l) + criterion(pred_unlabeled, labels_ul)

def validate(val_loader, model, criterion, epoch):
    """Evaluate the model on a loader and report error rates and loss.

    Args:
        val_loader: iterable yielding ``(input, target)`` batches.
        model: network to evaluate; it is switched to evaluation mode.
        criterion: callable ``criterion(output, target)`` returning the loss.
        epoch: index of the current epoch (used only for logging).

    Returns:
        Tuple ``(top1_error, top5_error, average_loss)``, each a
        sample-weighted average over the loader.

    Notes:
        Batches are moved to the GPU with ``.cuda()``, so CUDA is required.
        Reads the module-level names ``print_freq``, ``verbose`` and ``epochs``.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # No backward pass happens here, so skip building the autograd graph.
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            # Move both tensors explicitly, mirroring the training loop.
            images = images.cuda()
            target = target.cuda()

            output = model(images)
            loss = criterion(output, target)

            # measure error rates and record loss
            err1, err5 = accuracy(output, target, topk=(1, 5))

            num_samples = images.size(0)
            losses.update(loss.item(), num_samples)
            top1.update(err1.item(), num_samples)
            top5.update(err5.item(), num_samples)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0 and verbose:
                print('Test (on val set): [{0}/{1}][{2}/{3}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Top 1-err {top1.val:.4f} ({top1.avg:.4f})\t'.format(
                       epoch, epochs, i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1))

    print('* Epoch: [{0}/{1}]\t Top 1-err {top1.avg:.3f}  Top 5-err {top5.avg:.3f}\t Test Loss {loss.avg:.3f}'.format(
        epoch, epochs, top1=top1, top5=top5, loss=losses))
    return top1.avg, top5.avg, losses.avg


class AverageMeter(object):
    """Running tracker of the latest value and the weighted mean.

    Attributes set by ``reset``/``update``:
        val:   the most recent value passed to ``update``.
        sum:   accumulated ``val * n`` over all updates.
        count: accumulated weight ``n`` over all updates.
        avg:   ``sum / count`` after the most recent update.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.val = val
        running_sum = self.sum + val * n
        running_count = self.count + n
        self.sum, self.count = running_sum, running_count
        self.avg = running_sum / running_count


def adjust_learning_rate(optimizer, epoch, total_epochs=None):
    """Apply a two-step decay schedule to every parameter group.

    The rate for ``epoch`` is the group's base rate multiplied by
    ``0.2 ** (epoch // (total_epochs * 0.5))`` and by
    ``0.1 ** (epoch // (total_epochs * 0.75))``.

    The base rate of each group is captured in ``param_group['initial_lr']``
    the first time the group is seen. Every call scales that stored base rate,
    so repeated calls do not compound the decay factors.

    Args:
        optimizer: object exposing ``param_groups``, a list of dicts with an
            ``'lr'`` entry.
        epoch: index of the epoch about to run.
        total_epochs: length of the schedule; defaults to the module-level
            ``epochs`` when omitted.
    """
    if total_epochs is None:
        total_epochs = epochs

    decay = (0.2 ** (epoch // (total_epochs * 0.5))) * (0.1 ** (epoch // (total_epochs * 0.75)))
    for param_group in optimizer.param_groups:
        # setdefault only stores the rate on the first call, before any decay.
        base_lr = param_group.setdefault('initial_lr', param_group['lr'])
        param_group['lr'] = base_lr * decay


def get_learning_rate(optimizer):
    """Return the ``'lr'`` of each parameter group, in group order, as a list."""
    return [group['lr'] for group in optimizer.param_groups]


def accuracy(output, target, topk=(1,)):
    """Compute the top-k error rate (in percent) for each requested k.

    Despite the name, the values returned are error percentages:
    ``100 * (1 - correct_k / num_samples)``.

    Args:
        output: 2-D score tensor; ``topk`` is taken along dimension 1.
        target: 1-D tensor of class indices, one per row of ``output``.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per entry of ``topk``.
    """
    maxk = max(topk)
    num_samples = target.size(0)

    # Indices of the maxk highest scores per sample, transposed to (maxk, N).
    _, top_pred = output.topk(maxk, 1, True, True)
    top_pred = top_pred.t()
    correct = top_pred.eq(target.view(1, -1).expand_as(top_pred))

    result = []
    for k in topk:
        # reshape (not view): the slice comes from a transposed tensor and may
        # be non-contiguous, in which case view(-1) raises for k > 1.
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        wrong_k = num_samples - correct_k
        result.append(wrong_k.mul_(100.0 / num_samples))

    return result


In [0]:
class CNN(nn.Module):
    """Small convolutional classifier.

    Two stride-2 3x3 convolutions (3 -> 64 -> 128 channels), each followed by
    ReLU, then batch normalisation, global average pooling and a linear layer
    producing ``num_classes`` scores.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        feature_layers = [
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.BatchNorm2d(128),
        ]
        self.conv = nn.Sequential(*feature_layers)
        self.out = nn.Linear(128, num_classes)

    def forward(self, x, noise=True):
        """Return class scores for ``x``; ``noise`` is accepted but not used."""
        features = self.conv(x)
        # Global average pool to 1x1, then flatten to (-1, 128) for the classifier.
        pooled = F.adaptive_avg_pool2d(features, 1)
        flat = pooled.view(-1, 128)
        return self.out(flat)

In [0]:
# ---- Configuration -----------------------------------------------------------
batch_size=100
numberofclass = 10
# depth, alpha and beta are not referenced by the code in this cell.
depth=200
alpha=240
lr=0.1            # initial learning rate handed to the optimizer
epochs=300        # total number of epochs; also read by the LR schedule and log lines
beta=1.0
print_freq=100    # log every print_freq batches
verbose=True      # enables the per-batch log lines
workers=4         # DataLoader worker processes
momentum=0.9
weight_decay=1e-4
best_err1 = 100
best_err5 = 100
use_cuda = torch.cuda.is_available()
# Per-channel mean/std given on a 0-255 scale and rescaled to 0-1
# (presumably CIFAR-10 statistics -- confirm against the dataset).
normalize = transforms.Normalize(mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
                                          std=[x / 255.0 for x in [63.0, 62.1, 66.7]])


# ---- Data --------------------------------------------------------------------
# Training uses random crop + horizontal flip augmentation; evaluation does not.
transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
transform_test = transforms.Compose([
            transforms.ToTensor(),
            normalize
        ])


train_loader = torch.utils.data.DataLoader(
                datasets.CIFAR10('../data', train=True, download=True, transform=transform_train),
                batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True)
# Evaluation metrics are averaged over the whole loader, so shuffling adds nothing.
val_loader = torch.utils.data.DataLoader(
                datasets.CIFAR10('../data', train=False, transform=transform_test),
                batch_size=batch_size, shuffle=False, num_workers=workers, pin_memory=True)

# ---- Model, loss, optimizer --------------------------------------------------
model = CNN()
model = torch.nn.DataParallel(model).cuda()
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr, momentum=momentum, weight_decay=weight_decay, nesterov=True)
cudnn.benchmark = True

# ---- Training loop -----------------------------------------------------------
# Iterate over the configured number of epochs so the loop stays in step with
# the LR schedule and the "[epoch/epochs]" log lines, which both read `epochs`.
for epoch in range(epochs):

    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    train_loss = train(train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    err1, err5, val_loss = validate(val_loader, model, criterion, epoch)

    # remember the best top-1 error and the top-5 error observed with it
    is_best = err1 <= best_err1
    best_err1 = min(err1, best_err1)
    if is_best:
        best_err5 = err5

    print('Current best accuracy (top-1 and 5 error):', best_err1, best_err5)
print('Best accuracy (top-1 and 5 error):', best_err1, best_err5)

0it [00:00, ?it/s]

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar-10-python.tar.gz


 98%|█████████▊| 167583744/170498071 [00:11<00:00, 17795726.57it/s]

Extracting ../data/cifar-10-python.tar.gz to ../data
Epoch: [0/300][0/500]	LR: 0.100000	Time 0.336 (0.336)	Loss 2.3066 (2.3066)	Top 1-err 92.0000 (92.0000)	
Epoch: [0/300][100/500]	LR: 0.100000	Time 0.004 (0.013)	Loss 1.7865 (1.8709)	Top 1-err 68.0000 (69.9703)	
Epoch: [0/300][200/500]	LR: 0.100000	Time 0.005 (0.012)	Loss 1.4982 (1.7738)	Top 1-err 53.0000 (65.8259)	
Epoch: [0/300][300/500]	LR: 0.100000	Time 0.006 (0.011)	Loss 1.7722 (1.6999)	Top 1-err 67.0000 (62.8804)	
Epoch: [0/300][400/500]	LR: 0.100000	Time 0.012 (0.011)	Loss 1.4287 (1.6546)	Top 1-err 54.0000 (60.8853)	
* Epoch: [0/300]	 Top 1-err 59.492  Top 5-err 10.638	 Train Loss 1.618
Test (on val set): [0/300][0/100]	Time 0.164 (0.164)	Loss 1.5180 (1.5180)	Top 1-err 46.0000 (46.0000)	
* Epoch: [0/300]	 Top 1-err 53.430  Top 5-err 7.250	 Test Loss 1.496
Current best accuracy (top-1 and 5 error): 53.43 7.25
Epoch: [1/300][0/500]	LR: 0.100000	Time 0.164 (0.164)	Loss 1.4222 (1.4222)	Top 1-err 54.0000 (54.0000)	
Epoch: [1/300][100

170500096it [00:29, 17795726.57it/s]                               

Epoch: [1/300][400/500]	LR: 0.100000	Time 0.014 (0.010)	Loss 1.2801 (1.3764)	Top 1-err 47.0000 (49.5661)	
* Epoch: [1/300]	 Top 1-err 49.190  Top 5-err 6.698	 Train Loss 1.366
Test (on val set): [1/300][0/100]	Time 0.152 (0.152)	Loss 1.1579 (1.1579)	Top 1-err 40.0000 (40.0000)	
* Epoch: [1/300]	 Top 1-err 47.020  Top 5-err 5.940	 Test Loss 1.327
Current best accuracy (top-1 and 5 error): 47.02 5.94
Epoch: [2/300][0/500]	LR: 0.100000	Time 0.180 (0.180)	Loss 1.3089 (1.3089)	Top 1-err 51.0000 (51.0000)	
Epoch: [2/300][100/500]	LR: 0.100000	Time 0.004 (0.011)	Loss 1.1579 (1.3115)	Top 1-err 43.0000 (46.7327)	
Epoch: [2/300][200/500]	LR: 0.100000	Time 0.004 (0.011)	Loss 1.1813 (1.3023)	Top 1-err 34.0000 (46.5373)	
Epoch: [2/300][300/500]	LR: 0.100000	Time 0.003 (0.011)	Loss 1.3727 (1.2974)	Top 1-err 46.0000 (46.2326)	
Epoch: [2/300][400/500]	LR: 0.100000	Time 0.007 (0.010)	Loss 1.0885 (1.2909)	Top 1-err 41.0000 (45.9027)	
* Epoch: [2/300]	 Top 1-err 45.630  Top 5-err 5.616	 Train Loss 1.282
