In [0]:
################################################################################
# Title= CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features 
# Author= Yun, Sangdoo and Han, Dongyoon and Oh, Seong Joon and Chun, Sanghyuk and Choe, Junsuk and Yoo, Youngjoon,
# Booktitle = International Conference on Computer Vision (ICCV)
# Year=2019
# Availability: https://github.com/clovaai/CutMix-PyTorch
################################################################################


import argparse
import os
import shutil
import time

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data as utils
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import numpy as np
import math

In [0]:
import torch.nn.functional as F

class CNN(nn.Module):
    """Small three-stage convolutional classifier with a 10-way output.

    Every stage is an unpadded 3x3 convolution, a ReLU and a 2x2 max-pool.
    For a 3x32x32 input the spatial size therefore shrinks
    32 -> 30 -> 15 -> 13 -> 6 -> 4 -> 2, leaving 256 channels x 2 x 2 = 1024
    features.  That number coincides with the ``64 * 4 * 4`` used to size
    ``fc1`` and to flatten the activations, which is why the flatten is
    valid even though the expression does not mirror the real layout.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: 3 -> 64 -> 128 -> 256 channels.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3)
        # One pooling module shared by all three stages (it has no weights).
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head: 1024 -> 128 -> 256 -> 10.
        self.fc1 = nn.Linear(64 * 4 * 4, 128)
        self.fc2 = nn.Linear(128, 256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(N, 10)``."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))
        flat = x.view(-1, 64 * 4 * 4)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return F.log_softmax(logits, dim=1)

In [0]:
# ------------------------------------------------------------------------------
# Experiment configuration (module-level globals read by the functions below).
# ------------------------------------------------------------------------------
# Mini-batch size used by every DataLoader in this notebook.
batch_size=25
# Dataset name; adjust_learning_rate() branches on it.
dataset ="cifar10"
# NOTE(review): depth, alpha, beta, cutmix_prob and ratio are not read by any
# code visible in this notebook -- they look like leftovers from the original
# CutMix/PyramidNet script.
depth=200
# Stored as 'arch' in the checkpoints, although the model built below is CNN.
net_type="pyramidnet"
alpha=240
# Initial learning rate handed to the SGD optimizer.
lr=0.25
# Experiment name; save_checkpoint() writes under runs/<expname>/.
expname="PyraNet200"
# Number of epochs; used for the LR schedule and in the log messages.
epochs=10
beta=1.0
cutmix_prob=0.5
# Print a progress line every `print_freq` mini-batches when `verbose` is True.
print_freq=10
verbose=True
# Number of DataLoader worker processes.
workers=4
# SGD momentum and L2 weight decay.
momentum=0.9
weight_decay=1e-4
# Best (lowest) top-1 / top-5 error seen so far, in percent.
best_err1 = 100
best_err5 = 100
ratio = 0.8
# NOTE(review): `mp` is not used in the visible code.
import torch.multiprocessing as mp


# Normalize a tensor image channel-wise with a mean and a standard deviation.
# Given mean (M1,...,Mn) and std (S1,...,Sn) for n channels, each channel is
# transformed as (x - M) / S.  The constants are given on the 0-255 scale and
# divided by 255 to match ToTensor's [0, 1] range (presumably CIFAR-10
# training-set statistics -- TODO confirm).
normalize = transforms.Normalize(mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
                                          std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

# Transforms are common image transformations. They can be chained together
# using Compose, which applies them in the order given.


# RandomCrop - crop the given PIL (Pillow) image at a random location; 32 is the output size and padding=4 pads every border first.
# RandomHorizontalFlip - horizontally flip the given PIL image randomly with a given probability.
# ToTensor - convert a PIL image or numpy.ndarray (H x W x C) to a tensor of shape (C x H x W).

# Training pipeline: random crop + random flip augmentation, then normalization.
transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])

# Evaluation pipeline: no augmentation, only tensor conversion and normalization.
transform_test = transforms.Compose([
            transforms.ToTensor(),
            normalize
        ])


# CIFAR-10 training split (downloaded into ../data when missing), reshuffled
# on every pass over the loader.
train_loader = torch.utils.data.DataLoader(
                datasets.CIFAR10('../data', train=True, download=True, transform=transform_train),
                batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True)

# CIFAR-10 test split.  No download flag here, so the files must already be in
# ../data.  NOTE(review): shuffle=True is unusual for an evaluation loader; it
# does not change the averaged metrics.
val_loader = torch.utils.data.DataLoader(
                datasets.CIFAR10('../data', train=False, transform=transform_test),
                batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True)
numberofclass = 10

# Create the network.
model = CNN()

# Wrap the model in DataParallel and move it to the GPU(s).
model = torch.nn.DataParallel(model).cuda()

# CrossEntropyLoss combines LogSoftmax and negative log-likelihood loss in a
# single class and expects raw, unnormalized scores for each class.
# CNN.forward already returns log_softmax outputs; applying log-softmax to
# log-probabilities leaves them unchanged, so the loss value is unaffected.
criterion = nn.CrossEntropyLoss().cuda()

# Filled by validate(): per-batch inputs and their (true or pseudo) labels,
# later concatenated into the semi-supervised training set.
custom_dataset = []
my_inputs = []
my_targets = []

def train(train_loader, model, criterion, optimizer, epoch, max_batches=400):
    """Train ``model`` on the first ``max_batches`` mini-batches of a loader.

    The notebook treats the first 400 mini-batches (10000 samples at batch
    size 25) as the labelled subset: ``validate`` keeps true labels for
    ``i < 400`` and the flag vector built later marks 10000 samples as
    labelled.  The previous ``if i == 400: break`` ran *after* the optimizer
    step and therefore trained on 401 batches; the cap is now checked before
    the batch is processed so exactly ``max_batches`` batches are used.

    Args:
        train_loader: iterable yielding ``(input, target)`` mini-batches.
        model: network to optimise; switched to training mode here.
        criterion: loss callable invoked as ``criterion(output, target)``.
        optimizer: optimizer providing ``zero_grad()``, ``step()`` and
            ``param_groups``.
        epoch: current epoch index (used only in the log messages).
        max_batches: number of mini-batches to train on; ``None`` consumes
            the whole loader.

    Returns:
        The sample-weighted average training loss over the processed batches.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    current_LR = get_learning_rate(optimizer)[0]
    for i, (input, target) in enumerate(train_loader):
        # Stop once the labelled budget is exhausted.
        if max_batches is not None and i >= max_batches:
            break

        # measure data loading time
        data_time.update(time.time() - end)

        input = input.cuda()
        target = target.cuda()

        # Plain tensors are enough: gradients w.r.t. the input image are never
        # used, so they are no longer requested (the old Variable wrapper with
        # requires_grad=True only added work to the backward pass).
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        err1, err5 = accuracy(output.detach(), target, topk=(1, 5))

        losses.update(loss.item(), input.size(0))
        top1.update(err1.item(), input.size(0))
        top5.update(err5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0 and verbose == True:
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'LR: {LR:.6f}\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Top 1-err {top1.val:.4f} ({top1.avg:.4f})\t'
                  'Top 5-err {top5.val:.4f} ({top5.avg:.4f})'.format(
                epoch, epochs, i, len(train_loader), LR=current_LR, batch_time=batch_time,
                data_time=data_time, loss=losses, top1=top1, top5=top5))
    print('* Epoch: [{0}/{1}]\t Top 1-err {top1.avg:.3f}  Top 5-err {top5.avg:.3f}\t Train Loss {loss.avg:.3f}'.format(
        epoch, epochs, top1=top1, top5=top5, loss=losses))
    return losses.avg



    # ls = int(split_ratio*batch_size)

    # pred_labeled = pred[:ls]
    # pred_unlabeled = pred[ls:]
  
    # labels_l = labels[:ls].cuda()

    # labels_ul = torch.ones((int(batch_size-ls)),dtype=torch.long).cuda()

    # return (1/ls) * criterion(pred_labeled, labels_l) + (1/(batch_size-ls)) * criterion(pred_unlabeled, labels_ul)
    # return criterion(pred_labeled, labels_l) + criterion(pred_unlabeled, labels_ul)
    # return criterion(pred_labeled, labels_l) 

def validate(val_loader, model, criterion, epoch, labeled_batches=400):
    """Pseudo-label a loader and collect the semi-supervised training data.

    The first ``labeled_batches`` mini-batches are treated as labelled: their
    inputs and *true* targets are appended to the module-level ``my_inputs``
    / ``my_targets`` lists and they are not evaluated.  Every later batch is
    run through the model; its arg-max prediction (as a float tensor on the
    GPU) is appended to ``my_targets`` as the pseudo-label, and error/loss
    statistics against the true targets are accumulated.

    Changes from the earlier version:
      * the forward passes run under ``torch.no_grad()``;
      * predictions are gathered in lists and concatenated once instead of
        re-concatenating a growing tensor every batch;
      * a loader with no batch beyond ``labeled_batches`` no longer fails
        with ``UnboundLocalError`` on ``dummy_data``; empty tensors are
        returned instead.

    Args:
        val_loader: iterable yielding ``(input, target)`` mini-batches.
        model: network to evaluate; switched to eval mode here.
        criterion: loss callable invoked as ``criterion(output, target)``.
        epoch: current epoch index (used only in the log messages).
        labeled_batches: number of leading batches stored with true labels.

    Returns:
        ``(top1_err, top5_err, avg_loss, dummy, dummy_data)`` where ``dummy``
        holds all pseudo-labels concatenated along dim 0 and ``dummy_data``
        holds the per-batch model outputs concatenated along dim 1.

    Side effects:
        Appends to the global ``my_inputs`` and ``my_targets`` on every call,
        so calling it twice duplicates the stored data.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    pseudo_label_chunks = []
    output_chunks = []

    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            if i < labeled_batches:
                # Labelled portion: keep the ground-truth targets untouched.
                my_inputs.append(input)
                my_targets.append(target)
                continue

            target = target.cuda()

            # The CPU copy of `input` is what gets stored below; only the
            # forward pass uses the GPU copy.
            output = model(input.cuda())
            loss = criterion(output, target)

            # measure accuracy and record loss
            err1, err5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(err1.item(), input.size(0))
            top5.update(err5.item(), input.size(0))

            # Arg-max class index becomes the pseudo-label (stored as float).
            _, indices = torch.max(output, 1)
            indices = indices.float()
            pseudo_label_chunks.append(indices)
            output_chunks.append(output)

            my_inputs.append(input)
            my_targets.append(indices)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0 and verbose == True:
                print('Test (on val set): [{0}/{1}][{2}/{3}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Top 1-err {top1.val:.4f} ({top1.avg:.4f})\t'
                      'Top 5-err {top5.val:.4f} ({top5.avg:.4f})'.format(
                       epoch, epochs, i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1, top5=top5))

    print('* Epoch: [{0}/{1}]\t Top 1-err {top1.avg:.3f}  Top 5-err {top5.avg:.3f}\t Test Loss {loss.avg:.3f}'.format(
        epoch, epochs, top1=top1, top5=top5, loss=losses))

    if pseudo_label_chunks:
        dummy = torch.cat(pseudo_label_chunks)
        # NOTE(review): outputs are joined along dim 1 (the class axis), as in
        # the earlier code, so the result has shape (batch, classes * n_batches)
        # and every evaluated batch must have the same size.  dim 0 looks like
        # the intended axis; kept unchanged so the returned shape stays the
        # same for existing callers.
        dummy_data = torch.cat(output_chunks, 1)
    else:
        # Nothing was evaluated (loader had at most `labeled_batches` batches).
        dummy = torch.tensor([], dtype=torch.float).cuda()
        dummy_data = torch.tensor([], dtype=torch.float).cuda()

    return top1.avg, top5.avg, losses.avg, dummy, dummy_data


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialize ``state`` under ``runs/<expname>/`` and optionally mark it best.

    Args:
        state: object handed to ``torch.save``.
        is_best: when truthy, the written file is also copied to
            ``runs/<expname>/model_best.pth.tar``.
        filename: file name inside the run directory.
    """
    run_dir = "runs/%s/" % (expname)
    if not os.path.exists(run_dir):
        os.makedirs(run_dir)

    checkpoint_path = run_dir + filename
    torch.save(state, checkpoint_path)

    if not is_best:
        return
    best_path = 'runs/%s/' % (expname) + 'model_best.pth.tar'
    shutil.copyfile(checkpoint_path, best_path)


class AverageMeter:
    """Keeps the most recent value of a metric and its running weighted mean.

    Attributes:
        val: value passed to the latest ``update`` call.
        sum: sum of ``val * n`` over all updates since the last reset.
        count: sum of ``n`` over all updates since the last reset.
        avg: ``sum / count``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every tracked statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed over ``n`` samples and refresh ``avg``."""
        weighted = val * n
        self.val = val
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count


def adjust_learning_rate(optimizer, epoch, base_lr=None, total_epochs=None, dataset_name=None):
    """Set the step-decayed learning rate for ``epoch`` on every param group.

    Schedule (a multiplier applied to the *initial* learning rate):
      * datasets whose name starts with ``'cifar'``: x0.1 at 50% and again
        at 75% of ``total_epochs``;
      * ``'imagenet'``: x0.1 every 75 epochs when ``total_epochs == 300``,
        otherwise every 30 epochs;
      * any other dataset: the rate is left at its initial value.

    The earlier version overwrote the module-level ``lr`` with the decayed
    value and used it as the base on the next call, so the decay compounded
    on every epoch after the first milestone.  The base rate is now fixed:
    it is remembered per parameter group under ``'initial_lr'`` the first
    time the function sees the optimizer, and the global ``lr`` is no longer
    modified.  Calling the function repeatedly for the same epoch is
    idempotent.

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts with an
            ``'lr'`` entry).
        epoch: zero-based index of the epoch about to run.
        base_lr: initial learning rate; ``None`` uses each group's remembered
            ``'initial_lr'`` (its current ``'lr'`` on first use).
        total_epochs: length of the schedule; ``None`` uses the global
            ``epochs``.
        dataset_name: selects the schedule; ``None`` uses the global
            ``dataset``.
    """
    if total_epochs is None:
        total_epochs = epochs
    if dataset_name is None:
        dataset_name = dataset

    if dataset_name.startswith('cifar'):
        factor = (0.1 ** (epoch // (total_epochs * 0.5))) * (0.1 ** (epoch // (total_epochs * 0.75)))
    elif dataset_name == 'imagenet':
        step = 75 if total_epochs == 300 else 30
        factor = 0.1 ** (epoch // step)
    else:
        factor = 1.0

    for param_group in optimizer.param_groups:
        if base_lr is None:
            # Remember the undecayed rate once so later calls never decay an
            # already-decayed value.
            start_lr = param_group.setdefault('initial_lr', param_group['lr'])
        else:
            start_lr = base_lr
        param_group['lr'] = start_lr * factor


def get_learning_rate(optimizer):
    """Return the current ``'lr'`` of each parameter group, in group order."""
    return [group['lr'] for group in optimizer.param_groups]


def accuracy(output, target, topk=(1,)):
    """Compute the top-k *error* rate, in percent, for each k in ``topk``.

    Despite its name this returns error rates (100 - accuracy), which is
    what the callers log as ``Top 1-err`` / ``Top 5-err``.

    Args:
        output: ``(N, C)`` tensor of per-class scores.
        target: ``(N,)`` tensor of class indices.
        topk: iterable of k values; each must not exceed ``C``.

    Returns:
        A list with one single-element tensor per k holding the percentage
        of samples whose target is not among the k highest-scoring classes.
    """
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the maxk best classes per sample, transposed to (maxk, N).
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        # `correct` inherits the transposed (non-contiguous) layout, so a
        # multi-row slice cannot be flattened with .view(-1) -- that raises a
        # RuntimeError on current PyTorch.  .reshape(-1) copies when needed.
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        wrong_k = batch_size - correct_k
        res.append(wrong_k.mul_(100.0 / batch_size))

    return res

# Stochastic gradient descent with Nesterov momentum and weight decay over all
# model parameters, starting from the learning rate `lr` configured above.
optimizer = torch.optim.SGD(model.parameters(), lr, momentum=momentum, weight_decay=weight_decay, nesterov=True)

# Enable the built-in cuDNN auto-tuner, which benchmarks the available
# algorithms and picks the best one for this hardware and input configuration.
# This usually speeds things up when input sizes do not change.
cudnn.benchmark = True

# Stage 1: a single epoch of supervised training followed by pseudo-labelling.
for epoch in range(0, 1):

    adjust_learning_rate(optimizer, epoch)

    # train for one epoch (train() stops early after a fixed number of batches)
    train_loss = train(train_loader, model, criterion, optimizer, epoch)

    # Run validate() on the *training* loader: it stores the leading batches
    # with their true labels and pseudo-labels the rest with the model's
    # predictions.  The reported errors are therefore measured on training
    # data, not on the held-out test split.
    err1, err5, val_loss, dummy, dummy_data = validate(train_loader, model, criterion, epoch)
    # Keep the predicted labels and raw outputs at module level.
    psuedo_dummy = dummy
    psuedo_dummy_data = dummy_data


    # remember the best (lowest) top-1 error and save a checkpoint
    is_best = err1 <= best_err1
    best_err1 = min(err1, best_err1)
    if is_best:
        best_err5 = err5

    print('Current best accuracy (top-1 and 5 error):', best_err1, best_err5)
    save_checkpoint({
        'epoch': epoch,
        'arch': net_type,
        'state_dict': model.state_dict(),
        'best_err1': best_err1,
        'best_err5': best_err5,
        'optimizer': optimizer.state_dict(),}, is_best)

print('Best accuracy (top-1 and 5 error):', best_err1, best_err5)
# The pseudo-labels predicted above are held in psuedo_dummy; my_targets holds
# them together with the true labels of the leading batches.

#print(target_var.shape)







Files already downloaded and verified
Epoch: [0/10][0/2000]	LR: 0.250000	Time 0.205 (0.205)	Data 0.139 (0.139)	Loss 2.3083 (2.3083)	Top 1-err 88.0000 (88.0000)	Top 5-err 52.0000 (52.0000)
Epoch: [0/10][10/2000]	LR: 0.250000	Time 0.005 (0.024)	Data 0.000 (0.013)	Loss 2.3201 (2.3144)	Top 1-err 88.0000 (88.3636)	Top 5-err 52.0000 (48.0000)
Epoch: [0/10][20/2000]	LR: 0.250000	Time 0.006 (0.015)	Data 0.000 (0.007)	Loss 2.3193 (2.3222)	Top 1-err 88.0000 (88.9524)	Top 5-err 56.0000 (49.1429)
Epoch: [0/10][30/2000]	LR: 0.250000	Time 0.006 (0.012)	Data 0.000 (0.005)	Loss 2.3912 (2.3196)	Top 1-err 96.0000 (89.4194)	Top 5-err 60.0000 (48.0000)
Epoch: [0/10][40/2000]	LR: 0.250000	Time 0.004 (0.010)	Data 0.000 (0.004)	Loss 2.2671 (2.3214)	Top 1-err 84.0000 (88.5854)	Top 5-err 32.0000 (48.7805)
Epoch: [0/10][50/2000]	LR: 0.250000	Time 0.005 (0.009)	Data 0.000 (0.003)	Loss 2.2929 (2.3181)	Top 1-err 80.0000 (87.8431)	Top 5-err 56.0000 (48.6275)
Epoch: [0/10][60/2000]	LR: 0.250000	Time 0.003 (0.009)	Da

In [0]:
def custom_criterion(criterion, pred, labels, flag):
    """Sum ``criterion`` over the pseudo-labelled and the labelled samples.

    Samples with ``flag > 0`` (pseudo-labelled) and samples with
    ``flag == 0`` (labelled) are scored separately and the two losses are
    added.  A group with no sample in the batch contributes nothing.

    The earlier version always evaluated the ``flag > 0`` group, so a batch
    made only of labelled samples produced NaN (mean over an empty
    selection), and it detected the "all pseudo-labelled" case by comparing
    against the global ``batch_size``, which is wrong for a shorter final
    batch.  Both cases are handled here from the masks alone.

    Args:
        criterion: loss callable invoked as ``criterion(scores, targets)``.
        pred: ``(N, C)`` tensor of model outputs.
        labels: ``(N,)`` tensor of class indices (any numeric dtype; cast to
            ``long`` before use).
        flag: ``(N,)`` tensor; ``> 0`` marks pseudo-labelled samples,
            ``== 0`` labelled ones.  Negative values are ignored.

    Returns:
        A scalar loss tensor.  If no sample is selected at all, a zero that
        is still attached to ``pred``'s graph so ``backward()`` works.
    """
    pseudo_mask = flag > 0
    labeled_mask = flag == 0

    loss = None
    # Same summation order as before: pseudo-labelled term first.
    for mask in (pseudo_mask, labeled_mask):
        if not bool(mask.any()):
            continue
        term = criterion(pred[mask].float(), labels[mask].long())
        loss = term if loss is None else loss + term

    if loss is None:
        return pred.float().sum() * 0.0
    return loss


def train_pseudo(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one epoch on mixed labelled / pseudo-labelled data.

    Each mini-batch carries a per-sample ``flag`` that ``custom_criterion``
    uses to score pseudo-labelled and labelled samples separately.

    Unlike the earlier version, this function no longer reads the
    module-level ``psuedo_dummy`` / ``psuedo_dummy_data`` into unused locals
    (which made it fail with ``NameError`` unless the pseudo-labelling cell
    had been run), no longer draws an unused random number, and no longer
    requests gradients with respect to the input images.

    Args:
        train_loader: iterable yielding ``(input, target, flag)`` batches.
        model: network to optimise; switched to training mode here.
        criterion: base loss callable passed on to ``custom_criterion``.
        optimizer: optimizer providing ``zero_grad()``, ``step()`` and
            ``param_groups``.
        epoch: current epoch index (used only in the log messages).

    Returns:
        The sample-weighted average training loss for the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    current_LR = get_learning_rate(optimizer)[0]
    for i, (input, target, flag) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        input = input.cuda()
        target = target.cuda()
        flag = flag.cuda()

        output = model(input)

        loss = custom_criterion(criterion, output, target, flag)

        # measure accuracy and record loss; the error is computed against
        # whatever label the batch carries (true or pseudo)
        err1, err5 = accuracy(output.detach(), target, topk=(1, 5))

        losses.update(loss.item(), input.size(0))
        top1.update(err1.item(), input.size(0))
        top5.update(err5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0 and verbose == True:
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'LR: {LR:.6f}\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Top 1-err {top1.val:.4f} ({top1.avg:.4f})\t'
                  'Top 5-err {top5.val:.4f} ({top5.avg:.4f})'.format(
                epoch, epochs, i, len(train_loader), LR=current_LR, batch_time=batch_time,
                data_time=data_time, loss=losses, top1=top1, top5=top5))
    print('* Epoch: [{0}/{1}]\t Top 1-err {top1.avg:.3f}  Top 5-err {top5.avg:.3f}\t Train Loss {loss.avg:.3f}'.format(
        epoch, epochs, top1=top1, top5=top5, loss=losses))
    return losses.avg

def validate_pseudo(val_loader, model, criterion, epoch):
    """Evaluate ``model`` on a loader and report top-1 / top-5 error and loss.

    The forward passes now run under ``torch.no_grad()`` so no autograd
    graph is built during evaluation, and the deprecated ``Variable``
    wrappers are gone.  Inputs are moved to the GPU explicitly, matching
    the training functions.

    Args:
        val_loader: iterable yielding ``(input, target)`` mini-batches.
        model: network to evaluate; switched to eval mode here.
        criterion: loss callable invoked as ``criterion(output, target)``.
        epoch: current epoch index (used only in the log messages).

    Returns:
        ``(top1_err, top5_err, avg_loss)`` -- sample-weighted averages, with
        the errors expressed in percent.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            input = input.cuda()
            target = target.cuda()

            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            err1, err5 = accuracy(output, target, topk=(1, 5))

            losses.update(loss.item(), input.size(0))

            top1.update(err1.item(), input.size(0))
            top5.update(err5.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0 and verbose == True:
                print('Test (on val set): [{0}/{1}][{2}/{3}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Top 1-err {top1.val:.4f} ({top1.avg:.4f})\t'
                      'Top 5-err {top5.val:.4f} ({top5.avg:.4f})'.format(
                       epoch, epochs, i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1, top5=top5))

    print('* Epoch: [{0}/{1}]\t Top 1-err {top1.avg:.3f}  Top 5-err {top5.avg:.3f}\t Test Loss {loss.avg:.3f}'.format(
        epoch, epochs, top1=top1, top5=top5, loss=losses))
    return top1.avg, top5.avg, losses.avg




# Assemble the semi-supervised training set from what validate() collected.
# Inputs stay on the CPU; targets (true labels followed by pseudo-labels) are
# stored as float tensors on the GPU and cast back to long inside
# custom_criterion.
tensor_x = torch.cat([i.float() for i in my_inputs])
tensor_y = torch.cat([i.float().cuda() for i in my_targets])

# Per-sample flag: 0 for the first 10000 samples (true labels), 1 for the
# remaining 40000 (pseudo-labels).  TensorDataset checks that all three
# tensors have the same length, so a mismatch with the collected data fails
# loudly here.
z = torch.cat((torch.zeros(10000,1), torch.ones(40000,1)))
# Flatten (50000, 1) -> (50000,) in one step; the earlier code rebuilt the
# same vector by wrapping each of the 50000 rows in a new tensor inside a
# Python loop and concatenating them.
tensor_z = z.squeeze(1).clone()
custom_dataset = torch.utils.data.TensorDataset(tensor_x,tensor_y, tensor_z)

# num_workers=0 / pin_memory=False: the targets already live on the GPU, so
# batches are assembled in the main process without pinning.
custom_dataloader =  torch.utils.data.DataLoader(
                custom_dataset,
                batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=False)

# for i, (input, target, flag) in enumerate(custom_dataloader):
#   print("Input: ",input)
#   print("Target: ",target)
#   print("Flag:", flag)
  

    
# Stage 2: train on the mixed labelled / pseudo-labelled dataset for `epochs`
# epochs, evaluating on the held-out test loader after each one.  The same
# model and optimizer objects from stage 1 are reused.
for epoch in range(0, epochs):

    adjust_learning_rate(optimizer, epoch)

    # train for one epoch on the semi-supervised dataset
    train_loss = train_pseudo(custom_dataloader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    err1, err5, val_loss = validate_pseudo(val_loader, model, criterion, epoch)

    # Remember the best (lowest) top-1 error and save a checkpoint.
    # NOTE(review): best_err1 / best_err5 are not reset before this loop, so
    # they still hold the stage-1 values, which were measured on the training
    # loader rather than on val_loader.
    is_best = err1 <= best_err1
    best_err1 = min(err1, best_err1)
    if is_best:
        best_err5 = err5

    print('Current best accuracy (top-1 and 5 error):', best_err1, best_err5)
    save_checkpoint({
        'epoch': epoch,
        'arch': net_type,
        'state_dict': model.state_dict(),
        'best_err1': best_err1,
        'best_err5': best_err5,
        'optimizer': optimizer.state_dict(),}, is_best)
    

print('Best accuracy (top-1 and 5 error):', best_err1, best_err5)

Epoch: [0/10][0/2000]	LR: 0.250000	Time 0.019 (0.019)	Data 0.003 (0.003)	Loss 4.1367 (4.1367)	Top 1-err 4.0000 (4.0000)	Top 5-err 0.0000 (0.0000)
Epoch: [0/10][10/2000]	LR: 0.250000	Time 0.004 (0.006)	Data 0.000 (0.001)	Loss 3.8338 (3.6678)	Top 1-err 24.0000 (14.9091)	Top 5-err 20.0000 (9.4545)
Epoch: [0/10][20/2000]	LR: 0.250000	Time 0.004 (0.005)	Data 0.000 (0.001)	Loss 2.9337 (3.5856)	Top 1-err 20.0000 (15.4286)	Top 5-err 12.0000 (9.9048)
Epoch: [0/10][30/2000]	LR: 0.250000	Time 0.004 (0.005)	Data 0.000 (0.001)	Loss 3.4805 (3.5446)	Top 1-err 28.0000 (17.0323)	Top 5-err 12.0000 (9.5484)
Epoch: [0/10][40/2000]	LR: 0.250000	Time 0.005 (0.005)	Data 0.001 (0.001)	Loss 3.7826 (3.5256)	Top 1-err 16.0000 (17.0732)	Top 5-err 8.0000 (9.3659)
Epoch: [0/10][50/2000]	LR: 0.250000	Time 0.005 (0.005)	Data 0.001 (0.001)	Loss 0.9936 (3.4717)	Top 1-err 0.0000 (16.7059)	Top 5-err 0.0000 (9.2549)
Epoch: [0/10][60/2000]	LR: 0.250000	Time 0.005 (0.005)	Data 0.000 (0.001)	Loss 2.2708 (3.4459)	Top 1-err 4.