<a href="https://colab.research.google.com/github/linhexiu/Data-Augmentation/blob/main/MixUp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

MixUp：一种数据增强策略。本笔记主要用于个人学习与整理。代码来源：https://gitcode.net/mirrors/facebookresearch/mixup-cifar10


```
acc. 
```




In [None]:
# Query GPU 0 through the IPython shell escape (`!`), which captures the
# command's output as a list of lines; join them into one string and print it.
gpu_info = !nvidia-smi -i 0
gpu_info = '\n'.join(gpu_info)
print(gpu_info)

Thu Nov 17 04:57:56 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   67C    P0    28W /  70W |   3654MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import argparse
import csv
import os
from datetime import datetime
import numpy as np
import torch
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from tqdm import tqdm

In [None]:
# Hyper-parameters are declared with argparse (mirroring the original training
# script) and then parsed from an empty argument list, so every option takes
# its default. Edit the defaults below to change a run.
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
parser.add_argument('--resume', '-r', action='store_true',
                    help='resume from checkpoint')
parser.add_argument('--model', default="ResNet18", type=str,
                    help='model type (default: ResNet18)')
parser.add_argument('--name', default='mixup', type=str, help='name of run')
parser.add_argument('--seed', default=20221117, type=int, help='random seed')
parser.add_argument('--batch-size', default=128, type=int, help='batch size')
parser.add_argument('--epoch', default=200, type=int,
                    help='total epochs to run')
# The flag stores False into `args.augment`; the help text now says so
# (it previously read as if the flag *enabled* augmentation).
parser.add_argument('--no-augment', dest='augment', action='store_false',
                    help='disable standard augmentation '
                         '(default: augmentation enabled)')
parser.add_argument('--decay', default=1e-4, type=float, help='weight decay')
parser.add_argument('--alpha', default=1., type=float,
                    help='mixup interpolation coefficient (default: 1)')
# Pass an explicit empty argv list: inside a notebook sys.argv belongs to the
# kernel, and parse_args expects a list of strings rather than a bare string.
args = parser.parse_args([])
print(args)

Namespace(alpha=1.0, augment=True, batch_size=128, decay=0.0001, epoch=200, lr=0.1, model='ResNet18', name='mixup', resume=False, seed=20221117)


In [None]:
# Record once whether a CUDA device is available; later cells read `use_cuda`
# to decide whether to move the model and batches to the GPU.
use_cuda = torch.cuda.is_available()
print(use_cuda)

True


In [None]:
# Training bookkeeping. Both values are overwritten by the model cell when
# resuming from a checkpoint.
best_acc = 0
start_epoch = 0

# A seed of 0 means "do not seed".
if args.seed != 0:
  torch.manual_seed(args.seed)
  # mixup_data draws its mixing coefficient from np.random.beta, so NumPy's
  # global RNG must be seeded too or runs are not reproducible.
  # np.random.seed only accepts values in [0, 2**32 - 1].
  np.random.seed(args.seed % (2 ** 32))

In [None]:
# Data
print("==> Preparing data ... ")
# Training-time preprocessing. With augmentation enabled, each image is
# randomly cropped to 32x32 after 4 pixels of padding and randomly flipped
# horizontally before tensor conversion and per-channel normalization.
if args.augment:
  transform_train=transforms.Compose([
      transforms.RandomCrop(32,padding=4),
      transforms.RandomHorizontalFlip(),
      transforms.ToTensor(),
      transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
  ])
else:
  # Fixed: this branch used to assign to the misspelled name
  # `transfrom_train`, leaving `transform_train` undefined and raising a
  # NameError below whenever augmentation was disabled.
  transform_train=transforms.Compose([
      transforms.ToTensor(),
      transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
  ])

# Evaluation uses the same normalization but no random augmentation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset=datasets.CIFAR10(root='data', train=True, download=True, transform=transform_train)
trainloader=torch.utils.data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=8)

testset=datasets.CIFAR10(root='data', train=False, download=True, transform=transform_test)
testloader=torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=8)

==> Preparing data ... 
Files already downloaded and verified
Files already downloaded and verified


In [None]:
# ResNet18

'''ResNet in PyTorch.

BasicBlock and Bottleneck module is from the original ResNet paper:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385

PreActBlock and PreActBottleneck module is from the later paper:
[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Identity Mappings in Deep Residual Networks. arXiv:1603.05027
'''
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.autograd import Variable


def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution with padding 1 and the given stride."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)


class BasicBlock(nn.Module):
    """Residual block of two 3x3 conv + BatchNorm layers with a ReLU after the addition.

    The shortcut is an empty ``nn.Sequential`` unless the stride is not 1 or
    the channel count changes, in which case it is a strided 1x1 convolution
    followed by BatchNorm.
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = conv3x3(in_planes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride == 1 and in_planes == out_planes:
            # Input and output shapes agree: pass the input through unchanged.
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        h = self.bn1(self.conv1(x))
        h = F.relu(h)
        h = self.bn2(self.conv2(h))
        h += self.shortcut(x)
        return F.relu(h)


class PreActBlock(nn.Module):
    """Pre-activation variant of BasicBlock: BatchNorm and ReLU precede each convolution.

    The shortcut branch is fed the activated input ``relu(bn1(x))``, and no
    activation is applied after the residual addition.
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = conv3x3(in_planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)

        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()
        else:
            # Projection shortcut: a strided 1x1 convolution without BatchNorm.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
            )

    def forward(self, x):
        pre = F.relu(self.bn1(x))
        skip = self.shortcut(pre)
        h = self.conv1(pre)
        h = self.conv2(F.relu(self.bn2(h)))
        h += skip
        return h


class Bottleneck(nn.Module):
    """Residual block of 1x1, 3x3 and 1x1 convolutions, each followed by BatchNorm.

    The output has ``expansion * planes`` channels; the stride is applied in
    the 3x3 convolution. A ReLU follows the residual addition.
    """
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already match: the shortcut is a pass-through.
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        h = x
        # The first two conv/BN pairs are each followed by a ReLU.
        for conv, bn in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            h = F.relu(bn(conv(h)))
        h = self.bn3(self.conv3(h))
        h += self.shortcut(x)
        return F.relu(h)


class PreActBottleneck(nn.Module):
    """Pre-activation variant of Bottleneck: BatchNorm and ReLU precede each convolution.

    The shortcut branch is fed the activated input ``relu(bn1(x))``, and no
    activation is applied after the residual addition.
    """
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)

        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()
        else:
            # Projection shortcut: a strided 1x1 convolution without BatchNorm.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
            )

    def forward(self, x):
        pre = F.relu(self.bn1(x))
        skip = self.shortcut(pre)
        h = self.conv1(pre)
        # The remaining stages each apply BatchNorm -> ReLU -> convolution.
        for bn, conv in ((self.bn2, self.conv2), (self.bn3, self.conv3)):
            h = conv(F.relu(bn(h)))
        h += skip
        return h


class ResNet(nn.Module):
    """ResNet with a 3x3 stem, four residual stages and a linear classifier.

    Args:
        block: residual block class; must expose an ``expansion`` attribute
            and accept ``(in_planes, planes, stride)``.
        num_blocks: four integers giving the number of blocks per stage.
        num_classes: size of the classifier output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running channel count; _make_layer updates it after each block.
        self.in_planes = 64

        self.conv1 = conv3x3(3, 64)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack blocks into one stage; only the first block uses ``stride``."""
        stage = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x, lin=0, lout=5):
        """Run the network, optionally entering and leaving at intermediate stages.

        Stage ``i`` (0 = stem, 1-4 = layer1-layer4) is applied when
        ``lin < i + 1`` and ``lout > i - 1``. The pooling and classifier head
        is applied when ``lout > 4``.
        """
        def stem(t):
            return F.relu(self.bn1(self.conv1(t)))

        stages = (stem, self.layer1, self.layer2, self.layer3, self.layer4)
        out = x
        for idx, stage in enumerate(stages):
            if lin < idx + 1 and lout > idx - 1:
                out = stage(out)
        if lout > 4:
            out = F.avg_pool2d(out, 4)
            out = out.view(out.size(0), -1)
            out = self.linear(out)
        return out


def ResNet18():
    """Build a ResNet from PreActBlock with two blocks in each of the four stages."""
    return ResNet(block=PreActBlock, num_blocks=[2, 2, 2, 2])

def ResNet34():
    """Build a ResNet from BasicBlock with 3, 4, 6 and 3 blocks per stage."""
    return ResNet(block=BasicBlock, num_blocks=[3, 4, 6, 3])

def ResNet50():
    """Build a ResNet from Bottleneck with 3, 4, 6 and 3 blocks per stage."""
    return ResNet(block=Bottleneck, num_blocks=[3, 4, 6, 3])

def ResNet101():
    """Build a ResNet from Bottleneck with 3, 4, 23 and 3 blocks per stage."""
    return ResNet(block=Bottleneck, num_blocks=[3, 4, 23, 3])

def ResNet152():
    """Build a ResNet from Bottleneck with 3, 8, 36 and 3 blocks per stage."""
    return ResNet(block=Bottleneck, num_blocks=[3, 8, 36, 3])


def test():
    """Smoke-test ResNet18 on one random 1x3x32x32 input and print the output size.

    NOTE: a later cell defines ``test(epoch)``, which replaces this function
    once that cell has run; call this one only before then.
    """
    net = ResNet18()
    # The deprecated Variable wrapper is dropped (plain tensors are accepted
    # directly), and no autograd graph is needed for a shape check.
    with torch.no_grad():
        y = net(torch.randn(1, 3, 32, 32))
    print(y.size())

# test()


In [None]:
# Model
if args.resume:
  print("==> Resuming from checkpoint ... ")
  assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
  # The loaded dict is bound to `ckpt` rather than `checkpoint` so it cannot
  # clobber the `checkpoint()` function if cells are re-run out of order.
  # map_location lets a checkpoint written on a GPU machine load on CPU.
  # NOTE(review): the file holds a fully pickled model; recent PyTorch
  # releases may require weights_only=False for such files - confirm against
  # the installed version. Only load checkpoints you trust.
  ckpt = torch.load('./checkpoint/ckpt.t7' + args.name + '_'
                    + str(args.seed),
                    map_location=None if use_cuda else 'cpu')
  net = ckpt['net']
  # checkpoint() saves the net after a later cell has wrapped it in
  # DataParallel. Unwrap it here so that cell does not wrap it a second time.
  if isinstance(net, torch.nn.DataParallel):
    net = net.module
  best_acc = ckpt['acc']
  start_epoch = ckpt['epoch'] + 1
  rng_state = ckpt['rng_state']
  torch.set_rng_state(rng_state)
else:
  print('==> Building model..')
  net = ResNet18()

print(net)

==> Building model..
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): PreActBlock(
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (shortcut): Sequential()
    )
    (1): PreActBlock(
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_s

In [None]:
# Make sure the directory that receives the CSV training log exists.
os.makedirs('results', exist_ok=True)

# Per-run log file, keyed by run name and seed.
logname = 'results/log_' + '_{}_{}.csv'.format(args.name, args.seed)

if use_cuda:
    # Move the model to the GPU and wrap it for multi-GPU data parallelism.
    net = torch.nn.DataParallel(net.cuda())
    print(torch.cuda.device_count())
    cudnn.benchmark = True
    print('Using CUDA..')

1
Using CUDA..


In [None]:
# Cross-entropy loss, and SGD with momentum 0.9 using the learning rate and
# weight decay taken from the parsed arguments.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    net.parameters(),
    lr=args.lr,
    momentum=0.9,
    weight_decay=args.decay,
)

def mixup_data(x, y, alpha=1.0, use_cuda=True):
  """Mix each sample in a batch with another sample from the same batch.

  Args:
    x: batch of inputs, indexed along its first dimension.
    y: batch of targets, indexed along its first dimension.
    alpha: the mixing weight is drawn from Beta(alpha, alpha) when
      alpha > 0; otherwise it is fixed to 1 (no mixing).
    use_cuda: when true, the permutation index is moved to the GPU.

  Returns:
    (mixed_x, y_a, y_b, lam): the mixed inputs, the original targets, the
    targets of the permuted partners, and the mixing weight.
  """
  lam = np.random.beta(alpha, alpha) if alpha > 0 else 1

  # Random pairing of every sample with a partner from the same batch.
  perm = torch.randperm(x.size(0))
  if use_cuda:
    perm = perm.cuda()

  partner_x = x[perm, :]
  mixed_x = lam * x + (1 - lam) * partner_x
  return mixed_x, y, y[perm], lam

In [None]:
def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Return the lam-weighted sum of the loss against y_a and the loss against y_b."""
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

In [None]:
def train(epoch):
  """Run one mixup training epoch over ``trainloader``.

  Args:
    epoch: index of the current epoch (used only in the progress text).

  Returns:
    (mean training loss, mean regularization loss, training accuracy in
    percent). The accuracy is the lam-weighted agreement with both mixed
    targets. The regularization loss is never accumulated here and is kept
    only so the return/log layout stays unchanged.
  """
  net.train()
  train_loss = 0.0
  reg_loss = 0.0
  correct = 0.0
  total = 0
  train_bar = tqdm(trainloader)
  # Iterate the tqdm wrapper itself: looping over `trainloader` directly left
  # the progress bar stuck at 0 for the whole epoch.
  for inputs, targets in train_bar:
    if use_cuda:
      inputs, targets = inputs.cuda(), targets.cuda()

    inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, args.alpha, use_cuda)

    outputs = net(inputs)
    loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)

    # Weight the batch-mean loss by the real batch size; the last batch can
    # be smaller than trainloader.batch_size.
    batch_size = targets.size(0)
    train_loss += loss.item() * batch_size
    _, predicted = torch.max(outputs.detach(), 1)
    total += batch_size
    correct += (lam * predicted.eq(targets_a).sum().item()
            + (1 - lam) * predicted.eq(targets_b).sum().item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    train_bar.set_description('Train Epoch: [{}/{}], Loss: {:.4f}, Reg: {:.4f}, Acc: {:.4f}%'.format(epoch, args.epoch, train_loss/total, reg_loss/total, 100.*correct/total))
  return (train_loss/total, reg_loss/total, float(100.*correct/total))

In [None]:
def test(epoch):
  """Evaluate the model on ``testloader`` and checkpoint when appropriate.

  A checkpoint is written when the accuracy beats ``best_acc`` or when this
  is the final epoch of the run. ``best_acc`` is updated in place.

  NOTE: this definition replaces the zero-argument ``test()`` smoke test
  defined in the ResNet cell.

  Args:
    epoch: index of the current epoch.

  Returns:
    (mean test loss, test accuracy in percent).
  """
  global best_acc
  net.eval()
  test_loss=0.0
  correct=0
  total=0
  test_bar = tqdm(testloader)
  # `Variable(..., volatile=True)` no longer disables autograd; torch.no_grad()
  # is what actually stops graph construction during evaluation.
  with torch.no_grad():
    # Iterate the tqdm wrapper so the progress bar advances.
    for inputs, targets in test_bar:
      if use_cuda:
        inputs, targets = inputs.cuda(), targets.cuda()
      outputs = net(inputs)
      loss = criterion(outputs, targets)
      # Weight by the real batch size rather than the nominal one.
      batch_size = targets.size(0)
      test_loss += loss.item() * batch_size
      _, predicted = torch.max(outputs, 1)
      total += batch_size
      correct += predicted.eq(targets).sum().item()
      test_bar.set_description('Test Epoch: [{}/{}], Loss: {:.4f}, Acc: {:.4f}%'.format(epoch, args.epoch, test_loss/total, 100.*correct/total))
  acc = 100.* correct/total
  # The training loop runs range(start_epoch, args.epoch), so the final epoch
  # is args.epoch - 1 even after a resume. The previous test,
  # start_epoch + args.epoch - 1, never matched once start_epoch > 0.
  if epoch == args.epoch - 1 or acc > best_acc:
    checkpoint(acc, epoch)
  if acc > best_acc:
      best_acc = acc
  return (test_loss/total, acc)

In [None]:
def checkpoint(acc, epoch):
    """Write the model, accuracy, epoch and torch RNG state to the run's checkpoint file.

    The file name is built from the run name and seed, matching the path the
    resume branch of the model cell loads from.
    """
    print('Saving..')
    snapshot = dict(
        net=net,
        acc=acc,
        epoch=epoch,
        rng_state=torch.get_rng_state(),
    )
    os.makedirs('checkpoint', exist_ok=True)
    target = './checkpoint/ckpt.t7' + args.name + '_' + str(args.seed)
    torch.save(snapshot, target)

In [None]:
def adjust_learning_rate(optimizer, epoch):
    """Apply the step schedule: args.lr, divided by 10 from epoch 100 and again from epoch 150."""
    lr = args.lr
    for milestone in (100, 150):
        if epoch >= milestone:
            lr /= 10
    # Write the resulting rate into every parameter group.
    for group in optimizer.param_groups:
        group['lr'] = lr

In [None]:
# Create the CSV log with its header row only if it does not exist yet, so a
# resumed run keeps appending to the same file.
if not os.path.exists(logname):
    # newline='' is what the csv module expects of its file objects; without
    # it, platforms that translate '\n' emit an extra blank line per row.
    with open(logname, 'w', newline='') as logfile:
        logwriter = csv.writer(logfile, delimiter=',')
        logwriter.writerow(['epoch', 'train loss', 'reg loss', 'train acc',
                            'test loss', 'test acc'])

In [None]:
# Main loop: one training pass and one evaluation pass per epoch, with the
# metrics appended to the CSV log after each epoch.
for epoch in range(start_epoch, args.epoch):
    # Set the learning rate *before* training the epoch. Doing it afterwards
    # delayed each decay step by one epoch, and a resumed run trained its
    # first epoch at the undecayed args.lr because the optimizer is rebuilt.
    adjust_learning_rate(optimizer, epoch)
    train_loss, reg_loss, train_acc = train(epoch)
    test_loss, test_acc = test(epoch)
    # newline='' is what the csv module expects of its file objects.
    with open(logname, 'a', newline='') as logfile:
        logwriter = csv.writer(logfile, delimiter=',')
        logwriter.writerow([epoch, train_loss, reg_loss, train_acc, test_loss,
                            test_acc])







  0%|          | 0/391 [00:00<?, ?it/s][A[A[A[A[A




Train Epoch: [0/200], Loss: 1.2657, Reg: 0.0000, Acc: 57.7316%:   0%|          | 0/391 [00:00<?, ?it/s][A[A[A[A[A




Train Epoch: [0/200], Loss: 1.2241, Reg: 0.0000, Acc: 61.0154%:   0%|          | 0/391 [00:00<?, ?it/s][A[A[A[A[A




Train Epoch: [0/200], Loss: 1.1902, Reg: 0.0000, Acc: 62.9267%:   0%|          | 0/391 [00:01<?, ?it/s][A[A[A[A[A




Train Epoch: [0/200], Loss: 0.9627, Reg: 0.0000, Acc: 70.9838%:   0%|          | 0/391 [00:01<?, ?it/s][A[A[A[A[A




Train Epoch: [0/200], Loss: 0.9142, Reg: 0.0000, Acc: 73.6231%:   0%|          | 0/391 [00:01<?, ?it/s][A[A[A[A[A




Train Epoch: [0/200], Loss: 0.9319, Reg: 0.0000, Acc: 73.4026%:   0%|          | 0/391 [00:01<?, ?it/s][A[A[A[A[A




Train Epoch: [0/200], Loss: 0.9873, Reg: 0.0000, Acc: 70.1381%:   0%|          | 0/391 [00:01<?, ?it/s][A[A[A[A[A




Train Epoch: [0/200], Loss: 0.9530, Reg: 0.0000, Acc: 71.8177%:   0%|      

Saving..


[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m




Test Epoch: [6/200], Loss: 0.4815, Acc: 89.0114%:   0%|          | 0/100 [00:04<?, ?it/s][A[A[A[A[A




Test Epoch: [6/200], Loss: 0.4815, Acc: 89.0112%:   0%|          | 0/100 [00:04<?, ?it/s][A[A[A[A[A




Test Epoch: [6/200], Loss: 0.4815, Acc: 89.0333%:   0%|          | 0/100 [00:04<?, ?it/s][A[A[A[A[A




Test Epoch: [6/200], Loss: 0.4808, Acc: 89.0769%:   0%|          | 0/100 [00:05<?, ?it/s][A[A[A[A[A




Test Epoch: [6/200], Loss: 0.4800, Acc: 89.0870%:   0%|          | 0/100 [00:05<?, ?it/s][A[A[A[A[A




Test Epoch: [6/200], Loss: 0.4808, Acc: 89.0645%:   0%|          | 0/100 [00:05<?, ?it/s][A[A[A[A[A




Test Epoch: [6/200], Loss: 0.4813, Acc: 89.0426%:   0%|          | 0/100 [00:05<?, ?it/s][A[A[A[A[A




Test Epoch: [6/200], Loss: 0.4813, Acc: 89.0316%:   0%|          | 0/100 [00:05<?, ?it/s][A[A[A[A[A




Test Epoch: [6/200], Loss: 0.4811, Acc: 89.0417%:   0%|          | 0/100 [00:05<?, 

Saving..







  0%|          | 0/391 [00:00<?, ?it/s][A[A[A[A[A




Train Epoch: [9/200], Loss: 0.9732, Reg: 0.0000, Acc: 75.5330%:   0%|          | 0/391 [00:00<?, ?it/s][A[A[A[A[A




Train Epoch: [9/200], Loss: 0.8482, Reg: 0.0000, Acc: 79.4918%:   0%|          | 0/391 [00:00<?, ?it/s][A[A[A[A[A




Train Epoch: [9/200], Loss: 0.6470, Reg: 0.0000, Acc: 84.3490%:   0%|          | 0/391 [00:01<?, ?it/s][A[A[A[A[A




Train Epoch: [9/200], Loss: 0.6931, Reg: 0.0000, Acc: 83.7272%:   0%|          | 0/391 [00:01<?, ?it/s][A[A[A[A[A




Train Epoch: [9/200], Loss: 0.8051, Reg: 0.0000, Acc: 78.5320%:   0%|          | 0/391 [00:01<?, ?it/s][A[A[A[A[A




Train Epoch: [9/200], Loss: 0.9085, Reg: 0.0000, Acc: 73.7766%:   0%|          | 0/391 [00:01<?, ?it/s][A[A[A[A[A




Train Epoch: [9/200], Loss: 0.9679, Reg: 0.0000, Acc: 70.4616%:   0%|          | 0/391 [00:01<?, ?it/s][A[A[A[A[A




Train Epoch: [9/200], Loss: 0.9788, Reg: 0.0000, Acc: 70.3699%:   0%|      