In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.6.11-py3-none-any.whl (548 kB)
[K     |████████████████████████████████| 548 kB 27.4 MB/s 
[?25hCollecting huggingface-hub
  Downloading huggingface_hub-0.11.0-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 70.7 MB/s 
Installing collected packages: huggingface-hub, timm
Successfully installed huggingface-hub-0.11.0 timm-0.6.11


In [3]:
from torchvision import datasets as ds
from torch.utils.data import DataLoader
from torchvision import transforms as ts
import torchvision as tv
import torch
import torch.nn.functional as F
import torch.nn as nn
import math
import numpy as np
from torch.autograd import Variable
from torch import optim
from matplotlib import pyplot as plt
import torch.backends.cudnn as cudnn
import timm
from PIL import Image
from tqdm import tqdm
import random
import os

%config Completer.use_jedi = False

In [4]:
def setseed(seednum = 20):
    torch.manual_seed(seednum)
    torch.cuda.manual_seed(seednum)
    torch.cuda.manual_seed_all(seednum)
    np.random.seed(seednum)
    cudnn.benchmark = False
    cudnn.deterministic = True
    random.seed(seednum)

In [5]:
setseed(35)

In [6]:
cuda_available = torch.cuda.is_available()
device = torch.device("cuda" if cuda_available else "cpu")
device

device(type='cuda')

# Dataset

In [7]:
!pip install git+https://github.com/ildoonet/cutmix

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/ildoonet/cutmix
  Cloning https://github.com/ildoonet/cutmix to /tmp/pip-req-build-a48pcxll
  Running command git clone -q https://github.com/ildoonet/cutmix /tmp/pip-req-build-a48pcxll
Building wheels for collected packages: cutmix
  Building wheel for cutmix (setup.py) ... [?25l[?25hdone
  Created wheel for cutmix: filename=cutmix-0.1-py3-none-any.whl size=3599 sha256=13a71f08345a382c380ed544a47c60372fe2aa5ecf3a6f058ccf3cd94e4dcc5c
  Stored in directory: /tmp/pip-ephem-wheel-cache-ym04b42l/wheels/a9/81/a7/d3822499b14d97b1e2ef7e7538b70f15355607cfc7526f7cd5
Successfully built cutmix
Installing collected packages: cutmix
Successfully installed cutmix-0.1


In [8]:
from cutmix.cutmix import CutMix
from cutmix.utils import CutMixCrossEntropyLoss

In [9]:
transform = ts.Compose(
    [
        ts.ToTensor(),
        ts.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]
)

In [47]:
train_set = ds.CIFAR10(root='../data/', train=True, transform=transform, download=True)
train_set_origin = ds.CIFAR10(root='../data/', train=True, transform=transform, download=True)
train_set = CutMix(train_set, num_class=10, beta=1.0, prob=0.5, num_mix=2)

val_set = tv.datasets.CIFAR10(root='../data/', train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [11]:
train_set = train_set + train_set_origin

In [48]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True, num_workers=0)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=64, shuffle=False, num_workers=0)

In [13]:
def make_test_list(data_dir):
    
    test_img_list = list()
    
    files = os.listdir(data_dir)
    
    for i in range(10):
        img_file = data_dir + "/" + files[i]
        img = os.listdir(img_file)
        for j in range(200):
            img_path = data_dir + "/" +  files[i] + "/" + img[j]
            
            test_img_list.append(img_path)
    return test_img_list

In [14]:
img_list = make_test_list("./drive/MyDrive/Statistical_Deep_Image")

In [15]:
def make_test_label(label_list):
    
    test_label_list = list()
    
    for i in [0,2,1,3,4,5,6,7,8,9]:
        for j in range(200):
            test_label_list.append(i)
    
    return test_label_list

In [16]:
label = os.listdir("./drive/MyDrive/Statistical_Deep_Image")
img_label_list = make_test_label(label)

In [17]:
class testset(torch.utils.data.Dataset):
    def __init__(self, img_list, img_label_list, transform):
        self.file_list = img_list
        self.labels = img_label_list
        self.transform = transform
        
    def __len__(self):
        return len(self.file_list)
    
    def __getitem__(self, index):
        img_path = self.file_list[index]
        img = Image.open(img_path)
        img_transformed = self.transform(img)
        label = self.labels[index]
        return img_transformed, label

In [18]:
test_set = testset(img_list = img_list,
                  img_label_list = img_label_list,
                  transform = transform)

In [19]:
test_loader = torch.utils.data.DataLoader(test_set, batch_size = 64, shuffle=False, num_workers=0)

In [20]:
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Model

In [42]:
import copy
from functools import partial
from collections import OrderedDict





class ConvBNAct(nn.Sequential):
    """Convolution-Normalization-Activation Module"""
    def __init__(self, in_channel, out_channel, kernel_size, stride, groups, norm_layer, act, conv_layer=nn.Conv2d):
        super(ConvBNAct, self).__init__(
            conv_layer(in_channel, out_channel, kernel_size, stride=stride, padding=(kernel_size-1)//2, groups=groups, bias=False),
            norm_layer(out_channel),
            act()
        )


class SEUnit(nn.Module):
    """Squeeze-Excitation Unit
    paper: https://openaccess.thecvf.com/content_cvpr_2018/html/Hu_Squeeze-and-Excitation_Networks_CVPR_2018_paper
    """
    def __init__(self, in_channel, reduction_ratio=4, act1=partial(nn.SiLU, inplace=True), act2=nn.Sigmoid):
        super(SEUnit, self).__init__()
        hidden_dim = in_channel // reduction_ratio
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Conv2d(in_channel, hidden_dim, (1, 1), bias=True)
        self.fc2 = nn.Conv2d(hidden_dim, in_channel, (1, 1), bias=True)
        self.act1 = act1()
        self.act2 = act2()

    def forward(self, x):
        return x * self.act2(self.fc2(self.act1(self.fc1(self.avg_pool(x)))))


class StochasticDepth(nn.Module):
    """StochasticDepth
    paper: https://link.springer.com/chapter/10.1007/978-3-319-46493-0_39
    :arg
        - prob: Probability of dying
        - mode: "row" or "all". "row" means that each row survives with different probability
    """
    def __init__(self, prob, mode):
        super(StochasticDepth, self).__init__()
        self.prob = prob
        self.survival = 1.0 - prob
        self.mode = mode

    def forward(self, x):
        if self.prob == 0.0 or not self.training:
            return x
        else:
            shape = [x.size(0)] + [1] * (x.ndim - 1) if self.mode == 'row' else [1]
            return x * torch.empty(shape).bernoulli_(self.survival).div_(self.survival).to(x.device)


class MBConvConfig:
    """EfficientNet Building block configuration"""
    def __init__(self, expand_ratio: float, kernel: int, stride: int, in_ch: int, out_ch: int, layers: int,
                 use_se: bool, fused: bool, act=nn.SiLU, norm_layer=nn.BatchNorm2d):
        self.expand_ratio = expand_ratio
        self.kernel = kernel
        self.stride = stride
        self.in_ch = in_ch
        self.out_ch = out_ch
        self.num_layers = layers
        self.act = act
        self.norm_layer = norm_layer
        self.use_se = use_se
        self.fused = fused

    @staticmethod
    def adjust_channels(channel, factor, divisible=8):
        new_channel = channel * factor
        divisible_channel = max(divisible, (int(new_channel + divisible / 2) // divisible) * divisible)
        divisible_channel += divisible if divisible_channel < 0.9 * new_channel else 0
        return divisible_channel


class MBConv(nn.Module):
    """EfficientNet main building blocks
    :arg
        - c: MBConvConfig instance
        - sd_prob: stochastic path probability
    """
    def __init__(self, c, sd_prob=0.0):
        super(MBConv, self).__init__()
        inter_channel = c.adjust_channels(c.in_ch, c.expand_ratio)
        block = []

        if c.expand_ratio == 1:
            block.append(('fused', ConvBNAct(c.in_ch, inter_channel, c.kernel, c.stride, 1, c.norm_layer, c.act)))
        elif c.fused:
            block.append(('fused', ConvBNAct(c.in_ch, inter_channel, c.kernel, c.stride, 1, c.norm_layer, c.act)))
            block.append(('fused_point_wise', ConvBNAct(inter_channel, c.out_ch, 1, 1, 1, c.norm_layer, nn.Identity)))
        else:
            block.append(('linear_bottleneck', ConvBNAct(c.in_ch, inter_channel, 1, 1, 1, c.norm_layer, c.act)))
            block.append(('depth_wise', ConvBNAct(inter_channel, inter_channel, c.kernel, c.stride, inter_channel, c.norm_layer, c.act)))
            block.append(('se', SEUnit(inter_channel, 4 * c.expand_ratio)))
            block.append(('point_wise', ConvBNAct(inter_channel, c.out_ch, 1, 1, 1, c.norm_layer, nn.Identity)))

        self.block = nn.Sequential(OrderedDict(block))
        self.use_skip_connection = c.stride == 1 and c.in_ch == c.out_ch
        self.stochastic_path = StochasticDepth(sd_prob, "row")

    def forward(self, x):
        out = self.block(x)
        if self.use_skip_connection:
            out = x + self.stochastic_path(out)
        return out


class EfficientNetV2(nn.Module):
    def __init__(self, layer_infos, out_channels=1280, nclass=0, dropout=0.2, stochastic_depth=0.0,
                 block=MBConv, act_layer=nn.SiLU, norm_layer=nn.BatchNorm2d):
        super(EfficientNetV2, self).__init__()
        self.layer_infos = layer_infos
        self.norm_layer = norm_layer
        self.act = act_layer

        self.in_channel = layer_infos[0].in_ch
        self.final_stage_channel = layer_infos[-1].out_ch
        self.out_channels = out_channels

        self.cur_block = 0
        self.num_block = sum(stage.num_layers for stage in layer_infos)
        self.stochastic_depth = stochastic_depth

        self.stem = ConvBNAct(3, self.in_channel, 3, 2, 1, self.norm_layer, self.act)
        self.blocks = nn.Sequential(*self.make_stages(layer_infos, block))
        self.head = nn.Sequential(OrderedDict([
            ('bottleneck', ConvBNAct(self.final_stage_channel, out_channels, 1, 1, 1, self.norm_layer, self.act)),
            ('avgpool', nn.AdaptiveAvgPool2d((1, 1))),
            ('flatten', nn.Flatten()),
            ('dropout', nn.Dropout(p=dropout, inplace=True)),
            ('classifier', nn.Linear(out_channels, nclass) if nclass else nn.Identity())
        ]))

    def make_stages(self, layer_infos, block):
        return [layer for layer_info in layer_infos for layer in self.make_layers(copy.copy(layer_info), block)]

    def make_layers(self, layer_info, block):
        layers = []
        for i in range(layer_info.num_layers):
            layers.append(block(layer_info, sd_prob=self.get_sd_prob()))
            layer_info.in_ch = layer_info.out_ch
            layer_info.stride = 1
        return layers

    def get_sd_prob(self):
        sd_prob = self.stochastic_depth * (self.cur_block / self.num_block)
        self.cur_block += 1
        return sd_prob

    def forward(self, x):
        return self.head(self.blocks(self.stem(x)))

    def change_dropout_rate(self, p):
        self.head[-2] = nn.Dropout(p=p, inplace=True)


def efficientnet_v2_init(model):
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out')
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.ones_(m.weight)
            nn.init.zeros_(m.bias)
        elif isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, mean=0.0, std=0.01)
            nn.init.zeros_(m.bias)


def get_efficientnet_v2(model_name, nclass=0, dropout=0.1, stochastic_depth=0.2, **kwargs):
    residual_config = [MBConvConfig(*layer_config) for layer_config in get_efficientnet_v2_structure(model_name)]
    model = EfficientNetV2(residual_config, 1280, nclass, dropout=dropout, stochastic_depth=stochastic_depth, block=MBConv, act_layer=nn.SiLU)
    efficientnet_v2_init(model)

    return model

def get_efficientnet_v2_hyperparam(model_name):
    from box import Box
    # train_size, eval_size, dropout, randaug, mixup
    if 'efficientnet_v2_s' in model_name:
        end = 300, 384, 0.2, 10, 0
    elif 'efficientnet_v2_m' in model_name:
        end = 384, 480, 0.3, 15, 0.2
    elif 'efficientnet_v2_l' in model_name:
        end = 384, 480, 0.4, 20, 0.5
    elif 'efficientnet_v2_xl' in model_name:
        end = 384, 512, 0.4, 20, 0.5
    return Box({"init_train_size": 128, "init_dropout": 0.1, "init_randaug": 5, "init_mixup": 0,
             "end_train_size": end[0], "end_dropout": end[2], "end_randaug": end[3], "end_mixup": end[4], "eval_size": end[1]})


def get_efficientnet_v2_structure(model_name):
    if 'efficientnet_v2_s' in model_name:
        return [
            # e k  s  in  out xN  se   fused
            (1, 3, 1, 24, 24, 2, False, True),
            (4, 3, 2, 24, 48, 4, False, True),
            (4, 3, 2, 48, 64, 4, False, True),
            (4, 3, 2, 64, 128, 6, True, False),
            (6, 3, 1, 128, 160, 9, True, False),
            (6, 3, 2, 160, 256, 15, True, False),
        ]
    elif 'efficientnet_v2_m' in model_name:
        return [
            # e k  s  in  out xN  se   fused
            (1, 3, 1, 24, 24, 3, False, True),
            (4, 3, 2, 24, 48, 5, False, True),
            (4, 3, 2, 48, 80, 5, False, True),
            (4, 3, 2, 80, 160, 7, True, False),
            (6, 3, 1, 160, 176, 14, True, False),
            (6, 3, 2, 176, 304, 18, True, False),
            (6, 3, 1, 304, 512, 5, True, False),
        ]
    elif 'efficientnet_v2_l' in model_name:
        return [
            # e k  s  in  out xN  se   fused
            (1, 3, 1, 32, 32, 4, False, True),
            (4, 3, 2, 32, 64, 7, False, True),
            (4, 3, 2, 64, 96, 7, False, True),
            (4, 3, 2, 96, 192, 10, True, False),
            (6, 3, 1, 192, 224, 19, True, False),
            (6, 3, 2, 224, 384, 25, True, False),
            (6, 3, 1, 384, 640, 7, True, False),
        ]
    elif 'efficientnet_v2_xl' in model_name:
        return [
            # e k  s  in  out xN  se   fused
            (1, 3, 1, 32, 32, 4, False, True),
            (4, 3, 2, 32, 64, 8, False, True),
            (4, 3, 2, 64, 96, 8, False, True),
            (4, 3, 2, 96, 192, 16, True, False),
            (6, 3, 1, 192, 256, 24, True, False),
            (6, 3, 2, 256, 512, 32, True, False),
            (6, 3, 1, 512, 640, 8, True, False),
        ]

In [44]:
net = get_efficientnet_v2(model_name='efficientnet_v2_s', nclass=10, dropout=0.1, stochastic_depth=0.2).to(device)
model = get_efficientnet_v2(model_name='efficientnet_v2_s', nclass=10, dropout=0.1, stochastic_depth=0.2).to(device)

In [45]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = CutMixCrossEntropyLoss(True)

# Train

In [50]:
def train(num_epoch):
    best_accuracy = 0.0 
    
    for epoch in tqdm(range(num_epoch)):

        running_train_loss = 0.0
        running_val_loss = 0.0
        true = 0
        total = 0

        for i, data in enumerate(train_loader, 0) :
            inputs, labels = data[0].to(device), data[1].to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            running_train_loss += loss.item()
        
        with torch.no_grad():
            model.eval()
            for i, data in enumerate(val_loader, 0):
                inputs, labels = data[0].to(device), data[1].to(device)
                predicted_outputs = model(inputs)
                val_loss = criterion(predicted_outputs, labels)
                
                _, predicted = torch.max(predicted_outputs, 1) 
                
                running_val_loss += val_loss.item()
                total += labels.size(0)
                true += (predicted == labels).sum().item()
                
        train_loss_per_epoch = running_train_loss / len(train_loader)
        val_loss = running_val_loss/len(val_loader)
        accuracy = (100 * true / total)     
        
        if accuracy > best_accuracy:
            torch.save(model.state_dict(), 'EfficientNet_CutMix_weights.pth')
            best_accuracy = accuracy
            
        print('epoch: %d' %(epoch+1), ' train_loss: %.3f' %train_loss_per_epoch, ' val_loss: %.4f' %val_loss, ' Accuracy: %.2f %%' % (accuracy))

In [51]:
train(20)

  5%|▌         | 1/20 [02:28<47:05, 148.69s/it]

epoch: 1  train_loss: 2.079  val_loss: 1.6604  Accuracy: 41.68 %


 10%|█         | 2/20 [04:45<42:31, 141.75s/it]

epoch: 2  train_loss: 1.852  val_loss: 1.3917  Accuracy: 50.37 %


 15%|█▌        | 3/20 [07:03<39:39, 139.97s/it]

epoch: 3  train_loss: 1.742  val_loss: 1.2469  Accuracy: 55.74 %


 20%|██        | 4/20 [09:20<37:03, 138.99s/it]

epoch: 4  train_loss: 1.642  val_loss: 1.1306  Accuracy: 62.47 %


 25%|██▌       | 5/20 [11:37<34:33, 138.26s/it]

epoch: 5  train_loss: 1.552  val_loss: 0.9599  Accuracy: 68.25 %


 30%|███       | 6/20 [13:54<32:08, 137.74s/it]

epoch: 6  train_loss: 1.489  val_loss: 0.9252  Accuracy: 68.95 %


 35%|███▌      | 7/20 [16:09<29:37, 136.70s/it]

epoch: 7  train_loss: 1.431  val_loss: 0.8851  Accuracy: 71.31 %


 40%|████      | 8/20 [18:24<27:13, 136.16s/it]

epoch: 8  train_loss: 1.393  val_loss: 0.8373  Accuracy: 71.92 %


 45%|████▌     | 9/20 [20:39<24:53, 135.82s/it]

epoch: 9  train_loss: 1.357  val_loss: 0.7861  Accuracy: 74.86 %


 50%|█████     | 10/20 [22:54<22:35, 135.53s/it]

epoch: 10  train_loss: 1.315  val_loss: 0.7393  Accuracy: 76.22 %


 55%|█████▌    | 11/20 [25:07<20:14, 134.91s/it]

epoch: 11  train_loss: 1.282  val_loss: 0.7171  Accuracy: 76.79 %


 60%|██████    | 12/20 [27:21<17:56, 134.58s/it]

epoch: 12  train_loss: 1.251  val_loss: 0.6948  Accuracy: 77.64 %


 65%|██████▌   | 13/20 [29:36<15:42, 134.64s/it]

epoch: 13  train_loss: 1.231  val_loss: 0.6690  Accuracy: 78.55 %


 70%|███████   | 14/20 [31:50<13:26, 134.47s/it]

epoch: 14  train_loss: 1.211  val_loss: 0.6586  Accuracy: 78.74 %


 75%|███████▌  | 15/20 [34:05<11:13, 134.63s/it]

epoch: 15  train_loss: 1.180  val_loss: 0.6899  Accuracy: 78.09 %


 80%|████████  | 16/20 [36:19<08:57, 134.39s/it]

epoch: 16  train_loss: 1.167  val_loss: 0.6514  Accuracy: 79.23 %


 85%|████████▌ | 17/20 [38:33<06:43, 134.36s/it]

epoch: 17  train_loss: 1.148  val_loss: 0.6615  Accuracy: 79.03 %


 90%|█████████ | 18/20 [40:47<04:28, 134.18s/it]

epoch: 18  train_loss: 1.140  val_loss: 0.6503  Accuracy: 79.60 %


 95%|█████████▌| 19/20 [42:58<02:13, 133.42s/it]

epoch: 19  train_loss: 1.146  val_loss: 0.6884  Accuracy: 77.95 %


100%|██████████| 20/20 [45:09<00:00, 135.47s/it]

epoch: 20  train_loss: 1.113  val_loss: 0.6586  Accuracy: 78.70 %





# Test

In [52]:
model.load_state_dict(torch.load('EfficientNet_CutMix_weights.pth'))
model.eval()

EfficientNetV2(
  (stem): ConvBNAct(
    (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): SiLU()
  )
  (blocks): Sequential(
    (0): MBConv(
      (block): Sequential(
        (fused): ConvBNAct(
          (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): SiLU()
        )
      )
      (stochastic_path): StochasticDepth()
    )
    (1): MBConv(
      (block): Sequential(
        (fused): ConvBNAct(
          (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): SiLU()
        )
      )
      (stochastic_path): StochasticDepth()
    )
    (2): MBConv(
      (block): Sequential(
  

In [53]:
correct = 0
total = 0

with torch.no_grad():
    for data in tqdm(test_loader):
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
print('Accuracy: %d %%' % (100 * correct / total))

100%|██████████| 32/32 [23:07<00:00, 43.35s/it]

Accuracy: 9 %



