## 1. Load Package

In [24]:
!pip install colorama



You should consider upgrading via the 'c:\users\dmqa\appdata\local\programs\python\python38\python.exe -m pip install --upgrade pip' command.


In [25]:
import os, math, sys, argparse
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from tqdm import tqdm
from colorama import Fore
from torch.utils.data import DataLoader
from torchvision import transforms as transforms
from torch.utils.tensorboard import SummaryWriter

## 2. 함수 정의

In [26]:
# Custom Transform 함수 정의 --> 2가지 종류의 Augmentation 산출

class Transform_Twice:
    
    def __init__(self, transform):
        self.transform = transform
    
    def __call__(self, img):
        out1 = self.transform(img)
        out2 = self.transform(img)
        
        return out1, out2

In [27]:
# Labeled data를 생성하는 함수

class Labeled_CIFAR10(torchvision.datasets.CIFAR10):
    
    def __init__(self, root, indices=None,
                train=True, transform=None,
                target_transform=None, download=False):
        
        super(Labeled_CIFAR10, self).__init__(root,
                                        train=train,
                                        transform=transform,
                                        target_transform=target_transform,
                                        download=download)

        if indices is not None:
            self.data = self.data[indices]
            self.targets = np.array(self.targets)[indices]
        
        self.data = Transpose(Normalize(self.data))
    
    def __getitem__(self, index):
        
        img, target = self.data[index], self.targets[index]
        
        if self.transform is not None:
            img = self.transform(img)
        
        if self.target_transform is not None:
            target = self.target_transform(target)
        
        return img, target

In [28]:
# Unlabeled data를 생성하는 함수

'''
Unlabeled data의 Label은 -1로 지정
'''

class Unlabeled_CIFAR10(Labeled_CIFAR10):
    
    def __init__(self, root, indices, train=True, transform=None, target_transform=None, download=False):
        
        super(Unlabeled_CIFAR10, self).__init__(root, indices, train,
                                            transform=transform,
                                            target_transform=target_transform,
                                            download=download)
        
        self.targets = np.array([-1 for i in range(len(self.targets))])

In [29]:
# 데이터셋을 분할하기 위해서 Index를 섞는 함수 정의

def split_datasets(labels, n_labeled_per_class):
    
    '''
    - n_labeled_per_class: labeled data의 개수
    - 클래스 내 500개 데이터는 validation data로 정의
    - 클래스 당 n_labeled_per_class 개수 만큼 labeled data로 정의
    - 나머지 이미지는 unlabeled data로 정의
    '''
    
    ### labeled, unlabeled, validation data 분할할 list 초기화
    labels = np.array(labels, dtype=int) 
    indice_labeled, indice_unlabeled, indice_val = [], [], [] 
    
    ### 각 class 단위로 loop 생성
    for i in range(10): 

        # 각각 labeled, unlabeled, validation data를 할당
        indice_tmp = np.where(labels==i)[0]
        
        indice_labeled.extend(indice_tmp[: n_labeled_per_class])
        indice_unlabeled.extend(indice_tmp[n_labeled_per_class: -500])
        indice_val.extend(indice_tmp[-500: ])
    
    ### 각 index를 Shuffle
    for i in [indice_labeled, indice_unlabeled, indice_val]:
        np.random.shuffle(i)
    
    return indice_labeled, indice_unlabeled, indice_val

In [30]:
# CIFAR10에 대하여 labeled, unlabeled, validation, test dataset 생성

def get_cifar10(data_dir: str, n_labeled: int,
                transform_train=None, transform_val=None,
                download=True):
    
    ### Torchvision에서 제공해주는 CIFAR10 dataset Download
    base_dataset = torchvision.datasets.CIFAR10(data_dir, train=True, download=download)
    
    ### labeled, unlabeled, validation data에 해당하는 index를 가져오기
    indice_labeled, indice_unlabeled, indice_val = split_datasets(base_dataset.targets, int(n_labeled/10)) ### n_labeled는 아래 MixMatch_argparser 함수에서 정의
    
    ### index를 기반으로 dataset을 생성
    '''
    왜 unlabeled가 Transform_twice가 적용되었을까?
    '''
    train_labeled_set = Labeled_CIFAR10(data_dir, indice_labeled, train=True, transform=transform_train) 
    train_unlabeled_set = Unlabeled_CIFAR10(data_dir, indice_unlabeled, train=True, transform=Transform_Twice(transform_train))
    val_set = Labeled_CIFAR10(data_dir, indice_val, train=True, transform=transform_val, download=True) 
    test_set = Labeled_CIFAR10(data_dir, train=False, transform=transform_val, download=True) 

    return train_labeled_set, train_unlabeled_set, val_set, test_set

In [31]:
# Image를 전처리 하기 위한 함수

### 데이터를 정규화 하기 위한 함수
def Normalize(x, m=(0.4914, 0.4822, 0.4465), std=(0.2471, 0.2345, 0.2616)):
        
    ##### x, m, std를 각각 array화
    x, m, std = [np.array(a, np.float32) for a in (x, m, std)] 

    ##### 데이터 정규화
    x -= m * 255 
    x *= 1.0/(255*std)
    return x

### 데이터를 (B, C, H, W)로 수정해주기 위한 함수 (from torchvision.transforms 내 ToTensor 와 동일한 함수)
def Transpose(x, source='NHWC', target='NCHW'):
    return x.transpose([source.index(d) for d in target])

### 특정 이미지에 동서남북 방향으로 4만큼 픽셀을 추가해주기 위한 학습
def pad(x, border=4):
    return np.pad(x, [(0, 0), (border, border), (border, border)], mode='reflect')

In [32]:
# Image를 Augmentation하기 위한 함수

### Image를 Padding 및 Crop적용
'''
1. object는 써도 되고 안써도 되는 것
2. assert는 오류를 유도하기 위함 (나중에 이렇게 해놓으면 디버깅이 편함) --> 여기선 적절한 데이터 인풋의 형태를 유도
'''
class RandomPadandCrop(object):
    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size
    
    def __call__(self, x):
        x = pad(x, 4)
        
        old_h, old_w = x.shape[1: ]
        new_h, new_w = self.output_size
        
        top = np.random.randint(0, old_h-new_h)
        left = np.random.randint(0, old_w-new_w)
        
        x = x[:, top:top+new_h, left:left+new_w]
        return x
    
    
### RandomFlip하는 함수 정의
class RandomFlip(object):
    def __call__(self, x):
        if np.random.rand() < 0.5:
            x = x[:, :, ::-1]
        
        return x.copy()
    
    
### GaussianNoise를 추가하는 함수 정의
class GaussianNoise(object):
    def __call__(self, x):
        c, h, w = x.shape
        x += np.random.randn(c, h, w)*0.15
        return x

In [33]:
# Numpy를 Tensor로 변환하는 함수
class ToTensor(object):
    def __call__(self, x):
        x = torch.from_numpy(x)
        return x

## WideResNet

In [34]:
class BasicBlock(nn.Module):
    def __init__(self, in_planes, out_planes, stride, dropRate=0.0, activate_before_residual=False):
        super(BasicBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes, momentum=0.001)
        self.relu1 = nn.LeakyReLU(negative_slope=0.1, inplace=True)
        self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_planes, momentum=0.001)
        self.relu2 = nn.LeakyReLU(negative_slope=0.1, inplace=True)
        self.conv2 = nn.Conv2d(out_planes, out_planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        self.convShortcut = (not self.equalInOut) and nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride,
                               padding=0, bias=False) or None
        self.activate_before_residual = activate_before_residual
    def forward(self, x):
        if not self.equalInOut and self.activate_before_residual == True:
            x = self.relu1(self.bn1(x))
        else:
            out = self.relu1(self.bn1(x))
        out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x)))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, training=self.training)
        out = self.conv2(out)
        return torch.add(x if self.equalInOut else self.convShortcut(x), out)

In [35]:
class NetworkBlock(nn.Module):
    def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0, activate_before_residual=False):
        super(NetworkBlock, self).__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate, activate_before_residual)
    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate, activate_before_residual):
        layers = []
        for i in range(int(nb_layers)):
            layers.append(block(i == 0 and in_planes or out_planes, out_planes, i == 0 and stride or 1, dropRate, activate_before_residual))
        return nn.Sequential(*layers)
    def forward(self, x):
        return self.layer(x)

In [36]:
class WideResNet(nn.Module):
    def __init__(self, num_classes, depth=28, widen_factor=2, dropRate=0.0):
        super(WideResNet, self).__init__()
        nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor]
        assert((depth - 4) % 6 == 0)
        n = (depth - 4) / 6
        block = BasicBlock
        # 1st conv before any network block
        self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1,
                               padding=1, bias=False)
        # 1st block
        self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate, activate_before_residual=True)
        # 2nd block
        self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropRate)
        # 3rd block
        self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropRate)
        # global average pooling and classifier
        self.bn1 = nn.BatchNorm2d(nChannels[3], momentum=0.001)
        self.relu = nn.LeakyReLU(negative_slope=0.1, inplace=True)
        self.fc = nn.Linear(nChannels[3], num_classes)
        self.nChannels = nChannels[3]

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight.data)
                m.bias.data.zero_()

    def forward(self, x):
        out = self.conv1(x)
        out = self.block1(out)
        out = self.block2(out)
        out = self.block3(out)
        out = self.relu(self.bn1(out))
        out = F.avg_pool2d(out, 8)
        out = out.view(-1, self.nChannels)
        return self.fc(out)

## Semi-supervised loss function
#### Semi-supervised loss = Loss(Labeled, x) + lambda * Loss(Unlabeled, u)

In [37]:
class Loss_Semisupervised(object):
    def __call__(self, args, outputs_x, target_x, outputs_u, targets_u, epoch):
        self.args = args
        probs_u = torch.softmax(outputs_u, dim=1)

        loss_x = -torch.mean(
            torch.sum(F.log_softmax(outputs_x, dim=1)*target_x, dim=1)
        )

        loss_u = torch.mean((probs_u-targets_u)**2)

        return loss_x, loss_u, self.args.lambda_u*linear_rampup(epoch, self.args.epochs)

In [38]:
def linear_rampup(current, rampup_length):
    if rampup_length == 0:
        return 1.0
    else:
        current = np.clip(current/rampup_length, 0.0, 1.0)
        return float(current)

In [39]:
class WeightEMA(object): # EMA=Exponential Moving Average
    
    '''
    이를 하는 이유는 학습시간이 길어지거나, Trivial Solution을 방지, 과적합 방지 등. --> 가중치를 업데이트 시 a(최근가중치)+(1-a)(이전가중치)
    '''
    def __init__(self, model, ema_model, lr, alpha=0.999):
        self.model = model
        self.ema_model = ema_model

        self.alpha = alpha

        self.params = list(self.model.state_dict().items())
        self.ema_params = list(self.ema_model.state_dict().items())

        self.wd = 0.02 * lr

        for param, ema_param in zip(self.params, self.ema_params):
            param[1].data.copy_(ema_param[1].data)
    
    def step(self):
        inverse_alpha = 1.0 - self.alpha
        for param, ema_param in zip(self.params, self.ema_params):
            if ema_param[1].dtype == torch.float32:
                ema_param[1].mul_(self.alpha) # ema_params_new = self.alpha * ema_params_old
                ema_param[1].add_(param[1]*inverse_alpha) # ema_params_Double_new = (1-self.alpha)*params

                # summary: ema_params_new = self.alpha*ema_params_old + (1-self.alpha)*params
                # params: 학습되고 있는 모델 parameter
                param[1].mul_(1-self.wd)


In [40]:
def interleave_offsets(batch_size, nu):
    
    '''
    이것도 assert의 목적으로 활용되는 code
    '''
    
    groups = [batch_size//(nu+1)]*(nu+1)
    for x in range(batch_size-sum(groups)):
        groups[-x-1] += 1

    offsets = [0]
    for g in groups:
        offsets.append(offsets[-1]+g)
    
    assert offsets[-1] == batch_size
    return offsets

In [41]:
def interleave(xy, batch_size):
    
    '''
    이것도 assert의 목적으로 활용되는 code
    '''
    
    nu = len(xy) - 1
    offsets = interleave_offsets(batch_size, nu)

    xy = [[v[offsets[p]:offsets[p+1]] for p in range(nu+1)] for v in xy]
    for i in range(1, nu+1):
        xy[0][i], xy[i][i] = xy[i][i], xy[0][i]
    return [torch.cat(v, dim=0) for v in xy]

In [42]:
def get_tqdm_config(total, leave=True, color='white'):
    fore_colors = {
        'red': Fore.LIGHTRED_EX,
        'green': Fore.LIGHTGREEN_EX,
        'yellow': Fore.LIGHTYELLOW_EX,
        'blue': Fore.LIGHTBLUE_EX,
        'magenta': Fore.LIGHTMAGENTA_EX,
        'cyan': Fore.LIGHTCYAN_EX,
        'white': Fore.LIGHTWHITE_EX,
    }
    return {
        'file': sys.stdout,
        'total': total,
        'desc': " ",
        'dynamic_ncols': True,
        'bar_format':
            "{l_bar}%s{bar}%s| [{elapsed}<{remaining}, {rate_fmt}{postfix}]" % (fore_colors[color], Fore.RESET),
        'leave': leave
    }

## Evaluation metric
#### top1 accuracy, top5 accuracy
#### top1 accuracy: (확률 값이 가장 높은 범주와 실제 범주가 일치하는 관측치 수)/ 전체 관측치
#### top5 accuracy: (확률 값 상위 5개 중 실제 범주가 존재하는 관측치 수)/ 전체 관측치

In [43]:
def accuracy(output, target, topk=(1, )):
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        if k == 1:
            correct_k = correct[:k].view(-1).float().sum(0)
        if k > 1:
            correct_k = correct[:k].float().sum(0).sum(0)
        acc = correct_k.mul_(100.0 / batch_size)
        acc = acc.detach().cpu().numpy()
        res.append(acc)
    return res

In [53]:
class MixMatchTrainer():
    def __init__(self, args):
        self.args = args

        root_dir = '/content/MixMatch' # PROJECT directory
        self.experiment_dir = os.path.join(root_dir, 'results') # 학습된 모델을 저장할 폴더 경로 정의 및 폴더 생성
        os.makedirs(self.experiment_dir, exist_ok=True)

        name_exp = "_".join([str(self.args.n_labeled), str(self.args.T)]) # 주요 하이퍼 파라미터로 폴더 저장 경로 지정 
        self.experiment_dir = os.path.join(self.experiment_dir, name_exp)
        os.makedirs(self.experiment_dir, exist_ok=True)

        # Data
        print("==> Preparing CIFAR10 dataset")
        transform_train = transforms.Compose([
            RandomPadandCrop(32),
            RandomFlip(),
            ToTensor()
        ]) # 학습에 사용할 data augmentation 정의

        transform_val = transforms.Compose([
            ToTensor()
        ]) # validation, test dataset에 대한 data augmentation 정의
           # 합성곱 신경망에 입력 될 수 있도록만 지정(Augmentation 사용하지 않는 것과 동일)

        train_labeled_set, train_unlabeled_set, val_set, test_set = \
            get_cifar10(
                data_dir=os.path.join(root_dir, 'data'),
                n_labeled=self.args.n_labeled,
                transform_train=transform_train,
                transform_val=transform_val
            ) # 앞에서 정의한 (def) get_cifar10 함수에서 train_labeled, train_unlabeled, validation, test dataset
        
        # DataLoader 정의
        self.labeled_loader = DataLoader(
            dataset=train_labeled_set,
            batch_size=self.args.batch_size,
            shuffle=True, num_workers=0, drop_last=True
        )

        self.unlabeled_loader = DataLoader(
            dataset=train_unlabeled_set,
            batch_size=self.args.batch_size,
            shuffle=True, num_workers=0, drop_last=True
        )

        self.val_loader = DataLoader(
            dataset=val_set, shuffle=False, num_workers=0, drop_last=False
        )

        self.test_loader = DataLoader(
            dataset=test_set, shuffle=False, num_workers=0, drop_last=False
        )

        # Build WideResNet
        print("==> Preparing WideResNet")
        self.model = self.create_model(ema=False)
        self.ema_model = self.create_model(ema=True)

        # Define loss functions
        self.criterion_train = Loss_Semisupervised()
        self.criterion_val = nn.CrossEntropyLoss().to(self.args.cuda)

        # Define optimizers
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.args.lr)
        self.ema_optimizer = WeightEMA(self.model, self.ema_model, lr=self.args.lr, alpha=self.args.ema_decay)

        # 학습 결과를 저장할 Tensorboard 정의
        self.writer = SummaryWriter(self.experiment_dir)

    def create_model(self, ema=False):
        # Build WideResNet & EMA model
        model = WideResNet(num_classes=10)
        model = model.to(self.args.cuda)

        if ema:
            for param in model.parameters():
                param.detach_()
            
        return model
    
    def train(self, epoch):
        # 모델 학습 함수
        losses_t, losses_x, losses_u, ws = 0.0, 0.0, 0.0, 0.0
        self.model.train()

        # iter & next remind
        # iter: list 내 batch size 만큼 랜덤하게 불러오게 하는 함수
        # next: iter 함수가 작동하도록 하는 명령어
        iter_labeled = iter(self.labeled_loader)
        iter_unlabeled = iter(self.unlabeled_loader)

        with tqdm(**get_tqdm_config(total=self.args.num_iter,
                leave=True, color='blue')) as pbar:
            for batch_idx in range(self.args.num_iter):
                # 왜 try-except 문을 사용하나?
                # 코드 작성 후 iter&next가 정확히 작용하지 않는 경우가 있음을 확인
                # 다시 iter_labeled, iter_unlabeled를 정의해 학습에 문제가 없도록 다시 선언
                try:
                    inputs_x, targets_x = next(iter_labeled)
                except:
                    iter_labeled = iter(self.labeled_loader)
                    inputs_x, targets_x = next(iter_labeled)
                real_B = inputs_x.size(0)

                # Transform label to one-hot
                targets_x = torch.zeros(real_B, 10).scatter_(1, targets_x.view(-1,1).long(), 1)
                inputs_x, targets_x = inputs_x.to(self.args.cuda), targets_x.to(self.args.cuda)

                try:
                    tmp_inputs, _ = next(iter_unlabeled)
                except:
                    iter_unlabeled = iter(self.unlabeled_loader)
                    tmp_inputs, _ = next(iter_unlabeled)

                inputs_u1, inputs_u2 = tmp_inputs[0], tmp_inputs[1]
                inputs_u1, inputs_u2 = inputs_u1.to(self.args.cuda), inputs_u2.to(self.args.cuda)

                # Unlabeled data에 대한 실제 값 생성
                # 서로 다른 Augmentation 결과의 출력 값의 평균 계산
                # Temperature 값으로 실제 값 스케일링
                with torch.no_grad():
                    outputs_u1 = self.model(inputs_u1)
                    outputs_u2 = self.model(inputs_u2)

                    pt = (torch.softmax(outputs_u1, dim=1)+torch.softmax(outputs_u2, dim=1)) / 2
                    pt = pt**(1/self.args.T)

                    targets_u = pt / pt.sum(dim=1, keepdim=True)
                    targets_u = targets_u.detach()
                
                # MixUp
                # 서로 다른 이미지와 레이블을 섞는 작업
                # feature space 상에서 범주 별 Decision boundary를 정확하게 잡아주는 역할
                inputs = torch.cat([inputs_x, inputs_u1, inputs_u2], dim=0)
                targets = torch.cat([targets_x, targets_u, targets_u], dim=0)

                l_mixup = np.random.beta(self.args.alpha, self.args.alpha)
                l_mixup = max(l_mixup, 1-l_mixup)

                # inputs의 index를 섞어 서로 다른 범주끼리 섞도록 하는 역할
                B = inputs.size(0)
                random_idx = torch.randperm(B)

                inputs_a, inputs_b = inputs, inputs[random_idx]
                targets_a, targets_b = targets, targets[random_idx]

                mixed_input = l_mixup*inputs_a + (1-l_mixup)*inputs_b
                mixed_target = l_mixup*targets_a + (1-l_mixup)*targets_b

                # batch size 만큼 분할 진행 (2N, C, H, W) -> (N, C, H, W) & (N, C, H, W)
                # 앞 부분은 labeled, 뒷 부분은 unlabeled
                '''
                이렇게 하는 이유는 첫 B는 Label 데이터로 활용, 나중 B는 Unlabeled data로 활용하기 위함 (관용적 활용법)
                '''
                
                mixed_input = list(torch.split(mixed_input, real_B))
                mixed_input = interleave(mixed_input, real_B)

                logits = [self.model(mixed_input[0])] # for labeled
                for input in mixed_input[1:]:
                    logits.append(self.model(input)) # for unlabeled

                logits = interleave(logits, real_B) # interleave: 정확히 섞이었는지 확인
                logits_x = logits[0]
                logits_u = torch.cat(logits[1:], dim=0)

                loss_x, loss_u, w = \
                    self.criterion_train(self.args,
                                    logits_x, mixed_target[:real_B],
                                    logits_u, mixed_target[real_B:],
                                    epoch+batch_idx/self.args.num_iter) # Semi-supervised loss 계산

                loss = loss_x + w * loss_u

                # Backpropagation and Model parameter update
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                self.ema_optimizer.step()

                losses_x += loss_x.item()
                losses_u += loss_u.item()
                losses_t += loss.item()
                ws += w

                self.writer.add_scalars(
                    'Training steps', {
                        'Total_loss': losses_t/(batch_idx+1),
                        'Labeled_loss':losses_x/(batch_idx+1),
                        'Unlabeled_loss':losses_u/(batch_idx+1),
                        'W values': ws/(batch_idx+1)
                    }, global_step=epoch*self.args.batch_size+batch_idx
                )

                pbar.set_description(
                    '[Train(%4d/ %4d)-Total: %.3f|Labeled: %.3f|Unlabeled: %.3f]'%(
                        (batch_idx+1), self.args.num_iter,
                        losses_t/(batch_idx+1), losses_x/(batch_idx+1), losses_u/(batch_idx+1)
                    )
                )
                pbar.update(1)

            pbar.set_description(
                '[Train(%4d/ %4d)-Total: %.3f|Labeled: %.3f|Unlabeled: %.3f]'%(
                    epoch, self.args.epochs,
                    losses_t/(batch_idx+1), losses_x/(batch_idx+1), losses_u/(batch_idx+1)
                )
            )
        
        return losses_t/(batch_idx+1), losses_x/(batch_idx+1), losses_u/(batch_idx+1)

    @torch.no_grad()
    def validate(self, epoch, phase):
        self.ema_model.eval()

        # Train, Validation, Test dataset 에 대한 DataLoader를 정의
        if phase == 'Train':
            data_loader = self.labeled_loader
            c = 'blue'
        elif phase == 'Valid':
            data_loader = self.val_loader
            c = 'green'
        elif phase == 'Test ':        
            data_loader = self.test_loader
            c = 'red'

        losses = 0.0
        top1s, top5s = [], []

        with tqdm(**get_tqdm_config(total=len(data_loader),
                leave=True, color=c)) as pbar:
            for batch_idx, (inputs, targets) in enumerate(data_loader):
                
                targets = targets.type(torch.LongTensor)
                inputs, targets = inputs.to(self.args.cuda), targets.to(self.args.cuda)
                outputs = self.ema_model(inputs)
                
                loss = self.criterion_val(outputs, targets)
                # labeled dataset에 대해서만 손실함수 계산
                # torch.nn.CrossEntropyLoss()를 사용해서 손실함수 계산

                prec1, prec5 = accuracy(outputs, targets, topk=(1, 5))
                losses += loss.item()
                top1s.append(prec1)
                top5s.append(prec5)

                self.writer.add_scalars(
                    f'{phase} steps', {
                        'Total_loss': losses/(batch_idx+1),
                        'Top1 Acc': np.mean(top1s),
                        'Top5 Acc': np.mean(top5s)
                    }, global_step=epoch*self.args.batch_size+batch_idx
                )

                pbar.set_description(
                    '[%s-Loss: %.3f|Top1 Acc: %.3f|Top5 Acc: %.3f]'%(
                        phase,
                        losses/(batch_idx+1), np.mean(top1s), np.mean(top5s)
                    )
                )
                pbar.update(1)

            pbar.set_description(
                '[%s(%4d/ %4d)-Loss: %.3f|Top1 Acc: %.3f|Top5 Acc: %.3f]'%(
                    phase,
                    epoch, self.args.epochs,
                    losses/(batch_idx+1), np.mean(top1s), np.mean(top5s)
                )
            )

        return losses/(batch_idx+1), np.mean(top1s), np.mean(top5s)

## Define hyperparamters
#### argparser라는 패키지를 이용해 각종 hyperparameter 저장

In [54]:
def MixMatch_parser():
    parser = argparse.ArgumentParser(description="MixMatch PyTorch Implementation for LG Electornics education")
    
    # method arguments
    parser.add_argument('--n-labeled', type=int, default=4000)
    parser.add_argument('--num-iter', type=int, default=1024,
                        help="The number of iteration per epoch")
    parser.add_argument('--alpha', type=float, default=0.75)
    parser.add_argument('--lambda-u', type=float, default=75)
    parser.add_argument('--T', default=0.5, type=float)
    parser.add_argument('--ema-decay', type=float, default=0.999)

    parser.add_argument('--epochs', type=int, default=1024)
    parser.add_argument('--batch-size', type=int, default=64)
    parser.add_argument('--lr', type=float, default=0.002)

    return parser

In [55]:
def main():
    parser = MixMatch_parser()
    args = parser.parse_args([])
    args.cuda = torch.device("cuda:0")

    trainer = MixMatchTrainer(args)
    
    best_loss = np.inf
    # best_loss of validation 기준으로 모멜 저장

    losses, losses_x, losses_u = [], [], []
    
    train_losses, train_top1s, train_top5s = [], [], []
    val_losses, val_top1s, val_top5s = [], [], []
    test_losses, test_top1s, test_top5s = [], [], []
    # accuracy 증가 속도, loss values 감소 속도를 그래프로 그리기
    # list에 각종 값들을 저장
    for epoch in range(1, args.epochs+1, 1):
        loss, loss_x, loss_u = trainer.train(epoch)
        losses.append(loss)
        losses_x.append(loss_x)
        losses_u.append(loss_u)

        loss, top1, top5 = trainer.validate(epoch, 'Train')
        train_losses.append(loss)
        train_top1s.append(top1)
        train_top5s.append(top5)

        loss, top1, top5 = trainer.validate(epoch, 'Valid')
        val_losses.append(loss)
        val_top1s.append(top1)
        val_top5s.append(top5)

        # validation loss 기준 모델 저장
        if loss < best_loss:
            best_loss = loss
            torch.save(trainer.model, os.path.join(trainer.experiment_dir, 'model.pth'))
            torch.save(trainer.ema_model, os.path.join(trainer.experiment_dir, 'ema_model.pth'))

        loss, top1, top5 = trainer.validate(epoch, 'Test ')
        test_losses.append(loss)
        test_top1s.append(top1)
        test_top5s.append(top5)

        torch.save(trainer.model, os.path.join(trainer.experiment_dir, 'checkpooint_model.pth'))
        torch.save(trainer.ema_model, os.path.join(trainer.experiment_dir, 'checkpoint_ema_model.pth'))

In [56]:
if __name__=="__main__":
    main()

==> Preparing CIFAR10 dataset
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
==> Preparing WideResNet
[Train(   1/ 1024)-Total: 1.696|Labeled: 1.695|Unlabeled: 0.008]: 100%|[94m██████████████████████[39m| [02:54<00:00,  5.88it/s][0m
[Train(   1/ 1024)-Loss: 2.129|Top1 Acc: 20.111|Top5 Acc: 71.144]: 100%|[94m█████████████████████[39m| [00:01<00:00, 45.11it/s][0m
[Valid(   1/ 1024)-Loss: 2.129|Top1 Acc: 19.840|Top5 Acc: 71.540]: 100%|[92m████████████████████[39m| [00:42<00:00, 117.73it/s][0m
[Test (   1/ 1024)-Loss: 2.125|Top1 Acc: 20.300|Top5 Acc: 71.890]: 100%|[91m█████████████████████[39m| [01:49<00:00, 91.71it/s][0m
[Train(   2/ 1024)-Total: 1.404|Labeled: 1.403|Unlabeled: 0.010]: 100%|[94m██████████████████████[39m| [02:51<00:00,  5.96it/s][0m
[Train(   2/ 1024)-Loss: 1.323|Top1 Acc: 57.056|Top5 Acc: 95.691]: 100%|[94m█████████████████████[39m| [00:01<00:00, 44.92it/s][0m
[Valid(   2/ 1024)-Loss: 1.

[Test (  15/ 1024)-Loss: 0.542|Top1 Acc: 84.840|Top5 Acc: 98.450]: 100%|[91m█████████████████████[39m| [01:49<00:00, 91.47it/s][0m
[Train(  16/ 1024)-Total: 0.877|Labeled: 0.863|Unlabeled: 0.012]: 100%|[94m██████████████████████[39m| [02:51<00:00,  5.98it/s][0m
[Train(  16/ 1024)-Loss: 0.122|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 45.66it/s][0m
[Valid(  16/ 1024)-Loss: 0.527|Top1 Acc: 84.880|Top5 Acc: 98.540]: 100%|[92m█████████████████████[39m| [00:53<00:00, 92.71it/s][0m
[Test (  16/ 1024)-Loss: 0.539|Top1 Acc: 85.040|Top5 Acc: 98.640]: 100%|[91m█████████████████████[39m| [02:22<00:00, 70.21it/s][0m
[Train(  17/ 1024)-Total: 0.850|Labeled: 0.836|Unlabeled: 0.011]: 100%|[94m██████████████████████[39m| [02:56<00:00,  5.82it/s][0m
[Train(  17/ 1024)-Loss: 0.117|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 40.72it/s][0m
[Valid(  17/ 1024)-Loss: 0.518|Top1 Acc: 85.600|Top5 Acc: 98.4

[Train(  31/ 1024)-Total: 0.833|Labeled: 0.808|Unlabeled: 0.011]: 100%|[94m██████████████████████[39m| [03:12<00:00,  5.31it/s][0m
[Train(  31/ 1024)-Loss: 0.135|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 42.33it/s][0m
[Valid(  31/ 1024)-Loss: 0.485|Top1 Acc: 87.460|Top5 Acc: 98.760]: 100%|[92m████████████████████[39m| [00:44<00:00, 112.47it/s][0m
[Test (  31/ 1024)-Loss: 0.496|Top1 Acc: 86.960|Top5 Acc: 98.650]: 100%|[91m█████████████████████[39m| [01:53<00:00, 87.98it/s][0m
[Train(  32/ 1024)-Total: 0.849|Labeled: 0.823|Unlabeled: 0.011]: 100%|[94m██████████████████████[39m| [03:09<00:00,  5.40it/s][0m
[Train(  32/ 1024)-Loss: 0.136|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 45.10it/s][0m
[Valid(  32/ 1024)-Loss: 0.488|Top1 Acc: 87.080|Top5 Acc: 98.740]: 100%|[92m████████████████████[39m| [00:42<00:00, 118.53it/s][0m
[Test (  32/ 1024)-Loss: 0.496|Top1 Acc: 86.860|Top5 Acc: 98.5

[Train(  46/ 1024)-Loss: 0.130|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 44.98it/s][0m
[Valid(  46/ 1024)-Loss: 0.452|Top1 Acc: 88.060|Top5 Acc: 98.940]: 100%|[92m████████████████████[39m| [00:44<00:00, 112.73it/s][0m
[Test (  46/ 1024)-Loss: 0.462|Top1 Acc: 87.750|Top5 Acc: 98.830]: 100%|[91m█████████████████████[39m| [03:27<00:00, 48.23it/s][0m
[Train(  47/ 1024)-Total: 0.817|Labeled: 0.781|Unlabeled: 0.010]: 100%|[94m██████████████████████[39m| [04:16<00:00,  3.99it/s][0m
[Train(  47/ 1024)-Loss: 0.131|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:02<00:00, 24.90it/s][0m
[Valid(  47/ 1024)-Loss: 0.450|Top1 Acc: 88.240|Top5 Acc: 98.980]: 100%|[92m█████████████████████[39m| [01:43<00:00, 48.27it/s][0m
[Test (  47/ 1024)-Loss: 0.462|Top1 Acc: 87.980|Top5 Acc: 98.840]: 100%|[91m█████████████████████[39m| [03:23<00:00, 49.05it/s][0m
[Train(  48/ 1024)-Total: 0.839|Labeled: 0.801|Unlabeled: 0.01

[Valid(  61/ 1024)-Loss: 0.433|Top1 Acc: 88.740|Top5 Acc: 98.940]: 100%|[92m████████████████████[39m| [00:41<00:00, 120.21it/s][0m
[Test (  61/ 1024)-Loss: 0.438|Top1 Acc: 88.880|Top5 Acc: 99.000]: 100%|[91m█████████████████████[39m| [01:45<00:00, 94.81it/s][0m
[Train(  62/ 1024)-Total: 0.843|Labeled: 0.796|Unlabeled: 0.010]: 100%|[94m██████████████████████[39m| [02:50<00:00,  6.02it/s][0m
[Train(  62/ 1024)-Loss: 0.129|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 45.87it/s][0m
[Valid(  62/ 1024)-Loss: 0.434|Top1 Acc: 88.860|Top5 Acc: 98.980]: 100%|[92m████████████████████[39m| [00:41<00:00, 119.86it/s][0m
[Test (  62/ 1024)-Loss: 0.442|Top1 Acc: 88.530|Top5 Acc: 98.790]: 100%|[91m█████████████████████[39m| [01:46<00:00, 93.83it/s][0m
[Train(  63/ 1024)-Total: 0.843|Labeled: 0.794|Unlabeled: 0.010]: 100%|[94m██████████████████████[39m| [02:50<00:00,  6.02it/s][0m
[Train(  63/ 1024)-Loss: 0.129|Top1 Acc: 100.000|Top5 Acc: 100

[Test (  76/ 1024)-Loss: 0.411|Top1 Acc: 89.410|Top5 Acc: 99.070]: 100%|[91m█████████████████████[39m| [01:51<00:00, 89.50it/s][0m
[Train(  77/ 1024)-Total: 0.825|Labeled: 0.770|Unlabeled: 0.010]: 100%|[94m██████████████████████[39m| [02:53<00:00,  5.91it/s][0m
[Train(  77/ 1024)-Loss: 0.119|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 43.66it/s][0m
[Valid(  77/ 1024)-Loss: 0.403|Top1 Acc: 89.580|Top5 Acc: 99.320]: 100%|[92m████████████████████[39m| [00:45<00:00, 109.20it/s][0m
[Test (  77/ 1024)-Loss: 0.410|Top1 Acc: 89.440|Top5 Acc: 99.140]: 100%|[91m█████████████████████[39m| [01:57<00:00, 85.41it/s][0m
[Train(  78/ 1024)-Total: 0.862|Labeled: 0.803|Unlabeled: 0.010]: 100%|[94m██████████████████████[39m| [02:53<00:00,  5.89it/s][0m
[Train(  78/ 1024)-Loss: 0.123|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 44.78it/s][0m
[Valid(  78/ 1024)-Loss: 0.407|Top1 Acc: 89.440|Top5 Acc: 99.2

[Train(  92/ 1024)-Total: 0.862|Labeled: 0.795|Unlabeled: 0.010]: 100%|[94m██████████████████████[39m| [02:54<00:00,  5.87it/s][0m
[Train(  92/ 1024)-Loss: 0.123|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 41.91it/s][0m
[Valid(  92/ 1024)-Loss: 0.393|Top1 Acc: 89.940|Top5 Acc: 99.320]: 100%|[92m████████████████████[39m| [00:47<00:00, 104.24it/s][0m
[Test (  92/ 1024)-Loss: 0.399|Top1 Acc: 89.960|Top5 Acc: 99.140]: 100%|[91m█████████████████████[39m| [01:50<00:00, 90.88it/s][0m
[Train(  93/ 1024)-Total: 0.850|Labeled: 0.784|Unlabeled: 0.010]: 100%|[94m██████████████████████[39m| [02:50<00:00,  6.02it/s][0m
[Train(  93/ 1024)-Loss: 0.121|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 45.89it/s][0m
[Valid(  93/ 1024)-Loss: 0.388|Top1 Acc: 89.940|Top5 Acc: 99.300]: 100%|[92m████████████████████[39m| [00:41<00:00, 119.72it/s][0m
[Test (  93/ 1024)-Loss: 0.397|Top1 Acc: 90.250|Top5 Acc: 99.0

[Train( 107/ 1024)-Loss: 0.116|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 45.08it/s][0m
[Valid( 107/ 1024)-Loss: 0.375|Top1 Acc: 90.440|Top5 Acc: 99.280]: 100%|[92m████████████████████[39m| [00:43<00:00, 114.02it/s][0m
[Test ( 107/ 1024)-Loss: 0.386|Top1 Acc: 90.130|Top5 Acc: 99.070]: 100%|[91m█████████████████████[39m| [01:54<00:00, 87.45it/s][0m
[Train( 108/ 1024)-Total: 0.843|Labeled: 0.768|Unlabeled: 0.009]: 100%|[94m██████████████████████[39m| [02:53<00:00,  5.90it/s][0m
[Train( 108/ 1024)-Loss: 0.113|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 44.07it/s][0m
[Valid( 108/ 1024)-Loss: 0.374|Top1 Acc: 90.620|Top5 Acc: 99.340]: 100%|[92m████████████████████[39m| [00:43<00:00, 115.37it/s][0m
[Test ( 108/ 1024)-Loss: 0.385|Top1 Acc: 90.190|Top5 Acc: 99.060]: 100%|[91m█████████████████████[39m| [01:53<00:00, 87.79it/s][0m
[Train( 109/ 1024)-Total: 0.863|Labeled: 0.786|Unlabeled: 0.01

[Valid( 122/ 1024)-Loss: 0.360|Top1 Acc: 91.020|Top5 Acc: 99.360]: 100%|[92m█████████████████████[39m| [02:48<00:00, 29.71it/s][0m
[Test ( 122/ 1024)-Loss: 0.374|Top1 Acc: 90.590|Top5 Acc: 99.100]: 100%|[91m█████████████████████[39m| [05:59<00:00, 27.81it/s][0m
[Train( 123/ 1024)-Total: 0.873|Labeled: 0.787|Unlabeled: 0.009]: 100%|[94m██████████████████████[39m| [03:58<00:00,  4.29it/s][0m
[Train( 123/ 1024)-Loss: 0.110|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 45.25it/s][0m
[Valid( 123/ 1024)-Loss: 0.356|Top1 Acc: 90.920|Top5 Acc: 99.540]: 100%|[92m████████████████████[39m| [00:43<00:00, 114.82it/s][0m
[Test ( 123/ 1024)-Loss: 0.372|Top1 Acc: 90.560|Top5 Acc: 99.150]: 100%|[91m█████████████████████[39m| [02:59<00:00, 55.86it/s][0m
[Train( 124/ 1024)-Total: 0.836|Labeled: 0.752|Unlabeled: 0.009]: 100%|[94m██████████████████████[39m| [07:34<00:00,  2.25it/s][0m
[Train( 124/ 1024)-Loss: 0.108|Top1 Acc: 100.000|Top5 Acc: 100

[Test ( 137/ 1024)-Loss: 0.363|Top1 Acc: 90.790|Top5 Acc: 99.240]: 100%|[91m█████████████████████[39m| [01:45<00:00, 94.47it/s][0m
[Train( 138/ 1024)-Total: 0.862|Labeled: 0.769|Unlabeled: 0.009]: 100%|[94m██████████████████████[39m| [02:49<00:00,  6.04it/s][0m
[Train( 138/ 1024)-Loss: 0.104|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 45.98it/s][0m
[Valid( 138/ 1024)-Loss: 0.341|Top1 Acc: 91.120|Top5 Acc: 99.360]: 100%|[92m████████████████████[39m| [00:41<00:00, 120.34it/s][0m
[Test ( 138/ 1024)-Loss: 0.360|Top1 Acc: 90.910|Top5 Acc: 99.250]: 100%|[91m█████████████████████[39m| [01:46<00:00, 93.96it/s][0m
[Train( 139/ 1024)-Total: 0.858|Labeled: 0.765|Unlabeled: 0.009]: 100%|[94m██████████████████████[39m| [02:49<00:00,  6.04it/s][0m
[Train( 139/ 1024)-Loss: 0.106|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 46.04it/s][0m
[Valid( 139/ 1024)-Loss: 0.345|Top1 Acc: 91.180|Top5 Acc: 99.3

[Train( 153/ 1024)-Total: 0.862|Labeled: 0.761|Unlabeled: 0.009]: 100%|[94m██████████████████████[39m| [02:49<00:00,  6.04it/s][0m
[Train( 153/ 1024)-Loss: 0.098|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 45.99it/s][0m
[Valid( 153/ 1024)-Loss: 0.332|Top1 Acc: 91.400|Top5 Acc: 99.440]: 100%|[92m████████████████████[39m| [00:41<00:00, 120.40it/s][0m
[Test ( 153/ 1024)-Loss: 0.350|Top1 Acc: 91.060|Top5 Acc: 99.260]: 100%|[91m█████████████████████[39m| [01:46<00:00, 93.95it/s][0m
[Train( 154/ 1024)-Total: 0.876|Labeled: 0.773|Unlabeled: 0.009]: 100%|[94m██████████████████████[39m| [02:49<00:00,  6.04it/s][0m
[Train( 154/ 1024)-Loss: 0.101|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 46.21it/s][0m
[Valid( 154/ 1024)-Loss: 0.335|Top1 Acc: 91.540|Top5 Acc: 99.360]: 100%|[92m████████████████████[39m| [00:41<00:00, 119.99it/s][0m
[Test ( 154/ 1024)-Loss: 0.355|Top1 Acc: 90.870|Top5 Acc: 99.2

[Train( 168/ 1024)-Loss: 0.103|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 46.17it/s][0m
[Valid( 168/ 1024)-Loss: 0.333|Top1 Acc: 91.480|Top5 Acc: 99.360]: 100%|[92m████████████████████[39m| [00:41<00:00, 120.35it/s][0m
[Test ( 168/ 1024)-Loss: 0.351|Top1 Acc: 91.120|Top5 Acc: 99.240]: 100%|[91m█████████████████████[39m| [01:46<00:00, 94.29it/s][0m
[Train( 169/ 1024)-Total: 0.871|Labeled: 0.760|Unlabeled: 0.009]: 100%|[94m██████████████████████[39m| [02:49<00:00,  6.04it/s][0m
[Train( 169/ 1024)-Loss: 0.099|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 45.93it/s][0m
[Valid( 169/ 1024)-Loss: 0.336|Top1 Acc: 91.340|Top5 Acc: 99.520]: 100%|[92m████████████████████[39m| [00:41<00:00, 119.85it/s][0m
[Test ( 169/ 1024)-Loss: 0.357|Top1 Acc: 90.730|Top5 Acc: 99.260]: 100%|[91m█████████████████████[39m| [01:46<00:00, 94.12it/s][0m
[Train( 170/ 1024)-Total: 0.857|Labeled: 0.750|Unlabeled: 0.00

[Valid( 183/ 1024)-Loss: 0.329|Top1 Acc: 91.560|Top5 Acc: 99.480]: 100%|[92m████████████████████[39m| [00:41<00:00, 120.45it/s][0m
[Test ( 183/ 1024)-Loss: 0.343|Top1 Acc: 91.580|Top5 Acc: 99.190]: 100%|[91m█████████████████████[39m| [01:45<00:00, 94.57it/s][0m
[Train( 184/ 1024)-Total: 0.870|Labeled: 0.754|Unlabeled: 0.009]: 100%|[94m██████████████████████[39m| [02:49<00:00,  6.05it/s][0m
[Train( 184/ 1024)-Loss: 0.097|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 45.86it/s][0m
[Valid( 184/ 1024)-Loss: 0.324|Top1 Acc: 91.640|Top5 Acc: 99.340]: 100%|[92m████████████████████[39m| [00:41<00:00, 119.91it/s][0m
[Test ( 184/ 1024)-Loss: 0.338|Top1 Acc: 91.570|Top5 Acc: 99.250]: 100%|[91m█████████████████████[39m| [01:46<00:00, 94.20it/s][0m
[Train( 185/ 1024)-Total: 0.889|Labeled: 0.771|Unlabeled: 0.009]: 100%|[94m██████████████████████[39m| [02:49<00:00,  6.04it/s][0m
[Train( 185/ 1024)-Loss: 0.098|Top1 Acc: 100.000|Top5 Acc: 100

[Test ( 198/ 1024)-Loss: 0.335|Top1 Acc: 91.430|Top5 Acc: 99.360]: 100%|[91m█████████████████████[39m| [01:46<00:00, 93.92it/s][0m
[Train( 199/ 1024)-Total: 0.889|Labeled: 0.763|Unlabeled: 0.009]: 100%|[94m██████████████████████[39m| [02:49<00:00,  6.05it/s][0m
[Train( 199/ 1024)-Loss: 0.098|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 45.69it/s][0m
[Valid( 199/ 1024)-Loss: 0.320|Top1 Acc: 92.120|Top5 Acc: 99.460]: 100%|[92m████████████████████[39m| [00:41<00:00, 119.85it/s][0m
[Test ( 199/ 1024)-Loss: 0.338|Top1 Acc: 91.390|Top5 Acc: 99.300]: 100%|[91m█████████████████████[39m| [01:46<00:00, 93.62it/s][0m
[Train( 200/ 1024)-Total: 0.880|Labeled: 0.753|Unlabeled: 0.009]: 100%|[94m██████████████████████[39m| [02:49<00:00,  6.04it/s][0m
[Train( 200/ 1024)-Loss: 0.098|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 46.12it/s][0m
[Valid( 200/ 1024)-Loss: 0.320|Top1 Acc: 91.940|Top5 Acc: 99.5

[Train( 214/ 1024)-Total: 0.893|Labeled: 0.760|Unlabeled: 0.008]: 100%|[94m██████████████████████[39m| [02:49<00:00,  6.03it/s][0m
[Train( 214/ 1024)-Loss: 0.098|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 46.07it/s][0m
[Valid( 214/ 1024)-Loss: 0.311|Top1 Acc: 92.360|Top5 Acc: 99.580]: 100%|[92m████████████████████[39m| [00:41<00:00, 120.37it/s][0m
[Test ( 214/ 1024)-Loss: 0.330|Top1 Acc: 91.730|Top5 Acc: 99.310]: 100%|[91m█████████████████████[39m| [01:46<00:00, 94.33it/s][0m
[Train( 215/ 1024)-Total: 0.896|Labeled: 0.759|Unlabeled: 0.009]: 100%|[94m██████████████████████[39m| [02:49<00:00,  6.04it/s][0m
[Train( 215/ 1024)-Loss: 0.097|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 46.08it/s][0m
[Valid( 215/ 1024)-Loss: 0.306|Top1 Acc: 92.300|Top5 Acc: 99.560]: 100%|[92m████████████████████[39m| [00:41<00:00, 119.90it/s][0m
[Test ( 215/ 1024)-Loss: 0.326|Top1 Acc: 91.830|Top5 Acc: 99.4

[Train( 229/ 1024)-Loss: 0.093|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 45.55it/s][0m
[Valid( 229/ 1024)-Loss: 0.302|Top1 Acc: 92.580|Top5 Acc: 99.660]: 100%|[92m████████████████████[39m| [00:41<00:00, 120.33it/s][0m
[Test ( 229/ 1024)-Loss: 0.323|Top1 Acc: 91.870|Top5 Acc: 99.460]: 100%|[91m█████████████████████[39m| [01:47<00:00, 93.42it/s][0m
[Train( 230/ 1024)-Total: 0.900|Labeled: 0.756|Unlabeled: 0.009]: 100%|[94m██████████████████████[39m| [02:49<00:00,  6.03it/s][0m
[Train( 230/ 1024)-Loss: 0.097|Top1 Acc: 100.000|Top5 Acc: 100.000]: 100%|[94m███████████████████[39m| [00:01<00:00, 45.58it/s][0m
[Valid( 230/ 1024)-Loss: 0.305|Top1 Acc: 92.640|Top5 Acc: 99.640]: 100%|[92m████████████████████[39m| [00:41<00:00, 120.02it/s][0m
[Test ( 230/ 1024)-Loss: 0.325|Top1 Acc: 91.880|Top5 Acc: 99.360]: 100%|[91m█████████████████████[39m| [01:46<00:00, 94.03it/s][0m
[Train( 231/ 1024)-Total: 0.908|Labeled: 0.764|Unlabeled: 0.00

KeyboardInterrupt: 