In [1]:
import numpy as np
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

from torch.cuda.amp import autocast, GradScaler
from torch.nn.utils import clip_grad_norm_
from tqdm import tqdm
import time

from timm.data import Mixup
import transformers

from einops import rearrange
from einops.layers.torch import Rearrange

from timm.models.layers import DropPath, trunc_normal_, to_2tuple

from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

  from .autonotebook import tqdm as notebook_tqdm


# 01.Convolutional Token Embedding

In [2]:
class ConvEmbed(nn.Module):
    '''
    img/token map to Conv Embedding
    '''
    
    def __init__(self,
                 patch_size=7, # [7, 3, 3]
                 in_chans=3,   # [3, dim of stage1, dim of stage2]
                 embed_dim=64, # [64, 192, 384]
                 stride=4,     # [4, 2, 2]
                 padding=2,    # [2, 1, 1]
                 norm_layer=None):
        super().__init__()
        self.patch_size = to_2tuple(patch_size)
        
        self.proj = nn.Conv2d(
            in_channels=in_chans,
            out_channels=embed_dim,
            kernel_size=patch_size,
            stride=stride,
            padding=padding
        )
        
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()
        
    def forward(self, x):
        x = self.proj(x)
        
        _, _, H, W = x.shape
        x = rearrange(x, 'b c h w -> b (h w) c')
        x = self.norm(x)
        x = rearrange(x, 'b (h w) c -> b c h w', h=H, w=W)
        return x
    

In [3]:
class AttentionConv(nn.Module):
    def __init__(self,
                 dim=64,        # [64,192,384]
                 num_heads=4,   # paper: [1,3,6], me: [4,8,16]
                 qkv_bias=False,
                 attn_drop=0.,
                 proj_drop=0.,
                 kernel_size=3,
                 padding_q=1,
                 padding_kv=1,
                 stride_q=1,
                 stride_kv=2,
                 **kwargs
                 ):
        super().__init__()
        self.stride_q = stride_q
        self.stride_kv = stride_kv
        self.dim = dim
        self.num_heads = num_heads        
        self.scale = dim ** -0.5
        
        self.conv_proj_q = self._build_projection(dim,
                                                  kernel_size,
                                                  padding_q,
                                                  stride_q,
                                                  )
        self.conv_proj_k = self._build_projection(dim,
                                                  kernel_size,
                                                  padding_kv,
                                                  stride_kv,
                                                  )
        
        self.conv_proj_v = self._build_projection(dim,
                                                  kernel_size,
                                                  padding_kv,
                                                  stride_kv,
                                                  )
        
        self.linear_proj_q = nn.Linear(dim, dim, bias=qkv_bias)
        self.linear_proj_k = nn.Linear(dim, dim, bias=qkv_bias)
        self.linear_proj_v = nn.Linear(dim, dim, bias=qkv_bias)
        
        self.attn_drop = nn.Dropout(attn_drop)
        self.linear_proj_last = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)        
        
    def _build_projection(self,
                          dim,
                          kernel_size,
                          padding,
                          stride,
                          ):
        
        proj = nn.Sequential(OrderedDict([
            ('depthwise', nn.Conv2d(
                dim,
                dim,
                kernel_size=kernel_size,
                padding=padding,
                stride=stride,
                bias=False,
                groups=dim)),
            ('bn', nn.BatchNorm2d(dim)),
            ('pointwise', nn.Conv2d(
                dim,
                dim,
                kernel_size=1)),
            ('rearrange', Rearrange('b c h w -> b (h w) c'))
        ]))
        
        return proj
    
    def forward(self, x, h, w):
        x = rearrange(x, 'b (h w) c -> b c h w', h=h, w=w)
        
        q = F.normalize(self.conv_proj_q(x), dim=-1)
        k = F.normalize(self.conv_proj_k(x), dim=-1)
        v = self.conv_proj_v(x)
        
        q = rearrange(self.linear_proj_q(q), 'b t (h d) -> b h t d', h=self.num_heads)
        k = rearrange(self.linear_proj_k(k), 'b t (h d) -> b h t d', h=self.num_heads)
        v = rearrange(self.linear_proj_v(v), 'b t (h d) -> b h t d', h=self.num_heads)
        
        attn_score = torch.einsum('bhlk,bhtk->bhlt', [q, k]) * self.scale
        attn = self.attn_drop(F.softmax(attn_score, dim=-1))
        
        x = torch.matmul(attn, v)
        batch_size, num_heads, seq_length, depth = x.size()
        x = x.view(batch_size, seq_length, num_heads * depth)
        
        x = self.proj_drop(self.linear_proj_last(x))
        
        return x


In [4]:
# transformer block에 작은 스케일 인자 곱하기
class LayerScale(nn.Module):
    def __init__(self, dim, init_values=1e-5):
        super().__init__()
        self.gamma = nn.Parameter(init_values * torch.ones((dim)))

    def forward(self, x):
        return self.gamma * x

In [5]:
class Block(nn.Module):
    
    def __init__(self,
                 dim,
                 num_heads,
                 mlp_ratio=4.,
                 qkv_bias=False,
                 drop=0.,
                 attn_drop=0.,
                 drop_path=0.,
                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm,
                 **kwargs
                ):
        super().__init__()
        
        self.norm1 = norm_layer(dim)
        self.ls1 = LayerScale(dim)
        self.attn = AttentionConv(dim=dim,
                                  num_heads=num_heads,
                                  qkv_bias=qkv_bias,
                                  attn_drop=attn_drop,
                                  proj_drop=drop,
                                  **kwargs)        
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        self.ls2 = LayerScale(dim)
        mlp_hidden_dim = int(dim*mlp_ratio)
        self.mlp = nn.Sequential(
            nn.Linear(dim, mlp_hidden_dim),
            act_layer(),
            nn.Linear(mlp_hidden_dim, dim),
            nn.Dropout(drop),
        )
        
    def forward(self, x, h, w):
        res = x
        x = self.norm1(x)
        attn = self.attn(x, h, w)
        x = res + self.drop_path(self.ls1(attn))
        x = x + self.drop_path(self.ls2(self.mlp(self.norm2(x))))
        return x

In [6]:
test_img = torch.Tensor(np.zeros((2,3,224,224))) # B, C, H, W

block = Block(dim=64,
              num_heads=4)

In [7]:
# Stage 1 

## Patch Embedding
convembed = ConvEmbed(patch_size=7, stride=4, padding=2)
stage1_img = convembed(test_img)

## Attention with Convolution
b, c, h, w = stage1_img.shape
stage1_img = rearrange(stage1_img, 'b c h w -> b (h w) c')
stage1_img = block(stage1_img, h=h, w=w)
stage1_img = rearrange(stage1_img, 'b (h w) c -> b c h w', h=h, w=w)

## Check Result
print(f'stage 1 | img shape: {test_img.shape} → Conv Embed Shape: {stage1_img.shape}')

stage 1 | img shape: torch.Size([2, 3, 224, 224]) → Conv Embed Shape: torch.Size([2, 64, 56, 56])


In [8]:
# Stage 2 

## Patch Embedding
convembed = ConvEmbed(patch_size=3, in_chans=64, stride=2, padding=1)
stage2_img = convembed(stage1_img)

## Attention with Convolution
b, c, h, w = stage2_img.shape
stage2_img = rearrange(stage2_img, 'b c h w -> b (h w) c')
stage2_img = block(stage2_img, h=h, w=w)
stage2_img = rearrange(stage2_img, 'b (h w) c -> b c h w', h=h, w=w)

## Check Result
print(f'stage 2 | img shape: {stage1_img.shape} → Conv Embed Shape: {stage2_img.shape}')

stage 2 | img shape: torch.Size([2, 64, 56, 56]) → Conv Embed Shape: torch.Size([2, 64, 28, 28])


In [9]:
# Stage 3 

## Patch Embedding
convembed = ConvEmbed(patch_size=3, in_chans=64, stride=2, padding=1)
stage3_img = convembed(stage2_img)

## Attention with Convolution
b, c, h, w = stage3_img.shape
stage3_img = rearrange(stage3_img, 'b c h w -> b (h w) c')
stage3_img = block(stage3_img, h=h, w=w)
stage3_img = rearrange(stage3_img, 'b (h w) c -> b c h w', h=h, w=w)

## Check Result
print(f'stage 3 | img shape: {stage2_img.shape} → Conv Embed Shape: {stage3_img.shape}')

stage 3 | img shape: torch.Size([2, 64, 28, 28]) → Conv Embed Shape: torch.Size([2, 64, 14, 14])


In [10]:
class VisionTransformer(nn.Module):
    def __init__(self,
                 patch_size=16,
                 patch_stride=16,
                 patch_padding=0,
                 in_chans=3,
                 embed_dim=768,
                 depth=12,
                 num_heads=12,
                 mlp_ratio=4.,
                 qkv_bias=False,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm,
                 init='trunc_norm',
                 **kwargs
                 ):
        
        super().__init__()

        self.patch_embed = ConvEmbed(
            patch_size=patch_size,
            in_chans=in_chans,
            stride=patch_stride,
            padding=patch_padding,
            embed_dim=embed_dim,
            norm_layer=norm_layer
        )

        self.pos_drop = nn.Dropout(p=drop_rate)

        self.blocks = nn.ModuleList([
            Block(
                dim=embed_dim,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=drop_path_rate,
                act_layer=act_layer,
                norm_layer=norm_layer,
                **kwargs
            ) for _ in range(depth)
        ])

        if init == 'xavier':
            self.apply(self._init_weights_xavier)
        else:
            self.apply(self._init_weights_trunc_normal)

    def _init_weights_trunc_normal(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=0.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.LayerNorm, nn.BatchNorm2d)):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def _init_weights_xavier(self, m):
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.LayerNorm, nn.BatchNorm2d)):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def forward(self, x):
        x = self.patch_embed(x)
        _, _, H, W = x.size()

        x = rearrange(x, 'b c h w -> b (h w) c')
        x = self.pos_drop(x)

        for _, blk in enumerate(self.blocks):
            x = blk(x, H, W)
        x = rearrange(x, 'b (h w) c -> b c h w', h=H, w=W)
        return x

In [11]:
class ConvolutionalVisionTransformer(nn.Module):
    def __init__(self,
                 in_chans=3,
                 num_classes=100,
                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm,
                 init='trunc_norm',
                 spec=None):
        super().__init__()
        self.num_classes = num_classes

        self.num_stages = spec['NUM_STAGES']
        self.stages = nn.ModuleList()
        for i in range(self.num_stages):
            kwargs = {
                'patch_size': spec['PATCH_SIZE'][i],
                'patch_stride': spec['PATCH_STRIDE'][i],
                'patch_padding': spec['PATCH_PADDING'][i],
                'embed_dim': spec['DIM_EMBED'][i],
                'depth': spec['DEPTH'][i],
                'num_heads': spec['NUM_HEADS'][i],
                'mlp_ratio': spec['MLP_RATIO'][i],
                'qkv_bias': spec['QKV_BIAS'][i],
                'drop_rate': spec['DROP_RATE'][i],
                'attn_drop_rate': spec['ATTN_DROP_RATE'][i],
                'drop_path_rate': spec['DROP_PATH_RATE'][i],
                'kernel_size': spec['KERNEL_QKV'][i],
                'padding_q': spec['PADDING_Q'][i],
                'padding_kv': spec['PADDING_KV'][i],
                'stride_q': spec['STRIDE_Q'][i],
                'stride_kv': spec['STRIDE_KV'][i],
            }

            stage = VisionTransformer(
                in_chans=in_chans,
                init=init,
                act_layer=act_layer,
                norm_layer=norm_layer,
                **kwargs
            )
            
            self.stages.append(stage)

            in_chans = spec['DIM_EMBED'][i]

        dim_embed = spec['DIM_EMBED'][-1]
        self.norm = norm_layer(dim_embed)
        self.pooler = nn.AdaptiveAvgPool1d(1)

        # Classifier head
        self.head = nn.Linear(dim_embed, num_classes) if num_classes > 0 else nn.Identity()
        trunc_normal_(self.head.weight, std=0.02)

    def forward_features(self, x):
        for stage in self.stages:
            x = stage(x)

        x = rearrange(x, 'b c h w -> b (h w) c') # (B, L, C)
        x = self.norm(x)                         # (B, L, C)
        x = self.pooler(x.transpose(1,2))        # (B, C, 1)
        x = torch.flatten(x, 1)                  # (B, C)
        # x = torch.mean(x, dim=1)

        return x

    def forward(self, x):
        x = self.forward_features(x)
        x = self.head(x)

        return x

In [12]:
class QuickGELU(nn.Module):
    def forward(self, x: torch.Tensor):
        return x * torch.sigmoid(1.702 * x)

In [13]:
spec = {
    'NUM_STAGES': 3,
    'PATCH_SIZE': [7,3,3],
    'PATCH_STRIDE': [4,2,2],
    'PATCH_PADDING': [2,1,1],
    'DIM_EMBED': [64,192,384],
    'DEPTH': [1,2,10],
    'NUM_HEADS': [1,3,6],   # original : [1,3,6]
    'MLP_RATIO': [4.,4.,4.],
    'QKV_BIAS': [True, True, True],
    'DROP_RATE': [0.,0.,0.],
    'ATTN_DROP_RATE': [0.,0.,0.],
    'DROP_PATH_RATE': [0.,0.,0.1],
    'KERNEL_QKV': [3,3,3],
    'PADDING_Q': [1,1,1],
    'PADDING_KV': [1,1,1],
    'STRIDE_Q': [1,1,1],
    'STRIDE_KV': [2,2,2],
}

spec

{'NUM_STAGES': 3,
 'PATCH_SIZE': [7, 3, 3],
 'PATCH_STRIDE': [4, 2, 2],
 'PATCH_PADDING': [2, 1, 1],
 'DIM_EMBED': [64, 192, 384],
 'DEPTH': [1, 2, 10],
 'NUM_HEADS': [1, 3, 6],
 'MLP_RATIO': [4.0, 4.0, 4.0],
 'QKV_BIAS': [True, True, True],
 'DROP_RATE': [0.0, 0.0, 0.0],
 'ATTN_DROP_RATE': [0.0, 0.0, 0.0],
 'DROP_PATH_RATE': [0.0, 0.0, 0.1],
 'KERNEL_QKV': [3, 3, 3],
 'PADDING_Q': [1, 1, 1],
 'PADDING_KV': [1, 1, 1],
 'STRIDE_Q': [1, 1, 1],
 'STRIDE_KV': [2, 2, 2]}

In [14]:
model = ConvolutionalVisionTransformer(act_layer=QuickGELU, spec=spec)

test_result = model(test_img)
test_img.shape, test_result.shape, test_result

(torch.Size([2, 3, 224, 224]),
 torch.Size([2, 100]),
 tensor([[ 0.0211, -0.0558, -0.4906,  0.2948,  0.3206,  0.6026, -0.4612,  0.1550,
           0.0135,  0.5312, -0.3943,  0.3365, -0.5384, -0.6001, -0.2411, -0.3099,
           0.1318,  0.2968,  0.3758,  0.7561,  0.2106, -0.3650,  0.4291,  0.1401,
           0.7766, -0.0212,  0.5200,  0.2039,  0.4841, -0.0210, -0.5029,  0.2329,
          -0.1808, -0.3701, -0.1268,  0.2263, -0.0617, -0.4244, -0.4052, -0.4202,
          -0.0278,  0.2804, -0.1042,  0.2972,  0.0252, -0.0428,  0.4662, -0.2253,
           0.1695,  0.4004, -0.2699,  0.1824,  0.4349, -0.5129, -0.5117,  0.0374,
          -0.2373, -0.0456, -0.3491, -0.3741,  0.6553,  0.1080,  0.0342, -0.0314,
          -0.3117,  0.0951, -0.1339, -0.3594,  0.1543, -0.3884, -0.2792,  0.5480,
           0.1498, -0.0871,  0.1263, -0.4148, -0.0138, -1.0696,  0.4535,  0.7342,
           0.3567,  0.4345, -0.4056, -0.6031, -0.5248, -0.7621,  0.4022, -0.0378,
          -0.6098,  0.6182, -0.5991, -0.1844

In [15]:
from torchsummary import summary

summary(model.cuda(), (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 56, 56]           9,472
         LayerNorm-2             [-1, 3136, 64]             128
         ConvEmbed-3           [-1, 64, 56, 56]               0
           Dropout-4             [-1, 3136, 64]               0
         LayerNorm-5             [-1, 3136, 64]             128
            Conv2d-6           [-1, 64, 56, 56]             576
       BatchNorm2d-7           [-1, 64, 56, 56]             128
            Conv2d-8           [-1, 64, 56, 56]           4,160
         Rearrange-9             [-1, 3136, 64]               0
           Conv2d-10           [-1, 64, 28, 28]             576
      BatchNorm2d-11           [-1, 64, 28, 28]             128
           Conv2d-12           [-1, 64, 28, 28]           4,160
        Rearrange-13              [-1, 784, 64]               0
           Conv2d-14           [-1, 64,

In [16]:
# Transforms 정의하기
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8,1), interpolation=transforms.InterpolationMode.LANCZOS),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.9, scale=(0.02, 0.33)),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

data_dir = '../data/sports'
batch_size = 256

train_path = data_dir+'/train'
valid_path = data_dir+'/valid'
test_path = data_dir+'/test'

# dataset load
train_data = ImageFolder(train_path, transform=train_transform)
valid_data = ImageFolder(valid_path, transform=test_transform)
test_data = ImageFolder(test_path, transform=test_transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [17]:
device = 'cuda:2'
max_norm = 1.0 # paper : 100 with G variants

model.to(device)
model_path = '../models/cvt/model.pth'

In [18]:
mixup_fn = Mixup(mixup_alpha=.7, 
                cutmix_alpha=1., 
                prob=.7, 
                switch_prob=0.5, 
                mode='batch',
                label_smoothing=.1,
                num_classes=100)

epochs = 200

criterion = nn.CrossEntropyLoss(label_smoothing=0.)

In [19]:
optimizer = optim.AdamW(model.parameters())
warmup_steps = int(len(train_loader)*(epochs)*0.1)
train_steps = len(train_loader)*(epochs)
scheduler = transformers.get_cosine_schedule_with_warmup(optimizer, 
                                                        num_warmup_steps=warmup_steps, 
                                                        num_training_steps=train_steps,
                                                        num_cycles=0.5)

2024-01-23 16:17:11.613248: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-23 16:17:11.613336: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-23 16:17:11.614242: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-23 16:17:11.619968: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [20]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')

# GradScaler 초기화
scaler = GradScaler()

for epoch in range(epochs):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
text = f"Epoch 당 평균 소요시간 : {training_time / epochs:.2f}초"      
print(text)

Epoch 1: 100%|██████████| 53/53 [00:57<00:00,  1.08s/it]


	Loss: 4.5529850923790125, Val Loss: 4.37026309967041, LR: 5e-05, Duration: 58.77 sec - model saved!


Epoch 2: 100%|██████████| 53/53 [01:02<00:00,  1.17s/it]


	Loss: 4.40489535961511, Val Loss: 4.180859565734863, LR: 0.0001, Duration: 63.50 sec - model saved!


Epoch 3: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


	Loss: 4.309056021132559, Val Loss: 4.035790920257568, LR: 0.00015, Duration: 68.41 sec - model saved!


Epoch 4: 100%|██████████| 53/53 [01:03<00:00,  1.20s/it]


	Loss: 4.2383646425211206, Val Loss: 3.9670908451080322, LR: 0.0002, Duration: 65.10 sec - model saved!


Epoch 5: 100%|██████████| 53/53 [01:02<00:00,  1.18s/it]


	Loss: 4.195841015509839, Val Loss: 3.855345845222473, LR: 0.00025, Duration: 64.22 sec - model saved!


Epoch 6: 100%|██████████| 53/53 [01:03<00:00,  1.20s/it]


	Loss: 4.124346062822162, Val Loss: 3.8080384731292725, LR: 0.0003, Duration: 64.93 sec - model saved!


Epoch 7: 100%|██████████| 53/53 [01:03<00:00,  1.20s/it]


	Loss: 4.1016473680172325, Val Loss: 3.6553266048431396, LR: 0.00035, Duration: 64.76 sec - model saved!


Epoch 8: 100%|██████████| 53/53 [01:02<00:00,  1.19s/it]


	Loss: 4.068467594542593, Val Loss: 3.61457622051239, LR: 0.0004, Duration: 64.58 sec - model saved!


Epoch 9: 100%|██████████| 53/53 [01:03<00:00,  1.19s/it]


	Loss: 3.9984240711859935, Val Loss: 3.44623863697052, LR: 0.00045000000000000004, Duration: 64.93 sec - model saved!


Epoch 10: 100%|██████████| 53/53 [01:02<00:00,  1.18s/it]


	Loss: 3.9184143678197323, Val Loss: 3.3487212657928467, LR: 0.0005, Duration: 64.58 sec - model saved!


Epoch 11: 100%|██████████| 53/53 [01:04<00:00,  1.22s/it]


	Loss: 3.9126845620713144, Val Loss: 3.3722169399261475, LR: 0.00055, Duration: 65.60 sec


Epoch 12: 100%|██████████| 53/53 [01:04<00:00,  1.21s/it]


	Loss: 3.8240257254186667, Val Loss: 3.277010440826416, LR: 0.0006, Duration: 65.38 sec - model saved!


Epoch 13: 100%|██████████| 53/53 [01:04<00:00,  1.21s/it]


	Loss: 3.839031017051553, Val Loss: 3.107643485069275, LR: 0.0006500000000000001, Duration: 65.59 sec - model saved!


Epoch 14: 100%|██████████| 53/53 [01:02<00:00,  1.17s/it]


	Loss: 3.73113400531265, Val Loss: 3.0770044326782227, LR: 0.0007, Duration: 63.90 sec - model saved!


Epoch 15: 100%|██████████| 53/53 [01:01<00:00,  1.17s/it]


	Loss: 3.6439331927389467, Val Loss: 2.882711410522461, LR: 0.00075, Duration: 63.16 sec - model saved!


Epoch 16: 100%|██████████| 53/53 [01:06<00:00,  1.26s/it]


	Loss: 3.572757158639296, Val Loss: 2.8419742584228516, LR: 0.0008, Duration: 68.03 sec - model saved!


Epoch 17: 100%|██████████| 53/53 [01:11<00:00,  1.35s/it]


	Loss: 3.635913025658086, Val Loss: 2.8333007097244263, LR: 0.00085, Duration: 73.22 sec - model saved!


Epoch 18: 100%|██████████| 53/53 [01:04<00:00,  1.21s/it]


	Loss: 3.574366920399216, Val Loss: 2.6856285333633423, LR: 0.0009000000000000001, Duration: 65.65 sec - model saved!


Epoch 19: 100%|██████████| 53/53 [01:05<00:00,  1.23s/it]


	Loss: 3.6331372575939826, Val Loss: 2.7278544902801514, LR: 0.00095, Duration: 66.63 sec


Epoch 20: 100%|██████████| 53/53 [01:08<00:00,  1.29s/it]


	Loss: 3.4598171216137006, Val Loss: 2.61009681224823, LR: 0.001, Duration: 69.34 sec - model saved!


Epoch 21: 100%|██████████| 53/53 [01:06<00:00,  1.26s/it]


	Loss: 3.462913598654405, Val Loss: 2.544796347618103, LR: 0.0009999238475781956, Duration: 68.15 sec - model saved!


Epoch 22: 100%|██████████| 53/53 [01:05<00:00,  1.23s/it]


	Loss: 3.3158136403785563, Val Loss: 2.3013495206832886, LR: 0.0009996954135095479, Duration: 66.42 sec - model saved!


Epoch 23: 100%|██████████| 53/53 [01:04<00:00,  1.21s/it]


	Loss: 3.350925517532061, Val Loss: 2.1872788667678833, LR: 0.0009993147673772868, Duration: 66.62 sec - model saved!


Epoch 24: 100%|██████████| 53/53 [01:06<00:00,  1.25s/it]


	Loss: 3.3003416241339916, Val Loss: 2.1287853717803955, LR: 0.0009987820251299122, Duration: 67.33 sec - model saved!


Epoch 25: 100%|██████████| 53/53 [01:04<00:00,  1.22s/it]


	Loss: 3.41692335650606, Val Loss: 2.1545841693878174, LR: 0.0009980973490458728, Duration: 66.06 sec


Epoch 26: 100%|██████████| 53/53 [01:11<00:00,  1.35s/it]


	Loss: 3.195594216292759, Val Loss: 1.9752426147460938, LR: 0.0009972609476841367, Duration: 73.35 sec - model saved!


Epoch 27: 100%|██████████| 53/53 [01:04<00:00,  1.22s/it]


	Loss: 3.1149910980800413, Val Loss: 1.9763428568840027, LR: 0.0009962730758206612, Duration: 65.87 sec


Epoch 28: 100%|██████████| 53/53 [01:05<00:00,  1.23s/it]


	Loss: 3.0295058826230608, Val Loss: 1.971318006515503, LR: 0.0009951340343707852, Duration: 66.35 sec - model saved!


Epoch 29: 100%|██████████| 53/53 [01:10<00:00,  1.32s/it]


	Loss: 3.0776231738756286, Val Loss: 1.8993996381759644, LR: 0.0009938441702975688, Duration: 71.61 sec - model saved!


Epoch 30: 100%|██████████| 53/53 [01:04<00:00,  1.21s/it]


	Loss: 3.099147751646222, Val Loss: 1.8955983519554138, LR: 0.000992403876506104, Duration: 65.41 sec - model saved!


Epoch 31: 100%|██████████| 53/53 [01:05<00:00,  1.24s/it]


	Loss: 3.057701011873641, Val Loss: 1.8200837969779968, LR: 0.000990813591723832, Duration: 67.70 sec - model saved!


Epoch 32: 100%|██████████| 53/53 [01:01<00:00,  1.16s/it]


	Loss: 3.0390095170938745, Val Loss: 1.7200132608413696, LR: 0.0009890738003669028, Duration: 62.57 sec - model saved!


Epoch 33: 100%|██████████| 53/53 [01:04<00:00,  1.21s/it]


	Loss: 2.9549607555821256, Val Loss: 1.696426808834076, LR: 0.0009871850323926177, Duration: 67.00 sec - model saved!


Epoch 34: 100%|██████████| 53/53 [01:06<00:00,  1.26s/it]


	Loss: 3.1136998005633085, Val Loss: 1.735174298286438, LR: 0.0009851478631379982, Duration: 68.18 sec


Epoch 35: 100%|██████████| 53/53 [01:03<00:00,  1.21s/it]


	Loss: 2.9322088259571, Val Loss: 1.5765542387962341, LR: 0.0009829629131445341, Duration: 65.60 sec - model saved!


Epoch 36: 100%|██████████| 53/53 [01:02<00:00,  1.18s/it]


	Loss: 2.801322005829721, Val Loss: 1.4952527284622192, LR: 0.0009806308479691594, Duration: 64.56 sec - model saved!


Epoch 37: 100%|██████████| 53/53 [01:07<00:00,  1.28s/it]


	Loss: 2.9638673134569853, Val Loss: 1.5360203981399536, LR: 0.0009781523779815178, Duration: 69.64 sec


Epoch 38: 100%|██████████| 53/53 [01:03<00:00,  1.21s/it]


	Loss: 2.741287906214876, Val Loss: 1.4388568997383118, LR: 0.0009755282581475768, Duration: 65.32 sec - model saved!


Epoch 39: 100%|██████████| 53/53 [01:05<00:00,  1.23s/it]


	Loss: 2.833449809056408, Val Loss: 1.4746387600898743, LR: 0.0009727592877996585, Duration: 66.72 sec


Epoch 40: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


	Loss: 2.928648035481291, Val Loss: 1.5736996531486511, LR: 0.0009698463103929542, Duration: 69.10 sec


Epoch 41: 100%|██████████| 53/53 [01:02<00:00,  1.18s/it]


	Loss: 2.884330542582386, Val Loss: 1.4341967701911926, LR: 0.0009667902132486009, Duration: 63.68 sec - model saved!


Epoch 42: 100%|██████████| 53/53 [01:04<00:00,  1.21s/it]


	Loss: 2.769658482299661, Val Loss: 1.3337533473968506, LR: 0.0009635919272833937, Duration: 65.52 sec - model saved!


Epoch 43: 100%|██████████| 53/53 [01:05<00:00,  1.24s/it]


	Loss: 2.803854175333707, Val Loss: 1.3092740774154663, LR: 0.0009602524267262203, Duration: 67.51 sec - model saved!


Epoch 44: 100%|██████████| 53/53 [01:10<00:00,  1.32s/it]


	Loss: 2.9626178921393627, Val Loss: 1.4064651131629944, LR: 0.0009567727288213005, Duration: 71.03 sec


Epoch 45: 100%|██████████| 53/53 [01:09<00:00,  1.32s/it]


	Loss: 2.8487446870443955, Val Loss: 1.4845433235168457, LR: 0.0009531538935183251, Duration: 71.57 sec


Epoch 46: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


	Loss: 2.801461842824828, Val Loss: 1.4286345839500427, LR: 0.0009493970231495835, Duration: 69.95 sec


Epoch 47: 100%|██████████| 53/53 [01:07<00:00,  1.26s/it]


	Loss: 2.7694190065815762, Val Loss: 1.3127281665802002, LR: 0.0009455032620941839, Duration: 68.03 sec


Epoch 48: 100%|██████████| 53/53 [01:03<00:00,  1.20s/it]


	Loss: 2.592758003270851, Val Loss: 1.3275494575500488, LR: 0.0009414737964294635, Duration: 64.68 sec


Epoch 49: 100%|██████████| 53/53 [01:02<00:00,  1.17s/it]


	Loss: 2.7786604683354215, Val Loss: 1.2643755078315735, LR: 0.0009373098535696979, Duration: 64.19 sec - model saved!


Epoch 50: 100%|██████████| 53/53 [01:06<00:00,  1.26s/it]


	Loss: 2.6537193959614016, Val Loss: 1.2713829278945923, LR: 0.0009330127018922195, Duration: 68.24 sec


Epoch 51: 100%|██████████| 53/53 [01:09<00:00,  1.31s/it]


	Loss: 2.386651889333185, Val Loss: 1.2800102829933167, LR: 0.0009285836503510562, Duration: 70.80 sec


Epoch 52: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


	Loss: 2.334265519987862, Val Loss: 1.1405368447303772, LR: 0.0009240240480782129, Duration: 68.97 sec - model saved!


Epoch 53: 100%|██████████| 53/53 [01:03<00:00,  1.20s/it]


	Loss: 2.4822362099053725, Val Loss: 1.129635214805603, LR: 0.0009193352839727121, Duration: 64.88 sec - model saved!


Epoch 54: 100%|██████████| 53/53 [01:03<00:00,  1.20s/it]


	Loss: 2.6379525099160537, Val Loss: 1.3044040203094482, LR: 0.0009145187862775209, Duration: 65.00 sec


Epoch 55: 100%|██████████| 53/53 [01:04<00:00,  1.21s/it]


	Loss: 2.5706374217879095, Val Loss: 1.0452020764350891, LR: 0.0009095760221444959, Duration: 65.58 sec - model saved!


Epoch 56: 100%|██████████| 53/53 [01:01<00:00,  1.15s/it]


	Loss: 2.6323713469055465, Val Loss: 1.130101501941681, LR: 0.0009045084971874737, Duration: 62.59 sec


Epoch 57: 100%|██████████| 53/53 [01:03<00:00,  1.20s/it]


	Loss: 2.487555596063722, Val Loss: 1.136031985282898, LR: 0.0008993177550236464, Duration: 65.58 sec


Epoch 58: 100%|██████████| 53/53 [01:02<00:00,  1.17s/it]


	Loss: 2.554370880126953, Val Loss: 1.035994440317154, LR: 0.0008940053768033609, Duration: 65.85 sec - model saved!


Epoch 59: 100%|██████████| 53/53 [01:05<00:00,  1.24s/it]


	Loss: 2.3156080268464, Val Loss: 1.0894077718257904, LR: 0.0008885729807284854, Duration: 66.76 sec


Epoch 60: 100%|██████████| 53/53 [01:00<00:00,  1.15s/it]


	Loss: 2.419907803805369, Val Loss: 1.0455090999603271, LR: 0.000883022221559489, Duration: 61.90 sec


Epoch 61: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


	Loss: 2.6332807473416597, Val Loss: 1.1262041926383972, LR: 0.000877354790111386, Duration: 69.45 sec


Epoch 62: 100%|██████████| 53/53 [01:06<00:00,  1.25s/it]


	Loss: 2.508747069340832, Val Loss: 1.0395189821720123, LR: 0.0008715724127386971, Duration: 67.68 sec


Epoch 63: 100%|██████████| 53/53 [01:08<00:00,  1.29s/it]


	Loss: 2.4106781100327113, Val Loss: 1.020106166601181, LR: 0.0008656768508095852, Duration: 70.16 sec - model saved!


Epoch 64: 100%|██████████| 53/53 [01:04<00:00,  1.21s/it]


	Loss: 2.428007258559173, Val Loss: 1.0254282057285309, LR: 0.0008596699001693256, Duration: 65.92 sec


Epoch 65: 100%|██████████| 53/53 [01:05<00:00,  1.24s/it]


	Loss: 2.4333730733619547, Val Loss: 1.070679485797882, LR: 0.0008535533905932737, Duration: 66.71 sec


Epoch 66: 100%|██████████| 53/53 [01:03<00:00,  1.20s/it]


	Loss: 2.0554985797630168, Val Loss: 0.9034236371517181, LR: 0.0008473291852294987, Duration: 65.46 sec - model saved!


Epoch 67: 100%|██████████| 53/53 [01:09<00:00,  1.32s/it]


	Loss: 2.2747459344144136, Val Loss: 0.9996425211429596, LR: 0.0008409991800312493, Duration: 71.11 sec


Epoch 68: 100%|██████████| 53/53 [01:01<00:00,  1.16s/it]


	Loss: 2.249709709635321, Val Loss: 0.9472216665744781, LR: 0.0008345653031794292, Duration: 62.46 sec


Epoch 69: 100%|██████████| 53/53 [01:08<00:00,  1.30s/it]


	Loss: 2.324824996714322, Val Loss: 0.866094559431076, LR: 0.0008280295144952537, Duration: 70.02 sec - model saved!


Epoch 70: 100%|██████████| 53/53 [01:03<00:00,  1.20s/it]


	Loss: 2.2101984698817416, Val Loss: 0.9031307399272919, LR: 0.0008213938048432696, Duration: 64.55 sec


Epoch 71: 100%|██████████| 53/53 [01:08<00:00,  1.30s/it]


	Loss: 2.18354679728454, Val Loss: 0.9304220676422119, LR: 0.0008146601955249188, Duration: 70.19 sec


Epoch 72: 100%|██████████| 53/53 [01:04<00:00,  1.21s/it]


	Loss: 2.293254499165517, Val Loss: 0.9383030235767365, LR: 0.0008078307376628291, Duration: 65.12 sec


Epoch 73: 100%|██████████| 53/53 [01:01<00:00,  1.16s/it]


	Loss: 2.193612908417324, Val Loss: 0.9417426586151123, LR: 0.0008009075115760243, Duration: 62.66 sec


Epoch 74: 100%|██████████| 53/53 [01:02<00:00,  1.19s/it]


	Loss: 2.111749158715302, Val Loss: 0.9456266760826111, LR: 0.0007938926261462366, Duration: 63.96 sec


Epoch 75: 100%|██████████| 53/53 [01:02<00:00,  1.17s/it]


	Loss: 2.2993492225431047, Val Loss: 0.9329831004142761, LR: 0.0007867882181755231, Duration: 63.47 sec


Epoch 76: 100%|██████████| 53/53 [01:06<00:00,  1.26s/it]


	Loss: 2.191153296884501, Val Loss: 0.9140001237392426, LR: 0.0007795964517353734, Duration: 68.11 sec


Epoch 77: 100%|██████████| 53/53 [01:07<00:00,  1.28s/it]


	Loss: 2.0403887528293536, Val Loss: 0.8437642157077789, LR: 0.0007723195175075137, Duration: 69.38 sec - model saved!


Epoch 78: 100%|██████████| 53/53 [01:05<00:00,  1.24s/it]


	Loss: 2.2175640340121285, Val Loss: 0.9203191995620728, LR: 0.0007649596321166025, Duration: 66.56 sec


Epoch 79: 100%|██████████| 53/53 [01:09<00:00,  1.31s/it]


	Loss: 2.198874741230371, Val Loss: 0.9437687695026398, LR: 0.0007575190374550272, Duration: 73.65 sec


Epoch 80: 100%|██████████| 53/53 [01:05<00:00,  1.23s/it]


	Loss: 2.1008130761812316, Val Loss: 0.8773991167545319, LR: 0.00075, Duration: 66.43 sec


Epoch 81: 100%|██████████| 53/53 [01:08<00:00,  1.30s/it]


	Loss: 2.3084756855694755, Val Loss: 0.9259405136108398, LR: 0.0007424048101231686, Duration: 70.54 sec


Epoch 82: 100%|██████████| 53/53 [01:10<00:00,  1.32s/it]


	Loss: 2.1820182125523404, Val Loss: 0.9349567294120789, LR: 0.0007347357813929454, Duration: 71.61 sec


Epoch 83: 100%|██████████| 53/53 [01:05<00:00,  1.23s/it]


	Loss: 1.998693828312856, Val Loss: 0.8771927058696747, LR: 0.0007269952498697733, Duration: 66.18 sec


Epoch 84: 100%|██████████| 53/53 [01:05<00:00,  1.24s/it]


	Loss: 2.0645603638774945, Val Loss: 0.8505749702453613, LR: 0.0007191855733945387, Duration: 67.50 sec


Epoch 85: 100%|██████████| 53/53 [01:01<00:00,  1.15s/it]


	Loss: 2.100299783472745, Val Loss: 0.9061897993087769, LR: 0.0007113091308703497, Duration: 62.22 sec


Epoch 86: 100%|██████████| 53/53 [01:02<00:00,  1.19s/it]


	Loss: 2.3644329399432777, Val Loss: 0.8690151274204254, LR: 0.0007033683215379002, Duration: 63.99 sec


Epoch 87: 100%|██████████| 53/53 [01:06<00:00,  1.26s/it]


	Loss: 1.9869960186616429, Val Loss: 0.856202244758606, LR: 0.0006953655642446368, Duration: 68.04 sec


Epoch 88: 100%|██████████| 53/53 [01:03<00:00,  1.20s/it]


	Loss: 2.028511490461961, Val Loss: 0.8536461889743805, LR: 0.0006873032967079561, Duration: 65.66 sec


Epoch 89: 100%|██████████| 53/53 [01:01<00:00,  1.17s/it]


	Loss: 1.9153758849737779, Val Loss: 0.8237971067428589, LR: 0.0006791839747726501, Duration: 63.16 sec - model saved!


Epoch 90: 100%|██████████| 53/53 [00:50<00:00,  1.05it/s]


	Loss: 2.1796236825439164, Val Loss: 0.8068076372146606, LR: 0.0006710100716628344, Duration: 51.64 sec - model saved!


Epoch 91: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.943442553844092, Val Loss: 0.8271751701831818, LR: 0.0006627840772285784, Duration: 50.74 sec


Epoch 92: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 2.0080434871169754, Val Loss: 0.8221621215343475, LR: 0.0006545084971874737, Duration: 50.62 sec


Epoch 93: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 2.0155571699142456, Val Loss: 0.8657822012901306, LR: 0.0006461858523613684, Duration: 50.92 sec


Epoch 94: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 2.0944578310228743, Val Loss: 0.8334069848060608, LR: 0.0006378186779084996, Duration: 50.78 sec


Epoch 95: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 2.0113743431163282, Val Loss: 0.8341044783592224, LR: 0.0006294095225512603, Duration: 50.82 sec


Epoch 96: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.9924622931570377, Val Loss: 0.838939368724823, LR: 0.0006209609477998338, Duration: 50.78 sec


Epoch 97: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.782689872777687, Val Loss: 0.7741664946079254, LR: 0.0006124755271719325, Duration: 50.85 sec - model saved!


Epoch 98: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.9254596885645165, Val Loss: 0.8200420141220093, LR: 0.0006039558454088796, Duration: 50.80 sec


Epoch 99: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.9271867275238037, Val Loss: 0.8260376751422882, LR: 0.0005954044976882724, Duration: 50.61 sec


Epoch 100: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.8583161673455868, Val Loss: 0.8066474199295044, LR: 0.0005868240888334653, Duration: 50.80 sec


Epoch 101: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 2.1396689594916576, Val Loss: 0.8136171996593475, LR: 0.0005782172325201155, Duration: 50.60 sec


Epoch 102: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.8848963987152532, Val Loss: 0.8826410472393036, LR: 0.0005695865504800327, Duration: 50.68 sec


Epoch 103: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 2.044515605242747, Val Loss: 0.8239445090293884, LR: 0.0005609346717025737, Duration: 50.90 sec


Epoch 104: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 2.01988373837381, Val Loss: 0.853959709405899, LR: 0.0005522642316338268, Duration: 50.95 sec


Epoch 105: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.8653327305361909, Val Loss: 0.9208323657512665, LR: 0.0005435778713738292, Duration: 50.68 sec


Epoch 106: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 2.058353797444757, Val Loss: 0.8493340611457825, LR: 0.0005348782368720626, Duration: 50.73 sec


Epoch 107: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 2.002583938949513, Val Loss: 0.812932550907135, LR: 0.000526167978121472, Duration: 50.67 sec


Epoch 108: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.7192679958523445, Val Loss: 0.7967312037944794, LR: 0.0005174497483512506, Duration: 50.67 sec


Epoch 109: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.9520259443319068, Val Loss: 0.8284212946891785, LR: 0.0005087262032186418, Duration: 50.68 sec


Epoch 110: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.8054435028220124, Val Loss: 0.7898607552051544, LR: 0.0005, Duration: 50.86 sec


Epoch 111: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.8536412018649984, Val Loss: 0.7811048626899719, LR: 0.0004912737967813582, Duration: 50.85 sec


Epoch 112: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.945035754509692, Val Loss: 0.9615026414394379, LR: 0.0004825502516487497, Duration: 50.78 sec


Epoch 113: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.7776776945815895, Val Loss: 0.8694575726985931, LR: 0.0004738320218785281, Duration: 50.79 sec


Epoch 114: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.9089340230204024, Val Loss: 0.8249691724777222, LR: 0.00046512176312793734, Duration: 50.73 sec


Epoch 115: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.90153347433738, Val Loss: 0.9259986579418182, LR: 0.00045642212862617086, Duration: 50.95 sec


Epoch 116: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.858366388194966, Val Loss: 0.8191998898983002, LR: 0.00044773576836617336, Duration: 50.95 sec


Epoch 117: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.9464496385376409, Val Loss: 0.8376502990722656, LR: 0.00043906532829742634, Duration: 50.77 sec


Epoch 118: 100%|██████████| 53/53 [00:50<00:00,  1.06it/s]


	Loss: 1.8132716630989651, Val Loss: 0.8611080646514893, LR: 0.0004304134495199674, Duration: 50.98 sec


Epoch 119: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.8526500137347095, Val Loss: 0.7707901895046234, LR: 0.0004217827674798845, Duration: 51.07 sec - model saved!


Epoch 120: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.9216101664417196, Val Loss: 0.7744775116443634, LR: 0.00041317591116653486, Duration: 50.86 sec


Epoch 121: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.7938242543418452, Val Loss: 0.7844637632369995, LR: 0.0004045955023117276, Duration: 50.92 sec


Epoch 122: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.8023364948776532, Val Loss: 0.8331851959228516, LR: 0.0003960441545911204, Duration: 50.90 sec


Epoch 123: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.6481545657481786, Val Loss: 0.8141883313655853, LR: 0.0003875244728280676, Duration: 50.90 sec


Epoch 124: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.6974328317732181, Val Loss: 0.7755322456359863, LR: 0.0003790390522001662, Duration: 50.93 sec


Epoch 125: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.7494023982084022, Val Loss: 0.8592051863670349, LR: 0.0003705904774487396, Duration: 50.91 sec


Epoch 126: 100%|██████████| 53/53 [00:50<00:00,  1.06it/s]


	Loss: 1.811260738462772, Val Loss: 0.7863874435424805, LR: 0.00036218132209150044, Duration: 50.99 sec


Epoch 127: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.7113369500861977, Val Loss: 0.8372033834457397, LR: 0.00035381414763863166, Duration: 50.79 sec


Epoch 128: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.8813409479159229, Val Loss: 0.8268453478813171, LR: 0.00034549150281252633, Duration: 50.83 sec


Epoch 129: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.862740751707329, Val Loss: 0.7642996609210968, LR: 0.00033721592277142175, Duration: 50.95 sec - model saved!


Epoch 130: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 2.0038391093038164, Val Loss: 0.8930239081382751, LR: 0.0003289899283371657, Duration: 50.69 sec


Epoch 131: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.5871035085534149, Val Loss: 0.7913223206996918, LR: 0.00032081602522734986, Duration: 50.82 sec


Epoch 132: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.73360401954291, Val Loss: 0.7970458269119263, LR: 0.00031269670329204396, Duration: 50.79 sec


Epoch 133: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.975220630753715, Val Loss: 0.8182066380977631, LR: 0.0003046344357553632, Duration: 50.76 sec


Epoch 134: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.8040742694206957, Val Loss: 0.8114172220230103, LR: 0.0002966316784621, Duration: 50.70 sec


Epoch 135: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.694759822116708, Val Loss: 0.7909522950649261, LR: 0.0002886908691296504, Duration: 50.67 sec


Epoch 136: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.788320946243574, Val Loss: 0.7805000841617584, LR: 0.00028081442660546124, Duration: 50.62 sec


Epoch 137: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.8811172809240952, Val Loss: 0.8469788432121277, LR: 0.00027300475013022663, Duration: 50.74 sec


Epoch 138: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.7295889966892746, Val Loss: 0.8124634623527527, LR: 0.00026526421860705474, Duration: 50.68 sec


Epoch 139: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.8298986374207262, Val Loss: 0.7811976969242096, LR: 0.0002575951898768315, Duration: 50.67 sec


Epoch 140: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.6554229967999008, Val Loss: 0.7590774893760681, LR: 0.0002500000000000001, Duration: 51.07 sec - model saved!


Epoch 141: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.615232767923823, Val Loss: 0.7876931428909302, LR: 0.00024248096254497287, Duration: 50.74 sec


Epoch 142: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.4995122259517886, Val Loss: 0.7677063047885895, LR: 0.0002350403678833976, Duration: 50.77 sec


Epoch 143: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.557263711713395, Val Loss: 0.8178325891494751, LR: 0.00022768048249248646, Duration: 50.81 sec


Epoch 144: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.6274127634066455, Val Loss: 0.7941284775733948, LR: 0.00022040354826462666, Duration: 50.76 sec


Epoch 145: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.8069549270395964, Val Loss: 0.8900790214538574, LR: 0.00021321178182447708, Duration: 50.66 sec


Epoch 146: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.748389871615284, Val Loss: 0.7834450006484985, LR: 0.00020610737385376348, Duration: 50.69 sec


Epoch 147: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.6704613521413982, Val Loss: 0.8307584226131439, LR: 0.00019909248842397582, Duration: 50.59 sec


Epoch 148: 100%|██████████| 53/53 [00:50<00:00,  1.06it/s]


	Loss: 1.693824806303348, Val Loss: 0.7888597846031189, LR: 0.00019216926233717085, Duration: 50.97 sec


Epoch 149: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.5478664524150345, Val Loss: 0.8219472169876099, LR: 0.00018533980447508135, Duration: 50.89 sec


Epoch 150: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.938036433930667, Val Loss: 0.850498616695404, LR: 0.0001786061951567303, Duration: 50.68 sec


Epoch 151: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.7901414823981951, Val Loss: 0.8335275053977966, LR: 0.00017197048550474643, Duration: 50.82 sec


Epoch 152: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.6726892736722838, Val Loss: 0.8032783567905426, LR: 0.00016543469682057105, Duration: 50.71 sec


Epoch 153: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.7874878183850702, Val Loss: 0.7769253849983215, LR: 0.00015900081996875082, Duration: 50.70 sec


Epoch 154: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.692796620557893, Val Loss: 0.8104836940765381, LR: 0.00015267081477050133, Duration: 50.64 sec


Epoch 155: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.701416478966767, Val Loss: 0.7963142991065979, LR: 0.00014644660940672628, Duration: 50.62 sec


Epoch 156: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.5982793693272572, Val Loss: 0.8212220072746277, LR: 0.00014033009983067452, Duration: 50.73 sec


Epoch 157: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.6638056748318222, Val Loss: 0.8303169310092926, LR: 0.00013432314919041476, Duration: 50.92 sec


Epoch 158: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.50769512383443, Val Loss: 0.8217476308345795, LR: 0.00012842758726130281, Duration: 50.98 sec


Epoch 159: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.7142701610079352, Val Loss: 0.855547308921814, LR: 0.000122645209888614, Duration: 50.81 sec


Epoch 160: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.706531141164168, Val Loss: 0.8480227887630463, LR: 0.00011697777844051105, Duration: 50.72 sec


Epoch 161: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.8025738079592866, Val Loss: 0.7994233965873718, LR: 0.00011142701927151455, Duration: 50.86 sec


Epoch 162: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.7700950334656913, Val Loss: 0.8133866786956787, LR: 0.00010599462319663906, Duration: 50.93 sec


Epoch 163: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.7380728226787638, Val Loss: 0.8373262584209442, LR: 0.00010068224497635369, Duration: 50.83 sec


Epoch 164: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.761056847167465, Val Loss: 0.8512865602970123, LR: 9.549150281252633e-05, Duration: 50.78 sec


Epoch 165: 100%|██████████| 53/53 [00:50<00:00,  1.06it/s]


	Loss: 1.880965733303214, Val Loss: 0.8238430619239807, LR: 9.042397785550405e-05, Duration: 50.99 sec


Epoch 166: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.7774884104728699, Val Loss: 0.8540236949920654, LR: 8.548121372247918e-05, Duration: 50.84 sec


Epoch 167: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.5747479605224897, Val Loss: 0.7977403700351715, LR: 8.066471602728804e-05, Duration: 50.90 sec


Epoch 168: 100%|██████████| 53/53 [00:50<00:00,  1.06it/s]


	Loss: 1.4370173519512393, Val Loss: 0.7838273048400879, LR: 7.597595192178702e-05, Duration: 51.05 sec


Epoch 169: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.7094492529923062, Val Loss: 0.8053511679172516, LR: 7.14163496489439e-05, Duration: 50.88 sec


Epoch 170: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.7100138844184156, Val Loss: 0.9190984666347504, LR: 6.698729810778065e-05, Duration: 50.79 sec


Epoch 171: 100%|██████████| 53/53 [00:50<00:00,  1.06it/s]


	Loss: 1.6914964768121827, Val Loss: 0.7874844670295715, LR: 6.269014643030213e-05, Duration: 51.21 sec


Epoch 172: 100%|██████████| 53/53 [00:50<00:00,  1.06it/s]


	Loss: 1.5943153219402961, Val Loss: 0.8256789147853851, LR: 5.852620357053651e-05, Duration: 51.12 sec


Epoch 173: 100%|██████████| 53/53 [00:50<00:00,  1.06it/s]


	Loss: 1.6352426477198332, Val Loss: 0.7972314059734344, LR: 5.449673790581611e-05, Duration: 51.09 sec


Epoch 174: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.6660690206401754, Val Loss: 0.7993861436843872, LR: 5.060297685041659e-05, Duration: 50.82 sec


Epoch 175: 100%|██████████| 53/53 [00:50<00:00,  1.06it/s]


	Loss: 1.6171040636188578, Val Loss: 0.7933335900306702, LR: 4.684610648167503e-05, Duration: 51.18 sec


Epoch 176: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.6429559588432312, Val Loss: 0.8357365727424622, LR: 4.322727117869951e-05, Duration: 50.76 sec


Epoch 177: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.5567297339439392, Val Loss: 0.7928066849708557, LR: 3.974757327377981e-05, Duration: 50.90 sec


Epoch 178: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.5767251511789717, Val Loss: 0.819984644651413, LR: 3.6408072716606344e-05, Duration: 50.83 sec


Epoch 179: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.5791139209045555, Val Loss: 0.8209741711616516, LR: 3.3209786751399184e-05, Duration: 50.92 sec


Epoch 180: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.5406525787317529, Val Loss: 0.8095232546329498, LR: 3.0153689607045842e-05, Duration: 50.84 sec


Epoch 181: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.6960525726372342, Val Loss: 0.8099813461303711, LR: 2.724071220034158e-05, Duration: 50.81 sec


Epoch 182: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.5983049149783153, Val Loss: 0.8062703311443329, LR: 2.4471741852423235e-05, Duration: 50.81 sec


Epoch 183: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.6106316424765676, Val Loss: 0.7905535995960236, LR: 2.1847622018482283e-05, Duration: 50.88 sec


Epoch 184: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.481811619029855, Val Loss: 0.7974299788475037, LR: 1.9369152030840554e-05, Duration: 50.86 sec


Epoch 185: 100%|██████████| 53/53 [00:49<00:00,  1.06it/s]


	Loss: 1.8859081976818588, Val Loss: 0.7922136187553406, LR: 1.70370868554659e-05, Duration: 50.87 sec


Epoch 186: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.665634090045713, Val Loss: 0.804825633764267, LR: 1.4852136862001764e-05, Duration: 50.64 sec


Epoch 187: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.6095925715734374, Val Loss: 0.8081980049610138, LR: 1.2814967607382432e-05, Duration: 50.59 sec


Epoch 188: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.734211212059237, Val Loss: 0.8108775615692139, LR: 1.0926199633097156e-05, Duration: 50.58 sec


Epoch 189: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.7038070222116866, Val Loss: 0.8126939535140991, LR: 9.186408276168012e-06, Duration: 50.58 sec


Epoch 190: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.7487599287392959, Val Loss: 0.821980893611908, LR: 7.59612349389599e-06, Duration: 50.52 sec


Epoch 191: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.7539891814285855, Val Loss: 0.8027200102806091, LR: 6.15582970243117e-06, Duration: 50.54 sec


Epoch 192: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.5562889812127598, Val Loss: 0.8198049366474152, LR: 4.865965629214819e-06, Duration: 50.51 sec


Epoch 193: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.7097318183700994, Val Loss: 0.8078886270523071, LR: 3.7269241793390084e-06, Duration: 50.53 sec


Epoch 194: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.7021825189860362, Val Loss: 0.8348654508590698, LR: 2.739052315863355e-06, Duration: 50.56 sec


Epoch 195: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.49126753604637, Val Loss: 0.8095850050449371, LR: 1.9026509541272275e-06, Duration: 50.38 sec


Epoch 196: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.6570006372793666, Val Loss: 0.8209457993507385, LR: 1.2179748700879012e-06, Duration: 50.40 sec


Epoch 197: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.6032606014665567, Val Loss: 0.7858821451663971, LR: 6.852326227130834e-07, Duration: 50.39 sec


Epoch 198: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.4993921687018197, Val Loss: 0.7885026633739471, LR: 3.0458649045211895e-07, Duration: 50.42 sec


Epoch 199: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.694274880976047, Val Loss: 0.837200254201889, LR: 7.615242180436521e-08, Duration: 50.40 sec


Epoch 200: 100%|██████████| 53/53 [00:49<00:00,  1.07it/s]


	Loss: 1.6868847766012516, Val Loss: 0.8098738491535187, LR: 0.0, Duration: 50.40 sec
Epoch 당 평균 소요시간 : 57.83초


In [21]:
# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics

Unnamed: 0,Metric,Value
0,Accuracy,0.87
1,Precision,0.884567
2,Recall,0.87
3,F1 Score,0.863506
