In [1]:
import numpy as np
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

from torch.cuda.amp import autocast, GradScaler
from torch.nn.utils import clip_grad_norm_
from tqdm import tqdm
import time

from timm.data import Mixup
import transformers

from einops import rearrange
from einops.layers.torch import Rearrange

from timm.models.layers import DropPath, trunc_normal_, to_2tuple

from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

  from .autonotebook import tqdm as notebook_tqdm


# 01.Convolutional Token Embedding

In [2]:
class ConvEmbed(nn.Module):
    '''
    img/token map to Conv Embedding
    '''
    
    def __init__(self,
                 patch_size=11, # [11, 7, 7, 3]
                 in_chans=3,   # [3, dim of stage1, dim of stage2]
                 embed_dim=64, # [32, 64, 192, 384]
                 stride=4,     # [6, 4, 4, 2]
                 padding=2,    # [3, 2, 2, 1]
                 norm_layer=None):
        super().__init__()
        self.patch_size = to_2tuple(patch_size)
        
        self.proj = nn.Conv2d(
            in_channels=in_chans,
            out_channels=embed_dim,
            kernel_size=patch_size,
            stride=stride,
            padding=padding
        )
        
        self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()
        
    def forward(self, x):
        x = self.proj(x)
        
        _, _, H, W = x.shape
        x = rearrange(x, 'b c h w -> b (h w) c')
        x = self.norm(x)
        x = rearrange(x, 'b (h w) c -> b c h w', h=H, w=W)
        return x
    

In [3]:
class LayerNorm(nn.Module):
    r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. 
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with 
    shape (batch_size, height, width, channels) while channels_first corresponds to inputs 
    with shape (batch_size, channels, height, width).
    """
    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError 
        self.normalized_shape = (normalized_shape, )
    
    def forward(self, x):
        if self.data_format == "channels_last":
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
            return x

In [4]:
class AttentionConv(nn.Module):
    def __init__(self,
                 dim=64,        # [32,64,192,384]
                 num_heads=4,   # [1,3,6,9]
                 qkv_bias=False,
                 attn_drop=0.,
                 proj_drop=0.,
                 kernel_size=3,
                 padding_q=1,
                 padding_kv=1,
                 stride_q=1,
                 stride_kv=2,
                 act_layer=nn.GELU,
                 **kwargs
                 ):
        super().__init__()
        self.qkv_bias = qkv_bias
        self.stride_q = stride_q
        self.stride_kv = stride_kv
        self.dim = dim
        self.num_heads = num_heads        
        self.scale = dim ** -0.5
        self.act_layer = act_layer()
        
        self.conv_proj_q = self._build_projection(dim,
                                                  kernel_size,
                                                  padding_q,
                                                  stride_q,
                                                  )
        self.conv_proj_k = self._build_projection(dim,
                                                  kernel_size,
                                                  padding_kv,
                                                  stride_kv,
                                                  )
        
        self.conv_proj_v = self._build_projection(dim,
                                                  kernel_size,
                                                  padding_kv,
                                                  stride_kv,
                                                  )
        
        self.attn_drop = nn.Dropout(attn_drop)
        self.linear_proj_last = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)        
        
    def _build_projection(self,
                          dim,
                          kernel_size,
                          padding,
                          stride,
                          ):
        
        proj = nn.Sequential(OrderedDict([
            ('depthwise', nn.Conv2d(
                dim,
                dim,
                kernel_size=kernel_size,
                padding=padding,
                stride=stride,
                bias=self.qkv_bias,
                groups=dim)),
            ('rearrange1', Rearrange('b c h w -> b h w c')),
            ('ln', nn.LayerNorm(dim)),
            ('rearrange2', Rearrange('b h w c -> b c h w')),
            ('pointwise1', nn.Conv2d(
                dim,
                dim*4,
                kernel_size=1,
                bias=self.qkv_bias)),
            ('activation', self.act_layer),
            ('pointwise2', nn.Conv2d(
                dim*4,
                dim,
                kernel_size=1,
                bias=self.qkv_bias)),
            ('rearrange3', Rearrange('b c h w -> b (h w) c')),

        ]))
        
        return proj
    
    def forward(self, x, h, w):
        x = rearrange(x, 'b (h w) c -> b c h w', h=h, w=w)
        
        q = F.normalize(self.conv_proj_q(x), dim=-1)
        k = F.normalize(self.conv_proj_k(x), dim=-1)
        v = self.conv_proj_v(x)
        
        q = rearrange(q, 'b t (h d) -> b h t d', h=self.num_heads)
        k = rearrange(k, 'b t (h d) -> b h t d', h=self.num_heads)
        v = rearrange(v, 'b t (h d) -> b h t d', h=self.num_heads)
        
        attn_score = torch.einsum('bhlk,bhtk->bhlt', [q, k]) * self.scale
        attn = self.attn_drop(F.softmax(attn_score, dim=-1))
        
        x = torch.matmul(attn, v)
        batch_size, num_heads, seq_length, depth = x.size()
        x = x.view(batch_size, seq_length, num_heads * depth)
        
        x = self.proj_drop(self.linear_proj_last(x))
        
        return x


In [5]:
# transformer block에 작은 스케일 인자 곱하기
class LayerScale(nn.Module):
    def __init__(self, dim, init_values=1e-5):
        super().__init__()
        self.gamma = nn.Parameter(init_values * torch.ones((dim)))

    def forward(self, x):
        return self.gamma * x

In [6]:
class Block(nn.Module):
    
    def __init__(self,
                 dim,
                 num_heads,
                 mlp_ratio=4.,
                 qkv_bias=False,
                 drop=0.,
                 attn_drop=0.,
                 drop_path=0.,
                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm,
                 **kwargs
                ):
        super().__init__()
        
        self.norm1 = norm_layer(dim)
        self.ls1 = LayerScale(dim)
        self.attn = AttentionConv(dim=dim,
                                  num_heads=num_heads,
                                  qkv_bias=qkv_bias,
                                  attn_drop=attn_drop,
                                  proj_drop=drop,
                                  act_layer=act_layer,
                                  **kwargs)        
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        self.ls2 = LayerScale(dim)
        mlp_hidden_dim = int(dim*mlp_ratio)
        self.mlp = nn.Sequential(
            nn.Linear(dim, mlp_hidden_dim),
            act_layer(),
            nn.Linear(mlp_hidden_dim, dim),
            nn.Dropout(drop),
        )
        
    def forward(self, x, h, w):
        res = x
        x = self.norm1(x)
        attn = self.attn(x, h, w)
        x = res + self.drop_path(self.ls1(attn))
        x = x + self.drop_path(self.ls2(self.mlp(self.norm2(x))))
        return x

In [7]:
test_img = torch.Tensor(np.zeros((2,3,224,224))) # B, C, H, W

block = Block(dim=64,
              num_heads=4)

In [8]:
# Stage 1 

## Patch Embedding
convembed = ConvEmbed(patch_size=7, stride=4, padding=2)
stage1_img = convembed(test_img)

## Attention with Convolution
b, c, h, w = stage1_img.shape
stage1_img = rearrange(stage1_img, 'b c h w -> b (h w) c')
stage1_img = block(stage1_img, h=h, w=w)
stage1_img = rearrange(stage1_img, 'b (h w) c -> b c h w', h=h, w=w)

## Check Result
print(f'stage 1 | img shape: {test_img.shape} → Conv Embed Shape: {stage1_img.shape}')

stage 1 | img shape: torch.Size([2, 3, 224, 224]) → Conv Embed Shape: torch.Size([2, 64, 56, 56])


In [9]:
# Stage 2 

## Patch Embedding
convembed = ConvEmbed(patch_size=7, in_chans=64, stride=2, padding=3)
stage2_img = convembed(stage1_img)

## Attention with Convolution
b, c, h, w = stage2_img.shape
stage2_img = rearrange(stage2_img, 'b c h w -> b (h w) c')
stage2_img = block(stage2_img, h=h, w=w)
stage2_img = rearrange(stage2_img, 'b (h w) c -> b c h w', h=h, w=w)

## Check Result
print(f'stage 2 | img shape: {stage1_img.shape} → Conv Embed Shape: {stage2_img.shape}')

stage 2 | img shape: torch.Size([2, 64, 56, 56]) → Conv Embed Shape: torch.Size([2, 64, 28, 28])


In [10]:
# Stage 3 

## Patch Embedding
convembed = ConvEmbed(patch_size=7, in_chans=64, stride=2, padding=3)
stage3_img = convembed(stage2_img)

## Attention with Convolution
b, c, h, w = stage3_img.shape
stage3_img = rearrange(stage3_img, 'b c h w -> b (h w) c')
stage3_img = block(stage3_img, h=h, w=w)
stage3_img = rearrange(stage3_img, 'b (h w) c -> b c h w', h=h, w=w)

## Check Result
print(f'stage 3 | img shape: {stage2_img.shape} → Conv Embed Shape: {stage3_img.shape}')

stage 3 | img shape: torch.Size([2, 64, 28, 28]) → Conv Embed Shape: torch.Size([2, 64, 14, 14])


In [11]:
# Stage 4 

## Patch Embedding
convembed = ConvEmbed(patch_size=7, in_chans=64, stride=2, padding=3)
stage4_img = convembed(stage3_img)

## Attention with Convolution
b, c, h, w = stage4_img.shape
stage4_img = rearrange(stage4_img, 'b c h w -> b (h w) c')
stage4_img = block(stage4_img, h=h, w=w)
stage4_img = rearrange(stage4_img, 'b (h w) c -> b c h w', h=h, w=w)

## Check Result
print(f'stage 4 | img shape: {stage3_img.shape} → Conv Embed Shape: {stage4_img.shape}')

stage 4 | img shape: torch.Size([2, 64, 14, 14]) → Conv Embed Shape: torch.Size([2, 64, 7, 7])


In [12]:
class VisionTransformer(nn.Module):
    def __init__(self,
                 patch_size=16,
                 patch_stride=16,
                 patch_padding=0,
                 in_chans=3,
                 embed_dim=768,
                 depth=12,
                 num_heads=12,
                 mlp_ratio=4.,
                 qkv_bias=False,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm,
                 init='trunc_norm',
                 **kwargs
                 ):
        
        super().__init__()

        self.patch_embed = ConvEmbed(
            patch_size=patch_size,
            in_chans=in_chans,
            stride=patch_stride,
            padding=patch_padding,
            embed_dim=embed_dim,
            norm_layer=norm_layer
        )

        self.pos_drop = nn.Dropout(p=drop_rate)

        self.blocks = nn.ModuleList([
            Block(
                dim=embed_dim,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=drop_path_rate,
                act_layer=act_layer,
                norm_layer=norm_layer,
                **kwargs
            ) for _ in range(depth)
        ])

        if init == 'xavier':
            self.apply(self._init_weights_xavier)
        else:
            self.apply(self._init_weights_trunc_normal)

    def _init_weights_trunc_normal(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=0.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.LayerNorm, nn.BatchNorm2d)):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def _init_weights_xavier(self, m):
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, (nn.LayerNorm, nn.BatchNorm2d)):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def forward(self, x):
        x = self.patch_embed(x)
        _, _, H, W = x.size()

        x = rearrange(x, 'b c h w -> b (h w) c')
        x = self.pos_drop(x)

        for _, blk in enumerate(self.blocks):
            x = blk(x, H, W)
        x = rearrange(x, 'b (h w) c -> b c h w', h=H, w=W)
        return x

In [13]:
class ConvolutionalVisionTransformer(nn.Module):
    def __init__(self,
                 in_chans=3,
                 num_classes=100,
                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm,
                 init='trunc_norm',
                 spec=None):
        super().__init__()
        self.num_classes = num_classes

        self.num_stages = spec['NUM_STAGES']
        self.stages = nn.ModuleList()
        for i in range(self.num_stages):
            kwargs = {
                'patch_size': spec['PATCH_SIZE'][i],
                'patch_stride': spec['PATCH_STRIDE'][i],
                'patch_padding': spec['PATCH_PADDING'][i],
                'embed_dim': spec['DIM_EMBED'][i],
                'depth': spec['DEPTH'][i],
                'num_heads': spec['NUM_HEADS'][i],
                'mlp_ratio': spec['MLP_RATIO'][i],
                'qkv_bias': spec['QKV_BIAS'][i],
                'drop_rate': spec['DROP_RATE'][i],
                'attn_drop_rate': spec['ATTN_DROP_RATE'][i],
                'drop_path_rate': spec['DROP_PATH_RATE'][i],
                'kernel_size': spec['KERNEL_QKV'][i],
                'padding_q': spec['PADDING_Q'][i],
                'padding_kv': spec['PADDING_KV'][i],
                'stride_q': spec['STRIDE_Q'][i],
                'stride_kv': spec['STRIDE_KV'][i],
            }

            stage = VisionTransformer(
                in_chans=in_chans,
                init=init,
                act_layer=act_layer,
                norm_layer=norm_layer,
                **kwargs
            )
            
            self.stages.append(stage)

            in_chans = spec['DIM_EMBED'][i]

        dim_embed = spec['DIM_EMBED'][-1]
        self.norm = norm_layer(dim_embed)
        self.pooler = nn.AdaptiveAvgPool1d(1)

        # Classifier head
        self.head = nn.Linear(dim_embed, num_classes) if num_classes > 0 else nn.Identity()
        trunc_normal_(self.head.weight, std=0.02)

    def forward_features(self, x):
        for stage in self.stages:
            x = stage(x)

        x = rearrange(x, 'b c h w -> b (h w) c') # (B, L, C)
        x = self.norm(x)                         # (B, L, C)
        x = self.pooler(x.transpose(1,2))        # (B, C, 1)
        x = torch.flatten(x, 1)                  # (B, C)
        # x = torch.mean(x, dim=1)

        return x

    def forward(self, x):
        x = self.forward_features(x)
        x = self.head(x)

        return x

In [14]:
class QuickGELU(nn.Module):
    def forward(self, x: torch.Tensor):
        return x * torch.sigmoid(1.702 * x)

In [15]:
spec = {
    'NUM_STAGES': 4,
    'PATCH_SIZE': [7,7,7,7],
    'PATCH_STRIDE': [4,2,2,2],
    'PATCH_PADDING': [2,3,3,3],
    'DIM_EMBED': [64,128,192,256],
    'DEPTH': [2,2,6,2],
    'NUM_HEADS': [4,8,8,16],   # original : [1,3,6]
    'MLP_RATIO': [4.,4.,4.,4.],
    'QKV_BIAS': [True, True, True, True],
    'DROP_RATE': [0.,0.,0.,0.],
    'ATTN_DROP_RATE': [0.,0.,0.,0.],
    'DROP_PATH_RATE': [0.,0.,0.1,0.1],
    'KERNEL_QKV': [3,3,3,3],
    'PADDING_Q': [1,1,1,1],
    'PADDING_KV': [1,1,1,1],
    'STRIDE_Q': [1,1,1,1],
    'STRIDE_KV': [2,2,2,2],
}

spec

{'NUM_STAGES': 4,
 'PATCH_SIZE': [7, 7, 7, 7],
 'PATCH_STRIDE': [4, 2, 2, 2],
 'PATCH_PADDING': [2, 3, 3, 3],
 'DIM_EMBED': [64, 128, 192, 256],
 'DEPTH': [2, 2, 6, 2],
 'NUM_HEADS': [4, 8, 8, 16],
 'MLP_RATIO': [4.0, 4.0, 4.0, 4.0],
 'QKV_BIAS': [True, True, True, True],
 'DROP_RATE': [0.0, 0.0, 0.0, 0.0],
 'ATTN_DROP_RATE': [0.0, 0.0, 0.0, 0.0],
 'DROP_PATH_RATE': [0.0, 0.0, 0.1, 0.1],
 'KERNEL_QKV': [3, 3, 3, 3],
 'PADDING_Q': [1, 1, 1, 1],
 'PADDING_KV': [1, 1, 1, 1],
 'STRIDE_Q': [1, 1, 1, 1],
 'STRIDE_KV': [2, 2, 2, 2]}

In [16]:
model = ConvolutionalVisionTransformer(act_layer=QuickGELU, spec=spec)

test_result = model(test_img)
test_img.shape, test_result.shape

(torch.Size([2, 3, 224, 224]), torch.Size([2, 100]))

In [17]:
from torchsummary import summary

summary(model.cuda(), (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 56, 56]           9,472
         LayerNorm-2             [-1, 3136, 64]             128
         ConvEmbed-3           [-1, 64, 56, 56]               0
           Dropout-4             [-1, 3136, 64]               0
         LayerNorm-5             [-1, 3136, 64]             128
            Conv2d-6           [-1, 64, 56, 56]             640
         Rearrange-7           [-1, 56, 56, 64]               0
         LayerNorm-8           [-1, 56, 56, 64]             128
         Rearrange-9           [-1, 64, 56, 56]               0
           Conv2d-10          [-1, 256, 56, 56]          16,640
        QuickGELU-11          [-1, 256, 56, 56]               0
        QuickGELU-12          [-1, 256, 56, 56]               0
        QuickGELU-13          [-1, 256, 56, 56]               0
        QuickGELU-14          [-1, 256,

In [18]:
# Transforms 정의하기
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8,1), interpolation=transforms.InterpolationMode.LANCZOS),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.9, scale=(0.02, 0.33)),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

data_dir = '../data/sports'
batch_size = 256

train_path = data_dir+'/train'
valid_path = data_dir+'/valid'
test_path = data_dir+'/test'

# dataset load
train_data = ImageFolder(train_path, transform=train_transform)
valid_data = ImageFolder(valid_path, transform=test_transform)
test_data = ImageFolder(test_path, transform=test_transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [19]:
device = 'cuda:3'
max_norm = 5.0 

model.to(device)
model_path = '../models/cvt/model_revision.pth'

In [20]:
mixup_fn = Mixup(mixup_alpha=.8, 
                cutmix_alpha=1., 
                prob=1., 
                switch_prob=0.5, 
                mode='batch',
                label_smoothing=.1,
                num_classes=100)

epochs = 100

criterion = nn.CrossEntropyLoss(label_smoothing=0.)

In [21]:
optimizer = optim.AdamW(model.parameters())
warmup_steps = int(len(train_loader)*(epochs)*0.1)
train_steps = len(train_loader)*(epochs)
scheduler = transformers.get_cosine_schedule_with_warmup(optimizer, 
                                                        num_warmup_steps=warmup_steps, 
                                                        num_training_steps=train_steps,
                                                        num_cycles=0.5)

2024-01-26 12:59:14.331887: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-26 12:59:14.331979: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-26 12:59:14.332981: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-26 12:59:14.339260: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [22]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')

# GradScaler 초기화
scaler = GradScaler()

for epoch in range(50):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
text = f"Epoch 당 평균 소요시간 : {training_time / epochs:.2f}초"      
print(text)

Epoch 1: 100%|██████████| 53/53 [01:01<00:00,  1.17s/it]


	Loss: 4.491039788947915, Val Loss: 4.250633955001831, LR: 0.0001, Duration: 63.13 sec - model saved!


Epoch 2: 100%|██████████| 53/53 [01:00<00:00,  1.15s/it]


	Loss: 4.3790437950278225, Val Loss: 4.119809627532959, LR: 0.0002, Duration: 62.00 sec - model saved!


Epoch 3: 100%|██████████| 53/53 [01:00<00:00,  1.15s/it]


	Loss: 4.324519427317493, Val Loss: 4.009176731109619, LR: 0.0003, Duration: 62.01 sec - model saved!


Epoch 4: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 4.257045457947929, Val Loss: 3.897426724433899, LR: 0.0004, Duration: 61.91 sec - model saved!


Epoch 5: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 4.192792883459127, Val Loss: 3.7397795915603638, LR: 0.0005, Duration: 61.84 sec - model saved!


Epoch 6: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 4.111031361345975, Val Loss: 3.644646167755127, LR: 0.0006, Duration: 61.82 sec - model saved!


Epoch 7: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 4.131182625608624, Val Loss: 3.570503830909729, LR: 0.0007, Duration: 61.73 sec - model saved!


Epoch 8: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 4.064160005101618, Val Loss: 3.4499846696853638, LR: 0.0008, Duration: 61.80 sec - model saved!


Epoch 9: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 4.027986166612157, Val Loss: 3.4108489751815796, LR: 0.0009000000000000001, Duration: 61.67 sec - model saved!


Epoch 10: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.9973911339381956, Val Loss: 3.235844135284424, LR: 0.001, Duration: 61.76 sec - model saved!


Epoch 11: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.9543190047426044, Val Loss: 3.2171614170074463, LR: 0.0009996954135095479, Duration: 61.86 sec - model saved!


Epoch 12: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.827674334903933, Val Loss: 3.1099013090133667, LR: 0.0009987820251299122, Duration: 61.83 sec - model saved!


Epoch 13: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.8569642822697476, Val Loss: 3.0267852544784546, LR: 0.0009972609476841367, Duration: 61.76 sec - model saved!


Epoch 14: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.8774879563529536, Val Loss: 2.954328179359436, LR: 0.0009951340343707852, Duration: 61.73 sec - model saved!


Epoch 15: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.766154001343925, Val Loss: 2.8296825885772705, LR: 0.000992403876506104, Duration: 61.71 sec - model saved!


Epoch 16: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.773716112352767, Val Loss: 2.747504949569702, LR: 0.0009890738003669028, Duration: 61.59 sec - model saved!


Epoch 17: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.7036735291750924, Val Loss: 2.7663460969924927, LR: 0.0009851478631379982, Duration: 61.46 sec


Epoch 18: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.6905135388644235, Val Loss: 2.620993494987488, LR: 0.0009806308479691594, Duration: 61.65 sec - model saved!


Epoch 19: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.6245140399572984, Val Loss: 2.608414649963379, LR: 0.0009755282581475768, Duration: 61.66 sec - model saved!


Epoch 20: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.5727882385253906, Val Loss: 2.471630334854126, LR: 0.0009698463103929542, Duration: 61.66 sec - model saved!


Epoch 21: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.565060273656305, Val Loss: 2.5728055238723755, LR: 0.0009635919272833937, Duration: 61.49 sec


Epoch 22: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.5276912518267363, Val Loss: 2.3284218311309814, LR: 0.0009567727288213005, Duration: 61.77 sec - model saved!


Epoch 23: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.556080894650153, Val Loss: 2.2296935319900513, LR: 0.0009493970231495835, Duration: 61.63 sec - model saved!


Epoch 24: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.4078589385410525, Val Loss: 2.1222803592681885, LR: 0.0009414737964294635, Duration: 61.63 sec - model saved!


Epoch 25: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.334725209002225, Val Loss: 2.0900317430496216, LR: 0.0009330127018922195, Duration: 61.85 sec - model saved!


Epoch 26: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.4933261961307167, Val Loss: 2.074242115020752, LR: 0.0009240240480782129, Duration: 61.60 sec - model saved!


Epoch 27: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.4045373988601395, Val Loss: 2.0458030700683594, LR: 0.0009145187862775209, Duration: 61.77 sec - model saved!


Epoch 28: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.3715593634911305, Val Loss: 1.9632411003112793, LR: 0.0009045084971874737, Duration: 61.66 sec - model saved!


Epoch 29: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.266687555133172, Val Loss: 1.9416547417640686, LR: 0.0008940053768033609, Duration: 61.67 sec - model saved!


Epoch 30: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.3741801819711363, Val Loss: 1.9517342448234558, LR: 0.000883022221559489, Duration: 61.47 sec


Epoch 31: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.317072521965459, Val Loss: 1.845496118068695, LR: 0.0008715724127386971, Duration: 61.60 sec - model saved!


Epoch 32: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.2580979275253585, Val Loss: 1.7860190868377686, LR: 0.0008596699001693256, Duration: 61.76 sec - model saved!


Epoch 33: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.258915028482113, Val Loss: 1.7462166547775269, LR: 0.0008473291852294987, Duration: 61.66 sec - model saved!


Epoch 34: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.0867727477595492, Val Loss: 1.6886086463928223, LR: 0.0008345653031794292, Duration: 61.75 sec - model saved!


Epoch 35: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.1998259616347977, Val Loss: 1.6266695261001587, LR: 0.0008213938048432696, Duration: 61.84 sec - model saved!


Epoch 36: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.133610765889006, Val Loss: 1.598767876625061, LR: 0.0008078307376628291, Duration: 61.77 sec - model saved!


Epoch 37: 100%|██████████| 53/53 [01:00<00:00,  1.15s/it]


	Loss: 3.0084940847360864, Val Loss: 1.5904669761657715, LR: 0.0007938926261462366, Duration: 61.92 sec - model saved!


Epoch 38: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.0751553436495223, Val Loss: 1.617292582988739, LR: 0.0007795964517353734, Duration: 61.59 sec


Epoch 39: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.9540500370961316, Val Loss: 1.4375624060630798, LR: 0.0007649596321166025, Duration: 61.77 sec - model saved!


Epoch 40: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.982711270170392, Val Loss: 1.4386451244354248, LR: 0.00075, Duration: 61.67 sec


Epoch 41: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.024764580546685, Val Loss: 1.4747841358184814, LR: 0.0007347357813929454, Duration: 61.46 sec


Epoch 42: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.988191496651128, Val Loss: 1.418253481388092, LR: 0.0007191855733945387, Duration: 61.69 sec - model saved!


Epoch 43: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 3.0399051477324286, Val Loss: 1.3937439322471619, LR: 0.0007033683215379002, Duration: 61.75 sec - model saved!


Epoch 44: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.9420273821308927, Val Loss: 1.391537070274353, LR: 0.0006873032967079561, Duration: 61.76 sec - model saved!


Epoch 45: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.9671568240759507, Val Loss: 1.406368374824524, LR: 0.0006710100716628344, Duration: 61.53 sec


Epoch 46: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.7893805121475794, Val Loss: 1.3200807571411133, LR: 0.0006545084971874737, Duration: 61.73 sec - model saved!


Epoch 47: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.8643419832553505, Val Loss: 1.3233751058578491, LR: 0.0006378186779084996, Duration: 61.57 sec


Epoch 48: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.8003722834137252, Val Loss: 1.2586259841918945, LR: 0.0006209609477998338, Duration: 61.69 sec - model saved!


Epoch 49: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.8297858215727896, Val Loss: 1.2884414196014404, LR: 0.0006039558454088796, Duration: 61.51 sec


Epoch 50: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.7412464753636776, Val Loss: 1.2197287678718567, LR: 0.0005868240888334653, Duration: 61.59 sec - model saved!
Epoch 당 평균 소요시간 : 30.87초


In [23]:
# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics

  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Metric,Value
0,Accuracy,0.748
1,Precision,0.776603
2,Recall,0.748
3,F1 Score,0.736155


In [24]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')

# GradScaler 초기화
scaler = GradScaler()

for epoch in range(50):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics

Epoch 1: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.664388946767123, Val Loss: 1.182432472705841, LR: 0.0005695865504800327, Duration: 61.71 sec - model saved!


Epoch 2: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.690109801742266, Val Loss: 1.1874754428863525, LR: 0.0005522642316338268, Duration: 61.66 sec


Epoch 3: 100%|██████████| 53/53 [01:00<00:00,  1.15s/it]


	Loss: 2.658997306283915, Val Loss: 1.1869313716888428, LR: 0.0005348782368720626, Duration: 61.93 sec


Epoch 4: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.725780673746793, Val Loss: 1.1693055033683777, LR: 0.0005174497483512506, Duration: 61.68 sec - model saved!


Epoch 5: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.645304999261532, Val Loss: 1.1657801866531372, LR: 0.0005, Duration: 61.57 sec - model saved!


Epoch 6: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.7765888672954633, Val Loss: 1.1800063848495483, LR: 0.0004825502516487497, Duration: 61.48 sec


Epoch 7: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.665741443634033, Val Loss: 1.1219471096992493, LR: 0.00046512176312793734, Duration: 61.76 sec - model saved!


Epoch 8: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.7277145565680736, Val Loss: 1.1381598114967346, LR: 0.00044773576836617336, Duration: 61.61 sec


Epoch 9: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.6650882891888887, Val Loss: 1.1050522923469543, LR: 0.0004304134495199674, Duration: 61.65 sec - model saved!


Epoch 10: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.552897235132613, Val Loss: 1.0660218596458435, LR: 0.00041317591116653486, Duration: 61.70 sec - model saved!


Epoch 11: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.6931786424708815, Val Loss: 1.0880324840545654, LR: 0.0003960441545911204, Duration: 61.42 sec


Epoch 12: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.7023172693432502, Val Loss: 1.067952811717987, LR: 0.0003790390522001662, Duration: 61.52 sec


Epoch 13: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.5251943975124718, Val Loss: 1.1014899015426636, LR: 0.00036218132209150044, Duration: 61.50 sec


Epoch 14: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.6650103735473922, Val Loss: 1.0900723338127136, LR: 0.00034549150281252633, Duration: 61.58 sec


Epoch 15: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.6865972955271884, Val Loss: 1.0965940356254578, LR: 0.0003289899283371657, Duration: 61.50 sec


Epoch 16: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.6102457136478066, Val Loss: 1.0710596442222595, LR: 0.00031269670329204396, Duration: 61.60 sec


Epoch 17: 100%|██████████| 53/53 [01:00<00:00,  1.15s/it]


	Loss: 2.5886582698462144, Val Loss: 1.059012770652771, LR: 0.0002966316784621, Duration: 61.90 sec - model saved!


Epoch 18: 100%|██████████| 53/53 [01:00<00:00,  1.15s/it]


	Loss: 2.481731151634792, Val Loss: 1.075021207332611, LR: 0.00028081442660546124, Duration: 61.88 sec


Epoch 19: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.5424877000304886, Val Loss: 1.0477690696716309, LR: 0.00026526421860705474, Duration: 61.75 sec - model saved!


Epoch 20: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.6523905115307502, Val Loss: 1.0448976159095764, LR: 0.0002500000000000001, Duration: 61.75 sec - model saved!


Epoch 21: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.6889889780080543, Val Loss: 1.0668753385543823, LR: 0.0002350403678833976, Duration: 61.59 sec


Epoch 22: 100%|██████████| 53/53 [01:00<00:00,  1.15s/it]


	Loss: 2.658509171233987, Val Loss: 1.0499722957611084, LR: 0.00022040354826462666, Duration: 61.84 sec


Epoch 23: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.584170638390307, Val Loss: 1.0469989776611328, LR: 0.00020610737385376348, Duration: 61.66 sec


Epoch 24: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.5876458793316246, Val Loss: 1.0145525932312012, LR: 0.00019216926233717085, Duration: 61.89 sec - model saved!


Epoch 25: 100%|██████████| 53/53 [01:01<00:00,  1.15s/it]


	Loss: 2.4392318005831735, Val Loss: 1.0258703827857971, LR: 0.0001786061951567303, Duration: 62.20 sec


Epoch 26: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.537560026600676, Val Loss: 1.024184763431549, LR: 0.00016543469682057105, Duration: 61.74 sec


Epoch 27: 100%|██████████| 53/53 [01:00<00:00,  1.15s/it]


	Loss: 2.6047259434214176, Val Loss: 0.9980037212371826, LR: 0.00015267081477050133, Duration: 62.05 sec - model saved!


Epoch 28: 100%|██████████| 53/53 [01:00<00:00,  1.15s/it]


	Loss: 2.6459678074098982, Val Loss: 1.0099011957645416, LR: 0.00014033009983067452, Duration: 61.91 sec


Epoch 29: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.296959751057175, Val Loss: 1.0075197219848633, LR: 0.00012842758726130281, Duration: 61.68 sec


Epoch 30: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.6155606463270367, Val Loss: 0.9911544919013977, LR: 0.00011697777844051105, Duration: 61.87 sec - model saved!


Epoch 31: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.5283864421664544, Val Loss: 1.0100283920764923, LR: 0.00010599462319663906, Duration: 61.72 sec


Epoch 32: 100%|██████████| 53/53 [01:00<00:00,  1.15s/it]


	Loss: 2.529347172323263, Val Loss: 1.0094934701919556, LR: 9.549150281252633e-05, Duration: 61.78 sec


Epoch 33: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.513541131649377, Val Loss: 1.0003241896629333, LR: 8.548121372247918e-05, Duration: 61.67 sec


Epoch 34: 100%|██████████| 53/53 [01:00<00:00,  1.15s/it]


	Loss: 2.495615358622569, Val Loss: 0.9906017184257507, LR: 7.597595192178702e-05, Duration: 61.94 sec - model saved!


Epoch 35: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.5116015357791253, Val Loss: 0.977340579032898, LR: 6.698729810778065e-05, Duration: 61.89 sec - model saved!


Epoch 36: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.5669742305323764, Val Loss: 0.9889179170131683, LR: 5.852620357053651e-05, Duration: 61.72 sec


Epoch 37: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.490295691310235, Val Loss: 0.9839414656162262, LR: 5.060297685041659e-05, Duration: 61.63 sec


Epoch 38: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.444623843678888, Val Loss: 0.9805052280426025, LR: 4.322727117869951e-05, Duration: 61.58 sec


Epoch 39: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.442868743302687, Val Loss: 0.968334436416626, LR: 3.6408072716606344e-05, Duration: 61.84 sec - model saved!


Epoch 40: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.4938369741979636, Val Loss: 0.9714963138103485, LR: 3.0153689607045842e-05, Duration: 61.61 sec


Epoch 41: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.341384620036719, Val Loss: 0.968377411365509, LR: 2.4471741852423235e-05, Duration: 61.71 sec


Epoch 42: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.5525424233022727, Val Loss: 0.9791654944419861, LR: 1.9369152030840554e-05, Duration: 61.70 sec


Epoch 43: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.561695065138475, Val Loss: 0.9694730341434479, LR: 1.4852136862001764e-05, Duration: 61.62 sec


Epoch 44: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.567573884748063, Val Loss: 0.9660724103450775, LR: 1.0926199633097156e-05, Duration: 61.79 sec - model saved!


Epoch 45: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.3567223323965973, Val Loss: 0.9649331271648407, LR: 7.59612349389599e-06, Duration: 61.76 sec - model saved!


Epoch 46: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.498915487865232, Val Loss: 0.9672039151191711, LR: 4.865965629214819e-06, Duration: 61.71 sec


Epoch 47: 100%|██████████| 53/53 [01:00<00:00,  1.15s/it]


	Loss: 2.486452577249059, Val Loss: 0.9657709002494812, LR: 2.739052315863355e-06, Duration: 61.83 sec


Epoch 48: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.3929346237542495, Val Loss: 0.9653393924236298, LR: 1.2179748700879012e-06, Duration: 61.72 sec


Epoch 49: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.475441665019629, Val Loss: 0.9657445549964905, LR: 3.0458649045211895e-07, Duration: 61.59 sec


Epoch 50: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]


	Loss: 2.4677765841753976, Val Loss: 0.9656921625137329, LR: 0.0, Duration: 61.56 sec


  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Metric,Value
0,Accuracy,0.808
1,Precision,0.816901
2,Recall,0.808
3,F1 Score,0.797948
