In [1]:
!pip install python-box

[0m

In [1]:
import numpy as np
import yaml
from box import Box

import torch
import torch.nn as nn
import torch.optim as optim

import simmim
from swin_v2 import SwinTransformerV2

from torch.cuda.amp import autocast, GradScaler
from torch.nn.utils import clip_grad_norm_
from tqdm import tqdm
import time

from timm.data import Mixup
import transformers

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
simmim_config = yaml.load(open('config/pretrain.yaml'), Loader=yaml.FullLoader)
simmim_config

{'MODEL': {'TYPE': 'swinv2',
  'NAME': 'simmim_pretrain',
  'DROP_PATH_RATE': 0.0,
  'SWIN': {'EMBED_DIM': 96,
   'DEPTHS': [2, 2, 6, 2],
   'NUM_HEADS': [3, 6, 12, 24],
   'WINDOW_SIZE': 6,
   'PATCH_SIZE': 4}},
 'DATA': {'IMG_SIZE': 192,
  'MASK_PATCH_SIZE': 32,
  'MASK_RATIO': 0.6,
  'BATCH_SIZE': 1024,
  'NUM_WORKERS': 24,
  'DATA_PATH': '../../data/sports'},
 'TRAIN': {'EPOCHS': 100,
  'WARMUP_EPOCHS': 10,
  'BASE_LR': 0.0014,
  'WEIGHT_DECAY': 0.05,
  'CLIP_GRAD': 5}}

In [3]:
encoder_config = {'img_size':simmim_config['DATA']['IMG_SIZE'], 
                'patch_size':simmim_config['MODEL']['SWIN']['PATCH_SIZE'], 
                'in_chans':3, 
                'num_classes':100,
                'embed_dim':simmim_config['MODEL']['SWIN']['EMBED_DIM'], 
                'depths':simmim_config['MODEL']['SWIN']['DEPTHS'], 
                'num_heads':simmim_config['MODEL']['SWIN']['NUM_HEADS'],           
                'window_size':simmim_config['MODEL']['SWIN']['WINDOW_SIZE'], 
                'mlp_ratio':4., 
                'qkv_bias':True, 
                'qk_scale':None,
                'drop_rate':0., 
                'attn_drop_rate':0., 
                'drop_path_rate':simmim_config['MODEL']['DROP_PATH_RATE'],
                'norm_layer':nn.LayerNorm, 
                'patch_norm':True, 
                'pretrained_window_sizes':[0,0,0,0],
                'ape':True}

encoder_stride = 32
in_chans = encoder_config['in_chans']
patch_size = encoder_config['patch_size']

In [4]:
encoder = simmim.SwinTransformerV2ForSimMIM(**encoder_config)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [5]:
model = simmim.SimMIM( encoder=encoder, 
                       encoder_stride=encoder_stride, 
                       in_chans=in_chans, 
                       patch_size=patch_size)

In [6]:
mask_generator = simmim.MaskGenerator(input_size=224,
                                      mask_patch_size=28,
                                      model_patch_size=28,
                                      mask_ratio=.6)
mask = mask_generator()
mask

array([[0, 0, 1, 0, 1, 1, 0, 0],
       [0, 1, 0, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 0],
       [1, 0, 1, 1, 1, 1, 0, 0],
       [0, 0, 1, 1, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 1, 0, 1],
       [0, 1, 1, 1, 0, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 0]])

In [7]:
print(f"생성된 mask의 비율은 {mask.sum() / (mask.shape[0]*mask.shape[1])*100}%")

생성된 mask의 비율은 60.9375%


In [8]:
simmim_config = Box(simmim_config)
dataloader = simmim.build_loader_simmim(simmim_config)

samples = next(iter(dataloader))
len(samples)

3

In [10]:
samples[0].shape, samples[1].shape, samples[2].shape 

(torch.Size([1024, 3, 192, 192]),
 torch.Size([1024, 48, 48]),
 torch.Size([1024]))

In [11]:
base_lr = float(simmim_config.TRAIN.BASE_LR)
weight_decay = simmim_config.TRAIN.WEIGHT_DECAY
optimizer = optim.AdamW(model.parameters(), lr=base_lr, weight_decay=weight_decay)
warmup_epochs = simmim_config.TRAIN.WARMUP_EPOCHS
train_epochs = simmim_config.TRAIN.EPOCHS

scheduler = transformers.get_cosine_schedule_with_warmup(optimizer, 
                                                        num_warmup_steps=warmup_epochs*len(dataloader), 
                                                        num_training_steps=train_epochs*len(dataloader),
                                                        num_cycles=0.5)

2024-01-15 18:13:44.430872: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-15 18:13:44.430935: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-15 18:13:44.431673: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-15 18:13:44.436981: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [21]:
device = 'cuda:3'
model.to(device)
torch.backends.cudnn.benchmark = True

model_save = True
simmim_path = '../../models/swin2/simmim.pth'

In [52]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')

# GradScaler 초기화
scaler = GradScaler()

for epoch in range(train_epochs):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        image, mask = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            loss = model(image, mask)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        if simmim_config.TRAIN.CLIP_GRAD:
            clip_grad_norm_(model.parameters(), max_norm=simmim_config.TRAIN.CLIP_GRAD)
        else:
            clip_grad_norm_(model.parameters())

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(dataloader)
    losses.append(epoch_loss)

    # 모델 저장
    if epoch_loss < best_loss:
        
        best_loss = epoch_loss
        vit_save = model_save
        if vit_save:
            torch.save(model.state_dict(), simmim_path)
        
    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss:.4f}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False    
        
    print(text)


Epoch 1: 100%|██████████| 14/14 [00:19<00:00,  1.41s/it]


	Loss: 1.1431, LR: 0.00014000000000000001, Duration: 20.80 sec - model saved!


Epoch 2: 100%|██████████| 14/14 [00:16<00:00,  1.19s/it]


	Loss: 1.0948, LR: 0.00028000000000000003, Duration: 18.31 sec - model saved!


Epoch 3: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]


	Loss: 1.0480, LR: 0.00041999999999999996, Duration: 18.71 sec - model saved!


Epoch 4: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]


	Loss: 0.9340, LR: 0.0005600000000000001, Duration: 18.18 sec - model saved!


Epoch 5: 100%|██████████| 14/14 [00:17<00:00,  1.27s/it]


	Loss: 0.8531, LR: 0.0007, Duration: 19.26 sec - model saved!


Epoch 6: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]


	Loss: 0.8270, LR: 0.0008399999999999999, Duration: 18.23 sec - model saved!


Epoch 7: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]


	Loss: 0.7686, LR: 0.00098, Duration: 18.27 sec - model saved!


Epoch 8: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]


	Loss: 0.7524, LR: 0.0011200000000000001, Duration: 19.08 sec - model saved!


Epoch 9: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]


	Loss: 0.7315, LR: 0.00126, Duration: 18.45 sec - model saved!


Epoch 10: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]


	Loss: 0.7071, LR: 0.0014, Duration: 19.07 sec - model saved!


Epoch 11: 100%|██████████| 14/14 [00:18<00:00,  1.29s/it]


	Loss: 0.6956, LR: 0.001399573578913367, Duration: 19.40 sec - model saved!


Epoch 12: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]


	Loss: 0.6847, LR: 0.001398294835181877, Duration: 19.29 sec - model saved!


Epoch 13: 100%|██████████| 14/14 [00:17<00:00,  1.22s/it]


	Loss: 0.6833, LR: 0.0013961653267577914, Duration: 18.49 sec - model saved!


Epoch 14: 100%|██████████| 14/14 [00:16<00:00,  1.18s/it]


	Loss: 0.6623, LR: 0.0013931876481190993, Duration: 17.99 sec - model saved!


Epoch 15: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]


	Loss: 0.6521, LR: 0.0013893654271085456, Duration: 19.18 sec - model saved!


Epoch 16: 100%|██████████| 14/14 [00:16<00:00,  1.19s/it]


	Loss: 0.6384, LR: 0.001384703320513664, Duration: 18.03 sec - model saved!


Epoch 17: 100%|██████████| 14/14 [00:18<00:00,  1.29s/it]


	Loss: 0.6315, LR: 0.0013792070083931975, Duration: 19.42 sec - model saved!


Epoch 18: 100%|██████████| 14/14 [00:18<00:00,  1.30s/it]


	Loss: 0.6190, LR: 0.0013728831871568231, Duration: 19.62 sec - model saved!


Epoch 19: 100%|██████████| 14/14 [00:17<00:00,  1.27s/it]


	Loss: 0.6035, LR: 0.0013657395614066075, Duration: 19.10 sec - model saved!


Epoch 20: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]


	Loss: 0.6075, LR: 0.001357784834550136, Duration: 17.82 sec


Epoch 21: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]


	Loss: 0.5977, LR: 0.0013490286981967512, Duration: 19.43 sec - model saved!


Epoch 22: 100%|██████████| 14/14 [00:18<00:00,  1.29s/it]

	Loss: 0.5994, LR: 0.0013394818203498204, Duration: 19.08 sec



Epoch 23: 100%|██████████| 14/14 [00:16<00:00,  1.18s/it]


	Loss: 0.5893, LR: 0.0013291558324094168, Duration: 17.90 sec - model saved!


Epoch 24: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.5946, LR: 0.0013180633150012488, Duration: 18.77 sec



Epoch 25: 100%|██████████| 14/14 [00:17<00:00,  1.27s/it]

	Loss: 0.5932, LR: 0.0013062177826491071, Duration: 18.91 sec



Epoch 26: 100%|██████████| 14/14 [00:16<00:00,  1.19s/it]

	Loss: 0.5922, LR: 0.001293633667309498, Duration: 17.75 sec



Epoch 27: 100%|██████████| 14/14 [00:18<00:00,  1.29s/it]

	Loss: 0.5905, LR: 0.001280326300788529, Duration: 19.24 sec



Epoch 28: 100%|██████████| 14/14 [00:17<00:00,  1.25s/it]


	Loss: 0.5792, LR: 0.0012663118960624632, Duration: 18.83 sec - model saved!


Epoch 29: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.5897, LR: 0.0012516075275247052, Duration: 18.90 sec



Epoch 30: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]


	Loss: 0.5771, LR: 0.0012362311101832846, Duration: 18.40 sec - model saved!


Epoch 31: 100%|██████████| 14/14 [00:17<00:00,  1.27s/it]


	Loss: 0.5723, LR: 0.001220201377834176, Duration: 19.26 sec - model saved!


Epoch 32: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.5776, LR: 0.0012035378602370558, Duration: 18.77 sec



Epoch 33: 100%|██████████| 14/14 [00:18<00:00,  1.32s/it]


	Loss: 0.5718, LR: 0.0011862608593212981, Duration: 19.73 sec - model saved!


Epoch 34: 100%|██████████| 14/14 [00:17<00:00,  1.23s/it]


	Loss: 0.5701, LR: 0.0011683914244512007, Duration: 18.63 sec - model saved!


Epoch 35: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]

	Loss: 0.5737, LR: 0.0011499513267805774, Duration: 18.99 sec



Epoch 36: 100%|██████████| 14/14 [00:17<00:00,  1.25s/it]


	Loss: 0.5694, LR: 0.0011309630327279608, Duration: 18.95 sec - model saved!


Epoch 37: 100%|██████████| 14/14 [00:17<00:00,  1.22s/it]


	Loss: 0.5686, LR: 0.0011114496766047313, Duration: 18.53 sec - model saved!


Epoch 38: 100%|██████████| 14/14 [00:16<00:00,  1.19s/it]


	Loss: 0.5640, LR: 0.0010914350324295228, Duration: 18.14 sec - model saved!


Epoch 39: 100%|██████████| 14/14 [00:18<00:00,  1.30s/it]


	Loss: 0.5606, LR: 0.0010709434849632434, Duration: 19.64 sec - model saved!


Epoch 40: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]


	Loss: 0.5555, LR: 0.00105, Duration: 18.23 sec - model saved!


Epoch 41: 100%|██████████| 14/14 [00:17<00:00,  1.27s/it]


	Loss: 0.5513, LR: 0.0010286300939501235, Duration: 19.22 sec - model saved!


Epoch 42: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]

	Loss: 0.5524, LR: 0.001006859802752354, Duration: 19.17 sec



Epoch 43: 100%|██████████| 14/14 [00:17<00:00,  1.25s/it]

	Loss: 0.5520, LR: 0.0009847156501530602, Duration: 18.76 sec



Epoch 44: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]

	Loss: 0.5520, LR: 0.0009622246153911386, Duration: 19.18 sec



Epoch 45: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]

	Loss: 0.5608, LR: 0.0009394141003279682, Duration: 19.07 sec



Epoch 46: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]

	Loss: 0.5657, LR: 0.0009163118960624632, Duration: 18.10 sec



Epoch 47: 100%|██████████| 14/14 [00:17<00:00,  1.25s/it]


	Loss: 0.5494, LR: 0.0008929461490718994, Duration: 18.90 sec - model saved!


Epoch 48: 100%|██████████| 14/14 [00:17<00:00,  1.25s/it]


	Loss: 0.5461, LR: 0.0008693453269197673, Duration: 18.92 sec - model saved!


Epoch 49: 100%|██████████| 14/14 [00:17<00:00,  1.27s/it]


	Loss: 0.5436, LR: 0.0008455381835724314, Duration: 19.28 sec - model saved!


Epoch 50: 100%|██████████| 14/14 [00:18<00:00,  1.30s/it]


	Loss: 0.5433, LR: 0.0008215537243668514, Duration: 19.60 sec - model saved!


Epoch 51: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]


	Loss: 0.5417, LR: 0.0007974211706720458, Duration: 18.22 sec - model saved!


Epoch 52: 100%|██████████| 14/14 [00:16<00:00,  1.17s/it]

	Loss: 0.5434, LR: 0.0007731699242873575, Duration: 17.62 sec



Epoch 53: 100%|██████████| 14/14 [00:18<00:00,  1.30s/it]

	Loss: 0.5431, LR: 0.0007488295316208876, Duration: 19.35 sec



Epoch 54: 100%|██████████| 14/14 [00:17<00:00,  1.25s/it]


	Loss: 0.5378, LR: 0.0007244296476917508, Duration: 18.86 sec - model saved!


Epoch 55: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]

	Loss: 0.5424, LR: 0.0007, Duration: 18.14 sec



Epoch 56: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.5508, LR: 0.0006755703523082495, Duration: 18.60 sec



Epoch 57: 100%|██████████| 14/14 [00:18<00:00,  1.31s/it]


	Loss: 0.5443, LR: 0.0006511704683791123, Duration: 19.53 sec


Epoch 58: 100%|██████████| 14/14 [00:18<00:00,  1.30s/it]

	Loss: 0.5384, LR: 0.0006268300757126426, Duration: 19.36 sec



Epoch 59: 100%|██████████| 14/14 [00:18<00:00,  1.31s/it]


	Loss: 0.5340, LR: 0.0006025788293279544, Duration: 19.71 sec - model saved!


Epoch 60: 100%|██████████| 14/14 [00:17<00:00,  1.22s/it]


	Loss: 0.5321, LR: 0.0005784462756331488, Duration: 18.53 sec - model saved!


Epoch 61: 100%|██████████| 14/14 [00:17<00:00,  1.25s/it]


	Loss: 0.5317, LR: 0.0005544618164275686, Duration: 18.94 sec - model saved!


Epoch 62: 100%|██████████| 14/14 [00:16<00:00,  1.19s/it]


	Loss: 0.5306, LR: 0.0005306546730802327, Duration: 18.01 sec - model saved!


Epoch 63: 100%|██████████| 14/14 [00:18<00:00,  1.30s/it]


	Loss: 0.5293, LR: 0.0005070538509281006, Duration: 19.52 sec - model saved!


Epoch 64: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]


	Loss: 0.5289, LR: 0.0004836881039375369, Duration: 18.33 sec - model saved!


Epoch 65: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]

	Loss: 0.5300, LR: 0.0004605858996720319, Duration: 17.92 sec



Epoch 66: 100%|██████████| 14/14 [00:16<00:00,  1.19s/it]


	Loss: 0.5268, LR: 0.0004377753846088615, Duration: 18.08 sec - model saved!


Epoch 67: 100%|██████████| 14/14 [00:16<00:00,  1.19s/it]

	Loss: 0.5269, LR: 0.00041528434984693997, Duration: 17.91 sec



Epoch 68: 100%|██████████| 14/14 [00:17<00:00,  1.22s/it]


	Loss: 0.5264, LR: 0.00039314019724764573, Duration: 18.55 sec - model saved!


Epoch 69: 100%|██████████| 14/14 [00:17<00:00,  1.22s/it]

	Loss: 0.5269, LR: 0.00037136990604987665, Duration: 18.14 sec



Epoch 70: 100%|██████████| 14/14 [00:18<00:00,  1.30s/it]

	Loss: 0.5282, LR: 0.00035000000000000016, Duration: 19.32 sec



Epoch 71: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]


	Loss: 0.5256, LR: 0.00032905651503675667, Duration: 19.32 sec - model saved!


Epoch 72: 100%|██████████| 14/14 [00:17<00:00,  1.23s/it]

	Loss: 0.5257, LR: 0.0003085649675704773, Duration: 18.38 sec



Epoch 73: 100%|██████████| 14/14 [00:18<00:00,  1.31s/it]


	Loss: 0.5244, LR: 0.0002885503233952689, Duration: 19.74 sec - model saved!


Epoch 74: 100%|██████████| 14/14 [00:17<00:00,  1.27s/it]


	Loss: 0.5243, LR: 0.0002690369672720392, Duration: 19.29 sec - model saved!


Epoch 75: 100%|██████████| 14/14 [00:17<00:00,  1.23s/it]


	Loss: 0.5218, LR: 0.00025004867321942243, Duration: 18.66 sec - model saved!


Epoch 76: 100%|██████████| 14/14 [00:17<00:00,  1.27s/it]

	Loss: 0.5222, LR: 0.00023160857554879947, Duration: 18.90 sec



Epoch 77: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.5220, LR: 0.00021373914067870185, Duration: 18.83 sec



Epoch 78: 100%|██████████| 14/14 [00:16<00:00,  1.19s/it]

	Loss: 0.5219, LR: 0.00019646213976294433, Duration: 17.80 sec



Epoch 79: 100%|██████████| 14/14 [00:18<00:00,  1.29s/it]


	Loss: 0.5215, LR: 0.00017979862216582396, Duration: 19.36 sec - model saved!


Epoch 80: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]


	Loss: 0.5205, LR: 0.00016376888981671546, Duration: 18.23 sec - model saved!


Epoch 81: 100%|██████████| 14/14 [00:18<00:00,  1.30s/it]

	Loss: 0.5209, LR: 0.00014839247247529466, Duration: 19.33 sec



Epoch 82: 100%|██████████| 14/14 [00:18<00:00,  1.33s/it]


	Loss: 0.5195, LR: 0.00013368810393753685, Duration: 20.06 sec - model saved!


Epoch 83: 100%|██████████| 14/14 [00:18<00:00,  1.31s/it]


	Loss: 0.5193, LR: 0.00011967369921147086, Duration: 19.76 sec - model saved!


Epoch 84: 100%|██████████| 14/14 [00:18<00:00,  1.32s/it]


	Loss: 0.5185, LR: 0.00010636633269050183, Duration: 19.81 sec - model saved!


Epoch 85: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]


	Loss: 0.5190, LR: 9.37822173508929e-05, Duration: 18.45 sec


Epoch 86: 100%|██████████| 14/14 [00:16<00:00,  1.19s/it]

	Loss: 0.5186, LR: 8.19366849987511e-05, Duration: 17.74 sec



Epoch 87: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]


	Loss: 0.5181, LR: 7.084416759058323e-05, Duration: 18.28 sec - model saved!


Epoch 88: 100%|██████████| 14/14 [00:18<00:00,  1.29s/it]


	Loss: 0.5180, LR: 6.0518179650179314e-05, Duration: 19.41 sec - model saved!


Epoch 89: 100%|██████████| 14/14 [00:18<00:00,  1.30s/it]

	Loss: 0.5187, LR: 5.097130180324888e-05, Duration: 19.24 sec



Epoch 90: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.5184, LR: 4.221516544986418e-05, Duration: 18.78 sec



Epoch 91: 100%|██████████| 14/14 [00:18<00:00,  1.31s/it]


	Loss: 0.5172, LR: 3.426043859339253e-05, Duration: 19.71 sec - model saved!


Epoch 92: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]

	Loss: 0.5179, LR: 2.7116812843176773e-05, Duration: 17.86 sec



Epoch 93: 100%|██████████| 14/14 [00:17<00:00,  1.27s/it]


	Loss: 0.5165, LR: 2.0792991606802468e-05, Duration: 19.20 sec - model saved!


Epoch 94: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.5176, LR: 1.5296679486336016e-05, Duration: 18.48 sec



Epoch 95: 100%|██████████| 14/14 [00:17<00:00,  1.23s/it]

	Loss: 0.5168, LR: 1.0634572891454386e-05, Duration: 18.23 sec



Epoch 96: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.5180, LR: 6.812351880900747e-06, Duration: 18.67 sec



Epoch 97: 100%|██████████| 14/14 [00:18<00:00,  1.31s/it]

	Loss: 0.5176, LR: 3.834673242208697e-06, Duration: 19.42 sec



Epoch 98: 100%|██████████| 14/14 [00:16<00:00,  1.19s/it]

	Loss: 0.5173, LR: 1.7051648181230617e-06, Duration: 17.74 sec



Epoch 99: 100%|██████████| 14/14 [00:17<00:00,  1.25s/it]

	Loss: 0.5166, LR: 4.264210866329665e-07, Duration: 18.66 sec



Epoch 100: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.5177, LR: 0.0, Duration: 18.38 sec





In [115]:
model.cpu()
del model
torch.cuda.empty_cache()

In [116]:
model = SwinTransformerV2(pretrained_window_sizes=[6,6,6,6], ape=True, drop_path_rate=0.3)
model.state_dict().keys()

odict_keys(['absolute_pos_embed', 'embeddings.patch_embeddings.weight', 'embeddings.patch_embeddings.bias', 'embeddings.norm.weight', 'embeddings.norm.bias', 'stages.0.blocks.0.attn_mask', 'stages.0.blocks.0.attn.t_scale', 'stages.0.blocks.0.attn.relative_coords_table', 'stages.0.blocks.0.attn.relative_position_index', 'stages.0.blocks.0.attn.crpb_mlp.0.weight', 'stages.0.blocks.0.attn.crpb_mlp.0.bias', 'stages.0.blocks.0.attn.crpb_mlp.3.weight', 'stages.0.blocks.0.attn.qkv.weight', 'stages.0.blocks.0.attn.qkv.bias', 'stages.0.blocks.0.attn.proj.weight', 'stages.0.blocks.0.attn.proj.bias', 'stages.0.blocks.0.norm1.weight', 'stages.0.blocks.0.norm1.bias', 'stages.0.blocks.0.mlp.fc1.weight', 'stages.0.blocks.0.mlp.fc1.bias', 'stages.0.blocks.0.mlp.fc2.weight', 'stages.0.blocks.0.mlp.fc2.bias', 'stages.0.blocks.0.norm2.weight', 'stages.0.blocks.0.norm2.bias', 'stages.0.blocks.1.attn_mask', 'stages.0.blocks.1.attn.t_scale', 'stages.0.blocks.1.attn.relative_coords_table', 'stages.0.blocks.1

In [117]:
model.state_dict()['embeddings.patch_embeddings.weight'][0]

tensor([[[ 0.0830,  0.1194,  0.1033, -0.0618],
         [-0.0452, -0.0010,  0.0245,  0.0998],
         [ 0.0673, -0.0723,  0.1352,  0.1109],
         [ 0.1027,  0.0734,  0.0886, -0.1377]],

        [[ 0.1120,  0.1359, -0.0219,  0.0250],
         [ 0.0192, -0.0022, -0.0717, -0.0349],
         [-0.0985,  0.0495,  0.0731,  0.1403],
         [ 0.0597, -0.0444,  0.0440,  0.1281]],

        [[ 0.0116,  0.0292,  0.0172, -0.1337],
         [-0.1150,  0.0171, -0.0060, -0.1312],
         [ 0.0137, -0.0317,  0.0986, -0.0627],
         [-0.0626, -0.0838,  0.0119,  0.0636]]])

In [118]:
model.state_dict()['stages.3.blocks.1.attn.crpb_mlp.3.weight'][0]

tensor([-0.0328,  0.0135,  0.0025, -0.0392,  0.0284, -0.0045, -0.0103, -0.0006,
        -0.0028,  0.0062,  0.0049, -0.0020, -0.0225,  0.0086,  0.0056, -0.0130,
         0.0148, -0.0415,  0.0096,  0.0207, -0.0064,  0.0079, -0.0041,  0.0384,
         0.0001, -0.0082, -0.0096,  0.0080, -0.0094,  0.0036, -0.0073,  0.0298,
         0.0184,  0.0070,  0.0067,  0.0127, -0.0013,  0.0212, -0.0090,  0.0016,
         0.0107,  0.0123, -0.0002,  0.0295, -0.0177, -0.0127, -0.0237,  0.0018,
         0.0116, -0.0168, -0.0194, -0.0190,  0.0367,  0.0396,  0.0250, -0.0085,
         0.0220, -0.0386,  0.0394,  0.0310, -0.0080, -0.0173,  0.0035,  0.0094,
        -0.0144, -0.0048,  0.0084, -0.0164,  0.0243, -0.0139,  0.0296, -0.0467,
        -0.0184,  0.0130, -0.0216,  0.0245, -0.0582,  0.0193,  0.0250, -0.0057,
        -0.0006, -0.0196,  0.0241, -0.0173, -0.0187, -0.0049,  0.0185,  0.0192,
        -0.0130, -0.0217, -0.0250, -0.0028,  0.0036,  0.0015,  0.0019,  0.0076,
         0.0318, -0.0063, -0.0278, -0.00

In [119]:
swin_config = yaml.load(open('config/train.yaml'), Loader=yaml.FullLoader)
swin_config

{'MODEL': {'TYPE': 'swinv2',
  'NAME': 'simmim_train',
  'PRETRAINED': '../../models/swin2/simmim.pth',
  'DROP_PATH_RATE': 0.2,
  'SWIN': {'EMBED_DIM': 96,
   'DEPTHS': [2, 2, 6, 2],
   'NUM_HEADS': [3, 6, 12, 24],
   'WINDOW_SIZE': 7,
   'PATCH_SIZE': 4}},
 'DATA': {'IMG_SIZE': 224,
  'MASK_PATCH_SIZE': 32,
  'MASK_RATIO': 0.6,
  'BATCH_SIZE': 960,
  'NUM_WORKERS': 24,
  'DATA_PATH': '../../data/sports'},
 'TRAIN': {'EPOCHS': 20,
  'WARMUP_EPOCHS': 10,
  'BASE_LR': '1e-4',
  'WEIGHT_DECAY': 0.05,
  'CLIP_GRAD': 5}}

In [120]:
def load_pretrained(config, model):
    print(f"==============> Loading weight {config.MODEL.PRETRAINED} for fine-tuning......")
    state_dict = torch.load(config.MODEL.PRETRAINED, map_location='cpu')

    # remain encoder only
    not_encoder_keys = [k for k in state_dict.keys() if 'encoder' not in k]
    for k in not_encoder_keys:
        del state_dict[k]
        
    # remove prefix encoder.
    state_dict = {k.replace('encoder.', ''):v for k, v in state_dict.items()}

    # delete relative_position_index since we always re-init it
    relative_position_index_keys = [k for k in state_dict.keys() if "relative_position_index" in k]
    for k in relative_position_index_keys:
        del state_dict[k]

    # delete relative_coords_table since we always re-init it
    relative_position_index_keys = [k for k in state_dict.keys() if "relative_coords_table" in k]
    for k in relative_position_index_keys:
        del state_dict[k]

    # delete attn_mask since we always re-init it
    attn_mask_keys = [k for k in state_dict.keys() if "attn_mask" in k]
    for k in attn_mask_keys:
        del state_dict[k]

    # bicubic interpolate relative_position_bias_table if not match
    relative_position_bias_table_keys = [k for k in state_dict.keys() if "relative_position_bias_table" in k]
    for k in relative_position_bias_table_keys:
        relative_position_bias_table_pretrained = state_dict[k]
        relative_position_bias_table_current = model.state_dict()[k]
        L1, nH1 = relative_position_bias_table_pretrained.size()
        L2, nH2 = relative_position_bias_table_current.size()
        if nH1 != nH2:
            print(f"Error in loading {k}, passing......")
        else:
            if L1 != L2:
                # bicubic interpolate relative_position_bias_table if not match
                S1 = int(L1 ** 0.5)
                S2 = int(L2 ** 0.5)
                relative_position_bias_table_pretrained_resized = torch.nn.functional.interpolate(
                    relative_position_bias_table_pretrained.permute(1, 0).view(1, nH1, S1, S1), size=(S2, S2),
                    mode='bicubic')
                state_dict[k] = relative_position_bias_table_pretrained_resized.view(nH2, L2).permute(1, 0)

    # bicubic interpolate absolute_pos_embed if not match
    absolute_pos_embed_keys = [k for k in state_dict.keys() if "absolute_pos_embed" in k]
    for k in absolute_pos_embed_keys:
        # dpe
        absolute_pos_embed_pretrained = state_dict[k]
        absolute_pos_embed_current = model.state_dict()[k.replace('encoder.','')]
        _, L1, C1 = absolute_pos_embed_pretrained.size()
        _, L2, C2 = absolute_pos_embed_current.size()
        if C1 != C1:
            print(f"Error in loading {k}, passing......")
        else:
            if L1 != L2:
                S1 = int(L1 ** 0.5)
                S2 = int(L2 ** 0.5)
                absolute_pos_embed_pretrained = absolute_pos_embed_pretrained.reshape(-1, S1, S1, C1)
                absolute_pos_embed_pretrained = absolute_pos_embed_pretrained.permute(0, 3, 1, 2)
                absolute_pos_embed_pretrained_resized = torch.nn.functional.interpolate(
                    absolute_pos_embed_pretrained, size=(S2, S2), mode='bicubic')
                absolute_pos_embed_pretrained_resized = absolute_pos_embed_pretrained_resized.permute(0, 2, 3, 1)
                absolute_pos_embed_pretrained_resized = absolute_pos_embed_pretrained_resized.flatten(1, 2)
                state_dict[k] = absolute_pos_embed_pretrained_resized

    # check classifier, if not match, then re-init classifier to zero
    head_bias_pretrained = state_dict['classifier.bias']
    Nc1 = head_bias_pretrained.shape[0]
    Nc2 = model.classifier.bias.shape[0]
    if (Nc1 != Nc2):
        torch.nn.init.constant_(model.classifier.bias, 0.)
        torch.nn.init.constant_(model.classifier.weight, 0.)
        del state_dict['classifier.weight']
        del state_dict['classifier.bias']
        print(f"Error in loading classifier head, re-init classifier head to 0")

    msg = model.load_state_dict(state_dict, strict=False)
    print(msg)

    print(f"=> loaded successfully '{config.MODEL.PRETRAINED}'")

    torch.cuda.empty_cache()

In [121]:
swin_config = Box(swin_config)
load_pretrained(swin_config, model)

_IncompatibleKeys(missing_keys=['stages.0.blocks.0.attn_mask', 'stages.0.blocks.0.attn.relative_coords_table', 'stages.0.blocks.0.attn.relative_position_index', 'stages.0.blocks.1.attn_mask', 'stages.0.blocks.1.attn.relative_coords_table', 'stages.0.blocks.1.attn.relative_position_index', 'stages.1.blocks.0.attn_mask', 'stages.1.blocks.0.attn.relative_coords_table', 'stages.1.blocks.0.attn.relative_position_index', 'stages.1.blocks.1.attn_mask', 'stages.1.blocks.1.attn.relative_coords_table', 'stages.1.blocks.1.attn.relative_position_index', 'stages.2.blocks.0.attn_mask', 'stages.2.blocks.0.attn.relative_coords_table', 'stages.2.blocks.0.attn.relative_position_index', 'stages.2.blocks.1.attn_mask', 'stages.2.blocks.1.attn.relative_coords_table', 'stages.2.blocks.1.attn.relative_position_index', 'stages.2.blocks.2.attn_mask', 'stages.2.blocks.2.attn.relative_coords_table', 'stages.2.blocks.2.attn.relative_position_index', 'stages.2.blocks.3.attn_mask', 'stages.2.blocks.3.attn.relative_c

In [122]:
model.state_dict()['embeddings.patch_embeddings.weight'][0]

tensor([[[ 0.0915,  0.0452,  0.0492, -0.1203],
         [-0.0344, -0.0366, -0.0396, -0.1072],
         [ 0.1185,  0.0418,  0.0388,  0.0307],
         [ 0.1307,  0.0662, -0.1047,  0.1280]],

        [[-0.1221, -0.0301, -0.1284,  0.0514],
         [-0.0583,  0.0590, -0.0052, -0.0822],
         [-0.0587, -0.0320,  0.0118, -0.0611],
         [-0.1188,  0.1018,  0.0837, -0.0119]],

        [[-0.0537, -0.0778, -0.0539,  0.1121],
         [ 0.0669, -0.0011,  0.1144,  0.0723],
         [ 0.0919, -0.0840, -0.0773,  0.0947],
         [-0.0562, -0.0688, -0.0784,  0.0850]]])

In [123]:
model.state_dict()['stages.3.blocks.1.attn.crpb_mlp.3.weight'][0]

tensor([-0.0378, -0.0356, -0.0572,  0.0410,  0.1083,  0.0806, -0.0449,  0.1297,
         0.1483, -0.0354, -0.0157,  0.0207,  0.0078, -0.0074, -0.0733,  0.1325,
         0.0424, -0.0151,  0.0299,  0.0710, -0.0764,  0.0846,  0.0357,  0.0234,
         0.0228, -0.0675,  0.0554,  0.0503,  0.1562,  0.0724, -0.0679, -0.0528,
        -0.0609, -0.0841,  0.0542,  0.0328, -0.0305, -0.0002, -0.0288,  0.1304,
         0.0463,  0.0064, -0.0864, -0.0470, -0.0721, -0.0730, -0.0987, -0.0274,
        -0.0715,  0.0071,  0.0931,  0.0537,  0.1133, -0.0511, -0.0168,  0.0127,
         0.1350,  0.0222, -0.0723,  0.0142, -0.0255, -0.0166, -0.0845,  0.0744,
         0.0075, -0.0324, -0.0903, -0.1243,  0.0328,  0.0239,  0.0102,  0.0438,
        -0.1146, -0.0742, -0.0043,  0.0933,  0.0712, -0.0298, -0.0953,  0.1248,
         0.0784, -0.0411, -0.0242,  0.1539,  0.0641,  0.0427, -0.0101, -0.0378,
        -0.0225,  0.1030,  0.0277, -0.0708, -0.0067,  0.0402,  0.0110,  0.0257,
        -0.0235,  0.0217,  0.0561,  0.01

In [124]:
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

In [125]:
# Transforms 정의하기
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8,1), interpolation=transforms.InterpolationMode.LANCZOS),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.9, scale=(0.02, 0.33)),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

data_dir = '../../data/sports'
batch_size = 960

train_path = data_dir+'/train'
valid_path = data_dir+'/valid'
test_path = data_dir+'/test'

# dataset load
train_data = ImageFolder(train_path, transform=train_transform)
valid_data = ImageFolder(valid_path, transform=test_transform)
test_data = ImageFolder(test_path, transform=test_transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [126]:
max_norm = 1.0 # paper : 100 with G variants

model.to(device)
model_path = '../../models/swin2/model_w_simmim.pth'

In [127]:
mixup_fn = Mixup(mixup_alpha=.7, 
                cutmix_alpha=.7, 
                prob=.7, 
                switch_prob=0.5, 
                mode='batch',
                label_smoothing=.1,
                num_classes=100)

epochs = 150

criterion = nn.CrossEntropyLoss(label_smoothing=0.)


In [128]:
layer_names = []
for i, (name, params) in enumerate(model.named_parameters()):
    lr = base_lr
    print(f'{i}: {name}')
    layer_names.append(name)

0: absolute_pos_embed
1: embeddings.patch_embeddings.weight
2: embeddings.patch_embeddings.bias
3: embeddings.norm.weight
4: embeddings.norm.bias
5: stages.0.blocks.0.attn.t_scale
6: stages.0.blocks.0.attn.crpb_mlp.0.weight
7: stages.0.blocks.0.attn.crpb_mlp.0.bias
8: stages.0.blocks.0.attn.crpb_mlp.3.weight
9: stages.0.blocks.0.attn.qkv.weight
10: stages.0.blocks.0.attn.qkv.bias
11: stages.0.blocks.0.attn.proj.weight
12: stages.0.blocks.0.attn.proj.bias
13: stages.0.blocks.0.norm1.weight
14: stages.0.blocks.0.norm1.bias
15: stages.0.blocks.0.mlp.fc1.weight
16: stages.0.blocks.0.mlp.fc1.bias
17: stages.0.blocks.0.mlp.fc2.weight
18: stages.0.blocks.0.mlp.fc2.bias
19: stages.0.blocks.0.norm2.weight
20: stages.0.blocks.0.norm2.bias
21: stages.0.blocks.1.attn.t_scale
22: stages.0.blocks.1.attn.crpb_mlp.0.weight
23: stages.0.blocks.1.attn.crpb_mlp.0.bias
24: stages.0.blocks.1.attn.crpb_mlp.3.weight
25: stages.0.blocks.1.attn.qkv.weight
26: stages.0.blocks.1.attn.qkv.bias
27: stages.0.blocks

In [129]:
layer_names.reverse()
layer_names[:5]

['classifier.bias',
 'classifier.weight',
 'layernorm.bias',
 'layernorm.weight',
 'stages.3.blocks.1.norm2.bias']

In [130]:
lr      = 1.4e-3      # paper : 1.4e-3
lr_mult = 0.87  # paper : 0.87
weight_decay = 0.01 # paper : 0.1

param_groups = []
prev_group_name = layer_names[0].split('.')[0]

for idx, name in enumerate(layer_names):
    
    cur_group_name = name.split('.')[0]
    
    if cur_group_name != prev_group_name:
        lr *= lr_mult
    prev_group_name = cur_group_name
    
    print(f"{idx}: {name}'s lr={lr}")
    
    param_groups += [{'params': [ p for n, p in model.named_parameters() if n == name and p.requires_grad],
                      'lr' : lr,
                      'weight_decay': weight_decay}]

0: classifier.bias's lr=0.0014
1: classifier.weight's lr=0.0014
2: layernorm.bias's lr=0.001218
3: layernorm.weight's lr=0.001218
4: stages.3.blocks.1.norm2.bias's lr=0.0010596599999999998
5: stages.3.blocks.1.norm2.weight's lr=0.0010596599999999998
6: stages.3.blocks.1.mlp.fc2.bias's lr=0.0010596599999999998
7: stages.3.blocks.1.mlp.fc2.weight's lr=0.0010596599999999998
8: stages.3.blocks.1.mlp.fc1.bias's lr=0.0010596599999999998
9: stages.3.blocks.1.mlp.fc1.weight's lr=0.0010596599999999998
10: stages.3.blocks.1.norm1.bias's lr=0.0010596599999999998
11: stages.3.blocks.1.norm1.weight's lr=0.0010596599999999998
12: stages.3.blocks.1.attn.proj.bias's lr=0.0010596599999999998
13: stages.3.blocks.1.attn.proj.weight's lr=0.0010596599999999998
14: stages.3.blocks.1.attn.qkv.bias's lr=0.0010596599999999998
15: stages.3.blocks.1.attn.qkv.weight's lr=0.0010596599999999998
16: stages.3.blocks.1.attn.crpb_mlp.3.weight's lr=0.0010596599999999998
17: stages.3.blocks.1.attn.crpb_mlp.0.bias's lr=0.

In [None]:
# # 모델 레이어의 이름 추출
# layer_names = []
# for i, (name, params) in enumerate(model.named_parameters()):
#     lr = base_lr
#     print(f'{i}: {name}')
#     layer_names.append(name)

# # 뒷 레이어부터 시작하도록 뒤집기    
# layer_names.reverse()

# # 하이퍼 파라미터 정의
# lr      = 1.4e-3      # paper : 1.4e-3
# lr_mult = 0.87  # paper : 0.87
# weight_decay = 0.01 # paper : 0.1

# param_groups = []
# prev_group_name = layer_names[0].split('.')[0] # 그룹명 초기화

# for idx, name in enumerate(layer_names):    
#     cur_group_name = name.split('.')[0]    
#     if cur_group_name != prev_group_name: # 동일한 그룹에 속하면 동일한 학습율
#         lr *= lr_mult
#     prev_group_name = cur_group_name    
    
#     param_groups += [{'params': [ p for n, p in model.named_parameters() if n == name and p.requires_grad],
#                       'lr' : lr,
#                       'weight_decay': weight_decay}]

In [131]:
optimizer = optim.AdamW(param_groups)
warmup_steps = int(len(train_loader)*(epochs)*0.1)
train_steps = len(train_loader)*(epochs)
scheduler = transformers.get_cosine_schedule_with_warmup(optimizer, 
                                                        num_warmup_steps=warmup_steps, 
                                                        num_training_steps=train_steps,
                                                        num_cycles=0.5)

In [132]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')

# GradScaler 초기화
scaler = GradScaler()

for epoch in range(100):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
text = f"Epoch 당 평균 소요시간 : {training_time / epochs:.2f}초"      
print(text)

Epoch 1: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 4.640709622701009, Val Loss: 4.525732517242432, LR: 9.333333333333333e-05, Duration: 51.90 sec - model saved!


Epoch 2: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 4.465182367960612, Val Loss: 4.235929489135742, LR: 0.00018666666666666666, Duration: 51.52 sec - model saved!


Epoch 3: 100%|██████████| 15/15 [00:50<00:00,  3.35s/it]


	Loss: 4.285793622334798, Val Loss: 3.9897332191467285, LR: 0.00028000000000000003, Duration: 51.46 sec - model saved!


Epoch 4: 100%|██████████| 15/15 [00:50<00:00,  3.37s/it]


	Loss: 4.223414929707845, Val Loss: 3.925478458404541, LR: 0.0003733333333333333, Duration: 51.77 sec - model saved!


Epoch 5: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 4.131762758890788, Val Loss: 3.800685167312622, LR: 0.00046666666666666666, Duration: 51.54 sec - model saved!


Epoch 6: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 4.08875257174174, Val Loss: 3.5996603965759277, LR: 0.0005600000000000001, Duration: 51.54 sec - model saved!


Epoch 7: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 4.03374293645223, Val Loss: 3.5427401065826416, LR: 0.0006533333333333333, Duration: 51.53 sec - model saved!


Epoch 8: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 3.928725337982178, Val Loss: 3.4848854541778564, LR: 0.0007466666666666666, Duration: 51.51 sec - model saved!


Epoch 9: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 4.04330514272054, Val Loss: 3.4428153038024902, LR: 0.0008399999999999999, Duration: 51.55 sec - model saved!


Epoch 10: 100%|██████████| 15/15 [00:50<00:00,  3.35s/it]


	Loss: 3.853068971633911, Val Loss: 3.192392587661743, LR: 0.0009333333333333333, Duration: 51.36 sec - model saved!


Epoch 11: 100%|██████████| 15/15 [00:50<00:00,  3.35s/it]


	Loss: 3.940928411483765, Val Loss: 3.074150800704956, LR: 0.0010266666666666666, Duration: 51.44 sec - model saved!


Epoch 12: 100%|██████████| 15/15 [00:50<00:00,  3.37s/it]


	Loss: 3.586457697550456, Val Loss: 2.8808090686798096, LR: 0.0011200000000000001, Duration: 51.75 sec - model saved!


Epoch 13: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 3.808212073644002, Val Loss: 2.9665651321411133, LR: 0.0012133333333333334, Duration: 51.29 sec


Epoch 14: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 3.603448422749837, Val Loss: 2.853593349456787, LR: 0.0013066666666666667, Duration: 51.53 sec - model saved!


Epoch 15: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 3.7452565828959146, Val Loss: 2.857118844985962, LR: 0.0014, Duration: 51.37 sec


Epoch 16: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 3.7859806219736734, Val Loss: 2.7394979000091553, LR: 0.001399810468825623, Duration: 51.57 sec - model saved!


Epoch 17: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 3.6915764172871905, Val Loss: 2.658094882965088, LR: 0.0013992419779369672, Duration: 51.57 sec - model saved!


Epoch 18: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 3.5827109018961587, Val Loss: 2.610522508621216, LR: 0.001398294835181877, Duration: 51.57 sec - model saved!


Epoch 19: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 3.3371769269307454, Val Loss: 2.4410219192504883, LR: 0.001396969553454863, Duration: 51.61 sec - model saved!


Epoch 20: 100%|██████████| 15/15 [00:50<00:00,  3.37s/it]


	Loss: 3.4988106727600097, Val Loss: 2.4959654808044434, LR: 0.0013952668504193602, Duration: 51.40 sec


Epoch 21: 100%|██████████| 15/15 [00:50<00:00,  3.35s/it]


	Loss: 3.4955929120381675, Val Loss: 2.3629655838012695, LR: 0.0013931876481190993, Duration: 51.46 sec - model saved!


Epoch 22: 100%|██████████| 15/15 [00:50<00:00,  3.35s/it]


	Loss: 3.5100342591603595, Val Loss: 2.165684461593628, LR: 0.0013907330724788056, Duration: 51.31 sec - model saved!


Epoch 23: 100%|██████████| 15/15 [00:50<00:00,  3.35s/it]


	Loss: 3.465053017934163, Val Loss: 2.37201189994812, LR: 0.0013879044526944892, Duration: 51.22 sec


Epoch 24: 100%|██████████| 15/15 [00:50<00:00,  3.35s/it]


	Loss: 3.5234920978546143, Val Loss: 2.2359769344329834, LR: 0.001384703320513664, Duration: 51.14 sec


Epoch 25: 100%|██████████| 15/15 [00:50<00:00,  3.37s/it]


	Loss: 3.2505664507548016, Val Loss: 2.0291783809661865, LR: 0.0013811314094058767, Duration: 51.69 sec - model saved!


Epoch 26: 100%|██████████| 15/15 [00:50<00:00,  3.36s/it]


	Loss: 3.34499306678772, Val Loss: 2.0712969303131104, LR: 0.0013771906536240047, Duration: 51.25 sec


Epoch 27:   7%|▋         | 1/15 [00:03<00:49,  3.57s/it]

In [None]:
from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [None]:
# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics

  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Metric,Value
0,Accuracy,0.816
1,Precision,0.829698
2,Recall,0.816
3,F1 Score,0.799969


In [None]:
for epoch in range(10):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
text = f"Epoch 당 평균 소요시간 : {training_time / epochs:.2f}초"      
print(text)

# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics

Epoch 1: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.6922353823979694, Val Loss: 0.9384442567825317, LR: 0.0002913307727590911, Duration: 49.89 sec


Epoch 2: 100%|██████████| 15/15 [00:48<00:00,  3.25s/it]


	Loss: 2.657900913556417, Val Loss: 0.9159315824508667, LR: 0.00028081442660546124, Duration: 49.68 sec


Epoch 3: 100%|██████████| 15/15 [00:48<00:00,  3.27s/it]


	Loss: 2.7689472595850626, Val Loss: 0.950180172920227, LR: 0.0002704167733064941, Duration: 49.88 sec


Epoch 4: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.521797521909078, Val Loss: 0.9163442254066467, LR: 0.0002601434433748771, Duration: 49.97 sec


Epoch 5: 100%|██████████| 15/15 [00:48<00:00,  3.27s/it]


	Loss: 2.568425250053406, Val Loss: 0.8977269530296326, LR: 0.0002500000000000001, Duration: 50.11 sec - model saved!


Epoch 6: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.7730035543441773, Val Loss: 0.8929747343063354, LR: 0.00023999193603539232, Duration: 50.21 sec - model saved!


Epoch 7: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.7291774272918703, Val Loss: 0.9171628355979919, LR: 0.00023012467102424372, Duration: 49.96 sec


Epoch 8: 100%|██████████| 15/15 [00:49<00:00,  3.28s/it]


	Loss: 2.7100916067759195, Val Loss: 0.9216888546943665, LR: 0.00022040354826462666, Duration: 50.11 sec


Epoch 9: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.713088115056356, Val Loss: 0.8970794081687927, LR: 0.00021083383191600674, Duration: 49.97 sec


Epoch 10: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.761340061823527, Val Loss: 0.904382586479187, LR: 0.00020142070414860702, Duration: 49.98 sec
Epoch 당 평균 소요시간 : 55.61초


  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Metric,Value
0,Accuracy,0.804
1,Precision,0.82904
2,Recall,0.804
3,F1 Score,0.793341


In [None]:
for epoch in range(10):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
text = f"Epoch 당 평균 소요시간 : {training_time / epochs:.2f}초"      
print(text)

# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics

Epoch 1: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.4435115416844684, Val Loss: 0.8637356162071228, LR: 0.00019216926233717085, Duration: 50.26 sec - model saved!


Epoch 2: 100%|██████████| 15/15 [00:48<00:00,  3.27s/it]


	Loss: 2.6883505662282308, Val Loss: 0.8625206351280212, LR: 0.0001830845163006448, Duration: 50.11 sec - model saved!


Epoch 3: 100%|██████████| 15/15 [00:49<00:00,  3.30s/it]


	Loss: 2.4792883078257244, Val Loss: 0.8568503260612488, LR: 0.00017417138558927244, Duration: 50.58 sec - model saved!


Epoch 4: 100%|██████████| 15/15 [00:49<00:00,  3.30s/it]


	Loss: 2.7088448286056517, Val Loss: 0.8607507348060608, LR: 0.00016543469682057105, Duration: 50.38 sec


Epoch 5: 100%|██████████| 15/15 [00:49<00:00,  3.30s/it]


	Loss: 2.614908711115519, Val Loss: 0.8643636703491211, LR: 0.00015687918106563326, Duration: 50.34 sec


Epoch 6: 100%|██████████| 15/15 [00:48<00:00,  3.26s/it]


	Loss: 2.411665678024292, Val Loss: 0.8612100481987, LR: 0.00014850947128716913, Duration: 49.84 sec


Epoch 7: 100%|██████████| 15/15 [00:49<00:00,  3.28s/it]


	Loss: 2.604859987894694, Val Loss: 0.8613155484199524, LR: 0.00014033009983067452, Duration: 50.01 sec


Epoch 8: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.504119610786438, Val Loss: 0.8607224822044373, LR: 0.00013234549597008571, Duration: 50.01 sec


Epoch 9: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.7026391585667926, Val Loss: 0.8616685271263123, LR: 0.0001245599835092504, Duration: 49.96 sec


Epoch 10: 100%|██████████| 15/15 [00:49<00:00,  3.28s/it]


	Loss: 2.2587844928105674, Val Loss: 0.831054151058197, LR: 0.00011697777844051105, Duration: 50.40 sec - model saved!
Epoch 당 평균 소요시간 : 60.63초


  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Metric,Value
0,Accuracy,0.836
1,Precision,0.851548
2,Recall,0.836
3,F1 Score,0.82632


In [None]:
for epoch in range(10):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
text = f"Epoch 당 평균 소요시간 : {training_time / epochs:.2f}초"      
print(text)

# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics

Epoch 1: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.585992956161499, Val Loss: 0.8371878862380981, LR: 0.0001096029866616704, Duration: 49.97 sec


Epoch 2: 100%|██████████| 15/15 [00:49<00:00,  3.28s/it]


	Loss: 2.2336838006973267, Val Loss: 0.8162788152694702, LR: 0.00010243960175257604, Duration: 50.28 sec - model saved!


Epoch 3: 100%|██████████| 15/15 [00:49<00:00,  3.32s/it]


	Loss: 2.600951560338338, Val Loss: 0.8022180199623108, LR: 9.549150281252633e-05, Duration: 50.92 sec - model saved!


Epoch 4: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.7257570187250773, Val Loss: 0.8217611908912659, LR: 8.876245235966885e-05, Duration: 49.95 sec


Epoch 5: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.461031977335612, Val Loss: 0.8239462375640869, LR: 8.225609429353187e-05, Duration: 49.94 sec


Epoch 6: 100%|██████████| 15/15 [00:48<00:00,  3.26s/it]


	Loss: 2.575312360127767, Val Loss: 0.8151116371154785, LR: 7.597595192178702e-05, Duration: 49.85 sec


Epoch 7: 100%|██████████| 15/15 [00:49<00:00,  3.28s/it]


	Loss: 2.5168095588684083, Val Loss: 0.8133642077445984, LR: 6.992542605231739e-05, Duration: 50.07 sec


Epoch 8: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.538121747970581, Val Loss: 0.8043501377105713, LR: 6.410779315161885e-05, Duration: 49.98 sec


Epoch 9: 100%|██████████| 15/15 [00:49<00:00,  3.28s/it]


	Loss: 2.4347947597503663, Val Loss: 0.8149065375328064, LR: 5.852620357053651e-05, Duration: 50.17 sec


Epoch 10: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.482781473795573, Val Loss: 0.7992389798164368, LR: 5.318367983829392e-05, Duration: 50.22 sec - model saved!
Epoch 당 평균 소요시간 : 65.64초


Unnamed: 0,Metric,Value
0,Accuracy,0.832
1,Precision,0.854385
2,Recall,0.832
3,F1 Score,0.82471


In [None]:
for epoch in range(10):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
text = f"Epoch 당 평균 소요시간 : {training_time / epochs:.2f}초"      
print(text)

# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics

Epoch 1: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.156569306055705, Val Loss: 0.8060244917869568, LR: 4.8083115025739754e-05, Duration: 49.93 sec


Epoch 2: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.29921023050944, Val Loss: 0.7820650339126587, LR: 4.322727117869951e-05, Duration: 50.15 sec - model saved!


Epoch 3: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.451512368520101, Val Loss: 0.79397052526474, LR: 3.861877782227885e-05, Duration: 49.97 sec


Epoch 4: 100%|██████████| 15/15 [00:49<00:00,  3.29s/it]


	Loss: 2.636488103866577, Val Loss: 0.7902220487594604, LR: 3.426013053692878e-05, Duration: 50.22 sec


Epoch 5: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.39673334757487, Val Loss: 0.7954288721084595, LR: 3.0153689607045842e-05, Duration: 49.98 sec


Epoch 6: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.4496769825617473, Val Loss: 0.7924708724021912, LR: 2.63016787428354e-05, Duration: 49.92 sec


Epoch 7: 100%|██████████| 15/15 [00:49<00:00,  3.28s/it]


	Loss: 2.685069513320923, Val Loss: 0.7937246561050415, LR: 2.2706183876134045e-05, Duration: 50.06 sec


Epoch 8: 100%|██████████| 15/15 [00:49<00:00,  3.28s/it]


	Loss: 2.662763730684916, Val Loss: 0.8041825890541077, LR: 1.9369152030840554e-05, Duration: 50.13 sec


Epoch 9: 100%|██████████| 15/15 [00:49<00:00,  3.28s/it]


	Loss: 2.558410406112671, Val Loss: 0.8004736304283142, LR: 1.6292390268568102e-05, Duration: 50.07 sec


Epoch 10: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.3674689133961997, Val Loss: 0.8002943396568298, LR: 1.3477564710088097e-05, Duration: 49.93 sec
Epoch 당 평균 소요시간 : 70.65초


  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Metric,Value
0,Accuracy,0.83
1,Precision,0.850464
2,Recall,0.83
3,F1 Score,0.821716


In [None]:
for epoch in range(10):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
text = f"Epoch 당 평균 소요시간 : {training_time / epochs:.2f}초"      
print(text)

# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics

Epoch 1: 100%|██████████| 15/15 [00:49<00:00,  3.29s/it]


	Loss: 2.5325933933258056, Val Loss: 0.7967063784599304, LR: 1.0926199633097156e-05, Duration: 50.19 sec


Epoch 2: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.5717090368270874, Val Loss: 0.8010687232017517, LR: 8.639676646793382e-06, Duration: 49.94 sec


Epoch 3: 100%|██████████| 15/15 [00:49<00:00,  3.28s/it]


	Loss: 2.3656076908111574, Val Loss: 0.7969766855239868, LR: 6.61923394371039e-06, Duration: 50.06 sec


Epoch 4: 100%|██████████| 15/15 [00:48<00:00,  3.26s/it]


	Loss: 2.4497125864028932, Val Loss: 0.7960164546966553, LR: 4.865965629214819e-06, Duration: 49.85 sec


Epoch 5: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.407879130045573, Val Loss: 0.794651985168457, LR: 3.3808211290284885e-06, Duration: 50.00 sec


Epoch 6: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.454337787628174, Val Loss: 0.7948555946350098, LR: 2.1646046750978256e-06, Duration: 50.01 sec


Epoch 7: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.474202871322632, Val Loss: 0.7953035831451416, LR: 1.2179748700879012e-06, Duration: 50.02 sec


Epoch 8: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.4273924191792804, Val Loss: 0.7954041361808777, LR: 5.41444330737717e-07, Duration: 49.92 sec


Epoch 9: 100%|██████████| 15/15 [00:49<00:00,  3.27s/it]


	Loss: 2.3542084137598676, Val Loss: 0.7953218221664429, LR: 1.3537941026914301e-07, Duration: 50.01 sec


Epoch 10: 100%|██████████| 15/15 [00:48<00:00,  3.27s/it]


	Loss: 2.427381428082784, Val Loss: 0.7953270673751831, LR: 0.0, Duration: 49.87 sec
Epoch 당 평균 소요시간 : 75.64초


  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,Metric,Value
0,Accuracy,0.826
1,Precision,0.846056
2,Recall,0.826
3,F1 Score,0.81698
