# 1.Library Import

In [13]:
import yaml
from box import Box

import torch
import torch.nn as nn
import torch.optim as optim

import simmim
from swin_v2 import SwinTransformerV2

from torch.cuda.amp import autocast, GradScaler
from torch.nn.utils import clip_grad_norm_
from tqdm import tqdm
import time

from timm.data import Mixup
import transformers

# 2.Configuration

In [14]:
simmim_config = yaml.load(open('config/pretrain.yaml'), Loader=yaml.FullLoader)
simmim_config

{'MODEL': {'TYPE': 'swinv2',
  'NAME': 'simmim_pretrain',
  'DROP_PATH_RATE': 0.0,
  'SWIN': {'EMBED_DIM': 96,
   'DEPTHS': [2, 2, 6, 2],
   'NUM_HEADS': [3, 6, 12, 24],
   'WINDOW_SIZE': 6,
   'PATCH_SIZE': 4}},
 'DATA': {'IMG_SIZE': 192,
  'MASK_PATCH_SIZE': 32,
  'MASK_RATIO': 0.6,
  'BATCH_SIZE': 1024,
  'NUM_WORKERS': 24,
  'DATA_PATH': '../../data/sports'},
 'TRAIN': {'EPOCHS': 100,
  'WARMUP_EPOCHS': 10,
  'BASE_LR': 0.0014,
  'WEIGHT_DECAY': 0.05,
  'CLIP_GRAD': 5}}

In [15]:
encoder_config = {'img_size':simmim_config['DATA']['IMG_SIZE'], 
                'patch_size':simmim_config['MODEL']['SWIN']['PATCH_SIZE'], 
                'in_chans':3, 
                'num_classes':100,
                'embed_dim':simmim_config['MODEL']['SWIN']['EMBED_DIM'], 
                'depths':simmim_config['MODEL']['SWIN']['DEPTHS'], 
                'num_heads':simmim_config['MODEL']['SWIN']['NUM_HEADS'],           
                'window_size':simmim_config['MODEL']['SWIN']['WINDOW_SIZE'], 
                'mlp_ratio':4., 
                'qkv_bias':True, 
                'qk_scale':None,
                'drop_rate':0., 
                'attn_drop_rate':0., 
                'drop_path_rate':simmim_config['MODEL']['DROP_PATH_RATE'],
                'norm_layer':nn.LayerNorm, 
                'patch_norm':True, 
                'pretrained_window_sizes':[0,0,0,0],
                'ape':True}

encoder_stride = 32
in_chans = encoder_config['in_chans']
patch_size = encoder_config['patch_size']

# 3.Load SimMIM

In [16]:
encoder = simmim.SwinTransformerV2ForSimMIM(**encoder_config)

In [17]:
model = simmim.SimMIM( encoder=encoder, 
                       encoder_stride=encoder_stride, 
                       in_chans=in_chans, 
                       patch_size=patch_size)

## Mask Generator Test

In [18]:
mask_generator = simmim.MaskGenerator(input_size=224,
                                      mask_patch_size=28,
                                      model_patch_size=28,
                                      mask_ratio=.6)
mask = mask_generator()
mask

array([[0, 1, 0, 0, 1, 1, 1, 1],
       [1, 1, 1, 0, 0, 0, 0, 1],
       [1, 0, 1, 1, 1, 1, 0, 0],
       [1, 1, 0, 1, 0, 0, 1, 1],
       [1, 1, 0, 1, 0, 1, 0, 0],
       [0, 1, 0, 1, 1, 0, 1, 0],
       [1, 0, 0, 0, 1, 1, 1, 1],
       [1, 0, 1, 1, 1, 1, 1, 1]])

In [19]:
print(f"생성된 mask의 비율은 {mask.sum() / (mask.shape[0]*mask.shape[1])*100}%")

생성된 mask의 비율은 60.9375%


## SimMIM DataLoader

In [20]:
simmim_config = Box(simmim_config)
dataloader = simmim.build_loader_simmim(simmim_config)

samples = next(iter(dataloader))
len(samples)

3

In [21]:
samples[0].shape, samples[1].shape, samples[2].shape 

(torch.Size([1024, 3, 192, 192]),
 torch.Size([1024, 48, 48]),
 torch.Size([1024]))

## Hyper Parameters and etc.

In [22]:
base_lr = float(simmim_config.TRAIN.BASE_LR)
weight_decay = simmim_config.TRAIN.WEIGHT_DECAY
optimizer = optim.AdamW(model.parameters(), lr=base_lr, weight_decay=weight_decay)
warmup_epochs = simmim_config.TRAIN.WARMUP_EPOCHS
train_epochs = simmim_config.TRAIN.EPOCHS

scheduler = transformers.get_cosine_schedule_with_warmup(optimizer, 
                                                        num_warmup_steps=warmup_epochs*len(dataloader), 
                                                        num_training_steps=train_epochs*len(dataloader),
                                                        num_cycles=0.5)

In [23]:
device = 'cuda:3'
model.to(device)
torch.backends.cudnn.benchmark = True

model_save = True
simmim_path = '../../models/swin2/simmim.pth'

# 4.Train SimMIM

In [24]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')

# GradScaler 초기화
scaler = GradScaler()

for epoch in range(train_epochs):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        image, mask = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            loss = model(image, mask)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        if simmim_config.TRAIN.CLIP_GRAD:
            clip_grad_norm_(model.parameters(), max_norm=simmim_config.TRAIN.CLIP_GRAD)
        else:
            clip_grad_norm_(model.parameters())

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(dataloader)
    losses.append(epoch_loss)

    # 모델 저장
    if epoch_loss < best_loss:
        
        best_loss = epoch_loss
        vit_save = model_save
        if vit_save:
            torch.save(model.state_dict(), simmim_path)
        
    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss:.4f}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False    
        
    print(text)


Epoch 1: 100%|██████████| 14/14 [00:19<00:00,  1.40s/it]


	Loss: 1.1412, LR: 0.00014000000000000001, Duration: 20.75 sec - model saved!


Epoch 2: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]


	Loss: 1.0940, LR: 0.00028000000000000003, Duration: 18.91 sec - model saved!


Epoch 3: 100%|██████████| 14/14 [00:17<00:00,  1.22s/it]


	Loss: 1.0501, LR: 0.00041999999999999996, Duration: 18.29 sec - model saved!


Epoch 4: 100%|██████████| 14/14 [00:17<00:00,  1.23s/it]


	Loss: 0.9446, LR: 0.0005600000000000001, Duration: 18.56 sec - model saved!


Epoch 5: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]


	Loss: 0.8564, LR: 0.0007, Duration: 17.95 sec - model saved!


Epoch 6: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]


	Loss: 0.7880, LR: 0.0008399999999999999, Duration: 18.15 sec - model saved!


Epoch 7: 100%|██████████| 14/14 [00:18<00:00,  1.29s/it]

	Loss: 0.7477, LR: 0.00098, Duration: 19.33 sec - model saved!



Epoch 8: 100%|██████████| 14/14 [00:17<00:00,  1.22s/it]

	Loss: 0.7619, LR: 0.0011200000000000001, Duration: 18.10 sec



Epoch 9: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]


	Loss: 0.7224, LR: 0.00126, Duration: 18.15 sec - model saved!


Epoch 10: 100%|██████████| 14/14 [00:17<00:00,  1.23s/it]


	Loss: 0.7048, LR: 0.0014, Duration: 18.47 sec - model saved!


Epoch 11: 100%|██████████| 14/14 [00:17<00:00,  1.27s/it]


	Loss: 0.6859, LR: 0.001399573578913367, Duration: 18.98 sec - model saved!


Epoch 12: 100%|██████████| 14/14 [00:17<00:00,  1.25s/it]


	Loss: 0.6772, LR: 0.001398294835181877, Duration: 18.69 sec - model saved!


Epoch 13: 100%|██████████| 14/14 [00:17<00:00,  1.25s/it]

	Loss: 0.6765, LR: 0.0013961653267577914, Duration: 18.86 sec - model saved!



Epoch 14: 100%|██████████| 14/14 [00:17<00:00,  1.21s/it]


	Loss: 0.6503, LR: 0.0013931876481190993, Duration: 18.33 sec - model saved!


Epoch 15: 100%|██████████| 14/14 [00:17<00:00,  1.27s/it]


	Loss: 0.6469, LR: 0.0013893654271085456, Duration: 18.99 sec - model saved!


Epoch 16: 100%|██████████| 14/14 [00:17<00:00,  1.25s/it]

	Loss: 0.6423, LR: 0.001384703320513664, Duration: 18.76 sec - model saved!



Epoch 17: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]


	Loss: 0.6183, LR: 0.0013792070083931975, Duration: 18.29 sec - model saved!


Epoch 18: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.6149, LR: 0.0013728831871568231, Duration: 18.60 sec - model saved!



Epoch 19: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.6085, LR: 0.0013657395614066075, Duration: 18.57 sec - model saved!



Epoch 20: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.6131, LR: 0.001357784834550136, Duration: 18.79 sec



Epoch 21: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.6166, LR: 0.0013490286981967512, Duration: 18.53 sec



Epoch 22: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]


	Loss: 0.5999, LR: 0.0013394818203498204, Duration: 19.14 sec - model saved!


Epoch 23: 100%|██████████| 14/14 [00:17<00:00,  1.22s/it]


	Loss: 0.5846, LR: 0.0013291558324094168, Duration: 18.39 sec - model saved!


Epoch 24: 100%|██████████| 14/14 [00:18<00:00,  1.29s/it]

	Loss: 0.5908, LR: 0.0013180633150012488, Duration: 19.11 sec



Epoch 25: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.5852, LR: 0.0013062177826491071, Duration: 18.63 sec



Epoch 26: 100%|██████████| 14/14 [00:17<00:00,  1.22s/it]


	Loss: 0.5818, LR: 0.001293633667309498, Duration: 18.30 sec - model saved!


Epoch 27: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.5862, LR: 0.001280326300788529, Duration: 18.44 sec



Epoch 28: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]


	Loss: 0.5788, LR: 0.0012663118960624632, Duration: 18.68 sec - model saved!


Epoch 29: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.5782, LR: 0.0012516075275247052, Duration: 18.90 sec - model saved!



Epoch 30: 100%|██████████| 14/14 [00:18<00:00,  1.31s/it]

	Loss: 0.5743, LR: 0.0012362311101832846, Duration: 19.69 sec - model saved!



Epoch 31: 100%|██████████| 14/14 [00:17<00:00,  1.22s/it]


	Loss: 0.5682, LR: 0.001220201377834176, Duration: 18.34 sec - model saved!


Epoch 32: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.5837, LR: 0.0012035378602370558, Duration: 18.71 sec



Epoch 33: 100%|██████████| 14/14 [00:17<00:00,  1.25s/it]

	Loss: 0.5727, LR: 0.0011862608593212981, Duration: 18.65 sec



Epoch 34: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]


	Loss: 0.5614, LR: 0.0011683914244512007, Duration: 18.79 sec - model saved!


Epoch 35: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]

	Loss: 0.5787, LR: 0.0011499513267805774, Duration: 17.97 sec



Epoch 36: 100%|██████████| 14/14 [00:16<00:00,  1.17s/it]

	Loss: 0.5751, LR: 0.0011309630327279608, Duration: 17.58 sec



Epoch 37: 100%|██████████| 14/14 [00:17<00:00,  1.23s/it]


	Loss: 0.5596, LR: 0.0011114496766047313, Duration: 18.60 sec - model saved!


Epoch 38: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]

	Loss: 0.5613, LR: 0.0010914350324295228, Duration: 17.95 sec



Epoch 39: 100%|██████████| 14/14 [00:18<00:00,  1.30s/it]

	Loss: 0.5569, LR: 0.0010709434849632434, Duration: 19.36 sec - model saved!



Epoch 40: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.5572, LR: 0.00105, Duration: 18.37 sec



Epoch 41: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.5528, LR: 0.0010286300939501235, Duration: 18.62 sec - model saved!



Epoch 42: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]


	Loss: 0.5473, LR: 0.001006859802752354, Duration: 18.62 sec - model saved!


Epoch 43: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]

	Loss: 0.5503, LR: 0.0009847156501530602, Duration: 17.88 sec



Epoch 44: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]

	Loss: 0.5486, LR: 0.0009622246153911386, Duration: 17.83 sec



Epoch 45: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]

	Loss: 0.5493, LR: 0.0009394141003279682, Duration: 17.83 sec



Epoch 46: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.5475, LR: 0.0009163118960624632, Duration: 18.51 sec



Epoch 47: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.5476, LR: 0.0008929461490718994, Duration: 18.50 sec



Epoch 48: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.5477, LR: 0.0008693453269197673, Duration: 18.44 sec



Epoch 49: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]

	Loss: 0.5512, LR: 0.0008455381835724314, Duration: 18.99 sec



Epoch 50: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.5548, LR: 0.0008215537243668514, Duration: 18.38 sec



Epoch 51: 100%|██████████| 14/14 [00:17<00:00,  1.23s/it]


	Loss: 0.5438, LR: 0.0007974211706720458, Duration: 18.36 sec - model saved!


Epoch 52: 100%|██████████| 14/14 [00:18<00:00,  1.29s/it]

	Loss: 0.5403, LR: 0.0007731699242873575, Duration: 19.33 sec - model saved!



Epoch 53: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]


	Loss: 0.5366, LR: 0.0007488295316208876, Duration: 18.01 sec - model saved!


Epoch 54: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.5438, LR: 0.0007244296476917508, Duration: 18.74 sec



Epoch 55: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]

	Loss: 0.5468, LR: 0.0007, Duration: 17.91 sec



Epoch 56: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]

	Loss: 0.5379, LR: 0.0006755703523082495, Duration: 18.97 sec



Epoch 57: 100%|██████████| 14/14 [00:16<00:00,  1.18s/it]

	Loss: 0.5400, LR: 0.0006511704683791123, Duration: 17.56 sec



Epoch 58: 100%|██████████| 14/14 [00:17<00:00,  1.23s/it]

	Loss: 0.5381, LR: 0.0006268300757126426, Duration: 18.30 sec



Epoch 59: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]


	Loss: 0.5354, LR: 0.0006025788293279544, Duration: 17.99 sec - model saved!


Epoch 60: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.5373, LR: 0.0005784462756331488, Duration: 18.48 sec



Epoch 61: 100%|██████████| 14/14 [00:17<00:00,  1.23s/it]


	Loss: 0.5351, LR: 0.0005544618164275686, Duration: 18.51 sec - model saved!


Epoch 62: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.5330, LR: 0.0005306546730802327, Duration: 18.65 sec - model saved!



Epoch 63: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]


	Loss: 0.5302, LR: 0.0005070538509281006, Duration: 18.19 sec - model saved!


Epoch 64: 100%|██████████| 14/14 [00:17<00:00,  1.25s/it]

	Loss: 0.5310, LR: 0.0004836881039375369, Duration: 18.54 sec



Epoch 65: 100%|██████████| 14/14 [00:18<00:00,  1.29s/it]


	Loss: 0.5297, LR: 0.0004605858996720319, Duration: 19.31 sec - model saved!


Epoch 66: 100%|██████████| 14/14 [00:17<00:00,  1.22s/it]

	Loss: 0.5271, LR: 0.0004377753846088615, Duration: 18.36 sec - model saved!



Epoch 67: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]


	Loss: 0.5254, LR: 0.00041528434984693997, Duration: 19.21 sec - model saved!


Epoch 68: 100%|██████████| 14/14 [00:16<00:00,  1.18s/it]

	Loss: 0.5255, LR: 0.00039314019724764573, Duration: 17.61 sec



Epoch 69: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.5269, LR: 0.00037136990604987665, Duration: 18.73 sec



Epoch 70: 100%|██████████| 14/14 [00:17<00:00,  1.27s/it]

	Loss: 0.5258, LR: 0.00035000000000000016, Duration: 18.84 sec



Epoch 71: 100%|██████████| 14/14 [00:17<00:00,  1.27s/it]


	Loss: 0.5245, LR: 0.00032905651503675667, Duration: 19.08 sec - model saved!


Epoch 72: 100%|██████████| 14/14 [00:18<00:00,  1.30s/it]


	Loss: 0.5221, LR: 0.0003085649675704773, Duration: 19.50 sec - model saved!


Epoch 73: 100%|██████████| 14/14 [00:17<00:00,  1.23s/it]

	Loss: 0.5225, LR: 0.0002885503233952689, Duration: 18.26 sec



Epoch 74: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]

	Loss: 0.5220, LR: 0.0002690369672720392, Duration: 18.19 sec - model saved!



Epoch 75: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]


	Loss: 0.5209, LR: 0.00025004867321942243, Duration: 18.19 sec - model saved!


Epoch 76: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]

	Loss: 0.5211, LR: 0.00023160857554879947, Duration: 19.04 sec



Epoch 77: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]


	Loss: 0.5208, LR: 0.00021373914067870185, Duration: 19.03 sec - model saved!


Epoch 78: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]


	Loss: 0.5208, LR: 0.00019646213976294433, Duration: 18.18 sec - model saved!


Epoch 79: 100%|██████████| 14/14 [00:18<00:00,  1.29s/it]

	Loss: 0.5192, LR: 0.00017979862216582396, Duration: 19.14 sec - model saved!



Epoch 80: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.5202, LR: 0.00016376888981671546, Duration: 18.32 sec



Epoch 81: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]

	Loss: 0.5193, LR: 0.00014839247247529466, Duration: 17.83 sec



Epoch 82: 100%|██████████| 14/14 [00:17<00:00,  1.22s/it]


	Loss: 0.5176, LR: 0.00013368810393753685, Duration: 18.31 sec - model saved!


Epoch 83: 100%|██████████| 14/14 [00:18<00:00,  1.29s/it]

	Loss: 0.5186, LR: 0.00011967369921147086, Duration: 19.17 sec



Epoch 84: 100%|██████████| 14/14 [00:17<00:00,  1.22s/it]


	Loss: 0.5178, LR: 0.00010636633269050183, Duration: 18.02 sec


Epoch 85: 100%|██████████| 14/14 [00:18<00:00,  1.30s/it]


	Loss: 0.5174, LR: 9.37822173508929e-05, Duration: 19.45 sec - model saved!


Epoch 86: 100%|██████████| 14/14 [00:18<00:00,  1.31s/it]


	Loss: 0.5170, LR: 8.19366849987511e-05, Duration: 19.52 sec - model saved!


Epoch 87: 100%|██████████| 14/14 [00:17<00:00,  1.25s/it]

	Loss: 0.5158, LR: 7.084416759058323e-05, Duration: 18.71 sec - model saved!



Epoch 88: 100%|██████████| 14/14 [00:17<00:00,  1.23s/it]

	Loss: 0.5173, LR: 6.0518179650179314e-05, Duration: 18.26 sec



Epoch 89: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]

	Loss: 0.5167, LR: 5.097130180324888e-05, Duration: 18.05 sec



Epoch 90: 100%|██████████| 14/14 [00:16<00:00,  1.21s/it]

	Loss: 0.5170, LR: 4.221516544986418e-05, Duration: 17.97 sec



Epoch 91: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]


	Loss: 0.5170, LR: 3.426043859339253e-05, Duration: 18.75 sec


Epoch 92: 100%|██████████| 14/14 [00:17<00:00,  1.28s/it]

	Loss: 0.5155, LR: 2.7116812843176773e-05, Duration: 19.20 sec - model saved!



Epoch 93: 100%|██████████| 14/14 [00:17<00:00,  1.27s/it]

	Loss: 0.5159, LR: 2.0792991606802468e-05, Duration: 18.91 sec



Epoch 94: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.5175, LR: 1.5296679486336016e-05, Duration: 18.66 sec



Epoch 95: 100%|██████████| 14/14 [00:16<00:00,  1.20s/it]

	Loss: 0.5159, LR: 1.0634572891454386e-05, Duration: 17.95 sec



Epoch 96: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.5163, LR: 6.812351880900747e-06, Duration: 18.37 sec



Epoch 97: 100%|██████████| 14/14 [00:17<00:00,  1.26s/it]

	Loss: 0.5163, LR: 3.834673242208697e-06, Duration: 18.74 sec



Epoch 98: 100%|██████████| 14/14 [00:17<00:00,  1.24s/it]

	Loss: 0.5168, LR: 1.7051648181230617e-06, Duration: 18.51 sec



Epoch 99: 100%|██████████| 14/14 [00:18<00:00,  1.29s/it]

	Loss: 0.5165, LR: 4.264210866329665e-07, Duration: 19.19 sec



Epoch 100: 100%|██████████| 14/14 [00:17<00:00,  1.23s/it]


	Loss: 0.5154, LR: 0.0, Duration: 18.59 sec - model saved!


## Del SimMIM Model

In [25]:
model.cpu()
del model
torch.cuda.empty_cache()

# 5.Load Swin V2 for stage-2 training

In [26]:
model = SwinTransformerV2(pretrained_window_sizes=[6,6,6,6], ape=True, drop_path_rate=0.3)
model.state_dict().keys()

odict_keys(['absolute_pos_embed', 'embeddings.patch_embeddings.weight', 'embeddings.patch_embeddings.bias', 'embeddings.norm.weight', 'embeddings.norm.bias', 'stages.0.blocks.0.attn_mask', 'stages.0.blocks.0.attn.t_scale', 'stages.0.blocks.0.attn.relative_coords_table', 'stages.0.blocks.0.attn.relative_position_index', 'stages.0.blocks.0.attn.crpb_mlp.0.weight', 'stages.0.blocks.0.attn.crpb_mlp.0.bias', 'stages.0.blocks.0.attn.crpb_mlp.3.weight', 'stages.0.blocks.0.attn.qkv.weight', 'stages.0.blocks.0.attn.qkv.bias', 'stages.0.blocks.0.attn.proj.weight', 'stages.0.blocks.0.attn.proj.bias', 'stages.0.blocks.0.norm1.weight', 'stages.0.blocks.0.norm1.bias', 'stages.0.blocks.0.mlp.fc1.weight', 'stages.0.blocks.0.mlp.fc1.bias', 'stages.0.blocks.0.mlp.fc2.weight', 'stages.0.blocks.0.mlp.fc2.bias', 'stages.0.blocks.0.norm2.weight', 'stages.0.blocks.0.norm2.bias', 'stages.0.blocks.1.attn_mask', 'stages.0.blocks.1.attn.t_scale', 'stages.0.blocks.1.attn.relative_coords_table', 'stages.0.blocks.1

In [27]:
from torchsummary import summary

summary(model.to('cuda'), (3,224,224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 96, 56, 56]           4,704
         LayerNorm-2             [-1, 3136, 96]             192
        embeddings-3             [-1, 3136, 96]               0
           Dropout-4             [-1, 3136, 96]               0
            Linear-5              [-1, 49, 288]          27,936
            Linear-6          [-1, 13, 13, 384]           1,152
              ReLU-7          [-1, 13, 13, 384]               0
           Dropout-8          [-1, 13, 13, 384]               0
            Linear-9            [-1, 13, 13, 3]           1,152
          Softmax-10            [-1, 3, 49, 49]               0
          Dropout-11            [-1, 3, 49, 49]               0
           Linear-12               [-1, 49, 96]           9,312
          Dropout-13               [-1, 49, 96]               0
  WindowAttention-14               [-1,

## Default Parameter(weight) Check
- 추후 SimMIM 가중치가 제대로 불러와졌는지 확인용

In [28]:
model.state_dict()['embeddings.patch_embeddings.weight'][0]

tensor([[[ 0.0249, -0.1357,  0.0652, -0.0484],
         [-0.0892,  0.0055,  0.0608, -0.0454],
         [-0.0204,  0.0156, -0.1238,  0.0323],
         [ 0.1443,  0.0266,  0.1002, -0.0322]],

        [[ 0.0634, -0.1199,  0.1377,  0.1177],
         [ 0.0953, -0.0080,  0.0917,  0.1258],
         [ 0.0817, -0.0437,  0.0734, -0.0057],
         [ 0.0237,  0.0038, -0.0514,  0.0629]],

        [[-0.1387, -0.0589,  0.0992, -0.0170],
         [ 0.0031,  0.0119, -0.1375, -0.1187],
         [ 0.0408, -0.0498, -0.0778,  0.0416],
         [ 0.0169, -0.1076,  0.1107,  0.0740]]], device='cuda:0')

In [29]:
model.state_dict()['stages.3.blocks.1.attn.crpb_mlp.3.weight'][0]

tensor([-7.6564e-03, -3.0947e-03,  3.9520e-03,  9.8743e-05, -9.1680e-03,
        -6.5241e-03, -2.9101e-02,  2.5674e-02, -1.5266e-02, -2.6604e-03,
        -2.3008e-02,  1.6138e-02, -5.8916e-03,  2.3412e-03,  2.2706e-02,
         6.3729e-04, -3.1659e-02, -7.6400e-03, -1.1566e-02, -3.8812e-02,
        -1.4659e-02,  2.2244e-02, -6.0563e-03, -9.8120e-03,  2.1949e-02,
        -3.3376e-02, -5.6768e-03, -4.7501e-02, -2.0839e-02,  1.1640e-02,
         2.9832e-02, -5.1324e-02,  4.6017e-03,  1.2906e-02, -1.7035e-02,
        -4.3480e-04,  2.3710e-02, -1.1125e-02,  1.0401e-02, -1.4300e-02,
         6.3200e-03, -2.8566e-02,  3.1112e-03,  3.4539e-02,  4.0399e-02,
         7.7253e-03, -5.3018e-03,  3.3467e-02, -2.1451e-02,  2.1735e-02,
        -1.0947e-04, -1.8737e-02,  3.7710e-02,  2.7607e-02,  8.8451e-03,
        -2.7601e-02, -1.2940e-03, -5.7506e-03, -2.1240e-02, -1.0631e-02,
        -1.7865e-02,  2.1000e-02,  3.1405e-02, -7.4189e-03, -3.4847e-03,
         2.2864e-02, -1.7541e-02, -2.3306e-02,  1.2

## Load Swin v2 config

In [30]:
swin_config = yaml.load(open('config/train.yaml'), Loader=yaml.FullLoader)
swin_config

{'MODEL': {'TYPE': 'swinv2',
  'NAME': 'simmim_train',
  'PRETRAINED': '../../models/swin2/simmim.pth',
  'DROP_PATH_RATE': 0.2,
  'SWIN': {'EMBED_DIM': 96,
   'DEPTHS': [2, 2, 6, 2],
   'NUM_HEADS': [3, 6, 12, 24],
   'WINDOW_SIZE': 7,
   'PATCH_SIZE': 4}},
 'DATA': {'IMG_SIZE': 224,
  'MASK_PATCH_SIZE': 32,
  'MASK_RATIO': 0.6,
  'BATCH_SIZE': 960,
  'NUM_WORKERS': 24,
  'DATA_PATH': '../../data/sports'},
 'TRAIN': {'EPOCHS': 20,
  'WARMUP_EPOCHS': 10,
  'BASE_LR': '1e-4',
  'WEIGHT_DECAY': 0.05,
  'CLIP_GRAD': 5}}

## Load weight from SimMIM Model
- Different Image/Window Size
- Image와 Window 사이즈의 비율은 맞춰야함
  ex) 192÷6 = 224÷7 = 32

In [31]:
def load_pretrained(config, model):
    print(f"==============> Loading weight {config.MODEL.PRETRAINED} for fine-tuning......")
    state_dict = torch.load(config.MODEL.PRETRAINED, map_location='cpu')

    # remain encoder only
    not_encoder_keys = [k for k in state_dict.keys() if 'encoder' not in k]
    for k in not_encoder_keys:
        del state_dict[k]
        
    # remove prefix encoder.
    state_dict = {k.replace('encoder.', ''):v for k, v in state_dict.items()}

    # delete relative_position_index since we always re-init it
    relative_position_index_keys = [k for k in state_dict.keys() if "relative_position_index" in k]
    for k in relative_position_index_keys:
        del state_dict[k]

    # delete relative_coords_table since we always re-init it
    relative_position_index_keys = [k for k in state_dict.keys() if "relative_coords_table" in k]
    for k in relative_position_index_keys:
        del state_dict[k]

    # delete attn_mask since we always re-init it
    attn_mask_keys = [k for k in state_dict.keys() if "attn_mask" in k]
    for k in attn_mask_keys:
        del state_dict[k]

    # bicubic interpolate relative_position_bias_table if not match
    relative_position_bias_table_keys = [k for k in state_dict.keys() if "relative_position_bias_table" in k]
    for k in relative_position_bias_table_keys:
        relative_position_bias_table_pretrained = state_dict[k]
        relative_position_bias_table_current = model.state_dict()[k]
        L1, nH1 = relative_position_bias_table_pretrained.size()
        L2, nH2 = relative_position_bias_table_current.size()
        if nH1 != nH2:
            print(f"Error in loading {k}, passing......")
        else:
            if L1 != L2:
                # bicubic interpolate relative_position_bias_table if not match
                S1 = int(L1 ** 0.5)
                S2 = int(L2 ** 0.5)
                relative_position_bias_table_pretrained_resized = torch.nn.functional.interpolate(
                    relative_position_bias_table_pretrained.permute(1, 0).view(1, nH1, S1, S1), size=(S2, S2),
                    mode='bicubic')
                state_dict[k] = relative_position_bias_table_pretrained_resized.view(nH2, L2).permute(1, 0)

    # bicubic interpolate absolute_pos_embed if not match
    absolute_pos_embed_keys = [k for k in state_dict.keys() if "absolute_pos_embed" in k]
    for k in absolute_pos_embed_keys:
        # dpe
        absolute_pos_embed_pretrained = state_dict[k]
        absolute_pos_embed_current = model.state_dict()[k.replace('encoder.','')]
        _, L1, C1 = absolute_pos_embed_pretrained.size()
        _, L2, C2 = absolute_pos_embed_current.size()
        if C1 != C1:
            print(f"Error in loading {k}, passing......")
        else:
            if L1 != L2:
                S1 = int(L1 ** 0.5)
                S2 = int(L2 ** 0.5)
                absolute_pos_embed_pretrained = absolute_pos_embed_pretrained.reshape(-1, S1, S1, C1)
                absolute_pos_embed_pretrained = absolute_pos_embed_pretrained.permute(0, 3, 1, 2)
                absolute_pos_embed_pretrained_resized = torch.nn.functional.interpolate(
                    absolute_pos_embed_pretrained, size=(S2, S2), mode='bicubic')
                absolute_pos_embed_pretrained_resized = absolute_pos_embed_pretrained_resized.permute(0, 2, 3, 1)
                absolute_pos_embed_pretrained_resized = absolute_pos_embed_pretrained_resized.flatten(1, 2)
                state_dict[k] = absolute_pos_embed_pretrained_resized

    # check classifier, if not match, then re-init classifier to zero
    head_bias_pretrained = state_dict['classifier.bias']
    Nc1 = head_bias_pretrained.shape[0]
    Nc2 = model.classifier.bias.shape[0]
    if (Nc1 != Nc2):
        torch.nn.init.constant_(model.classifier.bias, 0.)
        torch.nn.init.constant_(model.classifier.weight, 0.)
        del state_dict['classifier.weight']
        del state_dict['classifier.bias']
        print(f"Error in loading classifier head, re-init classifier head to 0")

    msg = model.load_state_dict(state_dict, strict=False)
    print(msg)

    print(f"=> loaded successfully '{config.MODEL.PRETRAINED}'")

    torch.cuda.empty_cache()

In [32]:
swin_config = Box(swin_config)
load_pretrained(swin_config, model)

_IncompatibleKeys(missing_keys=['stages.0.blocks.0.attn_mask', 'stages.0.blocks.0.attn.relative_coords_table', 'stages.0.blocks.0.attn.relative_position_index', 'stages.0.blocks.1.attn_mask', 'stages.0.blocks.1.attn.relative_coords_table', 'stages.0.blocks.1.attn.relative_position_index', 'stages.1.blocks.0.attn_mask', 'stages.1.blocks.0.attn.relative_coords_table', 'stages.1.blocks.0.attn.relative_position_index', 'stages.1.blocks.1.attn_mask', 'stages.1.blocks.1.attn.relative_coords_table', 'stages.1.blocks.1.attn.relative_position_index', 'stages.2.blocks.0.attn_mask', 'stages.2.blocks.0.attn.relative_coords_table', 'stages.2.blocks.0.attn.relative_position_index', 'stages.2.blocks.1.attn_mask', 'stages.2.blocks.1.attn.relative_coords_table', 'stages.2.blocks.1.attn.relative_position_index', 'stages.2.blocks.2.attn_mask', 'stages.2.blocks.2.attn.relative_coords_table', 'stages.2.blocks.2.attn.relative_position_index', 'stages.2.blocks.3.attn_mask', 'stages.2.blocks.3.attn.relative_c

## Check Loading Weight Results
- 정상적으로 불러와졌는지 확인

In [33]:
model.state_dict()['embeddings.patch_embeddings.weight'][0]

tensor([[[-0.0885, -0.0692, -0.0402, -0.0523],
         [-0.1125,  0.1111,  0.0967, -0.0874],
         [-0.0227, -0.0821,  0.0540, -0.0839],
         [-0.0902,  0.0955,  0.1533,  0.0951]],

        [[-0.0110, -0.0767, -0.0311, -0.0313],
         [-0.0856, -0.0221, -0.0438,  0.0515],
         [ 0.0448,  0.0269, -0.0029, -0.0253],
         [ 0.1158,  0.0870,  0.0266,  0.0537]],

        [[-0.0808, -0.0060,  0.0093, -0.1139],
         [ 0.0042,  0.0294,  0.0802, -0.1004],
         [-0.0022, -0.1235, -0.0276, -0.0003],
         [-0.0660, -0.1091,  0.0580,  0.0781]]], device='cuda:0')

In [34]:
model.state_dict()['stages.3.blocks.1.attn.crpb_mlp.3.weight'][0]

tensor([-0.1812,  0.1375, -0.1465,  0.0959, -0.2070, -0.1725,  0.2529,  0.1662,
        -0.1603, -0.1488,  0.2225, -0.1905,  0.0231, -0.1829,  0.3743, -0.1590,
        -0.0029,  0.2117, -0.2130, -0.1355, -0.1971, -0.1558, -0.0692, -0.1649,
        -0.0777, -0.0042,  0.1551,  0.1437,  0.1677, -0.1359, -0.1575, -0.1934,
        -0.1672,  0.2042, -0.1687,  0.3358, -0.2298,  0.1301, -0.2281, -0.2121,
        -0.1469,  0.2563, -0.2390, -0.1649, -0.1861, -0.1773,  0.0715,  0.1244,
        -0.1361, -0.1702, -0.1773,  0.1325,  0.2626,  0.3273, -0.1300, -0.1592,
         0.2470,  0.2190, -0.1812, -0.1574, -0.1818, -0.1866, -0.1661,  0.1727,
        -0.0497,  0.3164, -0.1375, -0.1660,  0.1463, -0.1910,  0.2380, -0.1886,
        -0.2017, -0.1808,  0.2015, -0.1037, -0.1745,  0.1759,  0.2304, -0.1498,
        -0.0723,  0.1504, -0.1591, -0.1455,  0.1189,  0.0184, -0.1670,  0.2948,
        -0.1489, -0.2176, -0.1764, -0.0125,  0.2113, -0.1551,  0.1681, -0.1744,
        -0.1858, -0.1692, -0.1872, -0.15

# 6.Stage-2 Traing
- Supervised pre-training

## Define Transform, Loss, etc.

In [35]:
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

In [36]:
# Transforms 정의하기
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8,1), interpolation=transforms.InterpolationMode.LANCZOS),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=0.9, scale=(0.02, 0.33)),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

data_dir = '../../data/sports'
batch_size = 960

train_path = data_dir+'/train'
valid_path = data_dir+'/valid'
test_path = data_dir+'/test'

# dataset load
train_data = ImageFolder(train_path, transform=train_transform)
valid_data = ImageFolder(valid_path, transform=test_transform)
test_data = ImageFolder(test_path, transform=test_transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [37]:
max_norm = 1.0 # paper : 100 with G variants

model.to(device)
model_path = '../../models/swin2/model_w_simmim.pth'

In [38]:
mixup_fn = Mixup(mixup_alpha=.7, 
                cutmix_alpha=.7, 
                prob=.7, 
                switch_prob=0.5, 
                mode='batch',
                label_smoothing=.1,
                num_classes=100)

epochs = 150

criterion = nn.CrossEntropyLoss(label_smoothing=0.)

## Layer-Wise Learning Rate Decay ★

In [39]:
layer_names = []
for i, (name, params) in enumerate(model.named_parameters()):
    lr = base_lr
    print(f'{i}: {name}')
    layer_names.append(name)

0: absolute_pos_embed
1: embeddings.patch_embeddings.weight
2: embeddings.patch_embeddings.bias
3: embeddings.norm.weight
4: embeddings.norm.bias
5: stages.0.blocks.0.attn.t_scale
6: stages.0.blocks.0.attn.crpb_mlp.0.weight
7: stages.0.blocks.0.attn.crpb_mlp.0.bias
8: stages.0.blocks.0.attn.crpb_mlp.3.weight
9: stages.0.blocks.0.attn.qkv.weight
10: stages.0.blocks.0.attn.qkv.bias
11: stages.0.blocks.0.attn.proj.weight
12: stages.0.blocks.0.attn.proj.bias
13: stages.0.blocks.0.norm1.weight
14: stages.0.blocks.0.norm1.bias
15: stages.0.blocks.0.mlp.fc1.weight
16: stages.0.blocks.0.mlp.fc1.bias
17: stages.0.blocks.0.mlp.fc2.weight
18: stages.0.blocks.0.mlp.fc2.bias
19: stages.0.blocks.0.norm2.weight
20: stages.0.blocks.0.norm2.bias
21: stages.0.blocks.1.attn.t_scale
22: stages.0.blocks.1.attn.crpb_mlp.0.weight
23: stages.0.blocks.1.attn.crpb_mlp.0.bias
24: stages.0.blocks.1.attn.crpb_mlp.3.weight
25: stages.0.blocks.1.attn.qkv.weight
26: stages.0.blocks.1.attn.qkv.bias
27: stages.0.blocks

In [40]:
layer_names.reverse()
layer_names[:5]

['classifier.bias',
 'classifier.weight',
 'layernorm.bias',
 'layernorm.weight',
 'stages.3.blocks.1.norm2.bias']

In [41]:
lr      = 1.4e-3      # paper : 1.4e-3
lr_mult = 0.87  # paper : 0.87
weight_decay = 0.01 # paper : 0.1

param_groups = []
prev_group_name = layer_names[0].split('.')[0]

for idx, name in enumerate(layer_names):
    
    cur_group_name = name.split('.')[0]
    
    if cur_group_name != prev_group_name:
        lr *= lr_mult
    prev_group_name = cur_group_name
    weight_decay = 0.01 if ('weight' in name) and ('norm' not in name) else 0
    
    print(f"{idx}: {name}'s lr={lr}, weight_decay={weight_decay}")
    
    param_groups += [{'params': [ p for n, p in model.named_parameters() if n == name and p.requires_grad],
                      'lr' : lr,
                      'weight_decay': weight_decay}]

0: classifier.bias's lr=0.0014, weight_decay=0
1: classifier.weight's lr=0.0014, weight_decay=0.01
2: layernorm.bias's lr=0.001218, weight_decay=0
3: layernorm.weight's lr=0.001218, weight_decay=0
4: stages.3.blocks.1.norm2.bias's lr=0.0010596599999999998, weight_decay=0
5: stages.3.blocks.1.norm2.weight's lr=0.0010596599999999998, weight_decay=0
6: stages.3.blocks.1.mlp.fc2.bias's lr=0.0010596599999999998, weight_decay=0
7: stages.3.blocks.1.mlp.fc2.weight's lr=0.0010596599999999998, weight_decay=0.01
8: stages.3.blocks.1.mlp.fc1.bias's lr=0.0010596599999999998, weight_decay=0
9: stages.3.blocks.1.mlp.fc1.weight's lr=0.0010596599999999998, weight_decay=0.01
10: stages.3.blocks.1.norm1.bias's lr=0.0010596599999999998, weight_decay=0
11: stages.3.blocks.1.norm1.weight's lr=0.0010596599999999998, weight_decay=0
12: stages.3.blocks.1.attn.proj.bias's lr=0.0010596599999999998, weight_decay=0
13: stages.3.blocks.1.attn.proj.weight's lr=0.0010596599999999998, weight_decay=0.01
14: stages.3.b

In [42]:
# # 모델 레이어의 이름 추출
# layer_names = []
# for i, (name, params) in enumerate(model.named_parameters()):
#     lr = base_lr
#     print(f'{i}: {name}')
#     layer_names.append(name)

# # 뒷 레이어부터 시작하도록 뒤집기    
# layer_names.reverse()

# # 하이퍼 파라미터 정의
# lr      = 1.4e-3      # paper : 1.4e-3
# lr_mult = 0.87  # paper : 0.87
# weight_decay = 0.01 # paper : 0.1

# param_groups = []
# prev_group_name = layer_names[0].split('.')[0] # 그룹명 초기화

# for idx, name in enumerate(layer_names):    
#     cur_group_name = name.split('.')[0]    
#     if cur_group_name != prev_group_name: # 동일한 그룹에 속하면 동일한 학습율
#         lr *= lr_mult
#     prev_group_name = cur_group_name    
    
#     param_groups += [{'params': [ p for n, p in model.named_parameters() if n == name and p.requires_grad],
#                       'lr' : lr,
#                       'weight_decay': weight_decay}]

In [43]:
optimizer = optim.AdamW(param_groups)
warmup_steps = int(len(train_loader)*(epochs)*0.1)
train_steps = len(train_loader)*(epochs)
scheduler = transformers.get_cosine_schedule_with_warmup(optimizer, 
                                                        num_warmup_steps=warmup_steps, 
                                                        num_training_steps=train_steps,
                                                        num_cycles=0.5)

## Model Train
- 100에포크 먼저 학습하며 결과 확인하고, 이후 10에포크 학습하며 결과 확인

In [44]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')

# GradScaler 초기화
scaler = GradScaler()

for epoch in range(100):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
text = f"Epoch 당 평균 소요시간 : {training_time / epochs:.2f}초"      
print(text)

Epoch 1: 100%|██████████| 15/15 [00:58<00:00,  3.87s/it]


	Loss: 4.626991844177246, Val Loss: 4.506292343139648, LR: 9.333333333333333e-05, Duration: 59.55 sec - model saved!


Epoch 2: 100%|██████████| 15/15 [01:00<00:00,  4.06s/it]


	Loss: 4.460748704274495, Val Loss: 4.2201457023620605, LR: 0.00018666666666666666, Duration: 62.18 sec - model saved!


Epoch 3: 100%|██████████| 15/15 [00:52<00:00,  3.50s/it]


	Loss: 4.340983549753825, Val Loss: 4.004014015197754, LR: 0.00028000000000000003, Duration: 53.71 sec - model saved!


Epoch 4: 100%|██████████| 15/15 [00:52<00:00,  3.51s/it]


	Loss: 4.222826862335205, Val Loss: 3.8812460899353027, LR: 0.0003733333333333333, Duration: 53.87 sec - model saved!


Epoch 5: 100%|██████████| 15/15 [01:01<00:00,  4.13s/it]


	Loss: 4.161698865890503, Val Loss: 3.8016414642333984, LR: 0.00046666666666666666, Duration: 63.24 sec - model saved!


Epoch 6: 100%|██████████| 15/15 [01:00<00:00,  4.05s/it]


	Loss: 4.084051465988159, Val Loss: 3.6472675800323486, LR: 0.0005600000000000001, Duration: 62.18 sec - model saved!


Epoch 7: 100%|██████████| 15/15 [01:01<00:00,  4.07s/it]


	Loss: 4.062760845820109, Val Loss: 3.4813220500946045, LR: 0.0006533333333333333, Duration: 62.23 sec - model saved!


Epoch 8: 100%|██████████| 15/15 [01:00<00:00,  4.02s/it]


	Loss: 4.024969832102458, Val Loss: 3.3530468940734863, LR: 0.0007466666666666666, Duration: 61.69 sec - model saved!


Epoch 9: 100%|██████████| 15/15 [00:58<00:00,  3.93s/it]


	Loss: 3.831586503982544, Val Loss: 3.214343547821045, LR: 0.0008399999999999999, Duration: 60.13 sec - model saved!


Epoch 10: 100%|██████████| 15/15 [01:04<00:00,  4.30s/it]


	Loss: 3.815111176172892, Val Loss: 3.2054519653320312, LR: 0.0009333333333333333, Duration: 66.16 sec - model saved!


Epoch 11: 100%|██████████| 15/15 [00:58<00:00,  3.89s/it]


	Loss: 3.8399160067240397, Val Loss: 3.1536366939544678, LR: 0.0010266666666666666, Duration: 59.63 sec - model saved!


Epoch 12: 100%|██████████| 15/15 [00:58<00:00,  3.92s/it]


	Loss: 3.725848372777303, Val Loss: 3.017932176589966, LR: 0.0011200000000000001, Duration: 60.04 sec - model saved!


Epoch 13: 100%|██████████| 15/15 [01:02<00:00,  4.17s/it]


	Loss: 3.7307978947957356, Val Loss: 2.867175340652466, LR: 0.0012133333333333334, Duration: 63.92 sec - model saved!


Epoch 14: 100%|██████████| 15/15 [00:57<00:00,  3.87s/it]


	Loss: 3.8719071706136066, Val Loss: 2.814711332321167, LR: 0.0013066666666666667, Duration: 59.20 sec - model saved!


Epoch 15: 100%|██████████| 15/15 [01:03<00:00,  4.22s/it]


	Loss: 3.724492899576823, Val Loss: 2.7849462032318115, LR: 0.0014, Duration: 64.66 sec - model saved!


Epoch 16: 100%|██████████| 15/15 [01:02<00:00,  4.18s/it]


	Loss: 3.608774677912394, Val Loss: 2.673262596130371, LR: 0.001399810468825623, Duration: 64.79 sec - model saved!


Epoch 17: 100%|██████████| 15/15 [01:02<00:00,  4.18s/it]


	Loss: 3.6665937105814614, Val Loss: 2.7115185260772705, LR: 0.0013992419779369672, Duration: 63.71 sec


Epoch 18: 100%|██████████| 15/15 [01:03<00:00,  4.24s/it]


	Loss: 3.524884796142578, Val Loss: 2.4193263053894043, LR: 0.001398294835181877, Duration: 64.85 sec - model saved!


Epoch 19: 100%|██████████| 15/15 [01:02<00:00,  4.19s/it]


	Loss: 3.4447612285614015, Val Loss: 2.377565860748291, LR: 0.001396969553454863, Duration: 64.13 sec - model saved!


Epoch 20: 100%|██████████| 15/15 [01:03<00:00,  4.22s/it]


	Loss: 3.2468255678812663, Val Loss: 2.2881226539611816, LR: 0.0013952668504193602, Duration: 64.48 sec - model saved!


Epoch 21: 100%|██████████| 15/15 [00:59<00:00,  3.95s/it]


	Loss: 3.4544591108957925, Val Loss: 2.360004425048828, LR: 0.0013931876481190993, Duration: 61.16 sec


Epoch 22: 100%|██████████| 15/15 [00:59<00:00,  3.99s/it]


	Loss: 3.494812234242757, Val Loss: 2.245044231414795, LR: 0.0013907330724788056, Duration: 61.15 sec - model saved!


Epoch 23: 100%|██████████| 15/15 [01:00<00:00,  4.03s/it]


	Loss: 3.2503856499989827, Val Loss: 2.3022923469543457, LR: 0.0013879044526944892, Duration: 61.61 sec


Epoch 24: 100%|██████████| 15/15 [00:54<00:00,  3.64s/it]


	Loss: 3.3364601771036786, Val Loss: 2.1666336059570312, LR: 0.001384703320513664, Duration: 55.75 sec - model saved!


Epoch 25: 100%|██████████| 15/15 [00:56<00:00,  3.80s/it]


	Loss: 3.233853832880656, Val Loss: 2.0786757469177246, LR: 0.0013811314094058767, Duration: 58.14 sec - model saved!


Epoch 26: 100%|██████████| 15/15 [01:02<00:00,  4.19s/it]


	Loss: 3.43223287264506, Val Loss: 2.0300819873809814, LR: 0.0013771906536240047, Duration: 64.10 sec - model saved!


Epoch 27: 100%|██████████| 15/15 [01:00<00:00,  4.03s/it]


	Loss: 3.29380095799764, Val Loss: 2.0296013355255127, LR: 0.0013728831871568231, Duration: 62.17 sec - model saved!


Epoch 28: 100%|██████████| 15/15 [00:58<00:00,  3.90s/it]


	Loss: 3.1586191018422443, Val Loss: 1.8927744626998901, LR: 0.0013682113425734124, Duration: 59.76 sec - model saved!


Epoch 29: 100%|██████████| 15/15 [00:59<00:00,  3.94s/it]


	Loss: 3.1102041562398273, Val Loss: 1.8319021463394165, LR: 0.0013631776497600304, Duration: 60.25 sec - model saved!


Epoch 30: 100%|██████████| 15/15 [00:57<00:00,  3.81s/it]


	Loss: 3.1117082913716634, Val Loss: 1.7852400541305542, LR: 0.001357784834550136, Duration: 58.36 sec - model saved!


Epoch 31: 100%|██████████| 15/15 [00:57<00:00,  3.81s/it]


	Loss: 3.0635855833689374, Val Loss: 1.693884015083313, LR: 0.0013520358172482998, Duration: 58.68 sec - model saved!


Epoch 32: 100%|██████████| 15/15 [00:59<00:00,  3.99s/it]


	Loss: 3.1664939085642496, Val Loss: 1.762235403060913, LR: 0.0013459337110488096, Duration: 61.02 sec


Epoch 33: 100%|██████████| 15/15 [00:57<00:00,  3.84s/it]


	Loss: 3.368815231323242, Val Loss: 1.71271550655365, LR: 0.0013394818203498204, Duration: 58.63 sec


Epoch 34: 100%|██████████| 15/15 [00:59<00:00,  3.95s/it]


	Loss: 3.126810598373413, Val Loss: 1.6290462017059326, LR: 0.0013326836389639645, Duration: 60.48 sec - model saved!


Epoch 35: 100%|██████████| 15/15 [01:03<00:00,  4.24s/it]


	Loss: 2.947221803665161, Val Loss: 1.6011756658554077, LR: 0.0013255428482263885, Duration: 64.78 sec - model saved!


Epoch 36: 100%|██████████| 15/15 [00:59<00:00,  3.98s/it]


	Loss: 3.201627318064372, Val Loss: 1.6126904487609863, LR: 0.0013180633150012488, Duration: 61.40 sec


Epoch 37: 100%|██████████| 15/15 [00:59<00:00,  4.00s/it]


	Loss: 3.1338157176971437, Val Loss: 1.625458002090454, LR: 0.0013102490895877336, Duration: 60.92 sec


Epoch 38: 100%|██████████| 15/15 [01:01<00:00,  4.09s/it]


	Loss: 2.950371201833089, Val Loss: 1.5287359952926636, LR: 0.001302104403526756, Duration: 62.59 sec - model saved!


Epoch 39: 100%|██████████| 15/15 [01:02<00:00,  4.14s/it]


	Loss: 2.7992828845977784, Val Loss: 1.4854750633239746, LR: 0.001293633667309498, Duration: 63.30 sec - model saved!


Epoch 40: 100%|██████████| 15/15 [00:58<00:00,  3.88s/it]


	Loss: 2.7886632919311523, Val Loss: 1.4712761640548706, LR: 0.0012848414679890556, Duration: 59.94 sec - model saved!


Epoch 41: 100%|██████████| 15/15 [00:58<00:00,  3.92s/it]


	Loss: 3.024151007334391, Val Loss: 1.4806206226348877, LR: 0.0012757325666964635, Duration: 59.79 sec


Epoch 42: 100%|██████████| 15/15 [00:56<00:00,  3.76s/it]


	Loss: 2.7476744492848715, Val Loss: 1.4383769035339355, LR: 0.0012663118960624632, Duration: 57.53 sec - model saved!


Epoch 43: 100%|██████████| 15/15 [01:00<00:00,  4.05s/it]


	Loss: 3.203972053527832, Val Loss: 1.5764886140823364, LR: 0.0012565845575463934, Duration: 61.67 sec


Epoch 44: 100%|██████████| 15/15 [01:01<00:00,  4.11s/it]


	Loss: 2.6780605634053547, Val Loss: 1.3299708366394043, LR: 0.0012465558186736615, Duration: 63.58 sec - model saved!


Epoch 45: 100%|██████████| 15/15 [01:03<00:00,  4.25s/it]


	Loss: 2.7449944098790486, Val Loss: 1.327117919921875, LR: 0.0012362311101832846, Duration: 65.14 sec - model saved!


Epoch 46: 100%|██████████| 15/15 [01:00<00:00,  4.02s/it]


	Loss: 3.141310199101766, Val Loss: 1.4603554010391235, LR: 0.0012256160230870495, Duration: 61.64 sec


Epoch 47: 100%|██████████| 15/15 [01:00<00:00,  4.03s/it]


	Loss: 2.9438440640767416, Val Loss: 1.4267041683197021, LR: 0.00121471630564188, Duration: 61.33 sec


Epoch 48: 100%|██████████| 15/15 [01:00<00:00,  4.02s/it]


	Loss: 2.9078289826711017, Val Loss: 1.4213134050369263, LR: 0.0012035378602370558, Duration: 61.24 sec


Epoch 49: 100%|██████████| 15/15 [01:01<00:00,  4.08s/it]


	Loss: 2.999505074818929, Val Loss: 1.4604978561401367, LR: 0.0011920867401979632, Duration: 62.20 sec


Epoch 50: 100%|██████████| 15/15 [00:59<00:00,  3.96s/it]


	Loss: 2.783182080586751, Val Loss: 1.2845427989959717, LR: 0.0011803691465081135, Duration: 60.91 sec - model saved!


Epoch 51: 100%|██████████| 15/15 [00:59<00:00,  3.96s/it]


	Loss: 3.024645932515462, Val Loss: 1.3165875673294067, LR: 0.0011683914244512007, Duration: 60.40 sec


Epoch 52: 100%|██████████| 15/15 [01:06<00:00,  4.41s/it]


	Loss: 2.5351617336273193, Val Loss: 1.229169487953186, LR: 0.0011561600601750187, Duration: 67.41 sec - model saved!


Epoch 53: 100%|██████████| 15/15 [01:06<00:00,  4.40s/it]


	Loss: 2.484326791763306, Val Loss: 1.1407653093338013, LR: 0.001143681677179097, Duration: 67.32 sec - model saved!


Epoch 54: 100%|██████████| 15/15 [00:56<00:00,  3.80s/it]


	Loss: 2.9471678098042804, Val Loss: 1.2126445770263672, LR: 0.0011309630327279608, Duration: 58.29 sec


Epoch 55: 100%|██████████| 15/15 [01:00<00:00,  4.05s/it]


	Loss: 2.9026586691538494, Val Loss: 1.2953814268112183, LR: 0.0011180110141919503, Duration: 61.76 sec


Epoch 56: 100%|██████████| 15/15 [00:56<00:00,  3.78s/it]


	Loss: 2.8830923159917194, Val Loss: 1.2230854034423828, LR: 0.0011048326353175905, Duration: 57.68 sec


Epoch 57: 100%|██████████| 15/15 [00:59<00:00,  3.97s/it]


	Loss: 2.5572444279988606, Val Loss: 1.1186017990112305, LR: 0.0010914350324295228, Duration: 60.78 sec - model saved!


Epoch 58: 100%|██████████| 15/15 [00:57<00:00,  3.86s/it]


	Loss: 2.4161037683486937, Val Loss: 1.1070709228515625, LR: 0.0010778254605660592, Duration: 60.95 sec - model saved!


Epoch 59: 100%|██████████| 15/15 [01:02<00:00,  4.20s/it]


	Loss: 3.070221034685771, Val Loss: 1.1393178701400757, LR: 0.0010640112895504506, Duration: 63.95 sec


Epoch 60: 100%|██████████| 15/15 [01:04<00:00,  4.28s/it]


	Loss: 2.817952561378479, Val Loss: 1.1211862564086914, LR: 0.00105, Duration: 65.35 sec


Epoch 61: 100%|██████████| 15/15 [01:03<00:00,  4.21s/it]


	Loss: 2.7799264351526896, Val Loss: 1.0508002042770386, LR: 0.0010357991792751724, Duration: 64.47 sec - model saved!


Epoch 62: 100%|██████████| 15/15 [01:01<00:00,  4.12s/it]


	Loss: 2.5487555901209515, Val Loss: 1.0566086769104004, LR: 0.001021416517370908, Duration: 62.77 sec


Epoch 63: 100%|██████████| 15/15 [01:04<00:00,  4.30s/it]


	Loss: 2.059247899055481, Val Loss: 0.9606173038482666, LR: 0.001006859802752354, Duration: 65.80 sec - model saved!


Epoch 64: 100%|██████████| 15/15 [00:57<00:00,  3.80s/it]


	Loss: 2.4399437506993613, Val Loss: 1.143599033355713, LR: 0.0009921369181372726, Duration: 57.97 sec


Epoch 65: 100%|██████████| 15/15 [00:55<00:00,  3.72s/it]


	Loss: 2.580682571729024, Val Loss: 1.0343647003173828, LR: 0.0009772558362274098, Duration: 56.75 sec


Epoch 66: 100%|██████████| 15/15 [01:03<00:00,  4.24s/it]


	Loss: 2.5591801246007284, Val Loss: 0.950772762298584, LR: 0.0009622246153911386, Duration: 64.81 sec - model saved!


Epoch 67: 100%|██████████| 15/15 [00:59<00:00,  3.99s/it]


	Loss: 2.620518128077189, Val Loss: 0.9949902296066284, LR: 0.0009470513952997081, Duration: 60.84 sec


Epoch 68: 100%|██████████| 15/15 [01:06<00:00,  4.41s/it]


	Loss: 2.5110288143157957, Val Loss: 1.0437566041946411, LR: 0.0009317443925194707, Duration: 67.10 sec


Epoch 69: 100%|██████████| 15/15 [01:05<00:00,  4.39s/it]


	Loss: 2.5541162331899008, Val Loss: 0.9843952059745789, LR: 0.0009163118960624632, Duration: 66.84 sec


Epoch 70: 100%|██████████| 15/15 [00:57<00:00,  3.83s/it]


	Loss: 2.6537261883417766, Val Loss: 1.0147548913955688, LR: 0.0009007622628977632, Duration: 58.40 sec


Epoch 71: 100%|██████████| 15/15 [01:02<00:00,  4.14s/it]


	Loss: 2.617645263671875, Val Loss: 0.990867555141449, LR: 0.0008851039134260417, Duration: 63.20 sec


Epoch 72: 100%|██████████| 15/15 [01:01<00:00,  4.11s/it]


	Loss: 2.430638233820597, Val Loss: 0.9600546956062317, LR: 0.0008693453269197673, Duration: 62.62 sec


Epoch 73: 100%|██████████| 15/15 [00:57<00:00,  3.82s/it]


	Loss: 2.5182886123657227, Val Loss: 0.968648374080658, LR: 0.0008534950369315323, Duration: 59.01 sec


Epoch 74: 100%|██████████| 15/15 [00:57<00:00,  3.83s/it]


	Loss: 2.3815385421117146, Val Loss: 0.8900267481803894, LR: 0.0008375616266729811, Duration: 58.68 sec - model saved!


Epoch 75: 100%|██████████| 15/15 [00:59<00:00,  3.96s/it]


	Loss: 2.1455774704615274, Val Loss: 0.8932757377624512, LR: 0.0008215537243668514, Duration: 60.39 sec


Epoch 76: 100%|██████████| 15/15 [01:01<00:00,  4.08s/it]


	Loss: 2.3128055810928343, Val Loss: 0.8197294473648071, LR: 0.0008054799985746381, Duration: 62.43 sec - model saved!


Epoch 77: 100%|██████████| 15/15 [01:00<00:00,  4.06s/it]


	Loss: 2.588639012972514, Val Loss: 0.9120055437088013, LR: 0.0007893491535024164, Duration: 61.80 sec


Epoch 78: 100%|██████████| 15/15 [01:02<00:00,  4.14s/it]


	Loss: 2.025572498639425, Val Loss: 0.8358860611915588, LR: 0.0007731699242873575, Duration: 63.14 sec


Epoch 79: 100%|██████████| 15/15 [01:01<00:00,  4.10s/it]


	Loss: 2.215488338470459, Val Loss: 0.8271394968032837, LR: 0.0007569510722675008, Duration: 62.45 sec


Epoch 80: 100%|██████████| 15/15 [00:58<00:00,  3.88s/it]


	Loss: 1.8983399311701457, Val Loss: 0.7774518728256226, LR: 0.000740701380237333, Duration: 59.39 sec - model saved!


Epoch 81: 100%|██████████| 15/15 [00:58<00:00,  3.88s/it]


	Loss: 2.1339569807052614, Val Loss: 0.7692208290100098, LR: 0.0007244296476917508, Duration: 59.44 sec - model saved!


Epoch 82: 100%|██████████| 15/15 [01:08<00:00,  4.58s/it]


	Loss: 2.211329468091329, Val Loss: 0.8157359957695007, LR: 0.0007081446860609781, Duration: 69.62 sec


Epoch 83: 100%|██████████| 15/15 [01:02<00:00,  4.19s/it]


	Loss: 2.3808172941207886, Val Loss: 0.8505873084068298, LR: 0.0006918553139390222, Duration: 64.92 sec


Epoch 84: 100%|██████████| 15/15 [01:03<00:00,  4.25s/it]


	Loss: 2.097442150115967, Val Loss: 0.7704806327819824, LR: 0.0006755703523082495, Duration: 64.65 sec


Epoch 85: 100%|██████████| 15/15 [01:05<00:00,  4.38s/it]


	Loss: 2.5336355129877726, Val Loss: 0.798114001750946, LR: 0.000659298619762667, Duration: 66.59 sec


Epoch 86: 100%|██████████| 15/15 [00:59<00:00,  3.96s/it]


	Loss: 2.468687383333842, Val Loss: 0.8326034545898438, LR: 0.0006430489277324992, Duration: 60.48 sec


Epoch 87: 100%|██████████| 15/15 [00:59<00:00,  3.99s/it]


	Loss: 2.0093989928563434, Val Loss: 0.7356250882148743, LR: 0.0006268300757126426, Duration: 60.96 sec - model saved!


Epoch 88: 100%|██████████| 15/15 [00:58<00:00,  3.91s/it]


	Loss: 2.146946597099304, Val Loss: 0.7460469007492065, LR: 0.0006106508464975837, Duration: 59.55 sec


Epoch 89: 100%|██████████| 15/15 [01:00<00:00,  4.06s/it]


	Loss: 2.3722941716512045, Val Loss: 0.8162336945533752, LR: 0.0005945200014253619, Duration: 61.88 sec


Epoch 90: 100%|██████████| 15/15 [01:01<00:00,  4.11s/it]


	Loss: 2.0552615404129027, Val Loss: 0.741635262966156, LR: 0.0005784462756331488, Duration: 62.56 sec


Epoch 91: 100%|██████████| 15/15 [00:59<00:00,  3.96s/it]


	Loss: 2.1116454521814982, Val Loss: 0.8096517324447632, LR: 0.0005624383733270188, Duration: 60.32 sec


Epoch 92: 100%|██████████| 15/15 [00:56<00:00,  3.79s/it]


	Loss: 2.2188883145650227, Val Loss: 0.8021658658981323, LR: 0.0005465049630684676, Duration: 58.25 sec


Epoch 93: 100%|██████████| 15/15 [00:57<00:00,  3.85s/it]


	Loss: 2.2783074378967285, Val Loss: 0.7974361777305603, LR: 0.0005306546730802327, Duration: 58.65 sec


Epoch 94: 100%|██████████| 15/15 [00:57<00:00,  3.83s/it]


	Loss: 2.255167826016744, Val Loss: 0.7705899477005005, LR: 0.0005148960865739587, Duration: 58.52 sec


Epoch 95: 100%|██████████| 15/15 [00:59<00:00,  3.97s/it]


	Loss: 2.312365436553955, Val Loss: 0.7779889702796936, LR: 0.000499237737102237, Duration: 60.59 sec


Epoch 96: 100%|██████████| 15/15 [01:00<00:00,  4.00s/it]


	Loss: 2.0388551632563274, Val Loss: 0.75716632604599, LR: 0.0004836881039375369, Duration: 61.02 sec


Epoch 97: 100%|██████████| 15/15 [00:59<00:00,  3.98s/it]


	Loss: 2.162968667348226, Val Loss: 0.7820665240287781, LR: 0.0004682556074805294, Duration: 60.59 sec


Epoch 98: 100%|██████████| 15/15 [01:00<00:00,  4.01s/it]


	Loss: 2.307865786552429, Val Loss: 0.7429741024971008, LR: 0.00045294860470029185, Duration: 61.19 sec


Epoch 99: 100%|██████████| 15/15 [00:58<00:00,  3.92s/it]


	Loss: 1.9684695959091187, Val Loss: 0.7502778172492981, LR: 0.0004377753846088615, Duration: 59.86 sec


Epoch 100: 100%|██████████| 15/15 [00:58<00:00,  3.92s/it]


	Loss: 2.2147297620773316, Val Loss: 0.790279746055603, LR: 0.0004227441637725902, Duration: 59.84 sec
Epoch 당 평균 소요시간 : 41.04초


In [45]:
from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

In [46]:
# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics

Unnamed: 0,Metric,Value
0,Accuracy,0.836
1,Precision,0.856131
2,Recall,0.836
3,F1 Score,0.826792


### 110 Epoch Result

In [47]:
for epoch in range(10):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
text = f"Epoch 당 평균 소요시간 : {training_time / epochs:.2f}초"      
print(text)

# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics

Epoch 1: 100%|██████████| 15/15 [00:58<00:00,  3.93s/it]


	Loss: 1.8434817949930826, Val Loss: 0.714182436466217, LR: 0.0004078630818627275, Duration: 60.17 sec - model saved!


Epoch 2: 100%|██████████| 15/15 [00:58<00:00,  3.91s/it]


	Loss: 2.0573649326960246, Val Loss: 0.7710936665534973, LR: 0.00039314019724764573, Duration: 60.87 sec


Epoch 3: 100%|██████████| 15/15 [01:02<00:00,  4.15s/it]


	Loss: 2.2468320687611896, Val Loss: 0.770478367805481, LR: 0.0003785834826290917, Duration: 63.45 sec


Epoch 4: 100%|██████████| 15/15 [00:58<00:00,  3.87s/it]


	Loss: 1.833013939857483, Val Loss: 0.7523660063743591, LR: 0.00036420082072482785, Duration: 58.97 sec


Epoch 5: 100%|██████████| 15/15 [00:51<00:00,  3.40s/it]


	Loss: 1.9475464502970377, Val Loss: 0.7664220929145813, LR: 0.00035000000000000016, Duration: 51.98 sec


Epoch 6:  87%|████████▋ | 13/15 [00:46<00:07,  3.57s/it]

### 120 Epoch Result

In [None]:
for epoch in range(10):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
text = f"Epoch 당 평균 소요시간 : {training_time / epochs:.2f}초"      
print(text)

# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics

### 130 Epoch Result

In [None]:
for epoch in range(10):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
text = f"Epoch 당 평균 소요시간 : {training_time / epochs:.2f}초"      
print(text)

# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics

### 140 Epoch Result

In [None]:
for epoch in range(10):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
text = f"Epoch 당 평균 소요시간 : {training_time / epochs:.2f}초"      
print(text)

# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics

### 150 Epoch Result

In [None]:
for epoch in range(10):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        inputs, labels = mixup_fn(inputs, labels)
        optimizer.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
        # 스케일링된 그라디언트 계산
        scaler.scale(loss).backward()

        # 그라디언트 클리핑 전에 스케일링 제거
        scaler.unscale_(optimizer)
        clip_grad_norm_(model.parameters(), max_norm=max_norm)

        # 옵티마이저 스텝 및 스케일러 업데이트
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in valid_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
    val_loss /= len(valid_loader)
    val_losses.append(val_loss)
    
    # 모델 저장
    if val_loss < best_loss:
        best_loss = val_loss
        vit_save = True
        if vit_save:
            torch.save(model.state_dict(), model_path)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
    
    if vit_save:
        text += f' - model saved!'
        vit_save = False

    print(text)
        
text = f"Epoch 당 평균 소요시간 : {training_time / epochs:.2f}초"      
print(text)

# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
performance_metrics