In [1]:
import torch
import torch.nn as nn

import torch.optim as optim
from torchvision import transforms
from torchvision.datasets import ImageFolder
import torchvision.transforms.functional as F
from torch.utils.data import DataLoader

from torch.cuda.amp import autocast, GradScaler
from torch.nn.utils import clip_grad_norm_
from tqdm import tqdm
import time

from timm.data import Mixup
from timm.utils import ModelEmaV3
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy
import transformers

from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from torchsummary import summary

from convnext_v1 import load_convNext_v1
from convnext_v2 import load_convNext
import math
import warnings
from torch.optim.lr_scheduler import _LRScheduler

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class CosineWarmupScheduler(_LRScheduler):
    def __init__(self, optimizer, num_warmup_steps, num_training_steps, num_cycles=0.5, min_lr=1e-6, last_epoch=-1, verbose=False):
        self.num_warmup_steps = num_warmup_steps
        self.num_training_steps = num_training_steps
        self.num_cycles = num_cycles
        self.min_lr = min_lr
        self.base_lrs = [group['lr'] for group in optimizer.param_groups]
        super().__init__(optimizer, last_epoch, verbose)

    def get_lr(self):
        if not self._get_lr_called_within_step:
            warnings.warn("To get the last learning rate computed by the scheduler, "
                          "please use `get_last_lr()`.", UserWarning)
        
        lrs = []
        for base_lr in self.base_lrs:
            if self.last_epoch < self.num_warmup_steps:
                # Linear warmup
                lr = (base_lr - self.min_lr) * self.last_epoch / max(1, self.num_warmup_steps) + self.min_lr
            else:
                # Cosine annealing
                progress = (self.last_epoch - self.num_warmup_steps) / max(1, self.num_training_steps - self.num_warmup_steps)
                lr = self.min_lr + (base_lr - self.min_lr) * 0.5 * (1 + math.cos(math.pi * self.num_cycles * 2.0 * progress))
            lrs.append(lr)
        return lrs

In [3]:
model_v1 = load_convNext_v1()

In [4]:
# 총 파라미터 수 계산
total_params = sum(p.numel() for p in model_v1.parameters())

# 학습 가능한 파라미터 수 계산
trainable_params = sum(p.numel() for p in model_v1.parameters() if p.requires_grad)

print('='*80)
print(f"\nTotal Parameters: {total_params:,}")
print(f"Trainable Parameters: {trainable_params:,}\n")
print('='*80)


Total Parameters: 27,897,028
Trainable Parameters: 27,897,028



In [5]:
# Transforms 정의하기
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.6,1), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=1., scale=(0.02, 0.33)),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

data_dir = '../../data/sports'
batch_size = 512

train_path = data_dir+'/train'
valid_path = data_dir+'/valid'
test_path = data_dir+'/test'

# dataset load
train_data = ImageFolder(train_path, transform=train_transform)
valid_data = ImageFolder(valid_path, transform=test_transform)
test_data = ImageFolder(test_path, transform=test_transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

device = 'cuda:3'
max_norm = 3.0 

model_v1.to(device)

model_ema = None
ema_active = True
if ema_active:
    ema_decay = 0.9998
    model_ema = ModelEmaV3(
        model_v1,
        decay=ema_decay,
    )
    print(f"Using EMA with decay = {ema_decay}")

model_path = ''

mixup = True
if mixup :
    mixup_fn = Mixup(mixup_alpha=.8, 
                    cutmix_alpha=1., 
                    prob=1., 
                    switch_prob=0.5, 
                    mode='batch',
                    label_smoothing=.1,
                    num_classes=100)
    
    criterion = SoftTargetCrossEntropy()
else :
    criterion = LabelSmoothingCrossEntropy(.1)
    
criterion = nn.CrossEntropyLoss(label_smoothing=0.)

epochs = 100

optimizer = optim.AdamW(model_v1.parameters(), lr=4e-3, weight_decay=0.05)
warmup_steps = int(len(train_loader)*(epochs)*0.1)
train_steps = len(train_loader)*(epochs)
scheduler = CosineWarmupScheduler(optimizer, 
                                num_warmup_steps=warmup_steps, 
                                num_training_steps=train_steps,
                                num_cycles=0.5,
                                min_lr=1e-7)

Using EMA with decay = 0.9998




In [6]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')

# GradScaler 초기화
scaler = GradScaler()

for i in range(epochs // 100):
    for epoch in range(100):
        model_v1.train()
        start_time = time.time()
        running_loss = 0.0
        pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1 + i*100}")
        
        for _, data in pbar:
            inputs, labels = data[0].to(device), data[1].to(device)
            inputs, labels = mixup_fn(inputs, labels)
            optimizer.zero_grad()

            # AutoCast 적용
            with autocast():
                outputs = model_v1(inputs)
                loss = criterion(outputs, labels)
                
            # 스케일링된 그라디언트 계산
            scaler.scale(loss).backward()

            # 그라디언트 클리핑 전에 스케일링 제거
            scaler.unscale_(optimizer)
            clip_grad_norm_(model_v1.parameters(), max_norm=max_norm)

            # 옵티마이저 스텝 및 스케일러 업데이트
            scaler.step(optimizer)
            scaler.update()
            
            # EMA 모델 업데이트
            if model_ema is not None:
                model_ema.update(model_v1)
                
            scheduler.step()
                
            lr = optimizer.param_groups[0]["lr"]
            lrs.append(lr)
            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        losses.append(epoch_loss)        

        model_v1.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data in valid_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                outputs = model_v1(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
        val_loss /= len(valid_loader)
        val_losses.append(val_loss)
        
        # 모델 저장
        total_loss = val_loss + epoch_loss
        if total_loss < best_loss:
            best_loss = total_loss
            model_save = False
            if model_save:
                torch.save(model_v1.state_dict(), model_path)

        epoch_duration = time.time() - start_time
        training_time += epoch_duration
        
        text = f'\tLoss: {epoch_loss:.4f}, Val_Loss: {val_loss:.4f}, Total Mean Loss: {total_loss/2:.4f}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
        
        if model_save:
            text += f' - model saved!'
            model_save = False

        print(text)

    # 예측 수행 및 레이블 저장
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model_v1(images)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # 혼동 행렬 생성
    cm = confusion_matrix(all_labels, all_preds)

    # 예측과 실제 레이블
    y_true = all_labels  # 실제 레이블
    y_pred = all_preds  # 모델에 의해 예측된 레이블

    # 전체 데이터셋에 대한 정확도
    accuracy = accuracy_score(y_true, y_pred)

    # 평균 정밀도, 리콜, F1-Score ('weighted')
    precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

    # 판다스 데이터프레임으로 결과 정리
    performance_metrics = pd.DataFrame({
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
        'Value': [accuracy, precision, recall, f1_score]
    })

    # 데이터프레임 출력
    print(f"\n[{i*100+100} epoch result]\n", performance_metrics)

Epoch 1: 100%|██████████| 27/27 [00:56<00:00,  2.08s/it]


	Loss: 4.4774, Val_Loss: 4.0328, Total Mean Loss: 4.2551, LR: 0.00040008999999999997, Duration: 57.10 sec


Epoch 2: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 4.3636, Val_Loss: 3.9938, Total Mean Loss: 4.1787, LR: 0.0008000799999999999, Duration: 56.24 sec


Epoch 3: 100%|██████████| 27/27 [00:55<00:00,  2.06s/it]


	Loss: 4.3047, Val_Loss: 3.7656, Total Mean Loss: 4.0352, LR: 0.0012000700000000001, Duration: 56.54 sec


Epoch 4: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 4.2676, Val_Loss: 3.7741, Total Mean Loss: 4.0209, LR: 0.00160006, Duration: 56.30 sec


Epoch 5: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 4.1701, Val_Loss: 3.5921, Total Mean Loss: 3.8811, LR: 0.0020000499999999997, Duration: 56.13 sec


Epoch 6: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 4.1979, Val_Loss: 3.5395, Total Mean Loss: 3.8687, LR: 0.00240004, Duration: 56.20 sec


Epoch 7: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 4.1892, Val_Loss: 3.5515, Total Mean Loss: 3.8703, LR: 0.00280003, Duration: 56.03 sec


Epoch 8: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 4.0846, Val_Loss: 3.3567, Total Mean Loss: 3.7207, LR: 0.0032000199999999996, Duration: 56.08 sec


Epoch 9: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 4.0832, Val_Loss: 3.2658, Total Mean Loss: 3.6745, LR: 0.00360001, Duration: 56.34 sec


Epoch 10: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.9733, Val_Loss: 3.2181, Total Mean Loss: 3.5957, LR: 0.004, Duration: 56.18 sec


Epoch 11: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.9585, Val_Loss: 3.1206, Total Mean Loss: 3.5395, LR: 0.003998781684496841, Duration: 56.13 sec


Epoch 12: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.9332, Val_Loss: 2.9625, Total Mean Loss: 3.4478, LR: 0.003995128222317136, Duration: 56.06 sec


Epoch 13: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 3.9000, Val_Loss: 2.9625, Total Mean Loss: 3.4313, LR: 0.003989044064641779, Duration: 55.92 sec


Epoch 14: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.8427, Val_Loss: 2.8141, Total Mean Loss: 3.3284, LR: 0.0039805366240797035, Duration: 56.11 sec


Epoch 15: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 3.7609, Val_Loss: 2.6553, Total Mean Loss: 3.2081, LR: 0.003969616265636766, Duration: 55.97 sec


Epoch 16: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.8194, Val_Loss: 2.6457, Total Mean Loss: 3.2325, LR: 0.003956296294087574, Duration: 55.85 sec


Epoch 17: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 3.6426, Val_Loss: 2.5182, Total Mean Loss: 3.0804, LR: 0.003940592937765679, Duration: 55.96 sec


Epoch 18: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.6091, Val_Loss: 2.3043, Total Mean Loss: 2.9567, LR: 0.003922525328791841, Duration: 56.04 sec


Epoch 19: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.7503, Val_Loss: 2.3364, Total Mean Loss: 3.0433, LR: 0.0039021154797644923, Duration: 55.73 sec


Epoch 20: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.5655, Val_Loss: 2.2997, Total Mean Loss: 2.9326, LR: 0.0038793882569407774, Duration: 55.96 sec


Epoch 21: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.5359, Val_Loss: 2.1852, Total Mean Loss: 2.8605, LR: 0.0038543713499408464, Duration: 55.90 sec


Epoch 22: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.5495, Val_Loss: 2.2021, Total Mean Loss: 2.8758, LR: 0.003827095238012319, Duration: 56.02 sec


Epoch 23: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.4314, Val_Loss: 1.8701, Total Mean Loss: 2.6508, LR: 0.003797593152896019, Duration: 55.81 sec


Epoch 24: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 3.5395, Val_Loss: 2.0334, Total Mean Loss: 2.7864, LR: 0.0037659010383382105, Duration: 55.96 sec


Epoch 25: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.3701, Val_Loss: 1.9876, Total Mean Loss: 2.6788, LR: 0.003732057506298688, Duration: 55.82 sec


Epoch 26: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.3378, Val_Loss: 1.7445, Total Mean Loss: 2.5411, LR: 0.0036961037899080436, Duration: 55.90 sec


Epoch 27: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.2249, Val_Loss: 1.6983, Total Mean Loss: 2.4616, LR: 0.0036580836932314552, Duration: 55.84 sec


Epoch 28: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 3.3712, Val_Loss: 1.7954, Total Mean Loss: 2.5833, LR: 0.003618043537900176, Duration: 56.32 sec


Epoch 29: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.2627, Val_Loss: 1.6444, Total Mean Loss: 2.4536, LR: 0.003576032106675763, Duration: 55.88 sec


Epoch 30: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.2453, Val_Loss: 1.6847, Total Mean Loss: 2.4650, LR: 0.0035321005840157995, Duration: 55.90 sec


Epoch 31: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.0739, Val_Loss: 1.4796, Total Mean Loss: 2.2767, LR: 0.0034863024937135142, Duration: 55.85 sec


Epoch 32: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.2991, Val_Loss: 1.4953, Total Mean Loss: 2.3972, LR: 0.003438693633687285, Duration: 56.03 sec


Epoch 33: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 3.0333, Val_Loss: 1.4960, Total Mean Loss: 2.2647, LR: 0.0033893320079994714, Duration: 55.97 sec


Epoch 34: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.1110, Val_Loss: 1.4444, Total Mean Loss: 2.2777, LR: 0.003338277756187398, Duration: 56.15 sec


Epoch 35: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.1195, Val_Loss: 1.5030, Total Mean Loss: 2.3113, LR: 0.003285593079992594, Duration: 56.06 sec


Epoch 36: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.1100, Val_Loss: 1.3832, Total Mean Loss: 2.2466, LR: 0.00323134216757755, Duration: 56.12 sec


Epoch 37: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.0795, Val_Loss: 1.4472, Total Mean Loss: 2.2634, LR: 0.0031755911153223313, Duration: 56.11 sec


Epoch 38: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.9890, Val_Loss: 1.2568, Total Mean Loss: 2.1229, LR: 0.0031184078472963196, Duration: 56.06 sec


Epoch 39: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 3.1263, Val_Loss: 1.3073, Total Mean Loss: 2.2168, LR: 0.003059862032503198, Duration: 55.97 sec


Epoch 40: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.0529, Val_Loss: 1.2554, Total Mean Loss: 2.1542, LR: 0.0030000249999999995, Duration: 56.05 sec


Epoch 41: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.7628, Val_Loss: 1.1780, Total Mean Loss: 1.9704, LR: 0.002938969651993642, Duration: 56.07 sec


Epoch 42: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 2.8932, Val_Loss: 1.1685, Total Mean Loss: 2.0308, LR: 0.002876770375020815, Duration: 55.86 sec


Epoch 43: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.8992, Val_Loss: 1.2290, Total Mean Loss: 2.0641, LR: 0.0028135029493194467, Duration: 56.12 sec


Epoch 44: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 2.9412, Val_Loss: 1.2404, Total Mean Loss: 2.0908, LR: 0.0027492444565021534, Duration: 56.27 sec


Epoch 45: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 2.9296, Val_Loss: 1.1521, Total Mean Loss: 2.0409, LR: 0.0026840731856441714, Duration: 55.90 sec


Epoch 46: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.6970, Val_Loss: 1.0895, Total Mean Loss: 1.8932, LR: 0.0026180685379001757, Duration: 56.01 sec


Epoch 47: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.5881, Val_Loss: 0.9863, Total Mean Loss: 1.7872, LR: 0.002551310929766207, Duration: 55.95 sec


Epoch 48: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 2.9589, Val_Loss: 1.0798, Total Mean Loss: 2.0193, LR: 0.002483881695104555, Duration: 55.89 sec


Epoch 49: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.7937, Val_Loss: 1.0734, Total Mean Loss: 1.9336, LR: 0.0024158629860509774, Duration: 55.96 sec


Epoch 50: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.8662, Val_Loss: 1.0298, Total Mean Loss: 1.9480, LR: 0.0023473376729249776, Duration: 56.12 sec


Epoch 51: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.6298, Val_Loss: 1.0176, Total Mean Loss: 1.8237, LR: 0.0022783892432650826, Duration: 55.98 sec


Epoch 52: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2934, Val_Loss: 0.9682, Total Mean Loss: 1.6308, LR: 0.0022091017001121434, Duration: 56.07 sec


Epoch 53: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 2.7997, Val_Loss: 1.0152, Total Mean Loss: 1.9075, LR: 0.002139559459664563, Duration: 55.89 sec


Epoch 54: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.5031, Val_Loss: 0.9415, Total Mean Loss: 1.7223, LR: 0.0020698472484301667, Duration: 56.06 sec


Epoch 55: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 2.3984, Val_Loss: 0.9141, Total Mean Loss: 1.6562, LR: 0.0020000499999999997, Duration: 55.90 sec


Epoch 56: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.5740, Val_Loss: 0.8883, Total Mean Loss: 1.7312, LR: 0.0019302527515698336, Duration: 55.99 sec


Epoch 57: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.6690, Val_Loss: 0.9551, Total Mean Loss: 1.8120, LR: 0.0018605405403354365, Duration: 55.98 sec


Epoch 58: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.6134, Val_Loss: 0.9461, Total Mean Loss: 1.7797, LR: 0.0017909982998878568, Duration: 56.05 sec


Epoch 59: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.5852, Val_Loss: 0.9271, Total Mean Loss: 1.7561, LR: 0.0017217107567349176, Duration: 56.02 sec


Epoch 60: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 2.5558, Val_Loss: 0.8173, Total Mean Loss: 1.6865, LR: 0.0016527623270750228, Duration: 56.21 sec


Epoch 61: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.5551, Val_Loss: 0.8222, Total Mean Loss: 1.6887, LR: 0.0015842370139490226, Duration: 55.97 sec


Epoch 62: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.5135, Val_Loss: 0.8065, Total Mean Loss: 1.6600, LR: 0.0015162183048954448, Duration: 55.95 sec


Epoch 63: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.4218, Val_Loss: 0.8387, Total Mean Loss: 1.6302, LR: 0.0014487890702337925, Duration: 56.10 sec


Epoch 64: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.3691, Val_Loss: 0.8183, Total Mean Loss: 1.5937, LR: 0.001382031462099824, Duration: 56.03 sec


Epoch 65: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.4415, Val_Loss: 0.8186, Total Mean Loss: 1.6301, LR: 0.001316026814355829, Duration: 56.01 sec


Epoch 66: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.4425, Val_Loss: 0.8346, Total Mean Loss: 1.6386, LR: 0.0012508555434978467, Duration: 56.06 sec


Epoch 67: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.3497, Val_Loss: 0.7826, Total Mean Loss: 1.5661, LR: 0.0011865970506805537, Duration: 56.03 sec


Epoch 68: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 2.3265, Val_Loss: 0.7799, Total Mean Loss: 1.5532, LR: 0.0011233296249791845, Duration: 55.93 sec


Epoch 69: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.5047, Val_Loss: 0.8032, Total Mean Loss: 1.6540, LR: 0.0010611303480063583, Duration: 56.02 sec


Epoch 70: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 2.0608, Val_Loss: 0.7569, Total Mean Loss: 1.4088, LR: 0.0010000750000000004, Duration: 56.27 sec


Epoch 71: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2896, Val_Loss: 0.7690, Total Mean Loss: 1.5293, LR: 0.000940237967496802, Duration: 56.04 sec


Epoch 72: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.3211, Val_Loss: 0.7482, Total Mean Loss: 1.5347, LR: 0.0008816921527036801, Duration: 55.96 sec


Epoch 73: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.3094, Val_Loss: 0.7497, Total Mean Loss: 1.5295, LR: 0.0008245088846776685, Duration: 55.97 sec


Epoch 74: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.3828, Val_Loss: 0.7911, Total Mean Loss: 1.5869, LR: 0.0007687578324224496, Duration: 55.95 sec


Epoch 75: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.3892, Val_Loss: 0.7802, Total Mean Loss: 1.5847, LR: 0.0007145069200074055, Duration: 56.00 sec


Epoch 76: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 2.3461, Val_Loss: 0.7231, Total Mean Loss: 1.5346, LR: 0.000661822243812602, Duration: 56.24 sec


Epoch 77: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.1545, Val_Loss: 0.7566, Total Mean Loss: 1.4556, LR: 0.0006107679920005282, Duration: 56.03 sec


Epoch 78: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 2.2703, Val_Loss: 0.7705, Total Mean Loss: 1.5204, LR: 0.0005614063663127149, Duration: 56.34 sec


Epoch 79: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.3561, Val_Loss: 0.7499, Total Mean Loss: 1.5530, LR: 0.000513797506286485, Duration: 56.12 sec


Epoch 80: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2656, Val_Loss: 0.7683, Total Mean Loss: 1.5169, LR: 0.00046799941598420013, Duration: 56.14 sec


Epoch 81: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2729, Val_Loss: 0.7432, Total Mean Loss: 1.5081, LR: 0.0004240678933242365, Duration: 56.04 sec


Epoch 82: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2517, Val_Loss: 0.7783, Total Mean Loss: 1.5150, LR: 0.00038205646209982404, Duration: 56.18 sec


Epoch 83: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2930, Val_Loss: 0.7482, Total Mean Loss: 1.5206, LR: 0.0003420163067685445, Duration: 56.11 sec


Epoch 84: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.1917, Val_Loss: 0.7575, Total Mean Loss: 1.4746, LR: 0.0003039962100919559, Duration: 56.06 sec


Epoch 85: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2999, Val_Loss: 0.7637, Total Mean Loss: 1.5318, LR: 0.0002680424937013118, Duration: 56.07 sec


Epoch 86: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.1889, Val_Loss: 0.7282, Total Mean Loss: 1.4586, LR: 0.00023419896166178896, Duration: 56.07 sec


Epoch 87: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.1473, Val_Loss: 0.7745, Total Mean Loss: 1.4609, LR: 0.0002025068471039813, Duration: 56.04 sec


Epoch 88: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.1531, Val_Loss: 0.8005, Total Mean Loss: 1.4768, LR: 0.00017300476198768016, Duration: 56.04 sec


Epoch 89: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2986, Val_Loss: 0.7652, Total Mean Loss: 1.5319, LR: 0.00014572865005915372, Duration: 56.19 sec


Epoch 90: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.3740, Val_Loss: 0.7677, Total Mean Loss: 1.5708, LR: 0.00012071174305922266, Duration: 56.13 sec


Epoch 91: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2788, Val_Loss: 0.7759, Total Mean Loss: 1.5273, LR: 9.79845202355077e-05, Duration: 56.12 sec


Epoch 92: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 2.1289, Val_Loss: 0.7813, Total Mean Loss: 1.4551, LR: 7.757467120815912e-05, Duration: 56.22 sec


Epoch 93: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.0349, Val_Loss: 0.7657, Total Mean Loss: 1.4003, LR: 5.950706223432085e-05, Duration: 56.15 sec


Epoch 94: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.0171, Val_Loss: 0.7645, Total Mean Loss: 1.3908, LR: 4.3803705912425316e-05, Duration: 55.99 sec


Epoch 95: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.0323, Val_Loss: 0.7693, Total Mean Loss: 1.4008, LR: 3.0483734363234566e-05, Duration: 56.10 sec


Epoch 96: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.2488, Val_Loss: 0.7582, Total Mean Loss: 1.5035, LR: 1.9563375920296352e-05, Duration: 55.99 sec


Epoch 97: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 1.8898, Val_Loss: 0.7615, Total Mean Loss: 1.3256, LR: 1.1055935358221834e-05, Duration: 56.10 sec


Epoch 98: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.1196, Val_Loss: 0.7607, Total Mean Loss: 1.4401, LR: 4.971777682864596e-06, Duration: 56.02 sec


Epoch 99: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.0866, Val_Loss: 0.7598, Total Mean Loss: 1.4232, LR: 1.3183155031594304e-06, Duration: 56.12 sec


Epoch 100: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.1089, Val_Loss: 0.7597, Total Mean Loss: 1.4343, LR: 1e-07, Duration: 56.03 sec

[100 epoch result]
       Metric     Value
0   Accuracy  0.876000
1  Precision  0.896143
2     Recall  0.876000
3   F1 Score  0.872440


In [7]:
model_v1.cpu()
del model_v1

In [8]:
model_v2 = load_convNext()

# 총 파라미터 수 계산
total_params = sum(p.numel() for p in model_v2.parameters())

# 학습 가능한 파라미터 수 계산
trainable_params = sum(p.numel() for p in model_v2.parameters() if p.requires_grad)

print('='*80)
print(f"\nTotal Parameters: {total_params:,}")
print(f"Trainable Parameters: {trainable_params:,}\n")
print('='*80)


Total Parameters: 27,943,396
Trainable Parameters: 27,943,396



In [9]:
model_v2.to(device)

model_ema = None
ema_active = True
if ema_active:
    ema_decay = 0.9998
    model_ema = ModelEmaV3(
        model_v2,
        decay=ema_decay,
    )
    print(f"Using EMA with decay = {ema_decay}")

model_path = ''

mixup = True
if mixup :
    mixup_fn = Mixup(mixup_alpha=.8, 
                    cutmix_alpha=1., 
                    prob=1., 
                    switch_prob=0.5, 
                    mode='batch',
                    label_smoothing=.1,
                    num_classes=100)
    
    criterion = SoftTargetCrossEntropy()
else :
    criterion = LabelSmoothingCrossEntropy(.1)
    
criterion = nn.CrossEntropyLoss(label_smoothing=0.)

epochs = 100

optimizer = optim.AdamW(model_v2.parameters(), lr=4e-3, weight_decay=0.05)
warmup_steps = int(len(train_loader)*(epochs)*0.1)
train_steps = len(train_loader)*(epochs)
scheduler = CosineWarmupScheduler(optimizer, 
                                num_warmup_steps=warmup_steps, 
                                num_training_steps=train_steps,
                                num_cycles=0.5,
                                min_lr=1e-7)

Using EMA with decay = 0.9998




In [10]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')

# GradScaler 초기화
scaler = GradScaler()

for i in range(epochs // 100):
    for epoch in range(100):
        model_v2.train()
        start_time = time.time()
        running_loss = 0.0
        pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1 + i*100}")
        
        for _, data in pbar:
            inputs, labels = data[0].to(device), data[1].to(device)
            inputs, labels = mixup_fn(inputs, labels)
            optimizer.zero_grad()

            # AutoCast 적용
            with autocast():
                outputs = model_v2(inputs)
                loss = criterion(outputs, labels)
                
            # 스케일링된 그라디언트 계산
            scaler.scale(loss).backward()

            # 그라디언트 클리핑 전에 스케일링 제거
            scaler.unscale_(optimizer)
            clip_grad_norm_(model_v2.parameters(), max_norm=max_norm)

            # 옵티마이저 스텝 및 스케일러 업데이트
            scaler.step(optimizer)
            scaler.update()
            
            # EMA 모델 업데이트
            if model_ema is not None:
                model_ema.update(model_v2)
                
            scheduler.step()
                
            lr = optimizer.param_groups[0]["lr"]
            lrs.append(lr)
            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        losses.append(epoch_loss)        

        model_v2.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data in valid_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                outputs = model_v2(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
        val_loss /= len(valid_loader)
        val_losses.append(val_loss)
        
        # 모델 저장
        total_loss = val_loss + epoch_loss
        if total_loss < best_loss:
            best_loss = total_loss
            model_save = False
            if model_save:
                torch.save(model_v2.state_dict(), model_path)

        epoch_duration = time.time() - start_time
        training_time += epoch_duration
        
        text = f'\tLoss: {epoch_loss:.4f}, Val_Loss: {val_loss:.4f}, Total Mean Loss: {total_loss/2:.4f}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
        
        if model_save:
            text += f' - model saved!'
            model_save = False

        print(text)

    # 예측 수행 및 레이블 저장
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model_v2(images)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # 혼동 행렬 생성
    cm = confusion_matrix(all_labels, all_preds)

    # 예측과 실제 레이블
    y_true = all_labels  # 실제 레이블
    y_pred = all_preds  # 모델에 의해 예측된 레이블

    # 전체 데이터셋에 대한 정확도
    accuracy = accuracy_score(y_true, y_pred)

    # 평균 정밀도, 리콜, F1-Score ('weighted')
    precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

    # 판다스 데이터프레임으로 결과 정리
    performance_metrics = pd.DataFrame({
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
        'Value': [accuracy, precision, recall, f1_score]
    })

    # 데이터프레임 출력
    print(f"\n[{i*100+100} epoch result]\n", performance_metrics)

Epoch 1: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 4.4953, Val_Loss: 3.9957, Total Mean Loss: 4.2455, LR: 0.00040008999999999997, Duration: 67.57 sec


Epoch 2: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 4.3383, Val_Loss: 3.8066, Total Mean Loss: 4.0724, LR: 0.0008000799999999999, Duration: 67.57 sec


Epoch 3: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 4.2863, Val_Loss: 3.7261, Total Mean Loss: 4.0062, LR: 0.0012000700000000001, Duration: 67.70 sec


Epoch 4: 100%|██████████| 27/27 [01:06<00:00,  2.48s/it]


	Loss: 4.2008, Val_Loss: 3.6685, Total Mean Loss: 3.9346, LR: 0.00160006, Duration: 67.98 sec


Epoch 5: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 4.2399, Val_Loss: 3.5638, Total Mean Loss: 3.9018, LR: 0.0020000499999999997, Duration: 67.65 sec


Epoch 6: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 4.1107, Val_Loss: 3.3985, Total Mean Loss: 3.7546, LR: 0.00240004, Duration: 67.89 sec


Epoch 7: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 4.0405, Val_Loss: 3.2016, Total Mean Loss: 3.6211, LR: 0.00280003, Duration: 67.75 sec


Epoch 8: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 4.0470, Val_Loss: 3.0241, Total Mean Loss: 3.5355, LR: 0.0032000199999999996, Duration: 67.74 sec


Epoch 9: 100%|██████████| 27/27 [01:07<00:00,  2.48s/it]


	Loss: 4.0337, Val_Loss: 3.0783, Total Mean Loss: 3.5560, LR: 0.00360001, Duration: 68.12 sec


Epoch 10: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 3.9008, Val_Loss: 2.8049, Total Mean Loss: 3.3528, LR: 0.004, Duration: 67.85 sec


Epoch 11: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 3.7319, Val_Loss: 2.6172, Total Mean Loss: 3.1745, LR: 0.003998781684496841, Duration: 67.93 sec


Epoch 12: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.7359, Val_Loss: 2.5972, Total Mean Loss: 3.1665, LR: 0.003995128222317136, Duration: 67.60 sec


Epoch 13: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.7365, Val_Loss: 2.3938, Total Mean Loss: 3.0652, LR: 0.003989044064641779, Duration: 67.60 sec


Epoch 14: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.5635, Val_Loss: 2.3816, Total Mean Loss: 2.9726, LR: 0.0039805366240797035, Duration: 67.60 sec


Epoch 15: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.7393, Val_Loss: 2.2984, Total Mean Loss: 3.0189, LR: 0.003969616265636766, Duration: 67.59 sec


Epoch 16: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.5639, Val_Loss: 2.1167, Total Mean Loss: 2.8403, LR: 0.003956296294087574, Duration: 67.48 sec


Epoch 17: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.6771, Val_Loss: 2.0762, Total Mean Loss: 2.8767, LR: 0.003940592937765679, Duration: 67.58 sec


Epoch 18: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.4355, Val_Loss: 2.0092, Total Mean Loss: 2.7224, LR: 0.003922525328791841, Duration: 67.61 sec


Epoch 19: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.4700, Val_Loss: 2.1298, Total Mean Loss: 2.7999, LR: 0.0039021154797644923, Duration: 67.51 sec


Epoch 20: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.5844, Val_Loss: 2.1278, Total Mean Loss: 2.8561, LR: 0.0038793882569407774, Duration: 67.55 sec


Epoch 21: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.4391, Val_Loss: 1.9167, Total Mean Loss: 2.6779, LR: 0.0038543713499408464, Duration: 67.53 sec


Epoch 22: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.3674, Val_Loss: 1.7117, Total Mean Loss: 2.5396, LR: 0.003827095238012319, Duration: 67.48 sec


Epoch 23: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.1984, Val_Loss: 1.6721, Total Mean Loss: 2.4352, LR: 0.003797593152896019, Duration: 67.56 sec


Epoch 24: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.3351, Val_Loss: 1.7236, Total Mean Loss: 2.5294, LR: 0.0037659010383382105, Duration: 67.45 sec


Epoch 25: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 3.2980, Val_Loss: 1.5397, Total Mean Loss: 2.4188, LR: 0.003732057506298688, Duration: 67.72 sec


Epoch 26: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 3.2775, Val_Loss: 1.6634, Total Mean Loss: 2.4704, LR: 0.0036961037899080436, Duration: 67.67 sec


Epoch 27: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.1404, Val_Loss: 1.6914, Total Mean Loss: 2.4159, LR: 0.0036580836932314552, Duration: 67.52 sec


Epoch 28: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.2671, Val_Loss: 1.5354, Total Mean Loss: 2.4012, LR: 0.003618043537900176, Duration: 67.54 sec


Epoch 29: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.9860, Val_Loss: 1.4390, Total Mean Loss: 2.2125, LR: 0.003576032106675763, Duration: 67.41 sec


Epoch 30: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.1470, Val_Loss: 1.3110, Total Mean Loss: 2.2290, LR: 0.0035321005840157995, Duration: 67.41 sec


Epoch 31: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.8700, Val_Loss: 1.2069, Total Mean Loss: 2.0385, LR: 0.0034863024937135142, Duration: 67.52 sec


Epoch 32: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.0219, Val_Loss: 1.3896, Total Mean Loss: 2.2057, LR: 0.003438693633687285, Duration: 67.56 sec


Epoch 33: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.0175, Val_Loss: 1.3374, Total Mean Loss: 2.1774, LR: 0.0033893320079994714, Duration: 67.50 sec


Epoch 34: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.8511, Val_Loss: 1.1852, Total Mean Loss: 2.0182, LR: 0.003338277756187398, Duration: 67.57 sec


Epoch 35: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.0255, Val_Loss: 1.2675, Total Mean Loss: 2.1465, LR: 0.003285593079992594, Duration: 67.41 sec


Epoch 36: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.9926, Val_Loss: 1.1701, Total Mean Loss: 2.0814, LR: 0.00323134216757755, Duration: 67.49 sec


Epoch 37: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.8710, Val_Loss: 1.1276, Total Mean Loss: 1.9993, LR: 0.0031755911153223313, Duration: 67.41 sec


Epoch 38: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.8884, Val_Loss: 1.0406, Total Mean Loss: 1.9645, LR: 0.0031184078472963196, Duration: 67.40 sec


Epoch 39: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 2.6145, Val_Loss: 1.1127, Total Mean Loss: 1.8636, LR: 0.003059862032503198, Duration: 67.70 sec


Epoch 40: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.9169, Val_Loss: 1.0917, Total Mean Loss: 2.0043, LR: 0.0030000249999999995, Duration: 67.52 sec


Epoch 41: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.9894, Val_Loss: 1.0460, Total Mean Loss: 2.0177, LR: 0.002938969651993642, Duration: 67.55 sec


Epoch 42: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.7877, Val_Loss: 1.0938, Total Mean Loss: 1.9408, LR: 0.002876770375020815, Duration: 67.37 sec


Epoch 43: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.5092, Val_Loss: 0.9956, Total Mean Loss: 1.7524, LR: 0.0028135029493194467, Duration: 67.48 sec


Epoch 44: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.6443, Val_Loss: 1.0065, Total Mean Loss: 1.8254, LR: 0.0027492444565021534, Duration: 67.49 sec


Epoch 45: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 2.4927, Val_Loss: 0.9553, Total Mean Loss: 1.7240, LR: 0.0026840731856441714, Duration: 67.69 sec


Epoch 46: 100%|██████████| 27/27 [01:07<00:00,  2.48s/it]


	Loss: 2.5427, Val_Loss: 0.9239, Total Mean Loss: 1.7333, LR: 0.0026180685379001757, Duration: 68.13 sec


Epoch 47: 100%|██████████| 27/27 [01:06<00:00,  2.45s/it]


	Loss: 2.6331, Val_Loss: 0.9464, Total Mean Loss: 1.7898, LR: 0.002551310929766207, Duration: 67.36 sec


Epoch 48: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.6876, Val_Loss: 0.8948, Total Mean Loss: 1.7912, LR: 0.002483881695104555, Duration: 67.42 sec


Epoch 49: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.6114, Val_Loss: 0.9237, Total Mean Loss: 1.7676, LR: 0.0024158629860509774, Duration: 67.55 sec


Epoch 50: 100%|██████████| 27/27 [01:06<00:00,  2.45s/it]


	Loss: 2.6508, Val_Loss: 0.9648, Total Mean Loss: 1.8078, LR: 0.0023473376729249776, Duration: 67.33 sec


Epoch 51: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.5695, Val_Loss: 0.9561, Total Mean Loss: 1.7628, LR: 0.0022783892432650826, Duration: 67.48 sec


Epoch 52: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.4391, Val_Loss: 0.8177, Total Mean Loss: 1.6284, LR: 0.0022091017001121434, Duration: 67.44 sec


Epoch 53: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.4974, Val_Loss: 0.8427, Total Mean Loss: 1.6701, LR: 0.002139559459664563, Duration: 67.52 sec


Epoch 54: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.4575, Val_Loss: 0.7762, Total Mean Loss: 1.6168, LR: 0.0020698472484301667, Duration: 67.46 sec


Epoch 55: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.5227, Val_Loss: 0.8147, Total Mean Loss: 1.6687, LR: 0.0020000499999999997, Duration: 67.56 sec


Epoch 56: 100%|██████████| 27/27 [01:06<00:00,  2.45s/it]


	Loss: 2.4267, Val_Loss: 0.8300, Total Mean Loss: 1.6284, LR: 0.0019302527515698336, Duration: 67.35 sec


Epoch 57: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.4398, Val_Loss: 0.7255, Total Mean Loss: 1.5827, LR: 0.0018605405403354365, Duration: 67.56 sec


Epoch 58: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.2488, Val_Loss: 0.8539, Total Mean Loss: 1.5514, LR: 0.0017909982998878568, Duration: 67.61 sec


Epoch 59: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1985, Val_Loss: 0.7956, Total Mean Loss: 1.4970, LR: 0.0017217107567349176, Duration: 67.54 sec


Epoch 60: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.3759, Val_Loss: 0.8497, Total Mean Loss: 1.6128, LR: 0.0016527623270750228, Duration: 67.58 sec


Epoch 61: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 2.2043, Val_Loss: 0.8578, Total Mean Loss: 1.5311, LR: 0.0015842370139490226, Duration: 67.73 sec


Epoch 62: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.3755, Val_Loss: 0.8482, Total Mean Loss: 1.6119, LR: 0.0015162183048954448, Duration: 67.44 sec


Epoch 63: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1355, Val_Loss: 0.8108, Total Mean Loss: 1.4731, LR: 0.0014487890702337925, Duration: 67.54 sec


Epoch 64: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1350, Val_Loss: 0.7789, Total Mean Loss: 1.4570, LR: 0.001382031462099824, Duration: 67.42 sec


Epoch 65: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1932, Val_Loss: 0.8184, Total Mean Loss: 1.5058, LR: 0.001316026814355829, Duration: 67.60 sec


Epoch 66: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1584, Val_Loss: 0.7591, Total Mean Loss: 1.4588, LR: 0.0012508555434978467, Duration: 67.54 sec


Epoch 67: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 2.2821, Val_Loss: 0.8223, Total Mean Loss: 1.5522, LR: 0.0011865970506805537, Duration: 67.65 sec


Epoch 68: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.2484, Val_Loss: 0.8220, Total Mean Loss: 1.5352, LR: 0.0011233296249791845, Duration: 67.50 sec


Epoch 69: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1439, Val_Loss: 0.7533, Total Mean Loss: 1.4486, LR: 0.0010611303480063583, Duration: 67.41 sec


Epoch 70: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.2082, Val_Loss: 0.7790, Total Mean Loss: 1.4936, LR: 0.0010000750000000004, Duration: 67.47 sec


Epoch 71: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.2692, Val_Loss: 0.7999, Total Mean Loss: 1.5346, LR: 0.000940237967496802, Duration: 67.54 sec


Epoch 72: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0755, Val_Loss: 0.7652, Total Mean Loss: 1.4203, LR: 0.0008816921527036801, Duration: 67.51 sec


Epoch 73: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1475, Val_Loss: 0.7764, Total Mean Loss: 1.4620, LR: 0.0008245088846776685, Duration: 67.45 sec


Epoch 74: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1947, Val_Loss: 0.7709, Total Mean Loss: 1.4828, LR: 0.0007687578324224496, Duration: 67.51 sec


Epoch 75: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1150, Val_Loss: 0.7763, Total Mean Loss: 1.4456, LR: 0.0007145069200074055, Duration: 67.46 sec


Epoch 76: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.2568, Val_Loss: 0.7770, Total Mean Loss: 1.5169, LR: 0.000661822243812602, Duration: 67.61 sec


Epoch 77: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0956, Val_Loss: 0.8004, Total Mean Loss: 1.4480, LR: 0.0006107679920005282, Duration: 67.49 sec


Epoch 78: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.2704, Val_Loss: 0.8184, Total Mean Loss: 1.5444, LR: 0.0005614063663127149, Duration: 67.54 sec


Epoch 79: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1208, Val_Loss: 0.7864, Total Mean Loss: 1.4536, LR: 0.000513797506286485, Duration: 67.39 sec


Epoch 80: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.2827, Val_Loss: 0.7837, Total Mean Loss: 1.5332, LR: 0.00046799941598420013, Duration: 67.39 sec


Epoch 81: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 1.9240, Val_Loss: 0.7633, Total Mean Loss: 1.3437, LR: 0.0004240678933242365, Duration: 67.58 sec


Epoch 82: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1949, Val_Loss: 0.7407, Total Mean Loss: 1.4678, LR: 0.00038205646209982404, Duration: 67.43 sec


Epoch 83: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 1.9439, Val_Loss: 0.7818, Total Mean Loss: 1.3629, LR: 0.0003420163067685445, Duration: 67.42 sec


Epoch 84: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0535, Val_Loss: 0.7407, Total Mean Loss: 1.3971, LR: 0.0003039962100919559, Duration: 67.52 sec


Epoch 85: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0102, Val_Loss: 0.7809, Total Mean Loss: 1.3955, LR: 0.0002680424937013118, Duration: 67.37 sec


Epoch 86: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.3759, Val_Loss: 0.7563, Total Mean Loss: 1.5661, LR: 0.00023419896166178896, Duration: 67.55 sec


Epoch 87: 100%|██████████| 27/27 [01:06<00:00,  2.45s/it]


	Loss: 2.2072, Val_Loss: 0.7671, Total Mean Loss: 1.4871, LR: 0.0002025068471039813, Duration: 67.28 sec


Epoch 88: 100%|██████████| 27/27 [01:06<00:00,  2.45s/it]


	Loss: 2.0811, Val_Loss: 0.8004, Total Mean Loss: 1.4408, LR: 0.00017300476198768016, Duration: 67.27 sec


Epoch 89: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0743, Val_Loss: 0.7554, Total Mean Loss: 1.4149, LR: 0.00014572865005915372, Duration: 67.54 sec


Epoch 90: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0216, Val_Loss: 0.7178, Total Mean Loss: 1.3697, LR: 0.00012071174305922266, Duration: 67.37 sec


Epoch 91: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 1.9036, Val_Loss: 0.7793, Total Mean Loss: 1.3415, LR: 9.79845202355077e-05, Duration: 67.45 sec


Epoch 92: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1194, Val_Loss: 0.7585, Total Mean Loss: 1.4390, LR: 7.757467120815912e-05, Duration: 67.51 sec


Epoch 93: 100%|██████████| 27/27 [01:06<00:00,  2.45s/it]


	Loss: 2.1681, Val_Loss: 0.7490, Total Mean Loss: 1.4586, LR: 5.950706223432085e-05, Duration: 67.35 sec


Epoch 94: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0356, Val_Loss: 0.7410, Total Mean Loss: 1.3883, LR: 4.3803705912425316e-05, Duration: 67.52 sec


Epoch 95: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 1.9751, Val_Loss: 0.7690, Total Mean Loss: 1.3720, LR: 3.0483734363234566e-05, Duration: 67.76 sec


Epoch 96: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 2.0435, Val_Loss: 0.7558, Total Mean Loss: 1.3996, LR: 1.9563375920296352e-05, Duration: 67.69 sec


Epoch 97: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0720, Val_Loss: 0.7562, Total Mean Loss: 1.4141, LR: 1.1055935358221834e-05, Duration: 67.62 sec


Epoch 98: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 2.1632, Val_Loss: 0.7543, Total Mean Loss: 1.4588, LR: 4.971777682864596e-06, Duration: 67.64 sec


Epoch 99: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1685, Val_Loss: 0.7549, Total Mean Loss: 1.4617, LR: 1.3183155031594304e-06, Duration: 67.57 sec


Epoch 100: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0983, Val_Loss: 0.7549, Total Mean Loss: 1.4266, LR: 1e-07, Duration: 67.38 sec

[100 epoch result]
       Metric     Value
0   Accuracy  0.894000
1  Precision  0.912238
2     Recall  0.894000
3   F1 Score  0.891306


## SSH with FCMAE

In [3]:
from model.fcmae import convnextv2_fcmae_tiny

model = convnextv2_fcmae_tiny()



In [4]:
# Transforms 정의하기
train_transform = transforms.Compose([
            transforms.RandomResizedCrop(224, scale=(0.2, 1.0), interpolation=3),  # 3 is bicubic
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

data_dir = '../../data/sports/'
batch_size = 800
train_path = data_dir

# dataset load
train_data = ImageFolder(train_path, transform=train_transform)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True)

len(train_loader)
device = 'cuda:0'
model.to(device)

model_path = '../../model/convnext/fcmae.pt'

epochs = 500
optimizer = optim.AdamW(model.parameters(), lr=1.5e-4, weight_decay=0.05, betas=(0.9, 0.95))

warmup_steps = int(len(train_loader)*(epochs)*0.1)
train_steps = len(train_loader)*(epochs)
scheduler = CosineWarmupScheduler(optimizer, 
                                num_warmup_steps=50, 
                                num_training_steps=500,
                                num_cycles=0.5,
                                min_lr=1e-7)



In [None]:

training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')

for epoch in range(epochs):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        optimizer.zero_grad()
        
        samples= data[0].to(device)
        loss, _, _ = model(samples, mask_ratio=0.6)
        
        loss.backward()
        optimizer.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        
    
    # 모델 저장 로직 조정
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        if epoch > (epochs // 2) :
            torch.save(model.state_dict(), model_path)
            model_saved_text = ' - model saved!'
        else :
            model_saved_text = ' - model save pass'
    else:
        model_saved_text = ''
    
    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss:,.4f}, LR: {lr}, Duration: {epoch_duration:.2f} sec{model_saved_text}'
    print(text)

    # 에폭마다 스케줄러 업데이트
    scheduler.step()

## FineTuning after ssh

In [3]:
from model.convnextv2 import load_convNext

model = load_convNext(droppath=0.2)

In [4]:
model.state_dict().keys()

odict_keys(['stem.stem_conv.weight', 'stem.stem_conv.bias', 'stem.stem_ln.weight', 'stem.stem_ln.bias', 'downsample_layers.0.stem_conv.weight', 'downsample_layers.0.stem_conv.bias', 'downsample_layers.0.stem_ln.weight', 'downsample_layers.0.stem_ln.bias', 'downsample_layers.1.ds_ln0.weight', 'downsample_layers.1.ds_ln0.bias', 'downsample_layers.1.ds_conv1.weight', 'downsample_layers.1.ds_conv1.bias', 'downsample_layers.2.ds_ln1.weight', 'downsample_layers.2.ds_ln1.bias', 'downsample_layers.2.ds_conv2.weight', 'downsample_layers.2.ds_conv2.bias', 'downsample_layers.3.ds_ln2.weight', 'downsample_layers.3.ds_ln2.bias', 'downsample_layers.3.ds_conv3.weight', 'downsample_layers.3.ds_conv3.bias', 'stages.0.0.dwconv.weight', 'stages.0.0.dwconv.bias', 'stages.0.0.layernorm.weight', 'stages.0.0.layernorm.bias', 'stages.0.0.pwconv1.weight', 'stages.0.0.pwconv1.bias', 'stages.0.0.grn.gamma', 'stages.0.0.grn.beta', 'stages.0.0.pwconv2.weight', 'stages.0.0.pwconv2.bias', 'stages.0.1.dwconv.weight',

In [5]:
model.state_dict()['stem.stem_conv.weight'][0]

tensor([[[-0.0210, -0.1009,  0.1258, -0.1308],
         [ 0.0649,  0.0201, -0.0583,  0.0681],
         [ 0.0280,  0.0370,  0.0578, -0.0648],
         [-0.1016, -0.1418,  0.1152,  0.1145]],

        [[-0.1182, -0.0415,  0.0019,  0.0749],
         [-0.0851,  0.1249, -0.0309,  0.0025],
         [ 0.0188,  0.1149,  0.1096,  0.0237],
         [-0.1322,  0.0144,  0.1397,  0.0282]],

        [[ 0.1233,  0.1423, -0.0962,  0.0821],
         [ 0.0732,  0.0843,  0.0378, -0.0136],
         [ 0.0488,  0.0995,  0.0273,  0.0825],
         [-0.0682, -0.0409,  0.0648, -0.0143]]])

In [6]:
model.state_dict()['stages.0.0.dwconv.weight'][0]

tensor([[[-2.6829e-02,  4.3303e-02,  1.1755e-01,  9.1654e-02,  4.6168e-02,
          -2.0700e-02,  1.3269e-02],
         [ 9.6401e-02,  5.3274e-02, -3.8076e-02,  4.9968e-02,  8.6762e-04,
           6.1436e-02,  1.4115e-01],
         [-1.4044e-01, -1.1432e-01,  2.9335e-02,  1.0103e-01, -1.3687e-02,
          -7.5368e-02, -1.1560e-01],
         [-3.3938e-03,  6.5108e-02, -1.6344e-02,  1.4442e-02, -9.3134e-02,
          -1.2409e-01, -1.5133e-02],
         [-1.3989e-01, -8.8871e-02, -4.0106e-02,  5.0907e-02, -1.2918e-01,
          -2.8023e-02,  5.5662e-02],
         [ 1.3996e-01,  1.3011e-01,  6.6979e-05,  8.9666e-02,  1.3147e-01,
          -2.3832e-02, -8.9970e-02],
         [-3.1620e-02,  7.8192e-02, -7.8324e-02,  1.3465e-01, -1.2800e-01,
          -1.2070e-01,  1.4094e-01]]])

In [7]:
from collections import OrderedDict

def remap_checkpoint_keys(ckpt):
    new_ckpt = OrderedDict()
    for k, v in ckpt.items():
        if k.startswith('encoder'):
            k = '.'.join(k.split('.')[1:]) # remove encoder in the name
        if k.endswith('kernel'):
            k = '.'.join(k.split('.')[:-1]) # remove kernel in the name
            new_k = k + '.weight'
            if len(v.shape) == 3: # resahpe standard convolution
                kv, in_dim, out_dim = v.shape
                ks = int(math.sqrt(kv))
                new_ckpt[new_k] = v.permute(2, 1, 0).\
                    reshape(out_dim, in_dim, ks, ks).transpose(3, 2)
            elif len(v.shape) == 2: # reshape depthwise convolution
                kv, dim = v.shape
                ks = int(math.sqrt(kv))
                new_ckpt[new_k] = v.permute(1, 0).\
                    reshape(dim, 1, ks, ks).transpose(3, 2)
            continue
        elif 'ln' in k or 'linear' in k:
            k = k.split('.')
            k.pop(-2) # remove ln and linear in the name
            new_k = '.'.join(k)
        else:
            new_k = k
        new_ckpt[new_k] = v

    # reshape grn affine parameters and biases
    for k, v in new_ckpt.items():
        if k.endswith('bias') and len(v.shape) != 1:
            new_ckpt[k] = v.reshape(-1)
        elif 'grn' in k:
            new_ckpt[k] = v.unsqueeze(0).unsqueeze(1)
    return new_ckpt

def load_state_dict(model, state_dict, prefix='', ignore_missing="relative_position_index"):
    missing_keys = []
    unexpected_keys = []
    error_msgs = []
    # copy state_dict so _load_from_state_dict can modify it
    metadata = getattr(state_dict, '_metadata', None)
    state_dict = state_dict.copy()
    if metadata is not None:
        state_dict._metadata = metadata

    def load(module, prefix=''):
        local_metadata = {} if metadata is None else metadata.get(
            prefix[:-1], {})
        module._load_from_state_dict(
            state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs)
        for name, child in module._modules.items():
            if child is not None:
                load(child, prefix + name + '.')

    load(model, prefix=prefix)

    warn_missing_keys = []
    ignore_missing_keys = []
    for key in missing_keys:
        keep_flag = True
        for ignore_key in ignore_missing.split('|'):
            if ignore_key in key:
                keep_flag = False
                break
        if keep_flag:
            warn_missing_keys.append(key)
        else:
            ignore_missing_keys.append(key)

    missing_keys = warn_missing_keys

    if len(missing_keys) > 0:
        print("Weights of {} not initialized from pretrained model: {}".format(
            model.__class__.__name__, missing_keys))
    if len(unexpected_keys) > 0:
        print("Weights from pretrained model not used in {}: {}".format(
            model.__class__.__name__, unexpected_keys))
    if len(ignore_missing_keys) > 0:
        print("Ignored weights of {} not initialized from pretrained model: {}".format(
            model.__class__.__name__, ignore_missing_keys))
    if len(error_msgs) > 0:
        print('\n'.join(error_msgs))

In [8]:
pretrain_path = '../../model/convnext/fcmae.pt'
checkpoint_model = torch.load(pretrain_path, map_location='cpu')

state_dict = model.state_dict()
for k in ['head.weight', 'head.bias']:
    if k in checkpoint_model and checkpoint_model[k].shape != state_dict[k].shape:
        print(f"Removing key {k} from head of pretrained checkpoint")
        del checkpoint_model[k]


# remove decoder weights
checkpoint_model_keys = list(checkpoint_model.keys())
for k in checkpoint_model_keys:
    if 'decoder' in k or 'mask_token'in k or \
        'proj' in k or 'pred' in k:
        print(f"Removing key {k} from decoder of pretrained checkpoint")
        del checkpoint_model[k]

checkpoint_model = remap_checkpoint_keys(checkpoint_model)
load_state_dict(model, checkpoint_model, prefix='')

Removing key mask_token from decoder of pretrained checkpoint
Removing key proj.weight from decoder of pretrained checkpoint
Removing key proj.bias from decoder of pretrained checkpoint
Removing key decoder.0.dwconv.weight from decoder of pretrained checkpoint
Removing key decoder.0.dwconv.bias from decoder of pretrained checkpoint
Removing key decoder.0.layernorm.weight from decoder of pretrained checkpoint
Removing key decoder.0.layernorm.bias from decoder of pretrained checkpoint
Removing key decoder.0.pwconv1.weight from decoder of pretrained checkpoint
Removing key decoder.0.pwconv1.bias from decoder of pretrained checkpoint
Removing key decoder.0.grn.gamma from decoder of pretrained checkpoint
Removing key decoder.0.grn.beta from decoder of pretrained checkpoint
Removing key decoder.0.pwconv2.weight from decoder of pretrained checkpoint
Removing key decoder.0.pwconv2.bias from decoder of pretrained checkpoint
Removing key pred.weight from decoder of pretrained checkpoint
Removing

In [9]:
model.state_dict()['stem.stem_conv.weight'][0]

tensor([[[ 0.2945,  0.7202, -0.3333, -0.2289],
         [-0.5944,  0.4452,  0.9049, -0.2540],
         [ 0.4562, -0.8116,  1.4202,  0.4324],
         [ 0.1777,  0.3165, -1.3547,  0.2427]],

        [[-1.5330, -1.2280, -0.4023, -0.5692],
         [-0.5322, -0.0219,  0.3301, -1.0079],
         [ 0.4587, -0.0214, -0.6747, -0.7620],
         [ 0.1962, -0.4949, -0.0375, -0.0354]],

        [[-0.8695,  0.9264, -0.2321, -1.5825],
         [-0.4034, -0.4807,  0.2547, -0.3684],
         [-0.6111, -1.2094,  0.8702,  0.9287],
         [ 1.1989,  0.0221, -0.5370,  0.5360]]])

In [20]:
model.state_dict()['stages.0.0.dwconv.weight'][0]

tensor([[[ 1.2617, -0.2949, -0.1866,  0.1731,  0.0968, -0.2891, -0.9204],
         [-0.5181,  0.9795, -0.8359,  0.5205, -0.4570,  0.2246, -0.1680],
         [-0.5283, -0.1686, -0.2800,  1.1797, -1.3213,  0.7119, -1.5332],
         [-0.2485, -0.1683,  1.0381,  0.2788,  0.3088,  0.2258, -0.1799],
         [ 1.6299,  0.7847,  0.8198, -1.2764, -0.3535, -1.3438,  0.1346],
         [-0.9390,  0.4802,  0.1530,  0.2759, -0.5771, -0.9058,  0.6665],
         [ 0.7075,  0.3372, -0.4517, -0.3804,  0.4553,  0.9985,  0.3589]]],
       device='cuda:4')

In [19]:
model = model.float()

In [13]:
# 총 파라미터 수 계산
total_params = sum(p.numel() for p in model.parameters())

# 학습 가능한 파라미터 수 계산
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print('='*80)
print(f"\nTotal Parameters: {total_params:,}")
print(f"Trainable Parameters: {trainable_params:,}\n")
print('='*80)


Total Parameters: 27,943,396
Trainable Parameters: 27,943,396



In [21]:
# Transforms 정의하기
train_transform = transforms.Compose([
    transforms.TrivialAugmentWide(interpolation=F.InterpolationMode.BICUBIC),
    transforms.RandomResizedCrop(224, scale=(0.6,1), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

data_dir = '../../data/sports'
batch_size = 256

train_path = data_dir+'/train'
valid_path = data_dir+'/valid'
test_path = data_dir+'/test'

# dataset load
train_data = ImageFolder(train_path, transform=train_transform)
valid_data = ImageFolder(valid_path, transform=test_transform)
test_data = ImageFolder(test_path, transform=test_transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [22]:
device = 'cuda:4'
max_norm = 3.0 

model.to(device)

model_ema = None
ema_active = True
if ema_active:
    ema_decay = 0.999
    model_ema = ModelEmaV3(
        model,
        decay=ema_decay,
    )
    print(f"Using EMA with decay = {ema_decay}")

model_path = ''

mixup = True
if mixup :
    mixup_fn = Mixup(mixup_alpha=.8, 
                    cutmix_alpha=1., 
                    prob=1., 
                    switch_prob=0.5, 
                    mode='batch',
                    label_smoothing=.1,
                    num_classes=100)
    
    criterion = SoftTargetCrossEntropy()
else :
    criterion = LabelSmoothingCrossEntropy(.1)
    
criterion = nn.CrossEntropyLoss(label_smoothing=0.)

Using EMA with decay = 0.999


### LayerWise Learning Decay

In [23]:
# LLRD
layer_names = []
for i, (name, params) in enumerate(model.named_parameters()):
    layer_names.append(name)
    
layer_names.reverse()

lr      = 8e-3  
lr_mult = 0.9  
weight_decay = 0.05 

param_groups = []
prev_group_name = layer_names[0].split('.')[0]

for idx, name in enumerate(layer_names):
    
    cur_group_name = name.split('.')[0]
    
    if cur_group_name != prev_group_name:
        lr *= lr_mult
    prev_group_name = cur_group_name
    
    # print(f"{idx}: {name}'s lr={lr}")
    
    param_groups += [{'params': [ p for n, p in model.named_parameters() if n == name and p.requires_grad],
                      'lr' : lr,
                      'weight_decay': weight_decay}]

In [24]:
epochs = 100

optimizer = optim.AdamW(param_groups)
warmup_steps = int(len(train_loader)*(epochs)*0.1)
train_steps = len(train_loader)*(epochs)
scheduler = CosineWarmupScheduler(optimizer, 
                                num_warmup_steps=warmup_steps, 
                                num_training_steps=train_steps,
                                num_cycles=0.5,
                                min_lr=1e-7)



In [30]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')
# model_save = False
loss_sampling = []


# GradScaler 초기화
scaler = GradScaler()

for i in range(epochs // 100):
    for epoch in range(100):
        model.train()
        start_time = time.time()
        running_loss = 0.0
        pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1 + i*100}")
        
        for _, data in pbar:
            inputs, labels = data[0].to(device), data[1].to(device)
            inputs, labels = mixup_fn(inputs, labels)
            optimizer.zero_grad()

            # AutoCast 적용
            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
            # 스케일링된 그라디언트 계산
            scaler.scale(loss).backward()

            # 그라디언트 클리핑 전에 스케일링 제거
            scaler.unscale_(optimizer)
            clip_grad_norm_(model.parameters(), max_norm=max_norm)

            # 옵티마이저 스텝 및 스케일러 업데이트
            scaler.step(optimizer)
            scaler.update()
            
            # EMA 모델 업데이트
            if model_ema is not None:
                model_ema.update(model)
                
            scheduler.step()
                
            lr = optimizer.param_groups[0]["lr"]
            lrs.append(lr)
            running_loss += loss.item()
            loss_sampling.append(loss.item())

        epoch_loss = running_loss / len(train_loader)
        losses.append(epoch_loss)        

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data in valid_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
        val_loss /= len(valid_loader)
        val_losses.append(val_loss)
        
        # 모델 저장
        total_loss = val_loss + epoch_loss
        if total_loss < best_loss:
            best_loss = total_loss
            model_save = False
            if model_save:
                torch.save(model.state_dict(), model_path)

        epoch_duration = time.time() - start_time
        training_time += epoch_duration
        
        text = f'\tLoss: {epoch_loss:.4f}, Val_Loss: {val_loss:.4f}, Total Mean Loss: {total_loss/2:.4f}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
        
        if model_save:
            text += f' - model saved!'
            model_save = False

        print(text)

    # 예측 수행 및 레이블 저장
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # 혼동 행렬 생성
    cm = confusion_matrix(all_labels, all_preds)

    # 예측과 실제 레이블
    y_true = all_labels  # 실제 레이블
    y_pred = all_preds  # 모델에 의해 예측된 레이블

    # 전체 데이터셋에 대한 정확도
    accuracy = accuracy_score(y_true, y_pred)

    # 평균 정밀도, 리콜, F1-Score ('weighted')
    precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

    # 판다스 데이터프레임으로 결과 정리
    performance_metrics = pd.DataFrame({
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
        'Value': [accuracy, precision, recall, f1_score]
    })

    # 데이터프레임 출력
    print(f"\n[{i*100+100} epoch result]\n", performance_metrics)

Epoch 1:   0%|          | 0/53 [00:00<?, ?it/s]

Epoch 1: 100%|██████████| 53/53 [01:21<00:00,  1.54s/it]


NameError: name 'model_save' is not defined

In [29]:
running_loss

nan

In [27]:
loss

tensor(4.7050, device='cuda:4')

In [16]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')
model_save = False

for i in range(epochs // 100):
    for epoch in range(100):
        model.train()
        start_time = time.time()
        running_loss = 0.0
        pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1 + i*100}")
        
        for _, data in pbar:
            inputs, labels = data[0].to(device), data[1].to(device)
            inputs, labels = mixup_fn(inputs, labels)
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
                
            loss.backward()
            # 그라디언트 클리핑 적용
            clip_grad_norm_(model.parameters(), max_norm=max_norm)
            optimizer.step()
            
            # EMA 모델 업데이트, 필요한 경우
            if model_ema is not None:
                model_ema.update(model)
                
            scheduler.step()
                
            lr = optimizer.param_groups[0]["lr"]
            lrs.append(lr)
            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        losses.append(epoch_loss)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data in valid_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
        val_loss /= len(valid_loader)
        val_losses.append(val_loss)
        
        # 모델 저장 조건 수정
        total_loss = val_loss + epoch_loss
        if total_loss < best_loss:
            best_loss = total_loss
            # torch.save(model.state_dict(), model_path)
            model_save = True
            save_text = ' - model saved!'
        else:
            save_text = ''

        epoch_duration = time.time() - start_time
        training_time += epoch_duration
        
        text = f'\tLoss: {epoch_loss:.4f}, Val_Loss: {val_loss:.4f}, Total Mean Loss: {total_loss/2:.4f}, LR: {lr}, Duration: {epoch_duration:.2f} sec{save_text}'
        print(text)

    # 예측 수행 및 레이블 저장
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # 혼동 행렬 생성
    cm = confusion_matrix(all_labels, all_preds)

    # 예측과 실제 레이블
    y_true = all_labels  # 실제 레이블
    y_pred = all_preds  # 모델에 의해 예측된 레이블

    # 전체 데이터셋에 대한 정확도
    accuracy = accuracy_score(y_true, y_pred)

    # 평균 정밀도, 리콜, F1-Score ('weighted')
    precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

    # 판다스 데이터프레임으로 결과 정리
    performance_metrics = pd.DataFrame({
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
        'Value': [accuracy, precision, recall, f1_score]
    })

    # 데이터프레임 출력
    print(f"\n[{i*100+100} epoch result]\n", performance_metrics)

Epoch 1: 100%|██████████| 53/53 [01:41<00:00,  1.92s/it]


	Loss: 4.5950, Val_Loss: 4.1453, Total Mean Loss: 4.3702, LR: 0.00080009, Duration: 103.15 sec - model saved!


Epoch 2: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 4.4743, Val_Loss: 4.0018, Total Mean Loss: 4.2381, LR: 0.0016000800000000003, Duration: 100.79 sec - model saved!


Epoch 3: 100%|██████████| 53/53 [01:40<00:00,  1.89s/it]


	Loss: 4.4219, Val_Loss: 3.7958, Total Mean Loss: 4.1088, LR: 0.00240007, Duration: 101.36 sec - model saved!


Epoch 4: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 4.3900, Val_Loss: 3.7397, Total Mean Loss: 4.0649, LR: 0.0032000600000000002, Duration: 100.97 sec - model saved!


Epoch 5: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 4.3341, Val_Loss: 3.6376, Total Mean Loss: 3.9859, LR: 0.004000050000000001, Duration: 100.68 sec - model saved!


Epoch 6: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 4.2782, Val_Loss: 3.5548, Total Mean Loss: 3.9165, LR: 0.00480004, Duration: 100.57 sec - model saved!


Epoch 7: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 4.2267, Val_Loss: 3.5453, Total Mean Loss: 3.8860, LR: 0.005600030000000001, Duration: 100.35 sec - model saved!


Epoch 8: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 4.1982, Val_Loss: 3.4035, Total Mean Loss: 3.8008, LR: 0.006400020000000001, Duration: 100.46 sec - model saved!


Epoch 9: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 4.1818, Val_Loss: 3.3875, Total Mean Loss: 3.7846, LR: 0.007200010000000001, Duration: 100.41 sec - model saved!


Epoch 10: 100%|██████████| 53/53 [01:38<00:00,  1.87s/it]


	Loss: 4.1136, Val_Loss: 3.3307, Total Mean Loss: 3.7221, LR: 0.008, Duration: 100.29 sec - model saved!


Epoch 11: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 4.1009, Val_Loss: 3.2129, Total Mean Loss: 3.6569, LR: 0.007997563338535033, Duration: 100.33 sec - model saved!


Epoch 12: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 4.1097, Val_Loss: 3.1263, Total Mean Loss: 3.6180, LR: 0.007990256322836784, Duration: 100.57 sec - model saved!


Epoch 13: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 4.0599, Val_Loss: 3.1223, Total Mean Loss: 3.5911, LR: 0.007978087855378325, Duration: 100.44 sec - model saved!


Epoch 14: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 4.0101, Val_Loss: 2.9449, Total Mean Loss: 3.4775, LR: 0.007961072761562845, Duration: 100.31 sec - model saved!


Epoch 15: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.9389, Val_Loss: 2.8412, Total Mean Loss: 3.3900, LR: 0.007939231771661183, Duration: 100.35 sec - model saved!


Epoch 16: 100%|██████████| 53/53 [01:38<00:00,  1.86s/it]


	Loss: 3.9606, Val_Loss: 2.7031, Total Mean Loss: 3.3318, LR: 0.007912591495555185, Duration: 100.07 sec - model saved!


Epoch 17: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.9554, Val_Loss: 2.6873, Total Mean Loss: 3.3214, LR: 0.007881184390317672, Duration: 100.29 sec - model saved!


Epoch 18: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.8809, Val_Loss: 2.6832, Total Mean Loss: 3.2821, LR: 0.007845048720668478, Duration: 100.26 sec - model saved!


Epoch 19: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.8607, Val_Loss: 2.6046, Total Mean Loss: 3.2326, LR: 0.007804228512354801, Duration: 100.28 sec - model saved!


Epoch 20: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 3.9065, Val_Loss: 2.6144, Total Mean Loss: 3.2604, LR: 0.007758773498512596, Duration: 100.74 sec


Epoch 21: 100%|██████████| 53/53 [01:39<00:00,  1.89s/it]


	Loss: 3.8473, Val_Loss: 2.4817, Total Mean Loss: 3.1645, LR: 0.0077087390590744225, Duration: 101.22 sec - model saved!


Epoch 22: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.7878, Val_Loss: 2.4968, Total Mean Loss: 3.1423, LR: 0.007654186153297522, Duration: 100.28 sec - model saved!


Epoch 23: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 3.8438, Val_Loss: 2.4143, Total Mean Loss: 3.1290, LR: 0.007595181245494354, Duration: 100.72 sec - model saved!


Epoch 24: 100%|██████████| 53/53 [01:38<00:00,  1.87s/it]


	Loss: 3.7342, Val_Loss: 2.1994, Total Mean Loss: 2.9668, LR: 0.007531796224056066, Duration: 100.29 sec - model saved!


Epoch 25: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.7472, Val_Loss: 2.0719, Total Mean Loss: 2.9096, LR: 0.007464108313867567, Duration: 100.27 sec - model saved!


Epoch 26: 100%|██████████| 53/53 [01:38<00:00,  1.87s/it]


	Loss: 3.6859, Val_Loss: 2.3841, Total Mean Loss: 3.0350, LR: 0.007392199982220897, Duration: 100.18 sec


Epoch 27: 100%|██████████| 53/53 [01:38<00:00,  1.87s/it]


	Loss: 3.7741, Val_Loss: 2.3366, Total Mean Loss: 3.0553, LR: 0.00731615883834154, Duration: 100.17 sec


Epoch 28: 100%|██████████| 53/53 [01:38<00:00,  1.87s/it]


	Loss: 3.5131, Val_Loss: 1.9265, Total Mean Loss: 2.7198, LR: 0.007236077526650072, Duration: 100.15 sec - model saved!


Epoch 29: 100%|██████████| 53/53 [01:38<00:00,  1.87s/it]


	Loss: 3.6372, Val_Loss: 2.2424, Total Mean Loss: 2.9398, LR: 0.007152053613889208, Duration: 100.14 sec


Epoch 30: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.7462, Val_Loss: 2.1536, Total Mean Loss: 2.9499, LR: 0.007064189470253756, Duration: 100.26 sec


Epoch 31: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.4918, Val_Loss: 1.6969, Total Mean Loss: 2.5943, LR: 0.006972592144668304, Duration: 100.28 sec - model saved!


Epoch 32: 100%|██████████| 53/53 [01:38<00:00,  1.87s/it]


	Loss: 3.5671, Val_Loss: 1.8910, Total Mean Loss: 2.7291, LR: 0.0068773732343645885, Duration: 100.24 sec


Epoch 33: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.5183, Val_Loss: 1.7305, Total Mean Loss: 2.6244, LR: 0.006778648748917467, Duration: 100.40 sec


Epoch 34: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.4650, Val_Loss: 1.8143, Total Mean Loss: 2.6396, LR: 0.006676538968905116, Duration: 100.35 sec


Epoch 35: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.5158, Val_Loss: 1.9125, Total Mean Loss: 2.7142, LR: 0.006571168299365673, Duration: 100.57 sec


Epoch 36: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 3.4353, Val_Loss: 1.7001, Total Mean Loss: 2.5677, LR: 0.006462665118228867, Duration: 100.66 sec - model saved!


Epoch 37: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.4231, Val_Loss: 1.6558, Total Mean Loss: 2.5395, LR: 0.006351161619907278, Duration: 100.42 sec - model saved!


Epoch 38: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.4246, Val_Loss: 1.6772, Total Mean Loss: 2.5509, LR: 0.006236793654237814, Duration: 100.34 sec


Epoch 39: 100%|██████████| 53/53 [01:38<00:00,  1.87s/it]


	Loss: 3.3955, Val_Loss: 1.6819, Total Mean Loss: 2.5387, LR: 0.006119700560969609, Duration: 100.21 sec - model saved!


Epoch 40: 100%|██████████| 53/53 [01:38<00:00,  1.87s/it]


	Loss: 3.3652, Val_Loss: 1.4796, Total Mean Loss: 2.4224, LR: 0.006000025000000001, Duration: 100.18 sec - model saved!


Epoch 41: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.3364, Val_Loss: 1.4906, Total Mean Loss: 2.4135, LR: 0.005877912777565424, Duration: 100.48 sec - model saved!


Epoch 42: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.3491, Val_Loss: 1.5221, Total Mean Loss: 2.4356, LR: 0.005753512668598971, Duration: 100.56 sec


Epoch 43: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.2630, Val_Loss: 1.3132, Total Mean Loss: 2.2881, LR: 0.005626976235471049, Duration: 100.36 sec - model saved!


Epoch 44: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.2669, Val_Loss: 1.4940, Total Mean Loss: 2.3805, LR: 0.005498457643333979, Duration: 100.54 sec


Epoch 45: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.2238, Val_Loss: 1.2956, Total Mean Loss: 2.2597, LR: 0.00536811347229551, Duration: 100.38 sec - model saved!


Epoch 46: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.2199, Val_Loss: 1.3076, Total Mean Loss: 2.2638, LR: 0.005236102526650072, Duration: 100.38 sec


Epoch 47: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.0790, Val_Loss: 1.3395, Total Mean Loss: 2.2093, LR: 0.005102585641400206, Duration: 100.55 sec - model saved!


Epoch 48: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.2223, Val_Loss: 1.4247, Total Mean Loss: 2.3235, LR: 0.004967725486303891, Duration: 100.48 sec


Epoch 49: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.0647, Val_Loss: 1.2575, Total Mean Loss: 2.1611, LR: 0.004831686367686497, Duration: 100.35 sec - model saved!


Epoch 50: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 3.1002, Val_Loss: 1.2536, Total Mean Loss: 2.1769, LR: 0.004694634028258839, Duration: 100.68 sec


Epoch 51: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.1481, Val_Loss: 1.1780, Total Mean Loss: 2.1631, LR: 0.004556735445185214, Duration: 100.47 sec


Epoch 52: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.9954, Val_Loss: 1.2351, Total Mean Loss: 2.1153, LR: 0.004418158626647451, Duration: 100.29 sec - model saved!


Epoch 53: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.9960, Val_Loss: 1.2575, Total Mean Loss: 2.1268, LR: 0.004279072407152814, Duration: 100.52 sec


Epoch 54: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.1454, Val_Loss: 1.2170, Total Mean Loss: 2.1812, LR: 0.00413964624183517, Duration: 100.39 sec


Epoch 55: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.0753, Val_Loss: 1.1802, Total Mean Loss: 2.1278, LR: 0.004000050000000001, Duration: 100.43 sec


Epoch 56: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.9854, Val_Loss: 1.1241, Total Mean Loss: 2.0548, LR: 0.0038604537581648324, Duration: 100.51 sec - model saved!


Epoch 57: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 3.0592, Val_Loss: 1.2451, Total Mean Loss: 2.1521, LR: 0.0037210275928471863, Duration: 100.32 sec


Epoch 58: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.8498, Val_Loss: 1.0461, Total Mean Loss: 1.9479, LR: 0.00358194137335255, Duration: 100.52 sec - model saved!


Epoch 59: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 3.0221, Val_Loss: 1.1233, Total Mean Loss: 2.0727, LR: 0.0034433645548147874, Duration: 100.73 sec


Epoch 60: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 3.0172, Val_Loss: 1.0411, Total Mean Loss: 2.0291, LR: 0.0033054659717411624, Duration: 100.79 sec


Epoch 61: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.7804, Val_Loss: 1.0907, Total Mean Loss: 1.9356, LR: 0.003168413632313504, Duration: 100.34 sec - model saved!


Epoch 62: 100%|██████████| 53/53 [01:38<00:00,  1.87s/it]


	Loss: 2.9019, Val_Loss: 1.1806, Total Mean Loss: 2.0412, LR: 0.00303237451369611, Duration: 100.27 sec


Epoch 63: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.7794, Val_Loss: 0.9896, Total Mean Loss: 1.8845, LR: 0.0028975143585997947, Duration: 100.63 sec - model saved!


Epoch 64: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 2.8134, Val_Loss: 1.0241, Total Mean Loss: 1.9187, LR: 0.0027639974733499294, Duration: 100.72 sec


Epoch 65: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.7360, Val_Loss: 0.9448, Total Mean Loss: 1.8404, LR: 0.002631986527704492, Duration: 100.32 sec - model saved!


Epoch 66: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.8561, Val_Loss: 1.0950, Total Mean Loss: 1.9755, LR: 0.0025016423566660228, Duration: 100.30 sec


Epoch 67: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.5907, Val_Loss: 0.9677, Total Mean Loss: 1.7792, LR: 0.0023731237645289536, Duration: 100.32 sec - model saved!


Epoch 68: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.6671, Val_Loss: 0.9903, Total Mean Loss: 1.8287, LR: 0.0022465873314010294, Duration: 100.53 sec


Epoch 69: 100%|██████████| 53/53 [01:38<00:00,  1.87s/it]


	Loss: 2.6060, Val_Loss: 0.9529, Total Mean Loss: 1.7795, LR: 0.002122187222434577, Duration: 100.21 sec


Epoch 70: 100%|██████████| 53/53 [01:38<00:00,  1.87s/it]


	Loss: 2.6892, Val_Loss: 1.0041, Total Mean Loss: 1.8466, LR: 0.002000075000000001, Duration: 100.25 sec


Epoch 71: 100%|██████████| 53/53 [01:41<00:00,  1.91s/it]


	Loss: 2.6691, Val_Loss: 0.9944, Total Mean Loss: 1.8318, LR: 0.0018803994390303928, Duration: 102.34 sec


Epoch 72: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 2.5781, Val_Loss: 0.9373, Total Mean Loss: 1.7577, LR: 0.001763306345762187, Duration: 100.83 sec - model saved!


Epoch 73: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 2.7129, Val_Loss: 0.9540, Total Mean Loss: 1.8334, LR: 0.0016489383800927227, Duration: 100.65 sec


Epoch 74: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.6384, Val_Loss: 0.9378, Total Mean Loss: 1.7881, LR: 0.0015374348817711334, Duration: 100.51 sec


Epoch 75: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 2.4799, Val_Loss: 0.9027, Total Mean Loss: 1.6913, LR: 0.001428931700634327, Duration: 100.69 sec - model saved!


Epoch 76: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.5822, Val_Loss: 0.8696, Total Mean Loss: 1.7259, LR: 0.0013235610310948864, Duration: 100.49 sec


Epoch 77: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.5972, Val_Loss: 0.9180, Total Mean Loss: 1.7576, LR: 0.0012214512510825336, Duration: 100.58 sec


Epoch 78: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.6590, Val_Loss: 0.9583, Total Mean Loss: 1.8086, LR: 0.0011227267656354132, Duration: 100.57 sec


Epoch 79: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.5161, Val_Loss: 0.9193, Total Mean Loss: 1.7177, LR: 0.0010275078553316965, Duration: 100.40 sec


Epoch 80: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.6772, Val_Loss: 0.9182, Total Mean Loss: 1.7977, LR: 0.0009359105297462444, Duration: 100.55 sec


Epoch 81: 100%|██████████| 53/53 [01:38<00:00,  1.87s/it]


	Loss: 2.4884, Val_Loss: 0.8812, Total Mean Loss: 1.6848, LR: 0.0008480463861107927, Duration: 100.15 sec - model saved!


Epoch 82: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.5202, Val_Loss: 0.8777, Total Mean Loss: 1.6989, LR: 0.0007640224733499294, Duration: 100.48 sec


Epoch 83: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.7413, Val_Loss: 0.9471, Total Mean Loss: 1.8442, LR: 0.0006839411616584612, Duration: 100.54 sec


Epoch 84: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.5241, Val_Loss: 0.8664, Total Mean Loss: 1.6953, LR: 0.000607900017779104, Duration: 100.56 sec


Epoch 85: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.5300, Val_Loss: 0.8870, Total Mean Loss: 1.7085, LR: 0.0005359916861324344, Duration: 100.55 sec


Epoch 86: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.4429, Val_Loss: 0.8650, Total Mean Loss: 1.6540, LR: 0.000468303775943935, Duration: 100.62 sec - model saved!


Epoch 87: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 2.6560, Val_Loss: 0.8964, Total Mean Loss: 1.7762, LR: 0.0004049187545056477, Duration: 100.85 sec


Epoch 88: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.5887, Val_Loss: 0.8557, Total Mean Loss: 1.7222, LR: 0.00034591384670247825, Duration: 100.54 sec


Epoch 89: 100%|██████████| 53/53 [01:39<00:00,  1.88s/it]


	Loss: 2.4711, Val_Loss: 0.8516, Total Mean Loss: 1.6613, LR: 0.0002913609409255791, Duration: 100.97 sec


Epoch 90: 100%|██████████| 53/53 [01:40<00:00,  1.89s/it]


	Loss: 2.5234, Val_Loss: 0.8694, Total Mean Loss: 1.6964, LR: 0.00024132650148740606, Duration: 101.34 sec


Epoch 91: 100%|██████████| 53/53 [01:39<00:00,  1.87s/it]


	Loss: 2.5279, Val_Loss: 0.8560, Total Mean Loss: 1.6920, LR: 0.00019587148764520066, Duration: 100.64 sec


Epoch 92: 100%|██████████| 53/53 [01:40<00:00,  1.89s/it]


	Loss: 2.5953, Val_Loss: 0.8657, Total Mean Loss: 1.7305, LR: 0.00015505127933152136, Duration: 101.71 sec


Epoch 93: 100%|██████████| 53/53 [01:41<00:00,  1.92s/it]


	Loss: 2.6174, Val_Loss: 0.8608, Total Mean Loss: 1.7391, LR: 0.00011891560968232792, Duration: 102.95 sec


Epoch 94: 100%|██████████| 53/53 [01:41<00:00,  1.91s/it]


	Loss: 2.4460, Val_Loss: 0.8459, Total Mean Loss: 1.6459, LR: 8.750850444481394e-05, Duration: 102.68 sec - model saved!


Epoch 95: 100%|██████████| 53/53 [01:41<00:00,  1.91s/it]


	Loss: 2.5132, Val_Loss: 0.8560, Total Mean Loss: 1.6846, LR: 6.0868228338818537e-05, Duration: 102.48 sec


Epoch 96: 100%|██████████| 53/53 [01:41<00:00,  1.91s/it]


	Loss: 2.4481, Val_Loss: 0.8532, Total Mean Loss: 1.6506, LR: 3.902723843715564e-05, Duration: 102.59 sec


Epoch 97: 100%|██████████| 53/53 [01:41<00:00,  1.92s/it]


	Loss: 2.4170, Val_Loss: 0.8491, Total Mean Loss: 1.6330, LR: 2.2012144621675257e-05, Duration: 102.89 sec - model saved!


Epoch 98: 100%|██████████| 53/53 [01:41<00:00,  1.92s/it]


	Loss: 2.5952, Val_Loss: 0.8477, Total Mean Loss: 1.7215, LR: 9.843677163216202e-06, Duration: 103.13 sec


Epoch 99: 100%|██████████| 53/53 [01:40<00:00,  1.90s/it]


	Loss: 2.5665, Val_Loss: 0.8481, Total Mean Loss: 1.7073, LR: 2.5366614649679064e-06, Duration: 102.18 sec


Epoch 100: 100%|██████████| 53/53 [01:40<00:00,  1.90s/it]


	Loss: 2.5940, Val_Loss: 0.8485, Total Mean Loss: 1.7212, LR: 1e-07, Duration: 101.85 sec

[100 epoch result]
       Metric     Value
0   Accuracy  0.858000
1  Precision  0.885246
2     Recall  0.858000
3   F1 Score  0.852777


In [11]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')
model_save = False

for i in range(epochs // 100):
    for epoch in range(100):
        model.train()
        start_time = time.time()
        running_loss = 0.0
        pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1 + i*100}")
        
        for _, data in pbar:
            inputs, labels = data[0].to(device), data[1].to(device)
            inputs, labels = mixup_fn(inputs, labels)
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
                
            loss.backward()
            # 그라디언트 클리핑 적용
            clip_grad_norm_(model.parameters(), max_norm=max_norm)
            optimizer.step()
            
            # EMA 모델 업데이트, 필요한 경우
            if model_ema is not None:
                model_ema.update(model)
                
            scheduler.step()
                
            lr = optimizer.param_groups[0]["lr"]
            lrs.append(lr)
            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        losses.append(epoch_loss)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data in valid_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
        val_loss /= len(valid_loader)
        val_losses.append(val_loss)
        
        # 모델 저장 조건 수정
        total_loss = val_loss + epoch_loss
        if total_loss < best_loss:
            best_loss = total_loss
            # torch.save(model.state_dict(), model_path)
            model_save = True
            save_text = ' - model saved!'
        else:
            save_text = ''

        epoch_duration = time.time() - start_time
        training_time += epoch_duration
        
        text = f'\tLoss: {epoch_loss:.4f}, Val_Loss: {val_loss:.4f}, Total Mean Loss: {total_loss/2:.4f}, LR: {lr}, Duration: {epoch_duration:.2f} sec{save_text}'
        print(text)

    # 예측 수행 및 레이블 저장
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # 혼동 행렬 생성
    cm = confusion_matrix(all_labels, all_preds)

    # 예측과 실제 레이블
    y_true = all_labels  # 실제 레이블
    y_pred = all_preds  # 모델에 의해 예측된 레이블

    # 전체 데이터셋에 대한 정확도
    accuracy = accuracy_score(y_true, y_pred)

    # 평균 정밀도, 리콜, F1-Score ('weighted')
    precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

    # 판다스 데이터프레임으로 결과 정리
    performance_metrics = pd.DataFrame({
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
        'Value': [accuracy, precision, recall, f1_score]
    })

    # 데이터프레임 출력
    print(f"\n[{i*100+100} epoch result]\n", performance_metrics)

Epoch 1: 100%|██████████| 53/53 [01:26<00:00,  1.63s/it]


	Loss: 4.5563, Val_Loss: 4.1654, Total Mean Loss: 4.3608, LR: 0.00080009, Duration: 87.62 sec - model saved!


Epoch 2: 100%|██████████| 53/53 [01:26<00:00,  1.63s/it]


	Loss: 4.3917, Val_Loss: 3.9178, Total Mean Loss: 4.1548, LR: 0.0016000800000000003, Duration: 87.55 sec - model saved!


Epoch 3: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 4.3214, Val_Loss: 3.7507, Total Mean Loss: 4.0361, LR: 0.00240007, Duration: 86.81 sec - model saved!


Epoch 4: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 4.2388, Val_Loss: 3.5222, Total Mean Loss: 3.8805, LR: 0.0032000600000000002, Duration: 87.06 sec - model saved!


Epoch 5: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 4.2083, Val_Loss: 3.3973, Total Mean Loss: 3.8028, LR: 0.004000050000000001, Duration: 86.90 sec - model saved!


Epoch 6: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 4.1185, Val_Loss: 3.3511, Total Mean Loss: 3.7348, LR: 0.00480004, Duration: 87.11 sec - model saved!


Epoch 7: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 4.0174, Val_Loss: 3.1440, Total Mean Loss: 3.5807, LR: 0.005600030000000001, Duration: 86.66 sec - model saved!


Epoch 8: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 4.0014, Val_Loss: 3.1263, Total Mean Loss: 3.5639, LR: 0.006400020000000001, Duration: 86.77 sec - model saved!


Epoch 9: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.9488, Val_Loss: 3.1212, Total Mean Loss: 3.5350, LR: 0.007200010000000001, Duration: 86.88 sec - model saved!


Epoch 10: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 3.9208, Val_Loss: 2.9826, Total Mean Loss: 3.4517, LR: 0.008, Duration: 87.30 sec - model saved!


Epoch 11: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 3.9851, Val_Loss: 2.9941, Total Mean Loss: 3.4896, LR: 0.007997563338535033, Duration: 87.13 sec


Epoch 12: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 3.8892, Val_Loss: 2.9489, Total Mean Loss: 3.4191, LR: 0.007990256322836784, Duration: 87.00 sec - model saved!


Epoch 13: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 3.8864, Val_Loss: 2.7167, Total Mean Loss: 3.3015, LR: 0.007978087855378325, Duration: 87.30 sec - model saved!


Epoch 14: 100%|██████████| 53/53 [01:26<00:00,  1.63s/it]


	Loss: 3.8182, Val_Loss: 2.7901, Total Mean Loss: 3.3041, LR: 0.007961072761562845, Duration: 87.85 sec


Epoch 15: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.7849, Val_Loss: 2.7059, Total Mean Loss: 3.2454, LR: 0.007939231771661183, Duration: 86.87 sec - model saved!


Epoch 16: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.6598, Val_Loss: 2.5405, Total Mean Loss: 3.1002, LR: 0.007912591495555185, Duration: 86.89 sec - model saved!


Epoch 17: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.7671, Val_Loss: 2.6574, Total Mean Loss: 3.2122, LR: 0.007881184390317672, Duration: 86.86 sec


Epoch 18: 100%|██████████| 53/53 [01:26<00:00,  1.63s/it]


	Loss: 3.6945, Val_Loss: 2.4415, Total Mean Loss: 3.0680, LR: 0.007845048720668478, Duration: 87.71 sec - model saved!


Epoch 19: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 3.6868, Val_Loss: 2.3510, Total Mean Loss: 3.0189, LR: 0.007804228512354801, Duration: 87.21 sec - model saved!


Epoch 20: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 3.6406, Val_Loss: 2.3411, Total Mean Loss: 2.9908, LR: 0.007758773498512596, Duration: 87.06 sec - model saved!


Epoch 21: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 3.6445, Val_Loss: 2.3136, Total Mean Loss: 2.9791, LR: 0.0077087390590744225, Duration: 87.15 sec - model saved!


Epoch 22: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.6159, Val_Loss: 2.2397, Total Mean Loss: 2.9278, LR: 0.007654186153297522, Duration: 86.72 sec - model saved!


Epoch 23: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.5935, Val_Loss: 2.0328, Total Mean Loss: 2.8132, LR: 0.007595181245494354, Duration: 86.83 sec - model saved!


Epoch 24: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.4351, Val_Loss: 2.0582, Total Mean Loss: 2.7467, LR: 0.007531796224056066, Duration: 86.70 sec - model saved!


Epoch 25: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.4777, Val_Loss: 2.0330, Total Mean Loss: 2.7553, LR: 0.007464108313867567, Duration: 86.64 sec


Epoch 26: 100%|██████████| 53/53 [01:25<00:00,  1.60s/it]


	Loss: 3.4881, Val_Loss: 1.9760, Total Mean Loss: 2.7321, LR: 0.007392199982220897, Duration: 86.37 sec - model saved!


Epoch 27: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.5389, Val_Loss: 2.0232, Total Mean Loss: 2.7811, LR: 0.00731615883834154, Duration: 86.66 sec


Epoch 28: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.4022, Val_Loss: 1.8963, Total Mean Loss: 2.6492, LR: 0.007236077526650072, Duration: 86.70 sec - model saved!


Epoch 29: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.5182, Val_Loss: 1.8773, Total Mean Loss: 2.6977, LR: 0.007152053613889208, Duration: 86.50 sec


Epoch 30: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.3529, Val_Loss: 1.8399, Total Mean Loss: 2.5964, LR: 0.007064189470253756, Duration: 86.77 sec - model saved!


Epoch 31: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.2930, Val_Loss: 1.7967, Total Mean Loss: 2.5449, LR: 0.006972592144668304, Duration: 86.60 sec - model saved!


Epoch 32: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.3703, Val_Loss: 1.7348, Total Mean Loss: 2.5526, LR: 0.0068773732343645885, Duration: 86.60 sec


Epoch 33: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.3009, Val_Loss: 1.7267, Total Mean Loss: 2.5138, LR: 0.006778648748917467, Duration: 86.75 sec - model saved!


Epoch 34: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.2119, Val_Loss: 1.6292, Total Mean Loss: 2.4206, LR: 0.006676538968905116, Duration: 86.70 sec - model saved!


Epoch 35: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.1620, Val_Loss: 1.5133, Total Mean Loss: 2.3376, LR: 0.006571168299365673, Duration: 86.88 sec - model saved!


Epoch 36: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.1843, Val_Loss: 1.5053, Total Mean Loss: 2.3448, LR: 0.006462665118228867, Duration: 86.78 sec


Epoch 37: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.1414, Val_Loss: 1.3177, Total Mean Loss: 2.2296, LR: 0.006351161619907278, Duration: 86.61 sec - model saved!


Epoch 38: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.0108, Val_Loss: 1.5092, Total Mean Loss: 2.2600, LR: 0.006236793654237814, Duration: 86.50 sec


Epoch 39: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 3.1740, Val_Loss: 1.3641, Total Mean Loss: 2.2691, LR: 0.006119700560969609, Duration: 86.81 sec


Epoch 40: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 2.9807, Val_Loss: 1.2969, Total Mean Loss: 2.1388, LR: 0.006000025000000001, Duration: 87.05 sec - model saved!


Epoch 41: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 2.9306, Val_Loss: 1.2610, Total Mean Loss: 2.0958, LR: 0.005877912777565424, Duration: 87.31 sec - model saved!


Epoch 42: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 2.8986, Val_Loss: 1.3522, Total Mean Loss: 2.1254, LR: 0.005753512668598971, Duration: 87.09 sec


Epoch 43: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 2.8660, Val_Loss: 1.3146, Total Mean Loss: 2.0903, LR: 0.005626976235471049, Duration: 87.23 sec - model saved!


Epoch 44: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 2.9464, Val_Loss: 1.2041, Total Mean Loss: 2.0752, LR: 0.005498457643333979, Duration: 86.86 sec - model saved!


Epoch 45: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 2.8439, Val_Loss: 1.2760, Total Mean Loss: 2.0599, LR: 0.00536811347229551, Duration: 87.02 sec - model saved!


Epoch 46: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 2.9969, Val_Loss: 1.2580, Total Mean Loss: 2.1275, LR: 0.005236102526650072, Duration: 87.05 sec


Epoch 47: 100%|██████████| 53/53 [01:26<00:00,  1.63s/it]


	Loss: 2.9186, Val_Loss: 1.2203, Total Mean Loss: 2.0695, LR: 0.005102585641400206, Duration: 87.73 sec


Epoch 48: 100%|██████████| 53/53 [01:26<00:00,  1.63s/it]


	Loss: 2.6892, Val_Loss: 1.1642, Total Mean Loss: 1.9267, LR: 0.004967725486303891, Duration: 87.82 sec - model saved!


Epoch 49: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 2.6235, Val_Loss: 1.0926, Total Mean Loss: 1.8581, LR: 0.004831686367686497, Duration: 87.17 sec - model saved!


Epoch 50: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 2.6083, Val_Loss: 1.0765, Total Mean Loss: 1.8424, LR: 0.004694634028258839, Duration: 87.23 sec - model saved!


Epoch 51: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 2.7431, Val_Loss: 1.1853, Total Mean Loss: 1.9642, LR: 0.004556735445185214, Duration: 87.11 sec


Epoch 52: 100%|██████████| 53/53 [01:26<00:00,  1.63s/it]


	Loss: 2.6445, Val_Loss: 1.0437, Total Mean Loss: 1.8441, LR: 0.004418158626647451, Duration: 87.54 sec


Epoch 53: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 2.7108, Val_Loss: 1.0481, Total Mean Loss: 1.8794, LR: 0.004279072407152814, Duration: 86.92 sec


Epoch 54: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 2.7669, Val_Loss: 1.1752, Total Mean Loss: 1.9710, LR: 0.00413964624183517, Duration: 87.12 sec


Epoch 55: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 2.7558, Val_Loss: 1.0321, Total Mean Loss: 1.8939, LR: 0.004000050000000001, Duration: 86.95 sec


Epoch 56: 100%|██████████| 53/53 [01:26<00:00,  1.63s/it]


	Loss: 2.5931, Val_Loss: 1.0003, Total Mean Loss: 1.7967, LR: 0.0038604537581648324, Duration: 87.91 sec - model saved!


Epoch 57: 100%|██████████| 53/53 [01:24<00:00,  1.60s/it]


	Loss: 2.6435, Val_Loss: 1.1054, Total Mean Loss: 1.8745, LR: 0.0037210275928471863, Duration: 86.26 sec


Epoch 58: 100%|██████████| 53/53 [01:26<00:00,  1.62s/it]


	Loss: 2.6450, Val_Loss: 0.9969, Total Mean Loss: 1.8209, LR: 0.00358194137335255, Duration: 87.39 sec


Epoch 59: 100%|██████████| 53/53 [01:26<00:00,  1.63s/it]


	Loss: 2.6999, Val_Loss: 1.1443, Total Mean Loss: 1.9221, LR: 0.0034433645548147874, Duration: 87.51 sec


Epoch 60: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 2.4818, Val_Loss: 0.9374, Total Mean Loss: 1.7096, LR: 0.0033054659717411624, Duration: 86.48 sec - model saved!


Epoch 61: 100%|██████████| 53/53 [01:24<00:00,  1.60s/it]


	Loss: 2.4163, Val_Loss: 1.0617, Total Mean Loss: 1.7390, LR: 0.003168413632313504, Duration: 86.36 sec


Epoch 62: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 2.5977, Val_Loss: 0.9466, Total Mean Loss: 1.7721, LR: 0.00303237451369611, Duration: 87.13 sec


Epoch 63: 100%|██████████| 53/53 [01:26<00:00,  1.62s/it]


	Loss: 2.6180, Val_Loss: 1.0342, Total Mean Loss: 1.8261, LR: 0.0028975143585997947, Duration: 87.35 sec


Epoch 64: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 2.3581, Val_Loss: 1.0173, Total Mean Loss: 1.6877, LR: 0.0027639974733499294, Duration: 86.97 sec - model saved!


Epoch 65: 100%|██████████| 53/53 [01:24<00:00,  1.60s/it]


	Loss: 2.5972, Val_Loss: 1.0593, Total Mean Loss: 1.8283, LR: 0.002631986527704492, Duration: 86.22 sec


Epoch 66: 100%|██████████| 53/53 [01:24<00:00,  1.60s/it]


	Loss: 2.6766, Val_Loss: 1.0492, Total Mean Loss: 1.8629, LR: 0.0025016423566660228, Duration: 86.37 sec


Epoch 67: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 2.5841, Val_Loss: 0.9766, Total Mean Loss: 1.7803, LR: 0.0023731237645289536, Duration: 86.46 sec


Epoch 68: 100%|██████████| 53/53 [01:25<00:00,  1.62s/it]


	Loss: 2.5268, Val_Loss: 0.9900, Total Mean Loss: 1.7584, LR: 0.0022465873314010294, Duration: 87.17 sec


Epoch 69: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 2.3839, Val_Loss: 0.9932, Total Mean Loss: 1.6886, LR: 0.002122187222434577, Duration: 86.44 sec


Epoch 70: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 2.4237, Val_Loss: 1.0001, Total Mean Loss: 1.7119, LR: 0.002000075000000001, Duration: 86.53 sec


Epoch 71: 100%|██████████| 53/53 [01:25<00:00,  1.60s/it]


	Loss: 2.3826, Val_Loss: 0.9615, Total Mean Loss: 1.6721, LR: 0.0018803994390303928, Duration: 86.31 sec - model saved!


Epoch 72: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 2.2972, Val_Loss: 0.9028, Total Mean Loss: 1.6000, LR: 0.001763306345762187, Duration: 86.40 sec - model saved!


Epoch 73: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 2.4625, Val_Loss: 0.9573, Total Mean Loss: 1.7099, LR: 0.0016489383800927227, Duration: 86.77 sec


Epoch 74: 100%|██████████| 53/53 [01:25<00:00,  1.61s/it]


	Loss: 2.4015, Val_Loss: 0.9411, Total Mean Loss: 1.6713, LR: 0.0015374348817711334, Duration: 86.72 sec


Epoch 75:  53%|█████▎    | 28/53 [00:45<00:40,  1.61s/it]