# STEP 00 - Import Libraries

In [1]:
import torch
import torch.nn as nn

import torch.optim as optim
from torchvision import transforms
from torchvision.datasets import ImageFolder
import torchvision.transforms.functional as F
from torch.utils.data import DataLoader

from torch.cuda.amp import autocast, GradScaler
from torch.nn.utils import clip_grad_norm_
from tqdm import tqdm
import time

from timm.data import Mixup
from timm.utils import ModelEmaV3
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy

from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

from convnext_v1 import load_convNext_v1
from convnext_v2 import load_convNext
import math
import warnings
from torch.optim.lr_scheduler import _LRScheduler

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class CosineWarmupScheduler(_LRScheduler):
    def __init__(self, optimizer, num_warmup_steps, num_training_steps, num_cycles=0.5, min_lr=1e-6, last_epoch=-1, verbose=False):
        self.num_warmup_steps = num_warmup_steps
        self.num_training_steps = num_training_steps
        self.num_cycles = num_cycles
        self.min_lr = min_lr
        self.base_lrs = [group['lr'] for group in optimizer.param_groups]
        super().__init__(optimizer, last_epoch, verbose)

    def get_lr(self):
        if not self._get_lr_called_within_step:
            warnings.warn("To get the last learning rate computed by the scheduler, "
                          "please use `get_last_lr()`.", UserWarning)
        
        lrs = []
        for base_lr in self.base_lrs:
            if self.last_epoch < self.num_warmup_steps:
                # Linear warmup
                lr = (base_lr - self.min_lr) * self.last_epoch / max(1, self.num_warmup_steps) + self.min_lr
            else:
                # Cosine annealing
                progress = (self.last_epoch - self.num_warmup_steps) / max(1, self.num_training_steps - self.num_warmup_steps)
                lr = self.min_lr + (base_lr - self.min_lr) * 0.5 * (1 + math.cos(math.pi * self.num_cycles * 2.0 * progress))
            lrs.append(lr)
        return lrs

# STEP 01 - Test ConvNeXt V1(sup)

In [3]:
model_v1 = load_convNext_v1()

In [4]:
# 총 파라미터 수 계산
total_params = sum(p.numel() for p in model_v1.parameters())

# 학습 가능한 파라미터 수 계산
trainable_params = sum(p.numel() for p in model_v1.parameters() if p.requires_grad)

print('='*80)
print(f"\nTotal Parameters: {total_params:,}")
print(f"Trainable Parameters: {trainable_params:,}\n")
print('='*80)


Total Parameters: 27,897,028
Trainable Parameters: 27,897,028



In [5]:
# Transforms 정의하기
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.6,1), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomErasing(p=1., scale=(0.02, 0.33)),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

data_dir = '../../data/sports'
batch_size = 512

train_path = data_dir+'/train'
valid_path = data_dir+'/valid'
test_path = data_dir+'/test'

# dataset load
train_data = ImageFolder(train_path, transform=train_transform)
valid_data = ImageFolder(valid_path, transform=test_transform)
test_data = ImageFolder(test_path, transform=test_transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

device = 'cuda:3'
max_norm = 3.0 

model_v1.to(device)

model_ema = None
ema_active = True
if ema_active:
    ema_decay = 0.9998
    model_ema = ModelEmaV3(
        model_v1,
        decay=ema_decay,
    )
    print(f"Using EMA with decay = {ema_decay}")

model_path = ''

mixup = True
if mixup :
    mixup_fn = Mixup(mixup_alpha=.8, 
                    cutmix_alpha=1., 
                    prob=1., 
                    switch_prob=0.5, 
                    mode='batch',
                    label_smoothing=.1,
                    num_classes=100)
    
    criterion = SoftTargetCrossEntropy()
else :
    criterion = LabelSmoothingCrossEntropy(.1)
    
criterion = nn.CrossEntropyLoss(label_smoothing=0.)

epochs = 100

optimizer = optim.AdamW(model_v1.parameters(), lr=4e-3, weight_decay=0.05)
warmup_steps = int(len(train_loader)*(epochs)*0.1)
train_steps = len(train_loader)*(epochs)
scheduler = CosineWarmupScheduler(optimizer, 
                                num_warmup_steps=warmup_steps, 
                                num_training_steps=train_steps,
                                num_cycles=0.5,
                                min_lr=1e-7)

Using EMA with decay = 0.9998




In [6]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')

# GradScaler 초기화
scaler = GradScaler()

for i in range(epochs // 100):
    for epoch in range(100):
        model_v1.train()
        start_time = time.time()
        running_loss = 0.0
        pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1 + i*100}")
        
        for _, data in pbar:
            inputs, labels = data[0].to(device), data[1].to(device)
            inputs, labels = mixup_fn(inputs, labels)
            optimizer.zero_grad()

            # AutoCast 적용
            with autocast():
                outputs = model_v1(inputs)
                loss = criterion(outputs, labels)
                
            # 스케일링된 그라디언트 계산
            scaler.scale(loss).backward()

            # 그라디언트 클리핑 전에 스케일링 제거
            scaler.unscale_(optimizer)
            clip_grad_norm_(model_v1.parameters(), max_norm=max_norm)

            # 옵티마이저 스텝 및 스케일러 업데이트
            scaler.step(optimizer)
            scaler.update()
            
            # EMA 모델 업데이트
            if model_ema is not None:
                model_ema.update(model_v1)
                
            scheduler.step()
                
            lr = optimizer.param_groups[0]["lr"]
            lrs.append(lr)
            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        losses.append(epoch_loss)        

        model_v1.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data in valid_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                outputs = model_v1(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
        val_loss /= len(valid_loader)
        val_losses.append(val_loss)
        
        # 모델 저장
        total_loss = val_loss + epoch_loss
        if total_loss < best_loss:
            best_loss = total_loss
            model_save = False
            if model_save:
                torch.save(model_v1.state_dict(), model_path)

        epoch_duration = time.time() - start_time
        training_time += epoch_duration
        
        text = f'\tLoss: {epoch_loss:.4f}, Val_Loss: {val_loss:.4f}, Total Mean Loss: {total_loss/2:.4f}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
        
        if model_save:
            text += f' - model saved!'
            model_save = False

        print(text)

    # 예측 수행 및 레이블 저장
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model_v1(images)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # 혼동 행렬 생성
    cm = confusion_matrix(all_labels, all_preds)

    # 예측과 실제 레이블
    y_true = all_labels  # 실제 레이블
    y_pred = all_preds  # 모델에 의해 예측된 레이블

    # 전체 데이터셋에 대한 정확도
    accuracy = accuracy_score(y_true, y_pred)

    # 평균 정밀도, 리콜, F1-Score ('weighted')
    precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

    # 판다스 데이터프레임으로 결과 정리
    performance_metrics = pd.DataFrame({
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
        'Value': [accuracy, precision, recall, f1_score]
    })

    # 데이터프레임 출력
    print(f"\n[{i*100+100} epoch result]\n", performance_metrics)

Epoch 1: 100%|██████████| 27/27 [00:56<00:00,  2.08s/it]


	Loss: 4.4774, Val_Loss: 4.0328, Total Mean Loss: 4.2551, LR: 0.00040008999999999997, Duration: 57.10 sec


Epoch 2: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 4.3636, Val_Loss: 3.9938, Total Mean Loss: 4.1787, LR: 0.0008000799999999999, Duration: 56.24 sec


Epoch 3: 100%|██████████| 27/27 [00:55<00:00,  2.06s/it]


	Loss: 4.3047, Val_Loss: 3.7656, Total Mean Loss: 4.0352, LR: 0.0012000700000000001, Duration: 56.54 sec


Epoch 4: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 4.2676, Val_Loss: 3.7741, Total Mean Loss: 4.0209, LR: 0.00160006, Duration: 56.30 sec


Epoch 5: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 4.1701, Val_Loss: 3.5921, Total Mean Loss: 3.8811, LR: 0.0020000499999999997, Duration: 56.13 sec


Epoch 6: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 4.1979, Val_Loss: 3.5395, Total Mean Loss: 3.8687, LR: 0.00240004, Duration: 56.20 sec


Epoch 7: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 4.1892, Val_Loss: 3.5515, Total Mean Loss: 3.8703, LR: 0.00280003, Duration: 56.03 sec


Epoch 8: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 4.0846, Val_Loss: 3.3567, Total Mean Loss: 3.7207, LR: 0.0032000199999999996, Duration: 56.08 sec


Epoch 9: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 4.0832, Val_Loss: 3.2658, Total Mean Loss: 3.6745, LR: 0.00360001, Duration: 56.34 sec


Epoch 10: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.9733, Val_Loss: 3.2181, Total Mean Loss: 3.5957, LR: 0.004, Duration: 56.18 sec


Epoch 11: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.9585, Val_Loss: 3.1206, Total Mean Loss: 3.5395, LR: 0.003998781684496841, Duration: 56.13 sec


Epoch 12: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.9332, Val_Loss: 2.9625, Total Mean Loss: 3.4478, LR: 0.003995128222317136, Duration: 56.06 sec


Epoch 13: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 3.9000, Val_Loss: 2.9625, Total Mean Loss: 3.4313, LR: 0.003989044064641779, Duration: 55.92 sec


Epoch 14: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.8427, Val_Loss: 2.8141, Total Mean Loss: 3.3284, LR: 0.0039805366240797035, Duration: 56.11 sec


Epoch 15: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 3.7609, Val_Loss: 2.6553, Total Mean Loss: 3.2081, LR: 0.003969616265636766, Duration: 55.97 sec


Epoch 16: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.8194, Val_Loss: 2.6457, Total Mean Loss: 3.2325, LR: 0.003956296294087574, Duration: 55.85 sec


Epoch 17: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 3.6426, Val_Loss: 2.5182, Total Mean Loss: 3.0804, LR: 0.003940592937765679, Duration: 55.96 sec


Epoch 18: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.6091, Val_Loss: 2.3043, Total Mean Loss: 2.9567, LR: 0.003922525328791841, Duration: 56.04 sec


Epoch 19: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.7503, Val_Loss: 2.3364, Total Mean Loss: 3.0433, LR: 0.0039021154797644923, Duration: 55.73 sec


Epoch 20: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.5655, Val_Loss: 2.2997, Total Mean Loss: 2.9326, LR: 0.0038793882569407774, Duration: 55.96 sec


Epoch 21: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.5359, Val_Loss: 2.1852, Total Mean Loss: 2.8605, LR: 0.0038543713499408464, Duration: 55.90 sec


Epoch 22: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.5495, Val_Loss: 2.2021, Total Mean Loss: 2.8758, LR: 0.003827095238012319, Duration: 56.02 sec


Epoch 23: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.4314, Val_Loss: 1.8701, Total Mean Loss: 2.6508, LR: 0.003797593152896019, Duration: 55.81 sec


Epoch 24: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 3.5395, Val_Loss: 2.0334, Total Mean Loss: 2.7864, LR: 0.0037659010383382105, Duration: 55.96 sec


Epoch 25: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.3701, Val_Loss: 1.9876, Total Mean Loss: 2.6788, LR: 0.003732057506298688, Duration: 55.82 sec


Epoch 26: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.3378, Val_Loss: 1.7445, Total Mean Loss: 2.5411, LR: 0.0036961037899080436, Duration: 55.90 sec


Epoch 27: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.2249, Val_Loss: 1.6983, Total Mean Loss: 2.4616, LR: 0.0036580836932314552, Duration: 55.84 sec


Epoch 28: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 3.3712, Val_Loss: 1.7954, Total Mean Loss: 2.5833, LR: 0.003618043537900176, Duration: 56.32 sec


Epoch 29: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.2627, Val_Loss: 1.6444, Total Mean Loss: 2.4536, LR: 0.003576032106675763, Duration: 55.88 sec


Epoch 30: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.2453, Val_Loss: 1.6847, Total Mean Loss: 2.4650, LR: 0.0035321005840157995, Duration: 55.90 sec


Epoch 31: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 3.0739, Val_Loss: 1.4796, Total Mean Loss: 2.2767, LR: 0.0034863024937135142, Duration: 55.85 sec


Epoch 32: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.2991, Val_Loss: 1.4953, Total Mean Loss: 2.3972, LR: 0.003438693633687285, Duration: 56.03 sec


Epoch 33: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 3.0333, Val_Loss: 1.4960, Total Mean Loss: 2.2647, LR: 0.0033893320079994714, Duration: 55.97 sec


Epoch 34: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.1110, Val_Loss: 1.4444, Total Mean Loss: 2.2777, LR: 0.003338277756187398, Duration: 56.15 sec


Epoch 35: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.1195, Val_Loss: 1.5030, Total Mean Loss: 2.3113, LR: 0.003285593079992594, Duration: 56.06 sec


Epoch 36: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.1100, Val_Loss: 1.3832, Total Mean Loss: 2.2466, LR: 0.00323134216757755, Duration: 56.12 sec


Epoch 37: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.0795, Val_Loss: 1.4472, Total Mean Loss: 2.2634, LR: 0.0031755911153223313, Duration: 56.11 sec


Epoch 38: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.9890, Val_Loss: 1.2568, Total Mean Loss: 2.1229, LR: 0.0031184078472963196, Duration: 56.06 sec


Epoch 39: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 3.1263, Val_Loss: 1.3073, Total Mean Loss: 2.2168, LR: 0.003059862032503198, Duration: 55.97 sec


Epoch 40: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 3.0529, Val_Loss: 1.2554, Total Mean Loss: 2.1542, LR: 0.0030000249999999995, Duration: 56.05 sec


Epoch 41: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.7628, Val_Loss: 1.1780, Total Mean Loss: 1.9704, LR: 0.002938969651993642, Duration: 56.07 sec


Epoch 42: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 2.8932, Val_Loss: 1.1685, Total Mean Loss: 2.0308, LR: 0.002876770375020815, Duration: 55.86 sec


Epoch 43: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.8992, Val_Loss: 1.2290, Total Mean Loss: 2.0641, LR: 0.0028135029493194467, Duration: 56.12 sec


Epoch 44: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 2.9412, Val_Loss: 1.2404, Total Mean Loss: 2.0908, LR: 0.0027492444565021534, Duration: 56.27 sec


Epoch 45: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 2.9296, Val_Loss: 1.1521, Total Mean Loss: 2.0409, LR: 0.0026840731856441714, Duration: 55.90 sec


Epoch 46: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.6970, Val_Loss: 1.0895, Total Mean Loss: 1.8932, LR: 0.0026180685379001757, Duration: 56.01 sec


Epoch 47: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.5881, Val_Loss: 0.9863, Total Mean Loss: 1.7872, LR: 0.002551310929766207, Duration: 55.95 sec


Epoch 48: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 2.9589, Val_Loss: 1.0798, Total Mean Loss: 2.0193, LR: 0.002483881695104555, Duration: 55.89 sec


Epoch 49: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.7937, Val_Loss: 1.0734, Total Mean Loss: 1.9336, LR: 0.0024158629860509774, Duration: 55.96 sec


Epoch 50: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.8662, Val_Loss: 1.0298, Total Mean Loss: 1.9480, LR: 0.0023473376729249776, Duration: 56.12 sec


Epoch 51: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.6298, Val_Loss: 1.0176, Total Mean Loss: 1.8237, LR: 0.0022783892432650826, Duration: 55.98 sec


Epoch 52: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2934, Val_Loss: 0.9682, Total Mean Loss: 1.6308, LR: 0.0022091017001121434, Duration: 56.07 sec


Epoch 53: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 2.7997, Val_Loss: 1.0152, Total Mean Loss: 1.9075, LR: 0.002139559459664563, Duration: 55.89 sec


Epoch 54: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.5031, Val_Loss: 0.9415, Total Mean Loss: 1.7223, LR: 0.0020698472484301667, Duration: 56.06 sec


Epoch 55: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 2.3984, Val_Loss: 0.9141, Total Mean Loss: 1.6562, LR: 0.0020000499999999997, Duration: 55.90 sec


Epoch 56: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.5740, Val_Loss: 0.8883, Total Mean Loss: 1.7312, LR: 0.0019302527515698336, Duration: 55.99 sec


Epoch 57: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.6690, Val_Loss: 0.9551, Total Mean Loss: 1.8120, LR: 0.0018605405403354365, Duration: 55.98 sec


Epoch 58: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.6134, Val_Loss: 0.9461, Total Mean Loss: 1.7797, LR: 0.0017909982998878568, Duration: 56.05 sec


Epoch 59: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.5852, Val_Loss: 0.9271, Total Mean Loss: 1.7561, LR: 0.0017217107567349176, Duration: 56.02 sec


Epoch 60: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 2.5558, Val_Loss: 0.8173, Total Mean Loss: 1.6865, LR: 0.0016527623270750228, Duration: 56.21 sec


Epoch 61: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.5551, Val_Loss: 0.8222, Total Mean Loss: 1.6887, LR: 0.0015842370139490226, Duration: 55.97 sec


Epoch 62: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.5135, Val_Loss: 0.8065, Total Mean Loss: 1.6600, LR: 0.0015162183048954448, Duration: 55.95 sec


Epoch 63: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.4218, Val_Loss: 0.8387, Total Mean Loss: 1.6302, LR: 0.0014487890702337925, Duration: 56.10 sec


Epoch 64: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.3691, Val_Loss: 0.8183, Total Mean Loss: 1.5937, LR: 0.001382031462099824, Duration: 56.03 sec


Epoch 65: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.4415, Val_Loss: 0.8186, Total Mean Loss: 1.6301, LR: 0.001316026814355829, Duration: 56.01 sec


Epoch 66: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.4425, Val_Loss: 0.8346, Total Mean Loss: 1.6386, LR: 0.0012508555434978467, Duration: 56.06 sec


Epoch 67: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.3497, Val_Loss: 0.7826, Total Mean Loss: 1.5661, LR: 0.0011865970506805537, Duration: 56.03 sec


Epoch 68: 100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


	Loss: 2.3265, Val_Loss: 0.7799, Total Mean Loss: 1.5532, LR: 0.0011233296249791845, Duration: 55.93 sec


Epoch 69: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.5047, Val_Loss: 0.8032, Total Mean Loss: 1.6540, LR: 0.0010611303480063583, Duration: 56.02 sec


Epoch 70: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 2.0608, Val_Loss: 0.7569, Total Mean Loss: 1.4088, LR: 0.0010000750000000004, Duration: 56.27 sec


Epoch 71: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2896, Val_Loss: 0.7690, Total Mean Loss: 1.5293, LR: 0.000940237967496802, Duration: 56.04 sec


Epoch 72: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.3211, Val_Loss: 0.7482, Total Mean Loss: 1.5347, LR: 0.0008816921527036801, Duration: 55.96 sec


Epoch 73: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.3094, Val_Loss: 0.7497, Total Mean Loss: 1.5295, LR: 0.0008245088846776685, Duration: 55.97 sec


Epoch 74: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.3828, Val_Loss: 0.7911, Total Mean Loss: 1.5869, LR: 0.0007687578324224496, Duration: 55.95 sec


Epoch 75: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.3892, Val_Loss: 0.7802, Total Mean Loss: 1.5847, LR: 0.0007145069200074055, Duration: 56.00 sec


Epoch 76: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 2.3461, Val_Loss: 0.7231, Total Mean Loss: 1.5346, LR: 0.000661822243812602, Duration: 56.24 sec


Epoch 77: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.1545, Val_Loss: 0.7566, Total Mean Loss: 1.4556, LR: 0.0006107679920005282, Duration: 56.03 sec


Epoch 78: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 2.2703, Val_Loss: 0.7705, Total Mean Loss: 1.5204, LR: 0.0005614063663127149, Duration: 56.34 sec


Epoch 79: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.3561, Val_Loss: 0.7499, Total Mean Loss: 1.5530, LR: 0.000513797506286485, Duration: 56.12 sec


Epoch 80: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2656, Val_Loss: 0.7683, Total Mean Loss: 1.5169, LR: 0.00046799941598420013, Duration: 56.14 sec


Epoch 81: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2729, Val_Loss: 0.7432, Total Mean Loss: 1.5081, LR: 0.0004240678933242365, Duration: 56.04 sec


Epoch 82: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2517, Val_Loss: 0.7783, Total Mean Loss: 1.5150, LR: 0.00038205646209982404, Duration: 56.18 sec


Epoch 83: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2930, Val_Loss: 0.7482, Total Mean Loss: 1.5206, LR: 0.0003420163067685445, Duration: 56.11 sec


Epoch 84: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.1917, Val_Loss: 0.7575, Total Mean Loss: 1.4746, LR: 0.0003039962100919559, Duration: 56.06 sec


Epoch 85: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2999, Val_Loss: 0.7637, Total Mean Loss: 1.5318, LR: 0.0002680424937013118, Duration: 56.07 sec


Epoch 86: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.1889, Val_Loss: 0.7282, Total Mean Loss: 1.4586, LR: 0.00023419896166178896, Duration: 56.07 sec


Epoch 87: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.1473, Val_Loss: 0.7745, Total Mean Loss: 1.4609, LR: 0.0002025068471039813, Duration: 56.04 sec


Epoch 88: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.1531, Val_Loss: 0.8005, Total Mean Loss: 1.4768, LR: 0.00017300476198768016, Duration: 56.04 sec


Epoch 89: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2986, Val_Loss: 0.7652, Total Mean Loss: 1.5319, LR: 0.00014572865005915372, Duration: 56.19 sec


Epoch 90: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.3740, Val_Loss: 0.7677, Total Mean Loss: 1.5708, LR: 0.00012071174305922266, Duration: 56.13 sec


Epoch 91: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.2788, Val_Loss: 0.7759, Total Mean Loss: 1.5273, LR: 9.79845202355077e-05, Duration: 56.12 sec


Epoch 92: 100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


	Loss: 2.1289, Val_Loss: 0.7813, Total Mean Loss: 1.4551, LR: 7.757467120815912e-05, Duration: 56.22 sec


Epoch 93: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.0349, Val_Loss: 0.7657, Total Mean Loss: 1.4003, LR: 5.950706223432085e-05, Duration: 56.15 sec


Epoch 94: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.0171, Val_Loss: 0.7645, Total Mean Loss: 1.3908, LR: 4.3803705912425316e-05, Duration: 55.99 sec


Epoch 95: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.0323, Val_Loss: 0.7693, Total Mean Loss: 1.4008, LR: 3.0483734363234566e-05, Duration: 56.10 sec


Epoch 96: 100%|██████████| 27/27 [00:54<00:00,  2.04s/it]


	Loss: 2.2488, Val_Loss: 0.7582, Total Mean Loss: 1.5035, LR: 1.9563375920296352e-05, Duration: 55.99 sec


Epoch 97: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 1.8898, Val_Loss: 0.7615, Total Mean Loss: 1.3256, LR: 1.1055935358221834e-05, Duration: 56.10 sec


Epoch 98: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.1196, Val_Loss: 0.7607, Total Mean Loss: 1.4401, LR: 4.971777682864596e-06, Duration: 56.02 sec


Epoch 99: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.0866, Val_Loss: 0.7598, Total Mean Loss: 1.4232, LR: 1.3183155031594304e-06, Duration: 56.12 sec


Epoch 100: 100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


	Loss: 2.1089, Val_Loss: 0.7597, Total Mean Loss: 1.4343, LR: 1e-07, Duration: 56.03 sec

[100 epoch result]
       Metric     Value
0   Accuracy  0.876000
1  Precision  0.896143
2     Recall  0.876000
3   F1 Score  0.872440


In [7]:
model_v1.cpu()
del model_v1

# STEP 02 - Test ConvNeXt V2(sup)

In [8]:
model_v2 = load_convNext()

# 총 파라미터 수 계산
total_params = sum(p.numel() for p in model_v2.parameters())

# 학습 가능한 파라미터 수 계산
trainable_params = sum(p.numel() for p in model_v2.parameters() if p.requires_grad)

print('='*80)
print(f"\nTotal Parameters: {total_params:,}")
print(f"Trainable Parameters: {trainable_params:,}\n")
print('='*80)


Total Parameters: 27,943,396
Trainable Parameters: 27,943,396



In [9]:
model_v2.to(device)

model_ema = None
ema_active = True
if ema_active:
    ema_decay = 0.9998
    model_ema = ModelEmaV3(
        model_v2,
        decay=ema_decay,
    )
    print(f"Using EMA with decay = {ema_decay}")

model_path = ''

mixup = True
if mixup :
    mixup_fn = Mixup(mixup_alpha=.8, 
                    cutmix_alpha=1., 
                    prob=1., 
                    switch_prob=0.5, 
                    mode='batch',
                    label_smoothing=.1,
                    num_classes=100)
    
    criterion = SoftTargetCrossEntropy()
else :
    criterion = LabelSmoothingCrossEntropy(.1)
    
criterion = nn.CrossEntropyLoss(label_smoothing=0.)

epochs = 100

optimizer = optim.AdamW(model_v2.parameters(), lr=4e-3, weight_decay=0.05)
warmup_steps = int(len(train_loader)*(epochs)*0.1)
train_steps = len(train_loader)*(epochs)
scheduler = CosineWarmupScheduler(optimizer, 
                                num_warmup_steps=warmup_steps, 
                                num_training_steps=train_steps,
                                num_cycles=0.5,
                                min_lr=1e-7)

Using EMA with decay = 0.9998




In [10]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')

# GradScaler 초기화
scaler = GradScaler()

for i in range(epochs // 100):
    for epoch in range(100):
        model_v2.train()
        start_time = time.time()
        running_loss = 0.0
        pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1 + i*100}")
        
        for _, data in pbar:
            inputs, labels = data[0].to(device), data[1].to(device)
            inputs, labels = mixup_fn(inputs, labels)
            optimizer.zero_grad()

            # AutoCast 적용
            with autocast():
                outputs = model_v2(inputs)
                loss = criterion(outputs, labels)
                
            # 스케일링된 그라디언트 계산
            scaler.scale(loss).backward()

            # 그라디언트 클리핑 전에 스케일링 제거
            scaler.unscale_(optimizer)
            clip_grad_norm_(model_v2.parameters(), max_norm=max_norm)

            # 옵티마이저 스텝 및 스케일러 업데이트
            scaler.step(optimizer)
            scaler.update()
            
            # EMA 모델 업데이트
            if model_ema is not None:
                model_ema.update(model_v2)
                
            scheduler.step()
                
            lr = optimizer.param_groups[0]["lr"]
            lrs.append(lr)
            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        losses.append(epoch_loss)        

        model_v2.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data in valid_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                outputs = model_v2(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
        val_loss /= len(valid_loader)
        val_losses.append(val_loss)
        
        # 모델 저장
        total_loss = val_loss + epoch_loss
        if total_loss < best_loss:
            best_loss = total_loss
            model_save = False
            if model_save:
                torch.save(model_v2.state_dict(), model_path)

        epoch_duration = time.time() - start_time
        training_time += epoch_duration
        
        text = f'\tLoss: {epoch_loss:.4f}, Val_Loss: {val_loss:.4f}, Total Mean Loss: {total_loss/2:.4f}, LR: {lr}, Duration: {epoch_duration:.2f} sec'
        
        if model_save:
            text += f' - model saved!'
            model_save = False

        print(text)

    # 예측 수행 및 레이블 저장
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model_v2(images)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # 혼동 행렬 생성
    cm = confusion_matrix(all_labels, all_preds)

    # 예측과 실제 레이블
    y_true = all_labels  # 실제 레이블
    y_pred = all_preds  # 모델에 의해 예측된 레이블

    # 전체 데이터셋에 대한 정확도
    accuracy = accuracy_score(y_true, y_pred)

    # 평균 정밀도, 리콜, F1-Score ('weighted')
    precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

    # 판다스 데이터프레임으로 결과 정리
    performance_metrics = pd.DataFrame({
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
        'Value': [accuracy, precision, recall, f1_score]
    })

    # 데이터프레임 출력
    print(f"\n[{i*100+100} epoch result]\n", performance_metrics)

Epoch 1: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 4.4953, Val_Loss: 3.9957, Total Mean Loss: 4.2455, LR: 0.00040008999999999997, Duration: 67.57 sec


Epoch 2: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 4.3383, Val_Loss: 3.8066, Total Mean Loss: 4.0724, LR: 0.0008000799999999999, Duration: 67.57 sec


Epoch 3: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 4.2863, Val_Loss: 3.7261, Total Mean Loss: 4.0062, LR: 0.0012000700000000001, Duration: 67.70 sec


Epoch 4: 100%|██████████| 27/27 [01:06<00:00,  2.48s/it]


	Loss: 4.2008, Val_Loss: 3.6685, Total Mean Loss: 3.9346, LR: 0.00160006, Duration: 67.98 sec


Epoch 5: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 4.2399, Val_Loss: 3.5638, Total Mean Loss: 3.9018, LR: 0.0020000499999999997, Duration: 67.65 sec


Epoch 6: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 4.1107, Val_Loss: 3.3985, Total Mean Loss: 3.7546, LR: 0.00240004, Duration: 67.89 sec


Epoch 7: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 4.0405, Val_Loss: 3.2016, Total Mean Loss: 3.6211, LR: 0.00280003, Duration: 67.75 sec


Epoch 8: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 4.0470, Val_Loss: 3.0241, Total Mean Loss: 3.5355, LR: 0.0032000199999999996, Duration: 67.74 sec


Epoch 9: 100%|██████████| 27/27 [01:07<00:00,  2.48s/it]


	Loss: 4.0337, Val_Loss: 3.0783, Total Mean Loss: 3.5560, LR: 0.00360001, Duration: 68.12 sec


Epoch 10: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 3.9008, Val_Loss: 2.8049, Total Mean Loss: 3.3528, LR: 0.004, Duration: 67.85 sec


Epoch 11: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 3.7319, Val_Loss: 2.6172, Total Mean Loss: 3.1745, LR: 0.003998781684496841, Duration: 67.93 sec


Epoch 12: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.7359, Val_Loss: 2.5972, Total Mean Loss: 3.1665, LR: 0.003995128222317136, Duration: 67.60 sec


Epoch 13: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.7365, Val_Loss: 2.3938, Total Mean Loss: 3.0652, LR: 0.003989044064641779, Duration: 67.60 sec


Epoch 14: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.5635, Val_Loss: 2.3816, Total Mean Loss: 2.9726, LR: 0.0039805366240797035, Duration: 67.60 sec


Epoch 15: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.7393, Val_Loss: 2.2984, Total Mean Loss: 3.0189, LR: 0.003969616265636766, Duration: 67.59 sec


Epoch 16: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.5639, Val_Loss: 2.1167, Total Mean Loss: 2.8403, LR: 0.003956296294087574, Duration: 67.48 sec


Epoch 17: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.6771, Val_Loss: 2.0762, Total Mean Loss: 2.8767, LR: 0.003940592937765679, Duration: 67.58 sec


Epoch 18: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.4355, Val_Loss: 2.0092, Total Mean Loss: 2.7224, LR: 0.003922525328791841, Duration: 67.61 sec


Epoch 19: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.4700, Val_Loss: 2.1298, Total Mean Loss: 2.7999, LR: 0.0039021154797644923, Duration: 67.51 sec


Epoch 20: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.5844, Val_Loss: 2.1278, Total Mean Loss: 2.8561, LR: 0.0038793882569407774, Duration: 67.55 sec


Epoch 21: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.4391, Val_Loss: 1.9167, Total Mean Loss: 2.6779, LR: 0.0038543713499408464, Duration: 67.53 sec


Epoch 22: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.3674, Val_Loss: 1.7117, Total Mean Loss: 2.5396, LR: 0.003827095238012319, Duration: 67.48 sec


Epoch 23: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.1984, Val_Loss: 1.6721, Total Mean Loss: 2.4352, LR: 0.003797593152896019, Duration: 67.56 sec


Epoch 24: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.3351, Val_Loss: 1.7236, Total Mean Loss: 2.5294, LR: 0.0037659010383382105, Duration: 67.45 sec


Epoch 25: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 3.2980, Val_Loss: 1.5397, Total Mean Loss: 2.4188, LR: 0.003732057506298688, Duration: 67.72 sec


Epoch 26: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 3.2775, Val_Loss: 1.6634, Total Mean Loss: 2.4704, LR: 0.0036961037899080436, Duration: 67.67 sec


Epoch 27: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.1404, Val_Loss: 1.6914, Total Mean Loss: 2.4159, LR: 0.0036580836932314552, Duration: 67.52 sec


Epoch 28: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.2671, Val_Loss: 1.5354, Total Mean Loss: 2.4012, LR: 0.003618043537900176, Duration: 67.54 sec


Epoch 29: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.9860, Val_Loss: 1.4390, Total Mean Loss: 2.2125, LR: 0.003576032106675763, Duration: 67.41 sec


Epoch 30: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.1470, Val_Loss: 1.3110, Total Mean Loss: 2.2290, LR: 0.0035321005840157995, Duration: 67.41 sec


Epoch 31: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.8700, Val_Loss: 1.2069, Total Mean Loss: 2.0385, LR: 0.0034863024937135142, Duration: 67.52 sec


Epoch 32: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.0219, Val_Loss: 1.3896, Total Mean Loss: 2.2057, LR: 0.003438693633687285, Duration: 67.56 sec


Epoch 33: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.0175, Val_Loss: 1.3374, Total Mean Loss: 2.1774, LR: 0.0033893320079994714, Duration: 67.50 sec


Epoch 34: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.8511, Val_Loss: 1.1852, Total Mean Loss: 2.0182, LR: 0.003338277756187398, Duration: 67.57 sec


Epoch 35: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 3.0255, Val_Loss: 1.2675, Total Mean Loss: 2.1465, LR: 0.003285593079992594, Duration: 67.41 sec


Epoch 36: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.9926, Val_Loss: 1.1701, Total Mean Loss: 2.0814, LR: 0.00323134216757755, Duration: 67.49 sec


Epoch 37: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.8710, Val_Loss: 1.1276, Total Mean Loss: 1.9993, LR: 0.0031755911153223313, Duration: 67.41 sec


Epoch 38: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.8884, Val_Loss: 1.0406, Total Mean Loss: 1.9645, LR: 0.0031184078472963196, Duration: 67.40 sec


Epoch 39: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 2.6145, Val_Loss: 1.1127, Total Mean Loss: 1.8636, LR: 0.003059862032503198, Duration: 67.70 sec


Epoch 40: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.9169, Val_Loss: 1.0917, Total Mean Loss: 2.0043, LR: 0.0030000249999999995, Duration: 67.52 sec


Epoch 41: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.9894, Val_Loss: 1.0460, Total Mean Loss: 2.0177, LR: 0.002938969651993642, Duration: 67.55 sec


Epoch 42: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.7877, Val_Loss: 1.0938, Total Mean Loss: 1.9408, LR: 0.002876770375020815, Duration: 67.37 sec


Epoch 43: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.5092, Val_Loss: 0.9956, Total Mean Loss: 1.7524, LR: 0.0028135029493194467, Duration: 67.48 sec


Epoch 44: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.6443, Val_Loss: 1.0065, Total Mean Loss: 1.8254, LR: 0.0027492444565021534, Duration: 67.49 sec


Epoch 45: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 2.4927, Val_Loss: 0.9553, Total Mean Loss: 1.7240, LR: 0.0026840731856441714, Duration: 67.69 sec


Epoch 46: 100%|██████████| 27/27 [01:07<00:00,  2.48s/it]


	Loss: 2.5427, Val_Loss: 0.9239, Total Mean Loss: 1.7333, LR: 0.0026180685379001757, Duration: 68.13 sec


Epoch 47: 100%|██████████| 27/27 [01:06<00:00,  2.45s/it]


	Loss: 2.6331, Val_Loss: 0.9464, Total Mean Loss: 1.7898, LR: 0.002551310929766207, Duration: 67.36 sec


Epoch 48: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.6876, Val_Loss: 0.8948, Total Mean Loss: 1.7912, LR: 0.002483881695104555, Duration: 67.42 sec


Epoch 49: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.6114, Val_Loss: 0.9237, Total Mean Loss: 1.7676, LR: 0.0024158629860509774, Duration: 67.55 sec


Epoch 50: 100%|██████████| 27/27 [01:06<00:00,  2.45s/it]


	Loss: 2.6508, Val_Loss: 0.9648, Total Mean Loss: 1.8078, LR: 0.0023473376729249776, Duration: 67.33 sec


Epoch 51: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.5695, Val_Loss: 0.9561, Total Mean Loss: 1.7628, LR: 0.0022783892432650826, Duration: 67.48 sec


Epoch 52: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.4391, Val_Loss: 0.8177, Total Mean Loss: 1.6284, LR: 0.0022091017001121434, Duration: 67.44 sec


Epoch 53: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.4974, Val_Loss: 0.8427, Total Mean Loss: 1.6701, LR: 0.002139559459664563, Duration: 67.52 sec


Epoch 54: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.4575, Val_Loss: 0.7762, Total Mean Loss: 1.6168, LR: 0.0020698472484301667, Duration: 67.46 sec


Epoch 55: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.5227, Val_Loss: 0.8147, Total Mean Loss: 1.6687, LR: 0.0020000499999999997, Duration: 67.56 sec


Epoch 56: 100%|██████████| 27/27 [01:06<00:00,  2.45s/it]


	Loss: 2.4267, Val_Loss: 0.8300, Total Mean Loss: 1.6284, LR: 0.0019302527515698336, Duration: 67.35 sec


Epoch 57: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.4398, Val_Loss: 0.7255, Total Mean Loss: 1.5827, LR: 0.0018605405403354365, Duration: 67.56 sec


Epoch 58: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.2488, Val_Loss: 0.8539, Total Mean Loss: 1.5514, LR: 0.0017909982998878568, Duration: 67.61 sec


Epoch 59: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1985, Val_Loss: 0.7956, Total Mean Loss: 1.4970, LR: 0.0017217107567349176, Duration: 67.54 sec


Epoch 60: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.3759, Val_Loss: 0.8497, Total Mean Loss: 1.6128, LR: 0.0016527623270750228, Duration: 67.58 sec


Epoch 61: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 2.2043, Val_Loss: 0.8578, Total Mean Loss: 1.5311, LR: 0.0015842370139490226, Duration: 67.73 sec


Epoch 62: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.3755, Val_Loss: 0.8482, Total Mean Loss: 1.6119, LR: 0.0015162183048954448, Duration: 67.44 sec


Epoch 63: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1355, Val_Loss: 0.8108, Total Mean Loss: 1.4731, LR: 0.0014487890702337925, Duration: 67.54 sec


Epoch 64: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1350, Val_Loss: 0.7789, Total Mean Loss: 1.4570, LR: 0.001382031462099824, Duration: 67.42 sec


Epoch 65: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1932, Val_Loss: 0.8184, Total Mean Loss: 1.5058, LR: 0.001316026814355829, Duration: 67.60 sec


Epoch 66: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1584, Val_Loss: 0.7591, Total Mean Loss: 1.4588, LR: 0.0012508555434978467, Duration: 67.54 sec


Epoch 67: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 2.2821, Val_Loss: 0.8223, Total Mean Loss: 1.5522, LR: 0.0011865970506805537, Duration: 67.65 sec


Epoch 68: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.2484, Val_Loss: 0.8220, Total Mean Loss: 1.5352, LR: 0.0011233296249791845, Duration: 67.50 sec


Epoch 69: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1439, Val_Loss: 0.7533, Total Mean Loss: 1.4486, LR: 0.0010611303480063583, Duration: 67.41 sec


Epoch 70: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.2082, Val_Loss: 0.7790, Total Mean Loss: 1.4936, LR: 0.0010000750000000004, Duration: 67.47 sec


Epoch 71: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.2692, Val_Loss: 0.7999, Total Mean Loss: 1.5346, LR: 0.000940237967496802, Duration: 67.54 sec


Epoch 72: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0755, Val_Loss: 0.7652, Total Mean Loss: 1.4203, LR: 0.0008816921527036801, Duration: 67.51 sec


Epoch 73: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1475, Val_Loss: 0.7764, Total Mean Loss: 1.4620, LR: 0.0008245088846776685, Duration: 67.45 sec


Epoch 74: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1947, Val_Loss: 0.7709, Total Mean Loss: 1.4828, LR: 0.0007687578324224496, Duration: 67.51 sec


Epoch 75: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1150, Val_Loss: 0.7763, Total Mean Loss: 1.4456, LR: 0.0007145069200074055, Duration: 67.46 sec


Epoch 76: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.2568, Val_Loss: 0.7770, Total Mean Loss: 1.5169, LR: 0.000661822243812602, Duration: 67.61 sec


Epoch 77: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0956, Val_Loss: 0.8004, Total Mean Loss: 1.4480, LR: 0.0006107679920005282, Duration: 67.49 sec


Epoch 78: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.2704, Val_Loss: 0.8184, Total Mean Loss: 1.5444, LR: 0.0005614063663127149, Duration: 67.54 sec


Epoch 79: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1208, Val_Loss: 0.7864, Total Mean Loss: 1.4536, LR: 0.000513797506286485, Duration: 67.39 sec


Epoch 80: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.2827, Val_Loss: 0.7837, Total Mean Loss: 1.5332, LR: 0.00046799941598420013, Duration: 67.39 sec


Epoch 81: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 1.9240, Val_Loss: 0.7633, Total Mean Loss: 1.3437, LR: 0.0004240678933242365, Duration: 67.58 sec


Epoch 82: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1949, Val_Loss: 0.7407, Total Mean Loss: 1.4678, LR: 0.00038205646209982404, Duration: 67.43 sec


Epoch 83: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 1.9439, Val_Loss: 0.7818, Total Mean Loss: 1.3629, LR: 0.0003420163067685445, Duration: 67.42 sec


Epoch 84: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0535, Val_Loss: 0.7407, Total Mean Loss: 1.3971, LR: 0.0003039962100919559, Duration: 67.52 sec


Epoch 85: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0102, Val_Loss: 0.7809, Total Mean Loss: 1.3955, LR: 0.0002680424937013118, Duration: 67.37 sec


Epoch 86: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.3759, Val_Loss: 0.7563, Total Mean Loss: 1.5661, LR: 0.00023419896166178896, Duration: 67.55 sec


Epoch 87: 100%|██████████| 27/27 [01:06<00:00,  2.45s/it]


	Loss: 2.2072, Val_Loss: 0.7671, Total Mean Loss: 1.4871, LR: 0.0002025068471039813, Duration: 67.28 sec


Epoch 88: 100%|██████████| 27/27 [01:06<00:00,  2.45s/it]


	Loss: 2.0811, Val_Loss: 0.8004, Total Mean Loss: 1.4408, LR: 0.00017300476198768016, Duration: 67.27 sec


Epoch 89: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0743, Val_Loss: 0.7554, Total Mean Loss: 1.4149, LR: 0.00014572865005915372, Duration: 67.54 sec


Epoch 90: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0216, Val_Loss: 0.7178, Total Mean Loss: 1.3697, LR: 0.00012071174305922266, Duration: 67.37 sec


Epoch 91: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 1.9036, Val_Loss: 0.7793, Total Mean Loss: 1.3415, LR: 9.79845202355077e-05, Duration: 67.45 sec


Epoch 92: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1194, Val_Loss: 0.7585, Total Mean Loss: 1.4390, LR: 7.757467120815912e-05, Duration: 67.51 sec


Epoch 93: 100%|██████████| 27/27 [01:06<00:00,  2.45s/it]


	Loss: 2.1681, Val_Loss: 0.7490, Total Mean Loss: 1.4586, LR: 5.950706223432085e-05, Duration: 67.35 sec


Epoch 94: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0356, Val_Loss: 0.7410, Total Mean Loss: 1.3883, LR: 4.3803705912425316e-05, Duration: 67.52 sec


Epoch 95: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 1.9751, Val_Loss: 0.7690, Total Mean Loss: 1.3720, LR: 3.0483734363234566e-05, Duration: 67.76 sec


Epoch 96: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 2.0435, Val_Loss: 0.7558, Total Mean Loss: 1.3996, LR: 1.9563375920296352e-05, Duration: 67.69 sec


Epoch 97: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0720, Val_Loss: 0.7562, Total Mean Loss: 1.4141, LR: 1.1055935358221834e-05, Duration: 67.62 sec


Epoch 98: 100%|██████████| 27/27 [01:06<00:00,  2.47s/it]


	Loss: 2.1632, Val_Loss: 0.7543, Total Mean Loss: 1.4588, LR: 4.971777682864596e-06, Duration: 67.64 sec


Epoch 99: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.1685, Val_Loss: 0.7549, Total Mean Loss: 1.4617, LR: 1.3183155031594304e-06, Duration: 67.57 sec


Epoch 100: 100%|██████████| 27/27 [01:06<00:00,  2.46s/it]


	Loss: 2.0983, Val_Loss: 0.7549, Total Mean Loss: 1.4266, LR: 1e-07, Duration: 67.38 sec

[100 epoch result]
       Metric     Value
0   Accuracy  0.894000
1  Precision  0.912238
2     Recall  0.894000
3   F1 Score  0.891306


# STEP 03 - Test ConvNeXt V2 with FCMAE

## 00.MASK

In [54]:
import torch

# Define the upsample_mask and a simple example
def upsample_mask(mask, scale):
    assert len(mask.shape) == 2
    p = int(mask.shape[1] ** .5)
    return mask.reshape(-1, p, p).repeat_interleave(scale, axis=1).repeat_interleave(scale, axis=2)

# 예시 마스크와 데이터
mask = torch.tensor([[0, 1, 0, 1],
                     [1, 0, 1, 0]])
x = torch.arange(1., 17.).reshape(1, 1, 4, 4)  # 4x4 데이터, 채널과 배치 차원 포함

# 마스크 업샘플링
upsampled_mask = upsample_mask(mask, 2)  # 스케일을 2로 설정
upsampled_mask = upsampled_mask.unsqueeze(1)  # 채널 차원 추가, 올바른 변수 이름 사용

# 데이터에 마스크 적용
x_orig = x.clone()  # 원본 데이터 복사
x_masked = x * (1. - upsampled_mask.type_as(x))  # 마스크 적용, 타입 일치 시키기

print(f"\n원본 데이터\n{x_orig}")
print(f"\n마스크\n{upsampled_mask}")
print(f"\n마스크 된 데이터\n{x_masked}")


원본 데이터
tensor([[[[ 1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.],
          [ 9., 10., 11., 12.],
          [13., 14., 15., 16.]]]])

마스크
tensor([[[[0, 0, 1, 1],
          [0, 0, 1, 1],
          [0, 0, 1, 1],
          [0, 0, 1, 1]]],


        [[[1, 1, 0, 0],
          [1, 1, 0, 0],
          [1, 1, 0, 0],
          [1, 1, 0, 0]]]])

마스크 된 데이터
tensor([[[[ 1.,  2.,  0.,  0.],
          [ 5.,  6.,  0.,  0.],
          [ 9., 10.,  0.,  0.],
          [13., 14.,  0.,  0.]]],


        [[[ 0.,  0.,  3.,  4.],
          [ 0.,  0.,  7.,  8.],
          [ 0.,  0., 11., 12.],
          [ 0.,  0., 15., 16.]]]])


## 01.TRAIN FCMAE

In [3]:
from model.fcmae import convnextv2_fcmae_tiny

model = convnextv2_fcmae_tiny()



In [4]:
# Transforms 정의하기
train_transform = transforms.Compose([
            transforms.RandomResizedCrop(224, scale=(0.2, 1.0), interpolation=3),  # 3 is bicubic
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

data_dir = '../../data/sports/'
batch_size = 800
train_path = data_dir

# dataset load
train_data = ImageFolder(train_path, transform=train_transform)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True)

len(train_loader)
device = 'cuda:0'
model.to(device)

model_path = '../../model/convnext/fcmae.pt'

epochs = 500
optimizer = optim.AdamW(model.parameters(), lr=1.5e-4, weight_decay=0.05, betas=(0.9, 0.95))

warmup_steps = int(len(train_loader)*(epochs)*0.1)
train_steps = len(train_loader)*(epochs)
scheduler = CosineWarmupScheduler(optimizer, 
                                num_warmup_steps=50, 
                                num_training_steps=500,
                                num_cycles=0.5,
                                min_lr=1e-7)



In [None]:

training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')

for epoch in range(epochs):
    model.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for _, data in pbar:
        optimizer.zero_grad()
        
        samples= data[0].to(device)
        loss, _, _ = model(samples, mask_ratio=0.6)
        
        loss.backward()
        optimizer.step()
            
        lr = optimizer.param_groups[0]["lr"]
        lrs.append(lr)
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)        
    
    # 모델 저장 로직 조정
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        if epoch > (epochs // 2) :
            torch.save(model.state_dict(), model_path)
            model_saved_text = ' - model saved!'
        else :
            model_saved_text = ' - model save pass'
    else:
        model_saved_text = ''
    
    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss:,.4f}, LR: {lr}, Duration: {epoch_duration:.2f} sec{model_saved_text}'
    print(text)

    # 에폭마다 스케줄러 업데이트
    scheduler.step()

## 02.SUP after FCMAE

### Load Model & pretrained Weights

In [3]:
from model.convnextv2 import load_convNext

model = load_convNext(droppath=0.2)

In [4]:
model.state_dict().keys()

odict_keys(['downsample_layers.0.stem_conv.weight', 'downsample_layers.0.stem_conv.bias', 'downsample_layers.0.stem_ln.weight', 'downsample_layers.0.stem_ln.bias', 'downsample_layers.1.ds_ln.weight', 'downsample_layers.1.ds_ln.bias', 'downsample_layers.1.ds_conv.weight', 'downsample_layers.1.ds_conv.bias', 'downsample_layers.2.ds_ln.weight', 'downsample_layers.2.ds_ln.bias', 'downsample_layers.2.ds_conv.weight', 'downsample_layers.2.ds_conv.bias', 'downsample_layers.3.ds_ln.weight', 'downsample_layers.3.ds_ln.bias', 'downsample_layers.3.ds_conv.weight', 'downsample_layers.3.ds_conv.bias', 'stages.0.0.dwconv.weight', 'stages.0.0.dwconv.bias', 'stages.0.0.layernorm.weight', 'stages.0.0.layernorm.bias', 'stages.0.0.pwconv1.weight', 'stages.0.0.pwconv1.bias', 'stages.0.0.grn.gamma', 'stages.0.0.grn.beta', 'stages.0.0.pwconv2.weight', 'stages.0.0.pwconv2.bias', 'stages.0.1.dwconv.weight', 'stages.0.1.dwconv.bias', 'stages.0.1.layernorm.weight', 'stages.0.1.layernorm.bias', 'stages.0.1.pwcon

In [5]:
model.state_dict()['downsample_layers.0.stem_conv.weight'][0]

tensor([[[-0.1022,  0.0025,  0.0971, -0.1319],
         [ 0.0741, -0.0137, -0.1210,  0.0809],
         [-0.0412,  0.1016,  0.1059,  0.1085],
         [-0.0005,  0.1099, -0.1407,  0.0341]],

        [[ 0.0822,  0.0716, -0.0384, -0.0843],
         [-0.0118,  0.0952, -0.0900,  0.0358],
         [-0.0824, -0.1412, -0.0292, -0.1179],
         [ 0.0908,  0.0106, -0.0017, -0.0202]],

        [[ 0.0073,  0.0537, -0.0127, -0.0521],
         [ 0.1309, -0.1267, -0.0205,  0.0763],
         [ 0.0940, -0.0117,  0.1352, -0.0373],
         [-0.0094,  0.0734, -0.1340, -0.1404]]])

In [6]:
model.state_dict()['stages.0.0.dwconv.weight'][0]

tensor([[[-0.1225,  0.1148, -0.0976,  0.0228,  0.0768, -0.0003,  0.0342],
         [ 0.1254, -0.0224, -0.0078,  0.0486, -0.0807,  0.1374,  0.0436],
         [ 0.0313,  0.1053,  0.1102, -0.0897,  0.0588, -0.1138, -0.0082],
         [-0.0232, -0.0490,  0.0008,  0.1120,  0.0286,  0.1220, -0.0252],
         [ 0.1202,  0.1240, -0.1240, -0.0277,  0.0616, -0.1307, -0.0818],
         [ 0.1062, -0.0777,  0.0282, -0.0153, -0.1320, -0.0815, -0.0488],
         [ 0.1405, -0.0576, -0.1348,  0.0177,  0.0700, -0.0779, -0.0733]]])

### Remap Weights

In [7]:
from collections import OrderedDict

def remap_checkpoint_keys(ckpt):
    new_ckpt = OrderedDict()
    for k, v in ckpt.items():
        if k.startswith('encoder'):
            k = '.'.join(k.split('.')[1:]) # remove encoder in the name
        if k.endswith('kernel'):
            k = '.'.join(k.split('.')[:-1]) # remove kernel in the name
            new_k = k + '.weight'
            if len(v.shape) == 3: # resahpe standard convolution
                kv, in_dim, out_dim = v.shape
                ks = int(math.sqrt(kv))
                new_ckpt[new_k] = v.permute(2, 1, 0).\
                    reshape(out_dim, in_dim, ks, ks).transpose(3, 2)
            elif len(v.shape) == 2: # reshape depthwise convolution
                kv, dim = v.shape
                ks = int(math.sqrt(kv))
                new_ckpt[new_k] = v.permute(1, 0).\
                    reshape(dim, 1, ks, ks).transpose(3, 2)
            continue
        elif 'ln' in k or 'linear' in k:
            k = k.split('.')
            k.pop(-2) # remove ln and linear in the name
            new_k = '.'.join(k)
        else:
            new_k = k
        new_ckpt[new_k] = v

    # reshape grn affine parameters and biases
    for k, v in new_ckpt.items():
        if k.endswith('bias') and len(v.shape) != 1:
            new_ckpt[k] = v.reshape(-1)
        elif 'grn' in k:
            new_ckpt[k] = v.unsqueeze(0).unsqueeze(1)
    return new_ckpt

def load_state_dict(model, state_dict, prefix='', ignore_missing="relative_position_index"):
    missing_keys = []
    unexpected_keys = []
    error_msgs = []
    # copy state_dict so _load_from_state_dict can modify it
    metadata = getattr(state_dict, '_metadata', None)
    state_dict = state_dict.copy()
    if metadata is not None:
        state_dict._metadata = metadata

    def load(module, prefix=''):
        local_metadata = {} if metadata is None else metadata.get(
            prefix[:-1], {})
        module._load_from_state_dict(
            state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs)
        for name, child in module._modules.items():
            if child is not None:
                load(child, prefix + name + '.')

    load(model, prefix=prefix)

    warn_missing_keys = []
    ignore_missing_keys = []
    for key in missing_keys:
        keep_flag = True
        for ignore_key in ignore_missing.split('|'):
            if ignore_key in key:
                keep_flag = False
                break
        if keep_flag:
            warn_missing_keys.append(key)
        else:
            ignore_missing_keys.append(key)

    missing_keys = warn_missing_keys

    if len(missing_keys) > 0:
        print("Weights of {} not initialized from pretrained model: {}".format(
            model.__class__.__name__, missing_keys))
    if len(unexpected_keys) > 0:
        print("Weights from pretrained model not used in {}: {}".format(
            model.__class__.__name__, unexpected_keys))
    if len(ignore_missing_keys) > 0:
        print("Ignored weights of {} not initialized from pretrained model: {}".format(
            model.__class__.__name__, ignore_missing_keys))
    if len(error_msgs) > 0:
        print('\n'.join(error_msgs))

In [8]:
pretrain_path = '../../model/convnext/fcmae.pt'
checkpoint_model = torch.load(pretrain_path, map_location='cpu')

state_dict = model.state_dict()
for k in ['head.weight', 'head.bias']:
    if k in checkpoint_model and checkpoint_model[k].shape != state_dict[k].shape:
        print(f"Removing key {k} from head of pretrained checkpoint")
        del checkpoint_model[k]

# remove decoder weights
checkpoint_model_keys = list(checkpoint_model.keys())
for k in checkpoint_model_keys:
    if 'decoder' in k or 'mask_token'in k or \
        'proj' in k or 'pred' in k:
        print(f"Removing key {k} from decoder of pretrained checkpoint")
        del checkpoint_model[k]

checkpoint_model = remap_checkpoint_keys(checkpoint_model)
load_state_dict(model, checkpoint_model, prefix='')

Removing key mask_token from decoder of pretrained checkpoint
Removing key proj.weight from decoder of pretrained checkpoint
Removing key proj.bias from decoder of pretrained checkpoint
Removing key decoder.0.dwconv.weight from decoder of pretrained checkpoint
Removing key decoder.0.dwconv.bias from decoder of pretrained checkpoint
Removing key decoder.0.layernorm.weight from decoder of pretrained checkpoint
Removing key decoder.0.layernorm.bias from decoder of pretrained checkpoint
Removing key decoder.0.pwconv1.weight from decoder of pretrained checkpoint
Removing key decoder.0.pwconv1.bias from decoder of pretrained checkpoint
Removing key decoder.0.grn.gamma from decoder of pretrained checkpoint
Removing key decoder.0.grn.beta from decoder of pretrained checkpoint
Removing key decoder.0.pwconv2.weight from decoder of pretrained checkpoint
Removing key decoder.0.pwconv2.bias from decoder of pretrained checkpoint
Removing key pred.weight from decoder of pretrained checkpoint
Removing

In [9]:
# manually initialize fc layer
nn.init.trunc_normal_(model.fc.weight, std=2e-5)
torch.nn.init.constant_(model.fc.bias, 0.)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.], requires_grad=True)

In [10]:
model.state_dict()['downsample_layers.0.stem_conv.weight'][0]

tensor([[[-1.2056, -0.8509,  1.1679,  0.6501],
         [ 0.1916, -0.6376,  0.5137, -0.1978],
         [-0.7029,  0.1940,  0.3492,  0.6778],
         [-0.8013,  0.2786,  1.0694, -0.1054]],

        [[ 0.4654,  0.1810,  0.9873, -1.6086],
         [ 0.7260,  0.8883,  0.5517,  0.1936],
         [ 0.6057, -1.0542, -0.8394, -0.0230],
         [-0.9260,  0.6637, -1.4884,  0.6571]],

        [[ 0.3846,  0.3557,  0.1090,  1.3101],
         [-0.3910,  0.5647, -0.3082, -1.5620],
         [ 0.5292,  1.0900, -0.2673, -0.2546],
         [-0.2218, -0.6494, -0.2747, -0.3344]]])

In [11]:
model.state_dict()['downsample_layers.0.stem_ln.weight']

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1.])

In [12]:
model.state_dict()['stages.0.0.dwconv.weight'][0]

tensor([[[ 0.1999,  0.9103, -0.8238, -0.0341,  0.8881,  0.3552, -0.2845],
         [-0.0604,  1.1877, -0.9397,  1.6832,  0.0330,  0.5312, -0.1214],
         [-0.2699,  0.5054,  0.3215,  0.5711,  0.3631, -1.0114,  0.2572],
         [-0.2513, -0.3276, -0.1782,  2.2991,  0.4163,  0.7285, -0.0070],
         [-0.0218, -0.6208,  0.7876,  0.3589,  0.3943,  0.6794,  0.7090],
         [-0.8632, -0.1470,  0.8258,  0.6889, -0.7541, -0.6836, -0.1466],
         [ 0.9184, -0.0388, -0.0988,  0.1967, -0.6746,  0.0904,  0.1652]]])

In [13]:
# 총 파라미터 수 계산
total_params = sum(p.numel() for p in model.parameters())

# 학습 가능한 파라미터 수 계산
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print('='*80)
print(f"\nTotal Parameters: {total_params:,}")
print(f"Trainable Parameters: {trainable_params:,}\n")
print('='*80)


Total Parameters: 27,943,396
Trainable Parameters: 27,943,396



In [14]:
# Transforms 정의하기
train_transform = transforms.Compose([
    transforms.TrivialAugmentWide(interpolation=F.InterpolationMode.BICUBIC),
    transforms.RandomResizedCrop(224, scale=(0.6,1), interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

data_dir = '../../data/sports'
batch_size = 320

train_path = data_dir+'/train'
valid_path = data_dir+'/valid'
test_path = data_dir+'/test'

# dataset load
train_data = ImageFolder(train_path, transform=train_transform)
valid_data = ImageFolder(valid_path, transform=test_transform)
test_data = ImageFolder(test_path, transform=test_transform)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, pin_memory=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False, pin_memory=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [15]:
device = 'cuda:4'
max_norm = 3.0 

model.to(device)

model_ema = None
ema_active = True
if ema_active:
    ema_decay = 0.9999
    model_ema = ModelEmaV3(
        model,
        decay=ema_decay,
    )
    print(f"Using EMA with decay = {ema_decay}")

model_path = ''

mixup = True
if mixup :
    mixup_fn = Mixup(mixup_alpha=.8, 
                    cutmix_alpha=1., 
                    prob=1., 
                    switch_prob=0.5, 
                    mode='batch',
                    label_smoothing=.1,
                    num_classes=100)
    
    criterion = SoftTargetCrossEntropy()
else :
    criterion = LabelSmoothingCrossEntropy(.1)
    
criterion = nn.CrossEntropyLoss(label_smoothing=0.)

Using EMA with decay = 0.9999


### CASE 01: Without LayerWise Learning Rate Decay

In [16]:
epochs = 100

optimizer = optim.AdamW(model.parameters(), lr=8e-3, weight_decay=0.05)
warmup_steps = int(len(train_loader)*(epochs)*0.1)
train_steps = len(train_loader)*(epochs)
scheduler = CosineWarmupScheduler(optimizer, 
                                num_warmup_steps=warmup_steps, 
                                num_training_steps=train_steps,
                                num_cycles=0.5,
                                min_lr=1e-6)



In [17]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')
model_save = False
torch.backends.cudnn.benchmark = True

for i in range(epochs // 100):
    for epoch in range(100):
        model.train()
        start_time = time.time()
        running_loss = 0.0
        pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1 + i*100}")
        
        for _, data in pbar:
            inputs, labels = data[0].to(device), data[1].to(device)
            inputs, labels = mixup_fn(inputs, labels)
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
                
            loss.backward()
            # 그라디언트 클리핑 적용
            clip_grad_norm_(model.parameters(), max_norm=max_norm)
            optimizer.step()
            
            # EMA 모델 업데이트, 필요한 경우
            if model_ema is not None:
                model_ema.update(model)
                
            scheduler.step()
                
            lr = optimizer.param_groups[0]["lr"]
            lrs.append(lr)
            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        losses.append(epoch_loss)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data in valid_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
        val_loss /= len(valid_loader)
        val_losses.append(val_loss)
        
        # 모델 저장 조건 수정
        total_loss = val_loss + epoch_loss
        if total_loss < best_loss:
            best_loss = total_loss
            # torch.save(model.state_dict(), model_path)
            model_save = True
            save_text = ' - model saved!'
        else:
            save_text = ''

        epoch_duration = time.time() - start_time
        training_time += epoch_duration
        
        text = f'\tLoss: {epoch_loss:.4f}, Val_Loss: {val_loss:.4f}, Total Mean Loss: {total_loss/2:.4f}, LR: {lr}, Duration: {epoch_duration:.2f} sec{save_text}'
        print(text)

    # 예측 수행 및 레이블 저장
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # 혼동 행렬 생성
    cm = confusion_matrix(all_labels, all_preds)

    # 예측과 실제 레이블
    y_true = all_labels  # 실제 레이블
    y_pred = all_preds  # 모델에 의해 예측된 레이블

    # 전체 데이터셋에 대한 정확도
    accuracy = accuracy_score(y_true, y_pred)

    # 평균 정밀도, 리콜, F1-Score ('weighted')
    precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

    # 판다스 데이터프레임으로 결과 정리
    performance_metrics = pd.DataFrame({
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
        'Value': [accuracy, precision, recall, f1_score]
    })

    # 데이터프레임 출력
    print(f"\n[{i*100+100} epoch result]\n", performance_metrics)

Epoch 1: 100%|██████████| 43/43 [01:41<00:00,  2.36s/it]


	Loss: 4.5005, Val_Loss: 3.9983, Total Mean Loss: 4.2494, LR: 0.0008009, Duration: 102.98 sec - model saved!


Epoch 2: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 4.2779, Val_Loss: 3.5158, Total Mean Loss: 3.8968, LR: 0.0016007999999999999, Duration: 100.92 sec - model saved!


Epoch 3: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 4.2061, Val_Loss: 3.3989, Total Mean Loss: 3.8025, LR: 0.0024007000000000004, Duration: 101.14 sec - model saved!


Epoch 4: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 4.1997, Val_Loss: 3.3340, Total Mean Loss: 3.7669, LR: 0.0032006, Duration: 101.10 sec - model saved!


Epoch 5: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 4.1097, Val_Loss: 3.0904, Total Mean Loss: 3.6000, LR: 0.004000500000000001, Duration: 101.09 sec - model saved!


Epoch 6: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 4.0018, Val_Loss: 3.1023, Total Mean Loss: 3.5520, LR: 0.004800400000000001, Duration: 101.36 sec - model saved!


Epoch 7: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.9883, Val_Loss: 2.9167, Total Mean Loss: 3.4525, LR: 0.005600300000000002, Duration: 101.21 sec - model saved!


Epoch 8: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 3.9845, Val_Loss: 2.6441, Total Mean Loss: 3.3143, LR: 0.0064002, Duration: 101.44 sec - model saved!


Epoch 9: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.9452, Val_Loss: 3.0935, Total Mean Loss: 3.5194, LR: 0.007200100000000001, Duration: 100.73 sec


Epoch 10: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.9051, Val_Loss: 3.0012, Total Mean Loss: 3.4531, LR: 0.008, Duration: 101.19 sec


Epoch 11: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.8659, Val_Loss: 2.5735, Total Mean Loss: 3.2197, LR: 0.007997563612662874, Duration: 101.03 sec - model saved!


Epoch 12: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.8390, Val_Loss: 2.4251, Total Mean Loss: 3.1321, LR: 0.007990257419014168, Duration: 100.97 sec - model saved!


Epoch 13: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 3.7828, Val_Loss: 2.3779, Total Mean Loss: 3.0803, LR: 0.007978090320525408, Duration: 101.48 sec - model saved!


Epoch 14: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.7604, Val_Loss: 2.2646, Total Mean Loss: 3.0125, LR: 0.00796107714093191, Duration: 101.25 sec - model saved!


Epoch 15: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.7000, Val_Loss: 2.2239, Total Mean Loss: 2.9620, LR: 0.007939238608172326, Duration: 100.81 sec - model saved!


Epoch 16: 100%|██████████| 43/43 [01:40<00:00,  2.34s/it]


	Loss: 3.6562, Val_Loss: 2.0731, Total Mean Loss: 2.8647, LR: 0.007912601329134857, Duration: 101.85 sec - model saved!


Epoch 17: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 3.6028, Val_Loss: 2.1227, Total Mean Loss: 2.8627, LR: 0.007881197757240848, Duration: 101.33 sec - model saved!


Epoch 18: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.5454, Val_Loss: 1.9528, Total Mean Loss: 2.7491, LR: 0.007845066152905306, Duration: 100.78 sec - model saved!


Epoch 19: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 3.6236, Val_Loss: 1.9310, Total Mean Loss: 2.7773, LR: 0.007804250536922468, Duration: 101.41 sec


Epoch 20: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 3.5071, Val_Loss: 1.7346, Total Mean Loss: 2.6209, LR: 0.007758800636833242, Duration: 101.40 sec - model saved!


Epoch 21: 100%|██████████| 43/43 [01:39<00:00,  2.33s/it]


	Loss: 3.5577, Val_Loss: 1.6780, Total Mean Loss: 2.6179, LR: 0.007708771826339867, Duration: 101.28 sec - model saved!


Epoch 22: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 3.4590, Val_Loss: 1.7109, Total Mean Loss: 2.5849, LR: 0.007654225057841583, Duration: 101.60 sec - model saved!


Epoch 23: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.3858, Val_Loss: 1.7129, Total Mean Loss: 2.5494, LR: 0.00759522678817352, Duration: 101.22 sec - model saved!


Epoch 24: 100%|██████████| 43/43 [01:39<00:00,  2.33s/it]


	Loss: 3.3745, Val_Loss: 1.7047, Total Mean Loss: 2.5396, LR: 0.00753184889763928, Duration: 101.30 sec - model saved!


Epoch 25: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.3182, Val_Loss: 1.5433, Total Mean Loss: 2.4307, LR: 0.007464168602435864, Duration: 100.84 sec - model saved!


Epoch 26: 100%|██████████| 43/43 [01:40<00:00,  2.35s/it]


	Loss: 3.3372, Val_Loss: 1.4651, Total Mean Loss: 2.4012, LR: 0.007392268360577626, Duration: 102.28 sec - model saved!


Epoch 27: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.2036, Val_Loss: 1.6317, Total Mean Loss: 2.4177, LR: 0.00731623577143389, Duration: 101.21 sec


Epoch 28: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.3782, Val_Loss: 1.5350, Total Mean Loss: 2.4566, LR: 0.0072361634690026036, Duration: 101.22 sec


Epoch 29: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.1613, Val_Loss: 1.4944, Total Mean Loss: 2.3278, LR: 0.0071521490090500845, Duration: 101.15 sec - model saved!


Epoch 30: 100%|██████████| 43/43 [01:39<00:00,  2.33s/it]


	Loss: 3.2638, Val_Loss: 1.3359, Total Mean Loss: 2.2999, LR: 0.007064294750254353, Duration: 101.32 sec - model saved!


Epoch 31: 100%|██████████| 43/43 [01:40<00:00,  2.34s/it]


	Loss: 3.1162, Val_Loss: 1.2215, Total Mean Loss: 2.1688, LR: 0.0069727077294968395, Duration: 101.83 sec - model saved!


Epoch 32: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.3109, Val_Loss: 1.4087, Total Mean Loss: 2.3598, LR: 0.006877499531454436, Duration: 101.23 sec


Epoch 33: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.2327, Val_Loss: 1.2003, Total Mean Loss: 2.2165, LR: 0.00677878615265076, Duration: 101.03 sec


Epoch 34: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.1314, Val_Loss: 1.2135, Total Mean Loss: 2.1725, LR: 0.006676687860132254, Duration: 101.20 sec


Epoch 35: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 3.0798, Val_Loss: 1.1072, Total Mean Loss: 2.0935, LR: 0.0065713290449413144, Duration: 101.45 sec - model saved!


Epoch 36: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.9622, Val_Loss: 1.1090, Total Mean Loss: 2.0356, LR: 0.006462838070564971, Duration: 101.30 sec - model saved!


Epoch 37: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.0868, Val_Loss: 1.0859, Total Mean Loss: 2.0864, LR: 0.006351347116543747, Duration: 100.86 sec


Epoch 38: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.0997, Val_Loss: 1.2129, Total Mean Loss: 2.1563, LR: 0.006236992017431253, Duration: 101.04 sec


Epoch 39: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.1669, Val_Loss: 1.2889, Total Mean Loss: 2.2279, LR: 0.006119912097300704, Duration: 101.27 sec


Epoch 40: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.9470, Val_Loss: 0.9977, Total Mean Loss: 1.9724, LR: 0.006000250000000001, Duration: 101.18 sec - model saved!


Epoch 41: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.1658, Val_Loss: 0.9994, Total Mean Loss: 2.0826, LR: 0.005878151515362171, Duration: 101.16 sec


Epoch 42: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.9747, Val_Loss: 1.0842, Total Mean Loss: 2.0295, LR: 0.0057537654015829155, Duration: 101.16 sec


Epoch 43: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 3.0236, Val_Loss: 1.0850, Total Mean Loss: 2.0543, LR: 0.0056272432039816645, Duration: 101.36 sec


Epoch 44: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 3.0502, Val_Loss: 0.9466, Total Mean Loss: 1.9984, LR: 0.005498739070366941, Duration: 101.35 sec


Epoch 45: 100%|██████████| 43/43 [01:40<00:00,  2.34s/it]


	Loss: 2.9102, Val_Loss: 0.9602, Total Mean Loss: 1.9352, LR: 0.005368409563231013, Duration: 102.09 sec - model saved!


Epoch 46: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.8590, Val_Loss: 1.0632, Total Mean Loss: 1.9611, LR: 0.005236413469002603, Duration: 101.11 sec


Epoch 47: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.7639, Val_Loss: 0.8998, Total Mean Loss: 1.8319, LR: 0.005102911604590089, Duration: 101.43 sec - model saved!


Epoch 48: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.9008, Val_Loss: 0.8654, Total Mean Loss: 1.8831, LR: 0.004968066621450871, Duration: 101.22 sec


Epoch 49: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.6531, Val_Loss: 0.8360, Total Mean Loss: 1.7445, LR: 0.004832042807425629, Duration: 101.15 sec - model saved!


Epoch 50: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.7464, Val_Loss: 1.0506, Total Mean Loss: 1.8985, LR: 0.00469500588657889, Duration: 101.50 sec


Epoch 51: 100%|██████████| 43/43 [01:40<00:00,  2.34s/it]


	Loss: 2.7989, Val_Loss: 0.9023, Total Mean Loss: 1.8506, LR: 0.004557122817289782, Duration: 101.91 sec


Epoch 52: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.6784, Val_Loss: 0.9291, Total Mean Loss: 1.8037, LR: 0.004418561588838981, Duration: 101.59 sec


Epoch 53: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.6665, Val_Loss: 0.7547, Total Mean Loss: 1.7106, LR: 0.0042794910167396295, Duration: 100.90 sec - model saved!


Epoch 54: 100%|██████████| 43/43 [01:40<00:00,  2.34s/it]


	Loss: 2.9438, Val_Loss: 0.9641, Total Mean Loss: 1.9539, LR: 0.004140080537061654, Duration: 102.11 sec


Epoch 55: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.6722, Val_Loss: 0.8713, Total Mean Loss: 1.7717, LR: 0.004000500000000001, Duration: 101.25 sec


Epoch 56: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.5629, Val_Loss: 0.7552, Total Mean Loss: 1.6591, LR: 0.003860919462938349, Duration: 101.35 sec - model saved!


Epoch 57: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.6445, Val_Loss: 0.8593, Total Mean Loss: 1.7519, LR: 0.0037215089832603712, Duration: 101.21 sec


Epoch 58: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.5318, Val_Loss: 0.8654, Total Mean Loss: 1.6986, LR: 0.0035824384111610213, Duration: 101.16 sec


Epoch 59: 100%|██████████| 43/43 [01:41<00:00,  2.35s/it]


	Loss: 2.5694, Val_Loss: 0.7880, Total Mean Loss: 1.6787, LR: 0.00344387718271022, Duration: 102.35 sec


Epoch 60: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.4942, Val_Loss: 0.7219, Total Mean Loss: 1.6080, LR: 0.003305994113421113, Duration: 101.48 sec - model saved!


Epoch 61: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.4456, Val_Loss: 0.7369, Total Mean Loss: 1.5912, LR: 0.0031689571925743727, Duration: 101.48 sec - model saved!


Epoch 62: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.4264, Val_Loss: 0.8096, Total Mean Loss: 1.6180, LR: 0.00303293337854913, Duration: 101.07 sec


Epoch 63: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.5055, Val_Loss: 0.7477, Total Mean Loss: 1.6266, LR: 0.0028980883954099123, Duration: 101.08 sec


Epoch 64: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.2971, Val_Loss: 0.6533, Total Mean Loss: 1.4752, LR: 0.0027645865309973987, Duration: 101.03 sec - model saved!


Epoch 65: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.4391, Val_Loss: 0.6543, Total Mean Loss: 1.5467, LR: 0.002632590436768989, Duration: 101.43 sec


Epoch 66: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.4635, Val_Loss: 0.6473, Total Mean Loss: 1.5554, LR: 0.0025022609296330603, Duration: 101.22 sec


Epoch 67: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.5511, Val_Loss: 0.7714, Total Mean Loss: 1.6613, LR: 0.002373756796018338, Duration: 101.07 sec


Epoch 68: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.4326, Val_Loss: 0.6659, Total Mean Loss: 1.5492, LR: 0.002247234598417085, Duration: 101.18 sec


Epoch 69: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.3532, Val_Loss: 0.7323, Total Mean Loss: 1.5427, LR: 0.0021228484846378313, Duration: 101.23 sec


Epoch 70: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.4161, Val_Loss: 0.7229, Total Mean Loss: 1.5695, LR: 0.0020007500000000012, Duration: 101.46 sec


Epoch 71: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.4639, Val_Loss: 0.7263, Total Mean Loss: 1.5951, LR: 0.0018810879026992975, Duration: 101.33 sec


Epoch 72: 100%|██████████| 43/43 [01:39<00:00,  2.33s/it]


	Loss: 2.5016, Val_Loss: 0.7104, Total Mean Loss: 1.6060, LR: 0.0017640079825687487, Duration: 101.28 sec


Epoch 73: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.4719, Val_Loss: 0.7154, Total Mean Loss: 1.5937, LR: 0.0016496528834562543, Duration: 101.13 sec


Epoch 74: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.3738, Val_Loss: 0.6260, Total Mean Loss: 1.4999, LR: 0.0015381619294350297, Duration: 101.34 sec


Epoch 75: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.3922, Val_Loss: 0.6802, Total Mean Loss: 1.5362, LR: 0.0014296709550586859, Duration: 101.07 sec


Epoch 76: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.3968, Val_Loss: 0.6636, Total Mean Loss: 1.5302, LR: 0.0013243121398677478, Duration: 101.43 sec


Epoch 77: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.3772, Val_Loss: 0.6906, Total Mean Loss: 1.5339, LR: 0.00122221384734924, Duration: 101.26 sec


Epoch 78: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.4278, Val_Loss: 0.6729, Total Mean Loss: 1.5504, LR: 0.0011235004685455656, Duration: 101.42 sec


Epoch 79: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.3452, Val_Loss: 0.6851, Total Mean Loss: 1.5152, LR: 0.0010282922705031614, Duration: 101.06 sec


Epoch 80: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.3834, Val_Loss: 0.6143, Total Mean Loss: 1.4988, LR: 0.000936705249745648, Duration: 101.03 sec


Epoch 81: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.2604, Val_Loss: 0.6191, Total Mean Loss: 1.4397, LR: 0.0008488509909499158, Duration: 101.36 sec - model saved!


Epoch 82: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.4331, Val_Loss: 0.6519, Total Mean Loss: 1.5425, LR: 0.0007648365309973983, Duration: 101.52 sec


Epoch 83: 100%|██████████| 43/43 [01:40<00:00,  2.34s/it]


	Loss: 2.2711, Val_Loss: 0.6466, Total Mean Loss: 1.4589, LR: 0.0006847642285661112, Duration: 101.93 sec


Epoch 84: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.3225, Val_Loss: 0.6680, Total Mean Loss: 1.4952, LR: 0.0006087316394223745, Duration: 101.51 sec


Epoch 85: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.3317, Val_Loss: 0.6536, Total Mean Loss: 1.4926, LR: 0.0005368313975641375, Duration: 101.34 sec


Epoch 86: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.4682, Val_Loss: 0.6624, Total Mean Loss: 1.5653, LR: 0.0004691511023607216, Duration: 101.37 sec


Epoch 87: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.2309, Val_Loss: 0.5942, Total Mean Loss: 1.4126, LR: 0.0004057732118264824, Duration: 101.36 sec - model saved!


Epoch 88: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.5037, Val_Loss: 0.6603, Total Mean Loss: 1.5820, LR: 0.00034677494215841747, Duration: 100.78 sec


Epoch 89: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.2489, Val_Loss: 0.6345, Total Mean Loss: 1.4417, LR: 0.0002922281736601342, Duration: 100.96 sec


Epoch 90: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.2811, Val_Loss: 0.6038, Total Mean Loss: 1.4425, LR: 0.0002421993631667597, Duration: 100.94 sec


Epoch 91: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.2056, Val_Loss: 0.6184, Total Mean Loss: 1.4120, LR: 0.00019674946307753348, Duration: 100.99 sec - model saved!


Epoch 92: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.2586, Val_Loss: 0.6146, Total Mean Loss: 1.4366, LR: 0.0001559338470946936, Duration: 100.71 sec


Epoch 93: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.1969, Val_Loss: 0.6111, Total Mean Loss: 1.4040, LR: 0.00011980224275915211, Duration: 101.17 sec - model saved!


Epoch 94: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.4151, Val_Loss: 0.6223, Total Mean Loss: 1.5187, LR: 8.839867086514416e-05, Duration: 101.00 sec


Epoch 95: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.3300, Val_Loss: 0.6233, Total Mean Loss: 1.4767, LR: 6.176139182767403e-05, Duration: 100.77 sec


Epoch 96: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.2813, Val_Loss: 0.6242, Total Mean Loss: 1.4528, LR: 3.992285906808934e-05, Duration: 101.35 sec


Epoch 97: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.3215, Val_Loss: 0.6249, Total Mean Loss: 1.4732, LR: 2.290967947459098e-05, Duration: 101.33 sec


Epoch 98: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.2299, Val_Loss: 0.6232, Total Mean Loss: 1.4265, LR: 1.0742580985833122e-05, Duration: 101.20 sec


Epoch 99: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.1854, Val_Loss: 0.6234, Total Mean Loss: 1.4044, LR: 3.4363873371264992e-06, Duration: 101.46 sec


Epoch 100: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.3050, Val_Loss: 0.6234, Total Mean Loss: 1.4642, LR: 1e-06, Duration: 101.15 sec

[100 epoch result]
       Metric     Value
0   Accuracy  0.914000
1  Precision  0.927274
2     Recall  0.914000
3   F1 Score  0.908676


### CASE 02: LLRD

In [18]:
def LLRD_ConvNeXt(model, depths=[3,3,9,3], weight_decay=0.05, lr=8e-3, scale=0.95):
    
    stage = 0
    layer_names = []
    param_groups = {}
    for depth in depths:
        if stage == 0:
            layer_names.append(f'downsample_layers.{stage}.stem_conv.weight')
            layer_names.append(f'downsample_layers.{stage}.stem_conv.bias')
            layer_names.append(f'downsample_layers.{stage}.stem_ln.weight')
            layer_names.append(f'downsample_layers.{stage}.stem_ln.bias')
        else :
            layer_names.append(f'downsample_layers.{stage}.ds_ln.weight')
            layer_names.append(f'downsample_layers.{stage}.ds_ln.bias')
            layer_names.append(f'downsample_layers.{stage}.ds_conv.weight')
            layer_names.append(f'downsample_layers.{stage}.ds_conv.bias')        
        for i in range(depth):
            layer_names.append(f'stages.{stage}.{i}.dwconv.weight')
            layer_names.append(f'stages.{stage}.{i}.dwconv.bias')
            layer_names.append(f'stages.{stage}.{i}.layernorm.weight')
            layer_names.append(f'stages.{stage}.{i}.layernorm.bias')
            layer_names.append(f'stages.{stage}.{i}.pwconv1.weight')
            layer_names.append(f'stages.{stage}.{i}.pwconv1.bias')
            layer_names.append(f'stages.{stage}.{i}.grn.gamma')
            layer_names.append(f'stages.{stage}.{i}.grn.beta')            
            layer_names.append(f'stages.{stage}.{i}.pwconv2.weight')
            layer_names.append(f'stages.{stage}.{i}.pwconv2.bias')
        stage += 1
    
    layer_names.append('layernorm.weight')
    layer_names.append('layernorm.bias')
    layer_names.append('fc.weight')
    layer_names.append('fc.bias')
    
    # Layer Learning Rate Decay
    for name, param in model.named_parameters():
        total_depths = sum(depths)
        if name.startswith("downsample_layers"):
            stage_id = int(name.split('.')[1])
            layer_id = sum(depths[:stage_id])
            param_groups[name] = {'lr':lr*(scale**(total_depths-layer_id)),
                                  'weight_decay':0.}
        
        elif name.startswith("stages"):
            stage_id = int(name.split('.')[1])
            block_id = int(name.split('.')[2])
            layer_id = sum(depths[:stage_id]) + block_id
            if len(param.shape) == 1 or name.endswith(".bias") or name.endswith(".gamma") or name.endswith(".beta"):
                param_groups[name] = {'lr':lr*(scale**(total_depths-layer_id)),
                                      'weight_decay':0.}
            else :
                param_groups[name] = {'lr':lr*(scale**(total_depths-layer_id)),
                                      'weight_decay':weight_decay}       
        else : # head
            layer_id = total_depths
            if len(param.shape) == 1 or name.endswith(".bias"):
                param_groups[name] = {'lr':lr*(scale**(total_depths-layer_id)),
                                      'weight_decay':0.}
            else :
                param_groups[name] = {'lr':lr*(scale**(total_depths-layer_id)),
                                      'weight_decay':weight_decay}    
    return layer_names, param_groups

In [19]:
layer_names, param_groups = LLRD_ConvNeXt(model)
for name in layer_names:
    print(f"name: {name}, lr: {param_groups[name]['lr']}, weight_decay: {param_groups[name]['weight_decay']}")

name: downsample_layers.0.stem_conv.weight, lr: 0.0031777145476657455, weight_decay: 0.0
name: downsample_layers.0.stem_conv.bias, lr: 0.0031777145476657455, weight_decay: 0.0
name: downsample_layers.0.stem_ln.weight, lr: 0.0031777145476657455, weight_decay: 0.0
name: downsample_layers.0.stem_ln.bias, lr: 0.0031777145476657455, weight_decay: 0.0
name: stages.0.0.dwconv.weight, lr: 0.0031777145476657455, weight_decay: 0.05
name: stages.0.0.dwconv.bias, lr: 0.0031777145476657455, weight_decay: 0.0
name: stages.0.0.layernorm.weight, lr: 0.0031777145476657455, weight_decay: 0.0
name: stages.0.0.layernorm.bias, lr: 0.0031777145476657455, weight_decay: 0.0
name: stages.0.0.pwconv1.weight, lr: 0.0031777145476657455, weight_decay: 0.05
name: stages.0.0.pwconv1.bias, lr: 0.0031777145476657455, weight_decay: 0.0
name: stages.0.0.grn.gamma, lr: 0.0031777145476657455, weight_decay: 0.0
name: stages.0.0.grn.beta, lr: 0.0031777145476657455, weight_decay: 0.0
name: stages.0.0.pwconv2.weight, lr: 0.00

In [20]:
groups = [{'params': param,
            'lr' : param_groups[name]['lr'],
            'weight_decay': param_groups[name]['weight_decay']} for name, param in model.named_parameters()]

In [21]:
epochs = 100

optimizer = optim.AdamW(groups)
warmup_steps = int(len(train_loader)*(epochs)*0.1)
train_steps = len(train_loader)*(epochs)
scheduler = CosineWarmupScheduler(optimizer, 
                                num_warmup_steps=warmup_steps, 
                                num_training_steps=train_steps,
                                num_cycles=0.5,
                                min_lr=1e-6)



In [22]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')
model_save = False
torch.backends.cudnn.benchmark = True

for i in range(epochs // 100):
    for epoch in range(100):
        model.train()
        start_time = time.time()
        running_loss = 0.0
        pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1 + i*100}")
        
        for _, data in pbar:
            inputs, labels = data[0].to(device), data[1].to(device)
            inputs, labels = mixup_fn(inputs, labels)
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
                
            loss.backward()
            # 그라디언트 클리핑 적용
            clip_grad_norm_(model.parameters(), max_norm=max_norm)
            optimizer.step()
            
            # EMA 모델 업데이트, 필요한 경우
            if model_ema is not None:
                model_ema.update(model)
                
            scheduler.step()
                
            lr = optimizer.param_groups[0]["lr"]
            lrs.append(lr)
            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        losses.append(epoch_loss)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data in valid_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
        val_loss /= len(valid_loader)
        val_losses.append(val_loss)
        
        # 모델 저장 조건 수정
        total_loss = val_loss + epoch_loss
        if total_loss < best_loss:
            best_loss = total_loss
            # torch.save(model.state_dict(), model_path)
            model_save = True
            save_text = ' - model saved!'
        else:
            save_text = ''

        epoch_duration = time.time() - start_time
        training_time += epoch_duration
        
        text = f'\tLoss: {epoch_loss:.4f}, Val_Loss: {val_loss:.4f}, Total Mean Loss: {total_loss/2:.4f}, LR: {lr}, Duration: {epoch_duration:.2f} sec{save_text}'
        print(text)

    # 예측 수행 및 레이블 저장
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # 혼동 행렬 생성
    cm = confusion_matrix(all_labels, all_preds)

    # 예측과 실제 레이블
    y_true = all_labels  # 실제 레이블
    y_pred = all_preds  # 모델에 의해 예측된 레이블

    # 전체 데이터셋에 대한 정확도
    accuracy = accuracy_score(y_true, y_pred)

    # 평균 정밀도, 리콜, F1-Score ('weighted')
    precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

    # 판다스 데이터프레임으로 결과 정리
    performance_metrics = pd.DataFrame({
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
        'Value': [accuracy, precision, recall, f1_score]
    })

    # 데이터프레임 출력
    print(f"\n[{i*100+100} epoch result]\n", performance_metrics)

Epoch 1:   0%|          | 0/43 [00:00<?, ?it/s]

Epoch 1: 100%|██████████| 43/43 [01:41<00:00,  2.35s/it]


	Loss: 4.5119, Val_Loss: 3.9885, Total Mean Loss: 4.2502, LR: 0.0003186714547665745, Duration: 102.45 sec - model saved!


Epoch 2: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 4.2982, Val_Loss: 3.4926, Total Mean Loss: 3.8954, LR: 0.000636342909533149, Duration: 100.57 sec - model saved!


Epoch 3: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 4.1744, Val_Loss: 3.2555, Total Mean Loss: 3.7149, LR: 0.0009540143642997237, Duration: 100.85 sec - model saved!


Epoch 4: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 4.1671, Val_Loss: 3.0869, Total Mean Loss: 3.6270, LR: 0.001271685819066298, Duration: 100.60 sec - model saved!


Epoch 5: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 4.0473, Val_Loss: 2.9928, Total Mean Loss: 3.5201, LR: 0.0015893572738328724, Duration: 100.96 sec - model saved!


Epoch 6: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.9681, Val_Loss: 2.8903, Total Mean Loss: 3.4292, LR: 0.0019070287285994473, Duration: 100.73 sec - model saved!


Epoch 7: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.9791, Val_Loss: 2.6867, Total Mean Loss: 3.3329, LR: 0.002224700183366022, Duration: 100.67 sec - model saved!


Epoch 8: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.9610, Val_Loss: 2.6699, Total Mean Loss: 3.3154, LR: 0.002542371638132596, Duration: 100.70 sec - model saved!


Epoch 9: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.8865, Val_Loss: 2.5807, Total Mean Loss: 3.2336, LR: 0.0028600430928991706, Duration: 100.79 sec - model saved!


Epoch 10: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.9098, Val_Loss: 2.4482, Total Mean Loss: 3.1790, LR: 0.0031777145476657455, Duration: 100.79 sec - model saved!


Epoch 11: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.8225, Val_Loss: 2.5034, Total Mean Loss: 3.1630, LR: 0.003176746963330504, Duration: 100.93 sec - model saved!


Epoch 12: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.8015, Val_Loss: 2.4077, Total Mean Loss: 3.1046, LR: 0.003173845389177246, Duration: 100.79 sec - model saved!


Epoch 13: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.7183, Val_Loss: 2.2883, Total Mean Loss: 3.0033, LR: 0.003169013360327125, Duration: 100.83 sec - model saved!


Epoch 14: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.7117, Val_Loss: 2.0277, Total Mean Loss: 2.8697, LR: 0.0031622567638629772, Duration: 100.88 sec - model saved!


Epoch 15: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.6566, Val_Loss: 2.1158, Total Mean Loss: 2.8862, LR: 0.003153583831656821, Duration: 100.86 sec


Epoch 16: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.5253, Val_Loss: 1.8516, Total Mean Loss: 2.6884, LR: 0.0031430051303405855, Duration: 101.11 sec - model saved!


Epoch 17: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.6382, Val_Loss: 1.8362, Total Mean Loss: 2.7372, LR: 0.0031305335484323018, Duration: 100.53 sec


Epoch 18: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.5305, Val_Loss: 1.8098, Total Mean Loss: 2.6702, LR: 0.0031161842806334246, Duration: 100.72 sec - model saved!


Epoch 19: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.4664, Val_Loss: 1.7079, Total Mean Loss: 2.5871, LR: 0.003099974809316432, Duration: 101.16 sec - model saved!


Epoch 20: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.5328, Val_Loss: 1.6012, Total Mean Loss: 2.5670, LR: 0.0030819248832252458, Duration: 100.72 sec - model saved!


Epoch 21: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.4068, Val_Loss: 1.5768, Total Mean Loss: 2.4918, LR: 0.00306205649341443, Duration: 100.81 sec - model saved!


Epoch 22: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.4318, Val_Loss: 1.5513, Total Mean Loss: 2.4916, LR: 0.0030403938464564784, Duration: 101.02 sec - model saved!


Epoch 23: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.3738, Val_Loss: 1.4034, Total Mean Loss: 2.3886, LR: 0.0030169633349498345, Duration: 100.92 sec - model saved!


Epoch 24: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.2459, Val_Loss: 1.5410, Total Mean Loss: 2.3934, LR: 0.002991793505363575, Duration: 100.82 sec


Epoch 25: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.3604, Val_Loss: 1.4286, Total Mean Loss: 2.3945, LR: 0.002964915023257937, Duration: 100.89 sec


Epoch 26: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.3137, Val_Loss: 1.4404, Total Mean Loss: 2.3770, LR: 0.0029363606359230515, Duration: 100.73 sec - model saved!


Epoch 27: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.2510, Val_Loss: 1.3704, Total Mean Loss: 2.3107, LR: 0.0029061651324814213, Duration: 100.78 sec - model saved!


Epoch 28: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.2450, Val_Loss: 1.3958, Total Mean Loss: 2.3204, LR: 0.002874365301502729, Duration: 100.62 sec


Epoch 29: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.2054, Val_Loss: 1.2440, Total Mean Loss: 2.2247, LR: 0.0028409998861826332, Duration: 100.58 sec - model saved!


Epoch 30: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.1411, Val_Loss: 1.2091, Total Mean Loss: 2.1751, LR: 0.0028061095371401537, Duration: 100.84 sec - model saved!


Epoch 31: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.1918, Val_Loss: 1.3333, Total Mean Loss: 2.2626, LR: 0.0027697367628911527, Duration: 101.01 sec


Epoch 32: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.1670, Val_Loss: 1.1344, Total Mean Loss: 2.1507, LR: 0.0027319258780582555, Duration: 100.69 sec - model saved!


Epoch 33: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.1864, Val_Loss: 1.1963, Total Mean Loss: 2.1913, LR: 0.0026927229493803116, Duration: 101.02 sec


Epoch 34: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 3.0623, Val_Loss: 1.1943, Total Mean Loss: 2.1283, LR: 0.002652175739587166, Duration: 100.79 sec - model saved!


Epoch 35: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.0697, Val_Loss: 1.0925, Total Mean Loss: 2.0811, LR: 0.002610333649208133, Duration: 100.87 sec - model saved!


Epoch 36: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.0433, Val_Loss: 1.0539, Total Mean Loss: 2.0486, LR: 0.0025672476563850595, Duration: 101.14 sec - model saved!


Epoch 37: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 3.1422, Val_Loss: 1.2485, Total Mean Loss: 2.1954, LR: 0.0025229702547633127, Duration: 101.02 sec


Epoch 38: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.9666, Val_Loss: 0.9535, Total Mean Loss: 1.9601, LR: 0.002477555389536357, Duration: 100.87 sec - model saved!


Epoch 39: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.9930, Val_Loss: 1.0833, Total Mean Loss: 2.0381, LR: 0.002431058391721848, Duration: 100.77 sec


Epoch 40: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.9819, Val_Loss: 1.1321, Total Mean Loss: 2.0570, LR: 0.0023835359107493093, Duration: 101.07 sec


Epoch 41: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.9287, Val_Loss: 1.1466, Total Mean Loss: 2.0376, LR: 0.0023350458454415284, Duration: 100.79 sec


Epoch 42: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.8377, Val_Loss: 0.9558, Total Mean Loss: 1.8968, LR: 0.002285647273473762, Duration: 100.63 sec - model saved!


Epoch 43: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.9962, Val_Loss: 1.0333, Total Mean Loss: 2.0147, LR: 0.0022354003793966853, Duration: 100.92 sec


Epoch 44: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.7796, Val_Loss: 0.9365, Total Mean Loss: 1.8580, LR: 0.0021843663813107903, Duration: 100.94 sec - model saved!


Epoch 45: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.8294, Val_Loss: 0.8850, Total Mean Loss: 1.8572, LR: 0.0021326074562815606, Duration: 100.98 sec - model saved!


Epoch 46: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.9068, Val_Loss: 1.0178, Total Mean Loss: 1.9623, LR: 0.0020801866645862926, Duration: 100.88 sec


Epoch 47: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.7263, Val_Loss: 0.9120, Total Mean Loss: 1.8191, LR: 0.002027167872884863, Duration: 100.93 sec - model saved!


Epoch 48: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.7300, Val_Loss: 0.9738, Total Mean Loss: 1.8519, LR: 0.0019736156764080417, Duration: 100.89 sec


Epoch 49: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.8982, Val_Loss: 0.8761, Total Mean Loss: 1.8872, LR: 0.0019195953202581517, Duration: 100.70 sec


Epoch 50: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.5931, Val_Loss: 0.8665, Total Mean Loss: 1.7298, LR: 0.0018651726199179646, Duration: 101.01 sec - model saved!


Epoch 51: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.6872, Val_Loss: 0.8218, Total Mean Loss: 1.7545, LR: 0.0018104138810646692, Duration: 101.10 sec


Epoch 52: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.6830, Val_Loss: 0.8117, Total Mean Loss: 1.7473, LR: 0.0017553858187866224, Duration: 100.48 sec


Epoch 53: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.6228, Val_Loss: 0.9036, Total Mean Loss: 1.7632, LR: 0.0017001554763012856, Duration: 100.95 sec


Epoch 54: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.7403, Val_Loss: 0.8417, Total Mean Loss: 1.7910, LR: 0.0016447901432733968, Duration: 101.10 sec


Epoch 55: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.5678, Val_Loss: 0.9073, Total Mean Loss: 1.7376, LR: 0.0015893572738328726, Duration: 100.68 sec


Epoch 56: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.6559, Val_Loss: 0.8896, Total Mean Loss: 1.7727, LR: 0.0015339244043923493, Duration: 101.00 sec


Epoch 57: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.6922, Val_Loss: 0.9028, Total Mean Loss: 1.7975, LR: 0.0014785590713644594, Duration: 100.77 sec


Epoch 58: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.7674, Val_Loss: 0.8663, Total Mean Loss: 1.8168, LR: 0.0014233287288791233, Duration: 101.67 sec


Epoch 59: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.7837, Val_Loss: 0.8174, Total Mean Loss: 1.8005, LR: 0.0013683006666010762, Duration: 100.72 sec


Epoch 60: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.4801, Val_Loss: 0.8801, Total Mean Loss: 1.6801, LR: 0.0013135419277477808, Duration: 100.47 sec - model saved!


Epoch 61: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.5862, Val_Loss: 0.7797, Total Mean Loss: 1.6830, LR: 0.0012591192274075936, Duration: 100.69 sec


Epoch 62: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.7489, Val_Loss: 0.8559, Total Mean Loss: 1.8024, LR: 0.0012050988712577037, Duration: 100.58 sec


Epoch 63: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.6162, Val_Loss: 0.7727, Total Mean Loss: 1.6944, LR: 0.0011515466747808825, Duration: 101.29 sec


Epoch 64: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.6132, Val_Loss: 0.8359, Total Mean Loss: 1.7246, LR: 0.001098527883079453, Duration: 100.65 sec


Epoch 65: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.4654, Val_Loss: 0.7920, Total Mean Loss: 1.6287, LR: 0.001046107091384185, Duration: 100.59 sec - model saved!


Epoch 66: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.4580, Val_Loss: 0.7734, Total Mean Loss: 1.6157, LR: 0.0009943481663549551, Duration: 100.76 sec - model saved!


Epoch 67: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.5001, Val_Loss: 0.7782, Total Mean Loss: 1.6392, LR: 0.0009433141682690608, Duration: 100.90 sec


Epoch 68: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.5155, Val_Loss: 0.7583, Total Mean Loss: 1.6369, LR: 0.0008930672741919835, Duration: 100.58 sec


Epoch 69: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.5393, Val_Loss: 0.8018, Total Mean Loss: 1.6706, LR: 0.0008436687022242174, Duration: 100.92 sec


Epoch 70: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.4591, Val_Loss: 0.7978, Total Mean Loss: 1.6285, LR: 0.0007951786369164367, Duration: 101.18 sec


Epoch 71: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.4793, Val_Loss: 0.7250, Total Mean Loss: 1.6022, LR: 0.0007476561559438978, Duration: 100.71 sec - model saved!


Epoch 72: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.4251, Val_Loss: 0.7413, Total Mean Loss: 1.5832, LR: 0.0007011591581293887, Duration: 100.82 sec - model saved!


Epoch 73: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.4225, Val_Loss: 0.7174, Total Mean Loss: 1.5699, LR: 0.000655744292902433, Duration: 100.62 sec - model saved!


Epoch 74: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.3473, Val_Loss: 0.7849, Total Mean Loss: 1.5661, LR: 0.0006114668912806857, Duration: 100.68 sec - model saved!


Epoch 75: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.5323, Val_Loss: 0.7227, Total Mean Loss: 1.6275, LR: 0.0005683808984576124, Duration: 100.58 sec


Epoch 76: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.2927, Val_Loss: 0.6995, Total Mean Loss: 1.4961, LR: 0.0005265388080785801, Duration: 100.61 sec - model saved!


Epoch 77: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.4638, Val_Loss: 0.7161, Total Mean Loss: 1.5900, LR: 0.0004859915982854339, Duration: 100.66 sec


Epoch 78: 100%|██████████| 43/43 [01:39<00:00,  2.30s/it]


	Loss: 2.2921, Val_Loss: 0.7848, Total Mean Loss: 1.5384, LR: 0.0004467886696074901, Duration: 100.33 sec


Epoch 79: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.2935, Val_Loss: 0.7028, Total Mean Loss: 1.4981, LR: 0.0004089777847745927, Duration: 101.19 sec


Epoch 80: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.3892, Val_Loss: 0.7471, Total Mean Loss: 1.5681, LR: 0.0003726050105255919, Duration: 100.72 sec


Epoch 81: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.4548, Val_Loss: 0.6996, Total Mean Loss: 1.5772, LR: 0.00033771466148311234, Duration: 101.13 sec


Epoch 82: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.4554, Val_Loss: 0.7359, Total Mean Loss: 1.5957, LR: 0.0003043492461630169, Duration: 100.72 sec


Epoch 83: 100%|██████████| 43/43 [01:39<00:00,  2.30s/it]


	Loss: 2.2836, Val_Loss: 0.6913, Total Mean Loss: 1.4874, LR: 0.0002725494151843244, Duration: 100.30 sec - model saved!


Epoch 84: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.2322, Val_Loss: 0.6922, Total Mean Loss: 1.4622, LR: 0.00024235391174269407, Duration: 101.14 sec - model saved!


Epoch 85: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.3308, Val_Loss: 0.6647, Total Mean Loss: 1.4977, LR: 0.00021379952440780885, Duration: 100.89 sec


Epoch 86: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.2269, Val_Loss: 0.7049, Total Mean Loss: 1.4659, LR: 0.0001869210423021702, Duration: 100.76 sec


Epoch 87: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.5295, Val_Loss: 0.7130, Total Mean Loss: 1.6212, LR: 0.00016175121271591134, Duration: 100.86 sec


Epoch 88: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.2567, Val_Loss: 0.6699, Total Mean Loss: 1.4633, LR: 0.00013832070120926694, Duration: 100.75 sec


Epoch 89: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.3638, Val_Loss: 0.6958, Total Mean Loss: 1.5298, LR: 0.00011665805425131569, Duration: 100.65 sec


Epoch 90: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.3992, Val_Loss: 0.7040, Total Mean Loss: 1.5516, LR: 9.678966444049992e-05, Duration: 100.89 sec


Epoch 91: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.2585, Val_Loss: 0.6956, Total Mean Loss: 1.4771, LR: 7.873973834931357e-05, Duration: 100.87 sec


Epoch 92: 100%|██████████| 43/43 [01:39<00:00,  2.31s/it]


	Loss: 2.3697, Val_Loss: 0.6950, Total Mean Loss: 1.5324, LR: 6.25302670323207e-05, Duration: 100.75 sec


Epoch 93: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.3172, Val_Loss: 0.6830, Total Mean Loss: 1.5001, LR: 4.818099923344367e-05, Duration: 100.95 sec


Epoch 94: 100%|██████████| 43/43 [01:40<00:00,  2.33s/it]


	Loss: 2.3586, Val_Loss: 0.6790, Total Mean Loss: 1.5188, LR: 3.570941732515986e-05, Duration: 101.29 sec


Epoch 95: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.5162, Val_Loss: 0.6889, Total Mean Loss: 1.6025, LR: 2.5130716008924942e-05, Duration: 101.06 sec


Epoch 96: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.4328, Val_Loss: 0.6941, Total Mean Loss: 1.5634, LR: 1.645778380276822e-05, Duration: 100.97 sec


Epoch 97: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.2231, Val_Loss: 0.6959, Total Mean Loss: 1.4595, LR: 9.701187338620671e-06, Duration: 100.94 sec - model saved!


Epoch 98: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.4287, Val_Loss: 0.6955, Total Mean Loss: 1.5621, LR: 4.869158488499532e-06, Duration: 101.13 sec


Epoch 99: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.2580, Val_Loss: 0.6947, Total Mean Loss: 1.4764, LR: 1.9675843352417e-06, Duration: 100.94 sec


Epoch 100: 100%|██████████| 43/43 [01:39<00:00,  2.32s/it]


	Loss: 2.3442, Val_Loss: 0.6947, Total Mean Loss: 1.5195, LR: 1e-06, Duration: 101.14 sec

[100 epoch result]
       Metric     Value
0   Accuracy  0.904000
1  Precision  0.920925
2     Recall  0.904000
3   F1 Score  0.901721


### CASE 03 : Grouped Learning Rate Decay

In [16]:
def LLRD_ConvNeXt(model, depths=[3,3,9,3], weight_decay=1e-5, lr=8e-3, scale=0.9):
    
    stage = 0
    layer_names = []
    param_groups = {}
    for depth in depths:
        if stage == 0:
            layer_names.append(f'downsample_layers.{stage}.stem_conv.weight')
            layer_names.append(f'downsample_layers.{stage}.stem_conv.bias')
            layer_names.append(f'downsample_layers.{stage}.stem_ln.weight')
            layer_names.append(f'downsample_layers.{stage}.stem_ln.bias')
        else :
            layer_names.append(f'downsample_layers.{stage}.ds_ln.weight')
            layer_names.append(f'downsample_layers.{stage}.ds_ln.bias')
            layer_names.append(f'downsample_layers.{stage}.ds_conv.weight')
            layer_names.append(f'downsample_layers.{stage}.ds_conv.bias')        
        for i in range(depth):
            layer_names.append(f'stages.{stage}.{i}.dwconv.weight')
            layer_names.append(f'stages.{stage}.{i}.dwconv.bias')
            layer_names.append(f'stages.{stage}.{i}.layernorm.weight')
            layer_names.append(f'stages.{stage}.{i}.layernorm.bias')
            layer_names.append(f'stages.{stage}.{i}.pwconv1.weight')
            layer_names.append(f'stages.{stage}.{i}.pwconv1.bias')
            layer_names.append(f'stages.{stage}.{i}.grn.gamma')
            layer_names.append(f'stages.{stage}.{i}.grn.beta')            
            layer_names.append(f'stages.{stage}.{i}.pwconv2.weight')
            layer_names.append(f'stages.{stage}.{i}.pwconv2.bias')
        stage += 1
    
    layer_names.append('layernorm.weight')
    layer_names.append('layernorm.bias')
    layer_names.append('fc.weight')
    layer_names.append('fc.bias')
    
    # Layer Learning Rate Decay
    for name, param in model.named_parameters():
        total_depths = sum(depths)
        if name.startswith("downsample_layers"):
            stage_id = int(name.split('.')[1])
            layer_id = sum(depths[:stage_id]) + 1
            param_groups[name] = {'lr':lr*(scale**((total_depths-layer_id)//3+1)),
                                  'weight_decay':0.}
        
        elif name.startswith("stages"):
            stage_id = int(name.split('.')[1])
            block_id = int(name.split('.')[2])
            layer_id = sum(depths[:stage_id]) + block_id + 1
            if len(param.shape) == 1 or name.endswith(".bias") or name.endswith(".gamma") or name.endswith(".beta"):
                param_groups[name] = {'lr':lr*(scale**((total_depths-layer_id)//3+1)),
                                      'weight_decay':0.}
            else :
                param_groups[name] = {'lr':lr*(scale**((total_depths-layer_id)//3+1)),
                                      'weight_decay':weight_decay}       
        else : # head
            if len(param.shape) == 1 or name.endswith(".bias"):
                param_groups[name] = {'lr':lr,
                                      'weight_decay':0.}
            else :
                param_groups[name] = {'lr':lr,
                                      'weight_decay':weight_decay}    
    return layer_names, param_groups

In [17]:
layer_names, param_groups = LLRD_ConvNeXt(model)
for name in layer_names:
    print(f"name: {name}, lr: {param_groups[name]['lr']}, weight_decay: {param_groups[name]['weight_decay']}")

name: downsample_layers.0.stem_conv.weight, lr: 0.004251528, weight_decay: 0.0
name: downsample_layers.0.stem_conv.bias, lr: 0.004251528, weight_decay: 0.0
name: downsample_layers.0.stem_ln.weight, lr: 0.004251528, weight_decay: 0.0
name: downsample_layers.0.stem_ln.bias, lr: 0.004251528, weight_decay: 0.0
name: stages.0.0.dwconv.weight, lr: 0.004251528, weight_decay: 1e-05
name: stages.0.0.dwconv.bias, lr: 0.004251528, weight_decay: 0.0
name: stages.0.0.layernorm.weight, lr: 0.004251528, weight_decay: 0.0
name: stages.0.0.layernorm.bias, lr: 0.004251528, weight_decay: 0.0
name: stages.0.0.pwconv1.weight, lr: 0.004251528, weight_decay: 1e-05
name: stages.0.0.pwconv1.bias, lr: 0.004251528, weight_decay: 0.0
name: stages.0.0.grn.gamma, lr: 0.004251528, weight_decay: 0.0
name: stages.0.0.grn.beta, lr: 0.004251528, weight_decay: 0.0
name: stages.0.0.pwconv2.weight, lr: 0.004251528, weight_decay: 1e-05
name: stages.0.0.pwconv2.bias, lr: 0.004251528, weight_decay: 0.0
name: stages.0.1.dwconv

In [18]:
groups = [{'params': param,
            'lr' : param_groups[name]['lr'],
            'weight_decay': param_groups[name]['weight_decay']} for name, param in model.named_parameters()]

In [19]:
epochs = 100

optimizer = optim.AdamW(groups)
warmup_steps = int(len(train_loader)*(epochs)*0.1)
train_steps = len(train_loader)*(epochs)
scheduler = CosineWarmupScheduler(optimizer, 
                                num_warmup_steps=warmup_steps, 
                                num_training_steps=train_steps,
                                num_cycles=0.5,
                                min_lr=1e-7)



In [20]:
training_time = 0
losses = []
val_losses = []
lrs = []
best_loss = float('inf')
model_save = False

for i in range(epochs // 100):
    for epoch in range(100):
        model.train()
        start_time = time.time()
        running_loss = 0.0
        pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1 + i*100}")
        
        for _, data in pbar:
            inputs, labels = data[0].to(device), data[1].to(device)
            inputs, labels = mixup_fn(inputs, labels)
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
                
            loss.backward()
            # 그라디언트 클리핑 적용
            clip_grad_norm_(model.parameters(), max_norm=max_norm)
            optimizer.step()
            
            # EMA 모델 업데이트, 필요한 경우
            if model_ema is not None:
                model_ema.update(model)
                
            scheduler.step()
                
            lr = optimizer.param_groups[0]["lr"]
            lrs.append(lr)
            running_loss += loss.item()

        epoch_loss = running_loss / len(train_loader)
        losses.append(epoch_loss)

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for data in valid_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
        val_loss /= len(valid_loader)
        val_losses.append(val_loss)
        
        # 모델 저장 조건 수정
        total_loss = val_loss + epoch_loss
        if total_loss < best_loss:
            best_loss = total_loss
            # torch.save(model.state_dict(), model_path)
            model_save = True
            save_text = ' - model saved!'
        else:
            save_text = ''

        epoch_duration = time.time() - start_time
        training_time += epoch_duration
        
        text = f'\tLoss: {epoch_loss:.4f}, Val_Loss: {val_loss:.4f}, Total Mean Loss: {total_loss/2:.4f}, LR: {lr}, Duration: {epoch_duration:.2f} sec{save_text}'
        print(text)

    # 예측 수행 및 레이블 저장
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # 혼동 행렬 생성
    cm = confusion_matrix(all_labels, all_preds)

    # 예측과 실제 레이블
    y_true = all_labels  # 실제 레이블
    y_pred = all_preds  # 모델에 의해 예측된 레이블

    # 전체 데이터셋에 대한 정확도
    accuracy = accuracy_score(y_true, y_pred)

    # 평균 정밀도, 리콜, F1-Score ('weighted')
    precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

    # 판다스 데이터프레임으로 결과 정리
    performance_metrics = pd.DataFrame({
        'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
        'Value': [accuracy, precision, recall, f1_score]
    })

    # 데이터프레임 출력
    print(f"\n[{i*100+100} epoch result]\n", performance_metrics)

Epoch 1:   7%|▋         | 3/43 [00:07<01:43,  2.59s/it]