In [1]:
import time
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm

import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.transforms.autoaugment import AutoAugmentPolicy
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler

import timm
import model.vit_better as vit_custom

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
unique_idx = '3'

batch_size = 128
num_workers = 8
img_size = 224
patch_size = 16
dropout = 0.1

model_name = 'vit_base_patch32_224'
pretrained = False
num_classes = 10

device = f'cuda:{unique_idx}'

label_smoothing = 0.1
learning_rate = 0.001
epochs = 10

model_path = f'finetune_model:{unique_idx}.pth'  # 모델 저장 경로

In [3]:
# 데이터 증강을 위한 전처리
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(img_size),  # 무작위 크기 및 비율로 자르기
    transforms.AutoAugment(AutoAugmentPolicy.CIFAR10),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

transform_test = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)


Files already downloaded and verified
Files already downloaded and verified


In [4]:
model1 = vit_custom.VisionTransformer(img_size=img_size, 
                        patch_size=patch_size, 
                        num_classes=num_classes, 
                        dropout=dropout,
                        embed_dim=768,
                        num_layers=12,
                        num_heads=12,
                        mlp_ratio=4.,
                        estimate_params=True).to(device)
model2 = vit_custom.VisionTransformer(img_size=img_size, 
                        patch_size=patch_size, 
                        num_classes=100, 
                        dropout=dropout,
                        embed_dim=768,
                        num_layers=12,
                        num_heads=12,
                        mlp_ratio=4.,
                        estimate_params=True)
model2.load_state_dict(torch.load('./model/last_sports.pth', map_location=device))
model2.head = nn.Linear(768, num_classes)
model2.to(device)
model3 = timm.create_model(model_name=model_name, 
                          pretrained=False, 
                          num_classes=num_classes).to(device)
model4 = timm.create_model(model_name=model_name, 
                          pretrained=True, 
                          num_classes=num_classes).to(device)

In [5]:
criterion = nn.CrossEntropyLoss(label_smoothing=label_smoothing)

optimizer1 = optim.Adam(model1.parameters(), lr=learning_rate)
optimizer2 = optim.Adam(model2.parameters(), lr=learning_rate)
optimizer3 = optim.Adam(model3.parameters(), lr=learning_rate)
optimizer4 = optim.Adam(model4.parameters(), lr=learning_rate)

In [6]:
training_time = 0
losses = []
val_losses = []

# GradScaler 초기화
scaler = GradScaler()

for epoch in range(epochs):
    model1.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for i, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer1.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model1(inputs)
            loss = criterion(outputs, labels)

        # Scaled Backward & Optimizer Step
        scaler.scale(loss).backward()
        scaler.step(optimizer1)
        scaler.update()

        # scheduler.step()
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)

 
    model1.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model1(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
    val_loss /= len(test_loader)
    val_losses.append(val_loss)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, Duration: {epoch_duration:.2f} sec'
    print(text)

Epoch 1: 100%|██████████| 391/391 [02:01<00:00,  3.23it/s]


	Loss: 2.157296555731303, Val Loss: 1.8791595033452482, Duration: 147.00 sec


Epoch 2: 100%|██████████| 391/391 [02:02<00:00,  3.20it/s]


	Loss: 1.947840246703009, Val Loss: 1.7793357281745235, Duration: 148.43 sec


Epoch 3: 100%|██████████| 391/391 [02:01<00:00,  3.21it/s]


	Loss: 1.877630598099945, Val Loss: 1.6886035943333106, Duration: 148.15 sec


Epoch 4: 100%|██████████| 391/391 [02:01<00:00,  3.22it/s]


	Loss: 1.8413025716991376, Val Loss: 1.6230733409712585, Duration: 147.68 sec


Epoch 5: 100%|██████████| 391/391 [02:01<00:00,  3.22it/s]


	Loss: 1.8058368208463236, Val Loss: 1.6005174193201186, Duration: 147.44 sec


Epoch 6: 100%|██████████| 391/391 [02:01<00:00,  3.21it/s]


	Loss: 1.7894740793711084, Val Loss: 1.5762923156158835, Duration: 148.15 sec


Epoch 7: 100%|██████████| 391/391 [02:01<00:00,  3.22it/s]


	Loss: 1.7571621392389087, Val Loss: 1.5438201517998418, Duration: 147.85 sec


Epoch 8: 100%|██████████| 391/391 [02:01<00:00,  3.21it/s]


	Loss: 1.7370552263601358, Val Loss: 1.5162938410722757, Duration: 148.17 sec


Epoch 9: 100%|██████████| 391/391 [02:01<00:00,  3.21it/s]


	Loss: 1.712827499260378, Val Loss: 1.5301591716235197, Duration: 148.16 sec


Epoch 10: 100%|██████████| 391/391 [02:01<00:00,  3.21it/s]


	Loss: 1.7031346157078853, Val Loss: 1.5054274525823472, Duration: 148.02 sec


In [7]:
training_time = 0
losses = []
val_losses = []

# GradScaler 초기화
scaler = GradScaler()

for epoch in range(epochs):
    model2.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for i, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer2.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model2(inputs)
            loss = criterion(outputs, labels)

        # Scaled Backward & Optimizer Step
        scaler.scale(loss).backward()
        scaler.step(optimizer2)
        scaler.update()

        # scheduler.step()
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)

 
    model2.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model2(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
    val_loss /= len(test_loader)
    val_losses.append(val_loss)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, Duration: {epoch_duration:.2f} sec'
    print(text)

Epoch 1: 100%|██████████| 391/391 [02:01<00:00,  3.21it/s]


	Loss: 1.9335967317566543, Val Loss: 1.6209835991074768, Duration: 148.36 sec


Epoch 2: 100%|██████████| 391/391 [02:01<00:00,  3.21it/s]


	Loss: 1.771192114676356, Val Loss: 1.5236065417905398, Duration: 148.28 sec


Epoch 3: 100%|██████████| 391/391 [02:02<00:00,  3.20it/s]


	Loss: 1.7128159673622503, Val Loss: 1.502555320534525, Duration: 148.45 sec


Epoch 4: 100%|██████████| 391/391 [02:01<00:00,  3.21it/s]


	Loss: 1.6715979140128017, Val Loss: 1.4310721521136127, Duration: 148.17 sec


Epoch 5: 100%|██████████| 391/391 [02:02<00:00,  3.18it/s]


	Loss: 1.6483957352845564, Val Loss: 1.4246131543871723, Duration: 149.24 sec


Epoch 6: 100%|██████████| 391/391 [02:02<00:00,  3.20it/s]


	Loss: 1.6277514537582007, Val Loss: 1.4226683770553976, Duration: 148.42 sec


Epoch 7: 100%|██████████| 391/391 [02:03<00:00,  3.17it/s]


	Loss: 1.6000336006169429, Val Loss: 1.373530941673472, Duration: 149.83 sec


Epoch 8: 100%|██████████| 391/391 [02:01<00:00,  3.22it/s]


	Loss: 1.5846524394076804, Val Loss: 1.4364797208882585, Duration: 148.01 sec


Epoch 9: 100%|██████████| 391/391 [02:02<00:00,  3.20it/s]


	Loss: 1.5680279838459572, Val Loss: 1.3450999546654616, Duration: 148.47 sec


Epoch 10: 100%|██████████| 391/391 [02:01<00:00,  3.21it/s]


	Loss: 1.547762540600184, Val Loss: 1.329295671438869, Duration: 148.30 sec


In [8]:
training_time = 0
losses = []
val_losses = []

# GradScaler 초기화
scaler = GradScaler()

for epoch in range(epochs):
    model3.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for i, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer3.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model3(inputs)
            loss = criterion(outputs, labels)

        # Scaled Backward & Optimizer Step
        scaler.scale(loss).backward()
        scaler.step(optimizer3)
        scaler.update()

        # scheduler.step()
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)

 
    model3.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model3(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
    val_loss /= len(test_loader)
    val_losses.append(val_loss)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, Duration: {epoch_duration:.2f} sec'
    print(text)

Epoch 1: 100%|██████████| 391/391 [00:27<00:00, 14.44it/s]


	Loss: 2.312748422403165, Val Loss: 2.129036118712606, Duration: 35.81 sec


Epoch 2: 100%|██████████| 391/391 [00:27<00:00, 14.30it/s]


	Loss: 2.2823955835893637, Val Loss: 2.216826967046231, Duration: 35.88 sec


Epoch 3: 100%|██████████| 391/391 [00:27<00:00, 14.11it/s]


	Loss: 2.231895372995635, Val Loss: 2.1439897802811636, Duration: 36.12 sec


Epoch 4: 100%|██████████| 391/391 [00:27<00:00, 14.07it/s]


	Loss: 2.1967028980060004, Val Loss: 2.078063678137864, Duration: 36.19 sec


Epoch 5: 100%|██████████| 391/391 [00:27<00:00, 14.27it/s]


	Loss: 2.18060740424544, Val Loss: 2.100993847545189, Duration: 35.76 sec


Epoch 6: 100%|██████████| 391/391 [00:28<00:00, 13.89it/s]


	Loss: 2.190528876336334, Val Loss: 2.1489799565906766, Duration: 36.55 sec


Epoch 7: 100%|██████████| 391/391 [00:27<00:00, 14.16it/s]


	Loss: 2.2134380383259806, Val Loss: 2.114785511282426, Duration: 36.17 sec


Epoch 8: 100%|██████████| 391/391 [00:27<00:00, 14.08it/s]


	Loss: 2.2152922683969485, Val Loss: 2.118636762039571, Duration: 36.31 sec


Epoch 9: 100%|██████████| 391/391 [00:27<00:00, 14.09it/s]


	Loss: 2.209093983520937, Val Loss: 2.1504852862297734, Duration: 36.30 sec


Epoch 10: 100%|██████████| 391/391 [00:27<00:00, 14.37it/s]


	Loss: 2.2083193139956734, Val Loss: 2.0936913128140606, Duration: 35.66 sec


In [9]:
training_time = 0
losses = []
val_losses = []

# GradScaler 초기화
scaler = GradScaler()

for epoch in range(epochs):
    model4.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch + 1}")
    
    for i, data in pbar:
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer4.zero_grad()

        # AutoCast 적용
        with autocast():
            outputs = model4(inputs)
            loss = criterion(outputs, labels)

        # Scaled Backward & Optimizer Step
        scaler.scale(loss).backward()
        scaler.step(optimizer4)
        scaler.update()

        # scheduler.step()
        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)

 
    model4.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in test_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model4(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
    val_loss /= len(test_loader)
    val_losses.append(val_loss)

    epoch_duration = time.time() - start_time
    training_time += epoch_duration
    
    text = f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, Duration: {epoch_duration:.2f} sec'
    print(text)

Epoch 1: 100%|██████████| 391/391 [00:27<00:00, 14.17it/s]


	Loss: 2.149266161272288, Val Loss: 1.7924842698664605, Duration: 36.24 sec


Epoch 2: 100%|██████████| 391/391 [00:27<00:00, 14.30it/s]


	Loss: 1.8457493675334373, Val Loss: 1.4880663352676584, Duration: 35.92 sec


Epoch 3: 100%|██████████| 391/391 [00:27<00:00, 14.18it/s]


	Loss: 1.7045758669943456, Val Loss: 1.372133087508286, Duration: 36.16 sec


Epoch 4: 100%|██████████| 391/391 [00:27<00:00, 14.21it/s]


	Loss: 1.6273356267558339, Val Loss: 1.3022497032262101, Duration: 36.06 sec


Epoch 5: 100%|██████████| 391/391 [00:28<00:00, 13.92it/s]


	Loss: 1.5650245496988906, Val Loss: 1.2645655794988704, Duration: 36.86 sec


Epoch 6: 100%|██████████| 391/391 [00:28<00:00, 13.81it/s]


	Loss: 1.520080073410288, Val Loss: 1.1794754206379758, Duration: 37.05 sec


Epoch 7: 100%|██████████| 391/391 [00:27<00:00, 14.13it/s]


	Loss: 1.4728151724466583, Val Loss: 1.174241918551771, Duration: 36.38 sec


Epoch 8: 100%|██████████| 391/391 [00:27<00:00, 14.09it/s]


	Loss: 1.4417619421658918, Val Loss: 1.1530705207510838, Duration: 36.27 sec


Epoch 9: 100%|██████████| 391/391 [00:27<00:00, 14.05it/s]


	Loss: 1.4091029478156047, Val Loss: 1.0981245508676842, Duration: 36.47 sec


Epoch 10: 100%|██████████| 391/391 [00:27<00:00, 14.11it/s]


	Loss: 1.3844815193844573, Val Loss: 1.085808407656754, Duration: 36.20 sec
