In [8]:
import time
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import tqdm

import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.transforms.autoaugment import AutoAugmentPolicy
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler

import timm
import model.vit_better as vit_custom

In [9]:
# Run identifier: used below as the CUDA device index and as a tag in the
# checkpoint file name.
unique_idx = '4'

# --- Data loading and input geometry ---
batch_size, num_workers = 512, 8
img_size, patch_size = 224, 16
dropout = 0.1

# --- Model selection ---
# NOTE(review): the timm architecture name says patch32 while patch_size above
# is 16 — confirm the custom-vs-timm comparison is meant to differ in patch size.
model_name = 'vit_base_patch32_224'
pretrained = False
num_classes = 10

# --- Hardware ---
device = f'cuda:{unique_idx}'

# --- Optimisation ---
label_smoothing, learning_rate = 0.0, 0.001
epochs = 10

# Path where the model checkpoint is saved
model_path = f'finetune_model:{unique_idx}.pth'

In [10]:
# Preprocessing pipelines. Both finish with tensor conversion followed by the
# same per-channel normalisation; only the training pipeline applies augmentation.
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)

train_steps = [
    # Crop with random scale / aspect ratio, resized to img_size
    transforms.RandomResizedCrop(img_size),
    transforms.AutoAugment(AutoAugmentPolicy.CIFAR10),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
transform_train = transforms.Compose(train_steps)

test_steps = [
    # Deterministic resize to the model input resolution
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
]
transform_test = transforms.Compose(test_steps)

# Settings shared by the train and test loaders
loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers)

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train
)
train_loader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_kwargs)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test
)
test_loader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_kwargs)


Files already downloaded and verified
Files already downloaded and verified


In [11]:
# Constructor arguments shared by both custom ViT instances
# (768-d embeddings, 12 layers, 12 heads); only num_classes differs.
custom_vit_kwargs = dict(
    img_size=img_size,
    patch_size=patch_size,
    dropout=dropout,
    embed_dim=768,
    num_layers=12,
    num_heads=12,
    mlp_ratio=4.,
    estimate_params=True,
)

# model1: custom ViT trained from its initial weights.
model1 = vit_custom.VisionTransformer(num_classes=num_classes, **custom_vit_kwargs).to(device)

# model2: custom ViT built with a 100-way head so the saved checkpoint loads,
# after which the head is replaced by a new num_classes-way linear layer.
# NOTE(review): the checkpoint presumably comes from a 100-class sports dataset
# (inferred from the file name) — confirm.
model2 = vit_custom.VisionTransformer(num_classes=100, **custom_vit_kwargs)
model2.load_state_dict(torch.load('./model/last_sports.pth', map_location=device))
model2.head = nn.Linear(768, num_classes)
model2 = model2.to(device)

# model3 / model4: the same timm architecture, without and with timm's
# pretrained weights respectively (built in that order).
model3, model4 = (
    timm.create_model(model_name=model_name,
                      pretrained=use_pretrained,
                      num_classes=num_classes).to(device)
    for use_pretrained in (False, True)
)

In [12]:
# One loss function shared by all four training runs.
criterion = nn.CrossEntropyLoss(label_smoothing=label_smoothing)

# One independent Adam optimizer per model, all with the same learning rate.
optimizer1, optimizer2, optimizer3, optimizer4 = (
    optim.Adam(net.parameters(), lr=learning_rate)
    for net in (model1, model2, model3, model4)
)

In [9]:
# Train model1 with mixed precision, recording the mean train and validation
# loss of every epoch.
training_time = 0
losses = []
val_losses = []

# Gradient scaler used together with autocast in the training pass
scaler = GradScaler()

for epoch in range(epochs):
    model1.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(train_loader, total=len(train_loader), desc=f"Epoch {epoch + 1}")

    for inputs, labels in pbar:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer1.zero_grad()

        # Forward pass and loss under autocast
        with autocast():
            logits = model1(inputs)
            loss = criterion(logits, labels)

        # Scaled backward pass, optimizer step, then scale-factor update
        scaler.scale(loss).backward()
        scaler.step(optimizer1)
        scaler.update()

        # (no learning-rate scheduler is stepped in this loop)
        running_loss += loss.item()

    # Mean of the per-batch training losses
    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)

    # Validation pass: no gradients, no autocast
    model1.eval()
    val_running = 0.0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            val_running += criterion(model1(inputs), labels).item()
    val_loss = val_running / len(test_loader)
    val_losses.append(val_loss)

    # Duration covers both the training and the validation pass of this epoch
    epoch_duration = time.time() - start_time
    training_time += epoch_duration

    print(f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, Duration: {epoch_duration:.2f} sec')

Epoch 1: 100%|██████████| 98/98 [01:54<00:00,  1.17s/it]


	Loss: 2.2623161734366906, Val Loss: 1.9686915159225464, Duration: 141.34 sec


Epoch 2: 100%|██████████| 98/98 [01:56<00:00,  1.19s/it]


	Loss: 2.0344421133703117, Val Loss: 1.7639890789985657, Duration: 142.70 sec


Epoch 3: 100%|██████████| 98/98 [01:56<00:00,  1.18s/it]


	Loss: 1.874090506106007, Val Loss: 1.5843210160732268, Duration: 142.54 sec


Epoch 4: 100%|██████████| 98/98 [01:55<00:00,  1.18s/it]


	Loss: 1.773993621067125, Val Loss: 1.5347998082637786, Duration: 142.54 sec


Epoch 5: 100%|██████████| 98/98 [01:56<00:00,  1.19s/it]


	Loss: 1.726123673575265, Val Loss: 1.5311316430568696, Duration: 142.98 sec


Epoch 6: 100%|██████████| 98/98 [01:56<00:00,  1.19s/it]


	Loss: 1.6821188233336624, Val Loss: 1.4426632463932036, Duration: 143.24 sec


Epoch 7: 100%|██████████| 98/98 [01:56<00:00,  1.19s/it]


	Loss: 1.6382945751657292, Val Loss: 1.436668300628662, Duration: 143.46 sec


Epoch 8: 100%|██████████| 98/98 [01:57<00:00,  1.20s/it]


	Loss: 1.612448554866168, Val Loss: 1.3359784245491029, Duration: 143.85 sec


Epoch 9: 100%|██████████| 98/98 [01:56<00:00,  1.19s/it]


	Loss: 1.587733934120256, Val Loss: 1.3158570587635041, Duration: 143.38 sec


Epoch 10: 100%|██████████| 98/98 [01:58<00:00,  1.21s/it]


	Loss: 1.564613708427974, Val Loss: 1.2721958339214325, Duration: 144.86 sec


In [6]:
# Train model2 (checkpoint-initialised custom ViT with a new head) with mixed
# precision, recording the mean train and validation loss of every epoch.
training_time = 0
losses = []
val_losses = []

# Gradient scaler used together with autocast in the training pass
scaler = GradScaler()

for epoch in range(epochs):
    model2.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(train_loader, total=len(train_loader), desc=f"Epoch {epoch + 1}")

    for inputs, labels in pbar:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer2.zero_grad()

        # Forward pass and loss under autocast
        with autocast():
            logits = model2(inputs)
            loss = criterion(logits, labels)

        # Scaled backward pass, optimizer step, then scale-factor update
        scaler.scale(loss).backward()
        scaler.step(optimizer2)
        scaler.update()

        # (no learning-rate scheduler is stepped in this loop)
        running_loss += loss.item()

    # Mean of the per-batch training losses
    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)

    # Validation pass: no gradients, no autocast
    model2.eval()
    val_running = 0.0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            val_running += criterion(model2(inputs), labels).item()
    val_loss = val_running / len(test_loader)
    val_losses.append(val_loss)

    # Duration covers both the training and the validation pass of this epoch
    epoch_duration = time.time() - start_time
    training_time += epoch_duration

    print(f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, Duration: {epoch_duration:.2f} sec')

Epoch 1: 100%|██████████| 98/98 [01:56<00:00,  1.19s/it]


	Loss: 1.8670874700254323, Val Loss: 1.4241740226745605, Duration: 142.13 sec


Epoch 2: 100%|██████████| 98/98 [01:56<00:00,  1.19s/it]


	Loss: 1.6314488345262956, Val Loss: 1.3182855248451233, Duration: 142.83 sec


Epoch 3: 100%|██████████| 98/98 [01:56<00:00,  1.19s/it]


	Loss: 1.5578840034348624, Val Loss: 1.2583444178104402, Duration: 143.17 sec


Epoch 4: 100%|██████████| 98/98 [01:56<00:00,  1.19s/it]


	Loss: 1.4982385173135875, Val Loss: 1.2127423286437988, Duration: 143.13 sec


Epoch 5: 100%|██████████| 98/98 [01:56<00:00,  1.19s/it]


	Loss: 1.4712985571549864, Val Loss: 1.1880209386348723, Duration: 143.18 sec


Epoch 6: 100%|██████████| 98/98 [01:57<00:00,  1.20s/it]


	Loss: 1.4345406695288054, Val Loss: 1.1417484879493713, Duration: 143.98 sec


Epoch 7: 100%|██████████| 98/98 [01:57<00:00,  1.20s/it]


	Loss: 1.4073233689580644, Val Loss: 1.1176199793815613, Duration: 144.07 sec


Epoch 8: 100%|██████████| 98/98 [01:56<00:00,  1.19s/it]


	Loss: 1.3748024361474174, Val Loss: 1.0862751930952073, Duration: 143.57 sec


Epoch 9: 100%|██████████| 98/98 [01:56<00:00,  1.19s/it]


	Loss: 1.3555698881343918, Val Loss: 1.084110641479492, Duration: 143.00 sec


Epoch 10: 100%|██████████| 98/98 [01:56<00:00,  1.19s/it]


	Loss: 1.343857852780089, Val Loss: 1.043871021270752, Duration: 143.17 sec


In [13]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, precision_recall_fscore_support

# Evaluate model2 on the test set: collect predictions, then report the
# confusion matrix inputs plus accuracy and weighted precision / recall / F1.

# Switch to evaluation mode explicitly. Without this the cell only gives
# correct numbers if a previously run cell happened to leave the model in
# eval mode; otherwise dropout stays active during inference.
model2.eval()

# Run inference and store predictions and ground-truth labels
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model2(images.to(device))
        # Index of the largest output per sample; argmax avoids binding `_`,
        # which IPython uses for the last cell output.
        predicted = outputs.argmax(dim=1)
        all_preds.extend(predicted.cpu().numpy())
        # Labels are only needed on the host, so they are never moved to the device.
        all_labels.extend(labels.cpu().numpy())

# Build the confusion matrix
cm = confusion_matrix(all_labels, all_preds)

# Ground-truth labels and the labels predicted by the model
y_true = all_labels
y_pred = all_preds

# Accuracy over the whole test set
accuracy = accuracy_score(y_true, y_pred)

# Precision, recall and F1 score averaged with 'weighted'; the fourth return
# value (support) is not used.
precision, recall, f1_score, support = precision_recall_fscore_support(
    y_true, y_pred, average='weighted'
)

# Collect the results in a pandas DataFrame
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# Show the DataFrame
display(performance_metrics)

Unnamed: 0,Metric,Value
0,Accuracy,0.6371
1,Precision,0.636212
2,Recall,0.6371
3,F1 Score,0.633042


In [10]:
# Train model3 (timm architecture, not pretrained) with mixed precision,
# recording the mean train and validation loss of every epoch.
training_time = 0
losses = []
val_losses = []

# Gradient scaler used together with autocast in the training pass
scaler = GradScaler()

for epoch in range(epochs):
    model3.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(train_loader, total=len(train_loader), desc=f"Epoch {epoch + 1}")

    for inputs, labels in pbar:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer3.zero_grad()

        # Forward pass and loss under autocast
        with autocast():
            logits = model3(inputs)
            loss = criterion(logits, labels)

        # Scaled backward pass, optimizer step, then scale-factor update
        scaler.scale(loss).backward()
        scaler.step(optimizer3)
        scaler.update()

        # (no learning-rate scheduler is stepped in this loop)
        running_loss += loss.item()

    # Mean of the per-batch training losses
    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)

    # Validation pass: no gradients, no autocast
    model3.eval()
    val_running = 0.0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            val_running += criterion(model3(inputs), labels).item()
    val_loss = val_running / len(test_loader)
    val_losses.append(val_loss)

    # Duration covers both the training and the validation pass of this epoch
    epoch_duration = time.time() - start_time
    training_time += epoch_duration

    print(f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, Duration: {epoch_duration:.2f} sec')

Epoch 1: 100%|██████████| 98/98 [00:23<00:00,  4.12it/s]


	Loss: 2.523995343519717, Val Loss: 2.0967079162597657, Duration: 32.58 sec


Epoch 2: 100%|██████████| 98/98 [00:23<00:00,  4.12it/s]


	Loss: 2.1745342478460197, Val Loss: 2.0236403048038483, Duration: 32.65 sec


Epoch 3: 100%|██████████| 98/98 [00:23<00:00,  4.17it/s]


	Loss: 2.1222505934384404, Val Loss: 1.968419885635376, Duration: 32.63 sec


Epoch 4: 100%|██████████| 98/98 [00:24<00:00,  4.07it/s]


	Loss: 2.0668411108912252, Val Loss: 1.901482141017914, Duration: 33.22 sec


Epoch 5: 100%|██████████| 98/98 [00:24<00:00,  4.07it/s]


	Loss: 2.0413163900375366, Val Loss: 1.8580438196659088, Duration: 33.00 sec


Epoch 6: 100%|██████████| 98/98 [00:23<00:00,  4.11it/s]


	Loss: 2.028800896235875, Val Loss: 1.8863760709762574, Duration: 32.50 sec


Epoch 7: 100%|██████████| 98/98 [00:24<00:00,  4.06it/s]


	Loss: 2.040751024168365, Val Loss: 1.8994010984897614, Duration: 33.16 sec


Epoch 8: 100%|██████████| 98/98 [00:24<00:00,  4.05it/s]


	Loss: 2.076141829393348, Val Loss: 1.910189837217331, Duration: 33.28 sec


Epoch 9: 100%|██████████| 98/98 [00:23<00:00,  4.16it/s]


	Loss: 2.0512935969294332, Val Loss: 1.9110594987869263, Duration: 32.35 sec


Epoch 10: 100%|██████████| 98/98 [00:23<00:00,  4.20it/s]


	Loss: 2.0484114423090096, Val Loss: 1.879141741991043, Duration: 32.12 sec


In [11]:
# Train model4 (timm architecture, pretrained weights) with mixed precision,
# recording the mean train and validation loss of every epoch.
training_time = 0
losses = []
val_losses = []

# Gradient scaler used together with autocast in the training pass
scaler = GradScaler()

for epoch in range(epochs):
    model4.train()
    start_time = time.time()
    running_loss = 0.0
    pbar = tqdm(train_loader, total=len(train_loader), desc=f"Epoch {epoch + 1}")

    for inputs, labels in pbar:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer4.zero_grad()

        # Forward pass and loss under autocast
        with autocast():
            logits = model4(inputs)
            loss = criterion(logits, labels)

        # Scaled backward pass, optimizer step, then scale-factor update
        scaler.scale(loss).backward()
        scaler.step(optimizer4)
        scaler.update()

        # (no learning-rate scheduler is stepped in this loop)
        running_loss += loss.item()

    # Mean of the per-batch training losses
    epoch_loss = running_loss / len(train_loader)
    losses.append(epoch_loss)

    # Validation pass: no gradients, no autocast
    model4.eval()
    val_running = 0.0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            val_running += criterion(model4(inputs), labels).item()
    val_loss = val_running / len(test_loader)
    val_losses.append(val_loss)

    # Duration covers both the training and the validation pass of this epoch
    epoch_duration = time.time() - start_time
    training_time += epoch_duration

    print(f'\tLoss: {epoch_loss}, Val Loss: {val_loss}, Duration: {epoch_duration:.2f} sec')

Epoch 1: 100%|██████████| 98/98 [00:23<00:00,  4.09it/s]


	Loss: 2.41632481253877, Val Loss: 2.004483497142792, Duration: 32.76 sec


Epoch 2: 100%|██████████| 98/98 [00:24<00:00,  4.07it/s]


	Loss: 1.99537278438101, Val Loss: 1.6770375847816468, Duration: 32.88 sec


Epoch 3: 100%|██████████| 98/98 [00:23<00:00,  4.13it/s]


	Loss: 1.6948575499106426, Val Loss: 1.2783602893352508, Duration: 32.41 sec


Epoch 4: 100%|██████████| 98/98 [00:23<00:00,  4.14it/s]


	Loss: 1.3646157140634498, Val Loss: 0.7867714107036591, Duration: 32.10 sec


Epoch 5: 100%|██████████| 98/98 [00:23<00:00,  4.24it/s]


	Loss: 1.1188466524591252, Val Loss: 0.6425715535879135, Duration: 32.04 sec


Epoch 6: 100%|██████████| 98/98 [00:22<00:00,  4.28it/s]


	Loss: 0.9790806831145773, Val Loss: 0.5649664521217346, Duration: 31.63 sec


Epoch 7: 100%|██████████| 98/98 [00:23<00:00,  4.18it/s]


	Loss: 0.901688634741063, Val Loss: 0.45184720158576963, Duration: 32.36 sec


Epoch 8: 100%|██████████| 98/98 [00:23<00:00,  4.15it/s]


	Loss: 0.8488072418436712, Val Loss: 0.4197005912661552, Duration: 32.43 sec


Epoch 9: 100%|██████████| 98/98 [00:23<00:00,  4.14it/s]


	Loss: 0.8141384471435936, Val Loss: 0.37713140398263933, Duration: 32.42 sec


Epoch 10: 100%|██████████| 98/98 [00:23<00:00,  4.16it/s]


	Loss: 0.7811690629745016, Val Loss: 0.3787470117211342, Duration: 32.38 sec


In [12]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, precision_recall_fscore_support

# Evaluate model4 on the test set: collect predictions, then report the
# confusion matrix inputs plus accuracy and weighted precision / recall / F1.

# Switch to evaluation mode explicitly. Without this the cell only gives
# correct numbers if a previously run cell happened to leave the model in
# eval mode; otherwise dropout stays active during inference.
model4.eval()

# Run inference and store predictions and ground-truth labels
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model4(images.to(device))
        # Index of the largest output per sample; argmax avoids binding `_`,
        # which IPython uses for the last cell output.
        predicted = outputs.argmax(dim=1)
        all_preds.extend(predicted.cpu().numpy())
        # Labels are only needed on the host, so they are never moved to the device.
        all_labels.extend(labels.cpu().numpy())

# Build the confusion matrix
cm = confusion_matrix(all_labels, all_preds)

# Ground-truth labels and the labels predicted by the model
y_true = all_labels
y_pred = all_preds

# Accuracy over the whole test set
accuracy = accuracy_score(y_true, y_pred)

# Precision, recall and F1 score averaged with 'weighted'; the fourth return
# value (support) is not used.
precision, recall, f1_score, support = precision_recall_fscore_support(
    y_true, y_pred, average='weighted'
)

# Collect the results in a pandas DataFrame
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# Show the DataFrame
display(performance_metrics)

Unnamed: 0,Metric,Value
0,Accuracy,0.8684
1,Precision,0.870746
2,Recall,0.8684
3,F1 Score,0.86846
