In [1]:
import timm
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.transforms.autoaugment import AutoAugmentPolicy
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
from tqdm import tqdm


img_size = 32
patch_size = 4
channels = 3

batch_size = 512
num_classes = 10
num_workers = 64

device = 'cuda:2'

# CIFAR-10 데이터셋을 위한 전처리
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(img_size),  # 무작위 크기 및 비율로 자르기
    transforms.AutoAugment(AutoAugmentPolicy.CIFAR10),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

# 모델 불러오기 및 수정
model = timm.create_model('vit_small_patch16_224', pretrained=True)

# 패치 임베딩 수정
model.patch_embed = timm.models.vision_transformer.PatchEmbed(
    img_size=img_size, patch_size=patch_size, in_chans=channels, embed_dim=model.embed_dim
)

# 포지셔널 임베딩 조정
num_patches = (img_size // patch_size) ** 2
model.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, model.embed_dim))

# 분류기 수정
model.head = nn.Linear(model.head.in_features, num_classes)

# 학습 파라미터 설정
criterion = nn.CrossEntropyLoss(label_smoothing=0.0)
optimizer = optim.Adam(model.parameters(), lr=0.001)

model.to(device)
# 학습 루프
for epoch in range(10):
    running_loss = 0.0
    pbar = tqdm(enumerate(trainloader), total=len(trainloader), desc=f"Epoch {epoch + 1}")
    
    model.train()
    for i, data in pbar:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in testloader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
    val_loss /= len(testloader)
    
    text = f'\tLoss: {loss}, Val Loss: {val_loss}'
    print(text)

print('Finished Training')


  from .autonotebook import tqdm as notebook_tqdm


Files already downloaded and verified
Files already downloaded and verified


Epoch 1: 100%|██████████| 98/98 [00:31<00:00,  3.15it/s]


	Loss: 1.916131854057312, Val Loss: 1.9400612771511079


Epoch 2: 100%|██████████| 98/98 [00:30<00:00,  3.16it/s]


	Loss: 1.7580667734146118, Val Loss: 1.8092978358268739


Epoch 3: 100%|██████████| 98/98 [00:31<00:00,  3.13it/s]


	Loss: 1.591298222541809, Val Loss: 1.6233053386211396


Epoch 4: 100%|██████████| 98/98 [00:31<00:00,  3.12it/s]


	Loss: 1.577829122543335, Val Loss: 1.5546675503253937


Epoch 5: 100%|██████████| 98/98 [00:31<00:00,  3.10it/s]


	Loss: 1.574291467666626, Val Loss: 1.542280125617981


Epoch 6: 100%|██████████| 98/98 [00:31<00:00,  3.11it/s]


	Loss: 1.5153234004974365, Val Loss: 1.4216112852096559


Epoch 7: 100%|██████████| 98/98 [00:31<00:00,  3.13it/s]


	Loss: 1.3828635215759277, Val Loss: 1.342096483707428


Epoch 8: 100%|██████████| 98/98 [00:31<00:00,  3.15it/s]


	Loss: 1.4008663892745972, Val Loss: 1.3377744436264039


Epoch 9: 100%|██████████| 98/98 [00:31<00:00,  3.14it/s]


	Loss: 1.269587516784668, Val Loss: 1.2351432919502259


Epoch 10: 100%|██████████| 98/98 [00:31<00:00,  3.11it/s]


	Loss: 1.29839026927948, Val Loss: 1.2327030301094055
Finished Training


In [2]:
from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
display(performance_metrics)

Unnamed: 0,Metric,Value
0,Accuracy,0.5624
1,Precision,0.561588
2,Recall,0.5624
3,F1 Score,0.547092


In [3]:
import model.vit_better as vit_custom

# 모델 불러오기 및 수정
model = vit_custom.VisionTransformer(img_size=224, 
                        patch_size=16, 
                        num_classes=100, 
                        dropout=0.1,
                        embed_dim=768,
                        num_layers=12,
                        num_heads=12,
                        mlp_ratio=4.,
                        estimate_params=True)
model.load_state_dict(torch.load('./model/last_sports.pth', map_location=device))
model.embed_dim = 768

# 패치 임베딩 수정
model.patch_embed = vit_custom.PatchEmbedding(img_size=img_size, 
                                              patch_size=patch_size, 
                                              in_channels=channels, 
                                              embed_dim=model.embed_dim)

# 포지셔널 임베딩 조정
num_patches = (img_size // patch_size) ** 2
model.pos_embed = vit_custom.PositionalEmbedding(num_patches, model.embed_dim)

# 분류기 수정
model.head = nn.Linear(model.head.in_features, num_classes)

# 학습 파라미터 설정
criterion = nn.CrossEntropyLoss(label_smoothing=0.0)
optimizer = optim.Adam(model.parameters(), lr=0.001)

model.to(device)
# 학습 루프
for epoch in range(10):
    running_loss = 0.0
    pbar = tqdm(enumerate(trainloader), total=len(trainloader), desc=f"Epoch {epoch + 1}")
    
    model.train()
    for i, data in pbar:
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for data in testloader:
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
    val_loss /= len(testloader)
    
    text = f'\tLoss: {loss}, Val Loss: {val_loss}'
    print(text)

print('Finished Training')

Epoch 1: 100%|██████████| 98/98 [02:00<00:00,  1.23s/it]


	Loss: 1.7248024940490723, Val Loss: 1.7568284094333648


Epoch 2: 100%|██████████| 98/98 [02:01<00:00,  1.24s/it]


	Loss: 1.65959894657135, Val Loss: 1.6409284830093385


Epoch 3: 100%|██████████| 98/98 [02:01<00:00,  1.24s/it]


	Loss: 1.5587505102157593, Val Loss: 1.5472254753112793


Epoch 4: 100%|██████████| 98/98 [02:01<00:00,  1.23s/it]


	Loss: 1.4636024236679077, Val Loss: 1.4681180596351624


Epoch 5: 100%|██████████| 98/98 [02:01<00:00,  1.24s/it]


	Loss: 1.4648040533065796, Val Loss: 1.4282831370830535


Epoch 6: 100%|██████████| 98/98 [02:01<00:00,  1.24s/it]


	Loss: 1.409483551979065, Val Loss: 1.376497322320938


Epoch 7: 100%|██████████| 98/98 [02:01<00:00,  1.24s/it]


	Loss: 1.3905757665634155, Val Loss: 1.3468469321727752


Epoch 8: 100%|██████████| 98/98 [02:01<00:00,  1.24s/it]


	Loss: 1.3854670524597168, Val Loss: 1.3098032891750335


Epoch 9: 100%|██████████| 98/98 [02:01<00:00,  1.24s/it]


	Loss: 1.3407928943634033, Val Loss: 1.2821733832359314


Epoch 10: 100%|██████████| 98/98 [02:01<00:00,  1.24s/it]


	Loss: 1.2658334970474243, Val Loss: 1.221460407972336
Finished Training


In [4]:
from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# 예측 수행 및 레이블 저장
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# 혼동 행렬 생성
cm = confusion_matrix(all_labels, all_preds)

# 예측과 실제 레이블
y_true = all_labels  # 실제 레이블
y_pred = all_preds  # 모델에 의해 예측된 레이블

# 전체 데이터셋에 대한 정확도
accuracy = accuracy_score(y_true, y_pred)

# 평균 정밀도, 리콜, F1-Score ('weighted')
precision, recall, f1_score, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')

# 판다스 데이터프레임으로 결과 정리
performance_metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

# 데이터프레임 출력
display(performance_metrics)

Unnamed: 0,Metric,Value
0,Accuracy,0.5654
1,Precision,0.562812
2,Recall,0.5654
3,F1 Score,0.559064
