In [None]:
import torch

print("torch version:", torch.__version__)


torch version: 2.5.1


In [22]:
TRAIN_PATH = "/home/kangkr1002/facial_bone/Training"
VAL_PATH   = "/home/kangkr1002/facial_bone/Validation"
TEST_PATH  = "/home/kangkr1002/facial_bone/Test"

In [23]:
from transformers import ViTForImageClassification, ViTFeatureExtractor
import torch

# 디바이스 설정
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ViT 모델 로딩 (pretrained)
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=2  # 분류할 클래스 수 (ex: binary classification)
)

model = model.to(device)


config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
from transformers import AutoImageProcessor

# ViT 전용 이미지 전처리
feature_extractor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

# 새 transform 정의
def transform_vit(example):
    # 예제는 PIL 이미지여야 해
    inputs = feature_extractor(images=example, return_tensors="pt")
    return inputs['pixel_values'].squeeze(0)


preprocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

Fast image processor class <class 'transformers.models.vit.image_processing_vit_fast.ViTImageProcessorFast'> is available for this model. Using slow image processor class. To use the fast image processor class set `use_fast=True`.


In [7]:
from torchvision import datasets
from torch.utils.data import DataLoader

class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, folder_path, transform):
        self.dataset = datasets.ImageFolder(folder_path)
        self.transform = transform
    
    def __getitem__(self, idx):
        img, label = self.dataset[idx]
        img = self.transform(img)
        return img, label

    def __len__(self):
        return len(self.dataset)

# 데이터셋 정의
train_ds = CustomDataset(TRAIN_PATH, transform=transform_vit)
val_ds   = CustomDataset(VAL_PATH, transform=transform_vit)
test_ds  = CustomDataset(TEST_PATH, transform=transform_vit)

# DataLoader
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader   = DataLoader(val_ds, batch_size=32, shuffle=False)
test_loader  = DataLoader(test_ds, batch_size=32, shuffle=False)


In [24]:
from transformers import ViTConfig, ViTForImageClassification

# 1. Config 만들기
config = ViTConfig(
    image_size=224,
    patch_size=16,
    num_labels=2,          # 너가 분류할 클래스 수
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    qkv_bias=True,
)

# 2. 모델 새로 만들기 (pretrained 없이!)
model = ViTForImageClassification(config)

# 3. 디바이스 올리기
model = model.to(device)


In [25]:
from torch import nn
from torch.optim import AdamW

criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=2e-5)


In [None]:
import mlflow

mlflow.set_tracking_uri("http://10.125.208.184:5000")

with mlflow.start_run(run_name="VIT4"):  # 전체 학습을 하나의 run으로 묶는다
    mlflow.log_param("learning_rate", optimizer.param_groups[0]['lr'])
    mlflow.log_param("batch_size", train_loader.batch_size)
    mlflow.log_param("num_epochs", 10)

    for epoch in range(10):
        # ========== Train ==========
        model.train()
        total_loss = 0
        correct = 0
        total = 0
        
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images).logits
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        avg_loss = total_loss / len(train_loader)
        accuracy = correct / total

        # ========== Validation ==========
        model.eval()
        val_loss = 0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images).logits
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                preds = outputs.argmax(dim=1)
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)

        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = val_correct / val_total

        # ========== Log to MLflow ==========
        mlflow.log_metric("train_loss", avg_loss, step=epoch)
        mlflow.log_metric("train_accuracy", accuracy, step=epoch)
        mlflow.log_metric("val_loss", avg_val_loss, step=epoch)
        mlflow.log_metric("val_accuracy", val_accuracy, step=epoch)

        print(f"Epoch {epoch+1}: "
              f"Train Loss = {avg_loss:.4f}, Train Acc = {accuracy:.4f} | "
              f"Val Loss = {avg_val_loss:.4f}, Val Acc = {val_accuracy:.4f}")


Epoch 1: Train Loss = 0.2093, Train Acc = 0.9149 | Val Loss = 0.2508, Val Acc = 0.8970
Epoch 2: Train Loss = 0.1720, Train Acc = 0.9314 | Val Loss = 0.2560, Val Acc = 0.8950
Epoch 3: Train Loss = 0.1480, Train Acc = 0.9424 | Val Loss = 0.2812, Val Acc = 0.8790


In [None]:
# 모델 mlflow에 저장
mlflow.pytorch.log_model(model, "model", registered_model_name="VIT4")
# 모델 저장
torch.save(model.state_dict(), "vit_model.pth")


In [31]:
import mlflow
import mlflow.pytorch
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
import torch
import json
import os
os.environ['TMPDIR'] = '/home/kmj388/my_tmp'  # 여유 공간 있는 경로로 지정
os.makedirs('/home/kmj388/my_tmp', exist_ok=True)
# ========== 세팅 ==========
mlflow.set_tracking_uri("http://10.125.208.184:5000")

# ========== 방금 훈련한 run의 ID를 가져와야 함 ==========
# 보통은 start_run() 안에서 run_id를 바로 저장해놓는게 베스트였어
# 일단 여기서는 MLflow UI에서 run_id를 직접 가져오거나
# 따로 저장했던 run_id를 여기에 넣자
run_id = "8990d17a96744cb6a7aaf72ea4a464b9"  # <<< VIT_2 run id로 바꿔

# ========== 현재 열린 run 닫기 ==========
if mlflow.active_run():
    mlflow.end_run()

# ========== 이어서 기록 ==========
with mlflow.start_run(run_id=run_id):

    # 1. 모델 저장
    mlflow.pytorch.log_model(model, artifact_path="model")

    # 2. Validation 데이터로 confusion matrix / classification report 만들기
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images).logits
            preds = outputs.argmax(dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Confusion Matrix
    cm = confusion_matrix(all_labels, all_preds)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot()
    plt.title('Validation Confusion Matrix')
    plt.savefig("val_confusion_matrix.png")
    mlflow.log_artifact("val_confusion_matrix.png")

    # Classification Report
    cls_report = classification_report(all_labels, all_preds, output_dict=True)
    with open("val_classification_report.json", "w") as f:
        json.dump(cls_report, f, indent=4)
    mlflow.log_artifact("val_classification_report.json")

    # 3. Model Registry 등록
    model_uri = f"runs:/{run_id}/model"
    mlflow.register_model(model_uri=model_uri, name="VIT_2_Model")

    # 4. Test 데이터셋도 돌리기 (선택사항)
    model.eval()
    test_preds = []
    test_labels = []
    test_loss = 0
    test_correct = 0
    test_total = 0

    with torch.no_grad():
        for images, labels in test_loader:  # ❗ test_loader 준비되어 있어야 함
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images).logits
            loss = criterion(outputs, labels)

            test_loss += loss.item() * images.size(0)
            preds = outputs.argmax(dim=1)

            test_total += labels.size(0)
            test_correct += preds.eq(labels).sum().item()

            test_preds.extend(preds.cpu().numpy())
            test_labels.extend(labels.cpu().numpy())

    avg_test_loss = test_loss / test_total
    test_accuracy = test_correct / test_total

    # 테스트 메트릭 기록
    mlflow.log_metric("test_loss", avg_test_loss)
    mlflow.log_metric("test_accuracy", test_accuracy)

    # 테스트 Confusion Matrix
    cm_test = confusion_matrix(test_labels, test_preds)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm_test)
    disp.plot()
    plt.title('Test Confusion Matrix')
    plt.savefig("test_confusion_matrix.png")
    mlflow.log_artifact("test_confusion_matrix.png")

    # 테스트 Classification Report
    test_cls_report = classification_report(test_labels, test_preds, output_dict=True)
    with open("test_classification_report.json", "w") as f:
        json.dump(test_cls_report, f, indent=4)
    mlflow.log_artifact("test_classification_report.json")


🏃 View run VIT3 at: http://10.125.208.184:5000/#/experiments/0/runs/8990d17a96744cb6a7aaf72ea4a464b9
🧪 View experiment at: http://10.125.208.184:5000/#/experiments/0


OSError: [Errno 28] No space left on device: '/tmp/tmpwe9k1ha_'

In [30]:
# 모델 저장 pth
torch.save(model.state_dict(), "vit_model.pth")

#model test
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images).logits
        preds = outputs.argmax(dim=1)

        print("Predictions:", preds)
        print("Labels:", labels)
        break



Predictions: tensor([1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,
        0, 0, 0, 0, 0, 1, 0, 0], device='cuda:0')
Labels: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')


In [20]:
model.eval()
test_loss = 0.0
test_correct = 0
test_total = 0
test_preds = []
test_labels = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        logits = outputs.logits   # ✅ 여기 수정!
        
        loss = criterion(logits, labels)  # ✅ logits만 넣기

        test_loss += loss.item() * images.size(0)
        preds = logits.argmax(dim=1)  # ✅ logits 기준으로 예측

        test_total += labels.size(0)
        test_correct += preds.eq(labels).sum().item()

        test_preds.extend(preds.cpu().numpy())
        test_labels.extend(labels.cpu().numpy())

avg_test_loss = test_loss / test_total
test_accuracy = test_correct / test_total

print(f"Test Loss: {avg_test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# Confusion Matrix
from sklearn.metrics import confusion_matrix, classification_report

cm = confusion_matrix(test_labels, test_preds)
print("\nConfusion Matrix:")
print(cm)

cls_report = classification_report(test_labels, test_preds, digits=4)
print("\nClassification Report:")
print(cls_report)


Test Loss: 0.0518
Test Accuracy: 0.9840

Confusion Matrix:
[[495   5]
 [ 11 489]]

Classification Report:
              precision    recall  f1-score   support

           0     0.9783    0.9900    0.9841       500
           1     0.9899    0.9780    0.9839       500

    accuracy                         0.9840      1000
   macro avg     0.9841    0.9840    0.9840      1000
weighted avg     0.9841    0.9840    0.9840      1000

