In [1]:
pip install timm


[0mNote: you may need to restart the kernel to use updated packages.


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import mlflow
import mlflow.pytorch
import timm
import os

# Hyperparameters
IMAGE_SIZE = 224           # side length (pixels) images are resized to
BATCH_SIZE = 32            # samples per mini-batch
NUM_EPOCHS = 10            # number of passes over the training set
LEARNING_RATE = 0.0001     # learning rate handed to the optimizer
OPTIMIZER_NAME = "AdamW"   # optimizer label recorded with the run parameters

# Data preprocessing
# Per-channel normalisation constants and target size shared by both pipelines
# (the mean/std values match the widely used ImageNet statistics).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]
_TARGET_SIZE = (IMAGE_SIZE, IMAGE_SIZE)

# Training pipeline: resize, random horizontal flip, tensor conversion, normalisation.
transform_train = transforms.Compose([
    transforms.Resize(_TARGET_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Validation/test pipeline: the same steps without the random flip.
transform_val_test = transforms.Compose([
    transforms.Resize(_TARGET_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Datasets
# Root folder containing the Training/Validation/Test splits. Defaults to the
# original location; set FACIAL_BONE_DIR to run the notebook on another machine.
DATA_ROOT = os.environ.get("FACIAL_BONE_DIR", "/home/kangkr1002/facial_bone")
NUM_WORKERS = 4

train_dataset = datasets.ImageFolder(root=os.path.join(DATA_ROOT, "Training"), transform=transform_train)
val_dataset = datasets.ImageFolder(root=os.path.join(DATA_ROOT, "Validation"), transform=transform_val_test)
test_dataset = datasets.ImageFolder(root=os.path.join(DATA_ROOT, "Test"), transform=transform_val_test)

# ImageFolder assigns label indices from each split's own sub-folder names, so a
# split with a missing or extra class folder would silently shift its labels.
# Fail loudly instead of training/evaluating against mismatched labels.
for _split_name, _split in (("Validation", val_dataset), ("Test", test_dataset)):
    if _split.class_to_idx != train_dataset.class_to_idx:
        raise ValueError(
            f"{_split_name} class folders {_split.class_to_idx} do not match "
            f"Training class folders {train_dataset.class_to_idx}"
        )

# The train/val loaders are iterated once per epoch; persistent workers avoid
# re-spawning the worker processes on every pass.
# NOTE(review): the recorded output shows multiprocessing finalizer tracebacks
# around worker shutdown - confirm whether this reduces them.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS, persistent_workers=NUM_WORKERS > 0)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                        num_workers=NUM_WORKERS, persistent_workers=NUM_WORKERS > 0)
# The test loader is only iterated once, so its workers need not persist.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

num_classes = len(train_dataset.classes)

# Device
# Prefer GPU index 2 (the original choice). Fall back to GPU 0 when the machine
# exposes fewer GPUs - "cuda:2" would otherwise fail even though CUDA is
# available - and to the CPU when CUDA is not available at all.
PREFERRED_GPU = 2
if torch.cuda.is_available():
    _gpu_index = PREFERRED_GPU if PREFERRED_GPU < torch.cuda.device_count() else 0
    device = torch.device(f"cuda:{_gpu_index}")
else:
    device = torch.device("cpu")

# Model: Swin Transformer
# NOTE(review): pretrained=False means no pretrained weights are loaded, while
# the preprocessing uses ImageNet-style normalisation - confirm that training
# from scratch is intended.
model = timm.create_model('swin_base_patch4_window7_224', pretrained=False, num_classes=num_classes)
model = model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
# Build the optimizer from OPTIMIZER_NAME so the name logged to MLflow always
# matches the optimizer that is actually used.
optimizer = getattr(optim, OPTIMIZER_NAME)(model.parameters(), lr=LEARNING_RATE)



# Training and evaluation functions
def train_epoch(model, loader, criterion, optimizer, device=None):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: network to optimise; it is switched to training mode.
        loader: iterable yielding ``(images, labels)`` batches.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss of the batch (the default
            reduction of ``nn.CrossEntropyLoss``); the epoch loss is weighted
            by batch size on that basis.
        optimizer: optimizer whose ``zero_grad``/``step`` run once per batch.
        device: device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).

    Returns:
        Tuple ``(mean_loss, accuracy)`` computed over every sample seen.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight each batch by its size so a smaller final batch does not
        # skew the epoch average.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += batch_size

    if total == 0:
        raise ValueError("train_epoch received an empty loader (no samples to train on)")
    return loss_sum / total, correct / total

def eval_epoch(model, loader, criterion, device=None):
    """Evaluate ``model`` on every batch of ``loader`` without updating weights.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        loader: iterable yielding ``(images, labels)`` batches.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
            Assumed to return the mean loss of the batch (the default
            reduction of ``nn.CrossEntropyLoss``); the overall loss is
            weighted by batch size on that basis.
        device: device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).

    Returns:
        Tuple ``(mean_loss, accuracy)`` computed over every sample seen.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Weight each batch by its size so a smaller final batch does not
            # skew the overall average.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += batch_size

    if total == 0:
        raise ValueError("eval_epoch received an empty loader (no samples to evaluate)")
    return loss_sum / total, correct / total
# Point MLflow at the tracking server. The MLFLOW_TRACKING_URI environment
# variable overrides the default address so the notebook can run elsewhere.
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", "http://10.125.208.184:5000"))
# MLflow experiment setup
experiment_name = "SwinTransformer"
mlflow.set_experiment(experiment_name)
# Training loop (MLflow tracking)
with mlflow.start_run():
    # Record the run configuration in one call
    mlflow.log_params({
        "Model": "Swin-Base",
        "Batch Size": BATCH_SIZE,
        "Learning Rate": LEARNING_RATE,
        "Optimizer": OPTIMIZER_NAME,
        "Epochs": NUM_EPOCHS,
        "Image Size": IMAGE_SIZE,
        "Num Classes": num_classes,
    })

    for epoch in range(NUM_EPOCHS):
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer)
        val_loss, val_acc = eval_epoch(model, val_loader, criterion)

        # Record this epoch's metrics in one call
        mlflow.log_metrics(
            {
                "Train Loss": train_loss,
                "Train Acc": train_acc,
                "Val Loss": val_loss,
                "Val Acc": val_acc,
            },
            step=epoch,
        )

        print(f"Epoch [{epoch+1}/{NUM_EPOCHS}]")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

    # Save the model (weights as of the final epoch) as a run artifact
    mlflow.pytorch.log_model(model, artifact_path="model")

    # Evaluate on the test set
    test_loss, test_acc = eval_epoch(model, test_loader, criterion)
    print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}")

    # Record the test metrics
    mlflow.log_metrics({"Test Loss": test_loss, "Test Acc": test_acc})


Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/kmj388/micromamba/envs/capston-pytorch/lib/python3.10/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/home/kmj388/micromamba/envs/capston-pytorch/lib/python3.10/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/home/kmj388/micromamba/envs/capston-pytorch/lib/python3.10/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/home/kmj388/micromamba/envs/capston-pytorch/lib/python3.10/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/home/kmj388/micromamba/envs/capston-pytorch/lib/python3.10/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/home/kmj388/micromamba/envs/capston-pytorch/lib/python3.10/multiprocessing/util.py", line 224, in __call__
    res = self._ca

Epoch [1/10]
Train Loss: 0.6658, Train Acc: 0.6120
Val Loss: 0.6030, Val Acc: 0.6540


Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/kmj388/micromamba/envs/capston-pytorch/lib/python3.10/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
Traceback (most recent call last):
  File "/home/kmj388/micromamba/envs/capston-pytorch/lib/python3.10/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
Traceback (most recent call last):
  File "/home/kmj388/micromamba/envs/capston-pytorch/lib/python3.10/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/home/kmj388/micromamba/envs/capston-pytorch/lib/python3.10/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/home/kmj388/micromamba/envs/capston-pytorch/lib/python3.10/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/home/kmj388/micromamba/envs/capston-pytorch/lib/python3.10/multiprocessing/util.py", line 300,

🏃 View run dapper-auk-52 at: http://10.125.208.184:5000/#/experiments/557782806761005845/runs/bb9c0b9fa2ad4c1c88e7dba2791e2446
🧪 View experiment at: http://10.125.208.184:5000/#/experiments/557782806761005845


KeyboardInterrupt: 

In [None]:
# Save the model to MLflow and register it under "SwinTransformer".
# Calling log_model with no active run makes MLflow open a new run implicitly
# and leave it open, detached from the training run's parameters and metrics.
# Instead, re-attach to the most recent run (if there is one) and close it
# again once the model has been logged.
if mlflow.active_run() is not None:
    mlflow.pytorch.log_model(model, "model", registered_model_name="SwinTransformer")
else:
    _previous_run = mlflow.last_active_run()
    _previous_run_id = _previous_run.info.run_id if _previous_run is not None else None
    # run_id=None starts a fresh run; the context manager guarantees it is ended.
    with mlflow.start_run(run_id=_previous_run_id):
        mlflow.pytorch.log_model(model, "model", registered_model_name="SwinTransformer")