In [1]:
import pandas as pd
from src.data.preprocessing import preprocess_metadata, get_transform
from src.data.dataset import SkinLesionDataset
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

# Data paths (relative): the training metadata CSV read with pandas below,
# and the HDF5 file handed to SkinLesionDataset as `hdf5_path`.
train_meta_path = "data/raw/train-metadata.csv"
train_img_hdf5 = "data/raw/train-image.hdf5"

In [2]:
# Load and preprocess the metadata.
# low_memory=False makes pandas parse the file in one pass so every column gets
# a single, consistently inferred dtype. The recorded output of this cell points
# a warning at this read_csv call -- presumably the mixed-type DtypeWarning that
# chunked inference emits (TODO confirm against the full warning text).
df_raw = pd.read_csv(train_meta_path, low_memory=False)
# preprocess_metadata returns the processed frame plus a second object bound to
# `scaler`; the scaler is not used again in the cells shown here.
df_processed, scaler = preprocess_metadata(df_raw)

# Train/validation split: 80/20, stratified on `target` so both parts keep the
# same class ratio; random_state fixes the split across runs.
train_df, val_df = train_test_split(
    df_processed, test_size=0.2, stratify=df_processed['target'], random_state=42
)

# Image-only datasets (use_metadata=False); each phase gets its own transform.
train_dataset = SkinLesionDataset(
    df=train_df,
    hdf5_path=train_img_hdf5,
    transforms=get_transform(phase="train"),
    use_metadata=False
)
val_dataset = SkinLesionDataset(
    df=val_df,
    hdf5_path=train_img_hdf5,
    transforms=get_transform(phase="val"),
    use_metadata=False
)

# Shuffle only the training data; validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)

  df_raw = pd.read_csv(train_meta_path)


In [3]:
# Model definition
import torch
import torch.nn as nn
from torch.optim import Adam
from src.models.cnn import SimpleCNN

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the network with pretrained=True, then move it to the selected device.
model = SimpleCNN(pretrained=True)
model = model.to(device)

# Loss function and optimizer.
# NOTE(review): nn.BCELoss expects inputs that are already probabilities in
# [0, 1]; presumably SimpleCNN ends in a sigmoid -- confirm in src/models/cnn.py.
criterion = nn.BCELoss()
optimizer = Adam(params=model.parameters(), lr=1e-4)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 135MB/s]


In [4]:
# Training loop: run `num_epochs` passes over train_loader and report the mean
# per-batch loss after each epoch.
num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)

        optimizer.zero_grad()
        # Flatten to 1-D rather than calling a bare .squeeze(): squeeze() drops
        # *every* size-1 dim, so a final batch holding a single sample would
        # become a 0-d tensor and no longer match the (1,)-shaped labels in the
        # loss. Assumes the model emits one value per sample -- TODO confirm.
        outputs = model(imgs).reshape(-1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch losses (each batch counts equally).
    avg_train_loss = total_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}")

Epoch [1/5], Train Loss: 0.0084
Epoch [2/5], Train Loss: 0.0075
Epoch [3/5], Train Loss: 0.0073
Epoch [4/5], Train Loss: 0.0070
Epoch [5/5], Train Loss: 0.0069


In [5]:
# Validation: one pass over val_loader in eval mode with gradients disabled,
# reporting the mean per-batch loss.
model.eval()
val_loss = 0.0
with torch.no_grad():
    for imgs, labels in val_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        # Flatten to 1-D rather than calling a bare .squeeze(): with a final
        # batch of one sample, squeeze() would yield a 0-d tensor that no longer
        # matches the (1,)-shaped labels in the loss. Assumes the model emits
        # one value per sample -- TODO confirm.
        outputs = model(imgs).reshape(-1)
        loss = criterion(outputs, labels)
        val_loss += loss.item()

avg_val_loss = val_loss / len(val_loader)
# `epoch` and `num_epochs` are the values left behind by the training cell, so
# this line labels the result with the last completed epoch.
print(f"Epoch [{epoch+1}/{num_epochs}], Val Loss: {avg_val_loss:.4f}")

Epoch [5/5], Val Loss: 0.0068


In [7]:
# Persist the trained weights (state_dict only, not the full module object).
import os

model_path = "outputs/simple_cnn.pth"
# torch.save does not create missing parent directories, so on a fresh checkout
# without an outputs/ folder the save would fail after training has finished.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save(model.state_dict(), model_path)
print(f"모델이 저장되었습니다: {model_path}")

모델이 저장되었습니다: outputs/simple_cnn.pth
