In [None]:
#model2.pth 85.9%
import os
import torch
import json
import random
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd

# ✅ Device selection: run on CUDA when it is available, otherwise on the CPU.
cuda_available = torch.cuda.is_available()
if cuda_available:
    device = torch.device("cuda")
    device_label = torch.cuda.get_device_name(0)
else:
    device = torch.device("cpu")
    device_label = "CPU"
print("✅ 사용 중인 GPU:", device_label)

# ✅ Filesystem locations: the input images plus one folder per kind of output.
data_dir = "C:/Users/user/OneDrive/Desktop/Resnet182-real/data/category_data"
model_dir = "C:/Users/user/OneDrive/Desktop/Resnet182-real/model"
csv_dir = "C:/Users/user/OneDrive/Desktop/Resnet182-real/csv"
json_dir = "C:/Users/user/OneDrive/Desktop/Resnet182-real/json"  # where the split indices are saved as JSON

# Create every output folder up front; exist_ok makes re-running harmless.
for output_dir in (model_dir, csv_dir, json_dir):
    os.makedirs(output_dir, exist_ok=True)

# ✅ Training hyperparameters
EPOCHS = 30             # number of passes over the training subset
BATCH_SIZE = 32         # samples per DataLoader batch
LEARNING_RATE = 1e-3    # learning rate handed to the Adam optimizer
IMG_SIZE = (224, 224)   # target size given to transforms.Resize

# ✅ Preprocessing applied to every image: resize to IMG_SIZE, convert to a
# tensor, then normalize each channel with the mean/std values below.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
preprocessing_steps = [
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocessing_steps)

# ✅ Load the dataset from data_dir with the preprocessing attached.
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

# ✅ Group the dataset's sample indices by class label.
# Start with an empty bucket for every label known to the dataset.
class_indices = {}
for label in dataset.class_to_idx.values():
    class_indices[label] = []

# Each entry of dataset.samples is a (path, label) pair; only the label is needed.
for sample_idx, sample in enumerate(dataset.samples):
    class_indices[sample[1]].append(sample_idx)

# ✅ Report how many samples each class holds.
class_sample_count = {label: len(members) for label, members in class_indices.items()}
print("✅ 클래스별 샘플 수:", class_sample_count)

# ✅ Exclude classes that are too small for a train/validation/test split.
# With the 70/30 split followed by the 67/33 split below, a class needs at
# least 4 samples for each of the three subsets to receive one; with fewer,
# train_test_split raises ValueError because one side would be empty.
MIN_SAMPLES_PER_CLASS = 4
class_indices_filtered = {}
for cls, indices in class_indices.items():
    if len(indices) >= MIN_SAMPLES_PER_CLASS:
        class_indices_filtered[cls] = indices
    else:
        print(f"⚠️ Skipping class {cls}: {len(indices)} sample(s), "
              f"at least {MIN_SAMPLES_PER_CLASS} are required for the split")

# ✅ Split each class separately so every kept class contributes to all three subsets.
train_indices, val_indices, test_indices = [], [], []

for cls, indices in class_indices_filtered.items():
    # Hold out 30% of the class, then divide that hold-out into validation
    # (67%) and test (33%). The fixed random_state keeps the split reproducible.
    train, temp = train_test_split(indices, test_size=0.3, random_state=42)
    val, test = train_test_split(temp, test_size=0.33, random_state=42)

    train_indices.extend(train)
    val_indices.extend(val)
    test_indices.extend(test)

# ✅ Persist the split indices as JSON files, one per subset.
train_indices_path = os.path.join(json_dir, "model2train.json")
val_indices_path = os.path.join(json_dir, "model2val.json")
test_indices_path = os.path.join(json_dir, "model2test.json")

# Pair each destination with the index list it should contain and write them in turn.
split_outputs = (
    (train_indices_path, train_indices),
    (val_indices_path, val_indices),
    (test_indices_path, test_indices),
)
for split_path, split_indices in split_outputs:
    with open(split_path, "w") as f:
        json.dump(split_indices, f)

print(f"\n📂 데이터셋 인덱스 저장 완료: {train_indices_path}, {val_indices_path}, {test_indices_path}")

# ✅ Build one Subset view of the full dataset per index list.
dataset_train, dataset_val, dataset_test = (
    Subset(dataset, split) for split in (train_indices, val_indices, test_indices)
)

# ✅ Data loaders: only the training loader shuffles; validation and test keep
# a fixed order.
train_loader = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(dataset_val, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

# ✅ 모델 초기화
from torchvision import models
import torch.nn as nn
import torch.optim as optim

# EfficientNet-B0 initialised from torchvision's default pretrained weights.
pretrained_weights = models.EfficientNet_B0_Weights.DEFAULT
model = models.efficientnet_b0(weights=pretrained_weights)

# Swap the final linear layer for one sized to this dataset's classes.
num_classes = len(dataset.classes)
head_in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(head_in_features, num_classes)
model.to(device)

# ✅ Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# ✅ Training loop: train for EPOCHS epochs, validate after each one, and keep
# the weights of the epoch with the highest validation accuracy.
best_valid_accuracy = 0.0
best_epoch = 0
# The checkpoint path never changes, so build it once up front; this also keeps
# the name defined when no epoch improves on the initial accuracy.
best_model_path = os.path.join(model_dir, "model2.pth")
# Per-epoch history, later exported as a table.
train_losses, valid_losses, train_accuracies, valid_accuracies = [], [], [], []

print("\n📢 Training Started! Logging Every Epoch:\n")

for epoch in range(EPOCHS):
    print(f"\n📢 Epoch [{epoch+1}/{EPOCHS}] 시작")
    model.train()
    train_loss, train_correct = 0, 0
    total_train = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean, so weight it by the batch size to
        # get a correct per-sample average even when the last batch is smaller.
        train_loss += loss.item() * inputs.size(0)
        train_correct += (outputs.argmax(1) == labels).sum().item()
        total_train += labels.size(0)

    train_loss /= total_train
    train_accuracy = train_correct / total_train
    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)

    # ✅ Validation phase: eval mode and no gradient tracking.
    model.eval()
    valid_loss, valid_correct = 0, 0
    total_valid = 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            valid_loss += loss.item() * inputs.size(0)
            valid_correct += (outputs.argmax(1) == labels).sum().item()
            total_valid += labels.size(0)

    valid_loss /= total_valid
    valid_accuracy = valid_correct / total_valid
    valid_losses.append(valid_loss)
    valid_accuracies.append(valid_accuracy)

    print(f"   🎯 Train Accuracy: {train_accuracy:.4f} | 📉 Train Loss: {train_loss:.4f} | "
          f"🎯 Valid Accuracy: {valid_accuracy:.4f} | 📉 Valid Loss: {valid_loss:.4f}")

    # ✅ Save the weights whenever validation accuracy reaches a new maximum.
    if valid_accuracy > best_valid_accuracy:
        best_valid_accuracy = valid_accuracy
        best_epoch = epoch + 1  # stored 1-based, matching the printed epoch number
        torch.save(model.state_dict(), best_model_path)
        print(f"🎯 새로운 최고 모델 저장됨! (Epoch {epoch+1}, Accuracy: {best_valid_accuracy:.4f})")

# ✅ Export the per-epoch training history as a CSV file.
df = pd.DataFrame({
    'Epoch': range(1, len(train_losses) + 1),
    'Train Loss': train_losses,
    'Valid Loss': valid_losses,
    'Train Accuracy': train_accuracies,
    'Valid Accuracy': valid_accuracies
})
csv_path = os.path.join(csv_dir, "category_classification_results.csv")
df.to_csv(csv_path, index=False)
print(f"📄 학습 결과 CSV 저장됨: {csv_path}")

# ✅ Report the metrics of the epoch with the highest validation accuracy.
# best_epoch is 1-based and stays 0 when no epoch improved; guard that case so
# index -1 does not silently report the last epoch (or fail on empty histories).
if best_epoch > 0:
    best_idx = best_epoch - 1
    print(
        f"✅ 학습 종료! 최고 성능을 기록한 Epoch: {best_epoch}, "
        f"🎯 Train Accuracy: {train_accuracies[best_idx]:.4f}, "
        f"📉 Train Loss: {train_losses[best_idx]:.4f}, "
        f"🎯 Valid Accuracy: {best_valid_accuracy:.4f}, "
        f"📉 Valid Loss: {valid_losses[best_idx]:.4f}"
    )
else:
    print("⚠️ Training finished, but no epoch improved on the initial validation accuracy; no best model was saved.")


✅ 사용 중인 GPU: Quadro RTX 4000
✅ 클래스별 샘플 수: {0: 1488, 1: 689, 2: 1694, 3: 440, 4: 2687, 5: 1998, 6: 1476, 7: 1061, 8: 1061, 9: 1289}

📂 데이터셋 인덱스 저장 완료: C:/Users/user/OneDrive/Desktop/Resnet182-real/json\model2train.json, C:/Users/user/OneDrive/Desktop/Resnet182-real/json\model2val.json, C:/Users/user/OneDrive/Desktop/Resnet182-real/json\model2test.json

📢 Training Started! Logging Every Epoch:


📢 Epoch [1/30] 시작
   🎯 Train Accuracy: 0.7208 | 📉 Train Loss: 0.8616 | 🎯 Valid Accuracy: 0.8057 | 📉 Valid Loss: 0.5990
🎯 새로운 최고 모델 저장됨! (Epoch 1, Accuracy: 0.8057)

📢 Epoch [2/30] 시작
   🎯 Train Accuracy: 0.8475 | 📉 Train Loss: 0.4771 | 🎯 Valid Accuracy: 0.8412 | 📉 Valid Loss: 0.5388
🎯 새로운 최고 모델 저장됨! (Epoch 2, Accuracy: 0.8412)

📢 Epoch [3/30] 시작
   🎯 Train Accuracy: 0.8813 | 📉 Train Loss: 0.3637 | 🎯 Valid Accuracy: 0.8440 | 📉 Valid Loss: 0.5037
🎯 새로운 최고 모델 저장됨! (Epoch 3, Accuracy: 0.8440)

📢 Epoch [4/30] 시작
   🎯 Train Accuracy: 0.9125 | 📉 Train Loss: 0.2686 | 🎯 Valid Accuracy: 0.8548 | 📉 Valid Lo