In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report
# %% [code]
# Load the dataset from CSV into a DataFrame, then preview its first rows
# (as the last expression of the cell, the preview is what gets displayed).
csv_path = "diabetes.csv"
df = pd.read_csv(csv_path)
df.head()
# %% [code]
# Classification: binary target derived from BMI.
# Features: every column except BMI (the source of the label) and, when it is
# present, Outcome. Only the Outcome drop is optional (errors="ignore"); a
# missing BMI column still raises, because the target cannot be built without it.
features_cls = df.drop(columns=["BMI"]).drop(columns=["Outcome"], errors="ignore")
# Target: 1.0 when BMI >= 30, otherwise 0.0, reshaped to a column vector of
# shape (n_samples, 1).
target_cls = (df["BMI"] >= 30).astype(np.float32).values.reshape(-1, 1)

# Train/test split: 20% held out, fixed random_state so the split is repeatable.
X_train_cls, X_test_cls, y_train_cls, y_test_cls = train_test_split(
    features_cls.values.astype(np.float32), target_cls,
    test_size=0.2, random_state=42
)

# Standardize the features. The scaler is fitted on the training split only
# and the same fitted statistics are then applied to the test split.
scaler_cls = StandardScaler()
X_train_cls = scaler_cls.fit_transform(X_train_cls)
X_test_cls = scaler_cls.transform(X_test_cls)

# numpy -> PyTorch tensors. The dtype is pinned to float32 explicitly so the
# tensors match the default parameter dtype of nn.Linear regardless of what
# dtype the scaler hands back.
X_train_cls_tensor = torch.tensor(X_train_cls, dtype=torch.float32)
X_test_cls_tensor = torch.tensor(X_test_cls, dtype=torch.float32)
y_train_cls_tensor = torch.tensor(y_train_cls, dtype=torch.float32)
y_test_cls_tensor = torch.tensor(y_test_cls, dtype=torch.float32)

# %% [code]
# Neural network model used for the classification task.
class ClassificationModel(nn.Module):
    """Three-layer fully connected network for binary classification.

    Layer sizes are ``input_dim -> 32 -> 16 -> 1``. ReLU follows each of the
    two hidden layers and a sigmoid is applied to the single output unit, so
    the forward pass returns values in (0, 1).

    Args:
        input_dim: Size of the last dimension of the input tensor.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Hidden layers.
        self.fc1 = nn.Linear(input_dim, 32)
        self.fc2 = nn.Linear(32, 16)
        # Output layer: a single unit, squashed by the sigmoid in forward().
        self.fc3 = nn.Linear(16, 1)
        # Activations shared by the forward pass.
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid output for ``x``; the last dim becomes 1."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = self.relu(layer(hidden))
        return self.sigmoid(self.fc3(hidden))

# Seed PyTorch's RNG before the model is built so that weight initialization
# (and therefore the reported losses) is repeatable across runs, in line with
# the fixed random_state used for the train/test split.
torch.manual_seed(42)

# One input unit per feature column of the training matrix.
input_dim_cls = X_train_cls_tensor.shape[1]
model_cls = ClassificationModel(input_dim_cls)

# Loss and optimizer: BCELoss for binary classification (the model already
# applies a sigmoid to its output), optimized with Adam.
criterion_cls = nn.BCELoss()
optimizer_cls = optim.Adam(model_cls.parameters(), lr=0.001)

# Training: each epoch is one full-batch step over the whole training set.
epochs_cls = 100
# The mode is never switched inside the loop, so training mode is set once.
model_cls.train()
for epoch in range(epochs_cls):
    optimizer_cls.zero_grad()
    outputs_cls = model_cls(X_train_cls_tensor)
    loss_cls = criterion_cls(outputs_cls, y_train_cls_tensor)
    loss_cls.backward()
    optimizer_cls.step()
    # Report the training loss every 10th epoch.
    if (epoch+1) % 10 == 0:
        print(f"[Classification] Epoch {epoch+1}/{epochs_cls}, Loss: {loss_cls.item():.4f}")

# Evaluation on the held-out test split.
model_cls.eval()
with torch.no_grad():
    # Forward pass only; no gradient tracking is needed for inference.
    test_outputs_cls = model_cls(X_test_cls_tensor)

# Turn the sigmoid outputs into hard 0/1 labels with a 0.5 threshold.
predicted_cls = (test_outputs_cls > 0.5).float()

# Accuracy = number of matching labels / number of test samples.
total = y_test_cls_tensor.size(0)
correct = (predicted_cls == y_test_cls_tensor).sum().item()
accuracy_cls = correct / total
print(f"\nClassification Test Accuracy: {accuracy_cls:.4f}")

print("\nClassification Report:")
# classification_report works on numpy arrays, so convert the tensors first.
print(
    classification_report(
        y_true=y_test_cls_tensor.numpy(),
        y_pred=predicted_cls.numpy(),
        digits=4,
    )
)



[Classification] Epoch 10/100, Loss: 0.6860
[Classification] Epoch 20/100, Loss: 0.6664
[Classification] Epoch 30/100, Loss: 0.6469
[Classification] Epoch 40/100, Loss: 0.6271
[Classification] Epoch 50/100, Loss: 0.6082
[Classification] Epoch 60/100, Loss: 0.5912
[Classification] Epoch 70/100, Loss: 0.5765
[Classification] Epoch 80/100, Loss: 0.5644
[Classification] Epoch 90/100, Loss: 0.5537
[Classification] Epoch 100/100, Loss: 0.5440

Classification Test Accuracy: 0.7143

Classification Report:
              precision    recall  f1-score   support

         0.0     0.6071    0.6071    0.6071        56
         1.0     0.7755    0.7755    0.7755        98

    accuracy                         0.7143       154
   macro avg     0.6913    0.6913    0.6913       154
weighted avg     0.7143    0.7143    0.7143       154

