In [1]:
import os
import sys
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

# Path to the dataset (abalone.csv is expected in the current working directory)
file_path = "abalone.csv"

if not os.path.exists(file_path):
    print(f"파일을 찾을 수 없습니다: {file_path}\n현재 작업 디렉토리: {os.getcwd()}")
    print("파일이 현재 경로에 있는지 확인하거나, file_path를 수정하세요.")
    # A missing dataset is a failure: exit with a non-zero status so shells,
    # schedulers, and CI do not mistake the aborted run for a successful one
    # (a bare sys.exit() reports status 0).
    sys.exit(1)

# 1. Load the data and expand the categorical "Sex" column into indicator columns
df = pd.get_dummies(pd.read_csv(file_path), columns=["Sex"])

# Binarize the target: 1.0 where Rings is at or above the median, 0.0 otherwise
median_rings = df["Rings"].median()
at_or_above_median = df["Rings"] >= median_rings
df["Target"] = at_or_above_median.astype(np.float32)

# Features are every column except Rings and the Target derived from it
X = df.drop(columns=["Rings", "Target"]).values.astype(np.float32)
# Labels as a float32 column vector of shape (n_samples, 1)
y = df["Target"].values.astype(np.float32).reshape(-1, 1)

# 2. Hold out 20% as a test set, then scale with statistics fitted on the
#    training portion only
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
scaler_cls = StandardScaler()
X_train = scaler_cls.fit_transform(X_train)
X_test = scaler_cls.transform(X_test)

# Convert the NumPy arrays to PyTorch tensors
X_train_tensor, X_test_tensor, y_train_tensor, y_test_tensor = (
    torch.tensor(array) for array in (X_train, X_test, y_train, y_test)
)

# 3. Neural network for binary classification (Sigmoid applied at the output layer)
class ClassificationModel(nn.Module):
    """Fully connected binary classifier: input_dim -> 64 -> 32 -> 1.

    Both hidden layers are followed by ReLU; the single output unit is passed
    through a Sigmoid, so the forward pass yields values in [0, 1].
    """

    def __init__(self, input_dim: int) -> None:
        """Create the layers for inputs with ``input_dim`` features."""
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)  # single output neuron
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the Sigmoid output for ``x``; the last dimension has size 1."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.sigmoid(self.fc3(hidden))

# Build the model; its input width is the number of feature columns
input_dim_cls = X_train_tensor.shape[1]
model_cls = ClassificationModel(input_dim_cls)

# 4. Loss function and optimizer (BCELoss on the model outputs, Adam)
criterion_cls = nn.BCELoss()
optimizer_cls = optim.Adam(model_cls.parameters(), lr=0.001)

# 5. Train for 100 epochs; each epoch is one update over the whole training set
epochs_cls = 100
model_cls.train()  # the mode is never switched inside the loop, so set it once
for epoch in range(epochs_cls):
    outputs_cls = model_cls(X_train_tensor)
    loss_cls = criterion_cls(outputs_cls, y_train_tensor)

    # Clear gradients left from the previous step before backpropagating
    optimizer_cls.zero_grad()
    loss_cls.backward()
    optimizer_cls.step()

    # Only report every tenth epoch
    if (epoch + 1) % 10 != 0:
        continue
    print(f"[Classification] Epoch {epoch+1}/{epochs_cls}, Loss: {loss_cls.item():.4f}")

# 6. Evaluation on the held-out test set
model_cls.eval()
with torch.no_grad():  # gradients are not needed for the forward pass here
    outputs_test = model_cls(X_test_tensor)

# Threshold the model outputs at 0.5 to obtain hard 0/1 predictions
predictions = (outputs_test > 0.5).float()
correct = (predictions == y_test_tensor).sum().item()
total = y_test_tensor.size(0)
accuracy = correct / total

print(f"\nClassification Test Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_tensor.numpy(), predictions.numpy(), digits=4))


[Classification] Epoch 10/100, Loss: 0.6437
[Classification] Epoch 20/100, Loss: 0.5503
[Classification] Epoch 30/100, Loss: 0.4697
[Classification] Epoch 40/100, Loss: 0.4161
[Classification] Epoch 50/100, Loss: 0.3935
[Classification] Epoch 60/100, Loss: 0.3867
[Classification] Epoch 70/100, Loss: 0.3823
[Classification] Epoch 80/100, Loss: 0.3781
[Classification] Epoch 90/100, Loss: 0.3743
[Classification] Epoch 100/100, Loss: 0.3708

Classification Test Accuracy: 0.8361

Classification Report:
              precision    recall  f1-score   support

         0.0     0.8034    0.6738    0.7329       279
         1.0     0.8488    0.9174    0.8818       557

    accuracy                         0.8361       836
   macro avg     0.8261    0.7956    0.8074       836
weighted avg     0.8337    0.8361    0.8321       836

