In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

In [2]:
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/abalone.csv')

In [3]:
data

Unnamed: 0,id,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...,...
4172,4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [4]:
data['Sex'] = LabelEncoder().fit_transform(data['Sex'])

In [5]:
def bucket_rings(r):
    if r <= 9:
        return 0
    elif r <= 12:
        return 1
    elif r <= 15:
        return 2
    else:
        return 3

In [11]:
# 타겟 레이블 생성
y = data['Rings'].apply(bucket_rings)
n_classes = y.nunique()

In [7]:
# 입력 데이터와 정규화
X = data.drop('Rings', axis=1).values
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [8]:
# 훈련/테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [9]:
# 텐서 변환
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [12]:
# 분류 모델 정의
class ClassificationModel(nn.Module):
    def __init__(self):
        super(ClassificationModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(9, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, n_classes)
        )

    def forward(self, x):
        return self.model(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ClassificationModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [13]:
# 학습 루프
model.train()
for epoch in range(50):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")

Epoch 1, Loss: 1.0439
Epoch 2, Loss: 0.8868
Epoch 3, Loss: 0.8450
Epoch 4, Loss: 0.8194
Epoch 5, Loss: 0.8034
Epoch 6, Loss: 0.7888
Epoch 7, Loss: 0.7860
Epoch 8, Loss: 0.7761
Epoch 9, Loss: 0.7737
Epoch 10, Loss: 0.7654
Epoch 11, Loss: 0.7619
Epoch 12, Loss: 0.7577
Epoch 13, Loss: 0.7578
Epoch 14, Loss: 0.7510
Epoch 15, Loss: 0.7552
Epoch 16, Loss: 0.7494
Epoch 17, Loss: 0.7442
Epoch 18, Loss: 0.7478
Epoch 19, Loss: 0.7429
Epoch 20, Loss: 0.7423
Epoch 21, Loss: 0.7391
Epoch 22, Loss: 0.7410
Epoch 23, Loss: 0.7365
Epoch 24, Loss: 0.7356
Epoch 25, Loss: 0.7354
Epoch 26, Loss: 0.7346
Epoch 27, Loss: 0.7311
Epoch 28, Loss: 0.7291
Epoch 29, Loss: 0.7289
Epoch 30, Loss: 0.7271
Epoch 31, Loss: 0.7252
Epoch 32, Loss: 0.7252
Epoch 33, Loss: 0.7232
Epoch 34, Loss: 0.7198
Epoch 35, Loss: 0.7211
Epoch 36, Loss: 0.7196
Epoch 37, Loss: 0.7212
Epoch 38, Loss: 0.7161
Epoch 39, Loss: 0.7154
Epoch 40, Loss: 0.7139
Epoch 41, Loss: 0.7157
Epoch 42, Loss: 0.7166
Epoch 43, Loss: 0.7125
Epoch 44, Loss: 0.71

In [14]:
from sklearn.metrics import accuracy_score

# 평가
model.eval()
preds, actuals = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch)
        predicted = torch.argmax(outputs, dim=1).cpu().numpy()
        preds.extend(predicted)
        actuals.extend(y_batch.numpy())

acc = accuracy_score(actuals, preds)
print(f"\n✅ Final Test Accuracy: {acc:.4f}")


✅ Final Test Accuracy: 0.6687
