In [23]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error

from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from torchsummary import summary
import numpy as np

In [24]:
# 데이터셋 인스턴스 생성
data = pd.read_csv("C:/Users/tisxo/AI_class/data/Mall_Customers.csv")

In [25]:
data

Unnamed: 0,CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40
...,...,...,...,...,...
195,196,Female,35,120,79
196,197,Female,45,126,28
197,198,Male,32,126,74
198,199,Male,32,137,18


In [26]:
# 성별 문자열 대체
data["Gender"] = data["Gender"].replace({"Female": 1, "Male": 0})

  data["Gender"] = data["Gender"].replace({"Female": 1, "Male": 0})


In [27]:
data

Unnamed: 0,CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
0,1,0,19,15,39
1,2,0,21,15,81
2,3,1,20,16,6
3,4,1,23,16,77
4,5,1,31,17,40
...,...,...,...,...,...
195,196,1,35,120,79
196,197,1,45,126,28
197,198,0,32,126,74
198,199,0,32,137,18


In [28]:
# 데이터와 타겟 분리
X = data.drop(['Spending Score (1-100)'], axis=1).values
y = data['Spending Score (1-100)'].values.reshape(-1, 1)

In [29]:
X

array([[  1,   0,  19,  15],
       [  2,   0,  21,  15],
       [  3,   1,  20,  16],
       [  4,   1,  23,  16],
       [  5,   1,  31,  17],
       [  6,   1,  22,  17],
       [  7,   1,  35,  18],
       [  8,   1,  23,  18],
       [  9,   0,  64,  19],
       [ 10,   1,  30,  19],
       [ 11,   0,  67,  19],
       [ 12,   1,  35,  19],
       [ 13,   1,  58,  20],
       [ 14,   1,  24,  20],
       [ 15,   0,  37,  20],
       [ 16,   0,  22,  20],
       [ 17,   1,  35,  21],
       [ 18,   0,  20,  21],
       [ 19,   0,  52,  23],
       [ 20,   1,  35,  23],
       [ 21,   0,  35,  24],
       [ 22,   0,  25,  24],
       [ 23,   1,  46,  25],
       [ 24,   0,  31,  25],
       [ 25,   1,  54,  28],
       [ 26,   0,  29,  28],
       [ 27,   1,  45,  28],
       [ 28,   0,  35,  28],
       [ 29,   1,  40,  29],
       [ 30,   1,  23,  29],
       [ 31,   0,  60,  30],
       [ 32,   1,  21,  30],
       [ 33,   0,  53,  33],
       [ 34,   0,  18,  33],
       [ 35,  

In [30]:
y

array([[39],
       [81],
       [ 6],
       [77],
       [40],
       [76],
       [ 6],
       [94],
       [ 3],
       [72],
       [14],
       [99],
       [15],
       [77],
       [13],
       [79],
       [35],
       [66],
       [29],
       [98],
       [35],
       [73],
       [ 5],
       [73],
       [14],
       [82],
       [32],
       [61],
       [31],
       [87],
       [ 4],
       [73],
       [ 4],
       [92],
       [14],
       [81],
       [17],
       [73],
       [26],
       [75],
       [35],
       [92],
       [36],
       [61],
       [28],
       [65],
       [55],
       [47],
       [42],
       [42],
       [52],
       [60],
       [54],
       [60],
       [45],
       [41],
       [50],
       [46],
       [51],
       [46],
       [56],
       [55],
       [52],
       [59],
       [51],
       [59],
       [50],
       [48],
       [59],
       [47],
       [55],
       [42],
       [49],
       [56],
       [47],
       [54],
       [53],

In [31]:
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X = scaler_X.fit_transform(X)
y = scaler_y.fit_transform(y)

In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [33]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape, 

((160, 4), (40, 4), (160, 1), (40, 1))

In [34]:
# TensorDataset으로 래핑
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [35]:
# 모델 정의
class MallModel(nn.Module):
    def __init__(self):
        super(MallModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(4, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model(x)

In [36]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MallModel().to(device)

In [37]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [38]:
# 학습 루프
model.train()
for epoch in range(50):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")

Epoch 1, Loss: 0.9989
Epoch 2, Loss: 0.9475
Epoch 3, Loss: 0.9236
Epoch 4, Loss: 0.8975
Epoch 5, Loss: 0.8846
Epoch 6, Loss: 0.8719
Epoch 7, Loss: 0.8615
Epoch 8, Loss: 0.8504
Epoch 9, Loss: 0.8443
Epoch 10, Loss: 0.8377
Epoch 11, Loss: 0.8286
Epoch 12, Loss: 0.8244
Epoch 13, Loss: 0.8171
Epoch 14, Loss: 0.8100
Epoch 15, Loss: 0.8055
Epoch 16, Loss: 0.7989
Epoch 17, Loss: 0.7937
Epoch 18, Loss: 0.7884
Epoch 19, Loss: 0.7817
Epoch 20, Loss: 0.7751
Epoch 21, Loss: 0.7725
Epoch 22, Loss: 0.7634
Epoch 23, Loss: 0.7577
Epoch 24, Loss: 0.7506
Epoch 25, Loss: 0.7446
Epoch 26, Loss: 0.7410
Epoch 27, Loss: 0.7296
Epoch 28, Loss: 0.7255
Epoch 29, Loss: 0.7171
Epoch 30, Loss: 0.7104
Epoch 31, Loss: 0.7030
Epoch 32, Loss: 0.6999
Epoch 33, Loss: 0.6898
Epoch 34, Loss: 0.6826
Epoch 35, Loss: 0.6785
Epoch 36, Loss: 0.6712
Epoch 37, Loss: 0.6663
Epoch 38, Loss: 0.6597
Epoch 39, Loss: 0.6509
Epoch 40, Loss: 0.6454
Epoch 41, Loss: 0.6414
Epoch 42, Loss: 0.6370
Epoch 43, Loss: 0.6280
Epoch 44, Loss: 0.62

In [39]:
# 평가
model.eval()
preds, actuals = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch).cpu().numpy()
        preds.extend(outputs)
        actuals.extend(y_batch.numpy())

mse = mean_squared_error(actuals, preds)
print(f"Test MSE: {mse:.4f}")

Test MSE: 0.6834
