In [21]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from torchsummary import summary
import numpy as np
from sklearn.metrics import mean_squared_error


In [22]:
df = pd.read_csv("C:/AI_File/diabetes.csv")
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [23]:
df.columns


Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [24]:
df['Outcome'].value_counts()

0    500
1    268
Name: Outcome, dtype: int64

In [25]:
# 결측치 확인
print("\n 결측값 확인:")
print(df.isnull().sum())


 결측값 확인:
Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64


In [26]:
target_column = "Outcome"  
X = df.drop(columns=[target_column]).values
y = df[target_column].values


In [27]:
# 데이터 분할 및 전처리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [28]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape, 

((614, 8), (154, 8), (614,), (154,))

In [29]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [30]:

# 회귀 모델 정의
class RegressionModel(nn.Module):
    def __init__(self):
        super(RegressionModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(8, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RegressionModel().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [31]:

# 학습 루프
model.train()
for epoch in range(50):
    total_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader):.4f}")


Epoch 1, Loss: 0.3003
Epoch 2, Loss: 0.1914
Epoch 3, Loss: 0.1683
Epoch 4, Loss: 0.1518
Epoch 5, Loss: 0.1473
Epoch 6, Loss: 0.1473
Epoch 7, Loss: 0.1391
Epoch 8, Loss: 0.1507
Epoch 9, Loss: 0.1426
Epoch 10, Loss: 0.1364
Epoch 11, Loss: 0.1370
Epoch 12, Loss: 0.1343
Epoch 13, Loss: 0.1346
Epoch 14, Loss: 0.1345
Epoch 15, Loss: 0.1306
Epoch 16, Loss: 0.1280
Epoch 17, Loss: 0.1248
Epoch 18, Loss: 0.1313
Epoch 19, Loss: 0.1265
Epoch 20, Loss: 0.1253
Epoch 21, Loss: 0.1222
Epoch 22, Loss: 0.1236
Epoch 23, Loss: 0.1197
Epoch 24, Loss: 0.1323
Epoch 25, Loss: 0.1260
Epoch 26, Loss: 0.1262
Epoch 27, Loss: 0.1168
Epoch 28, Loss: 0.1205
Epoch 29, Loss: 0.1132
Epoch 30, Loss: 0.1141
Epoch 31, Loss: 0.1133
Epoch 32, Loss: 0.1177
Epoch 33, Loss: 0.1142
Epoch 34, Loss: 0.1210
Epoch 35, Loss: 0.1115
Epoch 36, Loss: 0.1101
Epoch 37, Loss: 0.1142
Epoch 38, Loss: 0.1097
Epoch 39, Loss: 0.1127
Epoch 40, Loss: 0.1065
Epoch 41, Loss: 0.1116
Epoch 42, Loss: 0.1099
Epoch 43, Loss: 0.1093
Epoch 44, Loss: 0.10

In [32]:

# 평가
model.eval()
preds, actuals = [], []
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        outputs = model(X_batch).cpu().numpy()
        preds.extend(outputs)
        actuals.extend(y_batch.numpy())

mse = mean_squared_error(actuals, preds)
print(f"Test MSE: {mse:.4f}")

Test MSE: 0.1940
