In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split

In [2]:
# 1. 데이터 로드
df = pd.read_csv('D:GC/diabetes.csv')
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [3]:
# 2. 특징(X)과 라벨(Y) 분리
X = df.drop('Outcome', axis=1)
Y = df['Outcome']

In [4]:
# 3. 데이터 스케일링
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [5]:
# 5. 학습용 및 테스트용 데이터 분리
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [6]:
print(X_train.shape)


(614, 8)


In [7]:
# 6. 텐서플로 모델 구성
model = Sequential([
    Dense(32, activation='relu',input_shape=(8, )), 
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')  # 이진 분류를 위한 sigmoid 활성화 함수
])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:

# 7. 모델 컴파일|
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [9]:

# 8. 모델 학습
model.fit(X_train, Y_train, epochs=50, batch_size=16, validation_split=0.2)

Epoch 1/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.5578 - loss: 0.6852 - val_accuracy: 0.7154 - val_loss: 0.6378
Epoch 2/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6616 - loss: 0.6259 - val_accuracy: 0.7154 - val_loss: 0.5904
Epoch 3/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7338 - loss: 0.5513 - val_accuracy: 0.7073 - val_loss: 0.5494
Epoch 4/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7417 - loss: 0.5280 - val_accuracy: 0.6992 - val_loss: 0.5221
Epoch 5/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7661 - loss: 0.4850 - val_accuracy: 0.7073 - val_loss: 0.5082
Epoch 6/50
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7736 - loss: 0.4632 - val_accuracy: 0.7398 - val_loss: 0.4936
Epoch 7/50
[1m31/31[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x1de3377e420>

In [10]:
# 9. 테스트 데이터로 모델 평가
loss, accuracy = model.evaluate(X_test, Y_test)
print(f"Test Accuracy: {accuracy:.4f}")

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7437 - loss: 0.5637
Test Accuracy: 0.7597


In [11]:
def create_sequential_model(input_dim):
    model = Sequential()
    model.add(Dense(64, input_dim=input_dim, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# PCA 적용 없이 Sequential 모델 학습 및 정확도 계산
model_no_pca = create_sequential_model(input_dim=X_train.shape[1])
model_no_pca.fit(X_train, Y_train, epochs=50, batch_size=16, validation_split=0.2, verbose=0)
accuracy_no_pca = model_no_pca.evaluate(X_test, Y_test, verbose=0)[1]
print(f'PCA 적용하지 않은 경우 정확도: {accuracy_no_pca:.4f}')

PCA 적용하지 않은 경우 정확도: 0.7143


In [12]:
def apply_pca_and_evaluate(n_components):
    # PCA 적용
    pca = PCA(n_components=n_components)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)
    
    # Sequential 모델 생성
    model = create_sequential_model(input_dim=n_components)
    
    # 모델 학습
    model.fit(X_train_pca, Y_train, epochs=50, batch_size=16, validation_split=0.2, verbose=0)
    
    # 테스트 데이터로 예측 및 정확도 확인
    accuracy = model.evaluate(X_test_pca, Y_test, verbose=0)[1]
    return accuracy

In [14]:
accuracy_pca_2 = apply_pca_and_evaluate(2)
accuracy_pca_3 = apply_pca_and_evaluate(3)
accuracy_pca_4 = apply_pca_and_evaluate(4)
accuracy_pca_5 = apply_pca_and_evaluate(5)
# 결과 출력
print(f'PCA 2개 적용한 경우 정확도: {accuracy_pca_2:.4f}')
print(f'PCA 3개 적용한 경우 정확도: {accuracy_pca_3:.4f}')
print(f'PCA 4개 적용한 경우 정확도: {accuracy_pca_4:.4f}')
print(f'PCA 5개 적용한 경우 정확도: {accuracy_pca_5:.4f}')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


PCA 2개 적용한 경우 정확도: 0.6688
PCA 3개 적용한 경우 정확도: 0.7338
PCA 4개 적용한 경우 정확도: 0.7273
PCA 5개 적용한 경우 정확도: 0.7208
