# 패키지 로딩

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, roc_auc_score, precision_score

# 하리퍼 파라미터 설정

In [7]:
INPUT_DIMS = 13
EPOCH = 100
BATCH = 32

# 데이터 로딩 및 확인

In [None]:
# !pip install xlrd

In [8]:
data = pd.read_excel('heart.xls')
print(data.shape)
print(data.head())

(303, 14)
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  


In [9]:
print(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB
None


In [10]:
data.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.366337,0.683168,0.966997,131.623762,246.264026,0.148515,0.528053,149.646865,0.326733,1.039604,1.39934,0.729373,2.313531,0.544554
std,9.082101,0.466011,1.032052,17.538143,51.830751,0.356198,0.52586,22.905161,0.469794,1.161075,0.616226,1.022606,0.612277,0.498835
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,47.5,0.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,2.0,0.0
50%,55.0,1.0,1.0,130.0,240.0,0.0,1.0,153.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,274.5,0.0,1.0,166.0,1.0,1.6,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


# 데이터 스케일링

In [11]:
from sklearn.preprocessing import StandardScaler

x = data.drop('target', axis=1)
y = data['target']
scaler = StandardScaler()
scaled_x = scaler.fit_transform(x)

# 학습 / 검증 / 평가 데이터 분할

In [39]:
x_train, x_test, y_train, y_test = train_test_split(scaled_x, y, train_size=0.8, random_state=10)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((242, 13), (61, 13), (242,), (61,))

In [40]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, train_size=0.8, random_state=0)  # 검증용 데이터
x_train.shape, x_val.shape, y_train.shape, y_val.shape

((193, 13), (49, 13), (193,), (49,))

# 모델 생성

In [41]:
from keras.layers import Dropout
from keras import regularizers

model = Sequential()
model.add(Dense(units=1000, activation='tanh', input_dim = INPUT_DIMS, kernel_regularizer=regularizers.L2(0.02)))
model.add(Dense(units=1000, activation='tanh', kernel_regularizer=regularizers.L2(0.1)))
model.add(Dropout(rate=0.5))
model.add(Dense(units=1, activation='sigmoid'))
print(model.summary())

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


None


# 모델 컴파일 및 학습

## Tensorboard 사용 준비

In [42]:
from tensorflow.keras.callbacks import TensorBoard
import datetime

log_dir = 'c:\\Logs\\'+datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
# histogram_freq = 1 : 1 에포크마다 활성화 출력의 히스토그램을 기록
tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=1)

## EarlyStopping 설정
- EarlyStopping 매개 변수
>- monitor: 모니터링할 지표 선택  'val_loss', 'val_acc' 등의 값 지정
>- mode: 모니터링할 지표를 최소화/최대화할지 결정 (loss - min, accuracy - max) 
>- patience: 모니터링 지표의 값에 개선이 없다고 판단하기 전에 대기할 epoch 수

In [43]:
from keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_loss', mode='min', patience=3)

In [44]:
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy']) # 이진분류 - 암환자 여부 판단
model.fit(x_train, y_train, batch_size=BATCH, epochs=EPOCH, validation_data=(x_val, y_val), verbose=1, 
          callbacks=[tensorboard, early_stop])

Epoch 1/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 105ms/step - accuracy: 0.7193 - loss: 83.0923 - val_accuracy: 0.6735 - val_loss: 47.8967
Epoch 2/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 51ms/step - accuracy: 0.7624 - loss: 42.7003 - val_accuracy: 0.8571 - val_loss: 28.1773
Epoch 3/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 55ms/step - accuracy: 0.8099 - loss: 25.2581 - val_accuracy: 0.8367 - val_loss: 16.6309
Epoch 4/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step - accuracy: 0.8297 - loss: 14.8619 - val_accuracy: 0.7755 - val_loss: 9.5869
Epoch 5/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.7518 - loss: 8.5246 - val_accuracy: 0.8776 - val_loss: 5.1125
Epoch 6/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.8177 - loss: 4.4791 - val_accuracy: 0.8367 - val_loss: 2.7398
Epoch 7/100
[1m7/7[0m [32m━━━

<keras.src.callbacks.history.History at 0x152cbcbb0d0>

# 예측 및 모델 평가

In [51]:
y_pred_prob = model.predict(x_test)
# print(y_pred_prob[:5])
y_pred = (y_pred_prob > 0.5)
# print(y_pred[:5])
print('Confusion Matrix')
print(confusion_matrix(y_test, y_pred))

score = model.evaluate(x_test, y_test)
print(f'Loss: {score[0]:.3f}')
print(f'Accuracay: {score[1]:.3f}')

print(f'Precision: {precision_score(y_test, y_pred):.3f}')
print(f'AUC: {roc_auc_score(y_test, y_pred_prob):.3f}')

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Confusion Matrix
[[26  9]
 [ 3 23]]
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8064 - loss: 0.4996 
Loss: 0.498
Accuracay: 0.803
Precision: 0.719
AUC: 0.908
