# 3장 피마 인디언 당뇨병 예측

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve,roc_curve,f1_score,confusion_matrix
from sklearn.metrics import precision_score, recall_score , roc_auc_score
from sklearn.preprocessing import StandardScaler , Binarizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import joblib

In [2]:
diabetes_data = pd.read_csv('./data/diabetes.csv')
print(diabetes_data['Outcome'].value_counts())
diabetes_data.head(3)

0    500
1    268
Name: Outcome, dtype: int64


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1


In [3]:
x = diabetes_data.loc[:,['Glucose','BloodPressure','BMI', 'Age']]
y = diabetes_data.iloc[:,-1]

X_train, X_test, y_train, y_test = train_test_split(x,y,test_size = 0.2 , random_state=156)

# 로지스틱 회귀로 학습, 예측 및 평가 수행
lr = LogisticRegression()
lr.fit(X_train,y_train)

joblib.dump(lr, 'model/diabetes_lr.pkl')

['model/diabetes_lr.pkl']

In [4]:
from sklearn.svm import SVC
svc = SVC()
svc.fit(X_train, y_train)
joblib.dump(svc, 'model/diabetes_svm.pkl')

['model/diabetes_svm.pkl']

In [5]:
from sklearn.tree import DecisionTreeClassifier
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)
joblib.dump(dtc, 'model/diabetes_dtc.pkl')

['model/diabetes_dtc.pkl']

In [6]:
model_lr = joblib.load('model/diabetes_lr.pkl')
model_svm = joblib.load('model/diabetes_svm.pkl')
model_dt = joblib.load('model/diabetes_dtc.pkl')

In [7]:
y_pred_lr = model_lr.predict(X_test)
y_pred_svm = model_svm.predict(X_test)
y_pred_dt = model_dt.predict(X_test)

In [8]:
from sklearn.metrics import accuracy_score
acc_lr = accuracy_score(y_test, y_pred_lr)
acc_svm = accuracy_score(y_test, y_pred_svm)
acc_dt = accuracy_score(y_test, y_pred_dt)
print(acc_lr, acc_svm, acc_dt)

0.7402597402597403 0.7597402597402597 0.6753246753246753


In [9]:
diabetes_data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [10]:
test_data = np.array([50,72,33.6,50]).reshape(1, 4)

In [11]:
index_lr = model_lr.predict(test_data)[0]
index_svm = model_svm.predict(test_data)[0]
index_dt = model_dt.predict(test_data)[0]

In [12]:
sp_names = ['부정','긍정']
print(sp_names[index_lr], sp_names[index_svm], sp_names[index_dt])

부정 부정 부정


In [13]:
# 딥러닝 모델 설정
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense

model=Sequential([
    Dense(100,input_shape=(4,), activation="relu"),
    Dense(80, input_shape=(4,),activation="relu"),
    Dense(50, input_shape=(4,),activation="relu"),
    Dense(20, input_shape=(4,),activation="relu"),
    Dense(1, activation="sigmoid")
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               500       
_________________________________________________________________
dense_2 (Dense)              (None, 80)                8080      
_________________________________________________________________
dense_3 (Dense)              (None, 50)                4050      
_________________________________________________________________
dense_4 (Dense)              (None, 20)                1020      
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 21        
Total params: 13,671
Trainable params: 13,671
Non-trainable params: 0
_________________________________________________________________


Using TensorFlow backend.


In [14]:
# 모델 컴파일 
model.compile(loss='binary_crossentropy', 
              optimizer='adam',
              metrics=['accuracy'])

In [15]:
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath="model/diabetes_deep.hdf5", 
                               monitor='val_loss', 
                               verbose=1, save_best_only=True)

In [16]:
# 모델 학습
model.fit(X_train, y_train, validation_split=0.2, epochs=500, batch_size=50,
          callbacks=[checkpointer])

Train on 491 samples, validate on 123 samples
Epoch 1/500

Epoch 00001: val_loss improved from inf to 0.91489, saving model to model/diabetes_deep.hdf5
Epoch 2/500

Epoch 00002: val_loss did not improve from 0.91489
Epoch 3/500

Epoch 00003: val_loss improved from 0.91489 to 0.81016, saving model to model/diabetes_deep.hdf5
Epoch 4/500

Epoch 00004: val_loss improved from 0.81016 to 0.59464, saving model to model/diabetes_deep.hdf5
Epoch 5/500

Epoch 00005: val_loss improved from 0.59464 to 0.57925, saving model to model/diabetes_deep.hdf5
Epoch 6/500

Epoch 00006: val_loss did not improve from 0.57925
Epoch 7/500

Epoch 00007: val_loss did not improve from 0.57925
Epoch 8/500

Epoch 00008: val_loss did not improve from 0.57925
Epoch 9/500

Epoch 00009: val_loss did not improve from 0.57925
Epoch 10/500

Epoch 00010: val_loss did not improve from 0.57925
Epoch 11/500

Epoch 00011: val_loss did not improve from 0.57925
Epoch 12/500

Epoch 00012: val_loss did not improve from 0.57925
Epo


Epoch 00041: val_loss did not improve from 0.56251
Epoch 42/500

Epoch 00042: val_loss did not improve from 0.56251
Epoch 43/500

Epoch 00043: val_loss did not improve from 0.56251
Epoch 44/500

Epoch 00044: val_loss did not improve from 0.56251
Epoch 45/500

Epoch 00045: val_loss did not improve from 0.56251
Epoch 46/500

Epoch 00046: val_loss improved from 0.56251 to 0.55520, saving model to model/diabetes_deep.hdf5
Epoch 47/500

Epoch 00047: val_loss did not improve from 0.55520
Epoch 48/500

Epoch 00048: val_loss did not improve from 0.55520
Epoch 49/500

Epoch 00049: val_loss did not improve from 0.55520
Epoch 50/500

Epoch 00050: val_loss did not improve from 0.55520
Epoch 51/500

Epoch 00051: val_loss did not improve from 0.55520
Epoch 52/500

Epoch 00052: val_loss did not improve from 0.55520
Epoch 53/500

Epoch 00053: val_loss did not improve from 0.55520
Epoch 54/500

Epoch 00054: val_loss did not improve from 0.55520
Epoch 55/500

Epoch 00055: val_loss did not improve from 


Epoch 00082: val_loss did not improve from 0.55520
Epoch 83/500

Epoch 00083: val_loss did not improve from 0.55520
Epoch 84/500

Epoch 00084: val_loss did not improve from 0.55520
Epoch 85/500

Epoch 00085: val_loss did not improve from 0.55520
Epoch 86/500

Epoch 00086: val_loss did not improve from 0.55520
Epoch 87/500

Epoch 00087: val_loss did not improve from 0.55520
Epoch 88/500

Epoch 00088: val_loss did not improve from 0.55520
Epoch 89/500

Epoch 00089: val_loss did not improve from 0.55520
Epoch 90/500

Epoch 00090: val_loss did not improve from 0.55520
Epoch 91/500

Epoch 00091: val_loss did not improve from 0.55520
Epoch 92/500

Epoch 00092: val_loss did not improve from 0.55520
Epoch 93/500

Epoch 00093: val_loss did not improve from 0.55520
Epoch 94/500

Epoch 00094: val_loss did not improve from 0.55520
Epoch 95/500

Epoch 00095: val_loss did not improve from 0.55520
Epoch 96/500

Epoch 00096: val_loss did not improve from 0.55520
Epoch 97/500

Epoch 00097: val_loss di


Epoch 00123: val_loss did not improve from 0.55520
Epoch 124/500

Epoch 00124: val_loss did not improve from 0.55520
Epoch 125/500

Epoch 00125: val_loss did not improve from 0.55520
Epoch 126/500

Epoch 00126: val_loss did not improve from 0.55520
Epoch 127/500

Epoch 00127: val_loss did not improve from 0.55520
Epoch 128/500

Epoch 00128: val_loss did not improve from 0.55520
Epoch 129/500

Epoch 00129: val_loss did not improve from 0.55520
Epoch 130/500

Epoch 00130: val_loss did not improve from 0.55520
Epoch 131/500

Epoch 00131: val_loss did not improve from 0.55520
Epoch 132/500

Epoch 00132: val_loss did not improve from 0.55520
Epoch 133/500

Epoch 00133: val_loss did not improve from 0.55520
Epoch 134/500

Epoch 00134: val_loss did not improve from 0.55520
Epoch 135/500

Epoch 00135: val_loss did not improve from 0.55520
Epoch 136/500

Epoch 00136: val_loss did not improve from 0.55520
Epoch 137/500

Epoch 00137: val_loss did not improve from 0.55520
Epoch 138/500

Epoch 001


Epoch 00164: val_loss did not improve from 0.55520
Epoch 165/500

Epoch 00165: val_loss did not improve from 0.55520
Epoch 166/500

Epoch 00166: val_loss did not improve from 0.55520
Epoch 167/500

Epoch 00167: val_loss did not improve from 0.55520
Epoch 168/500

Epoch 00168: val_loss did not improve from 0.55520
Epoch 169/500

Epoch 00169: val_loss did not improve from 0.55520
Epoch 170/500

Epoch 00170: val_loss did not improve from 0.55520
Epoch 171/500

Epoch 00171: val_loss did not improve from 0.55520
Epoch 172/500

Epoch 00172: val_loss did not improve from 0.55520
Epoch 173/500

Epoch 00173: val_loss did not improve from 0.55520
Epoch 174/500

Epoch 00174: val_loss did not improve from 0.55520
Epoch 175/500

Epoch 00175: val_loss did not improve from 0.55520
Epoch 176/500

Epoch 00176: val_loss did not improve from 0.55520
Epoch 177/500

Epoch 00177: val_loss did not improve from 0.55520
Epoch 178/500

Epoch 00178: val_loss did not improve from 0.55520
Epoch 179/500

Epoch 001


Epoch 00205: val_loss did not improve from 0.55520
Epoch 206/500

Epoch 00206: val_loss did not improve from 0.55520
Epoch 207/500

Epoch 00207: val_loss did not improve from 0.55520
Epoch 208/500

Epoch 00208: val_loss did not improve from 0.55520
Epoch 209/500

Epoch 00209: val_loss did not improve from 0.55520
Epoch 210/500

Epoch 00210: val_loss did not improve from 0.55520
Epoch 211/500

Epoch 00211: val_loss did not improve from 0.55520
Epoch 212/500

Epoch 00212: val_loss did not improve from 0.55520
Epoch 213/500

Epoch 00213: val_loss did not improve from 0.55520
Epoch 214/500

Epoch 00214: val_loss did not improve from 0.55520
Epoch 215/500

Epoch 00215: val_loss did not improve from 0.55520
Epoch 216/500

Epoch 00216: val_loss did not improve from 0.55520
Epoch 217/500

Epoch 00217: val_loss did not improve from 0.55520
Epoch 218/500

Epoch 00218: val_loss did not improve from 0.55520
Epoch 219/500

Epoch 00219: val_loss did not improve from 0.55520
Epoch 220/500

Epoch 002


Epoch 00246: val_loss did not improve from 0.55520
Epoch 247/500

Epoch 00247: val_loss did not improve from 0.55520
Epoch 248/500

Epoch 00248: val_loss did not improve from 0.55520
Epoch 249/500

Epoch 00249: val_loss did not improve from 0.55520
Epoch 250/500

Epoch 00250: val_loss did not improve from 0.55520
Epoch 251/500

Epoch 00251: val_loss did not improve from 0.55520
Epoch 252/500

Epoch 00252: val_loss did not improve from 0.55520
Epoch 253/500

Epoch 00253: val_loss did not improve from 0.55520
Epoch 254/500

Epoch 00254: val_loss did not improve from 0.55520
Epoch 255/500

Epoch 00255: val_loss did not improve from 0.55520
Epoch 256/500

Epoch 00256: val_loss did not improve from 0.55520
Epoch 257/500

Epoch 00257: val_loss did not improve from 0.55520
Epoch 258/500

Epoch 00258: val_loss did not improve from 0.55520
Epoch 259/500

Epoch 00259: val_loss did not improve from 0.55520
Epoch 260/500

Epoch 00260: val_loss did not improve from 0.55520
Epoch 261/500

Epoch 002


Epoch 00287: val_loss did not improve from 0.55520
Epoch 288/500

Epoch 00288: val_loss did not improve from 0.55520
Epoch 289/500

Epoch 00289: val_loss did not improve from 0.55520
Epoch 290/500

Epoch 00290: val_loss did not improve from 0.55520
Epoch 291/500

Epoch 00291: val_loss did not improve from 0.55520
Epoch 292/500

Epoch 00292: val_loss did not improve from 0.55520
Epoch 293/500

Epoch 00293: val_loss did not improve from 0.55520
Epoch 294/500

Epoch 00294: val_loss did not improve from 0.55520
Epoch 295/500

Epoch 00295: val_loss did not improve from 0.55520
Epoch 296/500

Epoch 00296: val_loss did not improve from 0.55520
Epoch 297/500

Epoch 00297: val_loss did not improve from 0.55520
Epoch 298/500

Epoch 00298: val_loss did not improve from 0.55520
Epoch 299/500

Epoch 00299: val_loss did not improve from 0.55520
Epoch 300/500

Epoch 00300: val_loss did not improve from 0.55520
Epoch 301/500

Epoch 00301: val_loss did not improve from 0.55520
Epoch 302/500

Epoch 003


Epoch 00328: val_loss did not improve from 0.55520
Epoch 329/500

Epoch 00329: val_loss did not improve from 0.55520
Epoch 330/500

Epoch 00330: val_loss did not improve from 0.55520
Epoch 331/500

Epoch 00331: val_loss did not improve from 0.55520
Epoch 332/500

Epoch 00332: val_loss did not improve from 0.55520
Epoch 333/500

Epoch 00333: val_loss did not improve from 0.55520
Epoch 334/500

Epoch 00334: val_loss did not improve from 0.55520
Epoch 335/500

Epoch 00335: val_loss did not improve from 0.55520
Epoch 336/500

Epoch 00336: val_loss did not improve from 0.55520
Epoch 337/500

Epoch 00337: val_loss did not improve from 0.55520
Epoch 338/500

Epoch 00338: val_loss did not improve from 0.55520
Epoch 339/500

Epoch 00339: val_loss did not improve from 0.55520
Epoch 340/500

Epoch 00340: val_loss did not improve from 0.55520
Epoch 341/500

Epoch 00341: val_loss did not improve from 0.55520
Epoch 342/500

Epoch 00342: val_loss did not improve from 0.55520
Epoch 343/500

Epoch 003


Epoch 00369: val_loss did not improve from 0.55520
Epoch 370/500

Epoch 00370: val_loss did not improve from 0.55520
Epoch 371/500

Epoch 00371: val_loss did not improve from 0.55520
Epoch 372/500

Epoch 00372: val_loss did not improve from 0.55520
Epoch 373/500

Epoch 00373: val_loss did not improve from 0.55520
Epoch 374/500

Epoch 00374: val_loss did not improve from 0.55520
Epoch 375/500

Epoch 00375: val_loss did not improve from 0.55520
Epoch 376/500

Epoch 00376: val_loss did not improve from 0.55520
Epoch 377/500

Epoch 00377: val_loss did not improve from 0.55520
Epoch 378/500

Epoch 00378: val_loss did not improve from 0.55520
Epoch 379/500

Epoch 00379: val_loss did not improve from 0.55520
Epoch 380/500

Epoch 00380: val_loss did not improve from 0.55520
Epoch 381/500

Epoch 00381: val_loss did not improve from 0.55520
Epoch 382/500

Epoch 00382: val_loss did not improve from 0.55520
Epoch 383/500

Epoch 00383: val_loss did not improve from 0.55520
Epoch 384/500

Epoch 003


Epoch 00410: val_loss did not improve from 0.55520
Epoch 411/500

Epoch 00411: val_loss did not improve from 0.55520
Epoch 412/500

Epoch 00412: val_loss did not improve from 0.55520
Epoch 413/500

Epoch 00413: val_loss did not improve from 0.55520
Epoch 414/500

Epoch 00414: val_loss did not improve from 0.55520
Epoch 415/500

Epoch 00415: val_loss did not improve from 0.55520
Epoch 416/500

Epoch 00416: val_loss did not improve from 0.55520
Epoch 417/500

Epoch 00417: val_loss did not improve from 0.55520
Epoch 418/500

Epoch 00418: val_loss did not improve from 0.55520
Epoch 419/500

Epoch 00419: val_loss did not improve from 0.55520
Epoch 420/500

Epoch 00420: val_loss did not improve from 0.55520
Epoch 421/500

Epoch 00421: val_loss did not improve from 0.55520
Epoch 422/500

Epoch 00422: val_loss did not improve from 0.55520
Epoch 423/500

Epoch 00423: val_loss did not improve from 0.55520
Epoch 424/500

Epoch 00424: val_loss did not improve from 0.55520
Epoch 425/500

Epoch 004


Epoch 00451: val_loss did not improve from 0.55520
Epoch 452/500

Epoch 00452: val_loss did not improve from 0.55520
Epoch 453/500

Epoch 00453: val_loss did not improve from 0.55520
Epoch 454/500

Epoch 00454: val_loss did not improve from 0.55520
Epoch 455/500

Epoch 00455: val_loss did not improve from 0.55520
Epoch 456/500

Epoch 00456: val_loss did not improve from 0.55520
Epoch 457/500

Epoch 00457: val_loss did not improve from 0.55520
Epoch 458/500

Epoch 00458: val_loss did not improve from 0.55520
Epoch 459/500

Epoch 00459: val_loss did not improve from 0.55520
Epoch 460/500

Epoch 00460: val_loss did not improve from 0.55520
Epoch 461/500

Epoch 00461: val_loss did not improve from 0.55520
Epoch 462/500

Epoch 00462: val_loss did not improve from 0.55520
Epoch 463/500

Epoch 00463: val_loss did not improve from 0.55520
Epoch 464/500

Epoch 00464: val_loss did not improve from 0.55520
Epoch 465/500

Epoch 00465: val_loss did not improve from 0.55520
Epoch 466/500

Epoch 004


Epoch 00492: val_loss did not improve from 0.55520
Epoch 493/500

Epoch 00493: val_loss did not improve from 0.55520
Epoch 494/500

Epoch 00494: val_loss did not improve from 0.55520
Epoch 495/500

Epoch 00495: val_loss did not improve from 0.55520
Epoch 496/500

Epoch 00496: val_loss did not improve from 0.55520
Epoch 497/500

Epoch 00497: val_loss did not improve from 0.55520
Epoch 498/500

Epoch 00498: val_loss did not improve from 0.55520
Epoch 499/500

Epoch 00499: val_loss did not improve from 0.55520
Epoch 500/500

Epoch 00500: val_loss did not improve from 0.55520


<keras.callbacks.callbacks.History at 0x1b93533dc48>

In [17]:
from keras.models import load_model
model_deep = load_model("model/diabetes_deep.hdf5")

In [18]:
print("\n Accuracy : %.4f"%(model.evaluate(X_test,y_test,verbose=2))[1])


 Accuracy : 0.6818


In [44]:
test_data = np.array([148.0,72,33.6,50]).reshape(1, 4)

In [47]:
test_data = np.array([85.0,66.0,26.6,31]).reshape(1, 4)
test_data

array([[85. , 66. , 26.6, 31. ]])

In [21]:
x.head()

Unnamed: 0,Glucose,BloodPressure,BMI,Age
0,148,72,33.6,50
1,85,66,26.6,31
2,183,64,23.3,32
3,89,66,28.1,21
4,137,40,43.1,33


In [48]:
model_deep.predict(test_data)

array([[0.377059]], dtype=float32)

In [49]:
print(sp_names[model_deep.predict_classes(test_data)[0][0]])

부정
