In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
blood = pd.read_csv('data/blood.csv')
label = blood['STK']

In [3]:
del blood['STK']
del blood['SEX']
del blood['ANE']
del blood['IHD']

In [4]:
blood.head()

Unnamed: 0,AGE_G,HGB,TCHOL,TG,HDL
0,1,15.5,139,112,54
1,1,16.2,204,105,55
2,1,16.4,181,83,67
3,1,16.1,147,45,56
4,1,15.3,148,70,57


In [5]:
X = blood.values

In [6]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = \
    train_test_split(X, label, test_size=0.2, 
                     shuffle=True, stratify=label, random_state=2019)

In [7]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()

In [8]:
lr.fit(x_train, y_train)

LogisticRegression()

In [9]:
import joblib
joblib.dump(lr, 'model/blood_lr.pkl')

['model/blood_lr.pkl']

## SVM

In [10]:
from sklearn.svm import SVC

In [11]:
svc = SVC()

In [12]:
svc.fit(x_train, y_train)

SVC()

In [13]:
import joblib
joblib.dump(svc, 'model/blood_svm.pkl')

['model/blood_svm.pkl']

## DT

In [14]:
from sklearn.tree import DecisionTreeClassifier

In [15]:
dtc = DecisionTreeClassifier()

In [16]:
dtc.fit(x_train, y_train)

DecisionTreeClassifier()

In [17]:
import joblib
joblib.dump(dtc, 'model/blood_dt.pkl')

['model/blood_dt.pkl']

### 저장된 모델로 정확도 산출

In [57]:
model_lr = joblib.load('model/blood_lr.pkl')
model_svm = joblib.load('model/blood_svm.pkl')
model_dt = joblib.load('model/blood_dt.pkl')

In [58]:
y_pred_lr = model_lr.predict(x_test)
y_pred_svm = model_svm.predict(x_test)
y_pred_dt = model_dt.predict(x_test)

In [60]:
from sklearn.metrics import accuracy_score
acc_lr = accuracy_score(y_test, y_pred_lr)
acc_svm = accuracy_score(y_test, y_pred_svm)
acc_dt = accuracy_score(y_test, y_pred_dt)
print(acc_lr,acc_svm, acc_dt)

0.93717 0.93717 0.88521


In [61]:
blood.tail()

Unnamed: 0,AGE_G,HGB,TCHOL,TG,HDL
999995,27,12.0,166,159,49
999996,27,12.0,164,376,42
999997,27,10.9,191,85,53
999998,27,13.5,197,81,51
999999,27,13.0,248,142,29


In [62]:
test_data = np.array([18, 14.5, 228, 93, 57]).reshape(1, 5)

In [63]:
index_lr = model_lr.predict(test_data)[0]
index_svm = model_svm.predict(test_data)[0]
index_dt = model_dt.predict(test_data)[0]

In [64]:
index_lr

0

In [67]:
index_svm

0

In [65]:
index_dt

1

In [68]:
sp_names = ['NO', '뇌질환']
print(sp_names[index_lr],sp_names[index_svm], sp_names[index_dt])

NO NO 뇌질환


## Deep Learning

In [36]:
# 딥러닝 모델 설정
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense

model = Sequential([
    Dense(24, input_shape=(5,), activation='relu'),
    Dense(12, activation='relu'),
    Dense(1, activation='softmax')
])

In [37]:
# 모델 컴파일
model.compile(loss='binary_crossentropy',
             optimizer='adam',
             metrics = ['accuracy'])

In [38]:
from keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath = 'model/blood_deep.hdf5',
                            monitor ='val_loss',
                            verbose = 1, save_best_only=True)

In [39]:
# 모델 생성
model.fit(x_train, y_train, epochs=100,batch_size=30, verbose=2,
         validation_split=0.2, callbacks=[checkpointer])

Train on 640000 samples, validate on 160000 samples
Epoch 1/100
 - 17s - loss: 14.3694 - accuracy: 0.0629 - val_loss: 14.3720 - val_accuracy: 0.0627

Epoch 00001: val_loss improved from inf to 14.37204, saving model to model/blood_deep.hdf5
Epoch 2/100
 - 16s - loss: 14.3694 - accuracy: 0.0629 - val_loss: 14.3720 - val_accuracy: 0.0627

Epoch 00002: val_loss did not improve from 14.37204
Epoch 3/100
 - 17s - loss: 14.3694 - accuracy: 0.0629 - val_loss: 14.3720 - val_accuracy: 0.0627

Epoch 00003: val_loss did not improve from 14.37204
Epoch 4/100
 - 16s - loss: 14.3694 - accuracy: 0.0629 - val_loss: 14.3720 - val_accuracy: 0.0627

Epoch 00004: val_loss did not improve from 14.37204
Epoch 5/100
 - 16s - loss: 14.3694 - accuracy: 0.0629 - val_loss: 14.3720 - val_accuracy: 0.0627

Epoch 00005: val_loss did not improve from 14.37204
Epoch 6/100
 - 16s - loss: 14.3694 - accuracy: 0.0629 - val_loss: 14.3720 - val_accuracy: 0.0627

Epoch 00006: val_loss did not improve from 14.37204
Epoch 7/1

 - 17s - loss: 14.3694 - accuracy: 0.0629 - val_loss: 14.3720 - val_accuracy: 0.0627

Epoch 00055: val_loss did not improve from 14.37204
Epoch 56/100
 - 17s - loss: 14.3694 - accuracy: 0.0629 - val_loss: 14.3720 - val_accuracy: 0.0627

Epoch 00056: val_loss did not improve from 14.37204
Epoch 57/100
 - 17s - loss: 14.3694 - accuracy: 0.0629 - val_loss: 14.3720 - val_accuracy: 0.0627

Epoch 00057: val_loss did not improve from 14.37204
Epoch 58/100
 - 17s - loss: 14.3694 - accuracy: 0.0629 - val_loss: 14.3720 - val_accuracy: 0.0627

Epoch 00058: val_loss did not improve from 14.37204
Epoch 59/100
 - 17s - loss: 14.3694 - accuracy: 0.0629 - val_loss: 14.3720 - val_accuracy: 0.0627

Epoch 00059: val_loss did not improve from 14.37204
Epoch 60/100
 - 17s - loss: 14.3694 - accuracy: 0.0629 - val_loss: 14.3720 - val_accuracy: 0.0627

Epoch 00060: val_loss did not improve from 14.37204
Epoch 61/100
 - 17s - loss: 14.3694 - accuracy: 0.0629 - val_loss: 14.3720 - val_accuracy: 0.0627

Epoch 00

<keras.callbacks.callbacks.History at 0x2f4963cea08>

In [40]:
from keras.models import load_model
model_deep = load_model('model/blood_deep.hdf5')

In [42]:
model.evaluate(x_test, y_test)[1]



0.06283000111579895

In [74]:
model_deep.predict_classes(test_data)[0][0]

1

In [72]:
model_deep.predict(test_data)

array([[1.]], dtype=float32)

In [73]:
np.argmax(model_deep.predict(test_data)[0])

0

In [None]:
model_deep