 # 폐암 수술 환자의 생존율 데이터 분석

In [1]:
# Required libraries
import numpy as np
import tensorflow as tf
# Display the installed TensorFlow version as the cell output
tf.__version__

'2.1.0'

In [2]:
# Use one fixed seed so that repeated runs give the same results
seed = 2020
# Seed TensorFlow's global random generator ...
tf.random.set_seed(seed)
# ... and NumPy's global random generator with the same value
np.random.seed(seed)

In [3]:
# Load the prepared surgery-patient dataset (comma-separated numeric values)
Data_set = np.loadtxt(
    fname="../dataset/ThoraricSurgery.csv",
    delimiter=",",
)
# Preview the first three rows
Data_set[:3]

array([[293.  ,   1.  ,   3.8 ,   2.8 ,   0.  ,   0.  ,   0.  ,   0.  ,
          0.  ,   0.  ,  12.  ,   0.  ,   0.  ,   0.  ,   1.  ,   0.  ,
         62.  ,   0.  ],
       [  1.  ,   2.  ,   2.88,   2.16,   1.  ,   0.  ,   0.  ,   0.  ,
          1.  ,   1.  ,  14.  ,   0.  ,   0.  ,   0.  ,   1.  ,   0.  ,
         60.  ,   0.  ],
       [  8.  ,   2.  ,   3.19,   2.5 ,   1.  ,   0.  ,   0.  ,   0.  ,
          1.  ,   0.  ,  11.  ,   0.  ,   0.  ,   1.  ,   1.  ,   0.  ,
         66.  ,   1.  ]])

In [4]:
# Separate the patient records from the surgery outcome:
# columns 0-16 are the features (X), column 17 is the label (Y)
X, Y = Data_set[:, :17], Data_set[:, 17]
# Show both shapes as the cell output
(X.shape, Y.shape)

((470, 17), (470,))

In [14]:
# Split the data into an 80% training set and a 20% test set
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=seed
)

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [16]:
# Define the deep-learning model: a hidden layer of 30 ReLU units fed by the
# 17 input features, followed by a single sigmoid output unit
model = Sequential()
model.add(Dense(30, input_shape=(17,), activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# Print the layer-by-layer summary
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 30)                540       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 31        
Total params: 571
Trainable params: 571
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Configure training: loss function, optimizer, and the metric to report
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [20]:
# Train the model.
# 20% of the training data is held out as the validation set.
history = model.fit(
    x_train,
    y_train,
    batch_size=10,
    epochs=20,
    validation_split=0.2,
)

Train on 300 samples, validate on 76 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [21]:
# Report the result on the test set.
# evaluate() returns the loss value followed by the metric values for the
# model; the model was compiled with metrics=['accuracy'], so index 1 is the
# accuracy.
eval_scores = model.evaluate(x_test, y_test, verbose=2)
print("\n Accuracy: %.4f" % eval_scores[1])

94/94 - 0s - loss: 0.6213 - accuracy: 0.8511

 Accuracy: 0.8511


In [22]:
# Display the History object returned by model.fit()
history

<tensorflow.python.keras.callbacks.History at 0x1386e65ab08>

In [23]:
# Display the recorded training metrics: a dict of per-epoch value lists
# keyed by metric name
history.history

{'loss': [0.4213525213301182,
  0.4110069761673609,
  0.4003681619962057,
  0.4182903724412123,
  0.41538728376229606,
  0.45183952947457634,
  0.44393036638696987,
  0.400230789432923,
  0.4053180690854788,
  0.42235951075951256,
  0.39011069337526955,
  0.4250524955491225,
  0.4148184006412824,
  0.4101303641994794,
  0.40629686042666435,
  0.4107018123070399,
  0.40126847128073373,
  0.40674236938357355,
  0.41334313998619715,
  0.40638265771170456],
 'accuracy': [0.85333335,
  0.85,
  0.86,
  0.8466667,
  0.85333335,
  0.8433333,
  0.85333335,
  0.85,
  0.85333335,
  0.8466667,
  0.85,
  0.8566667,
  0.85333335,
  0.8466667,
  0.8566667,
  0.85,
  0.85,
  0.8466667,
  0.85333335,
  0.8466667],
 'val_loss': [0.5375272717915083,
  0.5915304920391032,
  0.5895166765702399,
  0.5995748324417755,
  0.5936172788864688,
  0.6051262272031683,
  0.5575104983229386,
  0.5129626836431654,
  0.5490454372606779,
  0.5521631962374637,
  0.493746517128066,
  0.5578460132605151,
  0.52215867450362

In [29]:
# Look up each recorded metric series by name.
# NOTE: these are bare expressions, so the notebook shows only the value of
# the last one (val_loss) as the cell output; wrap the others in print() to
# see them as well.
history.history['accuracy']
history.history['val_accuracy']
history.history['loss']
history.history['val_loss']

[0.5375272717915083,
 0.5915304920391032,
 0.5895166765702399,
 0.5995748324417755,
 0.5936172788864688,
 0.6051262272031683,
 0.5575104983229386,
 0.5129626836431654,
 0.5490454372606779,
 0.5521631962374637,
 0.493746517128066,
 0.5578460132605151,
 0.5221586745036276,
 0.5503756347063341,
 0.5502982633678537,
 0.5426266350244221,
 0.5639553615137151,
 0.5347437611535976,
 0.5112195446303016,
 0.7313713306855214]

In [28]:
# Put the training history into a DataFrame (one row per epoch, one column
# per metric) and preview the first five rows
import pandas as pd

df = pd.DataFrame(data=history.history)
df.head(n=5)

Unnamed: 0,loss,accuracy,val_loss,val_accuracy
0,0.421353,0.853333,0.537527,0.802632
1,0.411007,0.85,0.59153,0.828947
2,0.400368,0.86,0.589517,0.710526
3,0.41829,0.846667,0.599575,0.842105
4,0.415387,0.853333,0.593617,0.842105
