## 폐암 수술 환자의 생존율 예측

In [48]:
import os

# 딥러닝을 구동하는 데 필요한 케라스 함수 호출
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
print(tf.__version__)

# 필요한 라이브러리 불러옴
import numpy as np

import pandas as pd

# 실행할 때마다 같은 결과를 출력하기 위해 설정하는 부분
np.random.seed(3)
tf.random.set_seed(3)

'''
# 준비된 수술 환자 데이터를 불러옴
Data_set = np.loadtxt("../dataset/ThoraricSurgery.csv", delimiter=',')

# 환자의 기록과 수술 결과를 X와 Y로 구분하여 저장
X = Data_set[:, 0:17]
Y = Data_set[:, 17]
'''

# 준비된 수술 환자 데이터를 불러옴 by using pandas
#df = pd.read_csv("../dataset/ThoraricSurgery.csv",
#                      names=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "class"])
df = pd.read_csv("../dataset/ThoraricSurgery.csv", header=None)
print(df.head())
df.rename(columns={17:"class"}, inplace=True)
print(df.head())

# 환자의 기록과 수술 결과를 X와 Y로 구분하여 저장
X = df.drop(['class'], axis=1, inplace=False).values
Y = df['class'].values

2.2.0
    0   1     2     3   4   5   6   7   8   9   10  11  12  13  14  15  16  17
0  293   1  3.80  2.80   0   0   0   0   0   0  12   0   0   0   1   0  62   0
1    1   2  2.88  2.16   1   0   0   0   1   1  14   0   0   0   1   0  60   0
2    8   2  3.19  2.50   1   0   0   0   1   0  11   0   0   1   1   0  66   1
3   14   2  3.98  3.06   2   0   0   0   1   1  14   0   0   0   1   0  80   1
4   17   2  2.21  1.88   0   0   1   0   0   0  12   0   0   0   1   0  56   0
     0  1     2     3  4  5  6  7  8  9  10  11  12  13  14  15  16  class
0  293  1  3.80  2.80  0  0  0  0  0  0  12   0   0   0   1   0  62      0
1    1  2  2.88  2.16  1  0  0  0  1  1  14   0   0   0   1   0  60      0
2    8  2  3.19  2.50  1  0  0  0  1  0  11   0   0   1   1   0  66      1
3   14  2  3.98  3.06  2  0  0  0  1  1  14   0   0   0   1   0  80      1
4   17  2  2.21  1.88  0  0  1  0  0  0  12   0   0   0   1   0  56      0


In [49]:
'''
# 딥러닝 구조를 결정(모델을 설정하고 실행하는 부분)
model = Sequential()
model.add(Dense(30, input_dim=17, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# 딥러닝 실행
#model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
model.compile(loss='binary_crossentropy', 
              optimizer='adam', 
              metrics=['accuracy'])
'''


def build_model():
    model = tf.keras.models.Sequential([
        #tf.keras.layers.Dense(64, activation='relu', input_shape=[len(train_dataset.keys())]),
        tf.keras.layers.Dense(30, input_dim=17, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),        
    ])
    optimizer = tf.keras.optimizers.RMSprop(0.001)
    model.compile(loss='mse',
                  optimizer=optimizer,
                  metrics=['accuracy', 'mae', 'mse'])
    return model


model = build_model()


'''
checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# 모델의 가중치를 저장하는 콜백 만들기
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)
'''

# 파일 이름에 에포크 번호를 포함시킵니다(`str.format` 포맷)
checkpoint_path = "training_2/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)


# 열 번째 에포크마다 가중치를 저장하기 위한 콜백을 만듭니다



cp_callback = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=50, baseline=0.4),
    tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path, 
        monitor='val_loss',
        mode='min',        
        verbose=1, 
        save_best_only=True,
        save_weights_only=True,    
        save_freq=10)
]

In [46]:
model.summary()

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_26 (Dense)             (None, 30)                540       
_________________________________________________________________
dense_27 (Dense)             (None, 1)                 31        
Total params: 571
Trainable params: 571
Non-trainable params: 0
_________________________________________________________________


In [47]:
history = model.fit(X, Y, 
                    validation_split = 0.2,
                    #validation_data=(X, Y),
                    epochs=200, 
                    batch_size=64,
                    verbose=1,  # Verbosity mode. 0 = silent, 1 = progress bar(default), 2 = one line per epoch. 
                    callbacks=cp_callback)

model.save_weights('./checkpoints/my_checkpoint') # 수동으로 가중치 저장하기
model.save('./saved_model') # 전체 모델 저장하기

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [40]:
'''모델 weight 불러오기'''
checkpoint_path = "training_2/cp-0100.ckpt"
# checkpoint_path = './checkpoints/my_checkpoint'
model.load_weights(checkpoint_path) # 

loss, acc, mae, mse = model.evaluate(X, Y, verbose=2)
print(loss)
print("복원된 모델의 정확도: {:5.2f}%".format(100*acc))

print(model.predict(X).shape)

15/15 - 0s - loss: 0.3200 - accuracy: 0.4468 - mae: 0.5117 - mse: 0.3200
0.3199830949306488
복원된 모델의 정확도: 44.68%
(470, 1)


In [41]:
'''모델 전체 불러오기'''
new_model = tf.keras.models.load_model('./saved_model') # 전체 모델 불러오기

# 모델 구조를 확인합니다
new_model.summary()

# 복원된 모델을 평가합니다
loss, acc, mae, mse= new_model.evaluate(X,  Y, verbose=2)
print('복원된 모델의 정확도: {:5.2f}%'.format(100*acc))

print(new_model.predict(X).shape)

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_24 (Dense)             (None, 30)                540       
_________________________________________________________________
dense_25 (Dense)             (None, 1)                 31        
Total params: 571
Trainable params: 571
Non-trainable params: 0
_________________________________________________________________
15/15 - 0s - loss: 0.1489 - accuracy: 0.8511 - mean_absolute_error: 0.1489 - mean_squared_error: 0.1489
복원된 모델의 정확도: 85.11%
(470, 1)
