In [7]:
import tensorflow as tf
import numpy as np

def normalize(data):
    return (data - np.min(data))/(np.max(data) - np.min(data))

def add_div():
    print("-"*45)
    
# Load csv and preprocess dataset.

dataset = np.loadtxt("./wisc_bc_data.csv",delimiter=",",dtype=str)

x_data = dataset[1:,2:].astype(np.float32) # Extract and Cast Features
x_data = normalize(x_data) # normalize dataset

y_data = dataset[1:,1] # Extract Diagnosis result

y_new_data = []
for i in range(np.shape(y_data)[0]): # Cast 'Maligant'/'Benign' as one-hot encoded float32 vector
    if y_data[i] == 'M': # 악성
        y_new_data.append([1. ,0.])
    else:
        y_new_data.append([0. ,1.])
y_data = y_new_data

print("-"*10,"Breast Cancer Diagnosis","-"*10)

print("Shape of X : ", np.shape(x_data)) 
print("Shape of Y : ", np.shape(y_data))

add_div()

# Divide train/test set


x_train = x_data[0:300,:] 
y_train = y_data[0:300]
print("Number of x_train : ", np.shape(x_train)[0]) 
print("Number of y_train : ", np.shape(y_train)[0])

x_test = x_data[301:,:] 
y_test = y_data[301:]
print("Number of x_test : ", np.shape(x_test)[0]) 
print("Number of y_test : ", np.shape(y_test)[0])
add_div()

# Design neural network
tf.reset_default_graph()
global_step = tf.Variable(0,trainable=False,name='global_step')
X = tf.placeholder(tf.float32,[None,30])
Y = tf.placeholder(tf.float32,[None,2])
keep_prob = tf.placeholder(tf.float32)
with tf.name_scope('layer1'):
    W1 = tf.Variable(tf.random_normal([30,20],stddev=0.01),name="W1")
    b1 = tf.Variable(tf.random_normal([20],stddev=0.01),name="b1")
    L1 = tf.nn.relu(tf.add(tf.matmul(X,W1),b1))
    L1 = tf.nn.dropout(L1,keep_prob)
    
with tf.name_scope('layer2'):
    W2 = tf.Variable(tf.random_normal([20,20],stddev=0.01),name="W2")
    b2 = tf.Variable(tf.random_normal([20],stddev=0.01),name="b2")
    L2 = tf.nn.relu(tf.add(tf.matmul(L1,W2),b2))
    L2 = tf.nn.dropout(L2,keep_prob)

with tf.name_scope('output'):
    W3 = tf.Variable(tf.random_normal([20,2],stddev=0.01),name="W3")
    b3 = tf.Variable(tf.random_normal([2],stddev=0.01),name="b3")
    model = tf.nn.relu(tf.add(tf.matmul(L2,W3),b3))

with tf.name_scope('optimizer'):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=model,labels=Y))
    optimizer = tf.train.AdamOptimizer(0.001).minimize(cost,global_step = global_step)
    tf.summary.scalar('cost',cost)
    

# Training neural network

sess = tf.Session()
saver = tf.train.Saver(tf.global_variables())
ckpt = tf.train.get_checkpoint_state('./cancer_model')

force_train = False

if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path) and not force_train:
    saver.restore(sess,ckpt.model_checkpoint_path)
else:
    sess.run(tf.global_variables_initializer())

merged = tf.summary.merge_all()
writer = tf.summary.FileWriter('./cancer_logs',sess.graph)
    
for step in range(10001):
    sess.run(optimizer,feed_dict={X:x_train,Y:y_train,keep_prob:0.7})
    if step%1000 == 0:
        print("Step : {:04d}, Cost : {:f}".format(sess.run(global_step),
                                                  sess.run(cost,feed_dict={X:x_train,Y:y_train,keep_prob:0.7})))
    summary = sess.run(merged,feed_dict={X:x_train,Y:y_train,keep_prob:0.7})
    writer.add_summary(summary,global_step=sess.run(global_step))
    
saver.save(sess,'./cancer_model/dnn.ckpt',global_step=global_step)

is_correct = tf.equal(tf.argmax(model,1),tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(is_correct,tf.float32))
add_div()

print("정확도 : {:.2f}% ".format(100*(sess.run(accuracy,feed_dict={X:x_test,Y:y_test,keep_prob:1}))))
    



---------- Breast Cancer Diagnosis ----------
Shape of X :  (569, 30)
Shape of Y :  (569, 2)
---------------------------------------------
Number of x_train :  300
Number of y_train :  300
Number of x_test :  268
Number of y_test :  268
---------------------------------------------
INFO:tensorflow:Restoring parameters from ./cancer_model\dnn.ckpt-30003
Step : 30004, Cost : 0.091225
Step : 31004, Cost : 0.089752
Step : 32004, Cost : 0.080410
Step : 33004, Cost : 0.089568
Step : 34004, Cost : 0.084287
Step : 35004, Cost : 0.089495
Step : 36004, Cost : 0.081840
Step : 37004, Cost : 0.089479
Step : 38004, Cost : 0.082438
Step : 39004, Cost : 0.077233
Step : 40004, Cost : 0.084893
---------------------------------------------
정확도 : 97.01% 
