In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import time
import random

In [3]:
# Location of the input CSV, relative to the notebook's working directory.
data_file = '../data/heart.csv'
# Read the whole file into a DataFrame; the cells below use it as the raw input.
origin_data = pd.read_csv(data_file)
# Bare trailing expression so the notebook renders the frame as the cell output.
origin_data

Unnamed: 0,sbp,tobacco,ldl,adiposity,famhist,typea,obesity,alcohol,age,chd
0,160,12.00,5.73,23.11,Present,49,25.30,97.20,52,1
1,144,0.01,4.41,28.61,Absent,55,28.87,2.06,63,1
2,118,0.08,3.48,32.28,Present,52,29.14,3.81,46,0
3,170,7.50,6.41,38.03,Present,51,31.99,24.26,58,1
4,134,13.60,3.50,27.78,Present,60,25.99,57.34,49,1
5,132,6.20,6.47,36.21,Present,62,30.77,14.14,45,0
6,142,4.05,3.38,16.20,Absent,59,20.81,2.62,38,0
7,114,4.08,4.59,14.60,Present,62,23.11,6.72,58,1
8,114,0.00,3.83,19.40,Present,49,24.86,2.49,29,0
9,132,0.00,5.80,30.96,Present,69,30.11,0.00,53,1


In [6]:
def load_data(data = origin_data, stragy = 2):
    """Split ``data`` into class-balanced train and test DataFrames.

    Rows are separated by the ``chd`` column (1 = positive, 0 = negative),
    optionally rebalanced, and each class is then split with its own random
    boolean mask that sends roughly 90% of its rows to the training set.

    Args:
        data: DataFrame with a ``chd`` column holding 0/1 labels.
        stragy: balancing strategy.
            1 -- oversample: every positive row is duplicated once.
            2 -- undersample: a random subset of negative rows is kept,
                 as many as there are positive rows.
            Any other value leaves both classes untouched.

    Returns:
        ``(train_dataset, test_dataset)``; each is the positive rows
        followed by the negative rows selected for that split.

    Raises:
        ValueError: from ``random.sample`` when strategy 2 is requested and
            there are fewer negative rows than positive rows.

    Note:
        The masks are redrawn until the number of positive and negative
        training rows differ by at most 10. When the two classes are far
        apart in size (possible if no balancing strategy is applied) that
        condition may never be met and the loop will not terminate.
    """
    train_fraction = 0.9   # probability that a row lands in the training split
    max_imbalance = 10     # tolerated |#positive - #negative| in the training split

    positive_example = data[data['chd'] == 1]
    negitive_example = data[data['chd'] == 0]

    if stragy == 1:
        positive_example = pd.concat([positive_example, positive_example])
    elif stragy == 2:
        negitive_index = random.sample(list(negitive_example.index.values), len(positive_example))
        # `.ix` has been removed from pandas; the sampled values are index
        # labels, so label-based `.loc` is the equivalent selection.
        negitive_example = negitive_example.loc[negitive_index]

    # Draw the masks, then redraw both until the training classes are close
    # enough in size (same draw order as drawing once and retrying).
    while True:
        positive_msk = np.random.rand(len(positive_example)) < train_fraction
        negitive_msk = np.random.rand(len(negitive_example)) < train_fraction
        if abs(int(positive_msk.sum()) - int(negitive_msk.sum())) <= max_imbalance:
            break

    train_dataset = pd.concat([positive_example[positive_msk], negitive_example[negitive_msk]])
    test_dataset = pd.concat([positive_example[~positive_msk], negitive_example[~negitive_msk]])
    return train_dataset, test_dataset

def normaliztion(dataset):
    """Mean-center every column and scale it by its value range.

    Each column ``c`` becomes ``(c - mean(c)) / (max(c) - min(c))``; the
    statistics are taken from ``dataset`` itself.

    Args:
        dataset: DataFrame whose columns are all numeric.

    Returns:
        A DataFrame of the same shape holding the rescaled columns.
    """
    def _center_and_scale(column):
        # Range (max - min) is the denominator, not the standard deviation.
        spread = column.max() - column.min()
        return (column - column.mean()) / spread

    return dataset.apply(_center_and_scale, axis=0)

def transfer_famhist(dataset):
    """Encode ``famhist`` numerically and return the normalized features.

    ``'Present'`` is replaced by 1 and ``'Absent'`` by 0 in the ``famhist``
    column, the resulting frame is passed through ``normaliztion`` and the
    underlying array is returned.

    Args:
        dataset: feature DataFrame containing a ``famhist`` column.

    Returns:
        The normalized features as an array (``DataFrame.values``).
    """
    famhist_codes = {'Present': 1, 'Absent': 0}
    encoded = dataset.replace({'famhist': famhist_codes})
    normalized = normaliztion(encoded)
    return normalized.values

def to_one_hotting(data, num_lables=2):
    """One-hot encode a 1-D array of integer class labels.

    Args:
        data: 1-D NumPy array of class ids.
        num_lables: number of classes, i.e. the width of the output.

    Returns:
        A ``float32`` array of shape ``(len(data), num_lables)`` with a 1.0
        in the column matching each label. A label outside
        ``range(num_lables)`` produces an all-zero row.
    """
    class_ids = np.arange(num_lables)
    # Turn the labels into a column so the comparison broadcasts to a
    # (n_samples, num_lables) boolean matrix.
    label_column = data[:, None]
    matches = class_ids == label_column
    return matches.astype(np.float32)

def generate_data(dataset):
    """Turn a split DataFrame into shuffled feature and label arrays.

    The first nine columns are used as features (``famhist`` encoded, then
    normalized with statistics computed from ``dataset`` itself) and the
    ``chd`` column is one-hot encoded as the target.

    Args:
        dataset: DataFrame whose first nine columns are the features and
            which also has a ``chd`` label column.

    Returns:
        ``(features, labels)`` arrays shuffled with one shared permutation.
    """
    features = transfer_famhist(dataset.iloc[:, :9])
    one_hot_labels = to_one_hotting(dataset['chd'].values)
    return randomize(features, one_hot_labels)

def randomize(dataset, labels):
    """Shuffle samples and labels together along the first axis.

    Args:
        dataset: array of samples, indexed by row.
        labels: array of labels with one row per sample.

    Returns:
        ``(shuffled_dataset, shuffled_labels)``; both are reordered by the
        same random permutation, so rows stay paired.
    """
    # A single permutation drives both arrays to keep sample/label pairs aligned.
    order = np.random.permutation(labels.shape[0])
    return dataset[order], labels[order]

# Build the train/test split from the raw frame; the default strategy (2)
# undersamples the negative class to the size of the positive class.
train_dataset, test_dataset = load_data(origin_data)
print(train_dataset)
# Convert each split into a shuffled (features, one-hot labels) pair of arrays.
# Each call normalizes with the statistics of the split it is given.
train_data, train_label = generate_data(train_dataset)
test_data, test_label = generate_data(test_dataset)

# Sanity check of the array shapes that will be fed to the model.
print(train_data.shape, train_label.shape)  

     sbp  tobacco    ldl  adiposity  famhist  typea  obesity  alcohol  age  \
0    160    12.00   5.73      23.11  Present     49    25.30    97.20   52   
1    144     0.01   4.41      28.61   Absent     55    28.87     2.06   63   
3    170     7.50   6.41      38.03  Present     51    31.99    24.26   58   
7    114     4.08   4.59      14.60  Present     62    23.11     6.72   58   
9    132     0.00   5.80      30.96  Present     69    30.11     0.00   53   
10   206     6.00   2.95      32.27   Absent     72    26.81    56.06   60   
11   134    14.10   4.44      22.39  Present     65    23.09     0.00   40   
17   146    10.50   8.29      35.36  Present     78    32.73    13.89   53   
18   158     2.60   7.46      34.07  Present     61    29.30    53.28   62   
19   124    14.00   6.23      35.96  Present     45    30.09     0.00   59   
20   106     1.61   1.74      12.32   Absent     74    20.92    13.37   20   
25   124     4.00  12.42      31.29  Present     54    23.23    

In [10]:
# Define parameters for the model
learning_rate = 0.01  # step size handed to the Adam optimizer
batch_size = 16       # number of training rows fed per optimization step
n_epochs = 100        # number of passes over the training batches

In [12]:
# Inputs: a batch of 9 features per row and the matching 2-class one-hot label.
# The leading None lets the batch size vary between training and evaluation.
X = tf.placeholder(dtype = np.float32, shape = [None, 9], name='X')
Y = tf.placeholder(dtype = np.float32, shape = [None, 2], name='Y')

# Trainable parameters of a single linear layer: 9 inputs -> 2 class scores.
W = tf.Variable(tf.random_normal([9, 2]), name='W')
b = tf.Variable(tf.zeros([2]), name='b')

# Unnormalized class scores.
logits = tf.matmul(X, W) + b

# Per-row softmax cross-entropy between the one-hot labels and the logits.
entropy = tf.nn.softmax_cross_entropy_with_logits(labels = Y, logits = logits)

# Scalar training objective: mean cross-entropy over the batch.
loss = tf.reduce_mean(entropy)

# Class probabilities and a per-row "prediction matches label" flag.
preds = tf.nn.softmax(logits)
correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y, 1))
# NOTE: this is the COUNT of correct predictions in the fed batch, not a ratio;
# callers divide by the number of rows to obtain the accuracy.
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))

# One Adam update step that minimizes the mean cross-entropy.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# Train the linear softmax classifier with mini-batches and report the test
# accuracy after every epoch and once more when training has finished.
with tf.Session() as sess:
    start_time = time.time()
    sess.run(tf.global_variables_initializer())
    # Only full batches are used; a trailing partial batch is skipped.
    n_batches = len(train_data) // batch_size
    for i in range(n_epochs):
        total_loss = 0

        for index in range(n_batches):

            X_batch = train_data[index*batch_size:(index+1)*batch_size]
            Y_batch = train_label[index*batch_size:(index+1)*batch_size]

            # Run one optimizer step and fetch the batch loss in the same call.
            _, loss_batch = sess.run([optimizer, loss], feed_dict={X: X_batch, Y: Y_batch})
            total_loss += loss_batch

        # `accuracy` yields the number of correct predictions, so it is divided
        # by the test-set size to get a ratio.
        test_accuracy = sess.run(accuracy, feed_dict={X: test_data, Y: test_label})
        print('Average loss:{0} at epoch:{1}'.format(total_loss/n_batches, i), ' and accuracy is :', test_accuracy / len(test_label))

    print('Total time: {0} seconds'.format(time.time() - start_time))
    print('Optimization Finished!')

    # Report the freshly evaluated value rather than the last per-epoch one.
    total_accuracy = sess.run(accuracy, feed_dict={X: test_data, Y: test_label})
    print('Final Accuracy is :', total_accuracy / len(test_label))

Average loss:0.8450903561380174 at epoch:0  and accuracy is : 0.444444444444
Average loss:0.759326352013482 at epoch:1  and accuracy is : 0.555555555556
Average loss:0.6988345517052544 at epoch:2  and accuracy is : 0.62962962963
Average loss:0.656263111366166 at epoch:3  and accuracy is : 0.592592592593
Average loss:0.6258349120616913 at epoch:4  and accuracy is : 0.62962962963
Average loss:0.6035914503865771 at epoch:5  and accuracy is : 0.592592592593
Average loss:0.587040364742279 at epoch:6  and accuracy is : 0.62962962963
Average loss:0.5745855818192164 at epoch:7  and accuracy is : 0.592592592593
Average loss:0.5651479131645627 at epoch:8  and accuracy is : 0.592592592593
Average loss:0.5579578760597441 at epoch:9  and accuracy is : 0.592592592593
Average loss:0.552448151840104 at epoch:10  and accuracy is : 0.592592592593
Average loss:0.5481946253114276 at epoch:11  and accuracy is : 0.592592592593
Average loss:0.5448801037338045 at epoch:12  and accuracy is : 0.592592592593
Ave