In [57]:
import tensorflow as tf
import numpy as np
import pandas as pd
import time
import random

In [58]:
# Location of the heart-disease CSV; the path is relative, so it is resolved
# against the kernel's current working directory.
data_file = '../data/heart.csv'

# Raw, unmodified table; later cells derive their splits from this frame.
origin_data = pd.read_csv(filepath_or_buffer=data_file)

In [59]:
def load_data(data = None, stragy = 2):
    """Split a table with a binary ``chd`` column into train and test frames.

    Rows are first separated by class, optionally re-balanced, and then each
    class is split independently with a random ~90% / ~10% mask.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Table containing a ``chd`` column holding 0/1. When omitted, the
        module-level ``origin_data`` is used; it is looked up at call time so
        a re-loaded frame is picked up without re-defining this function.
    stragy : int, default 2
        Balancing strategy:
        ``1`` - duplicate every positive row (over-sampling);
        ``2`` - keep a random subset of negative rows, as many as there are
        positive rows (under-sampling);
        any other value - leave both classes as they are.

    Returns
    -------
    (train_dataset, test_dataset) : tuple of pandas.DataFrame
        Positive rows come first, then negative rows, in each frame.

    Raises
    ------
    ValueError
        From ``random.sample`` when ``stragy == 2`` and there are more
        positive rows than negative rows.
    """
    if data is None:
        data = origin_data

    positive_example = data[data['chd'] == 1]
    negative_example = data[data['chd'] == 0]

    if stragy == 1:
        # NOTE: the two copies of a row are masked independently below, so a
        # copy of a training row can end up in the test frame.
        positive_example = pd.concat([positive_example, positive_example])
    elif stragy == 2:
        negative_index = random.sample(list(negative_example.index.values), len(positive_example))
        # ``.ix`` was removed in pandas 1.0; the sampled values are index
        # labels, so ``.loc`` is the equivalent label-based lookup.
        negative_example = negative_example.loc[negative_index]

    # Re-draw the masks until the two classes differ by at most 10 rows in the
    # training part.
    # NOTE: if the classes themselves differ by much more than that (e.g. an
    # unbalanced table with ``stragy`` other than 1 or 2) this loop may never
    # terminate.
    while True:
        positive_msk = np.random.rand(len(positive_example)) < 0.9
        negative_msk = np.random.rand(len(negative_example)) < 0.9
        # The number of True entries equals the number of rows the mask keeps.
        if abs(int(positive_msk.sum()) - int(negative_msk.sum())) <= 10:
            break

    train_dataset = pd.concat([positive_example[positive_msk], negative_example[negative_msk]])
    test_dataset = pd.concat([positive_example[~positive_msk], negative_example[~negative_msk]])
    return train_dataset, test_dataset

def normaliztion(dataset):
    """Mean-normalise every column: ``(x - mean(x)) / (max(x) - min(x))``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame whose columns are all numeric.

    Returns
    -------
    pandas.DataFrame
        Frame of the same shape with each column centred on its mean and
        scaled by its range. A column whose values are all equal has a range
        of zero; it is returned as all zeros instead of the NaN that a 0/0
        division would produce.
    """
    def _scale(column):
        centred = column - column.mean()
        spread = column.max() - column.min()
        if spread == 0:
            # Constant column: already centred at zero, nothing to scale.
            return centred
        return centred / spread

    return dataset.apply(_scale, axis=0)

def transfer_famhist(dataset):
    """Encode ``famhist`` numerically, normalise, and return a NumPy array.

    The strings 'Present' and 'Absent' in the ``famhist`` column are replaced
    by 1 and 0 respectively; the resulting frame is passed through
    ``normaliztion`` and its underlying values are returned.
    """
    famhist_codes = {'Present': 1, 'Absent': 0}
    encoded = dataset.replace({'famhist': famhist_codes})
    scaled = normaliztion(encoded)
    return scaled.values

def to_one_hotting(data, num_lables=2):
    """Turn a 1-D array of integer class ids into a float32 one-hot matrix.

    Row ``i`` of the result has a 1.0 in column ``data[i]`` and 0.0 elsewhere;
    the result has ``num_lables`` columns.
    """
    class_ids = np.arange(num_lables)
    # Adding a trailing axis makes the comparison broadcast to (n, num_lables).
    label_column = data[:, None]
    hits = class_ids == label_column
    return hits.astype(np.float32)

def generate_data(dataset):
    """Build shuffled (features, one-hot labels) arrays from a data frame.

    The first nine columns are used as features (encoded and normalised by
    ``transfer_famhist``); the ``chd`` column is one-hot encoded as the label.
    Both arrays are shuffled together by ``randomize`` before being returned.
    """
    feature_frame = dataset.iloc[:, :9]
    target_column = dataset['chd']

    features = transfer_famhist(feature_frame)
    one_hot_targets = to_one_hotting(target_column.values)
    return randomize(features, one_hot_targets)

def randomize(dataset, labels):
    """Shuffle ``dataset`` and ``labels`` with one shared random permutation.

    The permutation is drawn from NumPy's global random state and has one
    entry per row of ``labels``; row ``i`` of both outputs comes from the same
    input row, so features and labels stay aligned.
    """
    order = np.random.permutation(labels.shape[0])
    return dataset[order], labels[order]

# Split the raw table into train/test frames using load_data's default
# strategy (stragy=2). The split is random, so the sizes vary between runs.
train_dataset, test_dataset = load_data(origin_data)

# Turn each frame into shuffled (features, one-hot labels) NumPy arrays.
# NOTE: generate_data normalises whatever frame it is given, so the test
# features are scaled with the test frame's own statistics, not the training
# frame's.
train_data, train_label = generate_data(train_dataset)
test_data, test_label = generate_data(test_dataset)

# Sanity check: both arrays should have the same number of rows.
print(train_data.shape, train_label.shape)  

(277, 9) (277, 2)


In [60]:
# Define parameters for the model
learning_rate = 0.01  # step size handed to the Adam optimizer
batch_size = 16  # number of rows per mini-batch
n_epochs = 100  # number of passes over the training batches

In [61]:
# ---------------------------------------------------------------------------
# Linear softmax classifier: 9 input features -> 2 classes (TensorFlow 1.x
# graph mode).
# ---------------------------------------------------------------------------

# The batch dimension is left as None so the same graph accepts training
# mini-batches as well as a final, smaller test batch.
X = tf.placeholder(dtype = np.float32, shape = [None, 9], name='X')
Y = tf.placeholder(dtype = np.float32, shape = [None, 2], name='Y')

W = tf.Variable(tf.random_normal([9, 2]), name='W')
# One bias per class, broadcast over the rows of the batch. A bias shaped
# [batch_size, 2] would give every *position inside a batch* its own bias,
# which ties the model to the batch layout rather than to the classes.
b = tf.Variable(tf.random_normal([2]), name='b')

logits = tf.matmul(X, W) + b

entropy = tf.nn.softmax_cross_entropy_with_logits(labels = Y, logits = logits)

loss = tf.reduce_mean(entropy)

preds = tf.nn.softmax(logits)
correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y, 1))
# Despite its name, this is the COUNT of correct predictions in the fed batch;
# it is divided by the number of test rows at the end.
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

with tf.Session() as sess:
    start_time = time.time()
    sess.run(tf.global_variables_initializer())

    # Only full mini-batches are used for training; leftover rows are skipped.
    n_batches = int(len(train_data)/batch_size)
    for i in range(n_epochs):
        total_loss = 0

        for index in range(n_batches):
            X_batch = train_data[index*batch_size:(index+1)*batch_size]
            Y_batch = train_label[index*batch_size:(index+1)*batch_size]

            _, loss_batch = sess.run([optimizer, loss], feed_dict={X: X_batch, Y: Y_batch})
            total_loss += loss_batch

        print('Average loss epoch :{0}'.format(total_loss/n_batches))

    print('Total time: {0} seconds'.format(time.time() - start_time))
    print('Optimization Finished!')

    # test the model
    # Only the `accuracy` op is run here: running `optimizer` would update the
    # weights on the test data. The op defined above is reused instead of
    # building new graph nodes on every iteration, and the step-based range
    # also covers the last, possibly smaller, batch so that every test row is
    # counted in the numerator as well as in the denominator.
    total_correct_preds = 0
    for start in range(0, len(test_data), batch_size):
        X_batch = test_data[start:start + batch_size]
        Y_batch = test_label[start:start + batch_size]
        total_correct_preds += sess.run(accuracy, feed_dict={X: X_batch, Y: Y_batch})

    print('Accuracy: {0}'.format(total_correct_preds/len(test_data)))

Average loss epoch :0.8555820408989402
Average loss epoch :0.7877266231705161
Average loss epoch :0.7354915387490216
Average loss epoch :0.6948846736375023
Average loss epoch :0.6630223375909469
Average loss epoch :0.6376643461339614
Average loss epoch :0.6172758586266461
Average loss epoch :0.6008069427574382
Average loss epoch :0.5874951201326707
Average loss epoch :0.5767462130855111
Average loss epoch :0.5680746050441966
Average loss epoch :0.5610761046409607
Average loss epoch :0.5554152779719409
Average loss epoch :0.5508173511308783
Average loss epoch :0.5470607385915869
Average loss epoch :0.5439693471964668
Average loss epoch :0.5414047328864827
Average loss epoch :0.5392589078230017
Average loss epoch :0.5374479206169352
Average loss epoch :0.535906533984577
Average loss epoch :0.5345840068424449
Average loss epoch :0.5334406495094299
Average loss epoch :0.5324453115463257
Average loss epoch :0.5315733324078953
Average loss epoch :0.5308050039936515
Average loss epoch :0.5301