In [1]:
'''
A logistic regression example using the TensorFlow library.
Note: despite its name, "logistic regression" is a classification algorithm; the name is a historical accident.
This notebook also shows how to log test metrics (the ROC curve and the predicted-probability distribution) to TensorBoard.
'''
import tensorflow as tf
import numpy as np
import sklearn as sk

# Import cancer data
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [2]:
# functions to read input 
def read_cancer_data():
    """Load the scikit-learn breast-cancer dataset as NumPy arrays.

    Returns:
        tuple: ``(features, labels)`` -- array copies of the ``data`` and
        ``target`` attributes of the object returned by ``load_breast_cancer()``.
    """
    bunch = load_breast_cancer()
    return np.array(bunch.data), np.array(bunch.target)
def feature_normalize(dataset):
    """Standardize every column of ``dataset`` to zero mean and unit variance.

    Args:
        dataset: array-like whose first axis indexes samples; statistics are
            computed along axis 0 (i.e. per column for a 2-D input).

    Returns:
        numpy.ndarray: ``(dataset - mean) / std`` computed column-wise.
        Columns with zero standard deviation (constant features) come back
        as all zeros instead of NaN.
    """
    mu = np.mean(dataset, axis=0)
    sigma = np.std(dataset, axis=0)
    # A constant column has sigma == 0, and 0/0 would inject NaNs that poison
    # every later matmul/gradient. Dividing by 1 leaves its centered values (0).
    safe_sigma = np.where(sigma == 0, 1.0, sigma)
    return (dataset - mu) / safe_sigma
def append_bias_reshape(features,labels):
    """Prepend a bias column of ones to ``features`` and make ``labels`` a column.

    Args:
        features: 2-D array with one row per sample.
        labels: array-like holding one label per sample.

    Returns:
        tuple: ``(f, l)`` where ``f`` has shape ``(n_samples, n_features + 1)``
        with the all-ones column first, and ``l`` has shape ``(n_samples, 1)``.
    """
    sample_count = features.shape[0]
    # Column of ones acts as the intercept input, so the bias lives inside W.
    bias_column = np.ones((sample_count, 1))
    with_bias = np.hstack((bias_column, features))
    label_column = np.reshape(labels, (sample_count, 1))
    return with_bias, label_column

In [3]:
# Load the raw data, standardize every feature column, then prepend the bias column.
features,labels = read_cancer_data()
# NOTE(review): the mean/std used for normalization are computed on the full dataset,
# before the train/test split below, so test rows influence them -- confirm this is acceptable.
normalized_features = feature_normalize(features)
f, l = append_bias_reshape(normalized_features,labels)
# Number of model inputs, counting the bias column added by append_bias_reshape.
n_dim = f.shape[1]
# NOTE(review): test_size=0.80 holds out 80% of the rows for testing, leaving only 20%
# for training -- confirm this ratio is intended. random_state=42 fixes the shuffle.
train_x, test_x, train_y, test_y = train_test_split(f, l, test_size=0.80, random_state=42)

In [4]:
# Parameters
# Step size handed to the gradient-descent optimizer.
learning_rate = 0.01
# Number of iterations of the training loop.
training_epochs = 1000
# Directory the TensorBoard event files are written to.
logs_path="/tmp/logs/3/1"

# tf Graph Input
with tf.name_scope('input'):
    # Feature rows; the batch dimension is left open (None), the width is n_dim.
    x = tf.placeholder(tf.float32,[None,n_dim])
    # One target value per row, as a single column.
    y = tf.placeholder(tf.float32,[None,1])
    # Weight column, randomly initialized.
    # NOTE(review): no seed is passed here, so initial weights (and the logged
    # costs) presumably differ between runs -- confirm whether a seed is set elsewhere.
    W = tf.Variable(tf.truncated_normal([n_dim, 1]))
    # Record the weight distribution so it shows up in TensorBoard's histogram view.
    tf.summary.histogram("Weights",W)

In [5]:
# Construct model
with tf.name_scope('model'):
    # Logits: linear combination of the inputs fed through x, weighted by W.
    layer1 = tf.matmul(x, W)
    pred = tf.nn.sigmoid(layer1)  # sigmoid activation
    # Binary cross-entropy, computed from the logits rather than from `pred`.
    # The hand-written form -y*log(pred) - (1-y)*log(1-pred) evaluates log(0)
    # (-> inf/NaN cost and gradients) once the sigmoid saturates to exactly
    # 0 or 1; the fused op is mathematically the same loss in a stable form.
    per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=layer1)
    # Sum over the (single) output column, then average over the batch.
    cost = tf.reduce_mean(tf.reduce_sum(per_example_loss, axis=1))
    tf.summary.scalar('cost',cost)
    # Merge every summary registered so far into the op logged at each training step.
    summary_op = tf.summary.merge_all()
    # Gradient Descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [6]:
# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # create log writer object
    writer = tf.summary.FileWriter(logs_path,sess.graph)
    # try/finally guarantees the event file is flushed and closed even if a
    # training or evaluation step raises.
    try:
        # Training cycle: each epoch is one gradient-descent step on the whole training set.
        for epoch in range(training_epochs):
            # Run optimization op (backprop), cost op (to get loss value) and the summaries
            _, c, summary = sess.run([optimizer, cost, summary_op],
                                     feed_dict={x: train_x, y: train_y})
            # The full training set is fed in a single batch, so the epoch's
            # average cost is simply this step's cost.
            avg_cost = float(c)
            # Training summaries are logged at steps 100, 200, ...
            writer.add_summary(summary,(epoch+1)*100)
            # Display logs every 50 epochs
            if (epoch+1) % 50 == 0:
                print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
        print("Optimization Finished!")
        writer.flush()

        # we are adding additional histogram summaries here...
        # Distribution of the predictions on the test split only.
        pred_summary = tf.summary.histogram("prediction",pred)
        merged = tf.summary.merge([pred_summary])
        y_pred,ss = sess.run([pred,merged], feed_dict={x: test_x, y: test_y})
        writer.add_summary(ss)
        writer.flush()
        print ("AUC:",sk.metrics.roc_auc_score(test_y, y_pred))
        fpr, tpr, thresholds = sk.metrics.roc_curve(test_y, y_pred)
        # we are adding ROC here: one scalar per ROC point, with the false-positive
        # rate (as a percentage) on the step axis and the true-positive rate as the value.
        for roc_fpr, roc_tpr in zip(fpr, tpr):
            summt = tf.Summary()
            summt.value.add(tag="ROC", simple_value=roc_tpr)
            # The step must be an integer; round instead of letting the float be
            # truncated (e.g. 0.29 * 100 == 28.999... would otherwise land on 28).
            writer.add_summary(summt, int(round(roc_fpr * 100)))  # act as global_step
    finally:
        # close() flushes pending events, so no per-point flush is needed above.
        writer.close()
    

Epoch: 0050 cost= 0.727610886
Epoch: 0100 cost= 0.524173200
Epoch: 0150 cost= 0.425240457
Epoch: 0200 cost= 0.366862983
Epoch: 0250 cost= 0.328748077
Epoch: 0300 cost= 0.302088737
Epoch: 0350 cost= 0.282445014
Epoch: 0400 cost= 0.267359734
Epoch: 0450 cost= 0.255376667
Epoch: 0500 cost= 0.245585859
Epoch: 0550 cost= 0.237393558
Epoch: 0600 cost= 0.230398655
Epoch: 0650 cost= 0.224322081
Epoch: 0700 cost= 0.218964666
Epoch: 0750 cost= 0.214180797
Epoch: 0800 cost= 0.209862262
Epoch: 0850 cost= 0.205926567
Epoch: 0900 cost= 0.202310383
Epoch: 0950 cost= 0.198963940
Epoch: 1000 cost= 0.195847809
Optimization Finished!
AUC: 0.964914831741
