<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Initialization" data-toc-modified-id="Initialization-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Initialization</a></span></li><li><span><a href="#Data-Input" data-toc-modified-id="Data-Input-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Data Input</a></span></li><li><span><a href="#Preprocess-Data" data-toc-modified-id="Preprocess-Data-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Preprocess Data</a></span></li><li><span><a href="#Build-Tensorflow-Graph" data-toc-modified-id="Build-Tensorflow-Graph-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Build Tensorflow Graph</a></span></li><li><span><a href="#Training-and-Evaluation" data-toc-modified-id="Training-and-Evaluation-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Training and Evaluation</a></span></li></ul></div>

### Initialization

In [None]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import tensorflow as tf

# turn off tensorflow deprecation warnings
import tensorflow.python.util.deprecation as deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

### Data Input
Read the bank data set, split it into a feature subset and a label subset, and one-hot encode both.

In [None]:
# Available variants of the bank data set; each one is read from
# ../data/<name>.csv.
data_sets = ('bank-10percent', 'bank-full', 'bank-balanced')
bank = pd.read_csv('../data/' + data_sets[1] + '.csv')  # index 1 -> 'bank-full'

# Name of the target column; every other column is treated as a feature.
# The name is used for both the drop and the selection so the two can never
# get out of sync.
label_col = 'y'
features = bank.drop(columns=[label_col])
label = bank[label_col]

# One-hot encode the label with one column per class (the softmax below has
# one output per class). For the features the first level of each categorical
# column is dropped.
label_encoded = pd.get_dummies(label, drop_first=False)
features_encoded = pd.get_dummies(features, drop_first=True)

# Dimensions used to size the placeholders and weights of the model.
class_count = label_encoded.shape[1]
feature_count = features_encoded.shape[1]

### Preprocess Data
For logistic regression trained with gradient descent, it is recommended to standardize the features (zero mean, unit variance).

In [None]:
# Standardize every encoded feature column. The fitted scaler stays available
# as `sc_X`.
# Note: the statistics are computed over all rows, i.e. before any
# train/test split.
sc_X = StandardScaler()
sc_X.fit(features_encoded)
features_normalized = sc_X.transform(features_encoded)

Split the data into a training set (80%) and a test set (20%).

In [None]:
# Split the un-scaled encoded features first, then fit a scaler on the
# training rows only, so that statistics of the test rows cannot leak into the
# scaling applied to the training data. Both splits are transformed with the
# training-set mean and standard deviation.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features_encoded, label_encoded, test_size=0.2, random_state=167)

train_scaler = StandardScaler().fit(X_train_raw)
X_train = train_scaler.transform(X_train_raw)
X_test = train_scaler.transform(X_test_raw)

### Build Tensorflow Graph

In [None]:
# Graph inputs: one row per example, fed at run time.
x = tf.placeholder(tf.float32, [None, feature_count])
y = tf.placeholder(tf.float32, [None, class_count])  # one-hot encoded labels

# Model weights, initialized to zero.
W = tf.Variable(tf.zeros([feature_count, class_count]))
b = tf.Variable(tf.zeros([class_count]))

# Construct model: linear scores (logits) followed by a softmax.
logits = tf.matmul(x, W) + b
prob = tf.nn.softmax(logits)

# Minimize error using cross entropy. It is computed from the logits instead
# of as -sum(y * log(prob)): the fused op stays finite when a probability
# underflows to 0, where tf.log(prob) would produce -inf / NaN.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))

# softmax returns a two-dimensional tensor with probabilities for each class;
# argmax along axis 1 picks the class index per row.
pred_class = tf.argmax(prob, 1)
true_class = tf.argmax(y, 1)
# Probability of the class in column 1, used as the score for the AUC.
class1_prob = tf.gather(prob, 1, axis=1)

# Accuracy of exactly the batch that is fed. The update op of
# tf.metrics.accuracy is a streaming metric that keeps accumulating over every
# sess.run call, so fetching it during training and then on the test set
# would average the training batches into the test accuracy.
accuracy = tf.reduce_mean(tf.cast(tf.equal(pred_class, true_class), tf.float32))
# Streaming AUC: the second return value is the update op. Its accumulators
# are local variables and must be initialized before it is run.
_, area_under_curve = tf.metrics.auc(true_class, class1_prob)

# Gradient Descent
learning_rate = 0.1
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)


Create the ops that initialize the variables (i.e. assign their initial values); they are run at the start of the session.

In [None]:
# Global variables are initialized together with the local variables that the
# metric ops rely on; both initializers are run as one list.
global_init = tf.global_variables_initializer()
local_init = tf.local_variables_initializer()
init = [global_init, local_init]

### Training and Evaluation

Set training parameters

In [None]:
# Cost and accuracy are logged once every `display_step` epochs.
display_step = 5
# Number of passes of the training loop.
training_epochs = 200

In [None]:
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    # Op that zeroes the local variables, i.e. the accumulators behind the
    # streaming tf.metrics ops. It is created once, outside the loop, so the
    # graph does not grow with every epoch.
    reset_metrics = tf.local_variables_initializer()

    # Training cycle
    for epoch in range(training_epochs):
        # Start every epoch from clean accumulators so that a streaming metric
        # reports this epoch only, not a running average over all epochs so far.
        sess.run(reset_metrics)
        # Run optimization op (backprop), cost op (to get loss value) and accuracy
        _, c, acc = sess.run([optimizer, cost, accuracy], feed_dict={x: X_train, y: y_train})
        # Display logs every display_step epochs
        if (epoch+1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c), "Accuracy: {0:.2f} %".format(acc*100))

    print("Optimization Finished!")

    # Reset once more before evaluating, so the test metrics are computed from
    # the test set alone instead of continuing totals accumulated in training.
    sess.run(reset_metrics)

    # Test model
    pc, tc, acc, auc = sess.run([pred_class, true_class, accuracy, area_under_curve],
                                feed_dict={x: X_test, y: y_test})

    # Report confusion matrix, accuracy and AUC on the test set
    print("Confusion Matrix :")
    print(confusion_matrix(tc, pc))

    print("Accuracy: {0:.2f} %".format(acc*100))

    print("AUC: {0:.2f}".format(auc))