### Initialization

In [9]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import tensorflow as tf

# turn off tensorflow deprecation warnings
import tensorflow.python.util.deprecation as deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

### Data Input
Read the bank data set and split into a features and a label subset

In [10]:
data_sets = ('bank-10percent', 'bank-full', 'bank-balanced')
bank = pd.read_csv('../data/' + data_sets[1] + '.csv')

label_col = 'y'
features = bank.drop(columns=['y'])
label = bank[label_col]

label_encoded = pd.get_dummies(label, drop_first = False)
features_encoded = pd.get_dummies(features, drop_first = True)

class_count=label_encoded.shape[1]
feature_count=features_encoded.shape[1]

### Preprocess Data
For logistic regression it's recommended to normalize the data

In [11]:
sc_X = StandardScaler()
features_normalized = sc_X.fit_transform(features_encoded)

Split into training and test set

In [12]:
X_train, X_test, y_train, y_test = train_test_split(features_normalized, label_encoded, test_size = 0.2, random_state = 167)

### Build Tensorflow Graph

In [13]:
# Graph Input
x = tf.placeholder(tf.float32, [None, feature_count]) 
y = tf.placeholder(tf.float32, [None, class_count]) 

# Set model weights
W = tf.Variable(tf.zeros([feature_count, class_count]))
b = tf.Variable(tf.zeros([class_count]))

# Construct model
prob = tf.nn.softmax(tf.matmul(x, W) + b) # Softmax

# Minimize error using cross entropy
cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(prob), reduction_indices=1))

# softmax returns a two-dimensional tensor with probabilities for each class
pred_class = tf.argmax(prob, 1);
true_class = tf.argmax(y, 1);
class1_prob = tf.gather(prob, 1, axis=1)

_, accuracy = tf.metrics.accuracy(true_class, pred_class)
_, area_under_curve = tf.metrics.auc(true_class, class1_prob)
    
# Gradient Descent
learning_rate = 0.1
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)


Initialize the variables (i.e. assign their default value)

In [14]:
init = [ 
        tf.global_variables_initializer(), 
        tf.local_variables_initializer()    # for metrics
        ]

### Training and Evaluation

Set training parameters

In [15]:
training_epochs = 200
display_step = 5

In [16]:
with tf.Session() as sess:

    # Run the initializer
    sess.run(init)

    # Training cycle
    for epoch in range(training_epochs):
        # Run optimization op (backprop) and cost op (to get loss value)
        _, c, acc = sess.run([optimizer, cost, accuracy], feed_dict={x: X_train, y: y_train})
        # Display logs per epoch step
        if (epoch+1) % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c), "Accuracy: {0:.2f} %".format(acc*100))
    
    print("Optimization Finished!")

    # Test model
    pc, tc, acc, auc = sess.run([pred_class, true_class, accuracy, area_under_curve], 
                            feed_dict={x: X_test, y: y_test})
    
    # Calculate accuracy
    print("Confusion Matrix :")   
    print(confusion_matrix(tc, pc))
    
    print("Accuracy: {0:.2f} %".format(acc*100))
    
    print("AUC: {0:.2f}".format(auc))

Epoch: 0005 cost= 0.551598310 Accuracy: 85.55 %
Epoch: 0010 cost= 0.454488099 Accuracy: 87.15 %
Epoch: 0015 cost= 0.394153893 Accuracy: 88.00 %
Epoch: 0020 cost= 0.353864551 Accuracy: 88.50 %
Epoch: 0025 cost= 0.325605541 Accuracy: 88.85 %
Epoch: 0030 cost= 0.304985225 Accuracy: 89.11 %
Epoch: 0035 cost= 0.289442688 Accuracy: 89.31 %
Epoch: 0040 cost= 0.277408630 Accuracy: 89.48 %
Epoch: 0045 cost= 0.267879367 Accuracy: 89.62 %
Epoch: 0050 cost= 0.260188788 Accuracy: 89.73 %
Epoch: 0055 cost= 0.253880918 Accuracy: 89.82 %
Epoch: 0060 cost= 0.248634532 Accuracy: 89.90 %
Epoch: 0065 cost= 0.244218171 Accuracy: 89.97 %
Epoch: 0070 cost= 0.240461215 Accuracy: 90.03 %
Epoch: 0075 cost= 0.237235576 Accuracy: 90.09 %
Epoch: 0080 cost= 0.234443471 Accuracy: 90.14 %
Epoch: 0085 cost= 0.232009143 Accuracy: 90.18 %
Epoch: 0090 cost= 0.229872912 Accuracy: 90.22 %
Epoch: 0095 cost= 0.227987483 Accuracy: 90.26 %
Epoch: 0100 cost= 0.226314664 Accuracy: 90.29 %
Epoch: 0105 cost= 0.224823534 Accuracy: 