In [1]:
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
import time
import preprocess

In [2]:
trainX,trainY,testX,testY = preprocess.input_data('./data/ham','./data/spam',.1)  
np.savetxt("./data/trainX.csv", trainX, delimiter=",")
np.savetxt("./data/trainY.csv", trainY, delimiter=",")
np.savetxt("./data/testX.csv", testX, delimiter=",")
np.savetxt("./data/testY.csv", testY, delimiter=",")

In [3]:
def csv_to_numpy_array(filePath, delimiter):
    return np.genfromtxt(filePath, delimiter=delimiter, dtype=None)

In [4]:
print("loading training data")
trainX = csv_to_numpy_array("./data/trainX.csv", delimiter=",")
trainY = csv_to_numpy_array("./data/trainY.csv", delimiter=",")
print("loading test data")
testX = csv_to_numpy_array("./data/testX.csv", delimiter=",")
testY = csv_to_numpy_array("./data/testY.csv", delimiter=",")

loading training data
loading test data


In [5]:
numFeatures = trainX.shape[1]
numLabels = trainY.shape[1]
numEpochs = 100
learningRate = tf.train.exponential_decay(learning_rate=0.0008,
                                          global_step= 1,
                                          decay_steps=trainX.shape[0],
                                          decay_rate= 0.95,
                                          staircase=True)

In [6]:
X = tf.placeholder(tf.float32, [None, numFeatures])
yGold = tf.placeholder(tf.float32, [None, numLabels])

In [7]:
weights = tf.Variable(tf.random_normal([numFeatures,numLabels],
                                       mean=0,
                                       stddev=(np.sqrt(6/numFeatures+
                                                         numLabels+1)),
                                       name="weights"))
bias = tf.Variable(tf.random_normal([1,numLabels],
                                    mean=0,
                                    stddev=(np.sqrt(6/numFeatures+numLabels+1)),
                                    name="bias"))

In [8]:
init_OP = tf.global_variables_initializer()

In [9]:
apply_weights_OP = tf.matmul(X, weights, name="apply_weights")
add_bias_OP = tf.add(apply_weights_OP, bias, name="add_bias") 
activation_OP = tf.nn.sigmoid(add_bias_OP, name="activation")

In [10]:
cost_OP = tf.nn.l2_loss(activation_OP-yGold, name="squared_error_cost")
training_OP = tf.train.GradientDescentOptimizer(learningRate).minimize(cost_OP)

In [11]:
epoch_values=[]
accuracy_values=[]
cost_values=[]

In [12]:
sess = tf.Session()
sess.run(init_OP)
correct_predictions_OP = tf.equal(tf.argmax(activation_OP,1),tf.argmax(yGold,1))
accuracy_OP = tf.reduce_mean(tf.cast(correct_predictions_OP, "float"))
activation_summary_OP = tf.summary.histogram("output", activation_OP)
accuracy_summary_OP = tf.summary.scalar("accuracy", accuracy_OP)
cost_summary_OP = tf.summary.scalar("cost", cost_OP)
weightSummary = tf.summary.histogram("weights", weights.eval(session=sess))
biasSummary = tf.summary.histogram("biases", bias.eval(session=sess))
all_summary_OPS = tf.summary.merge_all()
writer = tf.summary.FileWriter("summary_logs", sess.graph)

In [13]:
# Initialize reporting variables
cost = 0
diff = 1

In [14]:
# Training epochs
for i in range(numEpochs):
        # Run training step
        step = sess.run(training_OP, feed_dict={X: trainX, yGold: trainY})
        # Report occasional stats
        if i % 10 == 0:
            # Add epoch to epoch_values
            epoch_values.append(i)
            # Generate accuracy stats on test data
            summary_results, train_accuracy, newCost = sess.run(
                [all_summary_OPS, accuracy_OP, cost_OP], 
                feed_dict={X: trainX, yGold: trainY}
            )
            # Add accuracy to live graphing variable
            accuracy_values.append(train_accuracy)
            # Add cost to live graphing variable
            cost_values.append(newCost)
            # Write summary stats to writer
            writer.add_summary(summary_results, i)
            # Re-assign values for variables
            diff = abs(newCost - cost)
            cost = newCost

            #generate print statements
            print("step %d, training accuracy %g"%(i, train_accuracy))
            print("step %d, cost %g"%(i, newCost))
            print("step %d, change in cost %g"%(i, diff))

step 0, training accuracy 0.287863
step 0, cost 1595.39
step 0, change in cost 1595.39
step 10, training accuracy 0.712137
step 10, cost 966.983
step 10, change in cost 628.403
step 20, training accuracy 0.711923
step 20, cost 955.069
step 20, change in cost 11.9142
step 30, training accuracy 0.711923
step 30, cost 952.269
step 30, change in cost 2.80005
step 40, training accuracy 0.711923
step 40, cost 949.812
step 40, change in cost 2.45642
step 50, training accuracy 0.711923
step 50, cost 947.419
step 50, change in cost 2.39294
step 60, training accuracy 0.711923
step 60, cost 945.077
step 60, change in cost 2.34247
step 70, training accuracy 0.711923
step 70, cost 942.783
step 70, change in cost 2.29401
step 80, training accuracy 0.711923
step 80, cost 940.536
step 80, change in cost 2.24628
step 90, training accuracy 0.712137
step 90, cost 938.336
step 90, change in cost 2.20068


In [15]:
print("final accuracy on test set: %s" %str(sess.run(accuracy_OP, feed_dict={X: testX, yGold: testY})))

final accuracy on test set: 0.696325


In [16]:
saver = tf.train.Saver()

In [17]:
sess.close()