## Building our TensorFlow Model

In [None]:

# TODO: Load our data into memory (we cannot do this yet, because the script that computes the labels is still running).
# NOTE(review): the training cell further down iterates over `train_data`, which is only sketched
# inside the string below and is not actually bound anywhere in this section.
""" Here are some useful functions to keep in mind, pulled from the in-class exercise on TensorFlow
tf_X_train = np.array(X_train, dtype='float32')
train_data = tf.data.Dataset.from_tensor_slices((tf_X_train, y_train))
train_data = train_data.repeat().shuffle(500).batch(batch_size).prefetch(1)
"""

In [39]:
# --- Problem dimensions ---
num_features = 128  # every input point is a 128-dimensional vector of floats
num_classes = 2  # every point is labelled either *good* or *bad*

# --- Network parameters (same starting point as in class; we plan to build/change from there) ---
n_hidden_1 = 28  # number of neurons in the 1st hidden layer

# --- Training parameters (initial values reused from our first in-class TensorFlow model) ---
learning_rate, training_steps = 0.0001, 3000
batch_size, display_step = 25, 100  # display_step: progress is printed every this many steps

In [41]:
# Trainable parameters of the network, kept in two dicts keyed by layer name.

# Initializer that draws the starting values of the weight matrices.
random_normal = tf.initializers.RandomNormal()

# Weight matrices: input -> hidden ('h1') and hidden -> output ('out').
weights = dict(
    h1=tf.Variable(random_normal(shape=[num_features, n_hidden_1])),
    out=tf.Variable(random_normal(shape=[n_hidden_1, num_classes])),
)

# Bias vectors, one per layer, all starting at zero.
biases = dict(
    b1=tf.Variable(tf.zeros(shape=[n_hidden_1])),
    out=tf.Variable(tf.zeros(shape=[num_classes])),
)

In [46]:
def neural_net(x):
    """Forward pass of the network: one ReLU hidden layer, then a sigmoid output layer.

    Reads the module-level ``weights`` and ``biases`` dicts.

    Args:
        x: Input batch; it is matrix-multiplied with ``weights['h1']``.

    Returns:
        The element-wise sigmoid of the output layer, i.e. values in (0, 1)
        rather than raw logits.
    """
    # Hidden fully connected layer (n_hidden_1 neurons) with ReLU non-linearity.
    hidden = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))

    # Output fully connected layer with one neuron per class.
    scores = tf.matmul(hidden, weights['out']) + biases['out']

    # Squash each output independently with a sigmoid (this is not a softmax).
    return tf.nn.sigmoid(scores)

In [None]:
# Create model.
# NOTE(review): a `neural_net` with the same body is also defined in an earlier cell;
# whichever cell runs last wins, so one of the two can be removed.
def neural_net(x):
    """Forward pass of the network: one ReLU hidden layer, then a sigmoid output layer.

    Reads the module-level ``weights`` and ``biases`` dicts.

    Args:
        x: Input batch; it is matrix-multiplied with ``weights['h1']``.

    Returns:
        The element-wise sigmoid of the output layer, i.e. values in (0, 1)
        rather than raw logits.
    """
    # Hidden fully connected layer (n_hidden_1 neurons) with ReLU non-linearity.
    hidden = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))

    # Output fully connected layer with one neuron per class.
    scores = tf.matmul(hidden, weights['out']) + biases['out']

    # Squash each output independently with a sigmoid (this is not a softmax).
    return tf.nn.sigmoid(scores)

### Custom Loss/Metric Functions

Here we create the **cross_entropy** and **accuracy** functions, which take our model's predictions and the true labels and compute our loss and scoring metric, similar to what we've used in the past.

In [42]:
# Cross-Entropy loss function.
def cross_entropy(y_pred, y_true):
    """Mean binary cross-entropy between predicted probabilities and labels.

    Args:
        y_pred: Predictions as returned by ``neural_net``, i.e. values that have
            already been passed through a sigmoid (probabilities, not logits).
        y_true: Ground-truth labels. Assumed to contain the same number of
            elements as ``y_pred`` -- TODO confirm against the label format.

    Returns:
        A scalar float32 tensor holding the loss averaged over all elements.
    """
    # Flatten both inputs into a single column so every element is scored on its own.
    labels = tf.cast(tf.reshape(y_true, (-1, 1)), dtype=tf.float32)
    probabilities = tf.cast(tf.reshape(y_pred, (-1, 1)), dtype=tf.float32)

    # neural_net already applies the sigmoid, so the loss must treat y_pred as
    # probabilities. With from_logits=True a second sigmoid would be applied,
    # which distorts the loss and flattens the gradients.
    # Keras clips the probabilities internally, so log(0) cannot occur.
    bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    return bce(labels, probabilities)

# Accuracy metric.
def accuracy(y_pred, y_true):
    """Fraction of entries whose rounded prediction equals the true label.

    Args:
        y_pred: Predicted scores; each one is rounded to the nearest integer.
        y_true: Ground-truth labels, compared element-wise with the rounded scores.

    Returns:
        A scalar float32 tensor: the mean of the element-wise matches.
    """
    # Turn scores into hard decisions by rounding (no argmax is involved).
    decisions = tf.cast(tf.math.round(y_pred), tf.int64)
    labels = tf.cast(y_true, tf.int64)
    # 1.0 where decision and label agree, 0.0 otherwise; the mean is the accuracy.
    matches = tf.cast(tf.equal(decisions, labels), tf.float32)
    return tf.reduce_mean(matches)

# Adam optimizer, configured with the learning rate chosen in the parameters cell.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

### Defining our Backward Prop
While many things need to be manually defined, we can use `tf.GradientTape()` to track trainable variables and compute their gradients with respect to a given calculation/output. This means we don't need to manually define our backward propagation and can simply rely on the `GradientTape().gradient(target, sources)` functionality.
 - Note: This can be changed for higher order gradients

In [43]:
# Optimization process.
def run_optimization(x, y):
    """Perform one optimizer update of all weights and biases on a single batch.

    Args:
        x: Batch of inputs, fed to ``neural_net``.
        y: Matching labels, passed to ``cross_entropy``.

    Returns:
        None. The module-level ``weights`` and ``biases`` variables are updated in place.
    """
    # Everything the optimizer is allowed to change: all weights, then all biases.
    params = [*weights.values(), *biases.values()]

    # Record the forward pass so the tape can differentiate the loss afterwards.
    with tf.GradientTape() as tape:
        batch_loss = cross_entropy(neural_net(x), y)

    # d_loss / d_param for every trainable variable.
    grads = tape.gradient(batch_loss, params)

    # Let the optimizer apply the update to W and b.
    optimizer.apply_gradients(zip(grads, params))

### Training our Model
Just as we need to manually define our network, we also may need to define our training cycle. Here we run through the following:
 1. Getting a batch from our dataset for ***training_steps*** number of times
 2. We update the weights/run the optimization for these inputs
 3. We make a prediction, calculate loss, and determine the accuracy
 4. repeat

In [None]:
# Run training for the given number of steps (steps are counted from 1).
for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), start=1):
    # One optimizer update of W and b on this batch.
    run_optimization(batch_x, batch_y)

    # Progress is only reported every `display_step` steps.
    if step % display_step:
        continue

    # Score the freshly updated model on the batch it was just trained on.
    pred = neural_net(batch_x)
    loss = cross_entropy(pred, batch_y)
    acc = accuracy(pred, batch_y)
    print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))

In [None]:
# Assessing the output of our model
# Convert the held-out features to a float32 array. This rebinds X_test, so the
# original object is no longer reachable under that name after the cell has run.
X_test = np.array(X_test, dtype='float32')
# NOTE(review): `test` is built here but not used again within this cell.
test = tf.data.Dataset.from_tensor_slices((X_test,y_test))
# Run the whole test set through the network in a single call, then score the predictions.
p = neural_net(X_test)
l = cross_entropy(p, y_test)
a = accuracy(p, y_test)
# Prints the test loss followed by the test accuracy.
print(l,a)