# Forward Propagation code

A couple of raw code examples of forward propagation calculations.

In [11]:
import numpy as np

In [13]:
# A single observation with two features.
input_data = np.array([2, 3])

# Weights feeding each hidden node, plus the weights that connect the
# hidden layer to the output node.
weights = {
    'node_0': np.array([1, 1]),
    'node_1': np.array([-1, 1]),
    'output': np.array([2, -1]),
}

# Each hidden node's value is the weighted sum of the inputs.
node_0_value = np.dot(input_data, weights['node_0'])
node_1_value = np.dot(input_data, weights['node_1'])

In [10]:
# Show both hidden-node values, one per line.
print(node_0_value, node_1_value, sep='\n')

5
1


In [14]:
# Pack the two node values into one array representing the hidden layer.
hidden_layer_values = np.asarray([node_0_value, node_1_value])
print(hidden_layer_values)

[5 1]


In [17]:
# The network output is the weighted sum of the hidden-layer values.
output = np.sum(hidden_layer_values * weights['output'])
print(output)

9


For maximum predictive power, we have to apply an activation function in the hidden layers in order to capture non-linearities.

For this, we distinguish the input from the output of each neuron in the hidden layer: the output is the activation function applied to the input. The first example below uses tanh; ReLU is introduced after it.

In [18]:
# Forward pass with the same weights as before, this time passing each hidden
# node's weighted input through tanh before it reaches the output node.
node_0_input = np.sum(input_data * weights['node_0'])
node_1_input = np.sum(input_data * weights['node_1'])

# The activation turns each node's input into its output.
node_0_output = np.tanh(node_0_input)
node_1_output = np.tanh(node_1_input)

hidden_layer_outputs = np.array([node_0_output, node_1_output])

# Weighted sum of the activated hidden layer gives the network output.
output = np.sum(hidden_layer_outputs * weights['output'])
print(output)

1.23822425257


The ReLU (rectified linear unit) activation function returns the maximum of (input, 0): the input itself when it is positive, and 0 otherwise.

In [19]:
def relu(input):
    """Apply the rectified linear unit: the element-wise maximum of ``input`` and 0.

    Args:
        input: A number or a NumPy array. The parameter name shadows the
            built-in ``input`` inside this function only; it is kept so that
            existing keyword calls (``relu(input=...)``) continue to work.

    Returns:
        ``input`` where it is greater than 0 and 0 elsewhere. A scalar
        argument yields a NumPy scalar; an array argument yields an array
        of the same shape.
    """
    # np.maximum broadcasts against the scalar 0, so whole layers can be
    # activated at once; the built-in max() raises on arrays that hold more
    # than one element.
    return np.maximum(input, 0)

Rewriting the example above, replacing the tanh with the ReLU

In [22]:
# Same forward pass as the tanh example, with relu as the hidden-layer
# activation instead.
node_0_input = np.sum(input_data * weights['node_0'])
node_1_input = np.sum(input_data * weights['node_1'])

node_0_output = relu(node_0_input)
node_1_output = relu(node_1_input)

hidden_layer_outputs = np.array([node_0_output, node_1_output])

# Weighted sum of the activated hidden layer gives the network output.
output = np.sum(hidden_layer_outputs * weights['output'])
print(output)

9


In order to generalize the model to work with multiple observations, we will define a function which will generate predictions for an array of observations.

In [24]:
def predict_with_network(input_data_row, weights):
    """Run one observation through a network with two hidden layers of two nodes each.

    Args:
        input_data_row: Values of a single observation; multiplied
            element-wise with the weights of each first-layer node.
        weights: Mapping with the entries 'node_0_0', 'node_0_1',
            'node_1_0', 'node_1_1' and 'output'.

    Returns:
        The relu-activated weighted sum of the second hidden layer's outputs.
    """
    def activate_layer(layer_input, node_names):
        # One relu-activated weighted sum per node, in the order given.
        return np.array(
            [relu((layer_input * weights[name]).sum()) for name in node_names]
        )

    hidden_0_output = activate_layer(input_data_row, ('node_0_0', 'node_0_1'))
    hidden_1_output = activate_layer(hidden_0_output, ('node_1_0', 'node_1_1'))

    # The output node is passed through relu as well.
    return relu((hidden_1_output * weights['output']).sum())

In [None]:
# One prediction per element obtained by iterating over input_data.
# NOTE(review): predict_with_network reads the keys 'node_0_0', 'node_0_1',
# 'node_1_0' and 'node_1_1', while the `weights` dict defined earlier in this
# notebook only holds 'node_0', 'node_1' and 'output' -- confirm that the
# observations and weights for the two-hidden-layer network are loaded
# before this cell is run.
results = [
    predict_with_network(input_data_row, weights)
    for input_data_row in input_data
]

### Gradient Descent Optimizer Function


First we need a loss function to measure how closely the model's predictions approximate the actual target values.

In [None]:
from sklearn.metrics import mean_squared_error

# One list of predictions per candidate weight set.
model_output_0, model_output_1 = [], []

# Predict every observation with both weight sets.
for row in input_data:
    model_output_0.append(predict_with_network(row, weights_0))
    model_output_1.append(predict_with_network(row, weights_1))

# Mean squared error of each set of predictions against the actual targets.
mse_0 = mean_squared_error(model_output_0, target_actuals)
mse_1 = mean_squared_error(model_output_1, target_actuals)

# Report both errors so the two weight sets can be compared.
print("Mean squared error with weights_0: %f" % mse_0)
print("Mean squared error with weights_1: %f" % mse_1)


Gradient Descent update Algorithm

In [26]:
import numpy as np
# Toy problem: one observation, two weights and a single target value.
input_data = np.array([3, 4])
weights = np.array([1, 2])
target = 6
learning_rate = 0.01

# The prediction is the weighted sum of the inputs; the error is its signed
# distance from the target.
preds = np.dot(weights, input_data)
error = preds - target
print(error)

5


Calculating the slope of the current Error

In [27]:
# Slope of the squared error with respect to each weight: 2 * input * error.
gradient = (2 * error) * input_data
gradient

array([30, 40])

In [29]:
# Move the weights against the gradient, scaled by the learning rate, then
# re-evaluate the prediction and its error with the new weights.
step = learning_rate * gradient
weights_updated = weights - step
preds_updated = np.sum(weights_updated * input_data)
error_updated = preds_updated - target
error_updated

2.5

In [None]:
from sklearn.metrics import mean_squared_error as mse
import matplotlib.pyplot as plt

def get_slope(input_data, target, weights):
    """Return the slope of the squared prediction error for each weight.

    Args:
        input_data: Input values, multiplied element-wise with ``weights``.
        target: Value the weighted sum should reach.
        weights: Current weights.

    Returns:
        ``2 * input_data * error``, where ``error`` is the weighted sum of
        the inputs minus ``target``.
    """
    prediction_error = (weights * input_data).sum() - target
    return (2 * input_data) * prediction_error


def get_mse(input_data, target, weights):
    """Return the mean squared error of the weighted-sum prediction against ``target``.

    Args:
        input_data: Input values, multiplied element-wise with ``weights``.
        target: Value the prediction is compared with.
        weights: Current weights.

    Returns:
        The mean of the squared difference between prediction and target,
        as a Python float.
    """
    preds = (input_data * weights).sum()
    # Computed with NumPy instead of the imported `mse` helper: sklearn's
    # mean_squared_error expects array-like arguments, while `preds` is a
    # single number here, and this keeps the function working even if the
    # name `mse` is later rebound to a number.
    return float(np.mean(np.square(preds - target)))


n_updates = 20
mse_hist = []

# Repeatedly step the weights against the slope and record the error that
# results from each update.
for i in range(n_updates):
    slope = get_slope(input_data, target, weights)

    # Same step size as the single manual update shown earlier.
    weights = weights - learning_rate * slope

    # Kept under its own name: assigning the result to `mse` would overwrite
    # the imported mean_squared_error alias of that name.
    current_mse = get_mse(input_data, target, weights)

    mse_hist.append(current_mse)


# Plot the mse History
plt.plot(mse_hist)
plt.xlabel('Iterations')
plt.ylabel('Mean Squared Error')
plt.show()
    

## Creating a Keras Model

- 1) You specify the architecture
- 2) You compile the model
- 3) You fit the model (Backprop, Model weight approximation)
- 4) Predict with the model

In [None]:
# 1. Model Specification
import numpy as np
from keras.layers import Dense
from keras.models import Sequential

# Load the predictor matrix; its number of columns sets the input width.
predictors = np.loadtxt('predictors_data.csv', delimiter=',')
n_cols = predictors.shape[1]

# Two hidden layers of 100 relu units feeding a single output unit.
model = Sequential([
    Dense(100, activation='relu', input_shape=(n_cols,)),
    Dense(100, activation='relu'),
    Dense(1),
])

In [None]:
# Configure training: the Adam optimizer minimising mean squared error.
model.compile(loss='mean_squared_error', optimizer='adam')

# Scaling the data
# NOTE(review): no scaling code is present in this cell -- confirm whether
# the predictors are expected to be scaled before they are loaded.

# Fitting the model with prescaled Features
# NOTE(review): the last assignment to `target` in this notebook is the
# scalar 6 from the gradient-descent example -- confirm that the real target
# array is loaded before fitting.
model.fit(x=predictors, y=target)
