In [4]:
import numpy as np

def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1 / (1 + np.exp(-x))

input_features = np.array([0.5, 0.1, -0.2])               # (3,)
expected_output = 0.6                                     # scalar
learning_rate = 0.5                                       # scalar
weights_from_input_to_hidden = np.array([[0.5, -0.6],     # (3, 2) - 3 inputs to 2 hidden
                                         [0.1, -0.2],
                                         [0.1, 0.7]])
weights_from_hidden_to_output = np.array([0.1, -0.3])     # (2,) - 2 hidden to 1 output

### Forward pass

# Compute activations by passing weighted sums through sigmoid
hidden_layer_activation = sigmoid(np.dot(input_features, weights_from_input_to_hidden))  # (2,)
output_layer_activation = sigmoid(np.dot(hidden_layer_activation, weights_from_hidden_to_output))  # scalar

### Backwards pass

## Output layer

# Error: difference between expected and actual output
output_layer_error = expected_output - output_layer_activation  # scalar
# Gradient: sigmoid derivative for the output neuron (how responsive is this neuron?)
output_layer_gradient = output_layer_activation * (1 - output_layer_activation)  # scalar
# Scaled error: how much should this neuron change? (how wrong × how responsive)
output_layer_scaled_error = output_layer_error * output_layer_gradient  # scalar

## Hidden layer

# Error: blame assigned to each hidden neuron based on its connection strength
hidden_layer_error = output_layer_scaled_error * weights_from_hidden_to_output  # (2,)
# Gradient: sigmoid derivative for each hidden neuron (how responsive is this neuron?)
hidden_layer_gradient = hidden_layer_activation * (1 - hidden_layer_activation)  # (2,)
# Scaled error: how much should each hidden neuron change? (how wrong × how responsive)
hidden_layer_scaled_error = hidden_layer_error * hidden_layer_gradient  # (2,)

# Weight updates: how much should it change × how much did this input contribute x learning rate to reduce the delta
delta_weights_from_hidden_to_output = learning_rate * output_layer_scaled_error * hidden_layer_activation  # (2,)
delta_weights_from_input_to_hidden = learning_rate * np.outer(input_features, hidden_layer_scaled_error)  # (3, 2)

print('Change in weights for hidden layer to output layer:')
print(delta_weights_from_hidden_to_output)

print('Change in weights for input layer to hidden layer:')
print(delta_weights_from_input_to_hidden)

Change in weights for hidden layer to output layer:
[0.00804047 0.00555918]
Change in weights for input layer to hidden layer:
[[ 1.77005547e-04 -5.11178506e-04]
 [ 3.54011093e-05 -1.02235701e-04]
 [-7.08022187e-05  2.04471402e-04]]


In [6]:
import numpy as np
from notebooks.backpropagation_data_prep import features, targets, features_test, targets_test

np.random.seed(21)

def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1 / (1 + np.exp(-x))

# Hyperparameters
num_hidden_units = 2  # number of hidden units
epochs = 900
learning_rate = 0.005
num_records, num_features = features.shape
previous_loss = None

# Initialize weights
weights_from_input_to_hidden = np.random.normal(scale=1 / num_features ** .5,
                                                size=(num_features, num_hidden_units))
weights_from_hidden_to_output = np.random.normal(scale=1 / num_features ** .5,
                                                 size=num_hidden_units)
for e in range(epochs):
    delta_weights_from_input_to_hidden = np.zeros(weights_from_input_to_hidden.shape)
    delta_weights_from_hidden_to_output = np.zeros(weights_from_hidden_to_output.shape)
    for x, y in zip(features.values.astype(float), targets):
        ## Forward pass ##
        # TODO: Calculate the output
        hidden_layer_input = np.dot(x, weights_from_input_to_hidden)
        hidden_layer_activation = sigmoid(hidden_layer_input)
        output_layer_activation = sigmoid(np.dot(hidden_layer_activation,
                                                 weights_from_hidden_to_output))

        ## Backward pass ##
        # TODO: Calculate the network's prediction error
        output_layer_error = y - output_layer_activation
        
        # TODO: Calculate error term for the output unit
        output_layer_scaled_error = output_layer_error * output_layer_activation * (1 - output_layer_activation)

        ## propagate errors to hidden layer
        # TODO: Calculate the hidden layer's contribution to the error
        hidden_layer_error = np.dot(output_layer_scaled_error, weights_from_hidden_to_output)
        
        # TODO: Calculate the error term for the hidden layer
        hidden_layer_scaled_error = hidden_layer_error * hidden_layer_activation * (1 - hidden_layer_activation)
        
        # TODO: Update the change in weights
        delta_weights_from_hidden_to_output += output_layer_scaled_error * hidden_layer_activation
        delta_weights_from_input_to_hidden += hidden_layer_scaled_error * np.array(x[:, None], dtype=np.float64)

    # TODO: Update weights
    weights_from_input_to_hidden += learning_rate * delta_weights_from_input_to_hidden / num_records
    weights_from_hidden_to_output += learning_rate * delta_weights_from_hidden_to_output / num_records

    # Printing out the mean square error on the training set
    if e % (epochs / 10) == 0:
        hidden_layer_activation = sigmoid(np.dot(x, weights_from_input_to_hidden))
        output_layer_activation = sigmoid(np.dot(hidden_layer_activation,
                                                 weights_from_hidden_to_output))
        mean_squared_error = np.mean((output_layer_activation - targets) ** 2)
        if previous_loss and previous_loss < mean_squared_error:
            print("Train loss:", mean_squared_error, "WARNING - Loss Increasing")
        else:
            print("Train loss:", mean_squared_error)
        previous_loss = mean_squared_error

# Calculate accuracy on test data
hidden_layer_activation = sigmoid(np.dot(features_test.values.astype(float), weights_from_input_to_hidden))
output_layer_activation = sigmoid(np.dot(hidden_layer_activation, weights_from_hidden_to_output))
predictions = output_layer_activation > 0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))

Train loss: 0.2513572524259881
Train loss: 0.24996540718842905
Train loss: 0.24862005218904504
Train loss: 0.2473199321717981
Train loss: 0.24606380465584854
Train loss: 0.24485044179257037
Train loss: 0.243678632018683
Train loss: 0.24254718151769472
Train loss: 0.24145491550165454
Train loss: 0.24040067932493334
Prediction accuracy: 0.725
