In [4]:
import numpy as np
# Example feature vector fed to the network.
# NOTE(review): `input` shadows Python's built-in input(); the name is kept
# because the later cells in this notebook reference it.
input = np.array([1.54, 1.32])
# One weight per input feature.
weights = np.array([1.56, -0.45])
# Bias is a 1-element array, so dot(input, weights) + bias is a 1-element array too.
bias = np.array([0.0])


In [5]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e**-x) of ``x``.

    ``x`` is passed straight to ``np.exp``, so scalars and NumPy arrays
    (evaluated element-wise) are both accepted.
    """
    exp_neg_x = np.exp(-x)
    return 1 / (1 + exp_neg_x)


def make_prediction(input, weights, bias):
    """Run one forward pass: dot(input, weights) + bias, passed through sigmoid.

    Args:
        input: feature values, combined with ``weights`` via ``np.dot``.
        weights: one weight per input feature.
        bias: offset added to the weighted sum before the activation.

    Returns:
        The sigmoid of the weighted sum plus bias.
    """
    # simple two layer network: a linear layer followed by a sigmoid activation
    weighted_sum = np.dot(input, weights)
    return sigmoid(weighted_sum + bias)
    

In [6]:
# Forward pass using the notebook-level input, weights and bias as they stand
# when this cell runs.
prediction = make_prediction(input, weights, bias)
print(f'The prediction is {prediction}')


The prediction is [0.85916839]


In [7]:
# basic gradient descent
# NOTE: this cell reassigns the notebook-level `weights` and `prediction`, so
# re-running it applies a further update on top of the previous one.
target = 0
# Squared error of the current prediction (not read again in the visible cells).
MSE = (prediction - target)**2
# Derivative of the squared error with respect to the prediction.
derivative = 2 * (prediction - target)
print(f'The derivative is {derivative}')

# Simplified update step: the derivative w.r.t. the prediction is subtracted
# from every weight directly, with no learning rate and no chain rule through
# the sigmoid or the inputs.
weights = weights - derivative
prediction = make_prediction(input, weights, bias)
# Squared error after the update.
MSE = (prediction - target)**2
print(f'The new prediction is {prediction}')


The derivative is [1.71833678]
The new prediction is [0.04285867]


In [8]:
# getting derivative of error function with respect to bias using backpropagation
# Reads the notebook-level `prediction`, `target`, `input`, `weights` and `bias`
# as they stand when this cell runs.
d_error_wrt_prediction = 2*(prediction - target)
layer1_result = np.dot(input, weights) + bias 
d_prediction_wrt_layer1 = sigmoid(layer1_result) * (1 - sigmoid(layer1_result))     # derivative of sigmoid fn: s(x) * (1 - s(x))
d_layer1_wrt_bias = 1   # layer1 = dot(input, weights) + bias, so d(layer1)/d(bias) = 1
d_error_wrt_bias = d_layer1_wrt_bias * d_prediction_wrt_layer1 * d_error_wrt_prediction  # chain rule
# Bare expression so the notebook displays the value as the cell output.
d_error_wrt_bias

array([0.00351628])

In [9]:
class NeuralNetwork:
    """Single sigmoid neuron with two inputs, trained on squared error.

    The forward pass is ``sigmoid(dot(input, weights) + bias)``.

    Attributes are set in ``__init__``:
        learning_rate: factor applied to the gradients in ``update_params``.
        weights: array of two weights, each drawn with ``np.random.randn()``.
        bias: scalar bias drawn with ``np.random.randn()``.
    """

    def __init__(self, learning_rate):
        """Store the learning rate and draw random starting parameters."""
        self.learning_rate = learning_rate

        # weights and bias randomly chosen
        self.weights = np.array([np.random.randn(), np.random.randn()])
        self.bias = np.random.randn()

    def sigmoid(self, x):
        """Return the logistic function 1 / (1 + e**-x) of ``x``."""
        return 1/(1+np.exp(-x))

    def predict(self, input):
        """Return the network output for ``input`` (forward pass only)."""
        layer1 = np.dot(input, self.weights) + self.bias
        layer2 = self.sigmoid(layer1)
        return layer2

    def sigmoid_derivative(self, x):
        """Return the derivative of the sigmoid at ``x``: s(x) * (1 - s(x))."""
        # Evaluate the sigmoid once and reuse it for both factors.
        activation = self.sigmoid(x)
        return activation * (1 - activation)

    def compute_gradients(self, input, target):
        """Compute the squared-error gradients for one training example.

        Args:
            input: feature values for the example (combined with the weights
                via ``np.dot``).
            target: desired output for the example.

        Returns:
            A tuple ``(d_error_wrt_weights, d_error_wrt_bias)``, in the same
            order that ``update_params`` accepts, so the result can be passed
            on as ``update_params(*compute_gradients(input, target))``.
        """
        # Forward pass, keeping the pre-activation value for the derivative.
        layer1 = np.dot(input, self.weights) + self.bias
        prediction = self.sigmoid(layer1)

        # Chain rule: d(error)/d(prediction) * d(prediction)/d(layer1) * d(layer1)/d(param)
        d_error_wrt_prediction = 2*(prediction - target)
        d_prediction_wrt_layer1 = self.sigmoid_derivative(layer1)

        # layer1 = dot(input, weights) + bias, so d(layer1)/d(bias) = 1
        d_layer1_wrt_bias = 1
        d_error_wrt_bias = d_layer1_wrt_bias * d_prediction_wrt_layer1 * d_error_wrt_prediction

        # d(dot(input, weights))/d(weights) is the input itself
        d_layer1_wrt_weights = np.asarray(input)
        d_error_wrt_weights = d_layer1_wrt_weights * d_prediction_wrt_layer1 * d_error_wrt_prediction

        # Previously the gradients were computed and then discarded.
        return d_error_wrt_weights, d_error_wrt_bias

    def update_params(self, d_error_wrt_weights, d_error_wrt_bias):
        """Move the weights and bias one learning-rate-scaled step against the gradients."""
        self.bias = self.bias - (d_error_wrt_bias * self.learning_rate)
        self.weights = self.weights - (d_error_wrt_weights * self.learning_rate)