# Neural networks basics

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
from IPython.display import Markdown, display, HTML

# Fix the dying kernel problem (only a problem in some installations - you can remove it, if it works without it)
import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

**Task 1.** Write a method for performing feed forward for a network with 2 input neurons, 3 hidden neurons with ReLU as the activation function, and 1 output neuron with no activation function. The interface of the method should be as follows:

    feed_forward_1(x, w1, w2)
    
where x is a numpy array of size 1x2, w1 is a numpy array of size 3x2 of weights from the input layer to the hidden layer, w2 is a numpy array of size 1x3. The result should be a single number.

As a helper function you can write the relu function:

    relu(x)
   
which takes a numpy array as input and applies the ReLU function element-wise.

Test it on 2-3 examples, calculate the result also with pen and paper by yourself (it's very important to do it at least once in your life!) and compare your results with the function results.

In [53]:
def temp(a):
    return a + 1

def relu(x):
    ########################
    # Write your code here #
    ########################
    if x > 0:
        return x
    return 0

def relu2(x):
    tempik = np.vectorize(relu)
    return tempik(x)

def feed_forward_1(x, w1, w2):
    ########################
    # Write your code here #
    ########################
    w1x = np.matmul(w1, x)
    print(w1x)
    reluw1x = relu2(w1x)
    print(reluw1x)
    w2reluw1x = np.matmul(w2, reluw1x)
    print(w2reluw1x)
    if(w2reluw1x < 0):
        return w2reluw1x
    reluw2reluw1x = relu2(w2reluw1x)
    print(reluw2reluw1x)
    return reluw2reluw1x

def simpler(x):
    return x[0][0]


# Test

########################
# Write your code here #
########################
print(relu(3))
array = np.array([[1],[-2],[3]])
print(array)
blep = np.vectorize(relu)
arrayblep = blep(array)
print(arrayblep)
print(relu2(array))

w1 = np.array([[0.1, 0.2], [0.7, 0.3], [0.4, 0.3]])
print(w1)
w2 = np.array([[0.4, 0.2, 0.9]])
# w2 = np.array([[1, -1, 1]])
print(w2)
x = np.array([[0.9], [0.7]])
print(x)

print(feed_forward_1(x, w1, w2))
print(simpler(feed_forward_1(x, w1, w2)))

x = np.array([1, 2])
w1 = np.array([[-1, 1],
               [0.5, -2],
               [0, 3]])
w2 = np.array([-1, -2, -1])
print(feed_forward_1(x, w1, w2))

3
[[ 1]
 [-2]
 [ 3]]
[[1]
 [0]
 [3]]
[[1]
 [0]
 [3]]
[[0.1 0.2]
 [0.7 0.3]
 [0.4 0.3]]
[[0.4 0.2 0.9]]
[[0.9]
 [0.7]]
[[0.23]
 [0.84]
 [0.57]]
[[0.23]
 [0.84]
 [0.57]]
[[0.773]]
[[0.773]]
[[0.773]]
[[0.23]
 [0.84]
 [0.57]]
[[0.23]
 [0.84]
 [0.57]]
[[0.773]]
[[0.773]]
0.7730000000000001
[ 1.  -3.5  6. ]
[1. 0. 6.]
-7.0
-7.0


**Task 2.** Write a method for performing feed forward for a network with 2 input neurons, 3 hidden neurons with sigmoid activation function, and 1 output neuron with sigmoid activation function. The interface of the method should be as follows:

    feed_forward_2(x, w1, w2)
    
where x is a numpy array of size 1x2, w1 is a numpy array of size 3x2 of weights from the input layer to the hidden layer, w2 is a numpy array of size 1x3. The result should be a single number.

As a helper function you can write the sigmoid function:

    sigmoid(x)
   
which takes a numpy array as input and applies the logistic function element-wise.

Test it on 2-3 examples. Try to estimate the result by hand and compare both results.

In [26]:
import math
def sigmoid(x):
    ########################
    # Write your code here #
    ########################
    return 1 / (1 + (math.exp(-x)))

def sigmoid2(x):
    tempik = np.vectorize(sigmoid)
    return tempik(x)

def feed_forward_2(x, w1, w2):
    ########################
    # Write your code here #
    ########################
    w1x = np.matmul(w1, x)
    reluw1x = sigmoid2(w1x)
    w2reluw1x = np.matmul(w2, reluw1x)
    reluw2reluw1x = sigmoid2(w2reluw1x)
    return reluw2reluw1x

# Test

########################
# Write your code here #
########################
print(sigmoid(1))
w1 = np.array([[0.1, 0.2], [0.7, 0.3], [0.4, 0.3]])
print(w1)
w2 = np.array([[0.4, 0.2, 0.9]])
# w2 = np.array([[1, -1, 1]])
print(w2)
x = np.array([[0.9], [0.7]])
print(x)

print(feed_forward_2(x, w1, w2))

0.7310585786300049
[[0.1 0.2]
 [0.7 0.3]
 [0.4 0.3]]
[[0.4 0.2 0.9]]
[[0.9]
 [0.7]]
[[0.71859015]]


**Task 3.** Write a method for performing feed forward for a network with 2 input neurons, 3 hidden neurons with relu activation, and 3 output neurons with no activation function but softmax applied to them. The interface of the method should be as follows:

    feed_forward_3(x, w1, w2)
    
where x is a numpy array of size 1x2, w1 is a numpy array of size 3x2 of weights from the input layer to the hidden layer, w2 is a numpy array of size 3x3. The result should be a numpy array of size 1x3.

As a helper function you can write the softmax function:

    softmax(x)
   
which takes a numpy array as input, applies softmax to it and returns a numpy array of the same size.

Test it on 2-3 examples. Try to estimate the result by hand and compare both results. Verify that the sum of output neurons is equal to 1.

In [27]:
def softmax(x):
    ########################
    # Write your code here #
    ########################
    suma = np.sum(np.exp(x))
    tempik = np.exp(x) / suma
    return tempik

def feed_forward_3(x, w1, w2):
    w1x = np.matmul(w1, x)
    reluw1x = relu2(w1x)
    w2reluw1x = np.matmul(w2, reluw1x)
    reluw2reluw1x = softmax(w2reluw1x)
    return reluw2reluw1x

# Test

########################
# Write your code here #
########################
print(sigmoid(1))
w1 = np.array([[0.1, 0.2], [0.7, 0.3], [0.4, 0.3]])
print(w1)
w2 = np.array([[0.4, 0.2, 0.9], [0.4, 0.8, 0.94], [0.2, 0.3, 0.5]])
# w2 = np.array([[1, -1, 1]])
print(w2)
x = np.array([[0.9], [0.7]])
print(x)

print(feed_forward_3(x, w1, w2))
#print(relu2(np.matmul(w1, x)))
#print(softmax(relu2(np.matmul(w1, x))))

0.7310585786300049
[[0.1 0.2]
 [0.7 0.3]
 [0.4 0.3]]
[[0.4  0.2  0.9 ]
 [0.4  0.8  0.94]
 [0.2  0.3  0.5 ]]
[[0.9]
 [0.7]]
[[0.2840535 ]
 [0.48104586]
 [0.23490064]]


**Task 4.** Write a method for calculating the squared error:

    se(y_bar, y)
    
where y_bar is a numpy array of predicted results and y is a numpy array of real values to be predicted.

Test it on the 2-3 examples you created for feed_forward_1 and find the SE with respect to real values chosen by hand. Experiment with several values to get a grip of how SE works.

In [28]:
def se(y_bar, y):
    ########################
    # Write your code here #
    ########################
    temp = y - y_bar
    tempik = (temp * temp)
    return np.sum(tempik)
    
# Test

########################
# Write your code here #
########################
a = np.array([0.4, 0.5, 0.6])
b = np.array([0.43, 0.58, 0.6])
print(se(a, b))

print(a * 2)

0.007299999999999991
[0.8 1.  1.2]


**Task 5.** Write a method for calculating the cross-entropy loss:

    crossentropy(y_bar, y)
    
where y_bar is a numpy array of predicted results and y is a numpy array of real values to be predicted (y must contain one value of 1 and 0 on all other positions - just like in one-hot encoding).

Test it on the 2-3 examples you created for feed_forward_3 and find the cross-entropy with respect to vectors of real values chosen by hand. Experiment with several target vectors to get a grip of how cross-entropy works.

In [54]:
def crossentropy(y_bar, y):
    ########################
    # Write your code here #
    ########################
    
    suma = 0
    for i in range (len(y)):
        temp_sum = 0
        current_y = y[i]
        current_y_pred = y_bar[i]
        for c in range (len(current_y)):
            current_y_c = current_y[c]
            current_y_pred_c = current_y_pred[c]
            temp_sum += current_y_c * np.log(current_y_pred_c)
            temp_sum += (1- current_y_c) * np.log(1 - current_y_pred_c)
        suma -= temp_sum
    return suma/len(y)
    
    #y_bar_log = np.log(y_bar)
    #print(y_bar_log)
    
# Test

########################
# Write your code here #
########################
y1 = [0, 0, 1, 0]
y2 = [1, 0, 0, 0]
y3 = [0, 0, 0, 1]
y4 = [0, 1, 0, 0]

y = np.array([y1, y2, y3, y4])

predicted_y1 = [0.15, 0.03, 0.8, 0.02]
predicted_y2 = [0.9, 0.05, 0.03, 0.02]
predicted_y3 = [0.1, 0.1, 0.05, 0.75]
predicted_y4 = [0.01, 0.97, 0.01, 0.01]

predicted_y = np.array([predicted_y1, predicted_y2, predicted_y3, predicted_y4])


print(crossentropy(predicted_y, y))



0.3134866834155037


**Task 6.** Write a method which calculates the error term for network 2:

    error_term(x, w1, w2, y)
    
where x is a numpy array of size 1x2, w1 is a numpy array of size 3x2 of weights from the input layer to the hidden layer, w2 is a numpy array of size 1x3, y is a float.

The solution is extremely simple - it's just the difference between y and the result of feed_forward_2. This task is solely to bring this notion to your attention.

Test it on the 2-3 examples you created for feed_forward_2 and several values of y (remember that the values of y should logically be in the interval [0-1], because you used the sigmoid activation which sends the network's results into this interval, technically turning them into probabilities).

In [30]:
def error_term(x, w1, w2, y):
    ########################
    # Write your code here #
    ########################
    helper = feed_forward_2(x, w1, w2)
    return np.abs(y - helper)

# Test

########################
# Write your code here #
########################

w1 = np.array([[0.1, 0.2], [0.7, 0.3], [0.4, 0.3]])
print(w1)
w2 = np.array([[0.4, 0.2, 0.9]])
# w2 = np.array([[1, -1, 1]])
print(w2)
x = np.array([[0.9], [0.7]])
print(x)

y1 = 0.7

print(error_term(x, w1, w2, y1))

[[0.1 0.2]
 [0.7 0.3]
 [0.4 0.3]]
[[0.4 0.2 0.9]]
[[0.9]
 [0.7]]
[[0.01859015]]


**Task 7.** Write a method which calculates the derivative of the sigmoid function on every element of a numpy array:

    sigmoid_derivative(x)
    
where x is a numpy array. The result should have the same size as the input.

Use the formula:

<center>
$$
    \sigma'(x) = \sigma(x) (1 - \sigma(x))
$$
</center>

Verify this formula by differentiating the sigmoid function by hand.

Test the sigmoid_derivative method on a numpy array with several values (e.g. -3, -2, -1, 0, 1, 2, 3).

In [31]:
def helper(x):
    return (sigmoid2(x) * (1 - sigmoid2(x)))

def sigmoid_derivative(x):
    ########################
    # Write your code here #
    ########################
    bep = [0] * len(x)
    for i in range (len(x)):
        bep[i] = helper(x[i])
    return bep

# Test

########################
# Write your code here #
########################
helpik = np.array([-3, -2, -1, 0, 1, 2, 3])
print(sigmoid_derivative(helpik))

[0.04517665973091214, 0.1049935854035065, 0.19661193324148185, 0.25, 0.19661193324148185, 0.10499358540350662, 0.045176659730912]


**Task 8.** Write a method for performing the backpropagation step for network 2. The method should have the following interface:

    backpropagate(x, w1, w2, y, alpha)
    
where x is a numpy array of size 1x2, w1 is a numpy array of size 3x2 of weights from the input layer to the hidden layer, w2 is a numpy array of size 1x3, y is a float, alpha is the learning rate. The method should return a tuple with updated matrices w1 and w2.

Test it on 2-3 examples. Test several values of the learning rate alpha on the same input to see how it affects the update step.

In [56]:
import math
def sigmoid_temp(x):
    ########################
    # Write your code here #
    ########################
    return 1 / (1 + (math.exp(-x)))

def sigmoid2_temp(x):
    tempik = np.vectorize(sigmoid)
    return tempik(x)

def feed_forward_2_temp(x, w1, w2, tempik):
    ########################
    # Write your code here #
    ########################
    #w1x = np.matmul(w1, x)
    reluw1x = sigmoid2(tempik)
    w2reluw1x = np.matmul(w2, reluw1x)
    reluw2reluw1x = sigmoid2(w2reluw1x)
    return reluw2reluw1x

def backpropagate(x, w1, w2, y, alpha):
    ########################
    # Write your code here #
    ########################
    blepciok = w1.copy()
    temp = np.matmul(w1, x)
    print(temp)
    h = sigmoid2(temp)
    print(h)
    y_test = sigmoid2(np.matmul(w2, h))
    w1 = 2 * alpha
    print(w1)
    print(y)
    fif = feed_forward_2_temp(x, blepciok, w2, temp)
    print("fif: " + str(fif))
    e = y - fif
    print(e)
    w1 = w1 * e
    print("w1 : " + str(w1))
    w2h = np.matmul(w2, h)
    s = sigmoid_derivative(w2h)
    w1 = w1 * s
    print(w1)
    w1 = w1 * h
    #w1 = 2 * alpha * error_term(x, w1, w2, y) * sigmoid_derivative(w2 * h) * h
    #w2 = 2 * alpha * error_term(x, w1, w2, y) * sigmoid_derivative(w2 * h) * h * np.dot(w2 * sigmoid_derivative(w1 * x)) * x.T
    w2 = 2 * alpha
    w2 = w2 * e
    w2 = w2 * s
    w2 = w2 * h
    s2 = sigmoid_derivative(temp)
    #w2s2 = np.dot(w2, s2)
    w2s2 = w2 * s2
    w2 = w2 * w2s2
    w2 = w2 * x.T
    return (w1, w2)

# Test

########################
# Write your code here #
########################

w1 = np.array([[0.1, 0.2], [0.7, 0.3], [0.4, 0.3]])
#print(w1)
w2 = np.array([[0.4, 0.2, 0.9]])
# w2 = np.array([[1, -1, 1]])
#print(w2)
x = np.array([[0.9], [0.7]])
#print(x)

y1 = 0.7

alfa = 0.01
alfa2 = 0.001

#w1_test = np.array([[0.1, 0.2], [0.7, 0.3], [0.4, 0.3]])

blep = backpropagate(x, w1, w2, y1, alfa)
print(blep[0])
print(blep[1])

print()
print()

x = np.array([1.0, 2.0])
w1 = np.array([[-1.0, 1.0],
               [0.5, -2],
               [0.0, 3.0]])
w2 = np.array([-1.0, 2.0, 1.0])

print(feed_forward_2(x, w1, w2))
print()

y = -0.78
alpha = 0.1
for _ in range(10):
    w1, w2 = backpropagate(x, w1, w2, y, alpha)
    print(w1)
    print(w2)
    print(feed_forward_2(x, w1, w2))
    print()

[[0.23]
 [0.84]
 [0.57]]
[[0.55724785]
 [0.69846522]
 [0.63876318]]
0.02
0.7
fif: [[0.71859015]]
[[-0.01859015]]
w1 : [[-0.0003718]]
[[-7.51854058e-05]]
[[-4.18969061e-05]
 [-5.25143907e-05]
 [-4.80256685e-05]]
[[3.89776360e-10 3.03159391e-10]
 [5.22734751e-10 4.06571473e-10]
 [4.78984252e-10 3.72543307e-10]]


0.5805650145974393

[ 1.  -3.5  6. ]
[0.73105858 0.02931223 0.99752738]
0.2
-0.78
fif: 0.5805650145974393
-1.3605650145974393
w1 : -0.27211300291948787


TypeError: object of type 'numpy.float64' has no len()

**Task 9.** Write a method for performing the backpropagation step for network 2 but without the activation function on the output neuron.

In [None]:
def backpropagate_2(x, w1, w2, y, alpha):
    ########################
    # Write your code here #
    ########################
    return w1, w2

# Test

########################
# Write your code here #
########################