In [1]:
import numpy as np

# Problem C.1
Write a function in Python that can take any function $f$, any 1D numpy array representing a point $x$, and any difference delta as input, and then return the gradient of $f$, evaluated at $x$, using the finite difference approximation introduced above. This function should then be
used to solve the following problems.

In [2]:
def gradient_f(f, x, delta = 10e-6):
    """Approximate the gradient of ``f`` at the point ``x`` by forward differences.

    Each partial derivative is estimated as
    ``(f(x + delta * e_i) - f(x)) / delta``, where ``e_i`` is the i-th unit
    vector, i.e. only coordinate ``i`` of the point is shifted.

    Parameters
    ----------
    f : callable
        Scalar-valued function that takes the whole point as one 1D array.
    x : array-like, 1D
        Point at which the gradient is evaluated. It is not modified.
    delta : float, optional
        Step size of the finite difference. Note that the default ``10e-6``
        equals ``1e-5``.

    Returns
    -------
    numpy.ndarray
        1D array with one partial derivative per coordinate of ``x``.
    """
    # Work on a float view/copy: adding delta in place to an integer array
    # would truncate the step to zero.
    point = np.asarray(x, dtype=float)

    # Determine the dimension of x vector
    N_dim = point.shape[0]

    # Create the zero vector, which is used to store the final gradient vector.
    final_gradient = np.zeros(N_dim)

    # The unperturbed value is the same for every coordinate, so evaluate it once.
    f_at_point = f(point)

    for i in range(N_dim):
        # Shift only the ith coordinate; all other coordinates stay fixed.
        shifted = point.copy()
        shifted[i] += delta
        # Put the partial derivative in direction i into the final gradient vector.
        final_gradient[i] = (f(shifted) - f_at_point) / delta

    return final_gradient

# Problem C.2
Find the gradient of $f_{C.2}(x, y) = (x - 2)^2 + (y - 2)^2$ at the point $(1, 1)$ by

## (a)
Manual differentiation.

With paper and pencil, we work out that $\nabla f_{C.2}(x, y) = (2(x - 2),\ 2(y - 2))$, so the gradient at the point $(1, 1)$ is $(-2, -2)$.

## (b)
Implementing the above difference approximation method in Python, and then applying
it to this task.

In [3]:
# Objective of Problem C.2: squared distance from the point (2, 2).
def f_C2(x, y):
    """Return (x - 2)^2 + (y - 2)^2 for the given x and y."""
    dx = x - 2
    dy = y - 2
    return dx ** 2 + dy ** 2

def gradient_f_C2(f, x, delta = 10e-6):
    """Forward-difference gradient of a two-argument function ``f(x, y)``.

    ``x`` is a 1D numpy array holding the point ``(x, y)``. Entry 0 of the
    result is the partial derivative with respect to the first argument of
    ``f``; every other entry perturbs the second argument, so the function is
    only meaningful for two-element points. The default step ``10e-6`` equals
    ``1e-5``.
    """
    n_vars = x.shape[0]
    grad = np.zeros(n_vars)

    for idx in range(n_vars):
        # Coordinate being differentiated and its shifted counterpart.
        base = x[idx]
        shifted = base + delta

        if idx == 0:
            # Partial derivative with respect to the first variable.
            ahead, here = f(shifted, x[1]), f(base, x[1])
        else:
            # Partial derivative with respect to the second variable.
            ahead, here = f(x[0], shifted), f(x[0], base)

        grad[idx] = (ahead - here) / delta

    return grad

# Evaluate the finite-difference gradient of f_C2 at the point (1, 1).
vector_x_y = np.array([1, 1])
gradient_result = gradient_f_C2(f_C2, vector_x_y)
# print() applies str() to each argument; sep="" joins them without extra spaces.
print("The gradient at the point (1, 1) is ", gradient_result, ".", sep="")

The gradient at the point (1, 1) is [-1.99999 -1.99999].


# Problem C.3
Determine the values of all six partial derivatives of the function $f_{C.3}$ at the point $(v_1 = 1,\ v_2 = 2,\ w_1 = 1,\ w_2 = 2,\ b_1 = 3,\ b_2 = 4)$ when $x = 3$.
<br>
$f_{C.3}(v_1, v_2, w_1, w_2, b_1, b_2) = v_1 (w_1 x + b_1) + v_2 (w_2 x + b_2)$
<br>
NOTE: This function $f_{C.3}$ is not considered to be a function of the variable $x$. This is a
relevant situation for example when $x$ is the input to a multilayer neural network and where
$f_{C.3}$ denotes the network output as a function of the network parameters $v_1$, $v_2$, $w_1$, $w_2$, $b_1$, $b_2$.
<br>
You should use two approaches:

## (a) 
Manual differentiation by means of the chain rule.

With paper and pencil, we work out that the gradient is $(6, 10, 3, 6, 1, 2)$, when $x = 3$.

## (b) 
Implementing the above difference approximation method in Python, and then applying it to this task.

In [4]:
# Objective of Problem C.3: two affine units of the input x, weighted by v1 and v2.
def f_C3(v1, v2, w1, w2, b1, b2, x):
    """Return v1 * (w1 * x + b1) + v2 * (w2 * x + b2)."""
    unit_1 = w1 * x + b1
    unit_2 = w2 * x + b2
    return v1 * unit_1 + v2 * unit_2

def gradient_f_C3(f, x, delta = 10e-6):
    """Forward-difference gradient of ``f`` with respect to all but its last argument.

    ``x`` packs the positional arguments of ``f`` into one 1D array: the
    leading entries are the parameters to differentiate, and the final entry
    is a fixed input that is passed to ``f`` unchanged and is not
    differentiated. For Problem C.3 this is
    ``(v1, v2, w1, w2, b1, b2, x)``, giving six partial derivatives.

    Parameters
    ----------
    f : callable
        Scalar-valued function called as ``f(*x)``.
    x : array-like, 1D
        Parameter values followed by the fixed input. It is not modified.
    delta : float, optional
        Step size of the finite difference. Note that the default ``10e-6``
        equals ``1e-5``.

    Returns
    -------
    numpy.ndarray
        1D array of length ``len(x) - 1`` holding the partial derivatives in
        the same order as the parameters appear in ``x``.
    """
    # Float copy so that an integer input array cannot truncate the step.
    values = np.asarray(x, dtype=float)

    # Number of parameters to differentiate: everything except the trailing input.
    N_dim = values.shape[0] - 1

    # Create the zero vector, which is used to store the final gradient vector.
    final_gradient = np.zeros(N_dim)
    if N_dim == 0:
        # Nothing to differentiate; avoid calling f at all.
        return final_gradient

    # The unperturbed value is shared by every partial derivative.
    f_at_point = f(*values)

    for i in range(N_dim):
        # Shift only the ith parameter; the others and the input stay fixed.
        shifted = values.copy()
        shifted[i] += delta
        final_gradient[i] = (f(*shifted) - f_at_point) / delta

    return final_gradient
    
# Parameters (v1, v2, w1, w2, b1, b2) followed by the fixed input x = 3.
vector_input = np.array([1, 2, 1, 2, 3, 4, 3])
gradient_result = gradient_f_C3(f_C3, vector_input, delta=10e-6)
# print() applies str() to each argument; sep="" joins them without extra spaces.
print("The gradient is ", gradient_result, ", when x = 3.", sep="")

The gradient is [ 6. 10.  3.  6.  1.  2.], when x = 3.


# Problem C.4
First, let us define the function $f_{C.4}$ in Python, together with a finite-difference approximation of its gradient:

In [5]:
def phi(z):
  """Return 1 / (1 + exp(-z)), the logistic function applied to z."""
  denominator = 1 + np.exp(-z)
  return 1 / denominator

def fc4(v1, v2, w1, w2, b1, b2, x=3):
  """Return phi(v1 * phi(w1 * x + b1) + v2 * phi(w2 * x + b2)).

  The six leading arguments are the parameters of the function. ``x`` is the
  fixed input; it defaults to 3, the value previously hard-coded in the body,
  so existing six-argument calls give the same result as before.
  """
  hidden_1 = phi(w1 * x + b1)
  hidden_2 = phi(w2 * x + b2)
  return phi(v1 * hidden_1 + v2 * hidden_2)

def gradient(v1, v2, w1, w2, b1, b2,delta):
  """Forward-difference gradient of fc4 at (v1, v2, w1, w2, b1, b2).

  Each partial derivative is estimated as
  ``(fc4(..., p + delta, ...) - fc4(...)) / delta`` with only one parameter
  shifted at a time.

  Returns a list of six values ordered as the parameters are:
  v1, v2, w1, w2, b1, b2.
  """
  params = [v1, v2, w1, w2, b1, b2]

  # The unperturbed value is shared by all six differences; evaluate it once.
  base_value = fc4(*params)

  partials = []
  for k in range(len(params)):
    # Copy the parameters and shift only the kth one.
    shifted = list(params)
    shifted[k] = shifted[k] + delta
    partials.append((fc4(*shifted) - base_value) / delta)

  return partials

# Gradient of fc4 at (v1, v2, w1, w2, b1, b2) = (1, 2, 1, 2, 3, 4) with step 10**-3.
gradient(1, 2, 1, 2, 3, 4, delta=10**-3)

[0.04514924452891922,
 0.04525905385677209,
 0.00033456165726697407,
 1.2315084396696818e-05,
 0.000111631789256883,
 4.109133810992205e-06]