# Deep Learning - Assignment 1

- Build a neural network from scratch
- Implement a 2-class classification neural network with a single hidden layer
- Use the sigmoid activation function at the hidden layer and softmax at the output layer
- Compute the cross-entropy loss, since this is a classification task


In [40]:
%matplotlib inline
import math
from math import log
import numpy as np
from random import random 

1 - Defining the neural network structure

In [41]:
X = [1.0, -1.0]  # Input vector (two features)
Y = [1.0, 0.0]  # Target output vector (one value per class)

In [42]:
def sigmoid(x):
    """
    Return the logistic sigmoid 1 / (1 + e^-x) of a scalar.

    Arguments:
    x -- a real number

    Returns:
    a float between 0 and 1
    """
    # math.exp raises OverflowError for large positive arguments, so each
    # branch only ever exponentiates a non-positive value.
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

In [43]:
def softmax(X):
    """
    Return the softmax of X, normalised over all of its elements.

    Arguments:
    X -- array-like of real-valued scores

    Returns:
    numpy array with the same shape as X whose entries sum to 1
    (an empty array when X is empty)
    """
    scores = np.asarray(X, dtype=float)
    if scores.size == 0:
        return scores
    # Subtracting the largest score leaves the result unchanged
    # mathematically but keeps np.exp from overflowing to inf (which would
    # turn the ratio into nan).
    exps = np.exp(scores - np.max(scores))
    return exps / np.sum(exps)

In [44]:
def categorical_cross_entropy(predicted, actual):
    """
    Return the categorical cross-entropy -sum(actual[i] * log(predicted[i])).

    Arguments:
    predicted -- sequence of predicted class probabilities
    actual -- sequence of target values, one per class

    Returns:
    the loss as a float
    """
    epsilon = 1e-15  # keeps log() defined when a predicted probability is 0
    total = 0.0
    for i, target in enumerate(actual):
        total += target * log(epsilon + predicted[i])
    # No scaling by the class count: the loss is the plain negated sum, so
    # the value is correct for any number of classes, not only two.
    return -total

2 - Initialize the model's parameters

In [45]:
def initialize_parameters():
    """
    Build the fixed starting values for the 2-3-2 network.

    Returns:
    parameters -- python dictionary containing:
                  "W"  -- 2x3 nested list, input-to-hidden weights
                  "b"  -- list of 3 hidden-layer biases
                  "V"  -- 3x2 nested list, hidden-to-output weights
                  "c"  -- list of 2 output-layer biases
                  "k1", "h" -- zero-filled lists of length 3
                  "k2", "y" -- zero-filled lists of length 2
    Every call returns newly created lists.
    """
    hidden_units, output_units = 3, 2
    input_signs = (1.0, -1.0)
    hidden_signs = (1.0, -1.0, -1.0)

    return {
        "W": [[sign] * hidden_units for sign in input_signs],
        "b": [0.0] * hidden_units,
        "V": [[sign] * output_units for sign in hidden_signs],
        "c": [0.0] * output_units,
        "k1": [0.0] * hidden_units,
        "h": [0.0] * hidden_units,
        "k2": [0.0] * output_units,
        "y": [0.0] * output_units,
    }

In [46]:
parameters = initialize_parameters()
# Display the weights and biases only.
print(f"W = {parameters['W']}")
print(f"b = {parameters['b']}")
print(f"V = {parameters['V']}")
print(f"c = {parameters['c']}")

W = [[1.0, 1.0, 1.0], [-1.0, -1.0, -1.0]]
b = [0.0, 0.0, 0.0]
V = [[1.0, 1.0], [-1.0, -1.0], [-1.0, -1.0]]
c = [0.0, 0.0]


3 - Implement forward_propagation

In [47]:
def forward_propagation(X, parameters):
    """
    Run one forward pass: input -> sigmoid hidden layer -> softmax output.

    Prints k1, h, k2 and y as they are computed.

    Arguments:
    X -- sequence of input values, one per row of W
    parameters -- python dictionary containing "W", "b", "V" and "c"

    Returns:
    y -- The softmax output of the second activation
    cache -- a dictionary containing "k1", "h", "k2" and "y"
    """
    # Retrieve each parameter from the dictionary "parameters"
    W = parameters["W"]
    b = parameters["b"]
    V = parameters["V"]
    c = parameters["c"]

    # The pre-activations are built in fresh lists on every call. Adding
    # into lists kept inside "parameters" would carry the previous call's
    # values over and give a different result each time.

    # Hidden pre-activation: k1[j] = sum_i W[i][j] * X[i] + b[j]
    k1 = [sum(column[i] * x for i, x in enumerate(X)) + b[j]
          for j, column in enumerate(zip(*W))]
    print(f"k1 = {k1}")

    h = [sigmoid(value) for value in k1]
    print(f"h = {h}")

    # Output pre-activation: k2[j] = sum_i V[i][j] * h[i] + c[j]
    k2 = [sum(column[i] * unit for i, unit in enumerate(h)) + c[j]
          for j, column in enumerate(zip(*V))]
    print(f"k2 = {k2}")

    y = softmax(k2)
    print(f"y = {y}")

    cache = {"k1": k1,
             "h": h,
             "k2": k2,
             "y": y}

    return y, cache

In [48]:
# Forward pass on the sample input; cache keeps the intermediate values
# that the backward pass reads.
y, cache = forward_propagation(X, parameters)

k1 = [2.0, 2.0, 2.0]
h = [0.8807970779778823, 0.8807970779778823, 0.8807970779778823]
k2 = [-0.8807970779778823, -0.8807970779778823]
y = [0.5 0.5]


In [49]:
# Loss of the forward-pass output y against the target Y.
print(f"cost = {categorical_cross_entropy(y, Y)}")

cost = 0.6931471805599433


In [50]:
def backward_propagation(parameters, cache, X, Y):
    """
    Compute the gradients of the loss for one sample, for a softmax output
    with cross-entropy loss and a sigmoid hidden layer.

    Arguments:
    parameters -- python dictionary containing "W" and "V"
    cache -- python dictionary containing "h" and "y" from the forward pass
    X -- sequence of input values
    Y -- sequence of target values, one per output

    Returns:
    grads -- python dictionary containing gradients with respect to different parameters
             ("dW" and "dV" as nested lists, "db" as a list, "dc" as a numpy array)
    """
    # W and V are retrieved from the dictionary "parameters".
    # W is not read below; the lookup is kept so a missing key still fails here.
    W = parameters["W"]
    V = parameters["V"]

    # h and y are retrieved from the dictionary "cache".
    h = cache["h"]
    y = cache["y"]

    # Softmax + cross-entropy: the gradient at the output pre-activation
    # is simply (prediction - target).
    dk2 = np.asarray(y, dtype=float) - np.asarray(Y, dtype=float)

    # All gradient containers are sized from the inputs, so the layer
    # widths are not tied to one particular network shape.
    hidden_range = range(len(h))
    output_range = range(len(dk2))

    dV = [[dk2[j] * h[i] for j in output_range] for i in hidden_range]
    dh = [sum(dk2[j] * V[i][j] for j in output_range) for i in hidden_range]
    dc = dk2

    # Sigmoid derivative expressed through its output: h * (1 - h)
    dk1 = [dh[i] * h[i] * (1 - h[i]) for i in hidden_range]

    dW = [[dk1_j * x_i for dk1_j in dk1] for x_i in X]
    db = list(dk1)

    grads = {"dW": dW,
             "db": db,
             "dV": dV,
             "dc": dc}

    return grads

In [52]:
# Backward pass for the same sample, then show each gradient.
grads = backward_propagation(parameters, cache, X, Y)
print(f"dW = {grads['dW']}")
print(f"db = {grads['db']}")
print(f"dV = {grads['dV']}")
print(f"dc = {grads['dc']}")

dW = [[0.0, 0.0, 0.0], [-0.0, -0.0, -0.0]]
db = [0.0, 0.0, 0.0]
dV = [[-0.44039853898894116, 0.44039853898894116], [-0.44039853898894116, 0.44039853898894116], [-0.44039853898894116, 0.44039853898894116]]
dc = [-0.5  0.5]
