In [1]:
import numpy as np
import itertools

In [2]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_prime(z):
    return sigmoid(z) * (1 - sigmoid(z))

In [3]:
def get_dimensions_and_activations(
    input_layer:tuple,
    hidden_layer:list,
    output_layer:int
):

    dimensions = []
    activation_functions = []

    hidden_layer.insert(0, input_layer)

    for dim, act_func in hidden_layer:
        dimensions.append(dim)
        activation_functions.append(act_func)
        
    dimensions.append(output_layer)
    
    return dimensions, activation_functions

In [4]:
def calculate_weight_set_dimensions(dimensions):
    a, b = itertools.tee(dimensions[::-1])
    next(b, None)
    weight_set_dimensions = list(zip(a, b))[::-1]
    
    return weight_set_dimensions

In [5]:
def initialise_weights(dimensions):
    # For single hidden layer neural network there will be 2 sets of weights;
    # 1- one set to hidden layer
    # 2- one set from hidden layer
    # number of weight sets = no_of_hidden_layers + 1
    weight_dims = calculate_weight_set_dimensions(dimensions)
    no_of_weight_sets = len(weight_dims)
    
    # W_set holds weight sets such as w1, w2, w3 etc.
    W = np.empty_like(range(no_of_weight_sets), dtype=object)
    B = np.empty_like(range(no_of_weight_sets), dtype=object)
    for index, (row, column) in enumerate(weight_dims):
        np.random.seed(42)
        W[index] = np.random.rand(row, column)
        B[index] = np.random.rand(row, 1)
    return W, B

In [6]:
def forwardpass(X, W, B, dimensions):
    weight_dims = calculate_weight_set_dimensions(dimensions)
    no_of_weight_sets = len(weight_dims)

    Z = np.empty_like(range(no_of_weight_sets + 1), dtype=object)
    A = np.empty_like(range(no_of_weight_sets + 1), dtype=object)
    A[0] = X
    Z[0] = None
    for index in range(no_of_weight_sets):
        Z[index + 1] = W[index] @ A[index] + B[index]
        A[index + 1] = sigmoid(Z[index + 1])
    return A, Z

In [7]:
def calculate_error(Y, Y_hat):
    m = Y.shape[1]
    return np.sum(0.5 * np.square(Y - Y_hat)) / m

In [8]:
def calculate_delta_final(Y, A, Z):
    return (A[-1] - Y) * (sigmoid_prime(Z[-1]))

In [9]:
def backpropagation(A, Z, Y, B, W, dimensions, alfa):
    m = Y.shape[1]
    weight_dims = calculate_weight_set_dimensions(dimensions)
    no_of_weight_sets = len(weight_dims)

    delta_final = calculate_delta_final(Y, A, Z)
    delta = np.empty_like(range(no_of_weight_sets), dtype=object)
    delta[-1] = delta_final

    gradient_W = np.empty_like(range(no_of_weight_sets), dtype=object)
    gradient_B = np.empty_like(range(no_of_weight_sets), dtype=object)
    
    # here Z[index+1] is passed instead of Z[index] this is because Z[0] is none.
    # So Z[index+1] is effectively Z[index].
    for index in reversed(range(no_of_weight_sets - 1)): # 1 is substracted as delta_final is calculated above
        delta[index] = W[index + 1].T @ delta[index + 1] * sigmoid_prime(Z[index + 1])
    
    # calculate the gradient
    for index in range(no_of_weight_sets):
        gradient_W[index] = (delta[index] @ A[index].T) / m
        gradient_B[index] = delta[index]

    #update the weights
    for index, _ in enumerate(W):
        W[index] = W[index] - alfa * gradient_W[index]
        B[index] = B[index] - alfa * gradient_B[index]
    return W, B

In [10]:
def fit(
    no_of_iterations,
    dimensions,
    X,
    Y,
    alfa
):
    W, B = initialise_weights(dimensions)
    
    initial_weights = W[0]
    initial_weights[0][0] = 0.15
    initial_weights[0][1] = 0.2
    initial_weights[1][0] = 0.25
    initial_weights[1][1] = 0.30
    B[0] = 0.35

    second_set_of_weights = W[1]
    second_set_of_weights[0][0] = 0.4
    second_set_of_weights[0][1] = 0.45
    second_set_of_weights[1][0] = 0.5
    second_set_of_weights[1][1] = 0.55
    B[1] = 0.6

    for iteration in range(no_of_iterations + 1):
        A, Z = forwardpass(X, W, B, dimensions)
        W, B = backpropagation(A, Z, Y, B, W, dimensions, alfa)
        if iteration % 100 == 0 or iteration == no_of_iterations:
            print("iteration %s - loss %.10f" %(iteration, calculate_error(Y, A[-1])))
    return W, B

In [11]:
def get_dimensions_and_activations(
    input_layer:tuple,
    hidden_layer:list,
    output_layer:int
):

    dimensions = []
    activation_functions = []

    hidden_layer.insert(0, input_layer)

    for dim, act_func in hidden_layer:
        dimensions.append(dim)
        activation_functions.append(act_func)

    dimensions.append(output_layer)

In [12]:
# X = np.array([0.05, 0.10]).reshape((2, 1))
# Y = np.array([0.01, 0.99]).reshape((2, 1))

In [13]:
# dimensions = [2,2,2]

# W, B = fit(20000, dimensions, X, Y, 0.5)

In [14]:
from sklearn.datasets import load_iris
data = load_iris()
X = data.data[:,[0,2]].T
y = data.target
from sklearn import preprocessing
lb = preprocessing.LabelBinarizer()
Y = lb.fit_transform(y)
Y = Y.T

In [15]:
def get_shapes(any_):
    for array in any_:
        try:
            print(array.shape)
        except:
            print("NONE")
    print("\n")

In [16]:
W, B = initialise_weights([2,6,4,3])

In [19]:
calculate_weight_set_dimensions([2,6,4,3])

[(6, 2), (4, 6), (3, 4)]

In [20]:
A, Z = forwardpass(X, W, B, [2,6,4,3])

In [21]:
get_shapes(W)

(6, 2)
(4, 6)
(3, 4)




In [22]:
get_shapes(B)

(6, 1)
(4, 1)
(3, 1)




In [23]:
get_shapes(A)

(2, 150)
(6, 150)
(4, 150)
(3, 150)




In [24]:
get_shapes(Z)

NONE
(6, 150)
(4, 150)
(3, 150)




In [26]:
A[-1]

array([[0.96550778, 0.96548475, 0.96540214, 0.96550258, 0.96549641,
        0.96568006, 0.96544787, 0.96554871, 0.96542153, 0.96553762,
        0.96559046, 0.96557578, 0.96547278, 0.96520908, 0.96547184,
        0.96561934, 0.96548612, 0.96550778, 0.96570642, 0.96555953,
        0.96568006, 0.96555953, 0.9651824 , 0.9656519 , 0.96570356,
        0.96559717, 0.96559717, 0.96557009, 0.96551888, 0.96556467,
        0.96557578, 0.96559046, 0.96557009, 0.9655506 , 0.96553762,
        0.96537912, 0.96549697, 0.96548475, 0.96536103, 0.96555953,
        0.96543999, 0.96537512, 0.96536103, 0.96559717, 0.96573118,
        0.96547278, 0.96560748, 0.96544787, 0.96558039, 0.96549641,
        0.96624529, 0.96621497, 0.9662519 , 0.96614496, 0.96622406,
        0.96618959, 0.96622282, 0.96603974, 0.96622737, 0.96612268,
        0.96607082, 0.96617704, 0.96616585, 0.96621598, 0.96611188,
        0.96621925, 0.96618575, 0.9661656 , 0.96620798, 0.96614072,
        0.96621447, 0.96616984, 0.96623316, 0.96

In [28]:
calculate_delta_final(Y, A, Z)

array([[-0.00114868, -0.00115018, -0.0011556 , -0.00114902, -0.00114942,
        -0.00113743, -0.0011526 , -0.001146  , -0.00115433, -0.00114673,
        -0.00114327, -0.00114423, -0.00115097, -0.0011683 , -0.00115103,
        -0.00114139, -0.00115009, -0.00114868, -0.00113572, -0.00114529,
        -0.00113743, -0.00114529, -0.00117006, -0.00113927, -0.0011359 ,
        -0.00114284, -0.00114284, -0.00114461, -0.00114795, -0.00114496,
        -0.00114423, -0.00114327, -0.00114461, -0.00114588, -0.00114673,
        -0.00115711, -0.00114938, -0.00115018, -0.0011583 , -0.00114529,
        -0.00115312, -0.00115737, -0.0011583 , -0.00114284, -0.00113411,
        -0.00115097, -0.00114216, -0.0011526 , -0.00114393, -0.00114942,
         0.03151441,  0.03154074,  0.03150867,  0.03160152,  0.03153284,
         0.03156277,  0.03153392,  0.03169283,  0.03152997,  0.03162086,
         0.03166586,  0.03157367,  0.03158338,  0.03153986,  0.03163023,
         0.03153702,  0.03156611,  0.0315836 ,  0.0

In [None]:
A[-1].shape

In [None]:
A[-1][0][0]

In [None]:
A[-1][1][0]

In [None]:
A[-1]

# Testing with Benchmark Datasets - Iris Dataset

In [None]:
from sklearn.datasets import load_iris
data = load_iris()
X = data.data[:,[0,2]].T
y = data.target
from sklearn import preprocessing
lb = preprocessing.LabelBinarizer()
Y = lb.fit_transform(y)
Y = Y.T

In [None]:
print(X.shape)
print(Y.shape)

In [None]:
dimensions = [X.shape[0], 6, 4, 3]

W, B = fit(50000, dimensions, X, Y, 0.01)

In [None]:
A, Z = forwardpass(X, W, B, dimensions)
y_hat = A[-1]

for index,(act, predicted) in enumerate(zip(y, np.argmax(A[-1], axis = 0))):
    print("index: %s___actual %s__predicted %s" %(index, act, predicted))

# Make Moons dataset

In [None]:
from sklearn.datasets import make_moons
from sklearn import preprocessing
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
x,y =make_moons(n_samples=1500, noise=.05)
X = x.T
lb = preprocessing.LabelBinarizer()
Y = lb.fit_transform(y)
y_next = np.where(y==0,1,0)
Y = Y.T
Y = list(Y)
Y.append(y_next)
Y = np.array(Y)

In [None]:
dimensions = [X.shape[0], 6, 4, Y.shape[0]]

W, B = fit(50000, dimensions, X, Y, 0.3)

In [None]:
A, Z = forwardpass(X, W, B, dimensions)
y_hat = A[-1]

for index,(act, predicted) in enumerate(zip(y, np.argmax(A[-1], axis = 0))):
    if predicted == 1:
        predicted = 0
    else:
        predicted = 1
    if predicted != act:
        print("NO MATCH!")
    print("index: %s___actual %s__predicted %s" %(index, act, predicted))

In [None]:
fig, ax = plt.subplots(figsize=(10,7))

# Divide the dataset as negatives and positives.
dataset = np.c_[x,y]
negatives = dataset[dataset[:, -1] == 0]
positives = dataset[dataset[:, -1] == 1]

# Make the plot pretty.
ax.set_xlabel("Class 1", fontsize=15, labelpad=10)
ax.set_ylabel("Class 2", fontsize=15, labelpad=10)
ax.set_title("Class 1 vs Class 2", fontsize=20)
ax.tick_params(labelsize=14)

# Plot both negative and positive classes to the same figure.
plot_data(ax, negatives[:, 0], negatives[:, 1], param_dict={"c": "black", "marker": "x", "label": "not admitted"})
plot_data(ax, positives[:, 0], positives[:, 1], param_dict={"c": "y", "marker": "d", "label": "admitted"})
ax.legend(prop={'size': 14});

# Get the decision boundary and plot it within the same figure.

In [None]:
np.linspace(1, 10, 60)

In [None]:
y.shape

In [None]:
def plot_data(
    ax,
    data1,
    data2,
    param_dict
):

    ax.scatter(data1, data2, **param_dict)


def get_decision_boundary(
    X,
    thetas,
    is_polynomial=False,
    PolynomialFeatures_instance=None
):
    thetas = thetas.reshape(-1, 1)

    x1_min, x1_max = X[:, 0].min(), X[:, 0].max(),
    x2_min, x2_max = X[:, 1].min(), X[:, 1].max(),
    xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))

    if is_polynomial:
        h = sigmoid(PolynomialFeatures_instance.fit_transform(np.c_[xx1.flatten(), xx2.flatten()]) @ (thetas))

    else:
        constant = np.ones((xx1.flatten().shape[0], 1))
        h = sigmoid(np.c_[constant, xx1.flatten(), xx2.flatten()] @ (thetas))

    h = h.reshape(xx1.shape)

    return xx1, xx2, h

In [None]:

# W, B = initialise_weights(dimensions)

# initial_weights = W[0]
# initial_weights[0][0] = 0.15
# initial_weights[0][1] = 0.2
# initial_weights[1][0] = 0.25
# initial_weights[1][1] = 0.30
# B[0] = 0.35

# second_set_of_weights = W[1]
# second_set_of_weights[0][0] = 0.4
# second_set_of_weights[0][1] = 0.45
# second_set_of_weights[1][0] = 0.5
# second_set_of_weights[1][1] = 0.55
# B[1] = 0.6

# A, Z = forwardpass(X, W, B)
# J = calculate_error(Y, A[-1])
# W = backpropagation(A, Z, Y, W)