In [205]:
import numpy as np
import itertools

In [206]:
# ============= ACTIVATION FUNCTIONS ===============#

def sigmoid(Z, prime=False):
    # np.
    if prime:
        return sigmoid(Z) * (1 - sigmoid(Z))
    return 1 / (1 + np.exp(-Z))


def linear(Z, prime=False):
    if prime:
        return np.ones_like(Z)
    return Z


def relu(Z, prime=False):
    if prime:
        return np.where(Z < 0, 0, Z )
    return np.where(Z < 0, 0, Z )


def tanh(Z, prime=False):
    # np.tanh() could be used directly to speed this up
    if prime:
        return 1 - np.power(tanh(Z), 2)
    return (2 / (1 + np.exp(-2 * Z))) - 1


def elu(Z, prime=False):
    # https://mlfromscratch.com/activation-functions-explained/#/
    alpha = 0.2
    if prime:
        return None
    return np.where(Z < 0, alpha * (np.exp(Z) - 1), Z)


def softmax(Z, prime=False):
    if prime:
        return 1
    return np.exp(Z) / np.sum(np.exp(Z),axis=0)

In [207]:
# ============== LOSS FUNCTIONS ===============#

def calculate_error(Y, Y_hat):
    m = Y.shape[1]
    return np.sum(0.5 * np.square(Y - Y_hat)) / m

In [247]:
class NeuralNetwork:
    

    def __init__(
        self,
        input_layer: tuple,
        hidden_layer: list, # list of tuples
        output_layer: int,
        alpha: float,
        no_of_iterations: int,
    ):
        self.input_layer = input_layer
        self.hidden_layer = hidden_layer
        self.output_layer = output_layer
        self.alpha = alpha
        self.no_of_iterations = no_of_iterations
        
        self.m = X.shape[1] # where (no_of_features, no_of_training_examples)
        self.layers = len(self.weight_set_dimensions)


    def calculate_delta_final(self, Y):
        final_act_func = self.activation_functions[-1]
        arg_to_pass_to_eval = "(self._params['Z' + str(self.layers)], prime=True)"
        
        return (self._params["A" + str(self.layers)] - Y) * eval(final_act_func + arg_to_pass_to_eval)


    def get_dimensions_and_activations(self):
        self.dimensions = []
        self.activation_functions = []
        
        self.dimensions.append(self.input_layer[0])
        self.activation_functions.append(self.input_layer[1])

        for dim, act_func in self.hidden_layer:
            self.dimensions.append(dim)
            self.activation_functions.append(act_func)

        self.dimensions.append(self.output_layer)

        
    @property
    def weight_set_dimensions(self):
        self.get_dimensions_and_activations()
        a, b = itertools.tee(self.dimensions[::-1])
        next(b, None)
        weight_set_dimensions = list(zip(a, b))[::-1]
        return weight_set_dimensions


    def initialise_weights(self):
        # For single hidden layer neural network there will be 2 sets of weights;
        # 1- one set to hidden layer
        # 2- one set from hidden layer
        # number of weight sets = no_of_hidden_layers + 1

        # W_set holds weight sets such as w1, w2, w3 etc.
#         self.W = np.empty_like(range(self.layers), dtype=object)
#         self.B = np.empty_like(range(self.layers), dtype=object)
        self.parameters = {}
        for layer, (row, column) in zip(range(1, self.layers + 1), self.weight_set_dimensions):
#             self.W[layer] = np.random.rand(row, column)
#             self.B[layer] = np.random.rand(row, 1)
            self.parameters["W" + str(layer)] = np.random.rand(row, column)
            self.parameters["B" + str(layer)] = np.random.rand(row, 1)


    def forwardpass(self, X, fp=False):
        self._params = {}
        self._params["A0"] = X
#         self.Z = np.empty_like(range(self.layers + 1), dtype=object)
#         self.A = np.empty_like(range(self.layers + 1), dtype=object)
#         self.A[0] = X
#         self.Z[0] = None
        for layer in range(1, self.layers + 1):
            # activation_function starts from 0 whereas layer starts from 1
            active_function = self.activation_functions[layer - 1]

            self._params["Z" + str(layer)] = (self.parameters["W" + str(layer)] @
                                              self._params["A" + str(layer - 1)] +
                                              self.parameters["B" + str(layer)])
            arg_to_pass_to_eval = "(self._params['Z' + str(layer)])"
            self._params["A" + str(layer)] = eval(active_function + arg_to_pass_to_eval)
            
#             arg_to_pass_to_eval = "(self.Z[layer + 1])"
#             self.Z[layer + 1] = self.W[layer] @ self.A[layer] + self.B[layer]
#             self.A[layer + 1] = eval(active_function + arg_to_pass_to_eval)+

     
    def backpropagation(self, Y):
        delta_final = self.calculate_delta_final(Y)
        self.delta = np.empty_like(range(self.layers), dtype=object)
        self.delta[-1] = delta_final

        self.gradient_W = np.empty_like(range(self.layers), dtype=object)
        self.gradient_B = np.empty_like(range(self.layers), dtype=object)

        # here Z[index+1] is passed instead of Z[index] this is because Z[0] is none.
        # So Z[index+1] is effectively Z[index].

        for layer in reversed(range(self.layers - 1)): # 1 is substracted as delta_final is calculated above
            active_function = self.activation_functions[layer]
            arg_to_pass_to_eval = "(self.Z[layer + 1], prime=True)"
            
            self.delta[layer] = (
                self.W[layer + 1].T @ self.delta[layer + 1] *
                eval(active_function + arg_to_pass_to_eval)
            )

        # calculate the gradient
        for layer in range(self.layers):
            print(self.delta[layer].shape, self.A[layer+1].shape)
            self.gradient_W[layer] = (self.delta[layer] @ self.A[layer+1].T) / self.m
            self.gradient_B[layer] = np.sum(self.delta[layer], axis=1, keepdims=True) / self.m

        #update the weights
        for layer in range(self.layers):
            self.W[layer] -= self.alpha * self.gradient_W[layer]
            self.B[layer] -= self.alpha * self.gradient_B[layer]
    
    def fit(self, X, Y):
        self.initialise_weights()
        
        for iteration in range(self.no_of_iterations + 1):
            self.forwardpass(X)
            self.backpropagation(Y)
            if iteration % 100 == 0 or iteration == self.no_of_iterations:
                print("iteration %s - loss %.20f" %(iteration, calculate_error(Y, self.A[-1])))
                
    def predict(self, X, fp=False):
        self.forwardpass(X, fp=fp)

        return np.argmax(self.A[-1].T, axis=1)

In [248]:
from sklearn.datasets import load_iris
data = load_iris()
X = data.data[:,[0,2]].T
y = data.target
from sklearn import preprocessing
lb = preprocessing.LabelBinarizer()
Y = lb.fit_transform(y)
Y = Y.T

In [249]:
print(X.shape)
print(Y.shape)

(2, 150)
(3, 150)


In [250]:
self = NeuralNetwork(
    input_layer=(2, "relu"),
    hidden_layer=[(6, "relu"),(4, "softmax")],
    output_layer=3,
    alpha=0.5,
    no_of_iterations=20000
)

# model.fit(X, Y)

In [251]:
self.initialise_weights()

In [252]:
self.weight_set_dimensions

[(6, 2), (4, 6), (3, 4)]

In [253]:
self.parameters.keys()

dict_keys(['W1', 'B1', 'W2', 'B2', 'W3', 'B3'])

In [261]:
self._params.keys()

dict_keys(['A0', 'Z1', 'A1', 'Z2', 'A2', 'Z3', 'A3'])

In [254]:
self.forwardpass(X)

In [260]:
delta_final = self.calculate_delta_final(Y)

In [264]:
(delta_final @ self._params["A" + str(2)].T) / 150

array([[-4.27809887, -4.86625232, -3.73275976, -2.64819942],
       [11.28365084, 13.32808996, 10.1340432 ,  7.75740348],
       [-7.00555197, -8.46183764, -6.40128344, -5.10920406]])

In [170]:
self.activation_functions

['relu', 'relu', 'softmax']

In [171]:
self._params["A2"].shape

(4, 150)

In [172]:
self._params["Z3"].shape

(3, 150)

In [266]:
self.parameters["W2"].shape

(4, 6)

In [None]:
self.parameters["W2"].T @ 

In [26]:
X = np.array([0.05, 0.10]).reshape((2, 1))
Y = np.array([0.01, 0.99]).reshape((2, 1))

regressor = NeuralNetwork(
    input_layer=(2, "relu"),
    hidden_layer=[(2, "relu"), (2, "sigmoid")],
    output_layer=2,
    alpha=0.5,
    no_of_iterations=20000
)

regressor.fit(X, Y)

ValueError: matmul: Input operand 0 does not have enough dimensions (has 0, gufunc core with signature (n?,k),(k,m?)->(n?,m?) requires 1)

In [10]:
regressor.A[-1][0][0]

AttributeError: 'NeuralNetwork' object has no attribute 'A'

In [None]:
regressor.A[-1][1][0]

# Testing with Benchmark Datasets

##  1.Iris Dataset

In [69]:
from sklearn.datasets import load_iris
data = load_iris()
X = data.data[:,[0,2]].T
y = data.target
from sklearn import preprocessing
lb = preprocessing.LabelBinarizer()
Y = lb.fit_transform(y)
Y = Y.T

In [70]:
print(X.shape)
print(Y.shape)

(2, 150)
(3, 150)


In [71]:
self = NeuralNetwork(
    input_layer=(2, "relu"),
    hidden_layer=[(6, "relu"),(4, "softmax")],
    output_layer=3,
    alpha=0.5,
    no_of_iterations=20000
)

# model.fit(X, Y)

In [72]:
self.initialise_weights()

In [73]:
self.forwardpass(X)

In [75]:
self.backpropagation(Y)

(6, 150) (6, 150)
(4, 150) (4, 150)
(3, 1) (3, 150)


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 150 is different from 1)

In [None]:
delta_final = self.calculate_delta_final(Y)
self.delta = np.empty_like(range(self.layers), dtype=object)
self.delta[-1] = delta_final

# self.gradient_W = np.empty_like(range(self.layers), dtype=object)
# self.gradient_B = np.empty_like(range(self.layers), dtype=object)

# # here Z[index+1] is passed instead of Z[index] this is because Z[0] is none.
# # So Z[index+1] is effectively Z[index].

# for layer in reversed(range(self.layers - 1)): # 1 is substracted as delta_final is calculated above
#     active_function = self.activation_functions[layer]
#     arg_to_pass_to_eval = "(self.Z[layer + 1], prime=True)"

#     self.delta[layer] = (
#         self.W[layer + 1].T @ self.delta[layer + 1] *
#         eval(active_function + arg_to_pass_to_eval)
#     )
#     print(self.delta[layer].shape)


# # calculate the gradient
# for layer in range(self.layers):
#     self.gradient_W[layer] = (self.delta[layer] @ self.A[layer].T) / self.m
#     self.gradient_B[layer] = self.delta[layer]

# #update the weights
# for layer in range(self.layers):
#     self.W[layer] -= self.alpha * self.gradient_W[layer]
#     self.B[layer] -= self.alpha * self.gradient_B[layer]

In [None]:
self.delta

In [None]:
self.delta

In [None]:
(model.W[2].T @ model.delta[2])

In [None]:
model.delta[-1]

In [None]:
model.delta

In [None]:
model.weight_set_dimensions

In [None]:
model.activation_functions

In [None]:
model.layers

In [None]:
model.predict(X)

In [None]:
class_labels = np.argmax(Y.T, axis=1)
class_labels

In [None]:
class_labels == model.predict(X)

In [None]:
model.dimensions

In [None]:
model.weight_set_dimensions

In [None]:
model.B[0]

In [None]:
model.B[1]

In [None]:
model.B[2]

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
dt = data.data[:,[0,2]]
x_min, x_max = dt[:, 0].min() - 1, dt[:, 0].max() + 1
y_min, y_max = dt[:, 1].min() - 1, dt[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

data_to_predict = np.c_[xx.ravel(), yy.ravel()].T
# here "model" is your model's prediction (classification) function
Z = model.predict(data_to_predict, fp=True) 

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure(figsize=(12,8))
plt.contourf(xx, yy, Z, alpha=0.4)
#plt.axis('off')
plt.scatter(dt[:, 0], dt[:, 1], c=y,s=20, edgecolor='k')
plt.xlabel('sepal length')
plt.ylabel('petal length')

In [None]:
model.W[0].shape

In [None]:
model.B[0].shape

In [None]:
x_min, x_max = dt[:, 0].min() - 1, dt[:, 0].max() + 1

In [None]:
x_min

In [None]:
x_max

In [None]:
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

In [None]:
np.arange(y_min, y_max, 0.1).shape

In [None]:
np.arange(x_min, x_max, 0.1).shape

In [None]:
xx.shape

In [None]:
xx

In [None]:
plt.scatter(xx, yy)

In [None]:
data_to_predict = np.c_[xx.ravel(), yy.ravel()].T

In [None]:
data_to_predict

In [None]:
model.B[0].shape

In [None]:
model.B[1].shape

In [None]:
for W_ in model.W:
    print(W_.shape)

In [None]:
for A_ in model.A:
    try:
        print(A_.shape)
    except:
        print(None)

In [None]:
model.A

In [None]:
xx.shape

In [None]:
yy.shape

In [None]:
np.c_[xx.ravel(), yy.ravel()].shape

## 2.Make Moons dataset

In [None]:
from sklearn.datasets import make_moons
from sklearn import preprocessing
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
x,y =make_moons(n_samples=1500, noise=.05)
X = x.T
lb = preprocessing.LabelBinarizer()
Y = lb.fit_transform(y)
y_next = np.where(y==0,1,0)
Y = Y.T
Y = list(Y)
Y.append(y_next)
Y = np.array(Y)

In [None]:
print(X.shape)
print(Y.shape)

In [None]:
model = NeuralNetwork(
    input_layer=(2, "relu"),
    hidden_layer=[(2, "relu"), (2, "softmax")],
    output_layer=2,
    alpha=0.5,
    no_of_iterations=20000
)

model.fit(X, Y)

In [None]:
model.predict(X)

In [None]:
class_labels = np.argmax(Y.T, axis=1)
class_labels

In [None]:
class_labels == model.predict(X)

In [None]:
X = np.array([0.05, 0.10]).reshape((2, 1))
Y = np.array([0.01, 0.99]).reshape((2, 1))

regressor = NeuralNetwork(
    input_layer=(2, "sigmoid"),
    hidden_layer=[(2, "sigmoid")],
    output_layer=2,
    alpha=0.5,
    no_of_iterations=20000
)

In [None]:
regressor.initialise_weights()

In [None]:
regressor.B[0]

In [None]:
initial_weights = regressor.W[0]
initial_weights[0][0] = 0.15
initial_weights[0][1] = 0.2
initial_weights[1][0] = 0.25
initial_weights[1][1] = 0.30
regressor.B[0] = 0.35

second_set_of_weights = regressor.W[1]
second_set_of_weights[0][0] = 0.4
second_set_of_weights[0][1] = 0.45
second_set_of_weights[1][0] = 0.5
second_set_of_weights[1][1] = 0.55
regressor.B[1] = 0.6

In [None]:
regressor.forwardpass(X)

In [None]:
for set_1, A in enumerate(regressor.A):
    for neuron_index, neuron in enumerate(A):
        print("Set %.0f _ Neuron % .0f _ value: % .16f" %(set_1, neuron_index, neuron))

In [None]:
calculate_error(Y, regressor.A[-1])

In [None]:
regressor.backpropagation(Y)

In [None]:
regressor.delta

In [None]:
for set_1, A in enumerate(regressor.delta):
    for neuron_index, neuron in enumerate(A):
        print("Set %.0f _ delta % .0f _ value: % .18f" %(set_1, neuron_index, neuron))

In [None]:
regressor.layers

In [None]:
regressor.activation_functions

In [None]:
from itertools import chain
for element in chain(*regressor.W):
    for weight in element:
        print(weight)

In [None]:
regressor.B[0]

In [None]:
regressor.B[1]

In [None]:
regressor.W[0][0][0]

In [None]:
initial_weights = self.W[0]
initial_weights[0][0] = 0.15
initial_weights[0][1] = 0.2
initial_weights[1][0] = 0.25
initial_weights[1][1] = 0.30
self.B[0] = 0.35

second_set_of_weights = self.W[1]
second_set_of_weights[0][0] = 0.4
second_set_of_weights[0][1] = 0.45
second_set_of_weights[1][0] = 0.5
second_set_of_weights[1][1] = 0.55
self.B[1] = 0.6