In [18]:
from sklearn.datasets import load_iris
import numpy as np

In [19]:
def activation(s):
    return 0 if s <= 0 else s

def derivativeActivation(s):
    # return 0 if s <= 0 else 1
    return (1 - tanh(s) ** 2)
    # return (1 - (s) ** 2)

def derivativeActivationW(s):
    # return 0 if s <= 0 else 1
    # return (1 - tanh(s) ** 2)
    return (1 - (s ** 2))


def outputf(s):
    return (1) / ((1) + np.exp(-s))

def derivativeOutput(s):
    return (outputf(s)) * (1 - outputf(s))

def errorf(x_L, y):
    if y == 1:
        return np.log(x_L)
    else:
        return -np.log(1 - x_L)
    
def errorPerSample(X, yn):
    return errorf(X[-1][-1], yn)
    
def tanh(s):
    return np.tanh(s)

In [20]:
X = [1, 2]
W = [
    [[0.1,0.2],
     [0.3,0.4]],
    [[0.2],
     [1],
     [-3]],
    [[1],
     [2]]
]

In [24]:
def forwardPropagation(x, weights):
    Xl = np.array(x)
    W = np.array(weights)
    S = []
    X = [x]
    for index,l in enumerate(W):
        wl = np.array(l)
        sl = np.transpose(wl).dot(Xl)
        Xl_before_activation = sl
        if index != len(W) - 1:
            activation_function = np.vectorize(tanh)
            Xl = activation_function(Xl_before_activation)
            Xl = np.insert(Xl, 0, 1, axis=0)
        else:
            output_function = np.vectorize(tanh)
            Xl = output_function(Xl_before_activation)
        X.append(Xl)
        S.append(sl)
    return np.array(X),np.array(S)

In [25]:
x,s = forwardPropagation(X,W)
for i in x:
    print(i)
print('\n',s)

[1, 2]
[1.         0.60436778 0.76159416]
[ 1.         -0.90154565]
[-0.66576144]

 [array([0.7, 1. ]) array([-1.48041469]) array([-0.80309131])]


In [26]:
from copy import deepcopy
def backPropagation(X, y_n, s, weights):
    # weights_copy = deepcopy(weights)
    g = [None] * len(X)
    X = np.array(X)
    for layer, Xl in enumerate(reversed(X)):
        layer = len(X) - layer - 1
        if layer == len(X) - 1:
            delta = 2 * (Xl[0] - y_n) * derivativeActivation(s[-1][0])
            g[layer] = np.array([delta])
        elif layer > 0:
            derivatives = np.zeros([len(Xl) - 1,len(Xl) - 1])
            for i in range(len(Xl) - 1):
                derivatives[i][i] = derivativeActivationW(Xl[i + 1])
            
            Wl = weights[layer]
            Wl_t = np.array(Wl)
            g[layer] = ((Wl_t).dot((g[layer + 1]).T)[1:]).T.dot(derivatives)
            
            
            # print('Wl',Wl)
            # print('gl', g[layer + 1])
            # print('der', derivatives)
            # print('------------------------------')            
            
    g = g[1:]
    
    
    updatedW = weights
    for layer, Xl in enumerate(X[:-1]):
        # print(Xl)
        # print(g[layer])
        # print('------------------------------')
        updatedW[layer] = np.dot(np.array([Xl]).T, np.array([g[layer]]))
    
    return (updatedW)

In [27]:
g = backPropagation(x, 1, s, W)
for i in g:
    print(i)

[[-0.44083838  0.87503983]
 [-0.88167675  1.75007965]]
[[-0.69451848]
 [-0.41974459]
 [-0.52894122]]
[[-1.85486437]
 [ 1.67224491]]


In [28]:
W

[array([[-0.44083838,  0.87503983],
        [-0.88167675,  1.75007965]]), array([[-0.69451848],
        [-0.41974459],
        [-0.52894122]]), array([[-1.85486437],
        [ 1.67224491]])]

In [29]:
def updateWeights(weights, g, alpha):
    nW = deepcopy(weights)
    for i in range(len(nW)):
        for j in range(len(nW[i])):
            nW[i][j] = nW[i][j] - (alpha * g[i][j]) 
    return nW

In [30]:
nW = updateWeights(W, g, 1)
for i in nW:
    print(i)

[[0. 0.]
 [0. 0.]]
[[0.]
 [0.]
 [0.]]
[[0.]
 [0.]]


In [31]:
def fit_NeuralNetwork(X_train, y_train, alpha, hidden_layer_sizes, epochs):
    layer_units = ([len(X_train[-1])] + hidden_layer_sizes + [len(y_train[-1])])
    weight = [np.zeros((n_fan_in_ + 1, n_fan_out_)) * 0 + 0.1 for n_fan_in_,
                                                        n_fan_out_ in zip(layer_units[:-1],
                                                                       layer_units[1:])]
    X_train = np.insert(X_train, 0, 1, axis=1)
    error_list = []
    for _ in range(epochs):
        error_over_epoch = 0
        for N, x_n in enumerate(X_train):
            X_n ,S_n = forwardPropagation(x_n, W)
            g_n = backPropagation(X_n, y_train[N], S_n, W)
            weight = updateWeights(weight, g_n, alpha)
            error_over_epoch += errorPerSample(X_n, y_train[N])
        error_list.append(error_over_epoch / len(X_train))
    return weight, error_list

In [32]:
w, err = fit_NeuralNetwork([[3],[2],[-2]],[[1],[1],[-1]],1,[2,1],100)
print(w)

ValueError: shapes (1,1,2) and (2,1,1) not aligned: 2 (dim 2) != 1 (dim 1)

In [13]:
def pred(x_n, weights):
    x_n = np.insert(x_n, 0, 1, axis=0)
    x,s = forwardPropagation(x_n, weights)
    return 1 if  x[-1][-1] >= 0.5 else -1

In [14]:
pred([1], w)
    

1

In [15]:
def confMatrix(X_train, y_train, w):
    # Add implementation here
    y_pred = []
    for x_n in X_train:
        y_pred.append(pred(x_n, w))

    # the confusion maxtrix that we will return
    # matrix = [[0, 0], [0, 0]]
    matrix = np.zeros((2, 2), np.int8)

    # Populating our matrix using the prediction data
    for index, y in enumerate(y_train):
        if y == -1 and y_pred[index] == -1:
            matrix[0][0] += 1
        elif y == -1 and y_pred[index] == 1:
            matrix[0][1] += 1
        elif y == 1 and y_pred[index] == -1:
            matrix[1][0] += 1
        else:
            matrix[1][1] += 1

    # returning the result
    return matrix

In [16]:
X_train, y_train = load_iris(return_X_y=True)

In [17]:
len(y_train[-1])

TypeError: object of type 'numpy.int32' has no len()