In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
class network():

    def __init__(self,layers,activations,learning_rate):
        self.layers = layers
        self.activations = activations
        self.learning_rate = learning_rate
        
        self.weights = [None]*(len(self.layers)-1)
        
        for layer in range(len(self.layers)-1):
            self.weights[layer] = np.random.randn(self.layers[layer+1],self.layers[layer]+1)*np.sqrt(1/(self.layers[layer]+1))
            
    def actiovation_functions(self,function,z):
        if function == 'linear':
            return z
        elif function == 'ReLu':
            return np.maximum(0,z)
            
    def actiovation_functions_derivative(self,function,X):
        X = np.copy(X)
        if function == 'linear':
            return np.ones(X.shape)
        elif function == 'ReLu':
            X[X<=0] = 0
            X[X>0] = 1
            return X
            
    def predict(self,X):
        for layer in range(len(self.layers)-1):
            X_ = np.concatenate((np.ones((X.shape[0],1)),X),axis=1)
            z = np.transpose(np.dot(self.weights[layer],np.transpose(X_)))
            a = self.actiovation_functions(self.activations[layer],z)
            X = a
        return X
        
    def train(self,X,Y,epoch):
        for e in range(epoch):
            # Forward
            Z = [None]*(len(self.layers))
            A = [None]*(len(self.layers))
            A[0] = X
            for layer in range(1,len(self.layers)):
                X_ = np.concatenate((np.ones((A[layer-1].shape[0],1)),A[layer-1]),axis=1)
                Z[layer] = np.transpose(np.dot(self.weights[layer-1],np.transpose(X_)))
                A[layer] = self.actiovation_functions(self.activations[layer-1],Z[layer])

            # Backward
            dZ = [None]*(len(self.layers)-1)
            dA = [None]*(len(self.layers)-1)
            dW = [None]*(len(self.layers)-1)
            dA[-1] = A[-1]-Y
            for layer in range(len(self.layers)-1,0,-1):
                dZ[layer-1] = np.multiply(dA[layer-1],self.actiovation_functions_derivative(self.activations[layer-1],Z[layer]))
                A_ = np.concatenate((np.ones((A[layer-1].shape[0],1)),A[layer-1]),axis=1)
                dW[layer-1] =  np.einsum('ab,ac->abc', dZ[layer-1], A_)/self.layers[layer]
                if -1 < layer-2:
                    dA[layer-2] = np.sum(dW[layer-1],axis=1)[:,1:]
            
            # Update
            for layer in range(len(self.layers)-1):
                self.weights[layer] += -self.learning_rate*np.mean(dW[layer],axis=0)

In [3]:
model = network([3,10,6],['ReLu','linear'],0.0000001)

In [4]:
b = 2.5
w = -0.7
X = (np.arange(33)-5).reshape((11,3))
Y = np.square(w*np.concatenate((X,X),axis=1)+b)

print(model.weights)
print(model.predict(X))
model.train(X,Y,10000)

[array([[ 0.21181109, -0.14329983,  0.06083787,  0.50130811],
       [-0.53244468,  0.11932963, -0.09176149, -0.34357928],
       [-0.52873295,  0.14482218,  0.14996233, -0.12676775],
       [ 0.14996246, -0.03618064, -0.31756141, -1.41709672],
       [-0.50072868, -0.12014642, -0.09418047,  0.80652646],
       [-0.36861586,  0.09249814, -0.50618253,  0.02211386],
       [-0.47790776,  0.09610067,  0.85236904, -0.24336238],
       [-1.04522358,  0.41201865,  0.08875265,  0.5943501 ],
       [-0.01238683,  0.03943278,  0.29334427, -0.28067747],
       [ 0.50357899, -0.19246429,  0.48515095,  0.81442278]]), array([[-3.26186735e-01, -6.44324021e-01,  1.27589220e-01,
         4.49056322e-02,  9.89805873e-02, -1.15134352e-01,
        -1.07704152e-01,  3.52406793e-01, -7.30760689e-01,
        -3.57238346e-01,  1.15527041e-01],
       [-1.53257622e-02, -3.51282887e-02, -1.64130194e-01,
         3.23199406e-01,  5.12712143e-01,  3.06458550e-01,
         6.01836679e-01, -5.20035537e-02, -5.4712

In [5]:
print(model.weights)

[array([[ 2.77479442e-01,  2.80475358e+00,  3.07455964e+00,
         3.58069823e+00],
       [-5.32379452e-01,  1.19003473e-01, -9.20224099e-02,
        -3.43774973e-01],
       [-5.08970967e-01,  1.00768781e+00,  1.03258995e+00,
         7.75621846e-01],
       [ 1.51435976e-01, -4.33903682e-02, -3.23297621e-01,
        -1.42135942e+00],
       [-4.12398790e-01,  3.82867542e+00,  3.94297127e+00,
         4.93200809e+00],
       [-3.68342187e-01,  9.11297728e-02, -5.07277223e-01,
         2.12928370e-02],
       [-3.80357542e-01,  4.42754433e+00,  5.28136292e+00,
         4.28318171e+00],
       [-8.91376417e-01,  7.25295849e+00,  7.08353966e+00,
         7.74298427e+00],
       [-6.74349108e-03,  2.83433495e-01,  5.42988321e-01,
        -2.53900824e-02],
       [ 6.72819859e-01,  7.38943263e+00,  8.23628874e+00,
         8.73480144e+00]]), array([[-0.32363632, -0.50671011,  0.12773594,  0.08581269,  0.10230574,
         0.06962055, -0.1070886 ,  0.55583094, -0.40973497, -0.34560616,
 

In [6]:
print(model.predict(X))
print(Y)

[[ 1.95125066e-01  3.68042449e+00  8.83705894e-01  4.48187385e+00
   2.07310518e+00  5.65209116e-02]
 [-2.66190182e-01  2.75320476e-01 -1.11683109e-01  5.46288915e-01
  -1.17376921e-01  5.47600747e-02]
 [ 1.25213634e+01  1.45452013e+01  1.52302758e+01  1.31293114e+01
   1.37803143e+01  1.49133986e+01]
 [ 3.12176103e+01  3.54667453e+01  3.88113346e+01  3.17117454e+01
   3.48451539e+01  3.85539683e+01]
 [ 4.99138572e+01  5.63882893e+01  6.23923935e+01  5.02941795e+01
   5.59099935e+01  6.21945380e+01]
 [ 6.86101041e+01  7.73098334e+01  8.59734524e+01  6.88766136e+01
   7.69748331e+01  8.58351077e+01]
 [ 8.73063510e+01  9.82313774e+01  1.09554511e+02  8.74590477e+01
   9.80396727e+01  1.09475677e+02]
 [ 1.06002598e+02  1.19152921e+02  1.33135570e+02  1.06041482e+02
   1.19104512e+02  1.33116247e+02]
 [ 1.24698845e+02  1.40074465e+02  1.56716629e+02  1.24623916e+02
   1.40169352e+02  1.56756817e+02]
 [ 1.43395092e+02  1.60996009e+02  1.80297688e+02  1.43206350e+02
   1.61234192e+02  1.8039