In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
class network():

    def __init__(self,layers,activations,learning_rate):
        self.layers = layers
        self.activations = activations
        self.learning_rate = learning_rate
        
        self.weights = [None]*(len(self.layers)-1)
        
        for layer in range(len(self.layers)-1):
            self.weights[layer] = np.random.randn(self.layers[layer+1],self.layers[layer]+1)*np.sqrt(1/(self.layers[layer]+1))
            
    def actiovation_functions(self,function,z):
        if function == 'linear':
            return z
        elif function == 'ReLu':
            return np.maximum(0,z)
            
    def actiovation_functions_derivative(self,function,X):
        X = np.copy(X)
        if function == 'linear':
            return np.ones(X.shape)
        elif function == 'ReLu':
            X[X<=0] = 0
            X[X>0] = 1
            return X
            
    def predict(self,X):
        for layer in range(len(self.layers)-1):
            X_ = np.concatenate((np.ones((X.shape[0],1)),X),axis=1)
            z = np.transpose(np.dot(self.weights[layer],np.transpose(X_)))
            a = self.actiovation_functions(self.activations[layer],z)
            X = a
        return X
        
    def train(self,X,Y,epoch):
        for e in range(epoch):
            # Forward
            Z = [None]*(len(self.layers))
            A = [None]*(len(self.layers))
            A[0] = X
            for layer in range(1,len(self.layers)):
                X_ = np.concatenate((np.ones((A[layer-1].shape[0],1)),A[layer-1]),axis=1)
                Z[layer] = np.transpose(np.dot(self.weights[layer-1],np.transpose(X_)))
                A[layer] = self.actiovation_functions(self.activations[layer-1],Z[layer])

            # Backward
            dZ = [None]*(len(self.layers)-1)
            dA = [None]*(len(self.layers)-1)
            dW = [None]*(len(self.layers)-1)
            dA[-1] = A[-1]-Y
            for layer in range(len(self.layers)-1,0,-1):
                dZ[layer-1] = np.multiply(dA[layer-1],self.actiovation_functions_derivative(self.activations[layer-1],Z[layer]))
                A_ = np.concatenate((np.ones((A[layer-1].shape[0],1)),A[layer-1]),axis=1)
                dW[layer-1] =  np.einsum('ab,ac->abc', dZ[layer-1], A_)
                if -1 < layer-2:
                    dA[layer-2] = np.sum(dW[layer-1],axis=1)[:,1:]
            
            # Update
            for layer in range(len(self.layers)-1):
                self.weights[layer] += -self.learning_rate*np.mean(dW[layer],axis=0)

In [3]:
model = network([3,100,6],['ReLu','linear'],0.0000001)

In [4]:
b = 2.5
w = -0.7
X = (np.arange(33)-5).reshape((11,3))
Y = np.square(w*np.concatenate((X,X),axis=1)+b)

print(model.weights)
print(model.predict(X))
model.train(X,Y,100000)

[array([[ 2.32387935e-01,  3.06657053e-01,  2.20692289e-01,
         3.01716183e-02],
       [ 2.48871384e-01, -6.56459800e-01,  7.53424209e-01,
        -3.93063538e-01],
       [ 3.16333992e-01,  3.49065385e-01, -2.42193666e-01,
         7.24770787e-02],
       [ 5.23895916e-01, -6.98205935e-01,  9.28647673e-01,
         2.03737426e-01],
       [-1.34712992e-01,  2.52569967e-01,  7.31433644e-01,
        -1.11562336e-01],
       [ 8.23435498e-01, -7.62595688e-01, -1.07314739e-01,
        -8.80134205e-02],
       [-3.39076024e-01, -1.69803175e-02,  7.03772239e-01,
        -1.20547249e-01],
       [-1.22171213e+00, -1.26157863e-01,  1.40679749e-01,
         1.37180563e-01],
       [-5.46689350e-01, -4.36188662e-02, -4.29103448e-01,
        -2.97752732e-01],
       [-5.92028181e-01, -2.53147490e-01,  2.23277088e-01,
        -9.26714426e-03],
       [-6.03102837e-02,  1.81925128e-01, -3.20616684e-01,
         3.36023816e-01],
       [-1.95385139e-01, -8.62614920e-02,  1.55882284e-01,
     

In [5]:
print(model.weights)

[array([[ 2.42464816e-01, -5.89205745e+00, -5.96794533e+00,
        -6.14838912e+00],
       [ 2.94481256e-01, -8.25244960e-01,  6.30248921e-01,
        -4.70628954e-01],
       [ 3.21443876e-01, -1.67499821e+00, -2.26114737e+00,
        -1.94136674e+00],
       [ 5.82333354e-01, -6.33768758e+00, -4.65239654e+00,
        -5.31886935e+00],
       [-1.33834282e-01, -9.20843277e+00, -8.72869039e+00,
        -9.57080766e+00],
       [ 9.65352908e-01, -1.29008402e+00, -4.92885663e-01,
        -3.31666934e-01],
       [-3.42623697e-01, -6.09137281e+00, -5.37416793e+00,
        -6.20203509e+00],
       [-1.25175333e+00, -1.28642244e+00, -1.04962603e+00,
        -1.08316642e+00],
       [-5.03537692e-01, -2.34182519e-01, -5.76515442e-01,
        -4.02013067e-01],
       [-5.92028181e-01, -2.53147490e-01,  2.23277088e-01,
        -9.26714426e-03],
       [-5.29539747e-02, -2.07293856e+00, -2.56812406e+00,
        -1.90412725e+00],
       [-1.61058130e-01, -8.54908994e+00, -8.27261915e+00,
     

In [6]:
print(model.predict(X))
print(Y)

[[37.17533159 28.53424319 21.12557411 36.96927759 28.64560543 20.94311633]
 [10.51608265  8.46069705  6.38009403 11.33681838  8.01712743  7.10688128]
 [ 0.6604516   0.67213911  0.82869823  0.58073957  0.73632853  0.85251682]
 [ 0.64347963  0.64908605  0.81096374  0.57904355  0.75635346  0.83338652]
 [ 0.64347963  0.64908605  0.81096374  0.57904355  0.75635346  0.83338652]
 [ 0.64347963  0.64908605  0.81096374  0.57904355  0.75635346  0.83338652]
 [ 0.64347963  0.64908605  0.81096374  0.57904355  0.75635346  0.83338652]
 [ 0.64347963  0.64908605  0.81096374  0.57904355  0.75635346  0.83338652]
 [ 0.64347963  0.64908605  0.81096374  0.57904355  0.75635346  0.83338652]
 [ 0.64347963  0.64908605  0.81096374  0.57904355  0.75635346  0.83338652]
 [ 0.64347963  0.64908605  0.81096374  0.57904355  0.75635346  0.83338652]]
[[3.6000e+01 2.8090e+01 2.1160e+01 3.6000e+01 2.8090e+01 2.1160e+01]
 [1.5210e+01 1.0240e+01 6.2500e+00 1.5210e+01 1.0240e+01 6.2500e+00]
 [3.2400e+00 1.2100e+00 1.6000e-01 3