In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
class network():
    """Small fully connected feed-forward network trained by full-batch gradient descent.

    Every layer owns a weight matrix of shape ``(n_out, n_in + 1)``; column 0
    holds the bias, which is applied by prepending a column of ones to the
    layer input. Supported activation names are ``'linear'`` and ``'ReLu'``
    (the latter is a leaky ReLU with slope 0.1 for negative inputs).

    Attributes:
        layers: Layer widths, input layer first, output layer last.
        activations: Activation name for each non-input layer.
        learning_rate: Step size used by ``train``.
        weights: List of ``len(layers) - 1`` weight matrices.
    """

    # Slope of the 'ReLu' activation for non-positive inputs.
    _LEAKY_SLOPE = 0.1

    def __init__(self,layers,activations,learning_rate):
        """Store the configuration and draw random initial weights.

        Args:
            layers: Sequence of layer widths, e.g. ``[2, 20, 1]``.
            activations: One activation name per non-input layer.
            learning_rate: Gradient-descent step size.

        Raises:
            ValueError: If fewer activations than weight layers are given.
        """
        if len(activations) < len(layers) - 1:
            raise ValueError(
                "expected %d activations for %d layers, got %d"
                % (len(layers) - 1, len(layers), len(activations))
            )

        self.layers = layers
        self.activations = activations
        self.learning_rate = learning_rate

        # Standard-normal draws scaled by sqrt(1 / fan_in), where fan_in
        # counts the bias input as well.
        self.weights = [
            np.random.randn(n_out, n_in + 1) * np.sqrt(1 / (n_in + 1))
            for n_in, n_out in zip(self.layers[:-1], self.layers[1:])
        ]

    def actiovation_functions(self,function,z):
        """Apply the activation called ``function`` element-wise to ``z``.

        The misspelled method name is kept so existing callers keep working.

        Raises:
            ValueError: If ``function`` is not ``'linear'`` or ``'ReLu'``.
        """
        if function == 'linear':
            return z
        if function == 'ReLu':
            return np.maximum(self._LEAKY_SLOPE*z,z)
        raise ValueError("unknown activation function: %r" % (function,))

    def actiovation_functions_derivative(self,function,X):
        """Return the element-wise derivative of an activation at pre-activation ``X``.

        For ``'ReLu'`` the result is 1 where ``X > 0`` and 0.1 elsewhere.

        Raises:
            ValueError: If ``function`` is not ``'linear'`` or ``'ReLu'``.
        """
        X = np.asarray(X)
        if function == 'linear':
            return np.ones(X.shape)
        if function == 'ReLu':
            # A single np.where evaluates the mask once; assigning 0.1 and then
            # masking on "> 0" again would overwrite the 0.1 entries with 1.
            return np.where(X > 0, 1.0, self._LEAKY_SLOPE)
        raise ValueError("unknown activation function: %r" % (function,))

    @staticmethod
    def _with_bias(A):
        """Prepend a column of ones to the 2-D array ``A``."""
        return np.concatenate((np.ones((A.shape[0],1)),A),axis=1)

    def _forward(self,X):
        """Run a forward pass and return ``(Z, A)``.

        ``A[k]`` is the output of layer ``k`` (``A[0]`` is ``X``) and ``Z[k]``
        the matching pre-activation; ``Z[0]`` is ``None``.
        """
        Z = [None]
        A = [X]
        for W, name in zip(self.weights, self.activations):
            Z.append(np.dot(self._with_bias(A[-1]), W.T))
            A.append(self.actiovation_functions(name, Z[-1]))
        return Z, A

    def predict(self,X):
        """Return the network output for inputs ``X`` of shape ``(n_samples, layers[0])``."""
        _, A = self._forward(np.asarray(X, dtype=float))
        return A[-1]

    def train(self,X,Y,epoch):
        """Fit the weights with ``epoch`` full-batch gradient-descent steps.

        The minimised loss is ``mean_i(0.5 * ||predict(X)_i - Y_i||^2)``; each
        step subtracts ``learning_rate`` times its exact gradient.

        Args:
            X: Inputs of shape ``(n_samples, layers[0])``.
            Y: Targets of shape ``(n_samples, layers[-1])``; a 1-D array of
                length ``n_samples`` is treated as a single output column.
            epoch: Number of gradient steps.

        Raises:
            ValueError: If ``X`` contains no samples.
        """
        X = np.asarray(X, dtype=float)
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("cannot train on an empty data set")
        # Force a column layout so a 1-D Y cannot broadcast against the
        # (n_samples, n_out) prediction into an (n_samples, n_samples) matrix.
        Y = np.asarray(Y, dtype=float).reshape(n_samples, -1)

        for _ in range(epoch):
            # Forward
            Z, A = self._forward(X)

            # Backward
            grads = [None]*len(self.weights)
            dA = A[-1]-Y
            for layer in range(len(self.weights),0,-1):
                dZ = dA*self.actiovation_functions_derivative(self.activations[layer-1],Z[layer])
                # Batch-averaged gradient w.r.t. this layer's weights and bias.
                grads[layer-1] = np.dot(dZ.T,self._with_bias(A[layer-1]))/n_samples
                if layer > 1:
                    # Chain rule: send the error back through the weights
                    # (bias column excluded), not through the activations.
                    dA = np.dot(dZ,self.weights[layer-1][:,1:])

            # Update (all gradients were computed from the pre-update weights)
            for W, dW in zip(self.weights, grads):
                W -= self.learning_rate*dW

In [20]:
# Fix the global NumPy seed so the sampled inputs and the network's randomly
# drawn initial weights are the same on every Restart & Run All.
np.random.seed(0)

# 100 points drawn uniformly from the square [-1, 1) x [-1, 1).
X = np.random.rand(100,2)*2-1
# XOR-style target: 1 where both coordinates share a sign, 0 where they differ.
# np.sign avoids the 0/0 = NaN that product/|product| produces when a
# coordinate is exactly 0 (such a point gets the neutral label 0.5 instead).
Y = (np.sign(np.multiply(X[:,0],X[:,1]))/2+0.5).reshape(X.shape[0],1)
# 2 inputs -> 20 leaky-ReLU hidden units -> 1 linear output, learning rate 0.1.
model = network([2,20,1],['ReLu','linear'],0.1)

In [23]:
# Fit the model on the whole data set; every epoch is one full-batch update.
n_epochs = 100000
model.train(X, Y, epoch=n_epochs)

KeyboardInterrupt: 

In [None]:
# Residuals of the model on the training inputs (prediction minus target).
residuals = model.predict(X)-Y
# Report summary statistics instead of dumping every row of the array.
print("mean squared error:", np.mean(residuals**2))
print("max |residual|:", np.max(np.abs(residuals)))
# Show only the first few residuals as the cell's displayed value.
residuals[:5]