In [1]:
import numpy as np

In [2]:
def sigmoid(x, deriv=False):
    val = None
    if not deriv:
        val = 1.0/(1.0+np.exp(-x))
    else:
        val = np.exp(-x)/((np.exp(-x)+1.0)**2)
    return val

In [3]:
def MSE(y, target, deriv=False):
    n = y.shape[0]
    val = None
    if not deriv:
        val = np.sum((target-y)**2)/n
    else:
        val = -2.0*(target-y)/n
    return val

In [4]:
def crossEntropyLoss(y, target, deriv=False):
    n = y.shape[0]
    val = None
    if not deriv:
        val = -1.0*np.sum(target*np.log(y)+(1.0-target)*np.log(1.0-target))/n
    else:
        val = -(target/output) + ((1-target)/(1-output))
    return val

In [5]:
class NeuralNet:
    def __init__(self, layers, params):
        self.params = params
        self.nLayers = len(layers)
        self.W = [np.random.randn(layers[i],layers[i+1]) for i in range(self.nLayers-1)]
        self.b = [np.random.randn(layers[i+1]) for i in range(self.nLayers-1)]
        self.z = [None for n in range(self.nLayers-1)]
        self.a = [None for n in range(self.nLayers-1)]
        
    def eval(self,x):
        self.z[0] = x @ self.W[0] + self.b[0]
        self.a[0] = self.params["activation"](self.z[0])
        for n in range(1,self.nLayers-1):
            self.z[n] = self.a[n-1] @ self.W[n] + self.b[n]
            if n < self.nLayers-2:
                self.a[n] = self.params["activation"](self.z[n])
            else:
                self.a[n] = self.z[n]
        return self.a[-1]
    
    def loss(self, x, target):
        y = self.eval(x)
        return(self.params["loss"](y, target))
    
    def backProp(self, x, target):
        y = self.eval(x)
        dl_dw = [None for n in range(self.nLayers-1)]
        dl_dw[self.nLayers-2] = self.params["loss"](y, target, True)
        for n in range(self.nLayers-2,0,-1):
            dl_dw[n-1] = (self.W[n] @ dl_dw[n]) * self.params["activation"](self.z[n-1], True)
            
        self.W[0] = self.W[0] - self.params["learningRate"]*(
            np.repeat(x[:,None], dl_dw[0].shape[0], axis=1) *
            np.repeat(dl_dw[0][None,:], x.shape[0], axis=0)
        )
        self.b[0] = self.b[0] - dl_dw[0]
        for n in range(1,self.nLayers-1):
            self.W[n] = self.W[n] - self.params["learningRate"]*(
                np.repeat(self.a[n-1][:,None], dl_dw[n].shape, axis=1) *
                np.repeat(dl_dw[n][None,:], self.a[n-1].shape, axis=0)
            )
            self.b[n] = self.b[n] - dl_dw[n]

In [12]:
net = NeuralNet(
    [20,1000,10], 
    {
        "activation": sigmoid,
        "loss": MSE,
        "learningRate": 0.01
    }
)

In [13]:
target = np.random.randn(10)
target

array([-0.38404188, -0.62935363,  1.22633497, -0.50598845,  0.81347931,
        1.75233331,  0.66825588, -0.44135469,  0.23782176, -1.42835714])

In [14]:
x = np.random.randn(20)

In [15]:
for n in range(500):
    net.backProp(x, target)
    if n % 100 == 0:
        print(net.loss(x,target))
y = net.eval(x)

2052.030726413447
2.6634610460569846e-12
6.657231972240122e-25
3.5036517548292595e-30
2.8861215774609362e-30


In [16]:
y, target

(array([-0.38404188, -0.62935363,  1.22633497, -0.50598845,  0.81347931,
         1.75233331,  0.66825588, -0.44135469,  0.23782176, -1.42835714]),
 array([-0.38404188, -0.62935363,  1.22633497, -0.50598845,  0.81347931,
         1.75233331,  0.66825588, -0.44135469,  0.23782176, -1.42835714]))

In [17]:
np.sum((target-y)**2)/len(y)

1.107486755220436e-30