In [1]:
import numpy as np

In [2]:
class NodeBase:
    """Base class for a node of a small computation graph.

    Attributes set by the constructor:
        name      -- label used when printing the node.
        inputs    -- list of nodes this node reads from.
        value     -- result of the last forward pass (``None`` until computed).
        outputs   -- nodes that consume this node; a consumer that lists this
                     node several times among its inputs appears several times.
        gradients -- dict filled by ``backward``; keys are nodes.
    """

    def __init__(self, name, input_nodes=None):
        """Create the node and register it as an output of each input node.

        Args:
            name: label for the node.
            input_nodes: iterable of input nodes, or ``None`` for no inputs.
        """
        self.name = name
        # A fresh list per instance: a ``[]`` default would be shared by every
        # node created without inputs, and aliasing the caller's list would let
        # later edits to it change ``inputs`` without updating ``outputs``.
        self.inputs = list(input_nodes) if input_nodes is not None else []
        self.value = None
        self.outputs = []
        self.gradients = {}
        for node in self.inputs:
            node.outputs.append(self)

    def forward(self, value=None):
        """Compute ``self.value``; must be overridden by subclasses."""
        raise NotImplementedError()

    def backward(self):
        """Fill ``self.gradients``; must be overridden by subclasses."""
        raise NotImplementedError()

    def show(self):
        """Print name, input names, value, output names and stored gradients."""
        print(self.name)
        print('  in: ', [n.name for n in self.inputs])
        print('  val:', self.value)
        print('  out:', [n.name for n in self.outputs])
        print('  gra: ', end='')
        for k, v in self.gradients.items():
            print(k.name, v, end='  ')
        print()

In [3]:
class NodeInput(NodeBase):
    """Graph input: holds an externally supplied value and its total gradient."""
    def forward(self, value=None):
        """Store ``value`` as the node value; ``None`` leaves the value untouched."""
        if value is not None:
            self.value = value
    def backward(self):
        """Add up the gradients stored by all consumers into ``self.gradients[self]``."""
        upstream = [o.gradients[self] for o in self.outputs]
        if np.ndim(self.value) == 0:
            # Scalar value: plain total, as before.
            grad_sum = np.sum(upstream)
        else:
            # Array value: add the consumers' gradients element-wise. Without
            # ``axis=0`` np.sum would collapse the whole stack to one scalar.
            grad_sum = np.sum(upstream, axis=0)
        self.gradients[self] = grad_sum
            
class NodeOutput(NodeBase):
    """Terminal node: mirrors the value of its single input and seeds backprop."""
    def forward(self, ignore=None):
        """Copy the input's value; ``ignore`` is accepted but not used."""
        assert len(self.inputs) == 1
        assert len(self.outputs) == 0
        source = self.inputs[0]
        self.value = source.value
    def backward(self):
        """Store a gradient of 1 for the input node (d out / d out)."""
        source = self.inputs[0]
        self.gradients[source] = 1  # wrt self

In [12]:
class NodeAdd(NodeBase):
    """Sum of two input nodes (``+`` applied to their values)."""
    def forward(self, ignore=None):
        """Set ``self.value`` to ``inputs[0].value + inputs[1].value``; ``ignore`` is unused."""
        assert len(self.inputs) == 2
        self.value = self.inputs[0].value + self.inputs[1].value
    def backward(self):
        """Hand the accumulated upstream gradient unchanged to both inputs."""
        upstream = [o.gradients[self] for o in self.outputs]
        if np.ndim(self.value) == 0:
            # Scalar value: plain total, as before.
            grad_sum = np.sum(upstream)
        else:
            # Array value: add consumer gradients element-wise; np.sum without
            # an axis would reduce the stacked arrays to a single scalar.
            grad_sum = np.sum(upstream, axis=0)
        self.gradients[self.inputs[0]] = grad_sum
        self.gradients[self.inputs[1]] = grad_sum
            
class NodeSubstract(NodeBase):
    """Difference of two input nodes: ``inputs[0].value - inputs[1].value``."""
    def forward(self, ignore=None):
        """Set ``self.value`` to the difference of the two inputs; ``ignore`` is unused."""
        assert len(self.inputs) == 2
        self.value = self.inputs[0].value - self.inputs[1].value
    def backward(self):
        """Store ``+grad`` for the first input and ``-grad`` for the second."""
        upstream = [o.gradients[self] for o in self.outputs]
        if np.ndim(self.value) == 0:
            # Scalar value: plain total, as before.
            grad_sum = np.sum(upstream)
        else:
            # Array value: add consumer gradients element-wise; np.sum without
            # an axis would reduce the stacked arrays to a single scalar.
            grad_sum = np.sum(upstream, axis=0)
        minuend, subtrahend = self.inputs[0], self.inputs[1]
        if minuend is subtrahend:
            # x - x: both operands share one dict key, so the second write would
            # overwrite the first and leave -grad, which the input then reads
            # once per occurrence in its ``outputs`` (total -2*grad). The true
            # derivative is +grad - grad = 0.
            self.gradients[minuend] = grad_sum - grad_sum
        else:
            self.gradients[minuend] = grad_sum
            self.gradients[subtrahend] = -grad_sum

class NodeMult(NodeBase):
    """Product of two input nodes (``*`` applied to their values)."""
    def forward(self, ignore=None):
        """Set ``self.value`` to ``inputs[0].value * inputs[1].value``; ``ignore`` is unused."""
        assert len(self.inputs) == 2
        self.value = self.inputs[0].value * self.inputs[1].value
    def backward(self):
        """Product rule: each input receives the other input's value times the upstream gradient."""
        upstream = [o.gradients[self] for o in self.outputs]
        if np.ndim(self.value) == 0:
            # Scalar value: plain total, as before.
            grad_sum = np.sum(upstream)
        else:
            # Array value: add consumer gradients element-wise; np.sum without
            # an axis would reduce the stacked arrays to a single scalar.
            grad_sum = np.sum(upstream, axis=0)
        self.gradients[self.inputs[0]] = self.inputs[1].value * grad_sum
        self.gradients[self.inputs[1]] = self.inputs[0].value * grad_sum

class NodePower(NodeBase):
    """Square of a single input node: ``inputs[0].value ** 2``."""
    def forward(self, ignore=None):
        """Set ``self.value`` to the square of the input's value; ``ignore`` is unused."""
        assert len(self.inputs) == 1
        self.value = self.inputs[0].value**2
    def backward(self):
        """Power rule: d(x**2)/dx = 2*x, multiplied by the upstream gradient."""
        upstream = [o.gradients[self] for o in self.outputs]
        if np.ndim(self.value) == 0:
            # Scalar value: plain total, as before.
            grad_sum = np.sum(upstream)
        else:
            # Array value: add consumer gradients element-wise; np.sum without
            # an axis would reduce the stacked arrays to a single scalar.
            grad_sum = np.sum(upstream, axis=0)
        # The factor x was missing: 2*grad alone is only correct at x == 1.
        self.gradients[self.inputs[0]] = 2 * self.inputs[0].value * grad_sum

class NodeReduceSum(NodeBase):
    """Reduces its single input to the sum of all its elements."""
    def forward(self, ignore=None):
        """Set ``self.value`` to ``np.sum`` of the input's value; ``ignore`` is unused."""
        assert len(self.inputs) == 1
        source = self.inputs[0]
        self.value = np.sum(source.value)
    def backward(self):
        """Spread the total upstream gradient over an array shaped like the input."""
        upstream = [consumer.gradients[self] for consumer in self.outputs]
        grad_sum = np.sum(upstream)
        source = self.inputs[0]
        # Every element contributes with weight 1 to the sum.
        self.gradients[source] = np.ones_like(source.value) * grad_sum

class NodeDot(NodeBase):
    """Matrix product of two input nodes: ``inputs[0].value @ inputs[1].value``."""
    def forward(self, ignore=None):
        """Set ``self.value`` to the matrix product of the inputs; ``ignore`` is unused."""
        assert len(self.inputs) == 2
        self.value = self.inputs[0].value @ self.inputs[1].value
    def backward(self):
        """Store ``G @ B.T`` for the left input and ``A.T @ G`` for the right one.

        ``G`` is the accumulated upstream gradient. The shape assertions at
        the end require each gradient to match the shape of its input value.
        """
        upstream = [o.gradients[self] for o in self.outputs]
        if np.ndim(self.value) == 0:
            grad_sum = np.sum(upstream)
        else:
            # Keep the gradient's matrix shape: np.sum without an axis returns
            # a scalar, and ``scalar @ matrix`` is rejected by matmul.
            grad_sum = np.sum(upstream, axis=0)
        # NOTE(review): if both inputs are the same node the second assignment
        # overwrites the first, since they share one dict key.
        self.gradients[self.inputs[0]] = grad_sum @ self.inputs[1].value.T
        self.gradients[self.inputs[1]] = self.inputs[0].value.T @ grad_sum
        assert self.gradients[self.inputs[0]].shape == self.inputs[0].value.shape
        assert self.gradients[self.inputs[1]].shape == self.inputs[1].value.shape
            
class NodeSigmoid(NodeBase):
    """Logistic sigmoid of a single input node."""
    def _sigmoid(self, x):
        """Return ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))
    def forward(self, ignore=None):
        """Set ``self.value`` to the sigmoid of the input's value; ``ignore`` is unused."""
        assert len(self.inputs) == 1
        x = self.inputs[0].value
        self.value = self._sigmoid(x)
    def backward(self):
        """Chain rule with s'(x) = s(x) * (1 - s(x)), using the cached forward value."""
        upstream = [o.gradients[self] for o in self.outputs]
        if np.ndim(self.value) == 0:
            # Scalar value: plain total, as before.
            grad_sum = np.sum(upstream)
        else:
            # Array value: add consumer gradients element-wise; np.sum without
            # an axis would reduce the stacked arrays to a single scalar.
            grad_sum = np.sum(upstream, axis=0)
        self.gradients[self.inputs[0]] = self.value * (1 - self.value) * grad_sum

In [13]:
def run(fd, nodes):
    """Run one forward pass followed by one backward pass over ``nodes``.

    Args:
        fd: mapping from node to the value to feed it; nodes missing from
            the mapping get ``None`` passed to ``forward``.
        nodes: sequence of nodes; visited front to back for the forward
            pass and back to front for the backward pass.
    """
    for node in nodes:
        fed_value = fd.get(node)
        node.forward(fed_value)
    for node in reversed(nodes):
        node.backward()
    
def print_all(fd, nodes):
    """Print the feed dict on one line, then call ``show()`` on every node.

    Args:
        fd: mapping from node (anything with a ``name``) to its fed value.
        nodes: iterable of nodes to show, in order.
    """
    fed = ''.join('{!s} {!s}  '.format(key.name, val) for key, val in fd.items())
    print('FD: ' + fed)
    for node in nodes:
        node.show()

In [14]:
def ngrad(fd, nodes, eps=0.0001):
    """Print a central-difference estimate of d(output)/d(input) per fed input.

    For every node in ``fd`` the graph is run with that input shifted by
    ``+eps`` and by ``-eps`` and ``(f(x+eps) - f(x-eps)) / (2*eps)`` is
    printed. The output is read from ``nodes[-1].value``.

    Args:
        fd: mapping from input node to its value.
        nodes: node sequence as accepted by ``run``; the last one is the output.
        eps: half-width of the finite-difference step.

    Note:
        The nodes are left holding the values of the last perturbed run;
        call ``run(fd, nodes)`` afterwards to restore the unperturbed state.
    """
    for in_node in fd:
        base = fd[in_node]

        fd_pls = fd.copy()
        # Build a new value instead of ``+=``: the dict copy is shallow, so an
        # in-place add on an array would also change the caller's ``fd``.
        fd_pls[in_node] = base + eps
        run(fd_pls, nodes)
        res_pls = nodes[-1].value

        fd_min = fd.copy()
        fd_min[in_node] = base - eps
        run(fd_min, nodes)
        res_min = nodes[-1].value

        delta = (res_pls - res_min) / (2 * eps)
        print('del(', in_node.name, ')=', delta )

Test NN

In [None]:
# Graph under test: out = x + x (the same input node is both operands of the add).
x = NodeInput('x')
add = NodeAdd('add', [x, x])
out = NodeOutput('out', [add])
# Ordered so each node comes after its inputs; run() walks this list forward
# for the forward pass and in reverse for the backward pass.
nodes = [x, add, out]

# Feed dict: input node -> value.
fd = {x: 2}

# Finite-difference estimate, printed for comparison with the analytic gradient.
ngrad(fd, nodes)

# Analytic forward/backward pass at the unperturbed input, then dump every node.
run(fd, nodes)
print_all(fd, nodes)

Test add

In [16]:
# Graph under test: out = x + x, so d(out)/dx should be 2.
x = NodeInput('x')
add = NodeAdd('add', [x, x])
out = NodeOutput('out', [add])
# Ordered so each node comes after its inputs; run() walks this list forward
# for the forward pass and in reverse for the backward pass.
nodes = [x, add, out]

# Feed dict: input node -> value.
fd = {x: 2}

# Finite-difference estimate, printed for comparison with the analytic gradient.
ngrad(fd, nodes)

# Analytic forward/backward pass at the unperturbed input, then dump every node.
run(fd, nodes)
print_all(fd, nodes)

del( x )= 2.000000000002
FD: x 2  
x
  in:  []
  val: 2
  out: ['add', 'add']
  gra: x 2  
add
  in:  ['x', 'x']
  val: 4
  out: ['out']
  gra: x 1  
out
  in:  ['add']
  val: 4
  out: []
  gra: add 1  


Test x^3, power rule

In [239]:
# Graph under test: out = (x * x) * x = x^3, so d(out)/dx should be 3*x^2.
x = NodeInput('x')
mul = NodeMult('mul', [x, x])
mul2 = NodeMult('mul2', [mul, x])
out = NodeOutput('out', [mul2])
# Ordered so each node comes after its inputs.
nodes = [x, mul, mul2, out]

# At x = 4 the expected value is 64 and the expected gradient is 48.
fd = {x: 4}

# Finite-difference estimate, printed for comparison with the analytic gradient.
ngrad(fd, nodes)

# Analytic forward/backward pass at the unperturbed input, then dump every node.
run(fd, nodes)
print_all(fd, nodes)

del( x )= 48.000000010013366
FD: x 4  
x
  in:  []
  val: 4
  out: ['mul', 'mul', 'mul2']
  gra: x 48  
mul
  in:  ['x', 'x']
  val: 16
  out: ['mul2']
  gra: x 16  
mul2
  in:  ['mul', 'x']
  val: 64
  out: ['out']
  gra: mul 4  x 16  
out
  in:  ['mul2']
  val: 64
  out: []
  gra: mul2 1  


Test mult

In [237]:
# Graph under test: out = x * a, so d(out)/dx = a and d(out)/da = x.
x = NodeInput('x')
a = NodeInput('a')
mul = NodeMult('mul', [x, a])
out = NodeOutput('out', [mul])
# Ordered so each node comes after its inputs.
nodes = [x, a, mul, out]

# Expected gradients at this point: 4 with respect to x, 12 with respect to a.
fd = {x: 12, a: 4}

# Finite-difference estimates, printed for comparison with the analytic gradients.
ngrad(fd, nodes)

# Analytic forward/backward pass at the unperturbed inputs, then dump every node.
run(fd, nodes)
print_all(fd, nodes)

del( x )= 3.9999999999906777
del( a )= 12.00000000000756
FD: x 12  a 4  
x
  in:  []
  val: 12
  out: ['mul']
  gra: x 4  
a
  in:  []
  val: 4
  out: ['mul']
  gra: a 12  
mul
  in:  ['x', 'a']
  val: 48
  out: ['out']
  gra: x 4  a 12  
out
  in:  ['mul']
  val: 48
  out: []
  gra: mul 1  


In [71]:
# NOTE(review): `E` is not defined in any cell visible in this notebook; this
# cell appears to rely on leftover kernel state and would raise NameError on a
# fresh Restart & Run All -- confirm where `E` comes from or remove the cell.
E.value

3.5