# Lecture 4 Backpropagation and Neural Networks

## Optimization

In [1]:
# while True:
#     weights_grad = evaluate_gradient(loss_fun, data, weights)
#     weights += - step_size * weights_grad # parameter update

## Modularized implementation: forward / backward API

In [4]:
# # rough psuedo code
class ComputationalGraph(object):
    # ...
    def forward(inputs):
        # 1. [pass inputs to input gates...]
        # 2. forward the compytational graph:
        for gate in self.graph.nodes_topologically_sorted():
            gate.forward()
        return loss # the final gate in the graph outputs the loss

    def backward():
        for gate in reversed(self.graph.nodes_topologically_sorted()):
            gate.backward() # little piece of backprop (chain rule applied)
        return inputs_gradients

## Modularized implementation: forward / backward API

In [5]:
class MultiplyGate(object):
    def forward(x, y):
        z = x*y
        self.x = x # must keep these around! <- 기억해둬야 나중에 계산에서 쓸 수 있음
        self.y = y
        return z

    def backward(dz):
        dx = self.y * dz # [dz/dx * dL/dz]
        dy = self.x * dz # [dz/dy * dL/dz]
        return [dx, dy]

## Full implementation of training a 2-layer Neural Network

In [9]:
import numpy as np
from numpy.random import randn

N, D_in, H, D_out = 64, 1000, 1000, 10
x, y = randn(N, D_in), randn(N, D_out)
w1, w2 = randn(D_in, H), randn(H, D_out)

for t in range(2000):
    h = 1 / (1 + np.exp(-x.dot(w1)))
    y_pred = h.dot(w2)
    loss = np.square(y_pred - y).sum()
    if t % 100 == 0:
        print(t, loss)

    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h.T.dot(grad_y_pred)
    grad_h = grad_y_pred.dot(w2.T)
    grad_w1 = x.T.dot(grad_h * h * (1 - h))

    w1 -= 1e-4 * grad_w1
    w2 -= 1e-4 * grad_w2

0 235490.4014824084


  if __name__ == '__main__':


100 1321.112230422572
200 31.562827435615382
300 1.1205131714576424
400 0.04622141137564545
500 0.0020297709365140687
600 9.168514276452287e-05
700 4.200543959663522e-06
800 1.9398139802201755e-07
900 9.002393326799017e-09
1000 4.19204678471719e-10
1100 1.957027816209907e-11
1200 9.155031326679536e-13
1300 4.290305317248883e-14
1400 2.0137377913163287e-15
1500 9.465675792155848e-17
1600 4.455641408184428e-18
1700 2.1004794005980451e-19
1800 9.921871688110512e-21
1900 4.719520920158549e-22


## Example feed-forward computation of a  neural network

In [10]:
import math

class Neuron:
    # ...
    def neuron_tick(inputs):
        # assume inputs and weights are 1-D numpy arrays and bias is a number
        cell_body_sum = np.sum(inputs * self.weights) + self.bias
        firing_rate = 1.0 / (1.0 + math.exp(-cell_body_sum)) # sigmoid activation func <- sigmoid 함수 따로 만들어서 사용하는게 일반적일듯
        return firing_rate

In [11]:
## forward-pass of a 3-layer neural network:
# f = lambda x: 1.0/(1.0 + np.exp(-x))
# x = randn(3, 1)
# h1 = f(np.dot(W1, x) + b1)
# h2 = f(np.dot(W2, h1) + b2)
# out = np.dot(W3, h2) + b3