# 1. Neural Networks

In [2]:
import numpy as np

In [8]:
W = np.array([[1,0,-1],[0,1,-1],[-1,0,-1],[0,-1,-1]])
W

array([[ 1,  0, -1],
       [ 0,  1, -1],
       [-1,  0, -1],
       [ 0, -1, -1]])

In [22]:
W.shape

(4, 3)

In [11]:
V = np.array([[1,1,1,1,0],[-1,-1,-1,-1,2]])
V

array([[ 1,  1,  1,  1,  0],
       [-1, -1, -1, -1,  2]])

In [15]:
# ReLU activation functions on all neurons
def relu(x):
    """Rectified linear unit: element-wise max(0, x).

    Accepts a scalar or a NumPy array and returns the same shape, with
    every negative entry replaced by zero.
    """
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

In [59]:
# softmax activation function in the output layer
def softmax_o_1(f_u_1, f_u_2):
    """Softmax probability of the first of two output activations.

    Computes exp(f_u_1) / (exp(f_u_1) + exp(f_u_2)).

    Both activations are shifted by their maximum before exponentiating.
    The shift cancels in the ratio, so the result is mathematically
    unchanged, but np.exp can no longer overflow to inf (which previously
    produced nan for large activations).

    Parameters
    ----------
    f_u_1, f_u_2 : scalar or array-like
        Activations of output units 1 and 2.

    Returns
    -------
    Probability assigned to output unit 1, in [0, 1].
    """
    shift = np.maximum(f_u_1, f_u_2)
    e_1 = np.exp(f_u_1 - shift)
    e_2 = np.exp(f_u_2 - shift)
    return e_1 / (e_1 + e_2)

def softmax_o_2(f_u_1, f_u_2):
    """Softmax probability of the second of two output activations.

    Computes exp(f_u_2) / (exp(f_u_1) + exp(f_u_2)).

    Both activations are shifted by their maximum before exponentiating.
    The shift cancels in the ratio, so the result is mathematically
    unchanged, but np.exp can no longer overflow to inf (which previously
    produced nan for large activations).

    Parameters
    ----------
    f_u_1, f_u_2 : scalar or array-like
        Activations of output units 1 and 2.

    Returns
    -------
    Probability assigned to output unit 2, in [0, 1].
    """
    shift = np.maximum(f_u_1, f_u_2)
    e_1 = np.exp(f_u_1 - shift)
    e_2 = np.exp(f_u_2 - shift)
    return e_2 / (e_1 + e_2)

## Feed Forward Step

In [60]:
x = np.array([3,14,1])
x

array([ 3, 14,  1])

In [61]:
x.shape

(3,)

In [62]:
# hidden-layer pre-activations: matrix-vector product of W with the input x
z = W @ x
z

array([  2,  13,  -4, -15])

In [63]:
# ReLU of every hidden pre-activation, followed by a constant 1 so that
# the fifth column of V multiplies a fixed input in the next product
f_z = [relu(z_k) for z_k in z] + [1]
f_z

[2, 13, 0, 0, 1]

In [64]:
f_z = np.array(f_z)
f_z

array([ 2, 13,  0,  0,  1])

In [65]:
# output-layer pre-activations: matrix-vector product of V with f_z
u = V @ f_z
u

array([ 15, -13])

In [69]:
# ReLU of each output pre-activation, collected into a plain list
f_u = [relu(u_k) for u_k in u]
f_u

[15, 0]

In [71]:
f_u_1 = f_u[0]
f_u_2 = f_u[1]
print(f_u_1)
print(f_u_2)

15
0


In [85]:
o_1 = softmax_o_1(f_u_1,f_u_2)
o_2 = softmax_o_2(f_u_1,f_u_2)
print('o_1: ' + str(o_1))
print('o_2 {:.22f}'.format(o_2))

o_1: 0.9999996940977731
o_2 0.0000003059022269256247


## Output of Neural Network

> Assuming that $f(z_1) + f(z_2) + f(z_3) + f(z_4) = 1$

In [88]:
u_1 = 1
u_2 = 1

In [90]:
f_u_1 = relu(u_1)
f_u_1

1

In [91]:
f_u_2 = relu(u_2)
f_u_2

1

In [93]:
o_1 = softmax_o_1(f_u_1,f_u_2)
o_1

0.5

> Assuming that $f(z_1) + f(z_2) + f(z_3) + f(z_4) = 0$

In [94]:
u_1 = 0
u_2 = 2

In [95]:
f_u_1 = relu(u_1)
f_u_1

0

In [97]:
f_u_2 = relu(u_2)
f_u_2

2

o_1 = softmax_o_1(f_u_1,f_u_2)
o_1

> Assuming that $f(z_1) + f(z_2) + f(z_3) + f(z_4) = 3$

In [102]:
u_1 = 3
u_2 = -1

In [103]:
f_u_1 = relu(u_1)
f_u_1

3

In [104]:
f_u_2 = relu(u_2)
f_u_2

0

In [105]:
o_1 = softmax_o_1(f_u_1,f_u_2)
o_1

0.9525741268224333

# Inverse Temperature

In [106]:
def softmax_with_beta_o_1(f_u_1, f_u_2, beta):
    """Softmax probability of output unit 1 with inverse temperature beta.

    Computes exp(beta*f_u_1) / (exp(beta*f_u_1) + exp(beta*f_u_2)).

    The scaled activations are shifted by their maximum before
    exponentiating. The shift cancels in the ratio, so the result is
    mathematically unchanged, but np.exp can no longer overflow to inf
    (which previously produced nan for large beta * f_u).

    Parameters
    ----------
    f_u_1, f_u_2 : scalar or array-like
        Activations of output units 1 and 2.
    beta : scalar
        Multiplier applied to both activations before the softmax.

    Returns
    -------
    Probability assigned to output unit 1, in [0, 1].
    """
    s_1 = f_u_1 * beta
    s_2 = f_u_2 * beta
    shift = np.maximum(s_1, s_2)
    e_1 = np.exp(s_1 - shift)
    e_2 = np.exp(s_2 - shift)
    return e_1 / (e_1 + e_2)

def softmax_with_beta_o_2(f_u_1, f_u_2, beta):
    """Softmax probability of output unit 2 with inverse temperature beta.

    Computes exp(beta*f_u_2) / (exp(beta*f_u_1) + exp(beta*f_u_2)).

    The scaled activations are shifted by their maximum before
    exponentiating. The shift cancels in the ratio, so the result is
    mathematically unchanged, but np.exp can no longer overflow to inf
    (which previously produced nan for large beta * f_u).

    Parameters
    ----------
    f_u_1, f_u_2 : scalar or array-like
        Activations of output units 1 and 2.
    beta : scalar
        Multiplier applied to both activations before the softmax.

    Returns
    -------
    Probability assigned to output unit 2, in [0, 1].
    """
    s_1 = f_u_1 * beta
    s_2 = f_u_2 * beta
    shift = np.maximum(s_1, s_2)
    e_1 = np.exp(s_1 - shift)
    e_2 = np.exp(s_2 - shift)
    return e_2 / (e_1 + e_2)

In [107]:
# beta = 1
# np.log(999) equals ln(0.999 / 0.001), i.e. the log-odds of probability 0.999.
# NOTE(review): presumably the gap f_u_1 - f_u_2 at which
# softmax_with_beta_o_1 reaches 0.999 when beta = 1 — confirm against the exercise.
np.log(999)

6.906754778648554

In [108]:
# beta = 3
# Same log-odds value ln(999), divided by beta = 3.
# NOTE(review): presumably the gap f_u_1 - f_u_2 at which
# softmax_with_beta_o_1 reaches 0.999 when beta = 3 — confirm against the exercise.
np.log(999)/3

2.3022515928828513

# 2. LSTM

## LSTM states

[ref: sigmoid.py](https://gist.github.com/jovianlin/805189d4b19332f8b8a79bbd07e3f598)

In [111]:
def sigmoid(x, derivative=False):
    """Logistic function 1 / (1 + exp(-x)), or its derivative.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which to evaluate.
    derivative : bool, default False
        When True, return s * (1 - s), where s is the logistic value at x,
        instead of s itself.
    """
    value = 1. / (1. + np.exp(-x))
    if not derivative:
        return value
    return value * (1. - value)

In [117]:
# Scalar LSTM parameters read by rnn_LSTM, one line per gate/cell:
# weight on h_{t-1}, weight on x_t, bias.
W_f_h, W_f_x, b_f = 0, 0, -100      # forget gate
W_i_h, W_i_x, b_i = 0, 100, 100     # input gate
W_o_h, W_o_x, b_o = 0, 100, 0       # output gate
W_c_h, W_c_x, b_c = -100, 50, 0     # memory-cell candidate (inside tanh)

In [122]:
def rnn_LSTM(h_t_1, c_t_1, x_t):
    """Advance the scalar LSTM by one time step.

    Reads the module-level parameters W_*_h, W_*_x and b_* for the forget
    (f), input (i) and output (o) gates and the memory cell (c).

    Parameters
    ----------
    h_t_1 : previous visible state h_{t-1}
    c_t_1 : previous memory cell c_{t-1}
    x_t   : current input

    Returns
    -------
    (c_t, h_t) : the new memory cell first, then the new visible state.
    """
    # affine pre-activations: weight*h_{t-1} + weight*x_t + bias
    forget_in = W_f_h*h_t_1+W_f_x*x_t+b_f
    input_in = W_i_h*h_t_1+W_i_x*x_t+b_i
    output_in = W_o_h*h_t_1+W_o_x*x_t+b_o
    candidate_in = W_c_h*h_t_1+W_c_x*x_t+b_c

    # gates are squashed through the sigmoid
    f_t = sigmoid(forget_in)
    i_t = sigmoid(input_in)
    o_t = sigmoid(output_in)

    # memory cell: keep a gated share of the old cell, add a gated candidate
    c_t = f_t*c_t_1+i_t*np.tanh(candidate_in)
    # visible state: output gate applied to the squashed cell
    h_t = o_t*np.tanh(c_t)
    return (c_t, h_t)

In [139]:
# run RNN with LSTM
# start from zero visible state and zero memory cell
h_t_1, c_t_1 = 0., 0.
x = np.array([0,0,1,1,1,0])

for i, x_t in enumerate(x):
    c_t, h_t = rnn_LSTM(h_t_1, c_t_1, x_t)
    # the rounded visible state is both reported and fed to the next step;
    # the memory cell is carried over unrounded
    h_rounded = round(h_t)
    print('h_' + str(i) +': ' + str(h_rounded))
    h_t_1, c_t_1 = h_rounded, c_t


h_0: 0.0
h_1: 0.0
h_2: 1.0
h_3: -1.0
h_4: 1.0
h_5: -0.0


## LSTM states 2

In [140]:
# run RNN with LSTM
# start from zero visible state and zero memory cell
h_t_1, c_t_1 = 0., 0.
x = np.array([1,1,0,1,1])

for i, x_t in enumerate(x):
    c_t, h_t = rnn_LSTM(h_t_1, c_t_1, x_t)
    # the rounded visible state is both reported and fed to the next step;
    # the memory cell is carried over unrounded
    h_rounded = round(h_t)
    print('h_' + str(i) +': ' + str(h_rounded))
    h_t_1, c_t_1 = h_rounded, c_t

h_0: 1.0
h_1: -1.0
h_2: 0.0
h_3: 1.0
h_4: -1.0


# 3. Backpropagation

## Simple Network

In [141]:
# constants for the simple two-weight network used in the cells below
t = 1        # target compared against y in the cost C
x = 3        # network input
w_1 = 0.01   # weight in z_1 = w_1*x
w_2 = -5     # weight in z_2 = w_2*a_1 + b
b = -1       # bias in z_2

In [143]:
z_1 = w_1*x
z_1

0.03

In [145]:
a_1 = relu(z_1)
a_1

0.03

In [147]:
z_2 = w_2*a_1+b
z_2

-1.15

In [154]:
y = sigmoid(z_2)
y

0.24048908305088898

In [155]:
C = (y-t)**2/2
C

0.28842841648243966

In [160]:
# Chain rule for C = (y-t)**2/2, y = sigmoid(z_2), z_2 = w_2*a_1+b:
# sigmoid'(z_2) * w_2 * (y - t) is dC/da_1.
sigmoid(z_2, derivative=True)*w_2*(sigmoid(z_2, derivative=False)-t)

0.6936388540568185

In [158]:
# (y - t) * sigmoid'(z_2) * w_2: dC/da_1 for C = (y-t)**2/2, y = sigmoid(z_2),
# z_2 = w_2*a_1+b. The factor order changes only the last floating-point digit.
(sigmoid(z_2, derivative=False)-t)*(sigmoid(z_2, derivative=True))*w_2

0.6936388540568184

In [159]:
# (y - t) * sigmoid'(z_2): dC/dz_2 for C = (y-t)**2/2 with y = sigmoid(z_2).
# Because z_2 = w_2*a_1+b, this also equals dC/db.
(sigmoid(z_2, derivative=False)-t)*(sigmoid(z_2, derivative=True))

-0.13872777081136367