# Homework3
## 2020062324 이은비

In [14]:
import numpy as np

## Data preparation

### XOR data

In [15]:
x_seeds = np.array([(0, 0), (1, 0), (0, 1), (1, 1)], dtype = np.float)
y_seeds = np.array([0, 1, 1, 0])

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  x_seeds = np.array([(0, 0), (1, 0), (0, 1), (1, 1)], dtype = np.float)


In [16]:
N = 1000
idxs = np.random.randint(0, 4, N) # 1000개의 random int 생성

In [17]:
X = x_seeds[idxs]
Y = y_seeds[idxs]

In [18]:
X += np.random.normal(scale = 0.25, size = X.shape)

## Model

- Parameters:

$\mathbf{W}^{[1]} \in \mathbb{R}^{h\times n}$

$\mathbf{w}^{[2]} \in \mathbb{R}^{h}$

$\mathbf{b}^{[1]} \in \mathbb{R}^{h}$

$b^{[2]} \in \mathbb{R}$

- derivatives

$\frac{\partial L(a^{[2]}, y)}{\partial b^{[2]}} = a^{[2]}-y$

$\frac{\partial L(a^{[2]}, y)}{\partial {w_i}^{[2]}} = (a^{[2]}-y){a_i}^{[1]}$

$\frac{\partial L(a^{[2]}, y)}{\partial {b_i}^{[1]}} = (a^{[2]}-y){w_i}^{[2]}(1-{{a_i}^{[1]}}^{2})$

$\frac{\partial L(a^{[2]}, y)}{\partial {W_{ij}}^{[1]}} = (a^{[2]}-y){w_i}^{[2]}(1-{{a_i}^{[1]}}^{2})x_j$

In [19]:
class shallow_neural_network():
    def __init__(self, num_input_features, num_hiddens):
        self.num_input_features = num_input_features
        self.num_hiddens = num_hiddens
        
        self.W1 = np.random.normal(size = (num_hiddens, num_input_features))
        self.b1 = np.random.normal(size = num_hiddens)
        self.W2 = np.random.normal(size = num_hiddens)
        self.b2 = np.random.normal(size = 1)
        
    def sigmoid(self, z):
        return 1/(1 + np.exp(-z))
    
    def predict(self, x):
        z1 = np.matmul(self.W1, x) + self.b1
        a1 = np.tanh(z1)
        z2 = np.matmul(self.W2, a1) + self.b2
        a2 = self.sigmoid(z2)
        return a2, (z1, a1, z2, a2)

In [20]:
model = shallow_neural_network(2, 3)

## Train (with vector operations)

In [21]:
def train(X, Y, model, lr = 0.1):
    dW1 = np.zeros_like(model.W1)
    db1 = np.zeros_like(model.b1)
    dW2 = np.zeros_like(model.W2)
    db2 = np.zeros_like(model.b2)
    m = len(X)
    cost = 0.0
    for x, y in zip(X, Y):
        a2, (z1, a1, z2, _) = model.predict(x)
        if y == 1:
            cost -= np.log(a2)
        else:
            cost -= np.log(1-a2)
            
        diff = a2-y
        
        # layer 2
        # db2
        db2 += diff
        
        # dw2
        dW2 += a1*diff
            
        # layer 1
        # db1
        db1_tmp = diff * (np.multiply(model.W2, (1-a1**2)))
        db1 += db1_tmp
        # db2
        dW1 += np.outer(db1_tmp, x)
                
    cost /= m
    model.W1 -= lr * dW1/m
    model.b1 -= lr * db1/m
    model.W2 -= lr * dW2/m
    model.b2 -= lr * db2/m
        
    return cost

In [22]:
for epoch in range(100):
    cost = train(X, Y, model, 1.0)
    if epoch %10 == 0:
        print(epoch, cost)

0 [0.98588883]
10 [0.52568986]
20 [0.44052513]
30 [0.38556913]
40 [0.34980507]
50 [0.32691982]
60 [0.31213534]
70 [0.30236865]
80 [0.29575327]
90 [0.29116166]


## Test

In [23]:
model.predict((1, 1))[0].item()

0.09298496929567225

In [24]:
model.predict((1, 0))[0].item()

0.9350967800839893

In [25]:
model.predict((0, 1))[0].item()

0.9368359730827288

In [26]:
model.predict((0, 0))[0].item()

0.07198699376899065