클래스 구현 위주로 진행

In [1]:
import numpy as np

# Optimizer


## SGD
$W \leftarrow W - \eta \frac{\partial L}{\partial W}$

In [2]:
class SGD:
    """Plain stochastic gradient descent: W <- W - lr * dL/dW."""

    def __init__(self, lr=0.01):
        # Learning rate: the factor applied to every gradient step.
        self.lr = lr

    def update(self, params, grads):
        """Apply one gradient-descent step to every entry of ``params``.

        ``params`` and ``grads`` are mappings indexed by the same keys.
        Each entry is updated with augmented assignment, so ``params`` is
        mutated (NumPy array entries are modified in place) and nothing
        is returned.
        """
        for name in params:
            step = self.lr * grads[name]
            params[name] -= step

pseudo code

```
network = TwoLayerNet(...)
optimizer = SGD() # optimizer로 사용!

for i in range(iter_num):
    ...
    X_batch, y_batch = get_mini_batch(...) # 미니 배치
    grads = network.gradient(X_batch, y_batch)
    params = network.params
    optimizer.update(params, grads)
    ...
```

## Momentum

$\mathbf{v} \leftarrow \alpha \mathbf{v} - \eta \frac{\partial L}{\partial W}$ <br/>
$W \leftarrow W + \mathbf{v}$


In [3]:
class Momentum:
    """SGD with momentum: v <- momentum * v - lr * dL/dW, then W <- W + v."""

    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        # Per-parameter velocity; created on the first call to update().
        self.v = None

    def update(self, params, grads):
        """Advance the velocities and apply them to ``params``.

        ``params`` and ``grads`` are mappings indexed by the same keys.
        On the first call a zero velocity shaped like each parameter is
        allocated. ``params`` is mutated and nothing is returned.
        """
        if self.v is None:
            self.v = {
                name: np.zeros_like(value) for name, value in params.items()
            }

        for name in params:
            velocity = self.momentum * self.v[name] - self.lr * grads[name]
            self.v[name] = velocity
            params[name] += velocity

## AdaGrad

$\mathbf{h} \leftarrow \mathbf{h} + \frac{\partial L}{\partial W} \odot \frac{\partial L}{\partial W}$ <br/>
$W \leftarrow W - \eta \frac{1}{\sqrt{\mathbf{h}}}\frac{\partial L}{\partial W}$

In [5]:
class AdaGrad:
    """AdaGrad: scale each step by the root of the summed squared gradients."""

    def __init__(self, lr=0.01):
        self.lr = lr
        # Running sum of squared gradients per parameter; created on the
        # first call to update().
        self.h = None

    def update(self, params, grads):
        """Accumulate squared gradients and apply the scaled step.

        ``params`` and ``grads`` are mappings indexed by the same keys.
        On the first call a zero accumulator shaped like each parameter is
        allocated. ``params`` is mutated and nothing is returned.
        """
        if self.h is None:
            self.h = {
                name: np.zeros_like(value) for name, value in params.items()
            }

        for name in params:
            grad = grads[name]
            self.h[name] += grad * grad
            # The small constant keeps the denominator non-zero while the
            # accumulator is still zero.
            scale = np.sqrt(self.h[name]) + 1e-7
            params[name] -= self.lr * grad / scale

# Overfitting

## Dropout

In [6]:
class Dropout:
    """Dropout layer that zeroes random activations during training."""

    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        # Boolean keep-mask from the most recent training-mode forward pass.
        self.mask = None

    def forward(self, x, train_flag=True):
        """Return ``x`` with dropout applied.

        With ``train_flag`` false, ``x`` is scaled by ``1 - dropout_ratio``
        and the stored mask is left untouched. Otherwise a fresh mask is
        drawn, keeping the positions whose uniform sample exceeds
        ``dropout_ratio``, and ``x`` is multiplied by it.
        """
        if not train_flag:
            keep_scale = 1.0 - self.dropout_ratio
            return x * keep_scale

        samples = np.random.rand(*x.shape)
        self.mask = samples > self.dropout_ratio
        return x * self.mask

    def backward(self, dout):
        """Multiply the upstream gradient ``dout`` by the stored mask."""
        return dout * self.mask