신경망의 학습

In [1]:
#softmax with Loss
class SoftmaxWithLoss:
    """Softmax activation fused with a cross-entropy loss.

    ``forward`` calls ``softmax`` and ``cross_entropy_error``. Neither is
    defined in this cell, so both must already be available in the notebook
    namespace before ``forward`` is used.
    """

    def __init__(self):
        self.loss = None  # loss value from the most recent forward pass
        self.y = None     # softmax output
        self.t = None     # target labels (one-hot vectors)

    def forward(self, x, t):
        """Compute the loss of scores ``x`` against targets ``t``.

        Args:
            x: Scores passed to ``softmax``.
            t: Target labels, cached for ``backward``. ``backward`` subtracts
                ``t`` from the softmax output element-wise, so ``t`` has to be
                broadcast-compatible with that output (one-hot encoding).

        Returns:
            The value of ``cross_entropy_error(self.y, self.t)``.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the forward input.

        Args:
            dout: Upstream gradient flowing into the loss. Defaults to 1,
                which is the usual case when this layer is the last one.

        Returns:
            ``(y - t) / batch_size`` scaled by ``dout``, where ``batch_size``
            is ``t.shape[0]``.
        """
        batch_size = self.t.shape[0]
        # Apply the upstream gradient so a caller passing dout != 1 gets a
        # correctly scaled result; with the default the value is unchanged.
        dx = (self.y - self.t) / batch_size * dout

        return dx

계산 그래프 
- 덧셈 노드 
- 곱셈 노드 
- 분기 노드 
- Repeat 노드
- Sum 노드 
- MatMul 노드 


In [2]:
# Repeat node: the forward pass copies x along axis 0 N times; the backward
# pass sums the gradients of those copies back into a single row.
import numpy as np
D, N = 8, 7
x = np.random.randn(1, D)            # input of shape (1, D)
y = x.repeat(N, axis=0)              # forward: shape (N, D)
dy = np.random.randn(N, D)           # random upstream gradient
dx = dy.sum(axis=0, keepdims=True)   # backward: shape (1, D)
dx

array([[-1.61509355, -4.95932305, -1.49307011,  0.02712772,  2.28889475,
         0.11580762, -2.79618437,  3.19971182]])

In [3]:
# Sum node: the forward pass sums x over axis 0; the backward pass repeats the
# upstream gradient N times so every summed row receives the same gradient.
import numpy as np
D, N = 8, 7
x = np.random.randn(N, D)            # input of shape (N, D)
y = x.sum(axis=0, keepdims=True)     # forward: shape (1, D)
dy = np.random.randn(1, D)           # random upstream gradient
dx = dy.repeat(N, axis=0)            # backward: shape (N, D)
dx

array([[ 0.55210329, -0.9279386 , -0.35516701, -1.51663666, -0.81021268,
        -0.31028748, -0.6089007 ,  0.76986434],
       [ 0.55210329, -0.9279386 , -0.35516701, -1.51663666, -0.81021268,
        -0.31028748, -0.6089007 ,  0.76986434],
       [ 0.55210329, -0.9279386 , -0.35516701, -1.51663666, -0.81021268,
        -0.31028748, -0.6089007 ,  0.76986434],
       [ 0.55210329, -0.9279386 , -0.35516701, -1.51663666, -0.81021268,
        -0.31028748, -0.6089007 ,  0.76986434],
       [ 0.55210329, -0.9279386 , -0.35516701, -1.51663666, -0.81021268,
        -0.31028748, -0.6089007 ,  0.76986434],
       [ 0.55210329, -0.9279386 , -0.35516701, -1.51663666, -0.81021268,
        -0.31028748, -0.6089007 ,  0.76986434],
       [ 0.55210329, -0.9279386 , -0.35516701, -1.51663666, -0.81021268,
        -0.31028748, -0.6089007 ,  0.76986434]])

In [4]:
#MatMul 노드
class MatMul:
    """Matrix-multiplication layer: ``out = matmul(x, W)``.

    Attributes:
        params: One-element list holding the weight ``W``.
        grads: One-element list holding a gradient buffer shaped like ``W``.
        x: Input cached by the latest ``forward`` call, used in ``backward``.
    """

    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.x = None

    def forward(self, x):
        """Multiply ``x`` by the weight and cache ``x`` for the backward pass."""
        (weight,) = self.params
        product = np.matmul(x, weight)
        self.x = x
        return product

    def backward(self, dout):
        """Propagate ``dout`` and store the weight gradient.

        Returns the gradient with respect to the cached input. The weight
        gradient is written into ``self.grads[0]``.
        """
        (weight,) = self.params
        grad_input = np.matmul(dout, weight.T)
        grad_weight = np.matmul(self.x.T, dout)
        # Write into the existing buffer (element-wise copy) rather than
        # rebinding the list slot, so references to grads[0] stay valid.
        self.grads[0][...] = grad_weight
        return grad_input
    

Sigmoid 계층 구현

In [5]:
class Sigmoid:
    """Element-wise sigmoid layer; it has no trainable parameters."""

    def __init__(self):
        self.params = []
        self.grads = []
        self.out = None  # forward output, reused by backward

    def forward(self, x):
        """Return ``1 / (1 + exp(-x))`` and cache it for the backward pass."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        """Return ``dout * (1 - out) * out`` using the cached forward output."""
        activation = self.out
        return dout * (1.0 - activation) * activation
    
        

Affine 계층 구현

In [6]:
class Affine:
    """Fully connected layer: ``out = matmul(x, W) + b``.

    Attributes:
        params: ``[W, b]``.
        grads: Gradient buffers shaped like ``W`` and ``b``, in that order.
        x: Input cached by the latest ``forward`` call, used in ``backward``.
    """

    def __init__(self, W, b):
        self.params = [W, b]
        self.grads = [np.zeros_like(param) for param in (W, b)]
        self.x = None

    def forward(self, x):
        """Apply the affine transform and cache ``x`` for the backward pass."""
        weight, bias = self.params
        result = np.matmul(x, weight) + bias
        self.x = x
        return result

    def backward(self, dout):
        """Propagate ``dout`` and store the parameter gradients.

        Returns the gradient with respect to the cached input. The weight and
        bias gradients are written into ``self.grads[0]`` and
        ``self.grads[1]``.
        """
        weight, _bias = self.params
        grad_input = np.matmul(dout, weight.T)
        grad_weight = np.matmul(self.x.T, dout)
        # The bias is added to every row, so its gradient is summed over axis 0.
        grad_bias = np.sum(dout, axis=0)

        # Copy into the existing buffers instead of rebinding the list slots.
        self.grads[0][...] = grad_weight
        self.grads[1][...] = grad_bias

        return grad_input
        
        
        
        
        
        
        
        
        
        
        
            
    

가중치 갱신 
1. 미니배치
2. 기울기 계산
3. 매개변수 갱신 
4. 반복 

In [3]:
#SGD 구현 
class SGD:
    """Plain stochastic gradient descent optimizer."""

    def __init__(self, lr=0.01):
        self.lr = lr  # learning rate (step size)

    def update(self, params, grads):
        """Apply ``params[i] -= lr * grads[i]`` to every entry of ``params``.

        The update uses augmented assignment on each list slot.
        """
        for idx, _ in enumerate(params):
            params[idx] -= self.lr * grads[idx]

스파이럴 데이터셋을 이용한 간단한 실습

In [25]:
# coding: utf-8
import numpy as np


def load_data(seed=1984):
    """Generate a three-class, two-dimensional spiral toy dataset.

    Each class contributes 100 points whose radius grows linearly from 0 to
    just under 1 while the angle advances, with Gaussian noise added to the
    angle.

    Note:
        This calls ``np.random.seed(seed)`` and therefore reseeds NumPy's
        global random state as a side effect.

    Args:
        seed: Seed for NumPy's global random generator.

    Returns:
        A tuple ``(x, t)`` where ``x`` is a float array of shape ``(300, 2)``
        holding the points and ``t`` is an integer array of shape
        ``(300, 3)`` holding one-hot class labels.
    """
    np.random.seed(seed)
    N = 100      # samples per class
    DIM = 2      # features per sample
    CLS_NUM = 3  # number of classes

    x = np.zeros((N * CLS_NUM, DIM))
    # The ``np.int`` alias was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin ``int`` selects the same default integer dtype.
    t = np.zeros((N * CLS_NUM, CLS_NUM), dtype=int)

    for j in range(CLS_NUM):
        for i in range(N):
            rate = i / N
            radius = 1.0 * rate
            # One noise draw per sample, in the same order as before, so the
            # generated data for a given seed is unchanged.
            theta = j * 4.0 + 4.0 * rate + np.random.randn() * 0.2

            ix = N * j + i
            x[ix] = [radius * np.sin(theta), radius * np.cos(theta)]
            t[ix, j] = 1

    return x, t

In [12]:
# Attempt to downgrade NumPy to 1.18.5 through a shell escape. The recorded
# output below shows the source build failing while preparing metadata, so
# this install did not succeed.
# NOTE(review): presumably a workaround for an API removed in newer NumPy
# releases — confirm whether this cell is still needed.
! pip install numpy==1.18.5

Collecting numpy==1.18.5
  Using cached numpy-1.18.5.zip (5.4 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'error'


  error: subprocess-exited-with-error
  
  × Preparing metadata (pyproject.toml) did not run successfully.
  │ exit code: 1
  ╰─> [92 lines of output]
      Running from numpy source directory.
      Cythonizing sources
      Processing numpy/random\_bounded_integers.pxd.in
      Processing numpy/random\mtrand.pyx
        required_version = LooseVersion('0.29.14')
        if LooseVersion(cython_version) < required_version:
      Processing numpy/random\_bit_generator.pyx
      Processing numpy/random\_bounded_integers.pyx.in
      Processing numpy/random\_common.pyx
      performance hint: _common.pyx:261:19: Exception check will always require the GIL to be acquired. Declare the function as 'noexcept' if you control the definition and you're sure you don't want the function to raise exceptions.
      performance hint: _common.pyx:285:19: Exception check will always require the GIL to be acquired. Declare the function as 'noexcept' if you control the definition and you're sure you don'

In [20]:
# Display the current global `x`. The recorded output has 8 values per row, so
# it is presumably the array from the Sum-node cell rather than the result of
# load_data — TODO confirm, since this depends on cell execution order.
x

array([[-6.27584185e-01,  4.28641845e-01, -9.31275549e-01,
        -1.00332808e+00,  2.49630546e+00, -8.74106094e-01,
         9.91703352e-01,  2.44456735e-01],
       [-1.07439829e+00,  5.40508285e-01,  5.40881905e-01,
        -8.20055479e-01,  2.50269666e-01, -3.53634103e-01,
         1.86674816e-02, -2.42050906e-01],
       [-8.88400624e-01,  1.06652142e-05, -2.77446373e-01,
         5.94140998e-01, -4.99679466e-01,  9.13599986e-01,
         8.22022449e-01, -7.13991677e-01],
       [-2.74198450e-01,  1.35935489e+00,  1.03629919e+00,
        -7.83935003e-01, -2.00864612e+00,  1.35110438e+00,
         1.54107158e+00, -1.31152539e+00],
       [ 4.44160247e-01, -1.14336995e+00,  4.10892440e-01,
        -7.80886707e-01, -8.95445798e-01, -1.71916328e+00,
        -1.08228072e+00, -6.47111103e-02],
       [ 1.41036316e+00, -1.37079444e-01, -2.67239978e-01,
        -2.86675847e-01, -1.02284405e+00,  1.71028074e+00,
        -2.07514231e-01, -1.73826008e-02],
       [ 5.84358651e-01, -1.505334