In [33]:
import numpy as np
def sigmoid(x):
    return 1/(1+np.exp(-x))
    
def softmax(a):
    c=np.max(a)
    exp_a=np.exp(a-c)
    sum_exp_a=np.sum(exp_a)
    y=exp_a/sum_exp_a
    return y

def cross_entropy_error(y,t):
    delta=1e-7
    return -np.sum(t*np.log(y+delta)) #log안이 0이 되는 것을 막기 위해서

def numerical_gradient(f,x): #수치미분
    h=1e-4 #1e-4 정도면 좋은 결과를 얻는다고 알려져 있다
    grad=np.zeros_like(x) #x와 같은 형상의 np.array를 만들고 모든 원소는 0이다
    if len(x.shape)==2:
        (row,column) =x.shape
        for i in range(row):
            for j in range(column):
                tmp_val=x[i][j]
                x[i][j]=tmp_val+h
                fxh1=f(x)
            
                x[i][j]=tmp_val-h
                fxh2=f(x)
            
                grad[i][j]=(fxh1-fxh2)/(2*h)
                x[i][j]=tmp_val
    
        """
        xh1=x
        xh2=x #이렇게 변수끼리 서로 같게 놓아버리면 간섭이 발생하는 것 같다.
        xh1[i]+=h
        xh2[i]-=h 
        f_temp=(f(xh1)-f(xh2))/(2*h)
        grad[i]=f_temp
        """
    else: 
        for i in range(x.size):
            tmp_val=x[i]
            x[i]=tmp_val+h
            fxh1=f(x)
            
            x[i]=tmp_val-h
            fxh2=f(x)
            
            grad[i]=(fxh1-fxh2)/(2*h)
            x[i]=tmp_val
    return grad

class TwoLayerNetwork:
    def __init__ (self, input_size, hidden_size, output_size, weight_init_std=0.01):
        self.params={}
        self.params['W1']=weight_init_std*np.random.randn(input_size, hidden_size)
        self.params['b1']=np.zeros(hidden_size)
        self.params['W2']=weight_init_std*np.random.randn(hidden_size, output_size)
        self.params['b2']=np.zeros(output_size)
    
    def predict(self, x):
        W1, W2=self.params['W1'],self.params['W2']
        b1, b2=self.params['b1'],self.params['b2']
        a1=np.dot(x,W1)+b1
        z1=sigmoid(a1)
        a2=np.dot(z1,W2)+b2
        y=softmax(a2)
        return y
    
    def loss(self, x, t):
        y=self.predict(x)
        return cross_entropy_error(y,t)

    def accuracy(self, x, t):
        y=self.predict(x)
        max_y=np.argmax(y, axis=1) #axis=1은 가로로 즉 한 행에서 비교. axis=2는 세로로 즉 한 열에서 비교
        #argmax는 최댓값이 있는 위치 인덱스들을 (axis=1, or 0의 경우) array의 형태로 가져다 준다. axis가 없으면 그냥 처음부터 세서 숫자.
        return np.sum(max_y==t)/float(x.shape[0]) #max_y=np.array([1,2,3,4]) t=np.array([1,0,3,0]) max_y==t는 array([ True, False,  True, False]).

    def numerical_gradients(self, x, t):
        loss_w = lambda w: network.loss(x,t)
        grads={}
        grads['W1']=numerical_gradient(loss_w, self.params['W1'])
        grads['b1']=numerical_gradient(loss_w, self.params['b1'])
        grads['W2']=numerical_gradient(loss_w, self.params['W2'])
        grads['b2']=numerical_gradient(loss_w, self.params['b2'])
        return grads

In [34]:
x=np.random.randn(100,784)
t=np.random.randn(100,10)
network=TwoLayerNetwork(input_size=784, hidden_size=50, output_size=10)
y=network.predict(x)
grads=network.numerical_gradients(x,t)

In [None]:
from dataset.mnist import load_mnist
import time
start=time.time()

(x_train, t_train), (x_test, t_test)= load_mnist(normalize=True, one_hot_label=True)
train_size= x_train.shape[0]
batch_size=100
learning_rate=0.05
network=TwoLayerNetwork(input_size=784, hidden_size=50, output_size=10)
iters_num=100
train_loss_list=[]
train_acc_list=[]
test_acc_list=[]
iter_per_epoch = max(train_size/batch_size, 1)

for i in range(iters_num):
    batch_points=np.random   .choice(train_size,batch_size)
    x_batch=x_train[batch_points]
    t_batch=t_train[batch_points]
    
    grad=network.numerical_gradients(x_batch, t_batch)
    for key in ('W1','b1','W2','b2'):
        network.params[key]-=learning_rate*grad[key]

    loss=network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    print(str(i+1)+"th trial: "+str(loss))
    if ((i+1) % iter_per_epoch)==0:
        train_acc=network.accuracy(x_train, t_train)
        test_acc=network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc:"+str(train_acc)+","+str(test_acc))
    print(str((time.time()-start)/60)+" minutes passed")

In [35]:
class MulLayer:
    def __init__ (self):
        self.x=None
        self.y=None
    def forward(self, x, y):
        self.x=x #여기서 self.x, self.y 값이 갱신되어 뒤에서 backward에 사용됨.
        self.y=y
        return x*y
    def backward(self, dout):
        dx=dout*self.y
        dy=dout*self.x
        return (dx, dy)

In [36]:
apple=100
apple_num=2
tax=1.1

apple_mullayer=MulLayer()
tax_mullayer=MulLayer()

apple_price_before_tax=apple_mullayer.forward(apple, apple_num)
apple_price=tax_mullayer.forward(apple_price_before_tax, tax)

print(apple_price)

220.00000000000003


In [37]:
dapple_price=1
dapple_price_before_tax, dtax =tax_mullayer.backward(dapple_price)
dapple, dapple_num =apple_mullayer.backward(dapple_price_before_tax)

print(dapple, dapple_num, dtax)

2.2 110.00000000000001 200


In [38]:
class AddLayer:
    def __init__(self):
        self.x
        self.y
    def forward(self, x, y):
        return x+y
    def backward(self, dout):
        dx=dout*1
        dy=dout*1
        return dx, dy

In [39]:
class MulLayer:
    def __init__ (self):
        self.x=None
        self.y=None
    def forward(self, x, y):
        self.x=x #여기서 self.x, self.y 값이 갱신되어 뒤에서 backward에 사용됨.
        self.y=y
        return x*y
    def backward(self, dout):
        dx=dout*self.y
        dy=dout*self.x
        return (dx, dy)
    
class AddLayer:
    def __init__(self):
        pass
    def forward(self, x, y):
        return x+y
    def backward(self, dout):
        dx=dout*1
        dy=dout*1
        return dx, dy

apple_price_one=100
orange_price_one=150
apple_num=2
orange_num=3
tax=1.1

apple_mullayer=MulLayer()
orange_mullayer=MulLayer()
fruits_addlayer=AddLayer()
tax_mullayer=MulLayer()

apple_price=apple_mullayer.forward(apple_price_one, apple_num)
orange_price=orange_mullayer.forward(orange_price_one, orange_num)
fruits_price=fruits_addlayer.forward(apple_price, orange_price)
final_price=tax_mullayer.forward(fruits_price, tax)

#backpropagation
dfruits_price, dtax=tax_mullayer.backward(1.0)
dapple_price, dorange_price=fruits_addlayer.backward(dfruits_price)
dapple_price_one, dapple_num=apple_mullayer.backward(dapple_price)
dorange_price_one, dorange_num=orange_mullayer.backward(dorange_price)




print(final_price)

print(dtax,dapple_price_one,dapple_num,dorange_price_one,dorange_num)



715.0000000000001
650.0 2.2 110.00000000000001 3.3000000000000003 165.0


In [40]:
import numpy as np
class Relu:
    def __init__(self):
        self.mask=None
        
    def forward(self,x):
        self.mask=(x<=0)
        out=x.copy()  #변수끼리 등호로 연결해버리면 그 둘 중 어느 하나에 영향을 주면 다른 것도 똑같이 영향을 받게 된다.
        out[self.mask]=0
        return out
    
    def backward(self, dout):
        dout[self.mask]=0
        dx=dout
        return dx
class Sigmoid:
    def __init__(self):
        self.out=None
    def forward(self, x):
        out=1/(1+np.exp(-x))
        self.out=out
        return out
    def backward(self,dout):
        return dout*self.out*(1-self.out)

In [41]:
class Affine:
    def __init__(self,W,b):
        self.W=W
        self.b=b
        self.x=None
        self.dW=None
        self.db=None
    
    def forward(self,x):
        self.x=x
        return np.dot(x,self.W)+self.b
    
    def backward(self, dout):
        dx=np.dot(dout,self.W.T)
        self.dW=np.dot(self.x.T,dout)
        self.dB=np.sum(dout, axis=0)
        return dx

In [54]:
class SoftmaxWithLoss:
    def __init__(self):
        self.loss=None
        self.y=None
        self.t=None
    def forward(self,x,t):
        self.t=t
        self.y=softmax(x)
        self.loss=cross_entropy_error(self.y, self.t)
        return self.loss
    def backward(self,dout=1):
        batch_size=self.t.shape[0]
        dx=(self.y-self.t)/batch_size
        return dx
    

In [57]:
from collections import OrderedDict

class TwoLayerNetwork:
    def __init__ (self, input_size, hidden_size, output_size, weight_init_std=0.01):
        self.params={}
        self.params['W1']=weight_init_std*np.random.randn(input_size, hidden_size)
        self.params['b1']=np.zeros(hidden_size)
        self.params['W2']=weight_init_std*np.random.randn(hidden_size, output_size)
        self.params['b2']=np.zeros(output_size)
    
        self.layers=OrderedDict()
        self.layers['Affine1']=Affine(self.params['W1'],self.params['b1'])
        self.layers['Relu1']=Relu()
        self.layers['Affine2']=Affine(self.params['W2'],self.params['b2'])
        self.lastLayer=SoftmaxWithLoss()
    
    
    def predict(self, x):
        for layer in self.layers.values():
            x=layer.forward(x)
        return x
    
    def loss(self, x, t):
        y=self.predict(x)
        return self.lastLayer.forward(y,t)

    def accuracy(self, x, t):
        y=self.predict(x)
        max_y=np.argmax(y, axis=1) #axis=1은 가로로 즉 한 행에서 비교. axis=2는 세로로 즉 한 열에서 비교
        #argmax는 최댓값이 있는 위치 인덱스들을 (axis=1, or 0의 경우) array의 형태로 가져다 준다. axis가 없으면 그냥 처음부터 세서 숫자.
        return np.sum(max_y==t)/float(x.shape[0]) #max_y=np.array([1,2,3,4]) t=np.array([1,0,3,0]) max_y==t는 array([ True, False,  True, False]).

    def numerical_gradients(self, x, t):
        loss_w = lambda w: network.loss(x,t)
        grads={}
        grads['W1']=numerical_gradient(loss_w, self.params['W1'])
        grads['b1']=numerical_gradient(loss_w, self.params['b1'])
        grads['W2']=numerical_gradient(loss_w, self.params['W2'])
        grads['b2']=numerical_gradient(loss_w, self.params['b2'])
        return grads
    
    def gradient(self, x, t):
        self.loss(x,t)
        dout=1
        dout=self.lastLayer.backward(dout)
        
        layers=list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout=layer.backward(dout)
        grads={}
        grads['W1']=self.layers['Affine1'].dW
        grads['b1']=self.layers['Affine1'].db
        grads['W2']=self.layers['Affine2'].dW
        grads['b2']=self.layers['Affine2'].db
        
        return grads

In [61]:
from dataset.mnist import load_mnist
import time
start=time.time()

(x_train, t_train), (x_test, t_test)= load_mnist(normalize=True, one_hot_label=True)
train_size= x_train.shape[0]
batch_size=1
learning_rate=0.1
network=TwoLayerNetwork(input_size=784, hidden_size=50, output_size=10)
iters_num=100
train_loss_list=[]
train_acc_list=[]
test_acc_list=[]
iter_per_epoch = max(train_size/batch_size, 1)

for i in range(iters_num):
    batch_points=np.random   .choice(train_size,batch_size)
    x_batch=x_train[batch_points]
    t_batch=t_train[batch_points]
    
    grad=network.gradient(x_batch, t_batch)
    for key in ('W1','b1','W2','b2'):
        network.params[key]-=learning_rate*grad[key]

    loss=network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    print(str(i+1)+"th trial: "+str(loss))
    if ((i+1) % iter_per_epoch)==0:
        train_acc=network.accuracy(x_train, t_train)
        test_acc=network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc:"+str(train_acc)+","+str(test_acc))
    print(str((time.time()-start)/60)+" minutes passed")

[[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]]


TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'