In [3]:
import numpy as np
from mnist import load_mnist

In [4]:

np.random.seed(1)

def get_data():
    (x_train, y_train), (x_test, y_test) = \
    load_mnist(normalize=True, flatten=True, one_hot_label=True)
    return x_train, y_train, x_test, y_test
x_train, y_train, x_test, y_test = get_data()


In [5]:
x_train, y_train, x_test, y_test = get_data()

In [18]:
parameters = [784, 30, 20, 10]
W_dict = {}
W_dict['W1'] = np.random.randn(784, 30) * 0.01
W_dict['W2'] = np.random.randn(30, 20) * 0.01
W_dict['W3'] = np.random.randn(20, 10) * 0.01

x_tt = x_train[1:500]
y_tt = y_train[1:500]

In [19]:
def Softmax(ScoreMatrix):
    if ScoreMatrix.ndim == 2:
        temp = ScoreMatrix
        temp = temp -np.max(temp, axis = 1, keepdims=True)
        softmax_temp = np.exp(temp) / np.sum(np.exp(temp), axis=1, keepdims=True)
        return softmax_temp
    temp = ScoreMatrix
    temp = temp -np.max(temp, axis = 0)
    softmax_temp = np.exp(temp) / np.sum(np.exp(temp), axis=0)
    return softmax_temp

In [20]:
class LinearLayer:
    
    def __init__(self, W, X):
        self.W = W
        self.X = X

    
    def computeScore(self):
        self.Score = np.dot(self.X, self.W)
        return self.Score
    
    def deltaX(self, dScore): # dS / dA -> 
        dActivation = np.dot(dScore, self.W.T)
        return dActivation

    def deltaW(self, dScore):
        self.dW = np.dot(self.X.T, dScore)
        return self.dW
    
    def backward(self, dScore):
        dActivation = self.deltaX(dScore)
        dW = self.deltaW(dScore)
        return dW, dActivation
    

In [21]:
class Relu:
    
    def __init__(self, Score):
        self.Score = Score
        self.mask = None
    
    def computeActivation(self):
        self.mask = (self.Score <=0 )
        self.Activation = self.Score.copy()
        self.Activation[self.mask] = 0
        return self.Activation
    
    def backward(self, dActivation): # dR / dS 실행 
        dActivation[self.mask] = 0
        dScore = dActivation
        return dScore
    

In [22]:
class two_hidden_layer:
    
    def __init__(self, X, W_dict):
        self.X = X
        self.layer_dict = {}
        self.relu_dict = {}
        self.W_dict = W_dict
        self.Layer = {}
    
    def forward(self, X, Y):

        LL1 = LinearLayer(self.W_dict['W1'], X)
        Relu1 = Relu(LL1.computeScore())
        Layer1 = (LL1, Relu1)
        LL2 = LinearLayer(self.W_dict['W2'], Relu1.computeActivation())
        Relu2 = Relu(LL2.computeScore())
        Layer2 = (LL2, Relu2)
        LL3 = LinearLayer(self.W_dict['W3'], Relu2.computeActivation())
        Relu3 = Relu(LL3.computeScore())
        Layer3 = (LL3, Relu3)
        y_softmax = Softmax(Layer3[1].computeActivation())
        
        loss = -np.sum(Y * np.log(y_softmax)) / Y.shape[0]
        
        self.Layer = [Layer1, Layer2, Layer3]
    
        return loss, y_softmax
    
    def backward(self, y_softmax, Y):
        #deltaLoss/deltaRelu3
        dActivationLast = y_softmax - Y / Y.shape[0]
        dLayer3Score = self.Layer[2][1].backward(dActivationLast)
        dW3, dActivation3 = self.Layer[2][0].backward(dLayer3Score)
        dLayer2Score = self.Layer[1][1].backward(dActivation3)
        dW2, dActivation2 = self.Layer[1][0].backward(dLayer2Score)
        dLayer1Score = self.Layer[0][1].backward(dActivation2)
        dW1, dActivation1 = self.Layer[0][0].backward(dLayer1Score)
        

    def optimizer(self, x_train, y_train, x_test, y_test, learning_rate=0.1, epoch=1000):
        for i in range(100):
            loss, y_softmax = self.forward(x_train, y_train)
            self.backward(y_softmax, y_train)
            self.Layer[0][0].W -= learning_rate * self.Layer[0][0].dW
            self.Layer[1][0].W -= learning_rate * self.Layer[1][0].dW
            self.Layer[2][0].W -= learning_rate * self.Layer[2][0].dW
            print("loss : ", loss)
    

In [23]:
NN = two_hidden_layer(x_tt, W_dict)

NN.optimizer(x_tt, y_tt, x_test, y_test)

loss :  2.3025825790242744
loss :  2.3025851029349163
loss :  2.302585101758516
loss :  2.3025851008384093
loss :  2.3025850999196917
loss :  2.302585099002172
loss :  2.30258509808566
loss :  2.302585097169966
loss :  2.3025850962549
loss :  2.302585095340274
loss :  2.3025850944258974
loss :  2.302585093511582
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3025850929940455
loss :  2.3