In [1]:
import numpy as np
import warnings
from collections import OrderedDict
warnings.filterwarnings("ignore")
from tensorflow import keras

In [2]:
def softmax(x):
    if x.ndim == 1:
        c = max(x)
        x = x-c
        return np.exp(x)/np.sum(np.exp(x))
    else:
        x = x.T - np.max(x,axis=1)
        x = x.T
        x = (np.exp(x).T/np.sum(np.exp(x),axis=1)).T
        return x   

def cross_entropy(y,t):
    epsilon = 1e-7
    y = softmax(y)
    return -np.sum(t*np.log(y+epsilon))/y.shape[0]

def sigmoid(x):
    return 1/(np.exp(-x))


def numerical_gradient(f,x):
    h = 1e-4
    grad = np.zeros_like(x)
    it = np.nditer(x,flags=['multi_index'],op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)
        x[idx] = tmp_val - h
        fxh2 = f(x)
        grad[idx] = (fxh1-fxh2)/(2*h)
        x[idx] = tmp_val
        it.iternext()
    return grad

In [3]:
class Relu:
    def __init__(self):
        self.mask = None
        
    def forward(self,x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out
    
    def backward(self,dout):
        dout[self.mask] = 0
        dx = dout
        return dx  

class Sigmoid:
    def __init_(self):
        self.out = None
    
    def forward(self,x):
        out = sigmoid(x)
        self.out = out
        return self.out
    
    def backward(self,dout):
        dx = dout*self.out*(1. - self.out)
        return dx


class Affine:
    def __init__(self,W,b):
        self.W = W
        self.b = b
        self.x = None
        self.original_shape = None
        self.dW = None
        self.db = None
        
    def forward(self,x):
        # shape유지
        self.original_shape = x.shape
        x = x.reshape(x.shape[0],-1)
        self.x = x
        out = np.dot(self.x,self.W) + self.b
        return out
        
    
    def backward(self,dout):
        dx = np.dot(dout,self.W.T)
        self.dW = np.dot(self.x.T,dout)
        self.db = np.sum(dout,axis=0)
        dx = dx.reshape(*self.original_shape)
        return dx

class SoftmaxWithLoss:
    
    def __init__(self):
        self.loss = None
        self.y = None
        self.t = None
    
    def forward(self,x,t):
        self.t = t
        self.y = softmax(x)
        loss = cross_entropy(self.y,t)
        return loss
    
    def backward(self,dout=1):
        dx = (self.y - self.t)/self.t.shape[0]
        return dx
        
class LeakyReLu:
    
    def __init__(self,alpha=0.01):
        self.alpha = alpha
    
    def forward(self,x):
        out = np.where(x>0,x,self.alpha*x)
        return out
    
    def backward(self,dout):
        dx = np.where(dout >0,dout,self.alpha*dout) 
        return dx

In [456]:
class MultiNet:
    
    def __init__(self,input_shape,x,decay,activation):
        self.decay = decay
        self.activation = activation
        self.model = []
        self.input_shape = input_shape
        self.x = x
        w = np.random.randn(input_shape,x)*self.decay
        b = np.zeros(x)
        self.model.append([w,b,activation])
        self.activation_type = {
            'relu':Relu,
            'sigmoid':Sigmoid,
            'softmax':SoftmaxWithLoss,
        }
        
    def add(self,x,activation):
        self.input = self.model[-1][1].size
        w = np.random.randn(self.input,x)
        b = np.zeros(x)
        self.model.append([w,b,activation])
        
        
    def model_compile(self):
        self.layers = OrderedDict()
        model_length = len(self.model)
        i = 1
        model_length = len(self.model)
        for layer in self.model:
            self.layers['Affine'+str(i)] = Affine(layer[0],layer[1])
            if model_length > i:
                self.layers[layer[2]+str(i)] = self.activation_type[layer[2]]()
            i += 1
        self.last_layer = self.activation_type[self.model[-1][2]]()

    def summary():
        pass
    
    
    def predict(self,x):
        for layer in self.layers.values():
            x  = layer.forward(x)
        return x
              
    def loss(self,x,t):
        y = self.predict(x)   
        return self.last_layer.forward(y,t)
    
    def _numeric_gradient(self,x,t,learning_rate):
        self.learning_rate = learning_rate
        f = lambda w : self.loss(x,t)
        for i in range(3):
            self.W['W'+str(i+1)] -= self.learning_rate*numerical_gradient(f,self.W['W'+str(i+1)])
            self.W['b'+str(i+1)] -= self.learning_rate*numerical_gradient(f,self.W['b'+str(i+1)])
    
    def accuracy(self,x,t):
        result = self.predict(x)
        acc = sum(np.argmax(result,axis=1) == np.argmax(t,axis=1))/len(t)
        return acc
    
    def gradient(self,x,t):
        ## forward
        self.loss(x,t)
        ## backward
        dout = 1
        dout = self.last_layer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
        self.grads = {}
        i = 1
        for param in self.layers.keys():
            if 'Affine' in param:
                self.grads['W'+str(i)] = self.layers[param].dW
                self.grads['b'+str(i)] = self.layers[param].db
                i += 1
        return self.grads
    
    def fit(self,x,y,learning_rate,epochs):
        lr = learning_rate
        iterate = len(self.model)
        epochs = epochs
        for epoch in range(epochs):
            grads = self.gradient(x,y)
            for i in range(iterate):
                self.model[i][0] -= lr*self.grads['W'+str(i+1)]
                self.model[i][1] -= lr*self.grads['b'+str(i+1)]
            print(epoch+1,"====>",self.loss(X_train,y_train))
        
        

In [457]:
model = MultiNet(784,128,0.01,'relu')

In [458]:
model.add(200,'relu')
model.add(50,'relu')
model.add(10,'softmax')

In [459]:
model.model_compile()

In [460]:
from keras.datasets import mnist

In [461]:
(X_train, y_train),(X_test, y_test) = mnist.load_data()

In [462]:
X_train = X_train.reshape(-1,28*28)

In [463]:
y_train = keras.utils.to_categorical(y_train)

In [465]:
y_test = keras.utils.to_categorical(y_test)

In [466]:
model.accuracy(X_test,y_test)

0.4184

In [464]:
model.fit(X_train,y_train,1e-5,100)

1 ====> 2.265075878403801
2 ====> 2.3540315827169103
3 ====> 2.2724644116342962
4 ====> 2.3695824010670443
5 ====> 2.3276635379986983
6 ====> 2.3622652149455825
7 ====> 2.324361829672658
8 ====> 2.150635559080713
9 ====> 2.3036683157975046
10 ====> 2.353607127115264
11 ====> 2.352181803768594
12 ====> 2.3183582417373128
13 ====> 2.2030122187859686
14 ====> 2.1609250518841003
15 ====> 2.0596620229507754
16 ====> 2.049229081641049
17 ====> 2.188551368905756
18 ====> 2.1510761897639603
19 ====> 2.0201327787993253
20 ====> 1.9811779600517827
21 ====> 1.9309284159195743
22 ====> 1.9242126863717846
23 ====> 1.909378078021686
24 ====> 1.9437143290691667
25 ====> 1.9155819574984512
26 ====> 1.9525895375503617
27 ====> 1.9490811954543423
28 ====> 1.941639041396293
29 ====> 1.9760335137756802
30 ====> 1.9576380923699896
31 ====> 1.986049485983702
32 ====> 1.9604630448339662
33 ====> 1.9979548237252935
34 ====> 2.008969834307227
35 ====> 1.9878893897004817
36 ====> 2.0023869818617293
37 ====> 1.9

KeyboardInterrupt: 

In [402]:
model.accuracy(X_test,keras.utils.to_categorical(y_test))

0.1

In [401]:
lr = 1e-3
iterate = int(len(grads.keys())/2)
epochs = 1000
for epoch in range(epochs):
    grads = model.gradient(X_train,y_train)
    for i in range(iterate):
        model.model[i][0] -= lr*grads['W'+str(i+1)]
        model.model[i][1] -= lr*grads['b'+str(i+1)]
    print(epoch+1,"====>",model.loss(X_train,y_train))

1 ====> 2.36121574059787
2 ====> 2.3611490739805863
3 ====> 2.3611490739805863
4 ====> 2.3611490739805863
5 ====> 2.3611490739805863
6 ====> 2.3611490739805863
7 ====> 2.3611490739805854
8 ====> 2.3025840977704006
9 ====> 2.3025840977694454
10 ====> 2.3025840977684924
11 ====> 2.3025840977675385
12 ====> 2.3025840977665855
13 ====> 2.3025840977656302
14 ====> 2.302584097764678
15 ====> 2.302584097763724
16 ====> 2.3025840977627716
17 ====> 2.30258409776182
18 ====> 2.3025840977608687
19 ====> 2.302584097759915
20 ====> 2.3025840977589644
21 ====> 2.3025840977580123
22 ====> 2.30258409775706


KeyboardInterrupt: 

In [378]:
W1 = model.model[0][0]
b1 = model.model[0][1]

[[array([[ 0.00301538, -0.00591932,  0.00484845, ..., -0.00456069,
          -0.00436745, -0.00992276],
         [-0.01258822,  0.01784659,  0.00194317, ..., -0.01271341,
          -0.00886225,  0.00432012],
         [ 0.00952149,  0.01199066,  0.01222625, ..., -0.00474682,
           0.0069134 , -0.00979737],
         ...,
         [-0.01414463, -0.01260081,  0.01324462, ..., -0.01320817,
           0.00880258, -0.00138631],
         [ 0.00389278, -0.00295483,  0.00193241, ..., -0.02637396,
           0.01194516,  0.00316849],
         [-0.00303759,  0.00625468,  0.01195512, ..., -0.00302367,
          -0.00265947,  0.0061012 ]]),
  array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,