In [14]:
import numpy as np
import warnings
from Activations import Activation
from collections import OrderedDict
warnings.filterwarnings("ignore")

In [5]:
##data
def make_onehot(x):
    col = np.unique(x).size
    row = x.size
    data = np.zeros((row,col))
    for i in np.arange(row):
        data[i,x[i]] = 1
    return data

In [6]:
## 
from sklearn.datasets import load_iris
X = load_iris()['data']
y = load_iris()['target']

In [7]:
## y onehot
y = make_onehot(y)

In [8]:
def cross_entropy(y,t):
    epsilon = 1e-7
    y = Activation.softmax(y)
    return -np.sum(t*np.log(y+epsilon))/y.shape[0]

In [9]:
def numerical_gradient(f,x):
    h = 1e-4
    grad = np.zeros_like(x)
    it = np.nditer(x,flags=['multi_index'],op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)
        x[idx] = tmp_val - h
        fxh2 = f(x)
        grad[idx] = (fxh1-fxh2)/(2*h)
        x[idx] = tmp_val
        it.iternext()
    return grad

In [10]:
class Network:
    
    def __init__(self):
        self.W = {
            'W1':np.random.randn(4,100),
            'W2':np.random.randn(100,50),
            'W3':np.random.randn(50,3)
        }
        self.b = {
            'b1':np.random.randn(100),
            'b2':np.random.randn(50),
            'b3':np.random.randn(3)
        }
        self.Activation ={
            'sigmoid':Activation.sigmoid,
            'relu':Activation.relu,
            'softmax':Activation.softmax
        }
        
    
    def predict(self,x):
        result = np.dot(x,self.W['W1']) + self.b['b1']
        for i in range(len(self.W)-2):
            result = np.dot(result,self.W['W'+str(i+2)]) + self.b['b'+str(i+2)]
            result = Activation.relu(result)    
        result = np.dot(result,self.W['W'+str(len(self.W))]) + self.b['b'+str(len(self.W))]    
        return Activation.softmax(result)
    
    def loss(self,x,t):
        self.y = self.predict(x)
        loss = cross_entropy(self.y,t)
        return loss
    
    def gradient(self,x,t,learning_rate):
        self.learning_rate = learning_rate
        f = lambda w : self.loss(x,t)
        for i in range(len(self.W)):
            self.W['W'+str(i+1)] -= self.learning_rate*numerical_gradient(f,self.W['W'+str(i+1)])
            self.b['b'+str(i+1)] -= self.learning_rate*numerical_gradient(f,self.b['b'+str(i+1)])
            

        
    
    def accuracy(self,x,t):
        result = self.predict(x)
        acc = sum(np.argmax(result,axis=1) == np.argmax(t,axis=1))/len(t)
        return acc
        

In [20]:
model = Network()
X = load_iris()['data']
y = load_iris()['target']
y = make_onehot(y)

In [21]:
a = model.predict(X)

In [22]:
b = model.predict(X)

In [6]:
from sklearn.datasets import load_iris

In [7]:
from tensorflow import keras

In [8]:
import numpy as np

In [12]:
X = load_iris()['data']
y = load_iris()['target']
y = make_onehot(y)

In [37]:
class Relu:
    def __init__(self):
        self.mask = None
        
    def forward(self,x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out
    
    def backward(self,dout):
        dout[self.mask] = 0
        dx = dout
        return dx  

class Sigmoid:
    def __init_(self):
        self.out = None
    
    def forward(self,x):
        out = Activation.sigmoid(x)
        self.out = out
        return self.out
    
    def backward(self,dout):
        dx = dout*self.out*(1. - self.out)
        return dx


class Affine:
    def __init__(self,W,b):
        self.W = W
        self.b = b
        self.x = None
        self.original_shape = None
        self.dW = None
        self.db = None
        
    def forward(self,x):
        # shape유지
        self.original_shape = x.shape
        x = x.reshape(x.shape[0],-1)
        self.x = x
        out = np.dot(self.x,self.W) + self.b
        return out
        
    
    def backward(self,dout):
        dx = np.dot(dout,self.W.T)
        self.dW = np.dot(self.x.T,dout)
        self.db = np.sum(dout,axis=0)
        dx = dx.reshape(*self.original_shape)
        return dx

class SoftmaxWithLoss:
    
    def __init__(self):
        self.loss = None
        self.y = None
        self.t = None
    
    def forward(self,x,t):
        self.t = t
        self.y = Activation.softmax(x)
        loss = cross_entropy(self.y,t)
        return loss
    
    def backward(self,dout=1):
        dx = (self.y - self.t)/self.t.shape[0]
        return dx
        
class LeakyReLu:
    
    def __init__(self,alpha=0.01):
        self.alpha = alpha
    
    def forward(self,x):
        out = np.where(x>0,x,self.alpha*x)
        return out
    
    def backward(self,dout):
        dx = np.where(dout >0,dout,self.alpha*dout) 
        return dx

In [49]:
class MultiNet:
    def __init__(self):
        self.W = {
            'W1':np.random.randn(4,100),
            'W2':np.random.randn(100,50),
            'W3':np.random.randn(50,3),
            'b1':np.random.randn(100),
            'b2':np.random.randn(50),
            'b3':np.random.randn(3),
        }
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.W['W1'],self.W['b1'])
        self.layers['relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.W['W2'],self.W['b2'])
        self.layers['relu2'] = Relu()
        self.layers['Affine3'] = Affine(self.W['W3'],self.W['b3'])
        self.last_layer = SoftmaxWithLoss()
    
    
    def predict(self,x):
        for layer in self.layers.values():
            x  = layer.forward(x)
        return x
              
    def loss(self,x,t):
        y = self.predict(x)   
        return self.last_layer.forward(y,t)
    
    def _numeric_gradient(self,x,t,learning_rate):
        self.learning_rate = learning_rate
        f = lambda w : self.loss(x,t)
        for i in range(3):
            self.W['W'+str(i+1)] -= self.learning_rate*numerical_gradient(f,self.W['W'+str(i+1)])
            self.W['b'+str(i+1)] -= self.learning_rate*numerical_gradient(f,self.W['b'+str(i+1)])
    
    def accuracy(self,x,t):
        result = self.predict(x)
        acc = sum(np.argmax(result,axis=1) == np.argmax(t,axis=1))/len(t)
        return acc
    
    def gradient(self,x,t,learning_rate):
        ## forward
        self.loss(x,t)
        ## backward
        dout = 1
        dout = self.last_layer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
        grads = {}
#         grads['W1'] -= learning_rate*self.layers['Affine1'].dW
#         grads['W2'] -= learning_rate*self.layers['Affine2'].dW
#         grads['W3'] -= learning_rate*self.layers['Affine3'].dW
#         grads['b1'] -= learning_rate*self.layers['Affine1'].db
#         grads['b2'] -= learning_rate*self.layers['Affine2'].db
#         grads['b3'] -= learning_rate*self.layers['Affine3'].db
        
        self.layers['Affine1'].dW -= learning_rate*self.layers['Affine1'].dW
        self.layers['Affine2'].dW -= learning_rate*self.layers['Affine2'].dW
        self.layers['Affine3'].dW -= learning_rate*self.layers['Affine3'].dW
        self.layers['Affine1'].dW -= learning_rate*self.layers['Affine1'].db
        self.layers['Affine2'].dW -= learning_rate*self.layers['Affine2'].db
        self.layers['Affine3'].dW -= learning_rate*self.layers['Affine3'].db
        print(self.layers['Affine1'].dW)
        return grads
        
        


In [52]:
model = MultiNet()

In [53]:
model.loss(X,y)

1.2181110081880455

In [None]:
1.218111004962869

In [48]:
epochs = 1000
for epoch in range(epochs):
    model.gradient(X,y,1e-3)
    print(epoch+1,"===",model.loss(X,y))

1 === 1.218111004962869
2 === 1.218111004962869
3 === 1.218111004962869
4 === 1.218111004962869
5 === 1.218111004962869
6 === 1.218111004962869
7 === 1.218111004962869
8 === 1.218111004962869
9 === 1.218111004962869
10 === 1.218111004962869
11 === 1.218111004962869
12 === 1.218111004962869
13 === 1.218111004962869
14 === 1.218111004962869
15 === 1.218111004962869
16 === 1.218111004962869
17 === 1.218111004962869
18 === 1.218111004962869
19 === 1.218111004962869
20 === 1.218111004962869
21 === 1.218111004962869
22 === 1.218111004962869
23 === 1.218111004962869
24 === 1.218111004962869
25 === 1.218111004962869
26 === 1.218111004962869
27 === 1.218111004962869
28 === 1.218111004962869
29 === 1.218111004962869
30 === 1.218111004962869
31 === 1.218111004962869
32 === 1.218111004962869
33 === 1.218111004962869
34 === 1.218111004962869
35 === 1.218111004962869
36 === 1.218111004962869
37 === 1.218111004962869
38 === 1.218111004962869
39 === 1.218111004962869
40 === 1.218111004962869
41 === 1.

In [194]:
np.sum(np.argmax(model.predict(X),axis=1) == np.argmax(y,axis=1) )

50

In [226]:
y = np.arange(10)
t = np.arange(5,15)

In [228]:
(y - t)/10

array([-0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5, -0.5])