In [52]:
import numpy as np

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def softmax(x):
    c = np.amax(x)
    return np.exp(x - c)/(np.sum(np.exp(x - c)))

def crossEntropyError(y, t):
    if y.shape != t.shape:
        return
    
    elif y.ndim == 1 or t.ndim == 1:
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)
    
    batchSize = y.shape[0]
    return -np.sum(t*np.log(y))/batchSize

class TwoLayerNet():
    
    def __init__(self, inputSize, hidenSize, outputSize):
        self.W1 = np.random.randn(inputSize, hidenSize)
        self.W2 = np.random.randn(hidenSize, outputSize)
        self.b1 = np.random.randn(hidenSize)
        self.b2 = np.random.randn(outputSize)
        
    def output(self, x):
        y = np.empty(x.shape[0])
        for i in range(x.shape[0]):
            z1 = np.dot(x[i], self.W1) + self.b1
            z1 = sigmoid(z1)
            v1 = np.dot(z1, self.W2) + self.b2
            v2 = sigmoid(v1)
            y[i] = softmax(v2)
        return y
    
    def lossFunction(self, x, t):
        y = self.output(x)
        return crossEntropyError(y, t)
    
    def grad(self, x, t):
        g_W1 = np.zeros_like(self.W1)
        g_W2 = np.zeros_like(self.W2)
        g_b1 = np.zeros_like(self.b1)
        g_b2 = np.zeros_like(self.b2)
        h = 1e-4
        
        row, column = g_W1.shape
        for i in range(row):
            for j in range(column):
                value = self.W1[i, j]
                self.W1[i, j] = value + h
                fh1 = self.lossFunction(x, t)
                self.W1[i, j] = value - h
                fh2 = self.lossFunction(x, t)
                g_W1[i, j] = (fh1 - fh2)/(2*h)
                self.W1[i, j] = value
                
        row, column = g_W2.shape
        for i in range(row):
            for j in range(column):
                value = self.W2[i, j]
                self.W2[i, j] = value + h
                fh1 = self.lossFunction(x, t)
                self.W2[i, j] = value - h
                fh2 = self.lossFunction(x, t)
                g_W2[i, j] = (fh1 - fh2)/(2*h)
                self.W2[i, j] = value
                
        row = g_b1.size
        for i in range(row):
            value = self.b1[i]
            self.b1[i] = value + h
            fh1 = self.lossFunction(x, t)
            self.b1[i] = value - h
            fh2 = self.lossFunction(x, t)
            g_b1[i] = (fh1 - fh2)/(2*h)
            self.b1[i] = value
            
        row = g_b2.size
        for i in range(row):
            value = self.b2[i]
            self.b2[i] = value + h
            fh1 = self.lossFunction(x, t)
            self.b2[i] = value - h
            fh2 = self.lossFunction(x, t)
            g_b1[i] = (fh1 - fh2)/(2*h)
            self.b2[i] = value
        
        return (g_W1, g_W2, g_b1, g_b2)
    
    def grad_disc(self, x, t, lr = 0.01, step = 1):
        for i in range(step):
            g_W1, g_W2, g_b1, g_b2 = self.grad(x, t)
            self.W1 -= lr*g_W1
            self.W2 -= lr*g_W2
            self.b1 -= lr*g_b1
            self.b2 -= lr*g_b2

In [53]:
x = np.random.randn(10, 784)
t = np.array([[0,0,0,1,0,0,0,0,0,0],
              [0,0,0,1,0,0,0,0,0,0],
              [0,0,0,1,0,0,0,0,0,0],
              [0,0,0,1,0,0,0,0,0,0],
              [0,0,0,1,0,0,0,0,0,0],
              [0,0,0,1,0,0,0,0,0,0],
              [0,0,0,1,0,0,0,0,0,0],
              [0,0,0,1,0,0,0,0,0,0],
              [0,0,0,1,0,0,0,0,0,0],
              [0,0,0,1,0,0,0,0,0,0]])
x.shape[0]

10

In [54]:
net = TwoLayerNet(784, 64, 10)

In [55]:
print(net.output(x))

ValueError: ignored