In [2]:
import numpy as np
# Repeat node demo: the forward pass copies one row N times, and the
# backward pass sums the N upstream gradient rows back into a single row.
D = 8
N = 7
x = np.random.randn(1, D)
y = x.repeat(N, axis=0)  # forward: (1, D) -> (N, D)

dy = np.random.randn(N, D)  # random stand-in for the upstream gradient
dx = dy.sum(axis=0, keepdims=True)  # backward: (N, D) -> (1, D)

In [3]:
import numpy as np
# Sum node demo: the forward pass collapses N rows into one by summation,
# and the backward pass copies the single gradient row back out to N rows.
D = 8
N = 7
x = np.random.randn(N, D)
y = x.sum(axis=0, keepdims=True)  # forward: (N, D) -> (1, D)

dy = np.random.randn(1, D)  # random stand-in for the upstream gradient
dx = dy.repeat(N, axis=0)  # backward: (1, D) -> (N, D)

In [52]:
class MatMul:
    """Matrix-product layer: ``forward`` computes ``x @ W``.

    Attributes:
        params: one-element list holding the weight array ``W``.
        grads: one-element list holding a gradient buffer shaped like ``W``;
            ``backward`` overwrites it in place.
        x: input of the most recent ``forward`` call (``None`` before that).
    """

    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(p) for p in self.params]
        self.x = None

    def forward(self, x):
        """Return ``np.matmul(x, W)`` and remember ``x`` for ``backward``."""
        (weight,) = self.params
        product = np.matmul(x, weight)
        self.x = x
        return product

    def backward(self, dout):
        """Store the weight gradient and return the input gradient.

        Args:
            dout: upstream gradient; must be matmul-compatible with ``W.T``
                and with the cached input's transpose.

        Returns:
            ``np.matmul(dout, W.T)``, the gradient with respect to the input.
        """
        (weight,) = self.params
        grad_input = np.matmul(dout, weight.T)
        grad_weight = np.matmul(self.x.T, dout)
        # Write into the existing buffer so external references to
        # self.grads[0] keep seeing the latest gradient.
        self.grads[0][...] = grad_weight
        return grad_input

In [53]:
class Sigmoid:
    """Element-wise logistic sigmoid layer, ``1 / (1 + exp(-x))``.

    Attributes:
        params: empty list (the layer has no trainable parameters).
        grads: empty list, kept for interface parity with the other layers.
        out: output of the most recent ``forward`` call (``None`` before
            that); ``backward`` reuses it to compute the derivative.
    """

    def __init__(self):
        self.params, self.grads = [], []
        self.out = None

    def forward(self, x):
        """Apply the sigmoid element-wise to ``x`` and cache the result.

        Args:
            x: array (or scalar) of pre-activations.

        Returns:
            Array of the same shape as ``x`` with the sigmoid applied.
        """
        # The exponential is np.exp; NumPy has no `ext` attribute.
        out = 1 / (1 + np.exp(-x))
        self.out = out
        return out

    def backward(self, dout):
        """Return the gradient with respect to the input of ``forward``.

        Uses the identity sigmoid'(x) = y * (1 - y), where ``y`` is the
        output cached by the last ``forward`` call.

        Args:
            dout: upstream gradient, broadcast-compatible with the cached
                output.
        """
        dx = dout * (1.0 - self.out) * self.out
        return dx

In [54]:
class Affine:
    """Affine (fully connected) layer: ``forward`` computes ``x @ W + b``.

    Attributes:
        params: ``[W, b]``, the weight and bias arrays.
        grads: ``[dW, db]`` buffers shaped like ``W`` and ``b``; ``backward``
            overwrites them in place.
        x: input of the most recent ``forward`` call (``None`` before that).
    """

    def __init__(self, W, b):
        self.params = [W, b]
        self.grads = [np.zeros_like(p) for p in self.params]
        self.x = None

    def forward(self, x):
        """Return ``np.matmul(x, W) + b`` and remember ``x`` for ``backward``."""
        weight, bias = self.params
        result = np.matmul(x, weight) + bias
        self.x = x
        return result

    def backward(self, dout):
        """Store the parameter gradients and return the input gradient.

        Args:
            dout: upstream gradient; must be matmul-compatible with ``W.T``
                and with the cached input's transpose.

        Returns:
            ``np.matmul(dout, W.T)``, the gradient with respect to the input.
        """
        weight, _ = self.params
        grad_input = np.matmul(dout, weight.T)
        grad_weight = np.matmul(self.x.T, dout)
        # The bias is added to every row in forward, so its gradient is the
        # sum of dout over axis 0.
        grad_bias = np.sum(dout, axis=0)

        # Write into the existing buffers so external references to
        # self.grads keep seeing the latest gradients.
        for buffer, grad in zip(self.grads, (grad_weight, grad_bias)):
            buffer[...] = grad
        return grad_input

In [55]:
class SGD:
    """Plain stochastic gradient descent: ``param -= lr * grad``.

    Attributes:
        lr: learning rate that scales each gradient step.
    """

    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        """Apply one descent step to every entry of ``params``.

        Args:
            params: indexable sequence of parameters, updated via ``-=`` and
                written back at the same index.
            grads: sequence of gradients, indexed in parallel with ``params``.
        """
        step_size = self.lr
        for idx in range(len(params)):
            params[idx] -= step_size * grads[idx]