In [3]:
import functools
import logging
from typing import (
    Any,
    Dict,
    Final,
    Generator,
    List,
    Optional,
)

import numpy as np

# Logging

In [None]:
# Configure the root logger to emit DEBUG-level (and higher) records.
logging.basicConfig(level=logging.DEBUG)

# Loss 

In [4]:
class SoftmaxWithLogLoss:
    """Softmax activation fused with a cross-entropy (log) loss.

    `forward` caches the softmax output and the labels so that `backward`
    can compute the gradient of the loss with respect to the layer input.

    NOTE: `forward` relies on `softmax` and `cross_entropy_error`, which must
    already be defined in the notebook namespace (they are not defined in
    this cell).
    """

    def __init__(self):
        self.loss = None
        self.y = None  # softmax output, cached by forward()
        self.t = None  # teacher (label) data, cached by forward()

    def forward(self, x, t):
        """Compute the loss for input scores `x` against labels `t`.

        Args:
            x: Input passed to `softmax`.
            t: Labels; either one-hot (same number of elements as the softmax
               output) or an array of class indices, one per row.
        Returns:
            The value returned by `cross_entropy_error(softmax(x), t)`.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        """Back-propagate the gradient of the loss to the layer input.

        Must be called after `forward`, which provides `self.y` and `self.t`.

        Args:
            dout: Upstream gradient that scales the result (chain rule).
                  Defaults to 1, i.e. this layer is the end of the graph.
        Returns:
            dout * (y - t) / batch_size, with the same shape as the softmax output.
        """
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:  # labels are one-hot vectors
            dx = (self.y - self.t) / batch_size
        else:
            # Labels are class indices: subtracting 1 at the true-class column
            # of each row is equivalent to `y - one_hot(t)`.
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size

        # Apply the upstream gradient instead of silently ignoring it.
        return dx * dout


# Optimizer

In [5]:
class SGD:
    """Plain stochastic gradient descent: param -= lr * grad."""

    def __init__(self, lr=0.01):
        # Learning rate (step size) applied to every gradient.
        self.lr = lr

    def update(self, params, grads):
        """Apply one gradient-descent step to every entry of `params`.

        Args:
            params: Mapping of parameter name to value. Each entry is decremented
                    with `-=`, so NumPy arrays are modified in place.
            grads: Mapping with the same keys as `params`, holding the gradients.
        """
        for name in params:
            step = self.lr * grads[name]
            params[name] -= step

In [6]:
def coroutine(func):
    """Decorator that returns an already-primed generator-based coroutine.

    A generator cannot accept `send(value)` until it has been advanced to its
    first `yield`. The wrapper performs that initial `next()` so callers can
    `send()` immediately after calling the decorated function.

    Args:
        func: Generator function to wrap.
    Returns:
        A function with the same call signature as `func` that returns the
        generator after advancing it to its first `yield`.
    """
    @functools.wraps(func)  # keep the wrapped function's __name__/__doc__
    def start(*args, **kwargs):
        cr = func(*args, **kwargs)
        next(cr)  # run up to the first `yield`; the value yielded there is discarded
        return cr

    return start

In [11]:
class Affine(object):
    """Affine (MatMul) layer computing Y = X @ W.T, driven through coroutines.

    Shapes
    ------
    X: (m, n)  Batch of m rows with n features each. By this layer's convention
               the bias is carried as a feature: x(j)(0) is expected to be 1.
    W: (u, n)  One weight vector per node, u = num_nodes. w(k)(0) is the bias
               weight of the k-th node.
    Y: (m, u)  Layer output.

    Usage
    -----
        layer = Affine(W, num_nodes, optimizer, layers=[])
        fwd, bwd = layer.forward(), layer.backward()   # both are already primed
        Y = fwd.send(X)      # forward pass
        dX = bwd.send(dY)    # backward pass; also updates W through the optimizer

    A failed shape assertion inside `forward`/`backward` terminates that
    coroutine; call the method again to obtain a fresh one.
    """
    # --------------------------------------------------------------------------------
    # Instance initialization
    # --------------------------------------------------------------------------------
    def __init__(
        self,
        W: np.ndarray,
        num_nodes: int,
        optimizer: object,
        layers: List[Generator[Any, np.ndarray, None]],
        log_level: int = logging.ERROR
    ):
        """Initialize an affine layer that has 'num_nodes' nodes
        Args:
            W: Array of weight vectors, one row per node: shape (num_nodes, n).
            num_nodes: Number of nodes in the layer.
            optimizer: Gradient descent implementation. Either an object with an
                `update(params, grads)` method taking dicts (like `SGD`), or a
                callable `(W, dW) -> new W`.
            layers: Primed coroutines of the posterior layers to which the
                output Y is sent on every forward pass. May be empty or None.
            log_level: Level set on this module's logger.
        Raises:
            AssertionError: If W is not 2-D or its row count differs from num_nodes.
        """
        # --------------------------------------------------------------------------------
        # Validate the expected dimensions: W has one row per node.
        # --------------------------------------------------------------------------------
        assert np.ndim(W) == 2, \
            f"W must be a 2-D array of shape (num_nodes, n), got shape {np.shape(W)}"
        assert W.shape[0] == num_nodes, \
            f"W has {W.shape[0]} weight vectors that should have matched with num_nodes {num_nodes}"

        # An optimizer may update W in place with float gradients, which NumPy
        # rejects for integer arrays, so non-float weights are converted once here.
        if not np.issubdtype(W.dtype, np.floating):
            W = W.astype(float)

        # W: node weights
        self.u: int = num_nodes                          # number of nodes in the layer
        self.n: int = W.shape[1]                         # number of features in x
        self.W: np.ndarray = W                           # node weight vectors (u, n)
        self.dW: np.ndarray = np.empty((0, self.n))      # gradient of W, set by backward

        # X: batch input
        self.m: int = -1                                 # batch size; -1 until forward runs
        self.X: np.ndarray = np.empty((0, self.n))       # last batch input (m, n)
        self.dX: np.ndarray = np.empty((0, self.n))      # gradient of X, set by backward

        self.optimizer: object = optimizer
        self.layers: List[Generator[Any, np.ndarray, None]] = \
            layers if layers is not None else []

        logging.basicConfig()
        self._log_level = log_level
        self._logger = logging.getLogger(__name__)
        self._logger.setLevel(log_level)

    @staticmethod
    def _forward(Y: np.ndarray, layer: Generator[Any, np.ndarray, None]) -> Any:
        """Send the affine output Y to the next layer
        Args:
            Y: Affine output
            layer: Primed coroutine of the layer to propagate Y to.
        Returns:
            Whatever the layer yields in response to receiving Y.
        """
        return layer.send(Y)

    def _apply_optimizer(self) -> None:
        """Update self.W from self.dW using the configured optimizer."""
        if hasattr(self.optimizer, "update"):
            # Dict-based API, e.g. SGD.update(params, grads).
            params = {"W": self.W}
            self.optimizer.update(params, {"W": self.dW})
            self.W = params["W"]    # in case the optimizer rebinds instead of mutating
        elif callable(self.optimizer):
            self.W = self.optimizer(self.W, self.dW)
        else:
            raise TypeError(
                "optimizer must provide update(params, grads) or be callable as "
                f"optimizer(W, dW), got {type(self.optimizer).__name__}"
            )

    @coroutine
    def forward(self):
        """Forward propagation of the affine layer output Y = X @ W.T

        Coroutine protocol: `send(X)` returns Y for that batch. Y is also sent
        to every coroutine in `self.layers`.
        """
        Y: Optional[np.ndarray] = None
        while True:
            X = np.asarray((yield Y))

            # Validate before multiplying so that a bad batch gives a clear message.
            assert X.ndim == 2 and X.shape[1] == self.n, \
                f"number of input x features {X.shape[1:]} must match " \
                f"that of weight vector {self.n}"

            self.X = X
            self.m = X.shape[0]

            # --------------------------------------------------------------------------------
            # X:shape(m, n) W.T:shape(n, u) -> Y:shape(m, u)
            # --------------------------------------------------------------------------------
            Y = np.matmul(X, self.W.T)
            self._logger.debug("forward: X%s -> Y%s", X.shape, Y.shape)

            for layer in self.layers:
                self._forward(Y, layer)

    @coroutine
    def backward(self):
        """Back propagation through the affine layer.

        Coroutine protocol: `send(dY)` returns dX, where dY is the gradient
        back-propagated from the posterior layer with the shape of Y (m, u).
        Each call also stores dW and lets the optimizer update W.
        `forward` must have processed a batch first.
        """
        dX: Optional[np.ndarray] = None
        while True:
            dY = np.asarray((yield dX))

            # --------------------------------------------------------------------------------
            # dY shape must match that of Y(m, u)
            # --------------------------------------------------------------------------------
            assert dY.shape == (self.m, self.u), \
                "gradient dy shape {} must match output Y shape ({}, {})".format(
                    dY.shape, self.m, self.u
                )

            # dX uses the same W as the forward pass, so compute it before the update.
            # dY:(m, u) @ W:(u, n) -> dX:(m, n)
            dX = np.matmul(dY, self.W)
            self.dX = dX

            # --------------------------------------------------------------------------------
            # Gradient descent on W. dY.T:(u, m) @ X:(m, n) -> dW:(u, n)
            # --------------------------------------------------------------------------------
            self.dW = np.matmul(dY.T, self.X)
            self._apply_optimizer()
            self._logger.debug("backward: dY%s -> dX%s", dY.shape, dX.shape)

In [19]:
import numpy as np
class Hoge(object):
    """Scratch class for experimenting with a coroutine-style forward pass."""

    def __init__(self, units, weights, optimizer):
        """Store the weights and start the forward coroutine.

        Args:
            units: Not used by this class.
            weights: Weight array; its first dimension is taken as the feature count.
            optimizer: Not used by this class.
        """
        feature_count = weights.shape[0]

        # Weights and their (not yet computed) gradient
        self.w: np.ndarray = weights
        self.dw: Optional[np.ndarray] = None
        self.n: int = feature_count          # number of features expected

        # Batch state: empty input and unknown batch size until a batch arrives
        self.m: int = -1
        self.X: np.ndarray = np.empty((0, feature_count))

        # Replace the bound method with its primed generator, so callers use
        # `instance.forward.send(X)` directly.
        self.forward = self.forward()

    @coroutine
    def forward(self):
        """Receive batches via send(); record and print each one."""
        while True:
            batch = yield
            self.X = batch

            print("foward got \n{}".format(batch))
            if batch is None:
                self.m = -1
            else:
                self.m = batch.shape[0]
            
# Build a Hoge from a 3x4 integer weight array; the constructor also starts its forward coroutine.
hoge = Hoge(4, np.arange(12).reshape((3, 4)), None)

In [20]:
# Push a 2x3 batch into the already-primed forward coroutine, which prints what it receives.
hoge.forward.send(np.arange(6).reshape((2, 3)))
#hoge.forward.close()

foward got [[0 1 2]
 [3 4 5]]


In [2]:
import numpy as np
# Scratch check: np.empty takes the shape as a single tuple; (5, 0) yields an array with no elements.
np.empty((5, 0))

array([], shape=(5, 0), dtype=float64)

In [4]:
# Scratch check: np.array_equal accepts scalars too; 1 and 2 compare as not equal.
np.array_equal(1, 2)

False

[[ 38.  56.]
 [ 92. 137.]]


True