In [1]:
import numpy as np

In [2]:
class Layer:
    """Marker base class for network layers.

    It defines no behaviour of its own; the layers deriving from it in this
    file each provide ``initialize()``, ``forward()`` and ``backward()``.
    """

In [3]:
class Dense(Layer):
    """Fully connected layer computing ``y = x.dot(weights) + bias``.

    Parameters are created by ``initialize()`` and updated in place by
    ``backward()`` with a plain gradient-descent step.
    """

    def __init__(self, num_nodes, num_features, learning_rate=.01):
        """Record the layer dimensions; parameters stay unset until initialize().

        Args:
            num_nodes: number of columns of the weight matrix (layer outputs).
            num_features: number of rows of the weight matrix (layer inputs).
            learning_rate: factor applied to the gradients in ``backward()``.
        """
        self.learning_rate = learning_rate
        self.num_nodes = num_nodes
        self.num_features = num_features
        self.weights = None
        self.bias = None
        # Input of the most recent forward() call; backward() reads it.
        self.cache = None

    def initialize(self):
        """Draw small random weights and reset the bias to zero.

        ``weights`` gets shape ``(num_features, num_nodes)`` with normally
        distributed entries scaled by 0.01; ``bias`` gets shape
        ``(1, num_nodes)``.
        """
        self.weights = .01 * np.random.randn(self.num_features, self.num_nodes)
        self.bias = np.zeros((1, self.num_nodes))

    def forward(self, x):
        """Return ``x.dot(weights) + bias`` and remember ``x`` for backward()."""
        y = x.dot(self.weights) + self.bias
        self.cache = x
        return y

    def backward(self, d_y):
        """Apply one gradient step and return the gradient w.r.t. the input.

        Args:
            d_y: gradient of the loss w.r.t. this layer's output. It must
                support ``dot`` and ``sum0()`` (presumably a sum over the
                batch axis -- confirm against the tensor type in use).

        Returns:
            ``d_y.dot(weights.transpose())`` evaluated with the weights that
            were used in the forward pass.
        """
        x = self.cache
        # The input gradient has to be taken with the weights that produced
        # the forward output, so compute it *before* the parameters move.
        d_x = d_y.dot(self.weights.transpose())
        # compute gradients for internal parameters and update
        d_weights = x.transpose().dot(d_y)
        d_bias = d_y.sum0()
        self.weights = (d_weights * self.learning_rate).neg() + self.weights
        self.bias = (d_bias * self.learning_rate).neg() + self.bias
        return d_x

In [4]:
class SigmoidExact(Layer):
    """Sigmoid activation built from the tensor's ``neg``/``exp``/``inv`` methods.

    Assuming ``inv()`` is the element-wise reciprocal, ``forward`` evaluates
    ``1 / (1 + exp(-x))``.
    """

    def __init__(self):
        # `min` and `max` are not read anywhere in this class; they are kept
        # because they are publicly visible attributes.
        self.min = 0
        self.max = 0
        # Output of the latest forward() call, consumed by backward().
        self.cache = None

    def initialize(self):
        """Nothing to set up: the layer has no parameters."""
        return None

    def forward(self, x, debug=False):
        """Return ``(x.neg().exp() + 1).inv()`` and cache it.

        ``debug`` is accepted but unused.
        """
        denominator = x.neg().exp() + 1
        activation = denominator.inv()
        self.cache = activation
        return activation

    def backward(self, d_y, debug=False):
        """Return ``d_y * y * (1 - y)`` where ``y`` is the cached output.

        ``debug`` is accepted but unused.
        """
        activation = self.cache
        scaled = d_y * activation
        return scaled * (activation.neg() + 1)

In [22]:
class Sigmoid(Layer):
    """Sigmoid-like activation evaluated as a degree-9 polynomial.

    Only multiplication and addition are applied to the input in ``forward``,
    using a constant term plus odd powers of ``x`` up to ``x**9``.
    """

    def __init__(self):
        # Output of the latest forward() call, consumed by backward().
        self.cache = None

    def initialize(self):
        """Nothing to set up: the layer has no parameters."""
        return None

    def forward(self, x, debug=False):
        """Evaluate the polynomial at ``x``, cache and return the result.

        ``debug`` is accepted but unused.
        """
        # Polynomial coefficients, indexed by the power of x they multiply.
        c0 = 0.5
        c1 = 0.2159198015
        c3 = -0.0082176259
        c5 = 0.0001825597
        c7 = -0.0000018848
        c9 = 0.0000000072

        # Odd powers are built by repeatedly multiplying with x squared.
        squared = x * x
        cubed = squared * x
        fifth = squared * cubed
        seventh = squared * fifth
        ninth = squared * seventh

        # Accumulate from the highest power down so the additions happen in
        # the same order as a single left-to-right sum expression.
        approx = ninth * c9
        for power, coeff in ((seventh, c7), (fifth, c5), (cubed, c3), (x, c1)):
            approx = approx + power * coeff
        approx = approx + c0

        self.cache = approx
        return approx

    def backward(self, d_y):
        """Return ``d_y * y * (1 - y)`` where ``y`` is the cached output."""
        activation = self.cache
        scaled = d_y * activation
        return scaled * (activation.neg() + 1)

In [6]:
class Softmax(Layer):
    """Normalises exponentiated scores: ``exp(x)`` divided by ``exp(x).sum1()``."""

    def __init__(self):
        """The layer carries no configuration."""

    def initialize(self):
        """Nothing to set up: the layer has no parameters."""
        return None

    def forward(self, x):
        """Return ``x.exp()`` divided by its ``sum1()`` and cache the result.

        ``sum1()`` is presumably a per-row sum -- confirm against the tensor
        type in use.
        """
        exponentials = x.exp()
        totals = exponentials.sum1()
        normalized = exponentials.div(totals)
        self.cache = normalized
        return normalized

    def backward(self, d_probs):
        """Return ``(cached_probs - d_probs)`` divided by the batch size.

        The batch size is taken from ``shape[0]`` of the cached output.
        """
        # TODO does the split between Softmax and CrossEntropy make sense?
        cached = self.cache
        rows = cached.shape[0]
        difference = cached - d_probs
        return difference.div(rows)

In [7]:
class Reveal(Layer):
    """Layer whose forward pass calls ``reveal()`` on its input."""

    def __init__(self):
        """The layer carries no configuration."""

    def initialize(self):
        """Nothing to set up: the layer has no parameters."""
        return None

    def forward(self, x):
        """Return ``x.reveal()``."""
        revealed = x.reveal()
        return revealed

    def backward(self, d_y):
        """Pass the incoming gradient through unchanged."""
        gradient = d_y
        return gradient

In [8]:
class Loss:
    """Marker base class for loss functions; it defines no behaviour itself."""

In [9]:
class Diff(Loss):
    """Loss whose derivative is the plain difference prediction minus target."""

    def derive(self, y_pred, y_train):
        """Return ``y_pred - y_train``."""
        residual = y_pred - y_train
        return residual

In [10]:
class CrossEntropy(Loss):
    """Cross-entropy loss between predicted and target probabilities."""

    def evaluate(self, probs_pred, probs_correct):
        """Return the target-weighted negative log-likelihood per column.

        Args:
            probs_pred: predicted probabilities; ``shape[0]`` is used as the
                batch size.
            probs_correct: target probabilities of the same layout
                (e.g. one-hot rows).

        Returns:
            ``-(log(probs_pred) * probs_correct)`` reduced with ``sum0()``
            and divided by the batch size.
        """
        batch_size = probs_pred.shape[0]
        # Take the log of the predictions first and weight by the targets
        # afterwards. Multiplying before the log would evaluate log(0) for
        # every class whose target probability is zero.
        losses = (probs_pred.log() * probs_correct).neg()
        loss = losses.sum0().div(batch_size)
        return loss

    def derive(self, y_pred, y_correct):
        """Return ``y_correct`` unchanged; ``y_pred`` is not used here.

        NOTE(review): this looks designed to pair with a layer whose
        backward pass subtracts the targets from its cached probabilities
        itself -- confirm against the model definition.
        """
        return y_correct

In [11]:
class Model:
    """Marker base class for models; it defines no behaviour itself."""

In [21]:
class Sequential(Model):
    """Model that chains layers: forward in list order, backward in reverse."""

    def __init__(self, layers):
        """Store ``layers``, the ordered collection of layer objects."""
        self.layers = layers

    def initialize(self):
        """Call ``initialize()`` on every layer, in order."""
        for current in self.layers:
            current.initialize()

    def forward(self, x):
        """Feed ``x`` through each layer's ``forward`` and return the result."""
        activation = x
        for current in self.layers:
            activation = current.forward(activation)
        return activation

    def backward(self, y):
        """Feed ``y`` through each layer's ``backward``, last layer first.

        Nothing is returned.
        """
        gradient = y
        for current in reversed(self.layers):
            gradient = current.backward(gradient)

    def fit(self, x_train, y_train, loss, batch_size=32, epochs=1000):
        """Run ``epochs`` passes of mini-batch training.

        For every batch the model output is computed, ``loss.derive`` turns
        it into a gradient, and that gradient is pushed through ``backward``.

        Args:
            x_train: training inputs, sliced along the first axis.
            y_train: training targets, sliced along the first axis.
            loss: object providing ``derive(y_pred, y)``.
            batch_size: number of rows per slice.
            epochs: number of passes over the data.
        """
        def batches(data):
            # Lazily yields consecutive slices of at most batch_size rows.
            return (
                data[start:start + batch_size]
                for start in range(0, data.shape[0], batch_size)
            )

        for _ in range(epochs):
            # Inputs and targets are chunked independently; zip stops at the
            # shorter of the two sequences of chunks.
            for x_batch, y_batch in zip(batches(x_train), batches(y_train)):
                prediction = self.forward(x_batch)
                gradient = loss.derive(prediction, y_batch)
                self.backward(gradient)

    def predict(self, x):
        """Return the forward-pass output for ``x``."""
        return self.forward(x)