In [1]:
import numpy as np

In [2]:
class Layer:
    """Common base type for the network layers in this file.

    Adds no behavior of its own.  The subclasses defined here (Dense,
    Sigmoid, Reveal) each provide ``initialize()``, ``forward(x)`` and
    ``backward(d_out)``.
    """

In [3]:
class Dense(Layer):
    """Fully connected layer computing ``x.dot(weights) + bias``.

    The layer owns its parameters and updates them itself during
    ``backward`` with a plain gradient-descent step scaled by
    ``learning_rate``.

    NOTE(review): ``backward`` calls ``.sum0()`` and ``.neg()`` on its
    operands.  These are not NumPy ndarray methods, so the inputs are
    presumably a project tensor type -- confirm against the caller.
    """

    def __init__(self, num_nodes, num_features, learning_rate=.1):
        """Store the layer dimensions; parameters stay unset until initialize().

        Args:
            num_nodes: number of output columns (second weight dimension).
            num_features: number of input columns (first weight dimension).
            learning_rate: factor applied to gradients in ``backward``.
        """
        self.learning_rate = learning_rate
        self.num_nodes = num_nodes
        self.num_features = num_features
        self.weights = None
        self.bias = None
        # Input of the most recent forward pass; read by backward().
        self.cache = None

    def initialize(self):
        """Draw random weights (scaled by 0.1) and zero the bias."""
        self.weights = .1 * np.random.randn(self.num_features, self.num_nodes)
        self.bias = np.zeros((1, self.num_nodes))

    # NOTE: disabled precomputation hook, kept for reference.
#     def precompute_forward(self, batch_shape):
#         assert batch_shape[1] == self.num_features
#         dot_triple = generate_dot_triple(batch_shape[0], self.num_features, self.num_nodes)
#         return dot_triple

    def forward(self, x):
        """Return ``x.dot(weights) + bias`` and remember ``x`` for backward()."""
        out = x.dot(self.weights) + self.bias
        self.cache = x
        return out

    def backward(self, d_out):
        """Update the parameters and return the gradient w.r.t. the input.

        Args:
            d_out: gradient of the loss with respect to this layer's output.

        Returns:
            ``d_out.dot(weights.transpose())`` evaluated with the weights
            that were in effect during the matching forward pass.
        """
        x = self.cache
        # The external gradient must be taken BEFORE the parameter update:
        # it has to use the weights that actually produced the forward
        # output, not the freshly stepped ones.
        d_x = d_out.dot(self.weights.transpose())
        # compute gradients for internal parameters and update
        d_weights = x.transpose().dot(d_out)
        d_bias = d_out.sum0()
        self.weights = (d_weights * self.learning_rate).neg() + self.weights
        self.bias = (d_bias * self.learning_rate).neg() + self.bias
        return d_x

In [4]:
class Sigmoid(Layer):
    """Activation layer evaluating a fixed degree-9 odd polynomial.

    ``forward`` uses only multiplications and additions on its input, and
    ``backward`` applies the ``out * (1 - out)`` derivative form to the
    cached forward output.  Judging by the class name the polynomial is a
    stand-in for the logistic function.
    """

    def __init__(self):
        # Output of the latest forward pass; consumed by backward().
        self.cache = None

    def initialize(self):
        """Nothing to set up: this layer has no parameters."""

    def forward(self, x):
        """Evaluate the polynomial at ``x`` and cache the result.

        The operations (and their operand order) are exactly:
        x2 = x*x, then x3 = x2*x, x5 = x2*x3, x7 = x2*x5, x9 = x2*x7,
        followed by a left-to-right sum starting from the x9 term and
        ending with the constant 0.5.
        """
        constant_term = 0.5
        # Coefficients for x^1, x^3, x^5, x^7, x^9 respectively.
        odd_coeffs = (
            0.2159198015,
            -0.0082176259,
            0.0001825597,
            -0.0000018848,
            0.0000000072,
        )

        x_squared = x * x
        odd_powers = [x]
        for _ in range(len(odd_coeffs) - 1):
            odd_powers.append(x_squared * odd_powers[-1])

        # Accumulate from the highest power downwards, one product at a
        # time, using plain ``a + b`` (never in-place addition).
        terms = list(zip(odd_powers, odd_coeffs))
        top_power, top_coeff = terms.pop()
        total = top_power * top_coeff
        for power, coeff in reversed(terms):
            total = total + power * coeff

        out = total + constant_term
        self.cache = out
        return out

    def backward(self, d_out):
        """Return ``d_out * out * (1 - out)`` using the cached forward output."""
        out = self.cache
        scaled = d_out * out
        complement = out.neg() + 1
        return scaled * complement

In [5]:
# class Softmax(Layer):
    
#     def __init__(self):
#         pass
    
#     def initialize(self):
#         pass
    
#     def forward(self, x):
#         likelihoods = x.exp()
#         probabilities = likelihoods.div(likelihoods.sum1())
#         return probabilities
    
#     def backward(self, dout):
#         pass

In [6]:
class Reveal(Layer):
    """Layer whose forward pass calls ``reveal()`` on its input.

    NOTE(review): what ``reveal()`` does depends on the tensor type, which
    is not defined in this file -- confirm against its implementation.
    """

    def __init__(self):
        """This layer holds no state."""

    def initialize(self):
        """Nothing to set up: this layer has no parameters."""

    def forward(self, x):
        """Return the result of ``x.reveal()``."""
        revealed = x.reveal()
        return revealed

    def backward(self, d_y):
        """Pass the incoming gradient through unchanged."""
        return d_y

In [7]:
class Loss:
    """Common base type for loss objects.

    Adds no behavior of its own.  The subclass defined in this file
    (Diff) provides ``derive(y_pred, y_train)``.
    """

In [8]:
class Diff(Loss):
    """Loss whose gradient is the plain prediction error."""

    def derive(self, y_pred, y_train):
        """Return ``y_pred - y_train``."""
        residual = y_pred - y_train
        return residual

In [12]:
# class CrossEntropy(Loss):
    
#     def evaluate(self, x):
#         likelihoods = x.exp()
#         probs = likelihoods / likelihoods.sum1()
#         return probs
    
#     def derive(self, y_pred, y_correct):
#         batch_size = y_pred.shape[0]
#         bar = y_pred * y_correct
#         foo = np.sum(bar, axis=1)
#         logprobs = -np.log(foo)
#         print(y_pred, bar, foo, logprobs)
#         result = np.sum(logprobs) / batch_size
#         print(result)
#         return result

In [10]:
class Model:
    """Common base type for models.

    Adds no behavior of its own.  The subclass defined in this file
    (Sequential) provides initialize/forward/backward/fit/predict.
    """

In [11]:
class Sequential(Model):
    """Model that chains a list of layers one after another."""

    def __init__(self, layers):
        """Keep a reference to ``layers``; they are applied in the given order."""
        self.layers = layers

    def initialize(self):
        """Call ``initialize()`` on every layer, first to last."""
        for stage in self.layers:
            stage.initialize()

    def forward(self, x, train=False):
        """Feed ``x`` through every layer in order and return the final output.

        ``train`` is accepted but not used by this method.
        """
        activation = x
        for stage in self.layers:
            activation = stage.forward(activation)
        return activation

    def backward(self, y):
        """Propagate ``y`` through the layers last to first.

        Each layer's ``backward`` result is handed to the layer before it.
        Nothing is returned.
        """
        gradient = y
        for stage in reversed(self.layers):
            gradient = stage.backward(gradient)

    def fit(self, x_train, y_train, loss, epochs=1000):
        """Run ``epochs`` rounds of forward pass, ``loss.derive`` and backward pass.

        Every round uses the full ``x_train`` / ``y_train`` as given.
        """
        for _ in range(epochs):
            predictions = self.forward(x_train)
            output_gradient = loss.derive(predictions, y_train)
            self.backward(output_gradient)

    def predict(self, X):
        """Return the forward-pass output for ``X``."""
        prediction = self.forward(X)
        return prediction