In [42]:
import numpy as np
from sklearn.datasets import load_digits
import pandas as pd

In [2]:
def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    Evaluated in an overflow-free form: only ``exp(-|z|)`` is computed, which
    always lies in (0, 1], so large negative inputs no longer trigger an
    overflow warning from ``np.exp``.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy scalar or ndarray with the same shape as ``z``, values in [0, 1].
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # never overflows
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- algebraically identical
    out = np.where(z >= 0, 1. / (1. + decay), decay / (1. + decay))
    return out[()]  # unwrap 0-d results so scalar input yields a scalar

class NeuralNetwork:
    """Two-layer fully connected network with sigmoid activations and no biases.

    ``forward`` computes ``Y = sigmoid(sigmoid(X @ W) @ V)`` and caches the
    intermediate values; ``backward`` uses that cache to return the gradients
    of the summed squared error with respect to ``W`` and ``V``.
    """

    def __init__(self, input_dim=2, hidden_dim=2, output_dim=2):
        """Create the weight matrices with small random values (0.1 * np.random.rand)."""
        self.W = 0.1 * np.random.rand(input_dim, hidden_dim)   # input  -> hidden
        self.V = 0.1 * np.random.rand(hidden_dim, output_dim)  # hidden -> output

    def forward(self, X):
        """Run the network on ``X`` and return the output activations.

        ``X`` is expected to be an (n x input_dim) matrix; a single 1-D sample
        of length input_dim also works and yields a 1-D output. The input and
        all intermediate values are stored on the instance for ``backward``.
        """
        self.X = X  # keep for backward pass

        # hidden layer
        self.H_in = np.dot(X, self.W)
        self.H = sigmoid(self.H_in)

        # output layer
        self.Y_in = np.dot(self.H, self.V)
        self.Y = sigmoid(self.Y_in)
        return self.Y

    def backward(self, T):
        """Back-propagate the squared error of the most recent ``forward`` call.

        ``T`` is expected to be an (n x output_dim) target matrix; a single
        1-D target is accepted as well. The per-sample loss is stored in
        ``self.L`` with shape (n, 1).

        Returns ``(grad_W, grad_V)``: gradients of the loss, summed over all
        samples in the batch, with the same shapes as ``self.W`` and ``self.V``.
        """
        # Promote everything to 2-D so a single 1-D sample passed to forward()
        # is handled like a batch of one; without this, the axis=1 reduction
        # and the transposes below fail or give wrong shapes for 1-D arrays.
        X = np.atleast_2d(self.X)
        H = np.atleast_2d(self.H)
        Y = np.atleast_2d(self.Y)
        T = np.atleast_2d(T)

        E = Y - T
        self.L = np.sum(E ** 2, axis=1, keepdims=True)
        grad_Y = 2 * E  # d(E^2)/dY

        # output layer
        grad_Y_in = Y * (1. - Y) * grad_Y       # sigmoid derivative
        grad_V = np.dot(H.T, grad_Y_in)         # sums outer products over the batch
        grad_H = np.dot(grad_Y_in, self.V.T)

        # hidden layer
        grad_H_in = H * (1. - H) * grad_H       # sigmoid derivative
        grad_W = np.dot(X.T, grad_H_in)         # sums outer products over the batch
        return grad_W, grad_V

In [30]:
# Network with the default layer sizes and random initial weights.
net = NeuralNetwork()

# Hand-written weight matrices.
# NOTE(review): W and V are never assigned to `net` in the visible cells --
# confirm whether they were meant to replace net.W / net.V.
W = np.array([
    [6.0, -3.0],
    [-2.0, 5.0],
])
V = np.array([
    [1.0, -2.0],
    [0.25, 2.0],
])

In [39]:
# Load scikit-learn's bundled digits dataset.
# NOTE(review): despite the variable name, load_digits returns the small
# 8x8 (64-feature) digits set, not MNIST -- confirm the name is intentional.
mnist = load_digits()

In [48]:
# Copy the feature matrix and the label vector out of the loaded dataset.
train_X, train_T = np.array(mnist.data), np.array(mnist.target)

In [54]:
# Summarise every sample by two coarse features: the mean of its first 32
# values and the mean of its last 32 values.
# The slices are [0:32] and [32:64]; the earlier [0:31] stopped one short and
# silently left index 31 out of both halves.
half_mean_rows = []
for row in train_X:
    one = np.average(row[0:32])
    two = np.average(row[32:64])
    half_mean_rows.append((one, two))  # keep the pair instead of overwriting it each pass
train_X_half_means = np.array(half_mean_rows)  # one (one, two) pair per row of train_X

In [31]:
# Toy problem: two 2-D samples, each with a one-hot target.
train_X = np.array([
    [3., 1.],
    [-1., 4.],
])
train_T = np.array([
    [1., 0.],
    [0., 1.],
])

n_epochs = 100  # number of full passes over the training set
alpha = 0.5     # step size applied to the accumulated gradient

for n in range(n_epochs):
    # start each epoch with empty gradient accumulators
    grad_W = np.zeros_like(net.W)
    grad_V = np.zeros_like(net.V)

    # feed the samples one at a time, as (1, dim) row matrices
    for i, (x_row, t_row) in enumerate(zip(train_X, train_T)):
        X = x_row[np.newaxis, :]
        T = t_row[np.newaxis, :]

        net.forward(X)
        grad_W_i, grad_V_i = net.backward(T)
        grad_W += grad_W_i
        grad_V += grad_V_i

    # one gradient-descent step per epoch
    net.W -= alpha * grad_W
    net.V -= alpha * grad_V

In [32]:
# Print the network output for each of the two training inputs.
for sample in ([3., 1.], [-1., 4.]):
    print(net.forward(np.array(sample)))


[0.90933916 0.09069354]
[0.09375718 0.90637874]


In [33]:
# Show the network's hidden-to-output weight matrix as it stands after the training loop.
print(net.V)

[[ 2.39772906 -2.39736041]
 [-2.31759965  2.31921497]]
