In [1]:
%pylab inline
import scipy.optimize
import time
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.metrics import accuracy_score

Populating the interactive namespace from numpy and matplotlib


## Data

In [73]:
def data2numpy(D):
    X = np.array([r[0] for r in D])
    y = np.array([r[1] for r in D])
    return X, y
# [([data points], [targets])]
data_AND = data2numpy([
    ([0, 0], [0]), 
    ([0, 1], [0]),
    ([1, 0], [0]),
    ([1, 1], [1]),
])

data_OR = data2numpy([
    ([0, 0], [0]), 
    ([0, 1], [1]),
    ([1, 0], [1]),
    ([1, 1], [1]),
])

data_XOR = data2numpy([
    ([0, 0], [0]), 
    ([0, 1], [1]),
    ([1, 0], [1]),
    ([1, 1], [0]),
])

print(data_AND)
print(data_OR)
print(data_XOR)

(array([[0, 0],
       [0, 1],
       [1, 0],
       [1, 1]]), array([[0],
       [0],
       [0],
       [1]]))
(array([[0, 0],
       [0, 1],
       [1, 0],
       [1, 1]]), array([[0],
       [1],
       [1],
       [1]]))
(array([[0, 0],
       [0, 1],
       [1, 0],
       [1, 1]]), array([[0],
       [1],
       [1],
       [0]]))


## Network

In [87]:
class sigmoid:
    def f(z):
        return 1 / (1 + np.exp(-z))
    
    def df(z):
        s = sigmoid.f(z)
        return s * (1 - s)

class identity:
    def f(z):
        return z
    
    def df(z):
        return 1

class FFNN():
    def __init__(self, layers_dims=[], activation_functions=[], regularization=0.0001):
        self.layers_dims = layers_dims
        self.activation_functions = [None] + activation_functions
        weights_size = 0
        for i in range(1, len(self.layers_dims)):
            weights_size += self.layers_dims[i-1] * self.layers_dims[i]
        self.b_size = sum(layers_dims[1:])
        self.weights = np.random.rand(self.b_size + weights_size) * 0.1
        print(self.weights.shape)
        self.training_iterations = 0
        self.lambd = regularization
    
    def next_iteration(self, weights):
        self.training_iterations += 1
#         print("Iteration:", self.training_iterations)
    
    def cost(self, weights, X, y):
        nl = len(self.layers_dims) - 1
        deltas = [None for i in range(nl + 1)]
        
        self.weights = weights
        self.forward_pass(X, store_results=True)
        a = self.a
        z = self.z
        B, W = self.unfold_weights(weights)
        deltas[nl] = -(y - a[nl]) * self.activation_functions[nl].df(z[nl])
        # deltas
        for l in range(nl, 1, -1):
            deltas[l-1] = deltas[l].dot(W[l]) * self.activation_functions[l-1].df(z[l-1])
        weights_derivatives = []
        b_derivatives = []
        
        # derivatives
        for l in range(1, nl+1):
            dW = deltas[l].T.dot(a[l-1])
            weights_derivatives.append(dW.flatten())
            b_derivatives.append(deltas[l].sum(axis=0).flatten())
        cost = ((y - a[nl]) ** 2).mean()  # + self.lambd * (weights ** 2).sum()
        gradient = np.hstack(b_derivatives + weights_derivatives)

        assert gradient.shape == weights.shape
        return cost, gradient
    
    def unfold_weights(self, weights):
        used_count = self.b_size
        used_bcount = 0
        W = [None]
        B = [None]
        for i in range(1, len(self.layers_dims)):
            m, n = self.layers_dims[i], self.layers_dims[i-1]
            w = self.weights[used_count:used_count + (m * n)]
            w = w.reshape((m, n))
            b = self.weights[used_bcount:used_bcount + m]
            W.append(w)
            B.append(b)
            used_count += m * n
            used_bcount += m
        return B, W
            
    def forward_pass(self, X, store_results=False):
        used_count = self.b_size
        used_bcount = 0
        Xl = X
        B, W = self.unfold_weights(self.weights)
        if store_results:
            self.a = [X]
            self.z = [X]
        for i in range(1, len(self.layers_dims)):
            zl = Xl.dot(W[i].T) + B[i]
            Xl = self.activation_functions[i].f(zl)
            if store_results:
                self.z.append(zl)
                self.a.append(Xl)
        return Xl 
    
    def fit(self, X, y):
        res = scipy.optimize.minimize(
            fun=self.cost,
            x0=self.weights,
            args=(X, y),
            method='L-BFGS-B',
            jac=True,
            tol=1e-30,
            options={'maxiter': 100, 'disp': True},
            callback=self.next_iteration,
        )
        self.weights = res.x


for X, y in [data_AND, data_OR, data_XOR]:    
# for X, y in [data_XOR]:        

    nn = FFNN([2, 2, 1], [sigmoid, sigmoid, sigmoid])
#     nn = FFNN([2, 4, 1], [identity, identity, identity])
#     print(nn.cost(nn.weights, X, y))
    nn.fit(X, y)
    print(nn.forward_pass(X))
#     print(nn.cost(nn.weights, X, y))
    print()

(9,)
[[  4.10499947e-47]
 [  4.45904396e-20]
 [  4.15919161e-20]
 [  1.00000000e+00]]

(9,)
[[  1.49165248e-25]
 [  1.00000000e+00]
 [  1.00000000e+00]
 [  1.00000000e+00]]

(9,)
[[  7.23586414e-12]
 [  1.00000000e+00]
 [  5.00000004e-01]
 [  5.00000004e-01]]



In [4]:
import scipy as sp
sp.__version__

'0.16.0'