In [1]:
from __future__ import division
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


from scipy.io import loadmat

data = loadmat("machine_learning_andrewng/ex4data1.mat")
weights = loadmat("machine_learning_andrewng/ex3weights.mat")
print(data.keys())
print(weights.keys())

dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])
dict_keys(['__header__', '__version__', '__globals__', 'Theta1', 'Theta2'])


In [2]:
X = data['X']
y = data['y']
y = pd.get_dummies(y.ravel()).values
theta1_loaded = weights["Theta1"]
theta2_loaded = weights["Theta2"]

In [3]:
def sigmoid(z):
    return(1 / (1 + np.exp(-z)))

def sigmoid_prime(z):
    return(sigmoid(z)*(1-sigmoid(z)))

In [4]:
def forward_pass(X, theta1, theta2, elaborate=False):
    a1 = np.c_[np.ones(X.shape[0]), X]
    z2 = theta1.dot(a1.T) # 25x401 * 401x5000 = 25x5000
    a2 = sigmoid(z2.T) # 5000x25
    a2 = np.c_[np.ones(a2.shape[0]), a2] # 5000x26
    z3 = theta2.dot(a2.T) # 10x26 * 26x5000 = 10x5000
    a3 = sigmoid(z3.T) # 5000x10
    if elaborate:
        return ((X, a1, a2, a3), (z2, z3))
    return a3

In [5]:
def sci_forward_pass(thetas, X, y, elaborate=True, *args):
    print("thetas size:", thetas.shape)
    print("X size:", X.shape)
    print("y size:", y_true.shape)
    theta1 = thetas[:10025]
    theta1 = theta1.reshape(25, 401)
    theta2 = thetas[10025:]
    theta2 = theta2.reshape(10, 26)
    a1 = np.c_[np.ones(X.shape[0]), X]
    z2 = theta1.dot(a1.T) # 25x401 * 401x5000 = 25x5000
    a2 = sigmoid(z2.T) # 5000x25
    a2 = np.c_[np.ones(a1.shape[0]), a2] # 5000x26
    z3 = theta2.dot(a2.T) # 10x26 * 26x5000 = 10x5000
    a3 = sigmoid(z3.T) # 5000x10
    if elaborate:
        return ((X, a1, a2, a3), (z2, z3))
    return a3

In [6]:
def cross_entropy(X, y_true, theta1, theta2, lambda_=0):
    m = X.shape[0]
    y_pred = forward_pass(X, theta1, theta2)
    positive_loss = np.sum(np.multiply(y_true, np.log(y_pred)).flatten())
    negative_loss = np.sum(np.multiply((1 - y_true), np.log(1 - y_pred)).flatten())
    regularization = (lambda_/(2*m)) * (np.sum(theta1.flatten() ** 2) + np.sum(theta2.flatten() ** 2))
#     regularization = 0
    J = - (1/m) * (positive_loss + negative_loss) + regularization
    return J

In [7]:
def sci_cross_entropy(thetas, X, y_true):
#     print("Thetas:", thetas.shape)
#     print("X:", X.shape)
#     print("y:", y.shape)
    lambda_ = 0
    m = X.shape[0]
    theta1 = thetas[:10025]
    theta1 = theta1.reshape(25, 401)
    theta2 = thetas[10025:]
    theta2 = theta2.reshape(10, 26)
    y_pred = forward_pass(X, theta1, theta2)
    positive_loss = np.sum(np.multiply(y_true, np.log(y_pred)).flatten())
    negative_loss = np.sum(np.multiply((1 - y_true), np.log(1 - y_pred)).flatten())
    regularization = (lambda_/(2*m)) * (np.sum(theta1[:, 1:].flatten() ** 2) + np.sum(theta2[:, 1:].flatten() ** 2))
    J = - (1/m) * (positive_loss + negative_loss) + regularization
    print(J)
    return J

In [8]:
cross_entropy(X, y, theta1_loaded, theta2_loaded, lambda_=0)

0.28762916516131887

In [9]:
# original
def backward_pass(X, y_true, theta1, theta2, *args):
    ((X, a1, a2, y_pred), (z2, z3)) = forward_pass(X, theta1, theta2, elaborate=True)
    delta3 = np.multiply((y_pred - y_true), sigmoid_prime(z3.T))
    theta2_grad = a2.T.dot(delta3)
    theta2_grad = theta2_grad.T # theta2_grad.shape is now same as theta2.shape
    delta2 = np.multiply(delta3.dot(theta2[:, 1:]), sigmoid_prime(z2.T))
    theta1_grad = a1.T.dot(delta2)
    theta1_grad = theta1_grad.T
    return theta1_grad, theta2_grad    

In [10]:
def sci_backward_pass(thetas, X, y_true, *args):
    theta1 = thetas[:10025]
    theta1 = theta1.reshape(25, 401)
    theta2 = thetas[10025:]
    theta2 = theta2.reshape(10, 26)
    ((X, a1, a2, y_pred), (z2, z3)) = forward_pass(X, theta1, theta2, elaborate=True)
    delta3 = np.multiply((y_pred - y_true), sigmoid_prime(z3.T))
    theta2_grad = a2.T.dot(delta3)
    theta2_grad = theta2_grad.T # theta2_grad.shape is now same as theta2.shape
    delta2 = np.multiply(delta3.dot(theta2[:, 1:]), sigmoid_prime(z2.T))
    theta1_grad = a1.T.dot(delta2)
    theta1_grad = theta1_grad.T
    return np.r_[theta1_grad.flatten(), theta2_grad.flatten()]
    

In [31]:
theta1 = np.random.rand(25, 401)
theta2 = np.random.rand(10, 26)

In [20]:
((X, a1, a2, y_pred), (z2, z3)) = forward_pass(X, theta1, theta2, elaborate=True)

In [12]:
def train(X, y, theta1, theta2):
    n_epochs = 100
    alpha = 0.001
    for i in range(1, n_epochs):
        y_pred = forward_pass(X, theta1, theta2)
        cost = cross_entropy(X, y, theta1, theta2)
        print "Iteration: {0} Cost: {1}\r".format(i, cost),
        theta1_grad, theta2_grad = backward_pass(X, y, theta1, theta2)
        theta1 = theta1 - alpha * theta1_grad
        theta2 = theta2 - alpha * theta2_grad
train(X, y, theta1, theta2)
    

Iteration: 99 Cost: 114.125000263

In [21]:
cross_entropy(X, y, theta1, theta2)

6.9314718055994531

In [16]:
from scipy.optimize import minimize

thetas = np.r_[theta1.flatten(), theta2.flatten()]
res = minimize(sci_cross_entropy, thetas, jac=sci_backward_pass, args=(X, y), options={'maxiter':150}, method="tnc")

7.33715033747
7.33715028338
7.33715030386
7.3253564372
7.26670313089
5.96861507514
6.86325408551
4.06673376641
4.06673363932
3.99208440399
3.6702837872
5.99281095719
4.8416659062
3.76417997489
3.3371023001
3.37345572497
3.32520980153
3.32520979876
3.32520978428
3.32520971827
3.31028035608
3.2588820162
9.56815879675
5.93179370665
3.78280641266
3.34063961082
3.26088837453
3.25107052326
3.25107051158
3.25107051152
3.238778801
3.19971182086
3.19971180539
3.18542448139
3.11741877846
5.88339278296
3.76946208677
3.04668501771
3.02585565774
3.01696465318
3.01696457948
3.01696457616
3.01695682174
3.00777531775
2.96377234283
4.55313967164
3.19454413708
2.8201868917
2.82018648856
2.8201867583
2.82018661145
2.82017952609
2.81156175566
2.76880823337
2.09934743485
2.09934637102
2.0993472968
2.0993471675
2.09934718773
2.09440974675
2.07002141702
1.63196379662
1.6905231215
1.50569150703
1.50569129733
1.50569099816
1.50569139854
1.50569137284
1.50569126605
1.50568870174
1.50222379717
1.48509889012
1.31

In [11]:
def genRandThetas():
    epsilon_init = 0.12
    theta1_shape = (25, 401)
    theta2_shape = (10, 26)
    rand_thetas = [ np.random.rand( *theta1_shape ) * 2 * epsilon_init - epsilon_init, \
                    np.random.rand( *theta2_shape ) * 2 * epsilon_init - epsilon_init]
    return rand_thetas

In [12]:
thetas = genRandThetas()

In [13]:
theta1 = thetas[0]
theta2 = thetas[1]

In [15]:
theta1.shape

(25, 401)

In [15]:
res

     fun: 0.76169320232960236
     jac: array([  2.50401351,   0.        ,   0.        , ...,  -1.38358507,
        -4.88610398, -10.18670446])
 message: 'Max. number of function evaluations reached'
    nfev: 100
     nit: 11
  status: 3
 success: False
       x: array([-0.09833424, -0.10760554,  0.0189685 , ..., -2.97267521,
       -1.3471328 ,  0.78219268])

In [2]:
import numpy as np
import pandas as pd


class NeuralNetwork:
    def __init__(self, n_hidden, n_output, lambda_=0):
        self.n_hidden = n_hidden
        self.n_output = n_output
        self.thetas = x
        self.lambda_ = lambda_

    def fit(self, X, y):
        if y.shape[1] != self.n_output:
            raise ValueError("Number of columns in y ({0}) are != to number "
                             "of output neurons ({1})".format(y.shape[1],
                                                              self.n_output))

    @staticmethod
    def sigmoid(z):
        return 1 / (1 + np.exp(-z))

    def sigmoid_prime(self, z):
        return self.sigmoid(z) * (1 - self.sigmoid(z))

    def cross_entropy(self, X, y):
        m = X.shape[0]
        y_pred = self.forward_pass(X, theta1, theta2)
        positive_loss = np.sum(np.multiply(y_true, np.log(y_pred)).flatten())
        negative_loss = np.sum(
            np.multiply((1 - y), np.log(1 - y_pred)).flatten())
        regularization = (self.lambda_ / (2 * m)) * (np.sum(theta1.flatten() ** 2) + np.sum(theta2.flatten() ** 2))
        # regularization = 0
        J = - (1 / m) * (positive_loss + negative_loss) + regularization
        return J

    def forward_pass(X, theta1, theta2, elaborate=False):
        a1 = np.c_[np.ones(X.shape[0]), X]
        z2 = theta1.dot(a1.T)  # 25x401 * 401x5000 = 25x5000
        a2 = sigmoid(z2.T)  # 5000x25
        a2 = np.c_[np.ones(a2.shape[0]), a2]  # 5000x26
        z3 = theta2.dot(a2.T)  # 10x26 * 26x5000 = 10x5000
        a3 = sigmoid(z3.T)  # 5000x10
        if elaborate:
            return (X, a1, a2, a3), (z2, z3)
        return a3