In [1]:
import numpy as np
from matplotlib import pyplot as plt

In [2]:
class MLP(object):
    """Single-hidden-layer perceptron trained with mini-batch gradient descent.

    The network has no bias units: each layer is a plain matrix product
    followed by the activation selected through ``f_act``.

    Parameters
    ----------
    f_act : str
        ``'relu'`` or ``'tanh'``; any other value selects the sigmoid.
        The same activation is applied to the hidden and the output layer.
    n_hidden : int
        Stored on the instance. The effective hidden width is determined by
        the initial weight matrices used in ``fit``.
    l2 : float
        Lambda value for L2-regularization.
    epochs : int
        Number of passes over the training set.
    lr : float
        Learning rate.
    shuffle : bool
        Shuffle the sample order at the start of every epoch.
    minibatch_size : int
        Samples per gradient step; a trailing incomplete batch is skipped.
    seed : int or None
        Seed for the ``RandomState`` used for shuffling.

    Attributes set by ``fit``
    -------------------------
    weight_hidden, weight_out : float64 arrays with the layer weights.
    cost_history : list with the training cost measured after each epoch.

    NOTE: the cost function and the output error term
    (``activation_out - y``) are the ones that belong to sigmoid outputs;
    with ``'tanh'`` or ``'relu'`` they are used unchanged.
    """

    # Starting weights used by fit() when the caller supplies none:
    # 2 inputs -> 3 hidden units -> 1 output.
    _DEMO_WEIGHT_HIDDEN = ((2, 1, 1), (1, -2, 2))
    _DEMO_WEIGHT_OUT = ((-1,), (3,), (2,))

    def __init__(self, f_act='sigmoid', n_hidden=15, l2=0.0, epochs=20, lr=0.001, shuffle=True, minibatch_size=1, seed=None):
        self.f_act = f_act
        self.random = np.random.RandomState(seed)
        self.n_hidden = n_hidden
        # Lambda value for L2-regularization.
        self.l2 = l2
        self.epochs = epochs
        self.lr = lr
        self.shuffle = shuffle
        self.minibatch_size = minibatch_size

    # activation_function (sigmoid)
    def _sigmoid(self, z):
        """Logistic function; the input is clipped to [-250, 250] so exp() cannot overflow."""
        return 1.0 / (1.0 + np.exp(-np.clip(z, -250, 250)))

    # activation_function (tanh)
    def _tanh(self, z):
        """Hyperbolic tangent, element-wise."""
        return np.tanh(z)

    # activation_function (relu)
    def _relu(self, z):
        """Rectified linear unit, element-wise.

        ``np.maximum`` is required here: the builtin ``max`` cannot compare an
        array with a scalar.
        """
        return np.maximum(z, 0)

    def _activate(self, z):
        """Apply the activation selected by ``self.f_act`` to ``z``."""
        if self.f_act == 'relu':
            return self._relu(z)
        if self.f_act == 'tanh':
            return self._tanh(z)
        return self._sigmoid(z)

    def _activation_derivative(self, z, activation):
        """Derivative of the selected activation.

        ``z`` is the net input and ``activation`` the value the activation
        produced for it.
        """
        if self.f_act == 'relu':
            # Slope is 1 where the unit is active and 0 elsewhere.
            return (z > 0).astype(np.float64)
        if self.f_act == 'tanh':
            return 1.0 - (activation ** 2)
        # sigmoid
        return activation * (1.0 - activation)

    def _forward(self, X):
        """Propagate ``X`` through both layers.

        Returns
        -------
        tuple
            ``(z_hidden, activation_hidden, z_out, activation_out)``: the net
            input and the activation of the hidden and the output layer.
        """
        # net input and activation of hidden layer
        z_hidden = np.dot(X, self.weight_hidden)
        activation_hidden = self._activate(z_hidden)

        # net input and activation of out layer
        z_out = np.dot(activation_hidden, self.weight_out)
        activation_out = self._activate(z_out)

        return z_hidden, activation_hidden, z_out, activation_out

    def _compute_cost(self, y_enc, output):
        """Cross-entropy cost plus the L2 penalty on both weight matrices.

        Parameters
        ----------
        y_enc : array
            One-hot encoded targets.
        output : array
            Output-layer activations, same shape as ``y_enc``.

        Returns
        -------
        float
            The regularized cost.
        """
        l2_term = self.l2 * (np.sum(self.weight_hidden ** 2.0) + np.sum(self.weight_out ** 2.0))

        # Keep the outputs strictly inside (0, 1) so neither log() sees 0 or
        # a negative number (saturated sigmoid, tanh or relu outputs).
        eps = 1e-12
        clipped = np.clip(output, eps, 1.0 - eps)

        term_1 = -y_enc * np.log(clipped)
        term_2 = (1.0 - y_enc) * np.log(1.0 - clipped)
        cost = np.sum(term_1 - term_2) + l2_term

        return cost

    def predict(self, X):
        """Return, for every row of ``X``, the index of the largest output unit."""
        # only z_out matters: every supported activation is non-decreasing,
        # so the largest net input also has the largest activation.
        z_out = self._forward(X)[2]
        y_pred = np.argmax(z_out, axis=1)
        return y_pred

    def _onehot(self, y, n_classes):
        """One-hot encode ``y`` into an array with ``n_classes`` columns.

        The labels are cast to int and used directly as column indices, so
        they must lie in ``0 .. n_classes - 1``.
        """
        onehot = np.zeros((n_classes, y.shape[0]))
        for idx, val in enumerate(y.astype(int)):
            onehot[val, idx] = 1.
        return onehot.T

    def fit(self, X_train, y_train, weight_hidden=None, weight_out=None):
        """Train the network on ``X_train`` / ``y_train``.

        Parameters
        ----------
        X_train : array, shape (n_samples, n_features)
        y_train : array of integer class labels
        weight_hidden : array-like, shape (n_features, n_hidden_units), optional
            Initial hidden-layer weights. Defaults to the fixed 2x3 matrix
            ``[[2, 1, 1], [1, -2, 2]]``.
        weight_out : array-like, shape (n_hidden_units, n_classes), optional
            Initial output-layer weights. Defaults to the fixed 3x1 matrix
            ``[[-1], [3], [2]]``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the initial weights do not match the number of features or the
            number of distinct classes in ``y_train``.
        """
        n_output = np.unique(y_train).shape[0]
        n_features = X_train.shape[1]

        if weight_hidden is None:
            weight_hidden = self._DEMO_WEIGHT_HIDDEN
        if weight_out is None:
            weight_out = self._DEMO_WEIGHT_OUT

        # np.array copies, so the caller's arrays are never updated in place.
        self.weight_hidden = np.array(weight_hidden, dtype=np.float64)
        self.weight_out = np.array(weight_out, dtype=np.float64)

        if self.weight_hidden.ndim != 2 or self.weight_hidden.shape[0] != n_features:
            raise ValueError(
                "weight_hidden must have shape (n_features, n_hidden_units) with "
                "n_features=%d, got %r" % (n_features, self.weight_hidden.shape))
        expected_out_shape = (self.weight_hidden.shape[1], n_output)
        if self.weight_out.shape != expected_out_shape:
            raise ValueError(
                "weight_out must have shape %r, got %r"
                % (expected_out_shape, self.weight_out.shape))

        print(self.weight_hidden)
        print(self.weight_out)

        # new y encoded
        y_train_enc = self._onehot(y_train, n_output)

        self.cost_history = []

        for _ in range(self.epochs):
            indices = np.arange(X_train.shape[0])

            if self.shuffle:
                self.random.shuffle(indices)

            for start_idx in range(0, indices.shape[0] - self.minibatch_size + 1, self.minibatch_size):
                batch_idx = indices[start_idx:start_idx + self.minibatch_size]
                X_batch = X_train[batch_idx]

                z_hidden, activation_hidden, _, activation_out = self._forward(X_batch)

                # backpropagation starts here
                sigma_out = activation_out - y_train_enc[batch_idx]

                activation_derivate_hidden = self._activation_derivative(z_hidden, activation_hidden)

                # Both gradients are computed before any weight is touched.
                sigma_hidden = np.dot(sigma_out, self.weight_out.T) * activation_derivate_hidden
                grad_weight_hidden = np.dot(X_batch.T, sigma_hidden)
                grad_weight_out = np.dot(activation_hidden.T, sigma_out)

                # Regularization and weight updates

                # in hidden layer
                delta_weight_hidden = (grad_weight_hidden + self.l2 * self.weight_hidden)
                self.weight_hidden -= self.lr * delta_weight_hidden

                # in output layer
                delta_weight_out = (grad_weight_out + self.l2 * self.weight_out)
                self.weight_out -= self.lr * delta_weight_out

                print(self.weight_hidden)
                print(self.weight_out)

            # for each epoch, after backpropagation ends evaluate training
            activation_out = self._forward(X_train)[3]
            self.cost_history.append(self._compute_cost(y_enc=y_train_enc, output=activation_out))

        return self

In [3]:
# One training sample with two features, labelled as class 0.
X_train = np.array([[1, 2]])
y_train = np.array([[0]])

# Ten unshuffled epochs at a small learning rate; n_hidden=3 agrees with the
# three hidden units of the starting weights that fit() sets up.
mlp = MLP(n_hidden=3, epochs=10, lr=0.001, shuffle=False)
mlp.fit(X_train, y_train)

[[ 2.  1.  1.]
 [ 1. -2.  2.]]
[[-1.]
 [ 3.]
 [ 2.]]
[[ 1.99999574  1.00003267  1.00000321]
 [ 0.99999148 -1.99993466  2.00000641]]
[[-0.99976328]
 [ 3.00001143]
 [ 2.00023945]]
[[ 1.99999149  1.00006533  1.00000641]
 [ 0.99998297 -1.99986933  2.00001282]]
[[-0.99952664]
 [ 3.00002286]
 [ 2.0004788 ]]
[[ 1.99998723  1.00009799  1.00000961]
 [ 0.99997447 -1.99980402  2.00001923]]
[[-0.99929009]
 [ 3.00003429]
 [ 2.00071807]]
[[ 1.99998298  1.00013064  1.00001282]
 [ 0.99996597 -1.99973872  2.00002563]]
[[-0.99905364]
 [ 3.00004571]
 [ 2.00095725]]
[[ 1.99997874  1.00016328  1.00001602]
 [ 0.99995747 -1.99967344  2.00003203]]
[[-0.99881727]
 [ 3.00005714]
 [ 2.00119634]]
[[ 1.99997449  1.00019592  1.00001922]
 [ 0.99994898 -1.99960817  2.00003844]]
[[-0.99858099]
 [ 3.00006856]
 [ 2.00143533]]
[[ 1.99997025  1.00022854  1.00002242]
 [ 0.9999405  -1.99954291  2.00004484]]
[[-0.99834479]
 [ 3.00007997]
 [ 2.00167424]]
[[ 1.99996601  1.00026116  1.00002562]
 [ 0.99993202 -1.99947767  2.0000

<__main__.MLP at 0x7fa097552f60>

In [4]:
# Hidden-layer weights as left by the fit() call above.
mlp.weight_hidden

array([[ 1.99995754,  1.00032638,  1.00003201],
       [ 0.99991507, -1.99934724,  2.00006403]])

In [5]:
# Output-layer weights as left by the fit() call above.
mlp.weight_out

array([[-0.99763675],
       [ 3.00011421],
       [ 2.00239044]])