In [1]:
import numpy as np
from matplotlib import pyplot as plt

In [2]:
class MLP(object):
    def __init__(self, f_act='sigmoid', n_hidden=15, l2=0.0, epochs=20, lr=0.001, shuffle=True, minibatch_size=1, seed=None):
        self.f_act = f_act
        self.random = np.random.RandomState(seed)
        self.n_hidden = n_hidden
        # Lambda value for L2-regularization.
        self.l2 = l2
        self.epochs = epochs
        self.lr = lr
        self.shuffle = shuffle
        self.minibatch_size = minibatch_size
    
    # activation_function (sigmoid)
    def _sigmoid(self, z):
        return 1.0 / (1.0 + np.exp(-np.clip(z, -250, 250)))
        
    # activation_function (tanh)
    def _tanh(self, z):
        return np.tanh(z)
    
    def _relu(self, z):
        return np.maximum(z, 0)
        
    def _forward(self, X):
        # net input of hidden layer
        z_hidden = np.dot(X, self.weight_hidden)
        
        # activation of hidden layer
        if self.f_act == 'relu':
            activation_hidden = self._relu(z_hidden)
        elif self.f_act == 'tanh':
            activation_hidden = self._tanh(z_hidden)
        else:
            activation_hidden = self._sigmoid(z_hidden)
        
        # net input of out layer
        z_out = np.dot(activation_hidden, self.weight_out)
        
        # activation of output layer
        if self.f_act == 'relu':
            activation_out = self._relu(z_out)
        elif self.f_act == 'tanh':
            activation_out = self._tanh(z_out)
        else:
            activation_out = self._sigmoid(z_out)
        
        return z_hidden, activation_hidden, z_out, activation_out
        
    def _compute_cost(self, y_enc, output):
        # sse error with l2
        l2_term = self.l2 * (np.sum(self.weight_hidden ** 2.0) + np.sum(self.weight_out ** 2.0))
        cost = ((y_enc - output) ** 2).sum() + l2_term
        
        return cost
    
    def predict(self, X):
        # only z_out matters
        z_out = self._forward(X)[2]
        y_pred = np.argmax(z_out, axis=1)
        return y_pred
    
    def _onehot(self, y, n_classes):
        onehot = np.zeros((n_classes, y.shape[0]))
        for idx, val in enumerate(y.astype(int)):
            onehot[val, idx] = 1.
        return onehot.T
    
    def fit(self, X_train, y_train):
        n_output = np.unique(y_train).shape[0]
        n_features = X_train.shape[1]
        
        self.weight_hidden = np.array([[2, 1 , 1], [1, -2, 2]])
        self.weight_hidden = self.weight_hidden.astype(np.float64)
        
        self.weight_out = np.array([[-1], [3], [2]])
        self.weight_out = self.weight_out.astype(np.float64)
        
        self.costs_ = []
        
        # new y encoded
        y_train_enc = self._onehot(y_train, n_output)
        
        for i in range(self.epochs):
            indices = np.arange(X_train.shape[0])
            
            if self.shuffle:
                self.random.shuffle(indices)
            
            for start_idx in range(0, indices.shape[0] - self.minibatch_size + 1, self.minibatch_size):
                batch_idx = indices[start_idx:start_idx + self.minibatch_size]
                
                z_hidden, activation_hidden, z_out, activation_out = self._forward(X_train[batch_idx])

                # backpropragation starts here
                
                sigma_out = activation_out - y_train_enc[batch_idx]
                
                # when using sigmoid as activation function
                if self.f_act == 'relu':
                    activation_derivate_hidden = activation_hidden
                elif self.f_act == 'tanh':
                    activation_derivate_hidden = (1.0 - (activation_hidden ** 2))
                else:
                    # sigmoid
                    activation_derivate_hidden = activation_hidden * (1.0 - activation_hidden)
                
                sigma_hidden = np.dot(sigma_out, self.weight_out.T) * activation_derivate_hidden
                grad_weight_hidden = np.dot(X_train[batch_idx].T, sigma_hidden)
                grad_weight_out = np.dot(activation_hidden.T, sigma_out)
                
                # Regularization and weight updates
                
                # in hidden layer
                delta_weight_hidden = (grad_weight_hidden + self.l2 * self.weight_hidden)
                self.weight_hidden -= self.lr * delta_weight_hidden

                # in output layer
                delta_weight_out = (grad_weight_out + self.l2 * self.weight_out)
                self.weight_out -= self.lr * delta_weight_out
            
            # for each epoch, after backpropagation ends evaluate training
            z_hidden, activation_hidden, z_out, activation_out = self._forward(X_train)
            
            cost = self._compute_cost(y_enc=y_train_enc, output=activation_out)
            self.costs_.append(cost)
            
            
        return self

In [3]:
learning_rates = [0.001, 0.01, 0.1]
epochs = [10, 50, 100]

In [4]:
X_train = np.array([[1, 2]])
y_train = np.array([[0]])

# Questão 1.

In [5]:
for lr_ in learning_rates:
    for e_ in epochs:
        mlp = MLP(n_hidden=3, epochs=e_, lr=lr_, shuffle=False, f_act='relu', l2=0.1)
        mlp.fit(X_train, y_train)
        
        print('learning-rate = {} - epochs = {} - weights:\n'.format(lr_, e_))
        print('w_hidden:\n')
        print('{}\n'.format(mlp.weight_hidden))
        print('w_out:\n')
        print('{}\n'.format(mlp.weight_out))


learning-rate = 0.001 - epochs = 10 - weights:

w_hidden:

[[ 2.11791249  0.99900045  0.76604397]
 [ 1.23882363 -1.9980009   1.53208793]]

w_out:

[[-1.11318329]
 [ 2.99700135]
 [ 1.87836338]]

learning-rate = 0.001 - epochs = 50 - weights:

w_hidden:

[[ 2.15290634  0.99501223  0.70661083]
 [ 1.32077599 -1.99002446  1.41322166]]

w_out:

[[-1.14717634]
 [ 2.98503669]
 [ 1.84060578]]

learning-rate = 0.001 - epochs = 100 - weights:

w_hidden:

[[ 2.14192787  0.99004934  0.70337062]
 [ 1.31370773 -1.98009868  1.40674124]]

w_out:

[[-1.14124422]
 [ 2.97014802]
 [ 1.83158025]]

learning-rate = 0.01 - epochs = 10 - weights:

w_hidden:

[[ 2.03831069  0.99004488  0.61498365]
 [ 1.10648674 -1.98008976  1.2299673 ]]

w_out:

[[-1.06747397]
 [ 2.97013464]
 [ 1.8004298 ]]

learning-rate = 0.01 - epochs = 50 - weights:

w_hidden:

[[ 1.95482497  0.95120563  0.59519033]
 [ 1.05603306 -1.90241126  1.19038066]]

w_out:

[[-1.02222536]
 [ 2.85361688]
 [ 1.7322521 ]]

learning-rate = 0.01 - epochs =