In [36]:
"""
Atividade PEL219 - Multi Layer Perceptron - Gabriel Melo. Matrícula: 125.304-6

Versão Jupyter, para rodar será necessário apenas adicionar os datasets no diretório de execução.
"""

import pandas as pd
import numpy as np
import time
import math
import matplotlib.pyplot as plt
import warnings

from sklearn.neural_network import MLPClassifier  # just for comparison

from sklearn.metrics import accuracy_score, confusion_matrix, multilabel_confusion_matrix, precision_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer, OneHotEncoder

from tqdm import tqdm

warnings.filterwarnings('ignore', 'overflow')


In [37]:
# loading the dataset into memory
df_diabetes = pd.read_csv("pima-indians-diabetes.data.csv", header=None) 

X = df_diabetes.iloc[:, :-1].values

scaler = Normalizer()


X_scaled = scaler.fit_transform(X)
y = df_diabetes.iloc[:, -1].values

In [38]:
x_train, x_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.3, random_state=42)

x_train.shape, y_train.shape, x_test.shape, y_test.shape

((537, 8), (537,), (231, 8), (231,))

## Minha Implementação usando tanh e heaviside na saída

Ela consegue lidar aceita um numero configuravel de hidden layers

In [47]:
class MLP:
    def __init__(self, hidden_layer_sizes=(100), max_iter=10000, learning_rate=0.1):
        self.max_iter = max_iter
        self.hidden_layer_sizes = hidden_layer_sizes
        self.weights = None
        self.biases = None
        self.lr = learning_rate

    def init_model(self, X: np.array, y: np.array):
        weights = {}
        biases = {}
        hidden_size = len(self.hidden_layer_sizes)

        for i in range(hidden_size):
            if i==0: # input
                weights[i+1] = np.random.randn(self.hidden_layer_sizes[i], X.shape[1])*0.1
                biases[i+1] = np.random.randn(self.hidden_layer_sizes[i])*0.1
            else:
                weights[i+1] = np.random.randn(self.hidden_layer_sizes[i], weights[i].shape[0])*0.1
                biases[i+1] = np.random.randn(self.hidden_layer_sizes[i])*0.1

        weights[hidden_size+1] = np.random.randn(1 if y.ndim == 1 else y.shape[1], weights[hidden_size].shape[0])*0.1
        biases[hidden_size+1] = np.random.randn(1 if y.ndim == 1 else y.shape[1])*0.1
        
        return weights, biases

    def fit(self, X: np.array, Y: np.array, tol=1e-3, epoch=10000, train=True):      
        self.weights, self.biases = self.init_model(X, Y)

        if not train: return
        
        for i in tqdm(range(0, epoch)):
            loss=0
            # save a delta matrix for each epoch, resets when changing epoch
            for x, y in zip(X, Y):
                activations, nets = self.forward(x)
                deltas = self.backward(activations, nets, y)
                
                loss += (y - activations[len(self.weights)])**2
            if (i%100) == 0:
                print(loss/X.shape[0])

    def forward(self, x):
        n_layers = len(self.weights)
        
        activations = {0:x}  # first layer is the input
        nets = {}
        
        for i in range(1, n_layers+1):  # passing through the hidden layers
            net = np.dot(self.weights[i], activations[i-1]) + self.biases[i] 
            activations[i] = self.sigmoid(net)
            nets[i] = net

        return activations, nets

    def backward(self, activations, nets, y):
        n_layers = len(self.weights)
        deltas = {}

        for j in range(n_layers, 0, -1): # n até 1
            if j == n_layers:  # output layer, uses delta rule (prediction - target)
                deltas[n_layers] = np.sum(y - activations[n_layers]) * self.d_sigmoid(nets[n_layers])
            else:  # other backwards hidden layers, uses last activation as activation layer
                deltas[j] = np.dot(deltas[j+1], self.weights[j+1]) * self.d_sigmoid(nets[j]).T

            #print(self.weights[j].shape, deltas[j].shape, activations[j].shape)
            self.weights[j] -= self.lr * np.expand_dims((deltas[j] * activations[j]), axis=0).T
            self.biases[j] -= self.lr * deltas[j]
        return deltas

    def _loss(self, y_true: np.array, y_pred: np.array) -> np.array:
        """
        Function to compute cross-entropy loss per sample
        
        Inputs:
            y_true -> numpy array of true labels
            y_pred -> numpy array of prediction values
        Output:
            loss value
        """
        return -y_true*np.log2(y_pred) - (1 - y_true)*np.log2(1 - y_pred)
    
    def _derivative_loss(self, y_true: np.array, y_pred: np.array) -> np.array:
        """
        Function to compute the derivative of the cross-entropy loss per sample
        
        Inputs:
            y_true -> numpy array of true labels
            y_pred -> numpy array of prediction values
        Output:
            loss value
        """
        return -(1/np.log(2))*( (y_true/y_pred) - ((1-y_true)/(1-y_pred)) )

    def sigmoid(self, activation:np.array):
        return  1 / (1 + np.exp(-activation))
        
    def d_sigmoid(self, activation: np.array):
        return self.sigmoid(activation) * (1 - self.sigmoid(activation))

    def predict(self, X: np.array):
        if not self.weights:
            raise Exception("Network not initialized")
            
        if X.ndim==1:
            activations, nets = self.forward(X)
            return activations[len(self.weights)]
        else:
            return np.array([self.forward(x)[0][len(self.weights)] for x in X])

In [48]:
mlp = MLP(hidden_layer_sizes=(6,), learning_rate=0.1)

In [49]:
mlp.fit(x_train[0:20], y_train[0:20], epoch=3000, train=True)

  4%|███▏                                                                          | 121/3000 [00:00<00:02, 1208.51it/s]

[0.30332129]
[0.74811903]
[0.74913355]


  8%|██████▎                                                                       | 242/3000 [00:00<00:02, 1003.84it/s]

[0.74944557]


 15%|███████████▋                                                                   | 442/3000 [00:00<00:03, 804.98it/s]

[0.74959507]
[0.74968224]


 18%|█████████████▉                                                                 | 531/3000 [00:00<00:02, 828.54it/s]

[0.74973914]


 21%|████████████████▎                                                              | 619/3000 [00:00<00:02, 829.06it/s]

[0.74977911]


 24%|██████████████████▌                                                            | 705/3000 [00:00<00:02, 820.44it/s]

[0.74980867]


 30%|███████████████████████▌                                                       | 897/3000 [00:01<00:02, 860.73it/s]

[0.7498314]


 38%|█████████████████████████████▋                                                | 1141/3000 [00:01<00:02, 744.48it/s]

[0.7498494]
[0.749864]


 41%|████████████████████████████████▎                                             | 1242/3000 [00:01<00:02, 811.83it/s]

[0.74987607]


 44%|██████████████████████████████████▍                                           | 1325/3000 [00:01<00:02, 786.67it/s]

[0.74988622]
[0.74989486]


 50%|██████████████████████████████████████▌                                       | 1485/3000 [00:01<00:02, 719.82it/s]

[0.7499023]


 55%|██████████████████████████████████████████▋                                   | 1644/3000 [00:02<00:01, 729.70it/s]

[0.74990878]
[0.74991447]


 60%|██████████████████████████████████████████████▋                               | 1797/3000 [00:02<00:01, 701.53it/s]

[0.74991951]


 62%|████████████████████████████████████████████████▌                             | 1869/3000 [00:02<00:01, 678.08it/s]

[0.74992399]


 67%|████████████████████████████████████████████████████▏                         | 2008/3000 [00:02<00:01, 653.51it/s]

[0.74992802]


 70%|██████████████████████████████████████████████████████▍                       | 2095/3000 [00:02<00:01, 712.68it/s]

[0.74993164]


 72%|████████████████████████████████████████████████████████▎                     | 2168/3000 [00:02<00:01, 705.73it/s]

[0.74993493]

 75%|██████████████████████████████████████████████████████████▊                   | 2261/3000 [00:02<00:00, 768.73it/s]


[0.74993791]


 78%|████████████████████████████████████████████████████████████▉                 | 2346/3000 [00:03<00:00, 791.40it/s]

[0.74994064]


 83%|█████████████████████████████████████████████████████████████████             | 2503/3000 [00:03<00:00, 755.72it/s]

[0.74994315]


 86%|███████████████████████████████████████████████████████████████████▏          | 2584/3000 [00:03<00:00, 769.20it/s]

[0.74994546]


 92%|███████████████████████████████████████████████████████████████████████▊      | 2762/3000 [00:03<00:00, 826.01it/s]

[0.74994759]
[0.74994956]


 95%|█████████████████████████████████████████████████████████████████████████▉    | 2845/3000 [00:03<00:00, 810.85it/s]

[0.74995139]


100%|██████████████████████████████████████████████████████████████████████████████| 3000/3000 [00:03<00:00, 765.45it/s]


In [31]:
def rescale(X):
    return (X - X.mean(axis=0)) / (X.var(axis=0))

n_samples = 1000
X0 = np.random.normal(loc=[0,0], scale=[2,0.5], size=(int(n_samples/2), 2))
X11 = np.random.normal(loc=[0,3.5], scale=[0.5,1], size=(int(n_samples/4), 2))
X12 = np.random.normal(loc=[0,-3.5], scale=[0.5,1], size=(int(n_samples/4), 2))
X1 = np.vstack([X11, X12])
X = np.vstack([X0, X1])

# X = rescale(X)

y0 = np.zeros(shape=(int(n_samples/2), 1))
y1 = np.ones(shape=(int(n_samples/2), 1))
yhat = np.vstack([y0, y1])

In [34]:
n_epoch = 200
shape = (2,5,3,1)
alpha = 0.001

mlp2 = MLP(hidden_layer_sizes=(3,6,8), learning_rate=0.001)

In [35]:
mlp2.fit(X, yhat, epoch=n_epoch)

  0%|▍                                                                                  | 1/200 [00:00<00:59,  3.34it/s]

[0.35637747]


 50%|████████████████████████████████████████▉                                        | 101/200 [00:21<00:22,  4.41it/s]

[0.24903342]


100%|█████████████████████████████████████████████████████████████████████████████████| 200/200 [00:43<00:00,  4.62it/s]
