## Implementação de um Classificador Perceptron

In [1]:
import numpy as np


class Perceptron(object):
    """Perceptron classifier.

    Parameters
    ------------
    eta : float
      Learning rate (between 0.0 and 1.0)
    n_iter : int
      Passes over the training dataset.
    random_state : int
      Random number generator seed for random weight
      initialization.

    Attributes
    -----------
    w_ : 1d-array
      Weights after fitting. ``w_[0]`` is the bias unit and ``w_[1:]``
      holds one weight per feature.
    errors_ : list
      Number of misclassifications (updates) in each epoch.

    """
    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """Fit training data.

        Parameters
        ----------
        X : {array-like}, shape = [n_examples, n_features]
          Training vectors, where n_examples is the number of examples and
          n_features is the number of features. Nested lists are accepted.
        y : array-like, shape = [n_examples]
          Target values. ``predict`` emits 1 or -1, so targets are expected
          to use the same encoding.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
          If ``X`` and ``y`` do not contain the same number of examples.

        """
        # Coerce up front so that plain Python lists work: the code below
        # needs ``X.shape`` and element-wise arithmetic on each row.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.shape[0] != y.shape[0]:
            # zip() below would otherwise silently drop the surplus examples.
            raise ValueError(
                "X and y must contain the same number of examples, got "
                "%d and %d" % (X.shape[0], y.shape[0])
            )

        rgen = np.random.RandomState(self.random_state)
        # One weight per feature plus the bias unit stored at index 0.
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.errors_ = []

        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                # The update is zero whenever the example is already
                # classified correctly, so only mistakes move the weights.
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                self.w_[0] += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def net_input(self, X):
        """Calculate net input: dot product of X with the weights, plus bias."""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        """Return class label after unit step: 1 if net input >= 0.0, else -1."""
        return np.where(self.net_input(X) >= 0.0, 1, -1)

## Testando o classificador Perceptron

In [8]:
"""Dados de Treinamento """
# Three 2-D points on the diagonal; the first two are class +1, the last is class -1.
X, y = (
    np.array([[1, 1], [2, 2], [3, 3]]),
    np.array([1, 1, -1]),
)
# Keep the pair as the cell's last expression so the notebook displays it.
X, y

(array([[1, 1],
        [2, 2],
        [3, 3]]),
 array([ 1,  1, -1]))

In [7]:
"""Criando objeto Perceptron"""
ppn = Perceptron(n_iter=100, eta=0.1)

# Train the model on the training data defined above.
ppn.fit(X, y)

# Try the trained model on three points and print the predicted labels.
X_newdata = np.array([[4, 4], [2, 2], [3, 3]])

print("Resultado da Predição", ppn.predict(X_newdata))

Resultado da Predição [-1  1 -1]


## Questão 1 - Implemente uma função para calcular a acurácia do modelo

In [3]:
def compute_accuracy_score(y_true, y_pred, normalize=True):
    """Accuracy classification score.

    Parameters
    ----------
    y_true : array-like, 1d
        The ground-truth labels.
    y_pred : array-like, 1d
        Predicted labels, in the same order and with the same shape as
        ``y_true``.
    normalize : bool, optional (default=True)
        If True, return the fraction of correctly classified samples.
        Otherwise return the number of correctly classified samples.

    Returns
    -------
    score : numpy float if ``normalize`` is True, numpy integer otherwise.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    # Coerce first: comparing two plain lists with ``==`` yields a single
    # bool rather than an element-wise mask.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        # Without this check, shapes such as (n,) vs (n, 1) would broadcast
        # into an (n, n) mask and produce a meaningless score.
        raise ValueError(
            "y_true and y_pred must have the same shape, got %s and %s"
            % (y_true.shape, y_pred.shape)
        )

    hits = y_true == y_pred
    n_correct = hits.sum()

    if normalize:
        return n_correct / hits.shape[0]
    return n_correct

In [4]:
# Fraction of the predictions for X_newdata that equal the labels in y.
# NOTE(review): y was defined together with X, not X_newdata -- confirm it is
# the intended ground truth for these points.
compute_accuracy_score(y_true=y, y_pred=ppn.predict(X_newdata), normalize=True)

0.6666666666666666

## Questão 2 - Implemente um método de validação cruzada para testar o modelo

In [None]:
from numpy.random import shuffle
from sklearn.model_selection import train_test_split, cross_val_score

In [157]:
def perform_train_test_split(X, y, test_size=0.3, shamble=True, random_state=None):
    """Split data into training and testing sets.

    The examples are (optionally) permuted and then cut once: the last
    ``ceil(test_size * n_examples)`` examples form the test set and the
    remaining ones form the training set. ``X`` and ``y`` are indexed
    separately, so each keeps its own dtype.

    Parameters
    ----------
    X : array-like, shape = [n_examples, ...]
        The attribute (feature) set; the first axis indexes the examples.
    y : array-like, shape = [n_examples] or [n_examples, 1]
        The ground-truth outputs. Flattened to 1-D before splitting.
    test_size : float, optional (default=0.3)
        Fraction of the examples, between 0.0 and 1.0, reserved for testing.
        The remaining examples are used for training.
    shamble : bool, optional (default=True)
        Whether or not to shuffle the examples before splitting.
    random_state : int or None, optional (default=None)
        Seed for the shuffle. When None, NumPy's global random generator is
        used, so ``np.random.seed(...)`` still controls the split.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]``. All four are new arrays
        (copies); ``y_train`` and ``y_test`` are 1-D.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` hold a different number of examples, or if
        ``test_size`` is not within [0.0, 1.0].
    """
    X = np.asarray(X)
    y = np.asarray(y).ravel()
    n_examples = X.shape[0]

    if y.shape[0] != n_examples:
        raise ValueError(
            "X and y must contain the same number of examples, got "
            "%d and %d" % (n_examples, y.shape[0])
        )
    if not 0.0 <= test_size <= 1.0:
        # Values outside this range would turn into negative slice bounds
        # and produce overlapping or otherwise meaningless splits.
        raise ValueError(
            "test_size must be between 0.0 and 1.0, got %r" % (test_size,)
        )

    # Permute row indices rather than a stacked copy of (X, y): stacking
    # would force both into a common dtype (e.g. integer labels -> floats).
    if not shamble:
        order = np.arange(n_examples)
    elif random_state is None:
        order = np.random.permutation(n_examples)
    else:
        order = np.random.RandomState(random_state).permutation(n_examples)

    split_length = int(n_examples - np.ceil(test_size * n_examples))
    train_idx = order[:split_length]
    test_idx = order[split_length:]

    # Indexing with an index array always copies, so the caller's X and y
    # are never aliased by the returned arrays.
    return [X[train_idx], X[test_idx], y[train_idx], y[test_idx]]

## Teste o classificador usando um conjunto de dados linearmente separável e outro não linearmente separável
### Sugestão: crie datasets sintéticos com apenas dois atributos para você poder visualizar a separação das classes

## Questão 3 - Treine um classificador perceptron para os dados de seu estudo de caso