In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [2]:
class Perceptron:
    def __init__(self, input_size, *layers) -> None:
        self.layers = self._init_all_layers(input_size, layers)
    
    def _init_all_layers(self, input_size: int, layers: list) -> list:
        initialized_layers = []
        last_output_size = input_size + 1
        for n_neurons in layers:
            # Create the layer weights matrix (+bias)
            initialized_layers.append(
                self._init_layer(last_output_size, n_neurons)
            )
            # Update the number of outputs in the last layer
            last_output_size = n_neurons
            
        return initialized_layers

    def _init_layer(self, layer_input_size: int, n_neurons: int) -> np.ndarray:
        return np.random.uniform(
            -1,  # from -1
            1,  # to 1
            (layer_input_size, n_neurons)  # col = 1 neuron
        )
    
    def _normalize(self, X: np.ndarray) -> np.ndarray:
        return (X - X.mean(axis=0)) / X.std(axis=0)

    def preprocess(self, X: np.ndarray) -> np.ndarray:
        """Apply transformations to the data to ease learning.

        Transformations include: Normalization (mean zero, std. deviation 1);

        Args:
            X (np.ndarray): Input data.

        Returns:
            np.ndarray: Preprocessed data.
        """
        X_cp = X.copy()
        for col in range(X_cp.shape[1]):
            X_cp[..., col] = self._normalize(X_cp[..., col])
        return X_cp
        

    def activation(self, net: np.ndarray) -> np.ndarray:
        # Sigmoid
        return 1. / (1. + np.exp(-net))

    def _activation_derivative(self, net: np.ndarray) -> np.ndarray:
        # Derivative of the sigmoid
        return net*(1-net)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Do a forward pass using the inputs and weights specified.

        Args:
            X (np.ndarray): Inputs (With or Without bias column added)

        Returns:
            np.ndarray: Predictions
        """
        # Add a column of ones at the start of the array
        X = self._add_ones(X)

        out = X
        i=1
        for w in self.layers:
            # Apply weights and biases
            net = np.dot(out, w)
            # Apply activation function
            out = self.activation(net)
            i+=1
        
        return out

    def fit(self, 
            X: np.ndarray,
            y: np.ndarray,
            lr: float = 0.1,
            epochs: int = 1,
            batch_size: int = 1,
    ) -> None:
        """Fits the perceptron to the given target labels.

        Args:
            X (np.ndarray): Inputs (Dataset)
            y (np.ndarray): Target (Labels)
            lr (float, optional): Learning Rate. Defaults to 0.1.
            epochs (int, optional): Number of training epochs. Defaults to 1.
            batch_size (int, optional): Size of each batch. Defaults to 1.
        
        Returns:
            np.ndarray: updated weights and bias.
        """
        X = self._add_ones(X)
        batched_X = np.array_split(X,len(X)/batch_size)
        batched_y = np.array_split(y,len(y)/batch_size)

        for epoch in range(epochs):
            for X_batch, y_batch in zip(batched_X,batched_y):
                # Forward
                out = X_batch
                net_array = []
                f_net_array = []
                for w in self.layers:
                    # Apply weights and biases
                    net_array.append(np.dot(out, w))
                    # Apply activation function
                    f_net_array.append(self.activation(net_array[-1]))
                    out = f_net_array[-1]

                # Backpropagate
                # Last layer
                error = (y_batch-f_net_array[-1])*self._activation_derivative(f_net_array[-1])
                delta_w = lr*np.dot(f_net_array[-2].T, error) / len(X)
                self.layers[-1] += delta_w
                # Hidden
                e = np.dot(error, delta_w.T) * self._activation_derivative(f_net_array[-2])
                delta_w = lr*np.dot(X_batch.T, e)
                self.layers[-2] += delta_w

                pred_class = np.argmax(out, axis=1)
                gt_class = np.argmax(y, axis=1)
                print(
                    f"Epoch {epoch+1}/{epochs} - "
                    f"Acc = {np.sum(pred_class==gt_class)/len(pred_class)}" # TODO: Fix to work with batches (works with single images)
                )
        return w

    def _add_ones(self, X: np.ndarray) -> np.ndarray:
        """Add a column of ones to the start of the array 

        Args:
            X (np.ndarray): Target array

        Returns:
            np.ndarray: Array with the extra column added
        """
        ones_col = np.ones((X.shape[0], 1))
        return np.concatenate([ones_col, X], axis=1)

## Dataset

In [3]:
ds = datasets.load_iris()

X, y = ds['data'], ds['target']

In [4]:
print(f'Features: {" | ".join(ds["feature_names"])}')

Features: sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm)


In [5]:
p = Perceptron(4, 9, 3)

In [6]:
# Preprocessing X
X = p.preprocess(X)

# One-hot encoding (since the output will be 3 neurons)
y = np.eye(len(np.unique(y)))[y]

# Split test / training sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0, shuffle=True)

In [9]:
# p.fit(X[0].reshape((1,-1)), y[0].reshape((1,-1)), 0.1, 1)
p.fit(X, y, 0.1, 50000, 10)

p.predict(X[0].reshape((1,-1)))

ValueError: operands could not be broadcast together with shapes (10,) (150,) 