# Importing libraries

Loading all of the dependencies.

In [74]:
import pandas as pd
from sklearn import datasets
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np

# Perceptron Implementation

Implementing Multilayer Perceptron with functionality to choose activation function, amount and size of layers.

In [75]:
''' As the layer formula is recursive, it makes total sense to discriminate a layer to be an entity. 
Therefore, we make it a class '''
class DenseLayer:
    ''' First of all, all we need to know about a layer is the number of units and the input size. 
    However, as the input size will be dictated by either the data matrix X or the size of the preceding layer, 
    we will leave this parameter as optional. 
    This is also the reason, why we leave the weights’ initialization step aside.
    '''
    def __init__(
            self, 
            n_units, 
            input_size=None, 
            activation=None, 
            name=None):
        self.n_units = n_units
        self.input_size = input_size
        self.W = None
        self.name = name
        self.A = None
        
        self.activation = activation
        self.fn, self.df = self._select_activation_fn(activation)
        
    '''Both the __repr__ method and the self.name attribute serve no other purpose 
    but to help one debug this. '''
    def __repr__(self):
        return f"Dense['{self.name}'] in:{self.input_size} + 1, out:{self.n_units}"
    
    '''Similarly, the shape property is nothing, but an utility.'''
    @property
    def shape(self):
        return self.W.shape
    
    '''Utility function that initializes the weights'''
    def init_weights(self):
        self.W = np.random.randn(self.n_units, self.input_size + 1)
        
    '''Function that helps us choose the activation method'''    
    def _select_activation_fn(self, activation):
        if activation == 'relu':
            fn = lambda x: np.where(x < 0, 0.0, x)
            df = lambda x: np.where(x < 0, 0.0, 1.0)
        elif activation == 'sigmoid':
            fn = lambda x: 1 / (1 + np.exp(-x))
            df = lambda x: x * (1 - x)
        elif activation == 'tanh':
            fn = lambda x: (np.exp(x) - np.exp(-1)) / (np.exp(x) + np.exp(-x))
            df = lambda x: 1 - x**2
        elif activation is None:
            fn = lambda x: x
            df = lambda x: 1.0
        else:
            NotImplementedError(f"Function {activation} cannot be used.")
        return fn, df

    '''The real magic happens within the __call__ method that implements the math behind it'''
    def __call__(self, X):
        m_examples = X.shape[0]
        X_extended = np.hstack([np.ones((m_examples, 1)), X])
        Z = X_extended @ self.W.T
        A = self.fn(Z)
        self.A = A
        return A
    
    '''Here is where δs is being passed down through the layers. Observe that we “trim” the δ matrix 
    by eliminating δj=0, which relate to the bias terms.'''
    def backprop(self, delta, a):
        da = self.df(a)  # the derivative of the activation fn
        return (delta @ self.W)[:, 1:] * da

In [76]:
'''Class that descriminates a model as an entity'''
class SequentialModel:
    '''The whole constructor of this class is all about making sure that 
    all layers are initialized and “size-compatible”.'''
    def __init__(self, layers, lr=0.01):
        input_size = layers[0].n_units
        layers[0].init_weights()
        for layer in layers[1:]:
            layer.input_size = input_size
            input_size = layer.n_units
            layer.init_weights()
        self.layers = layers
        
    '''The __repr__ method exists only for the sake of debugging.'''
    def __repr__(self):
        return f"SequentialModel n_layer: {len(self.layers)}"
    
    '''Computing function'''
    def forward(self, X):
        out = self.layers[0](X)
        for layer in self.layers[1:]:
            out = layer(out)
        return out
    
    '''The cost method implements the so-called binary cross-entropy equation that suits our particular case'''
    @staticmethod
    def cost(y_pred, y_true):
        cost = -y_true * np.log(y_pred) \
               - (1 - y_true) * np.log(1 - y_pred)
        return cost.mean()
    
    '''Utility function'''
    def _extend(self, vec):
        return np.hstack([np.ones((vec.shape[0], 1)), vec])
    
    '''Backprogation is recursive so this function helps '''
    def backward(self, X, y_pred, y_true):
        n_layers = len(self.layers)
        delta = y_pred - y_true
        a = y_pred

        dWs = {}
        for i in range(-1, -len(self.layers), -1):
            a = self.layers[i - 1].A

            dWs[i] = delta.T @ self._extend(a)
            delta = self.layers[i].backprop(delta, a)

        dWs[-n_layers] = delta.T @ self._extend(X)

        for k, dW in dWs.items():
            self.layers[k].W -= self.lr * dW

# Loading Titanic Dataset

### Loading data

In [77]:
data = pd.read_csv('../data/train.csv', sep = ',')

In [78]:
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


### Preprocessing the data

Removing some attributes as they are not so important for our model and also filling empty spaces.

In [79]:
data.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1, inplace=True)
data.fillna(data.mean(), inplace=True)
data

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.000000,1,0,7.2500,S
1,1,1,female,38.000000,1,0,71.2833,C
2,1,3,female,26.000000,0,0,7.9250,S
3,1,1,female,35.000000,1,0,53.1000,S
4,0,3,male,35.000000,0,0,8.0500,S
...,...,...,...,...,...,...,...,...
886,0,2,male,27.000000,0,0,13.0000,S
887,1,1,female,19.000000,0,0,30.0000,S
888,0,3,female,29.699118,1,2,23.4500,S
889,1,1,male,26.000000,0,0,30.0000,C


### One-Hot Encoding

In [80]:
data = pd.get_dummies(data=data, prefix='c', columns=['Sex', 'Embarked'])
data

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,c_female,c_male,c_C,c_Q,c_S
0,0,3,22.000000,1,0,7.2500,0,1,0,0,1
1,1,1,38.000000,1,0,71.2833,1,0,1,0,0
2,1,3,26.000000,0,0,7.9250,1,0,0,0,1
3,1,1,35.000000,1,0,53.1000,1,0,0,0,1
4,0,3,35.000000,0,0,8.0500,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,27.000000,0,0,13.0000,0,1,0,0,1
887,1,1,19.000000,0,0,30.0000,1,0,0,0,1
888,0,3,29.699118,1,2,23.4500,1,0,0,0,1
889,1,1,26.000000,0,0,30.0000,0,1,1,0,0


### Separating the data

In [81]:
y = data[['Survived']]
X = data[data.columns[1:data.shape[1]]]

### Building a model

In [82]:
model = SequentialModel([
    DenseLayer(6, activation='sigmoid', input_size=X.shape[1], name='input'),
    DenseLayer(4, activation='relu', name='1st hidden'),
    DenseLayer(3, activation='relu', name='2nd hidden'),
    DenseLayer(1, activation='sigmoid', name='output')
])

### Fitting and estimating the model

In [83]:
model.cost(y_pred, y)

NameError: name 'y_true' is not defined

# Loading Iris Dataset

### Loading data

In [None]:
iris = datasets.load_iris()

### Splitting data

In [None]:
X = iris.data
y = iris.target

### Scaling

In [None]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)