## **Introduction to Multi-Layer Perceptrons**

In [1]:
import pandas as pd
import numpy as np

## create the data set: two scores (fat, salt) and a 0/1 acceptance label per row
fat_score = [0.2, 0.1, 0.2, 0.2, 0.4, 0.3]
salt_score = [0.9, 0.1, 0.4, 0.5, 0.5, 0.8]
acceptance = [1, 0, 0, 0, 1, 1]

## assemble the three lists into one dataframe (one column per list)
df = pd.DataFrame(
    dict(
        fat_score=fat_score,
        salt_score=salt_score,
        acceptance=acceptance,
    )
)
df

Unnamed: 0,fat_score,salt_score,acceptance
0,0.2,0.9,1
1,0.1,0.1,0
2,0.2,0.4,0
3,0.2,0.5,0
4,0.4,0.5,1
5,0.3,0.8,1


In [2]:
## split the dataframe into the feature matrix X and the target vector y
X = df.loc[:, ['fat_score', 'salt_score']]
y = df.loc[:, 'acceptance']

In [3]:
## Perceptron
## Can be a linear or a non-linear model (depending on the activation function)

In [6]:
from sklearn.neural_network import MLPClassifier

## hidden_layer_sizes is a tuple with one entry per hidden layer:
##   hidden_layer_sizes = (10,)      -> 1 hidden layer with 10 neurons
##                                      (the trailing comma is what makes it a tuple)
##   hidden_layer_sizes = (10, 5, 2) -> 3 hidden layers with 10, 5 and 2 neurons
##   hidden_layer_sizes = ()         -> no hidden layer, i.e. a perceptron

## PERCEPTRON
## random_state pins the random weight initialization so a re-run gives the same fit
nn = MLPClassifier(hidden_layer_sizes=(), max_iter=1000, random_state=591)

## fit
## All NNs perform gradient descent (might NOT converge)
## Initialization of weights is DONE randomly (repeatable here because of random_state)
nn.fit(X, y)

In [9]:
## Multi-Layer Perceptron
## 1 hidden layer with 4 neurons
## random_state pins the random weight initialization so a re-run gives the same fit
nn = MLPClassifier(hidden_layer_sizes=(4,), max_iter=5000, random_state=591)
nn.fit(X, y)

In [10]:
## Deep MLP
## 3 hidden layers
## L1: 10 neurons
## L2: 5 neurons
## L3: 2 neurons
## random_state pins the random weight initialization so a re-run gives the same fit
nn = MLPClassifier(hidden_layer_sizes=(10, 5, 2), max_iter=5000, random_state=591)
nn.fit(X, y)

#### **Weights and biases**

Neural networks ONLY need to obtain the optimal values for the weights and biases.

* weights: think slopes (we get their optimal values using gradient descent). They are usually initialized randomly, e.g. from a normal(0, 1) distribution; the exact scheme depends on the library.
* biases: think y-intercept (we get their optimal values using gradient descent). They are commonly initialized at 0, although this is also library-dependent.

In [18]:
## instantiate a perceptron (no hidden layer); the fixed seed makes the fit repeatable
nn = MLPClassifier(
    hidden_layer_sizes=(),
    max_iter=1000,
    random_state=591,
)

## train it on the features X and the target y
nn.fit(X, y)

In [19]:
## these are the fitted weights, returned as a list of arrays (NOT INITIALIZED until fit)
## in general the cost function of NNs is NOT convex (multiple minima)
## the model may get STUCK in a local minimum rather than reach the best one
nn.coefs_

[array([[ 0.817613  ],
        [-0.61663944]])]

In [20]:
## these are the fitted biases, returned as a list of arrays (NOT INITIALIZED until fit)
nn.intercepts_

[array([0.11587256])]

In [22]:
## MLP with a single hidden layer of 4 neurons, seeded so the fit is repeatable
nn = MLPClassifier(
    hidden_layer_sizes=(4,),
    max_iter=5000,
    random_state=591,
)
nn.fit(X, y)

In [23]:
## weights: one array per layer transition
## first array is 2 x 4 (2 inputs -> 4 hidden neurons), second is 4 x 1 (4 hidden neurons -> output)
nn.coefs_

[array([[ 3.02971233e-14, -2.77636782e-01, -8.49613016e-05,
          1.26890666e-01],
        [-2.61117414e-08, -1.42191855e-01, -1.45537328e-01,
         -7.86183534e-02]]),
 array([[-1.34733791e-01],
        [ 2.18362068e-22],
        [-1.03722088e-02],
        [ 4.52890338e-01]])]

In [24]:
## biases: one array per layer (4 values for the hidden layer, 1 for the output)
nn.intercepts_

[array([-0.75839907, -0.30029308, -0.40334076,  0.41923201]),
 array([0.15386197])]

#### **Summation and Activation of Neuron**

In [26]:
## Worked example: a single neuron with the Identity activation, f(z) = z
## Suppose the neuron has these weights and bias
w1 = 0.82
w2 = -0.62
b = 0.12

## and receives this observation
x1 = 0.2   # fat_score
x2 = 0.9   # salt_score

## what is the output of the Identity function?

## 1) summation function z: weighted sum of the inputs plus the bias
z = w1*x1 + w2*x2 + b
print(f'The summation function is z = {z}')

## 2) the activation f(z) = z passes z through unchanged
fz = z
print(f'The activation function is f(z) = {fz}')

The summation function is z = -0.274
The activation function is f(z) = -0.274


In [27]:
## perceptron again, this time requesting the identity activation
## NOTE: in MLPClassifier `activation` is the activation of the HIDDEN layers;
## this model has no hidden layer, and the output unit's activation is chosen by
## scikit-learn from the target type (inspect nn.out_activation_ after fitting)
nn = MLPClassifier(
    hidden_layer_sizes=(),
    activation="identity",
    max_iter=5000,
    random_state=591,
)
nn.fit(X, y)

In [28]:
## fitted weights and biases, displayed together as a tuple
## (the values shown equal those printed earlier for the seeded perceptron
##  that used the default activation)
nn.coefs_, nn.intercepts_

([array([[ 0.817613  ],
         [-0.61663944]])],
 [array([0.11587256])])

In [30]:
## Worked example: a single neuron with the Logistic activation
## f(z) = 1 / (1 + exp(-z)), so the output always lies between 0 and 1

## weights and bias of the neuron
w1 = 0.82
w2 = -0.62
b = 0.12

## observation fed to the neuron
x1 = 0.2   # fat_score
x2 = 0.9   # salt_score

## 1) summation function z: weighted sum of the inputs plus the bias
z = w1*x1 + w2*x2 + b
print(f'The summation function is z = {z}')

## 2) squash z into (0, 1) with the logistic function
fz = 1 / (1 + np.exp(-z))
print(f'The activation function is f(z) = {fz}')

The summation function is z = -0.274
The activation function is f(z) = 0.4319253656434116


In [31]:
## Predictions
## perceptron requested with the logistic activation
## NOTE: `activation` is the HIDDEN-layer activation in MLPClassifier and this model
## has no hidden layer; inspect nn.out_activation_ for the output unit's activation
nn = MLPClassifier(
    hidden_layer_sizes=(),
    activation="logistic",
    max_iter=5000,
    random_state=591,
)
nn.fit(X, y)

In [34]:
## predicted probabilities for every row of X, keeping only column index 1
## (presumably the probability of acceptance = 1, the second of the 0/1 labels; see nn.classes_)
nn.predict_proba(X)[:,1]

array([0.47258731, 0.52067955, 0.51081745, 0.50316723, 0.52930042,
       0.49329545])

In [35]:
## NOTE(review): scratch arithmetic with no stated purpose; it looks like a weight count
## for a 40 -> 100 -> 20 network (40*100 + 100*20). Confirm the intent or remove the cell.
40*100 + 100*20

6000

In [37]:
## Worked example: a single neuron with the ReLU activation, f(z) = max(0, z)

## weights and bias of the neuron
w1 = 0.82
w2 = -0.62
b = 0.12

## observation fed to the neuron
x1 = 0.2   # fat_score
x2 = 0.9   # salt_score

## 1) summation function z: weighted sum of the inputs plus the bias
z = w1*x1 + w2*x2 + b
print(f'The summation function is z = {z}')

## 2) activation: keep z when it is positive, otherwise output 0
fz = z if z > 0 else 0
print(f'The activation function is f(z) = {fz}')

The summation function is z = -0.274
The activation function is f(z) = 0
