# Perceptron Learning

In [None]:
import numpy as np

## Vectorization of the Perceptron Rule
**Inputs: $m$ observations and $n$ features/dimensions**
$$ X =
\begin{bmatrix} 1 & 1 & 0 & 0 \\ 1 & 0 & 0 & 1 \\ 0 & 0 & 1 & 1 \\ 0 & 1 & 1 & 0 \end{bmatrix}
T =
\begin{bmatrix} 1 & 1 \\ 1 & 0 \\ 0 & 1 \\ 0 & 0 \end{bmatrix} 
$$ 
**Network Parameters**

$$ W \in \mathbb{R}^{2 \times 4}$$

### 1. The Inputs, Weights and Targets as matrices 

In [20]:
# Seed NumPy's global generator so the random initial weights (and every later
# np.random call in this notebook) are identical after Restart & Run All.
np.random.seed(42)

# Inputs: one observation per row, one feature per column -> shape (4, 4).
X = np.array([[1, 1, 0, 0], [1, 0, 0, 1], [0, 0, 1, 1], [0, 1, 1, 0]])
# Targets: one row per observation, one column per output unit -> shape (4, 2).
t = np.array([[1, 1], [1, 0], [0, 1], [0, 0]])
# Weights: one row per output unit, one column per input feature -> shape (2, 4).
w = np.random.randn(2, 4)
print("Input matrix is :\n", X)
print("++++++++++++++++++++++++++++++++")
print("Weights matrix is :\n", w)
print("++++++++++++++++++++++++++++++++")
print("Target matrix is :\n", t)

Input matrix is :
 [[1 1 0 0]
 [1 0 0 1]
 [0 0 1 1]
 [0 1 1 0]]
++++++++++++++++++++++++++++++++
Weights matrix is :
 [[-1.06806328 -1.18060446  0.01972557  0.96370678]
 [ 0.62021812  0.01448267  0.47514371 -0.00224163]]
++++++++++++++++++++++++++++++++
Target matrix is :
 [[1 1]
 [1 0]
 [0 1]
 [0 0]]


## Bias Trick
We can treat the bias as one more weight $w_{0}$ whose input feature is the constant $x_{0} = 1$, i.e. we prepend a column of ones to $X$:
$$ X =
\begin{bmatrix} 1 & 1 & 1 & 0 & 0 \\ 1 & 1 & 0 & 0 & 1 \\ 1 & 0 & 0 & 1 & 1 \\ 1 & 0 & 1 & 1 & 0 \end{bmatrix} $$


In [30]:
# Bias trick, data side: prepend a column of ones so that x_0 = 1 for every observation.
X_d = np.concatenate((np.ones((len(X), 1)), X), axis=1)
print(f"updated data matrix after bias \n {X_d}")
# Bias trick, weight side: prepend one random bias weight (uniform in [0, 1)) per output unit.
w_d = np.concatenate((np.random.rand(len(w), 1), w), axis=1)
print(f"updated weight matrix after bias:\n {w_d}")

updated data matrix after bias 
 [[1. 1. 1. 0. 0.]
 [1. 1. 0. 0. 1.]
 [1. 0. 0. 1. 1.]
 [1. 0. 1. 1. 0.]]
updated weight matrix after bias:
 [[ 0.25809196  0.20145382 -0.96806328 -1.08060446 -0.08027443  0.86370678]
 [ 0.0352929   0.72659162  0.52021812 -0.08551733  0.37514371 -0.10224163]]


### 2. Compute $Z$ as the dot product of $X$ and $W^{T}$
$$
\begin{align}
W &=  \begin{bmatrix}
w_{11} & w_{12} & w_{13} & w_{14} & w_{15}\\
w_{21} & w_{22} & w_{23} & w_{24} & w_{25}\\  
\end{bmatrix} \\

X &= 
\begin{bmatrix}
x_{11} & x_{12} & x_{13} & x_{14} & x_{15}\\
x_{21} & x_{22} & x_{23} & x_{24} & x_{25}\\
x_{31} & x_{32} & x_{33} & x_{34} & x_{35}\\
x_{41} & x_{42} & x_{43} & x_{44} & x_{45}
\end{bmatrix}
\end{align}
$$



$
Z = X \cdot W^{T} = 
\begin{bmatrix}
x_{11} & x_{12} & x_{13} & x_{14} & x_{15}\\
x_{21} & x_{22} & x_{23} & x_{24} & x_{25}\\
x_{31} & x_{32} & x_{33} & x_{34} & x_{35}\\
x_{41} & x_{42} & x_{43} & x_{44} & x_{45}
\end{bmatrix} \cdot\begin{bmatrix}
w_{11} & w_{21}\\  
w_{12} & w_{22}\\  
w_{13} & w_{23}\\  
w_{14} & w_{24}\\  
w_{15} & w_{25}\\
\end{bmatrix}  = \begin{bmatrix} z_{11} & z_{12} \\ z_{21} & z_{22} \\z_{31} & z_{32} \\z_{41} & z_{42} \end{bmatrix}$ 



In [22]:
# Weighted sum: each row of Z holds one observation's pre-activation for every output unit.
Z = X_d @ w_d.T
print(f"Z (weighted sum) matrix is :\n {Z}")

Z (weighted sum) matrix is :
 [[-2.04721392  1.5612924 ]
 [ 0.09709732  1.54456811]
 [ 1.18488617  1.39949371]
 [-0.95942507  1.416218  ]]


### 4. Apply the threshold logic: the Heaviside (step) function

In [23]:
# Step activation: output 1 where the weighted sum is strictly positive, otherwise 0.
Y = np.where(Z > 0, 1, 0)
print("Output matrix is :\n", Y)

Output matrix is :
 [[0 1]
 [1 1]
 [1 1]
 [0 1]]


### 5. Find the error

$$ \epsilon = T - Y $$

In [24]:
# Element-wise error: +1 where the unit should have fired but did not,
# -1 where it fired but should not have, 0 where the prediction is correct.
e = np.subtract(t, Y)
print("Error matrix is :\n", e)

Error matrix is :
 [[ 1  0]
 [ 0 -1]
 [-1  0]
 [ 0 -1]]


### 6. Weight Update

$ \Delta W = \alpha \epsilon^{T} X = \alpha \begin{bmatrix} \epsilon_{11} & \epsilon_{12} \\ \epsilon_{21} & \epsilon_{22} \\
\epsilon_{31} & \epsilon_{32} \\ \epsilon_{41} & \epsilon_{42}\end{bmatrix} ^{T}\cdot 
\begin{bmatrix}
x_{11} & x_{12} & x_{13} & x_{14} & x_{15}\\
x_{21} & x_{22} & x_{23} & x_{24} & x_{25}\\
x_{31} & x_{32} & x_{33} & x_{34} & x_{35}\\
x_{41} & x_{42} & x_{43} & x_{44} & x_{45}
\end{bmatrix} $


$ = \alpha \begin{bmatrix} \epsilon_{11} & \epsilon_{21} & \epsilon_{31} & \epsilon_{41} \\ \epsilon_{12} & \epsilon_{22} & \epsilon_{32} & \epsilon_{42} \end{bmatrix} \begin{bmatrix}
x_{11} & x_{12} & x_{13} & x_{14} & x_{15}\\
x_{21} & x_{22} & x_{23} & x_{24} & x_{25}\\
x_{31} & x_{32} & x_{33} & x_{34} & x_{35}\\
x_{41} & x_{42} & x_{43} & x_{44} & x_{45}
\end{bmatrix}$

In [None]:
# Perceptron learning rule: the weight change is the learning rate times
# (error transposed) times (bias-augmented inputs), one row per output unit.
alpha = 0.1  # learning rate
dw = alpha * (e.T @ X_d)
print("Change in weights is :\n", dw)

Change in weights is :
 [[ 0.   0.1  0.1 -0.1 -0.1]
 [-0.2 -0.1 -0.1 -0.1 -0.1]]


In [27]:
# Apply the update to the bias-augmented weights. The result gets its own name:
# rebinding `w` to this bias-augmented matrix would make a re-run of the
# bias-trick cell prepend a second bias column, and the resulting weight matrix
# would no longer match the column count of X_d.
w_updated = w_d + dw
print("Updated weights matrix is :\n", w_updated)

Updated weights matrix is :
 [[ 0.20145382 -0.96806328 -1.08060446 -0.08027443  0.86370678]
 [ 0.72659162  0.52021812 -0.08551733  0.37514371 -0.10224163]]


### 7. Putting Everything Together

In [43]:
# Hyper-parameters for the full training run
epochs, alpha = 400, 0.001

# Training data: four observations (rows) with four binary features each
X = np.array([
    [1, 1, 0, 0],
    [1, 0, 0, 1],
    [0, 0, 1, 1],
    [0, 1, 1, 0],
])
# Two binary targets per observation.
# NOTE: the second target column is not linearly separable for these inputs:
# rows 0 and 2 (target 1) sum to the same vector as rows 1 and 3 (target 0),
# so no weights/bias can give both of the former a positive weighted sum and
# both of the latter a non-positive one.
t = np.array([
    [1, 1],
    [1, 0],
    [0, 1],
    [0, 0],
])
# Initial weights without bias: one row per output unit, one column per feature
w = np.random.randn(t.shape[1], X.shape[1])
# Putting everything together in a function
def perceptron_learning_rule(X, t, w, epochs, alpha=0.1, verbose=True, rng=None):
    """Train a single-layer perceptron with the batch perceptron learning rule.

    A column of ones is prepended to ``X`` and a random bias weight (uniform in
    [0, 1)) is prepended to ``w`` (the "bias trick"). Each epoch computes
    ``Y = step(X_d @ W.T)`` and, unless every prediction already matches ``t``,
    applies ``W <- W + alpha * (t - Y).T @ X_d``.

    Parameters
    ----------
    X : array_like, shape (m, n)
        One observation per row, one feature per column.
    t : array_like, shape (m, k)
        Target outputs, one column per output unit.
    w : array_like, shape (k, n)
        Initial weights without the bias column. Not modified.
    epochs : int
        Maximum number of passes over the data.
    alpha : float, default 0.1
        Learning rate.
    verbose : bool, default True
        If True, print an ``Epoch i`` line after every weight update.
    rng : numpy.random.Generator, optional
        Source of the random bias weights. When omitted, the legacy global
        ``np.random.rand`` is used, as before.

    Returns
    -------
    numpy.ndarray, shape (k, n + 1)
        Final weights with the bias weight in column 0.

    Raises
    ------
    ValueError
        If the shapes of ``X``, ``t`` and ``w`` are inconsistent.
    """
    X = np.asarray(X)
    t = np.asarray(t)
    w = np.asarray(w)
    if X.ndim != 2 or w.ndim != 2 or X.shape[1] != w.shape[1]:
        raise ValueError(
            f"X must be (m, n) and w must be (k, n); got {X.shape} and {w.shape}"
        )
    # Without this check a 1-D target vector would broadcast against the (m, k)
    # prediction matrix and silently produce a wrongly shaped update.
    if t.shape != (X.shape[0], w.shape[0]):
        raise ValueError(
            f"t must have shape {(X.shape[0], w.shape[0])}; got {t.shape}"
        )

    # Bias trick: constant input x_0 = 1 and one extra weight per output unit.
    X_d = np.hstack((np.ones((X.shape[0], 1)), X))
    if rng is None:
        bias = np.random.rand(w.shape[0], 1)
    else:
        bias = rng.random((w.shape[0], 1))
    w_d = np.hstack((bias, w))

    def predict(weights):
        """Step activation of the weighted sums for the given weights."""
        return (X_d @ weights.T > 0).astype(int)

    e = None
    converged = False
    for epoch in range(epochs):
        e = t - predict(w_d)
        # Stopping condition: every output matches its target.
        if not e.any():
            print(f"Training completed in {epoch} epochs")
            converged = True
            break
        w_d = w_d + alpha * (e.T @ X_d)
        if verbose:
            print(f"Epoch {epoch}")

    if e is None:
        print("No iterations performed, error matrix not computed")
    else:
        if not converged:
            # The loop ended right after a weight update, so re-evaluate the
            # error for the weights that are actually returned.
            e = t - predict(w_d)
            if e.any():
                print(f"Training did not converge within {epochs} epochs")
        print(f"error matrix is :\n {e}")
    return w_d
# Run the training; the bare call leaves the returned weight matrix as the cell's output.
perceptron_learning_rule(X, t, w, epochs=epochs, alpha=alpha)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39
Epoch 40
Epoch 41
Epoch 42
Epoch 43
Epoch 44
Epoch 45
Epoch 46
Epoch 47
Epoch 48
Epoch 49
Epoch 50
Epoch 51
Epoch 52
Epoch 53
Epoch 54
Epoch 55
Epoch 56
Epoch 57
Epoch 58
Epoch 59
Epoch 60
Epoch 61
Epoch 62
Epoch 63
Epoch 64
Epoch 65
Epoch 66
Epoch 67
Epoch 68
Epoch 69
Epoch 70
Epoch 71
Epoch 72
Epoch 73
Epoch 74
Epoch 75
Epoch 76
Epoch 77
Epoch 78
Epoch 79
Epoch 80
Epoch 81
Epoch 82
Epoch 83
Epoch 84
Epoch 85
Epoch 86
Epoch 87
Epoch 88
Epoch 89
Epoch 90
Epoch 91
Epoch 92
Epoch 93
Epoch 94
Epoch 95
Epoch 96
Epoch 97
Epoch 98
Epoch 99
Epoch 100
Epoch 101
Epoch 102
Epoch 103
Epoch 104
Epoch 105
Epoch 106
Epoch 107
Epoch 108
Epoch 109
Epoch 110


array([[ 0.63563448, -0.78807786,  0.49751661, -1.13442582,  0.45894134],
       [ 0.54655661, -0.43585591,  0.73055701, -1.22143015,  0.31469511]])

In [None]:
# TODO: visualise the decision boundary after each epoch

array([[ 0.25809196,  0.20145382, -0.96806328, -1.08060446, -0.08027443,
         0.86370678],
       [ 0.0352929 ,  0.72659162,  0.52021812, -0.08551733,  0.37514371,
        -0.10224163]])