In [19]:
# Author : Pavan Yekabote
# Perceptron algorithm using gradient descent for binary classification

In [16]:
import numpy as np

In [34]:

class Perceptron:
    def __init__(self):
        self.W = None
        self.trained = False
        pass
    
    def sigmoid(self,x, derive=False):
        # Check if x is type of array or list
        if type(x) ==np.ndarray or type(x) == list:
            # Convert list to numpy array
            x = np.array(x) if type(x) == list else x
            # if shape is more than one dimension, then throw error
            if len(x.shape) > 1:
                return "Dimension Error"
            # return sigmoid array
            return np.array([self.sigmoid(i) if not derive else self.sigmoid(i, derive=True) for i in x])

        if not derive:
            return 1/(1+np.exp(-x))
        else:
            y = self.sigmoid(x)
            return y * (1 - y)
        
    def train(self,X,y,max_iter=100, lr=0.8):
        self.W = np.random.randn((X.shape[1]))
        self.bias = 1
        y = np.array(y).flatten()
        
        for iteration in range(max_iter):
           
            for i,x in enumerate(X):
                y_hat = self.predict(x)

            # using mean_squared_error [(y-y_hat) power 2 ] as loss function
            # Calculation of gradient
            # Prediction = y_hat = sigmoid(Weights=W * inputs = x +b )
            # Since Error in output ( Mean Squared Error ) E = ( y_actual - y_hat ) power 2.
            # Therefore change in Error E w.r.t Weights W = dE/dW
            # As y_hat = sigmoid(Wx+b) 
            # => d/dW( sigmoid(Wx+b) ) = sigmoid(Wx+b) * (1- sigmoid(Wx+b)) * d/dW(Wx+b)
            # => d/dW( sigmoid(Wx+b) ) = sigmoid(Wx+b) * (1- sigmoid(Wx+b)) * x        [ Since d/dW (Wx+b) = x]
            # dE/dw=[d ( (y-y_hat) power 2 ) / dW ]= 2 ( y-y_hat ) * sigmoid(y-y_hat) * (1-sigmoid(y-y_hat)) * x
            # Hence take this small change dE/dW and add to the Weights to get prediction y_hat near to y
            # And we multiply a learning rate to let the gradient be descended taking small steps
                
                gradient = 2 * (y[i]-y_hat) * lr * self.sigmoid(y_hat, derive=True)
                
                # Applying gradient descent and updating weights
                self.W += np.multiply(gradient , x)
                # Update bias with the gradient
                self.bias += gradient 
                
        return (self.W, self.bias)
        
        
    
    def predict(self,x):
        if self.W is None:
            return "Train model before use"
        if len(x.shape) > 1 and len(x[0].shape)==1:
             return np.round(np.array([self.predict(i) for i in x]).flatten(), decimals=3)
        return self.sigmoid(np.matmul(self.W, x.flatten()).flatten()+self.bias)


In [35]:

X_and = np.array([[0,0], [0,1], [1,0], [1,1]], dtype=np.int16)
y_and = np.array([[0],[0],[0],[1]])


In [36]:

p_and = Perceptron()

In [37]:
p_and.train(X_and,y_and,max_iter=1000)
p_and.predict(X_and)


array([0.   , 0.012, 0.012, 0.979])

In [38]:
# Here is the output Saying that for 
# (0,0) => (0) is the predicted value
# (0,1) => (0.012 == 0 ) is predicted value which is too near to 0
# (1,0) => (0.012 == 0 ) is predicted value which is too near to 0
# (1,1) => (0.979 == 1 ) is predicted value which is too close to 1 

In [39]:

X_or = np.array([[0,0], [0,1], [1,0], [1,1]], dtype=np.int16)
y_or = np.array([[0],[1],[1],[1]])


In [40]:
p_or = Perceptron()

In [44]:
p_or.train(X_or, y_or, max_iter=1000)
p_or.predict(X_or)

array([0.013, 0.994, 0.994, 1.   ])

In [45]:
# Here is the output Saying that for 
# (0,0) => (0.013 == 0 ) is the predicted value which is too near to 0
# (0,1) => (0.994 == 1 ) is predicted value which is too near to 1
# (1,0) => (0.994 == 1 ) is predicted value which is too near to 1
# (1,1) => (1 == 1 ) is predicted value which is 1