In [1]:
# Here we use the Perceptron model from the scikit-learn library
# to classify two classes of the Iris dataset, "versicolor" and "virginica",
# based on two features: sepal length and petal length.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Import a function for plotting decision boundaries
# !pip install mlxtend
from mlxtend.plotting import plot_decision_regions
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Perceptron.html
from sklearn.linear_model import Perceptron

In [None]:
# Load the iris CSV (located next to this notebook) into a pandas DataFrame
df = pd.read_csv(filepath_or_buffer="./iris_dataset.csv")

In [None]:
# We need only versicolor and virginica, which are the LAST 100 rows
# (index labels 50-149) -- assumes the CSV keeps the usual ordering of
# setosa, versicolor, virginica with 50 rows each.
# Label-based .loc (inclusive on both ends) is used instead of positional
# .iloc so the cell is idempotent: re-running it selects the same rows,
# whereas re-applying iloc[50:150] to the already-sliced frame would keep
# only 50 rows.

df = df.loc[50:149]
df

In [None]:
# Feature matrix: every remaining row, restricted to the two columns used for classification
X = df.loc[:, ["sepal_length", "petal_length"]]

In [None]:
# Build the label vector y from the 'species' column.
# sklearn's Perceptron could be trained on the original string labels,
# but plotting with mlxtend requires numerically encoded labels:
# versicolor -> -1, anything else (here: virginica) -> 1
y = np.where(df['species'] == 'versicolor', -1, 1)

# Show the encoded labels
y

In [None]:
# Create a Perceptron with default settings and train it on (X, y).
# fit() returns the estimator itself, so construction and training can be chained.
clf = Perceptron().fit(X, y)

# Display the fitted estimator
clf

In [None]:
# Compute predicted labels on X -- the same samples the model was fitted on,
# so this shows how well the training data itself is reproduced
y_pred = clf.predict(X)
y_pred

In [None]:
# Actual labels (encoded as -1 / 1), shown for comparison with y_pred
y

In [None]:
# Compare actual and predicted labels element-wise:
# True where the prediction matches the actual label, False otherwise
print(y == y_pred)

In [None]:
# Learned weights of the fitted sklearn perceptron
clf.coef_   # weights

In [None]:
# Learned bias term of the fitted sklearn perceptron
clf.intercept_  # bias

In [None]:
# Preview the features as a plain NumPy array -- this is the form
# handed to plot_decision_regions in the plotting cell
X.to_numpy()

In [None]:
# Draw the decision regions of the fitted sklearn perceptron on an explicit
# Axes object; the features are passed as a NumPy array
fig, ax = plt.subplots(figsize = (8, 6))
plot_decision_regions(X.to_numpy(), y, clf = clf, ax = ax)
ax.set_title("Sklearn Perceptron", fontsize = 18)
ax.set_xlabel("sepal length", fontsize = 15)
ax.set_ylabel("petal length", fontsize = 15);

In [None]:
# We can use this classifier to predict the species of a flower with measurements 6 (sepal length) and 6 (petal length).
# Obviously from the graph, the label should be 1.
# clf was fitted on a DataFrame, so the query is wrapped in a DataFrame with
# the same column names; passing a bare list makes scikit-learn warn that the
# input does not have valid feature names.

clf.predict(pd.DataFrame([[6, 6]], columns=["sepal_length", "petal_length"]))

In [None]:
# Calculate the accuracy of this model on this particular data set.
# Note: X and y are the data the model was fitted on, so this is training
# accuracy, not an estimate of performance on unseen flowers.
clf.score(X,y)

In [None]:
# The cells below implement a perceptron from scratch so that the number of
# misclassifications per training epoch can be plotted for this data set

In [None]:
# Rebind X to a plain NumPy array of the two features: MyPerceptron.fit
# iterates over X row by row and indexes each sample positionally (xi[0], xi[1])
X = df.loc[:, ["sepal_length", "petal_length"]].to_numpy()

In [None]:
class MyPerceptron(object):
    """Perceptron for exactly two features, trained with the error-driven rule.

    Labels are expected to be encoded as -1 / 1, matching the values
    produced by ``predict``; a correctly classified sample then yields a
    zero update.

    Parameters
    ----------
    eta : float, default 0.5
        Learning rate that scales every weight/bias update.
    epochs : int, default 50
        Maximum number of passes over the training data.
    random_state : int or None, default None
        Seed for the random initial weights. ``None`` draws from NumPy's
        global generator (so ``np.random.seed`` still applies); an integer
        makes ``fit`` reproducible on its own.

    Attributes set by ``fit``
    -------------------------
    w1, w2, b : ndarray of shape (1,)
        Weights for the first/second feature and the bias.
    w : ndarray of shape (2, 1)
        ``w1`` and ``w2`` stacked; refreshed by ``weighted_sum`` and at the
        end of ``fit``.
    errors : list of int
        Number of misclassified samples in each epoch that had at least one
        misclassification (a final error-free epoch is not recorded).
    """

    def __init__(self, eta = 0.5, epochs = 50, random_state = None):
        self.eta = eta
        self.epochs = epochs
        self.random_state = random_state

    def fit(self, X, y):
        """Train on samples ``X`` (rows of two features) with labels ``y``.

        Stops early as soon as an epoch produces no misclassification.
        Returns ``self`` so calls can be chained.
        """
        # Fall back to the global generator when no seed is given so the
        # default behaviour is unchanged.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        self.w1 = rng.rand(1)
        self.w2 = rng.rand(1)
        self.b = rng.rand(1)
        # Define w up front so it exists even if X is empty.
        self.w = np.array([self.w1, self.w2])

        self.errors = []

        for _ in range(self.epochs):
            errors = 0
            for xi, yi in zip(X, y):
                # Zero when the prediction is right, +/- 2 * eta otherwise.
                update = self.eta * (self.predict(xi) - yi)
                self.w1 = self.w1 - update*xi[0]
                self.w2 = self.w2 - update*xi[1]
                self.b = self.b - update
                # update has shape (1,); reduce it with np.any before int()
                # because converting a non-0-d array to a scalar is
                # deprecated in NumPy >= 1.25.
                errors = errors + int(np.any(update != 0))
            if errors == 0:
                break
            self.errors.append(errors)

        # weighted_sum rebuilt w *before* the last update was applied, so
        # refresh it to make my_clf.w reflect the final weights.
        self.w = np.array([self.w1, self.w2])
        return self

    def weighted_sum(self, x):
        """Return ``x . w + b``; also refreshes ``self.w`` from ``w1``/``w2``.

        For a single sample of shape (2,) the result has shape (1,); for a
        matrix of shape (n, 2) it has shape (n, 1).
        """
        self.w = np.array([self.w1, self.w2])
        return np.dot(x, self.w) + self.b

    def predict(self, x):
        """Return 1 where the weighted sum is strictly positive, else -1."""
        return np.where(self.weighted_sum(x) > 0.0, 1, -1)

In [None]:
# Seed NumPy's global generator: MyPerceptron.fit draws its initial weights
# with np.random.rand, so without a seed the error plot changes on every run
np.random.seed(0)

# Instantiate one instance of the MyPerceptron class
my_clf = MyPerceptron()

# Call the fit method
my_clf.fit(X, y)

In [None]:
# Predict labels on X with the hand-written perceptron.
# For a 2-D X the result is a column of shape (n_samples, 1), because the
# stacked weight array inside MyPerceptron has shape (2, 1).
y_pred = my_clf.predict(X)

In [None]:
# Flatten the (n_samples, 1) column returned by MyPerceptron.predict into a
# 1-D vector; -1 lets NumPy infer the length instead of hard-coding 100
y_pred = y_pred.reshape(-1)

In [None]:
# Plot the number of misclassified samples recorded for each training epoch.
# The length of my_clf.errors is the number of epochs in which at least one
# sample was misclassified (an error-free final epoch is not recorded).

epoch_numbers = range(1, len(my_clf.errors)+1)

fig, ax = plt.subplots(figsize = (8, 6))
ax.plot(epoch_numbers, my_clf.errors, marker = "o")
ax.set_title("Error plot", fontsize = 15)
ax.set_xlabel("The number of iterations", fontsize = 15)
ax.set_ylabel("The number of misclassifications", fontsize = 15)
ax.set_xticks(epoch_numbers)
plt.show()

In [None]:
# Weights of the hand-written perceptron: w1 and w2 stacked into one array.
# The attribute is rebuilt every time weighted_sum (and hence predict) runs.
my_clf.w #weights

In [None]:
# Bias term of the hand-written perceptron (array of shape (1,))
my_clf.b #bias