In [1]:
import numpy as np
import random


# Perceptron with numpy + pandas
# The guy is great, will definitely give a shoutout
class Perceptron():
    '''
        Perceptron Learning Algorithm that can be trained using a
        fit and predict methodology with numpy.

        The model is stored in `weights`: weights[0] is the bias and
        weights[k+1] is the weight applied to feature k.
    '''
    
    def __init__(self):
        # Empty until fit() is called; fit() replaces it with a numpy array
        self.weights = []
        
    def fit(self, X, y, learning_rate = 0.01, num_iteration = 100):
        '''
            Train the perceptron with stochastic updates.

            X: 2D numpy array, one row per sample and one column per feature
            y: sequence of targets (0 or 1) indexed like the rows of X
            learning_rate: step size applied to every weight update
            num_iteration: number of single-sample updates to perform

            Every 100 iterations the mean squared error over the whole
            dataset is printed.
        '''
        
        (num_row, num_feature) = X.shape
        
        # Randomly initialize the bias plus one weight per feature
        self.weights = np.random.rand(num_feature+1) 

        # Launch the training algorithm
        for i in range(num_iteration):
            
            # Stochastic Gradient Descent: one random sample per step
            r_i = random.randint(0,num_row-1)
            row = X[r_i,:]
            error = y[r_i] - self.predict(row) # estimate of the gradient

            # The bias behaves like a weight on a constant input of 1
            self.weights[0] = self.weights[0] + learning_rate*error

            # Feature f_i is paired with weights[f_i+1] (see predict), so the
            # update must be shifted by one to skip the bias slot
            for f_i in range(num_feature):
                self.weights[f_i+1] = self.weights[f_i+1] + learning_rate*error*row[f_i]
                
            if i % 100 == 0:
                mean_error = self._mean_squared_error(X, y)
                print(f"Iteration {i} with error = {mean_error}")

    def _mean_squared_error(self, X, y):
        '''
            Mean of the squared prediction errors over every row of X
        '''
        num_row = X.shape[0]
        total_error = 0
        for r_i in range(num_row):
            error = y[r_i] - self.predict(X[r_i,:])
            total_error = total_error + error**2
        return total_error/num_row
        
    def predict(self, row):
        '''
            Return 1.0 when the weighted sum of the features plus the bias
            is non-negative, 0.0 otherwise.
        '''
            
        # The activation starts with the bias stored at weights[0]
        activation = self.weights[0]
        
        # We iterate over the weights and the features in the given row
        for weight, feature in zip(self.weights[1:], row):
            activation = activation + weight*feature
            
        # Heaviside Step Function Activation
        if activation >= 0.0:
            return 1.0
        return 0.0
        

In [2]:
import pandas as pd

# Load the iris measurements and one-hot encode the categorical column
df = pd.get_dummies(pd.read_csv('Iris.csv'), prefix=['variety'])

# Feature matrix and the indicator column for the Versicolor variety
X = df[['sepal.length','sepal.width','petal.length','petal.width']].to_numpy()
y = df['variety_Versicolor'].to_numpy()

# Apply one shared random permutation so rows and labels stay aligned
perm = np.random.permutation(len(X))
X, y = X[perm], y[perm]

# Train the perceptron on the shuffled data
clf = Perceptron()
clf.fit(X, y, num_iteration = 1000)

Iteration 0 with error = 0.6666666666666666
Iteration 100 with error = 0.5733333333333334
Iteration 200 with error = 0.6
Iteration 300 with error = 0.78
Iteration 400 with error = 0.58
Iteration 500 with error = 0.78
Iteration 600 with error = 0.52
Iteration 700 with error = 0.7533333333333333
Iteration 800 with error = 0.38
Iteration 900 with error = 0.7133333333333334


In [3]:
import random

class Perceptron():
    '''
        Perceptron Learning Algorithm that can be trained using a
        fit and predict methodology, without any library.

        The model is stored in `weights`: weights[0] is the bias and
        weights[k+1] is the weight applied to feature k.
    '''
    
    def __init__(self):
        # Empty until fit() is called
        self.weights = []
        
    def fit(self, X, y, learning_rate = 0.01, num_iteration = 100):
        '''
            Train the perceptron with stochastic updates.

            X: list of rows, each row being a list of numeric features
            y: list of targets (0 or 1) indexed like X
            learning_rate: step size applied to every weight update
            num_iteration: number of single-sample updates to perform

            Every 100 iterations the mean squared error over the whole
            dataset is printed.
        '''
        
        num_row = len(X)
        num_feature = len(X[0]) # Here we assume that we have a rectangular matrix
        
        # Randomly initialize the bias plus one weight per feature.
        # A fresh list is built so that calling fit() again does not keep
        # stale weights from a previous training run at the front.
        self.weights = [random.uniform(0,1) for _ in range(num_feature+1)]
        
        # Launch the training algorithm
        for i in range(num_iteration):
            
            # Stochastic Gradient Descent: one random sample per step
            r_i = random.randint(0,num_row-1)
            row = X[r_i]
            error = y[r_i] - self.predict(row)

            # The bias behaves like a weight on a constant input of 1
            self.weights[0] = self.weights[0] + learning_rate*error

            # Feature f_i is paired with weights[f_i+1] (see predict), so the
            # update must be shifted by one to skip the bias slot
            for f_i in range(num_feature):
                self.weights[f_i+1] = self.weights[f_i+1] + learning_rate*error*row[f_i]
                
            if i % 100 == 0:
                mean_error = self._mean_squared_error(X, y)
                print(f"Iteration {i} with error = {mean_error}")

    def _mean_squared_error(self, X, y):
        '''
            Mean of the squared prediction errors over every row of X
        '''
        num_row = len(X)
        total_error = 0
        for r_i in range(num_row):
            error = y[r_i] - self.predict(X[r_i])
            total_error = total_error + error**2
        return total_error/num_row
        
    def predict(self, row):
        '''
            Return 1.0 when the weighted sum of the features plus the bias
            is non-negative, 0.0 otherwise.
        '''
            
        # The activation starts with the bias stored at weights[0]
        activation = self.weights[0]
        
        # We iterate over the weights and the features in the given row
        for weight, feature in zip(self.weights[1:], row):
            activation = activation + weight*feature
            
        # Heaviside Step Function Activation
        if activation >= 0.0:
            return 1.0
        return 0.0

In [4]:
import csv

def permute_together(X,y):
    '''
        Helper function to permute (shuffle) a matrix and a vector together.

        The same random ordering is applied to both inputs so that X[i] and
        y[i] stay paired. The inputs are left untouched; two new lists are
        returned as the tuple (perm_X, perm_y).

        Raises ValueError when X and y do not have the same length.
    '''
    
    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same length, got {len(X)} and {len(y)}"
        )

    # Shuffle the row indices once and reuse them for both sequences; this
    # avoids popping from the caller's lists (which used to empty them)
    order = list(range(len(X)))
    random.shuffle(order)

    perm_X = [X[i] for i in order]
    perm_y = [y[i] for i in order]
        
    return (perm_X, perm_y)

class DataFrame():
    '''
        Simple dataframe to mimic the pandas library.

        header: list of column names read from the first row of the file
        X: list of feature rows, each a list of floats
        y: list of raw string labels, one per row of X
    '''
    def __init__(self):
        self.header = []
        self.X = []
        self.y = []
        
    def clean_string(self,string):
        '''
            Dummy function to clean up the iris dataset from (")
        '''
        return string.replace('"', '')
    

    def get_encoded_labels(self, target):
        '''
            Encode with 1 or 0 the y vector if it matches our target variable
        '''
        return [1 if label == target else 0 for label in self.y]
    
    def read_csv(self, filename):
        '''
            Read the iris dataset CSV file and populate the header, the X and the y variables
            needed for the perceptron.

            The first non-blank row is taken as the header. In every other
            row, all columns but the last are parsed as floats into X and the
            last column is stored as the label in y. Blank lines are skipped.
            Any previously loaded content is replaced, and only once the whole
            file has been parsed successfully.

            Raises ValueError when a feature column cannot be parsed as float.
        '''
        header = []
        features = []
        labels = []
        header_seen = False

        with open(filename, newline='', encoding="utf-8-sig") as csvfile:
            # Let the csv module split on commas and handle quoting, so that
            # fields containing spaces are not truncated
            csvreader = csv.reader(csvfile)
            
            for row in csvreader:

                # Skip blank lines (the reader yields [] for them)
                if not any(field.strip() for field in row):
                    continue
                
                # We have the header
                if not header_seen:
                    header = [self.clean_string(s) for s in row]
                    header_seen = True
                    continue
                
                # Here is the data: every column but the last is a feature
                features.append([float(value) for value in row[:-1]])
                
                # Last item in the csv will be the target
                labels.append(self.clean_string(row[-1]))

        self.header = header
        self.X = features
        self.y = labels
                

# Load the iris data with the hand-written DataFrame
df = DataFrame()
df.read_csv('iris.csv')

# Features plus 0/1 labels for the Versicolor class, shuffled in unison
X, y = permute_together(df.X, df.get_encoded_labels('Versicolor'))

# Train the perceptron on the shuffled data
clf = Perceptron()
clf.fit(X, y, num_iteration = 1000)

Iteration 0 with error = 0.6666666666666666
Iteration 100 with error = 0.3333333333333333
Iteration 200 with error = 0.64
Iteration 300 with error = 0.56
Iteration 400 with error = 0.5066666666666667
Iteration 500 with error = 0.5933333333333334
Iteration 600 with error = 0.4533333333333333
Iteration 700 with error = 0.4
Iteration 800 with error = 0.3933333333333333
Iteration 900 with error = 0.3466666666666667
