#### Loss function:
$\min_{\alpha, b} L(\alpha, b)=-\sum_{x_{i} \in M} y_{i}\left(\sum_{j=1}^{N}\alpha_j y_j x_j \cdot x_{i}+b\right)$, where $M$ is the set of misclassified points.

#### SGD update rule (applied for each misclassified point $x_i$):
$\alpha_i = \alpha_i + \eta$

$b = b + \eta y_{i}$

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the iris dataset into a DataFrame and append the class id as a 'label' column
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['label'] = iris.target

# Binary classification: keep only the first 100 rows
# (iris rows are ordered by class, so this presumably keeps classes 0 and 1 only).
# Take an explicit copy so the relabelling below writes to an independent frame
# instead of a slice of the original one (avoids pandas' SettingWithCopyWarning).
df = df[:100].copy()
# Relabel class 0 as -1 so that the labels are {-1, +1}, as the perceptron update expects
df.loc[df['label'] == 0, 'label'] = -1

In [3]:
# Preview the first rows of the relabelled binary dataset
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label
0,5.1,3.5,1.4,0.2,-1
1,4.9,3.0,1.4,0.2,-1
2,4.7,3.2,1.3,0.2,-1
3,4.6,3.1,1.5,0.2,-1
4,5.0,3.6,1.4,0.2,-1


In [4]:
class Perceptron:
    """Binary perceptron trained in its dual form.

    The model keeps one coefficient ``alpha[i]`` per training sample and a
    bias ``b``. The decision value for training sample ``i`` is
    ``sum_j alpha[j] * Y[j] * <x_j, x_i> + b``, evaluated through a
    pre-computed Gram matrix of inner products. Labels are expected to be
    ``-1`` / ``+1``.

    Parameters
    ----------
    n_samples : int
        Number of training samples; fixes the length of ``alpha``.
    lr : float, default 0.1
        Learning rate (step added to ``alpha[i]`` on every mistake).
    epochs : int, default 1
        Maximum number of passes over the training set.

    Attributes
    ----------
    alpha : numpy.ndarray of float32, shape (n_samples,)
        Dual coefficients.
    b : number
        Bias term.
    Gram : numpy.ndarray, shape (n_samples, n_samples)
        Inner products of the training samples (set by ``calc_Gram``).
    w : numpy.ndarray of float32 or None
        Primal weight vector; ``None`` until ``fit`` or ``generate_w`` ran.
    """

    def __init__(self,n_samples,lr=0.1,epochs=1):
        # NOTE: alpha starts at one rather than at the textbook value of zero,
        # i.e. the initial weight vector is sum_i Y[i] * X[i]. Kept as-is so
        # that training results are unchanged.
        self.alpha = np.ones(n_samples,dtype=np.float32)
        self.b = 0
        self.lr = lr
        self.epochs = epochs
        # Primal weights are only available after fit()/generate_w().
        self.w = None

    def calc_Gram(self,X):
        """Store the Gram matrix ``Gram[i][j] = <X[i], X[j]>`` of the rows of X."""
        X = np.asarray(X,dtype=np.float64)
        # One matrix product instead of an O(n^2) Python double loop.
        self.Gram = X.dot(X.T)

    def forward(self,i,Y):
        """Return the decision value of training sample ``i``.

        ``Y`` holds the labels of all training samples, in the same order as
        the rows used to build the Gram matrix.
        """
        return self.b + np.dot(self.alpha * np.asarray(Y), self.Gram[i])

    def backward(self,i,y):
        """Apply one mistake-driven update for sample ``i`` with label ``y``."""
        self.alpha[i] += self.lr
        self.b += self.lr * y

    def fit(self,X,Y):
        """Train on samples ``X`` (one row per sample) with labels ``Y``.

        Runs at most ``epochs`` passes and stops early once a full pass makes
        no mistake. The primal weight vector ``w`` is computed at the end so
        that ``predict`` can be called right after ``fit``.

        Raises
        ------
        ValueError
            If the number of rows in ``X`` or labels in ``Y`` differs from
            the ``n_samples`` given to the constructor.
        """
        X = np.asarray(X,dtype=np.float64)
        Y = np.asarray(Y,dtype=np.float64)
        n_samples = len(self.alpha)
        if len(X) != n_samples or len(Y) != n_samples:
            raise ValueError(
                "expected %d samples, got %d rows in X and %d labels in Y"
                % (n_samples, len(X), len(Y))
            )

        self.calc_Gram(X)
        for epoch in range(self.epochs):
            error_cnt = 0
            for i in range(n_samples):
                # A sample is misclassified when label and score disagree in
                # sign (a score of exactly zero also counts as a mistake).
                if Y[i]*self.forward(i,Y)<=0:
                    self.backward(i,Y[i])
                    error_cnt += 1
            if error_cnt == 0:
                print("Early Stopping ...")
                break

        # The Gram matrix only covers training samples, so new instances are
        # scored with the primal weights instead.
        self.generate_w(X,Y)

    #cannot use Gram matrix for predict new instances
    def generate_w(self,X,Y):
        """Build the primal weights ``w = sum_i alpha[i] * Y[i] * X[i]``.

        ``X`` and ``Y`` must be the training data the model was fitted on.

        Raises
        ------
        ValueError
            If the number of rows in ``X`` or labels in ``Y`` differs from
            the length of ``alpha``.
        """
        X = np.asarray(X,dtype=np.float64)
        Y = np.asarray(Y,dtype=np.float64)
        n_samples = len(self.alpha)
        if len(X) != n_samples or len(Y) != n_samples:
            raise ValueError(
                "expected %d samples, got %d rows in X and %d labels in Y"
                % (n_samples, len(X), len(Y))
            )
        # Stored as float32, matching the dtype of alpha.
        self.w = (self.alpha * Y).dot(X).astype(np.float32)

    def predict(self,X,classes=(0,1)):
        """Classify every row of ``X`` and return the labels as a list.

        ``classes[0]`` is returned when the score ``w . x + b`` is strictly
        positive, ``classes[1]`` otherwise.

        Raises
        ------
        RuntimeError
            If neither ``fit`` nor ``generate_w`` has been called yet.
        """
        if self.w is None:
            raise RuntimeError("call fit() or generate_w() before predict()")
        positive, negative = classes[0], classes[1]
        return [positive if self.w.dot(x)+self.b > 0 else negative for x in X]

In [5]:
#prepare data: every column except the last is a feature, the last column is the label
X,Y = df.values[:,:-1],df.values[:,-1]
# NOTE: test_size=0.9 leaves only 10% of the rows for training.
# The seed is fixed so that the split (and therefore the fitted model and the
# reported accuracy) is reproducible under Restart & Run All.
train_X,test_X,train_Y,test_Y = train_test_split(X,Y,test_size=0.9,random_state=42)

#usually need a bigger learning rate than normal perceptron model
model = Perceptron(train_X.shape[0],lr=1.2)
model.fit(train_X,train_Y)
# Build the primal weight vector needed by predict()
model.generate_w(train_X,train_Y)

In [6]:
# Show the learned primal weight vector and bias
print("Model Parameters: w:",model.w)
print("Model Parameters: b:",model.b)

Model Parameters: w: [ -4.63999891  -9.13999939  11.84000111   5.42000103]
Model Parameters: b: 2.4


In [7]:
# Predict on the held-out split; classes=[1,-1] maps a positive score to +1 and anything else to -1
preds = model.predict(test_X,classes=[1,-1])
# Report the accuracy of the predictions against the true test labels
print(accuracy_score(test_Y,preds))

0.966666666667


In [8]:
# True labels of the test split, shown for comparison with the predictions
test_Y

array([ 1.,  1., -1., -1., -1.,  1.,  1., -1., -1., -1.,  1., -1.,  1.,
       -1.,  1.,  1., -1., -1.,  1.,  1., -1., -1., -1., -1.,  1.,  1.,
       -1., -1.,  1.,  1.,  1., -1., -1.,  1.,  1.,  1., -1.,  1.,  1.,
       -1., -1., -1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1., -1., -1.,
       -1., -1., -1.,  1., -1.,  1., -1.,  1.,  1.,  1., -1.,  1., -1.,
        1.,  1.,  1.,  1., -1., -1.,  1., -1., -1., -1.,  1., -1.,  1.,
        1., -1., -1., -1.,  1.,  1.,  1., -1., -1.,  1.,  1.,  1.])

In [9]:
# Predicted labels as an array, for side-by-side comparison with test_Y
np.array(preds)

array([ 1,  1, -1, -1, -1,  1,  1, -1, -1, -1,  1, -1,  1, -1,  1,  1, -1,
       -1,  1,  1, -1, -1, -1, -1,  1,  1, -1, -1, -1,  1,  1, -1, -1,  1,
        1,  1, -1,  1,  1, -1, -1, -1,  1, -1, -1,  1,  1,  1,  1, -1, -1,
       -1, -1, -1, -1,  1, -1,  1, -1,  1,  1,  1, -1,  1, -1,  1,  1,  1,
        1, -1, -1,  1, -1, -1, -1,  1, -1,  1,  1, -1, -1, -1,  1,  1,  1,
       -1, -1,  1,  1,  1])