In [1]:
import numpy as np
from numpy import linalg
from __future__ import print_function, division
import pandas as pd
import random
import math


In [2]:
# Load the whitespace-separated data file. It has no header row, so pandas
# numbers the columns 0..n-1; later cells treat the last column as the label.
# `sep=r"\s+"` is the supported spelling of the deprecated
# `delim_whitespace=True` option and parses the file the same way.
df = pd.read_csv("twoSpirals.txt", sep=r"\s+", header=None)
df.head()

Unnamed: 0,0,1,2
0,10.5192,-0.717,-1.0
1,0.9987,-9.9681,-1.0
2,3.5763,8.3756,-1.0
3,1.9236,-10.6448,-1.0
4,8.1583,-5.9066,-1.0


In [3]:
def train_test_split(df, test_size):
    """Randomly partition ``df`` into a training frame and a test frame.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to split. Rows keep their original index labels.
    test_size : int or float
        A ``float`` is interpreted as the fraction of rows to hold out
        (rounded to the nearest whole row); any other value is used as the
        absolute number of test rows.

    Returns
    -------
    (train_df, test_df) : tuple of pandas.DataFrame
        ``test_df`` holds the sampled rows in sampling order; ``train_df``
        holds the remaining rows in their original order.

    Raises
    ------
    ValueError
        Propagated from ``random.sample`` when ``test_size`` is negative or
        larger than the number of rows.

    Notes
    -----
    The draw uses the global ``random`` module state, so call
    ``random.seed(...)`` beforehand for a reproducible split.
    """
    n_rows = len(df)

    if isinstance(test_size, float):
        # round() does not always return an int (Python 2, some numpy float
        # scalars), but random.sample requires an integer sample size.
        test_size = int(round(test_size * n_rows))

    # Sample row *positions* rather than index labels: with duplicated labels
    # a label-based lookup/drop would move every row sharing a label, giving
    # a test set larger than requested. For a default RangeIndex the sampled
    # rows are exactly the ones the label-based version would pick.
    positions = list(range(n_rows))
    test_positions = random.sample(population=positions, k=test_size)

    held_out = set(test_positions)
    train_positions = [pos for pos in positions if pos not in held_out]

    test_df = df.iloc[test_positions]
    train_df = df.iloc[train_positions]

    return train_df, test_df

In [4]:
# Seed Python's `random` module first: train_test_split draws its test rows
# with random.sample, so the seed must be set before the call for the split
# to be repeatable.
random.seed(3)
# Hold out 20% of the rows as the test set.
train_df, test_df = train_test_split(df, 0.20)

In [5]:
# Features are every column except the last; the label is the last column.
X_train, y_train = np.array(train_df.iloc[:, :-1]), np.array(train_df.iloc[:, -1])
X_test, y_test = np.array(test_df.iloc[:, :-1]), np.array(test_df.iloc[:, -1])

In [6]:
def linear_kernel(x1, x2):
    """Linear kernel: the dot product of ``x1`` and ``x2`` (via ``np.dot``)."""
    inner_product = np.dot(x1, x2)
    return inner_product

In [7]:
def polynomial_kernel(x, y, p=3):
    """Polynomial kernel ``(1 + <x, y>) ** p``.

    ``p`` is the degree of the polynomial (3 by default).
    """
    shifted_dot = 1 + np.dot(x, y)
    return shifted_dot ** p

In [8]:
def gaussian_kernel(x, y, sigma=2.0):
    """Gaussian (RBF) kernel ``exp(-||x - y||^2 / (2 * sigma^2))``.

    ``x`` and ``y`` must support subtraction (e.g. numpy arrays); ``sigma``
    sets the kernel width.
    """
    distance = linalg.norm(x - y)
    scale = 2 * (sigma ** 2)
    return np.exp(-(distance ** 2) / scale)


In [9]:

class Perceptron(object):
    """Linear perceptron classifier trained with mistake-driven updates.

    Predictions are ``np.sign(X . w + b)``, so labels should be encoded as
    -1 / +1. A sample that lies exactly on the decision boundary is
    predicted as 0.

    Parameters
    ----------
    T : int
        Maximum number of passes (epochs) over the training data.

    Attributes set by ``fit``
    -------------------------
    w : numpy.ndarray of shape (n_features,)
        Learned weight vector.
    b : float
        Learned bias.
    """

    def __init__(self, T=1):
        self.T = T

    def fit(self, X, y):
        """Learn ``w`` and ``b`` from a 2-D sample array ``X`` and labels ``y``.

        Each misclassified sample adds ``y[i] * X[i]`` to the weights and
        ``y[i]`` to the bias. Training stops after ``T`` epochs, or earlier
        once a full epoch produces no update.
        """
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features, dtype=np.float64)
        self.b = 0.0

        for _ in range(self.T):
            n_updates = 0
            for i in range(n_samples):
                if self.predict(X[i])[0] != y[i]:
                    self.w += y[i] * X[i]
                    self.b += y[i]
                    n_updates += 1
            if n_updates == 0:
                # A mistake-free epoch leaves w and b untouched, so every
                # remaining epoch would be identical: stop early.
                break

    def project(self, X):
        """Return the raw decision values ``X . w + b``."""
        return np.dot(X, self.w) + self.b

    def predict(self, X):
        """Return the sign of the decision value for each row of ``X``.

        A single 1-D sample is promoted to a one-row 2-D array, so the
        result is always an array.
        """
        X = np.atleast_2d(X)
        return np.sign(self.project(X))

class KernelPerceptron(object):
    """Perceptron in dual form, using a kernel function instead of a dot product.

    Each training sample gets a mistake counter ``alpha[i]``; the decision
    value for a point ``x`` is ``sum_i alpha[i] * y[i] * kernel(x, X[i])``.
    Predictions are taken with ``np.sign``, so labels should be encoded as
    -1 / +1, and a decision value of exactly 0 is predicted as 0.

    Parameters
    ----------
    kernel : callable
        ``kernel(a, b)`` returning a scalar similarity for two samples.
    T : int
        Maximum number of passes (epochs) over the training data.

    Attributes set by ``fit``
    -------------------------
    alpha : numpy.ndarray
        Mistake counts of the retained (non-zero) training samples.
    sv : numpy.ndarray
        The retained training samples ("support vectors").
    sv_y : numpy.ndarray
        Labels of the retained training samples.
    """

    def __init__(self, kernel=linear_kernel, T=1):
        self.kernel = kernel
        self.T = T

    def fit(self, X, y):
        """Train on a 2-D sample array ``X`` with labels ``y``.

        Prints how many training samples were kept as support vectors.
        """
        n_samples, _ = X.shape
        self.alpha = np.zeros(n_samples, dtype=np.float64)

        # Gram matrix: kernel value for every pair of training samples.
        K = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(n_samples):
                K[i, j] = self.kernel(X[i], X[j])

        for _ in range(self.T):
            n_updates = 0
            for i in range(n_samples):
                if np.sign(np.sum(K[:, i] * self.alpha * y)) != y[i]:
                    self.alpha[i] += 1.0
                    n_updates += 1
            if n_updates == 0:
                # A mistake-free epoch leaves alpha untouched, so every
                # remaining epoch would be identical: stop early.
                break

        # Support vectors: keep only samples that were misclassified at
        # least once (non-zero alpha); the others contribute nothing.
        sv = self.alpha > 1e-5
        self.alpha = self.alpha[sv]
        self.sv = X[sv]
        self.sv_y = y[sv]
        print ("%d support vectors out of %d points" % (len(self.alpha),
                                                       n_samples))

    def project(self, X):
        """Return the decision value for each row of ``X``."""
        y_predict = np.zeros(len(X))
        for i in range(len(X)):
            s = 0
            for a, sv_y, sv in zip(self.alpha, self.sv_y, self.sv):
                s += a * sv_y * self.kernel(X[i], sv)
            y_predict[i] = s
        return y_predict

    def predict(self, X):
        """Return the sign of the decision value for each row of ``X``.

        A single 1-D sample is promoted to a one-row 2-D array.

        Raises
        ------
        ValueError
            If ``X`` has more than two dimensions.
        """
        X = np.atleast_2d(X)
        if X.ndim != 2:
            raise ValueError(
                "X must be a 1-D sample or a 2-D array of samples, "
                "got %d dimensions" % X.ndim)
        return np.sign(self.project(X))

if __name__ == "__main__":
    import pylab as pl

    def test_linear():
        """Train the linear perceptron and print how many test labels it gets right."""
        model = Perceptron(T=3)
        model.fit(X_train, y_train)

        predictions = model.predict(X_test)
        n_correct = np.sum(predictions == y_test)
        print ("%d out of %d predictions correct" % (n_correct, len(predictions)))

    def test_kernel():
        """Train the Gaussian-kernel perceptron and print its test accuracy."""
        model = KernelPerceptron(gaussian_kernel, T=20)
        model.fit(X_train, y_train)

        predictions = model.predict(X_test)
        accuracy = np.mean(predictions == y_test)
        print (accuracy)

    # Only the kernel variant is run here; test_linear is defined but not called.
    test_kernel()


38 support vectors out of 800 points
1.0
