In [2]:
import numpy as np
import random
from sklearn.svm import SVC
import sklearn

In [7]:
class PerceptronSVM:
    def __init__(self,X_train, X_test):
        self.X_train = X_train
        self.X_test = X_test
        
        self.w_vect = np.array([[0.,0.,0.]]).T
        
        self.point1 = [random.uniform(-1,1),random.uniform(-1,1)]
        self.point2 = [random.uniform(-1,1),random.uniform(-1,1)]
    
    def target(self,point):
        x, y = point[1], point[2]
        x1,y1 = self.point1
        x2,y2 = self.point2
        
        slope = (y2-y1)/(x2-x1)
        
        if y > slope * (x - x1) + y1: return 1
        else: return -1


    def hypothesis(self,point):
        return np.sign(np.dot(point, self.w_vect))


    def train(self):
        misclassified = []
        repeats = 0

        while True:
            for point in self.X_train:
                # find all misclassified points
                real = self.target(point)
                if self.hypothesis(point) != real:
                    repeats += 1
                    misclassified.append([point, real])
                # modify weights to classify correctly, we apply PLA only to separable data 
                # otherwise would have to use pocket algorithm
            if not misclassified:
                break

            else:
                # choosing a random point to train the perceptron
                point, y_n = random.choice(misclassified)
                # w' = w + y*x 
                self.w_vect += y_n * np.array([point]).T
                misclassified = []
        return repeats


    def test(self):
        err = 0
        for point in self.X_test:
            if self.hypothesis(point) != self.target(point):
                err += 1

        return err / float(len(self.X_test))

    
def run(run_num,data_size, e_in = False, SVM = False):
    supports = []
    percentage_svm = 0
    for i in range(run_num):
        X_train = [[1.,random.uniform(-1,1),random.uniform(-1,1)] for i in range(data_size)]
        
        if e_in:
            X_test = X_train

        else:
            X_test = [[1.,random.uniform(-1,1),random.uniform(-1,1)] for i in range(data_size)]

        pla = PerceptronSVM(X_train, X_test)
        
        y_target = np.array([pla.target(point) for point in X_train])
        test_target = np.array([pla.target(point) for point in X_test])
        
        repeats = pla.train()
        error  = pla.test()

        if SVM:
            skip = 0
            support_error = 0

            # we have a problem if either of our y's are homogenous in class, hence need to check
            # if all elements are the same, we skip the fitting.
            if np.array_equal(y_target,np.ones(data_size)) or np.array_equal(y_target, -1*np.ones(data_size)): 
                skip +=1
                continue
                
            model = SVC(kernel = 'linear',C = 658758587)
            
            # we do not need the ones in the training data.
            adapted_X = np.delete(np.array(X_train), 0, 1)
          
            model.fit(adapted_X,y_target)
            
            adapted_test = np.delete(X_test,0,1)
            predict = model.predict(adapted_test)
            
            # error of SVM
            support_er = len(test_target[test_target != predict]) / float(len(test_target))
#             print("go {}, errors {} vs {}".format(str(i),str(support_er),str(error)))
            if support_er < error:
                percentage_svm += 1
            
    
    if not SVM:
        return repeats / float(run_num), error / float(run_num), pla.w_vect
    else:
        return percentage_svm / float(run_num)
if __name__ == "__main__":
    # pla
    print(run(run_num = 1000, data_size = 10))
    # returns the out-error of 0.0001 and w = (0,1.2398,1.01533)
    print("#####################################################")
    
    # svm comparrison with N = 10
    print(run(run_num = 1000, data_size = 10, SVM = True))
    # returns 0.263 which is closest to 0.3
    
    print("#####################################################")
    
    # svm comparrison with N = 100
    print(run(run_num=1000, data_size= 100, SVM = True))
    # returns value of 0.329, closest is 0.3
    
    

(0.017, 0.0002, array([[-1.        ],
       [-0.98176179],
       [ 1.89013047]]))
#####################################################
0.253
#####################################################


KeyboardInterrupt: 