In [None]:
import numpy as np
import random
import matplotlib.pyplot as plt
% matplotlib inline
import seaborn as sns
from pylab import savefig

In [61]:
class PLA:
    """Perceptron Learning Algorithm (PLA) experiment on 2-D points.

    The target function is defined by the line through two points drawn
    uniformly from [-1, 1] x [-1, 1]: points strictly above the line are
    labelled -1 and all other points are labelled 1.  The perceptron weights
    start at zero and are corrected on one randomly chosen misclassified
    training point per iteration.

    Every data point is indexed as ``[bias, x, y]``; ``target`` reads
    positions 1 and 2, ``hypothesis`` uses all three entries.

    Attributes:
        X_train: training points used by ``train``.
        X_test: points used by ``test`` to measure the error rate.
        w: weight column vector, shape (3, 1), initialised to zeros.
        point1, point2: the two ``[x, y]`` points defining the target line.
    """

    def __init__(self,X_train,X_test):
        """Store the data sets, zero the weights and draw a random target line."""
        self.X_train = X_train
        self.X_test = X_test
        self.w = np.zeros((3,1))

        self.point1 = [random.uniform(-1,1),random.uniform(-1,1)]
        self.point2 = [random.uniform(-1,1),random.uniform(-1,1)]

    def target(self,point):
        """Return the true label (-1 or 1) of ``point``.

        Points strictly above the target line get -1, everything else 1.
        If the line is vertical (both defining points share the same x),
        "above" is undefined, so points strictly to the left of the line get
        -1 and all others 1.  The previous implementation raised
        ``ZeroDivisionError`` in that case.
        """
        x, y = point[1], point[2]
        x1, y1 = self.point1
        x2, y2 = self.point2

        dx = x2 - x1
        if dx == 0:
            # Vertical (or degenerate) line: the slope is undefined, so
            # classify by the side of x = x1 the point falls on.
            return -1 if x < x1 else 1

        slope = (y2 - y1) / dx

        # Line equation: y_line = slope * (x - x1) + y1
        if slope * (x - x1) + y1 < y:
            return -1
        return 1

    def hypothesis(self,point):
        """Return sign(point . w): the perceptron's current prediction.

        The result is whatever ``np.sign`` yields for the dot product, so it
        is 0 when the product is exactly zero (e.g. before any update); such
        a prediction never matches a -1/1 target label.
        """
        return np.sign(np.dot(point,self.w))

    def train(self):
        """Run PLA until no training point is misclassified.

        Each iteration collects every misclassified training point, picks one
        at random and adds ``label * point`` to the weights.  Plain PLA is
        used because the target is a line, i.e. the data is separable;
        non-separable data would need the pocket algorithm instead.

        Returns:
            The number of weight updates performed.
        """
        # The target labels do not depend on the weights, so compute them once
        # instead of on every pass over the training set.
        labelled = [(point, self.target(point)) for point in self.X_train]

        repeats = 0

        while True:
            # find all misclassified points under the current weights
            misclassified = [
                (point, real)
                for point, real in labelled
                if self.hypothesis(point) != real
            ]

            if not misclassified:
                break

            repeats += 1
            # choose a random misclassified point to train the perceptron
            point, y_n = random.choice(misclassified)
            self.w += y_n * np.array([point]).T

        return repeats

    def test(self):
        """Return the fraction of ``X_test`` points the perceptron gets wrong."""
        error = 0

        for point in self.X_test:
            if self.hypothesis(point) != self.target(point):
                error += 1

        return error / float(len(self.X_test))
        
def data_gen(data_size, e_in = False):
    """Generate random training and test sets of 2-D points.

    Each point is ``[1., x, y]`` with ``x`` and ``y`` drawn uniformly from
    [-1, 1]; the leading 1. is the bias coordinate.

    Args:
        data_size: number of points in each set.
        e_in: when true, the test set is the very same list object as the
            training set (for measuring in-sample error); otherwise a fresh
            set of ``data_size`` points is drawn.

    Returns:
        A ``(X_train, X_test)`` tuple of lists of points.
    """
    def draw_points():
        # x is drawn before y for every point, matching list-literal order.
        points = []
        for _ in range(data_size):
            x_coord = random.uniform(-1,1)
            y_coord = random.uniform(-1,1)
            points.append([1., x_coord, y_coord])
        return points

    X_train = draw_points()
    X_test = X_train if e_in else draw_points()

    return X_train, X_test
    
def question_7(n_runs=1000, data_size=10):
    """Run the PLA experiment repeatedly and print the averaged results.

    For each run a fresh training/test set and a fresh random target are
    generated, the perceptron is trained to convergence, and its error on
    the separate test set is measured.

    Args:
        n_runs: number of independent experiments to average over
            (default 1000, the previously hard-coded value).
        data_size: number of points in each generated set
            (default 10, the previously hard-coded value).

    Raises:
        ValueError: if ``n_runs`` is not positive (the averages would be
            undefined).
    """
    if n_runs <= 0:
        raise ValueError("n_runs must be a positive integer")

    total_repeats = 0
    total_error = 0.
    for _ in range(n_runs):
        X_train, X_test = data_gen(data_size)

        pla = PLA(X_train,X_test)
        total_repeats += pla.train()
        total_error += pla.test()

    # float() keeps true division regardless of the interpreter's "/" semantics.
    avg_repeats = total_repeats / float(n_runs)
    avg_error = total_error / float(n_runs)
    print("Question 7.  Average number of iterations for PLA to converge is " + str(avg_repeats))
    print("Question 8.  Average E_out error is  " + str(np.round(avg_error, decimals=3)))
           

# Entry point: run the Question 7 / Question 8 experiment when this code is
# executed directly (as a script or as a notebook cell) rather than imported.
if __name__ == "__main__":
    question_7()
    
        

Question 7.  Average number of iterations for PLA to converge is 8.867
Question 8.  Average E_out error is  0.114
