In [44]:
import pandas as pd
import numpy as np
import itertools

In [42]:
# Lower and upper bound of the range that point coordinates are drawn from.
INTERVAL = np.array((-1.0, 1.0))

In [133]:
def get_line(x1, y1, x2, y2):
    """Return the coefficients of the line through (x1, y1) and (x2, y2).

    The line is written implicitly as ``C + A*x + B*y = 0`` and the result is
    ordered ``[C, A, B]``, so its dot product with a row ``[1, x, y]`` gives
    the value of the line equation at the point ``(x, y)``.
    """
    constant_term = x2 * y1 - x1 * y2
    x_coefficient = y2 - y1
    y_coefficient = x1 - x2

    return np.array([constant_term, x_coefficient, y_coefficient])

def get_sample(sample_size):
    """Draw ``sample_size`` random points as rows of the form ``[1, x, y]``.

    ``x`` and ``y`` are sampled uniformly between the two bounds stored in
    ``INTERVAL``; the leading column of ones is the constant (bias) input.
    The result has shape ``(sample_size, 3)``.
    """
    low, high = INTERVAL
    bias_column = np.ones((sample_size, 1))
    coordinates = np.random.uniform(low, high, (sample_size, 2))
    return np.concatenate((bias_column, coordinates), axis=1)

def label_points(X, w):
    """Label every row of ``X`` by the sign of its dot product with ``w``.

    Entries of the result are +1, -1, or 0 (0 when the dot product is
    exactly zero).
    """
    scores = np.dot(X, w)
    return np.sign(scores)

def run_PLA(X, y_true, max_iter=None):
    """Fit a weight vector with the perceptron learning algorithm (PLA).

    Starting from the zero vector, every pass labels all rows of ``X`` with
    the current weights, picks one misclassified row uniformly at random and
    adds ``y * x`` for that row to the weights. The loop ends on the first
    pass in which no row is misclassified.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Input rows (including any constant/bias column).
    y_true : array-like, shape (n_samples,)
        Target labels, compared against ``label_points(X, w_hat)``.
    max_iter : int or None, optional
        Upper bound on the number of passes. ``None`` (the default) keeps the
        original behaviour of looping until every row is classified
        correctly, which never terminates if the data cannot be separated.

    Returns
    -------
    w_hat : numpy.ndarray, shape (n_features,)
        Final weights. If ``max_iter`` was reached first, these weights may
        still misclassify some rows.
    n_iter : int
        Number of passes performed, counting the final pass that found no
        misclassified row (so on convergence the number of weight updates is
        ``n_iter - 1``).

    Raises
    ------
    ValueError
        If ``max_iter`` is given and is smaller than 1.
    """
    if max_iter is not None and max_iter < 1:
        raise ValueError("max_iter must be at least 1 or None")

    X = np.asarray(X)
    y_true = np.asarray(y_true)
    # Size the weights from the data instead of assuming three columns.
    w_hat = np.zeros(X.shape[1])

    for n_iter in itertools.count(start=1, step=1):
        # With all-zero weights every prediction is 0, so rows labelled +/-1
        # all count as misclassified on the first pass.
        misclassified_idx = np.flatnonzero(label_points(X, w_hat) != y_true)
        if misclassified_idx.size == 0:
            break
        if max_iter is not None and n_iter >= max_iter:
            # Pass budget exhausted: return the current weights as they are.
            break
        rand_misclf_idx = np.random.choice(misclassified_idx)
        w_hat = w_hat + y_true[rand_misclf_idx] * X[rand_misclf_idx]

    return w_hat, n_iter

def run_experiment(n_points=10, n_runs=1000, test_sample_size=1e5, seed=None):
    """Run PLA repeatedly on random targets and print the averaged results.

    Each run draws a random target line, samples ``n_points`` training
    points labelled by that line, fits weights with ``run_PLA`` and then
    measures how often the fitted and the target labelling disagree on a
    fresh sample of ``test_sample_size`` points.

    Parameters
    ----------
    n_points : int
        Number of training points per run.
    n_runs : int
        Number of independent runs to average over.
    test_sample_size : int or float
        Number of points used to estimate the disagreement; truncated to an
        integer.
    seed : int or None, optional
        When given, seeds NumPy's global random generator so the experiment
        can be reproduced. ``None`` (the default) leaves the generator
        untouched.

    Returns
    -------
    None
        The averages are printed, not returned.
    """
    # Normalise once so that the number of sampled test points and the
    # denominator of the disagreement rate always agree.
    test_sample_size = int(test_sample_size)
    if seed is not None:
        np.random.seed(seed)

    print("Running PLA {0} times on a set of {1} points, evaluating on a set of {2:.0f} points."
          .format(n_runs, n_points, test_sample_size))

    n_iters = []
    disagreements = []
    for _ in range(n_runs):
        # Generate random line -- a target function
        support_points = np.random.uniform(*INTERVAL, 4)
        w_true = get_line(*support_points)

        # Generate points and classify them
        X_train = get_sample(n_points)
        y_train_true = label_points(X_train, w_true)

        # Learn approximation function
        w_hat, n_iter = run_PLA(X_train, y_train_true)

        # Evaluate out-of-sample performance: fraction of fresh points on
        # which the learned and the target labelling differ.
        X_test = get_sample(test_sample_size)
        y_test_true = label_points(X_test, w_true)
        y_pred = label_points(X_test, w_hat)
        disagreement = np.mean(y_test_true != y_pred)

        n_iters.append(n_iter)
        disagreements.append(disagreement)

    print("Avg. iterations to converge: {:.1f}".format(np.mean(n_iters)))
    print("Avg. out-of-sample disagreement probability: {:.3f}".format(np.mean(disagreements)))

In [135]:
# Baseline experiment with the default settings (10 training points per run).
run_experiment()

Running PLA 1000 times on a set of 10 points, evaluating on a set of 100000 points.
Avg. iterations to converge: 10.6
Avg. out-of-sample disagreement probability: 0.106


In [136]:
# Same experiment with 100 training points per run.
run_experiment(n_points=100)

Running PLA 1000 times on a set of 100 points, evaluating on a set of 100000 points.
Avg. iterations to converge: 123.9
Avg. out-of-sample disagreement probability: 0.013
