In [73]:
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

Let's first make our dataset!

In [74]:
# Synthetic dataset: 500 samples with 2 features each, drawn around 2 centers.
# The fixed random_state makes the generated samples reproducible.
X, y = make_blobs(n_samples=500, n_features=2, centers=2, cluster_std=1.0, random_state=42)

# Hold out 20% of the samples for testing; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Below are functions for training and inference.

In [89]:
def train_model(X, y, w = None, b = 0, threshold = 0, max_epochs = 1000):
    """
    Trains a perceptron model, initialized off provided w and b.

    Each epoch visits the samples in index order and applies the perceptron
    update (w += y_t * x_t, b += y_t) to every sample whose signed margin
    y_t * (w . x_t + b) is at most `threshold`. Training stops as soon as a
    full epoch makes no update, or after `max_epochs` epochs, whichever
    comes first.

    Args:
        X: training features; X[t] is sample t and X.shape is
            (number of samples, number of features)
        y: training labels; 0 is mapped to the negative class (-1) and any
            other value to the positive class (+1)
        w: initial weight vector; defaults to all zeros
        b: initial bias
        threshold: update threshold; a sample triggers an update when its
            signed margin is <= threshold
        max_epochs: upper bound on the number of passes over the data, so
            training terminates even if no pass is ever update-free
            (e.g. when the data is not linearly separable)

    Returns:
        The trained weight vector and bias. If `max_epochs` is reached
        before an update-free pass, the values after the last pass are
        returned.
    """
    if w is None:
        w = np.zeros(X.shape[1])

    n = X.shape[0]

    for _ in range(max_epochs):
        updated = False
        for t in range(n):
            # Map the {0, 1} labels onto {-1, +1} so one signed-margin test
            # covers both classes.
            y_t = -1 if y[t] == 0 else 1
            if y_t * (np.dot(X[t], w) + b) <= threshold:
                # Rebind instead of updating in place so a caller-supplied
                # w array is never modified.
                w = w + y_t * X[t]
                b = b + y_t
                updated = True
        # An update-free pass means every sample clears the threshold, so
        # further passes would leave w and b unchanged. Comparing accuracy
        # between epochs is not a safe stopping rule because perceptron
        # accuracy can stall or dip before the model converges.
        if not updated:
            break

    return w, b

In [86]:
def infer_model(x, w, b):
    """
    Predicts the label of a single sample with a perceptron model.

    Args:
        x: feature vector of the sample
        w: model weight vector
        b: model bias

    Returns:
        1 when the activation dot(x, w) + b is strictly positive, otherwise 0.
    """
    activation = np.dot(x, w) + b
    if activation > 0:
        return 1
    return 0

We also need to evaluate our model.

In [87]:
def eval_model(X, y, w, b):
    """
    Measures the accuracy of a perceptron model on a labelled dataset.

    Args:
        X: features to evaluate on; X[i] is sample i
        y: labels, where y[i] is the label of X[i]
        w: model weight vector
        b: model bias

    Returns:
        The fraction of samples whose predicted label equals the given label.
    """
    total = X.shape[0]
    # Count one hit per sample whose prediction matches its label.
    hits = sum(
        1
        for idx in range(total)
        if infer_model(X[idx], w, b) == y[idx]
    )
    return hits / total

Let's train a model on the training set and evaluate it on the held-out test set!

In [90]:
# Fit on the training split only, starting from the default initialisation.
w, b = train_model(X=X_train, y=y_train)

# Score the fitted model on the held-out test split.
accuracy = eval_model(X=X_test, y=y_test, w=w, b=b)
print(accuracy)

1.0


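The perceptron learns a linear decision boundary (the 'line') that is fitted to the training dataset only. If the test dataset contains points that fall on the wrong side of that line, we won't predict them correctly, so 100% test accuracy is not guaranteed in general. We got lucky here: the line learned from the training set happens to separate the test set as well!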

In [93]:
# Score the same model on the data it was trained on, for comparison.
accuracy_train = eval_model(X=X_train, y=y_train, w=w, b=b)
print(accuracy_train)

1.0


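Here, we see that the training accuracy is 100%. This is what the perceptron convergence theorem predicts: if the training dataset is linearly separable, the perceptron makes only finitely many updates, so training until a full pass over the data produces no update yields a model that classifies every training point correctly.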