In [None]:
%matplotlib inline

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
def plot_customers(X, y, xlabel='Inches (in)', ylabel='Pounds (lb)'):
    """Scatter-plot the rows of ``X`` on the current axes, coloured by class.

    Rows whose entry in ``y`` equals 0 are drawn in green with the legend
    label 'Not large'; rows whose entry equals 1 are drawn in yellow with the
    label 'Large'. Column 0 of ``X`` is the x-coordinate and column 1 the
    y-coordinate. Axis labels are set from ``xlabel`` / ``ylabel``.

    The legend itself is not drawn here; call ``plt.legend()`` afterwards.
    """
    class_styles = [(0, 'g', 'Not large'), (1, 'y', 'Large')]
    for class_id, color, label in class_styles:
        # Boolean mask selecting the rows that belong to this class.
        in_class = y == class_id
        plt.scatter(X[:, 0][in_class], X[:, 1][in_class], color=color, label=label)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

In [None]:
# Seed NumPy's global RNG so the simulated noise is the same on every run.
np.random.seed(1)
# Evenly spaced inch values starting at 60 and stopping short of 78 (step 0.1).
inches = np.arange(60, 78, .1)
# One Gaussian noise term (scale=10) for every inch value.
random_fluctuations = np.random.normal(scale=10, size=inches.size)

In [None]:
# Simulated weights: a straight line in `inches` plus the per-sample noise.
pounds = 4 * inches - 130 + random_fluctuations

In [None]:
# Feature matrix with one row per sample: column 0 is inches, column 1 is pounds.
X = np.array([inches, pounds]).T

In [None]:
# Label is 1 only when inches > 72 AND pounds > 160, otherwise 0.
y = ((X[:,0] > 72) & (X[:,1] > 160)).astype(int)

In [None]:
plot_customers(X, y)
plt.legend()

In [None]:
def boundary(inches):
    """Return the pounds value of the hand-picked decision line at ``inches``.

    The line is ``pounds = -3.5 * inches + 415``. The arithmetic is applied
    as-is, so a NumPy array argument yields an array of the same shape.
    """
    slope, intercept = -3.5, 415
    return slope * inches + intercept

In [None]:
# Draw the hand-picked boundary line, then overlay the labelled customers.
plt.plot(X[:, 0], boundary(X[:,0]), color='k', label='Boundary')
plot_customers(X, y)
plt.legend()

In [None]:
from sklearn.metrics import f1_score

In [None]:
# Predict 1 for every sample whose pounds value lies strictly above the
# boundary line at its inches value, and 0 otherwise.
# A comprehension is used so the loop variables stay local: a plain
# `for inches, lbs in X` loop would rebind the module-level `inches` array
# to a scalar and break any re-run of the cell that builds `pounds`.
# y_pred is kept as a plain list of ints because later cells compare it
# with `==` against other lists.
y_pred = [int(lbs > boundary(height)) for height, lbs in X]
# f1_score takes the true labels first, then the predictions.
f_measure = f1_score(y, y_pred)
f"The f-measure is {f_measure:.2f}"

In [None]:
# The same boundary written as a weight vector:
# 3.5*inches + 1*lbs - 415 > 0 is algebraically the same as lbs > -3.5*inches + 415.
weights = np.array([3.5, 1, -415])
# A constant 1 is appended to each sample so the last weight acts as the bias.
predictions = [int((weights @ [inches, lbs, 1]) > 0) for inches, lbs in X] 

In [None]:
predictions

In [None]:
# Both sides are plain Python lists here, so == compares them element by element.
assert y_pred == predictions

In [None]:
# Augment X with a trailing column of ones so that M @ weights includes the bias term.
M = np.column_stack((X, np.ones(X.shape[0])))

In [None]:
M

In [None]:
# Vectorized form: score every row with one matrix-vector product, then threshold at 0.
predictions = (M @ weights > 0).astype(int)

In [None]:
predictions

In [None]:
# `predictions` is now a NumPy array; convert it to a list before comparing with the y_pred list.
assert predictions.tolist() == y_pred

In [None]:
def linear_classifier(X, weights):
    """Predict a 0/1 class for every row of ``X`` with a linear decision rule.

    A column of ones is appended to ``X`` so that the final entry of
    ``weights`` serves as the bias. A row is assigned 1 when its weighted
    sum is strictly greater than zero, and 0 otherwise.

    Returns an integer NumPy array holding one prediction per row.
    """
    bias_column = np.ones(X.shape[0])
    augmented = np.column_stack([X, bias_column])
    scores = augmented @ weights
    return (scores > 0).astype(int)

In [None]:
predictions = linear_classifier(X, weights)

In [None]:
assert predictions.tolist() == y_pred

In [None]:
# Replace the hand-picked weights with three random ones (seeded so the run is reproducible).
np.random.seed(0)
weights = np.random.normal(size=3)

In [None]:
# NOTE: y_pred is rebound here from the Python list built earlier to a NumPy array.
y_pred = linear_classifier(X, weights)

In [None]:
f_measure = f1_score(y_pred, y)

In [None]:
f_measure

In [None]:
weights

In [None]:
def get_bias_shift(predicted, actual):
    """Return the direction of the prediction error as 0, 1 or -1.

    0 when ``predicted`` equals ``actual``, 1 when ``predicted`` is greater
    than ``actual``, and -1 in every other case.
    """
    if predicted == actual:
        return 0
    return 1 if predicted > actual else -1

In [None]:
# Check all four 0/1 combinations: for binary labels the three-way rule
# is the same as the plain difference `predicted - actual`.
for predicted, actual in [(0, 0), (1, 0), (0, 1), (1, 1)]:
    bias_shift = get_bias_shift(predicted, actual)
    assert bias_shift == predicted - actual

In [None]:
def get_bias_shift(predicted, actual, learning_rate=0.1):
    """Return the prediction error scaled by ``learning_rate``.

    The error is ``predicted - actual``, so with a positive learning rate the
    result is positive for an over-prediction, negative for an
    under-prediction and zero when the two agree.
    """
    error = predicted - actual
    return learning_rate * error

In [None]:
def predict(v, weights):
    """Classify one sample: return 1 if ``v @ weights`` is strictly positive, else 0.

    ``v`` must already have the same length as ``weights`` (callers in this
    notebook pass rows that include the trailing constant 1).
    """
    score = v @ weights
    return int(score > 0)

In [None]:
# Remember the bias (last weight) before it is adjusted. Indexing a single
# element yields a scalar, so later in-place updates to `weights` do not change it.
starting_bias = weights[-1]

In [None]:
# One pass over the data that adjusts only the bias (the last weight).
for i, actual in enumerate(y):
    # M[i] is the i-th sample with the constant 1 appended.
    predicted = predict(M[i], weights)
    bias_shift = get_bias_shift(predicted, actual)
    # A positive shift (over-prediction) lowers the bias; a negative one raises it.
    # `weights` is modified in place, so re-running this cell continues from
    # the already-adjusted bias rather than starting over.
    weights[-1] -= bias_shift

In [None]:
new_bias = weights[-1]

In [None]:
print(f"Our starting bias equaled {starting_bias:.2f}.")
print(f"The adjusted bias equals {new_bias:.2f}.")

In [None]:
y_pred = linear_classifier(X, weights)

In [None]:
f_measure = f1_score(y_pred, y)

In [None]:
f"The f-measure is {f_measure:.2f}"

In [None]:
# Snapshot the current weights; a copy is needed because `weights` is updated in place afterwards.
old_weights = weights.copy()

In [None]:
# One pass over the data that adjusts every weight, not just the bias:
# each weight moves by the shift multiplied by its own input value in M[i].
for i, actual in enumerate(y):
    predicted = predict(M[i], weights)
    bias_shift = get_bias_shift(predicted, actual)
    # In-place update; the trailing 1 in M[i] makes the bias move by exactly bias_shift.
    weights -= bias_shift * M[i]


In [None]:
y_pred = linear_classifier(X, weights)

In [None]:
f_measure = f1_score(y_pred, y)

In [None]:
f_measure

In [None]:
# Re-seed and redraw so the repeated-training experiment starts from the
# same random weights as the earlier seeded draw (seed 0, size 3).
np.random.seed(0)
weights = np.random.normal(size=3)

In [None]:
f_measures = []

In [None]:
# Repeat the all-weights update for 1000 passes over the data.
for _ in range(1000):
    # Score the current weights BEFORE this pass changes them, so (starting
    # from an empty list) f_measures[k] reflects the weights after k completed passes.
    y_pred = linear_classifier(X, weights)
    f_measures.append(f1_score(y_pred, y))
    for i, actual in enumerate(y):
        predicted = predict(M[i], weights)
        bias_shift = get_bias_shift(predicted, actual)
        # In-place update of the shared `weights` array.
        weights -= bias_shift * M[i]

In [None]:
# NOTE(review): each score is recorded before its pass updates the weights, so
# f_measures[-1] describes the weights after 999 completed passes, not 1000.
print(f'The f-measure after 1000 iterations is {f_measures[-1]:.2f}')
# Learning curve: the F-measure recorded at the start of each training pass.
plt.plot(range(len(f_measures)), f_measures)
plt.xlabel('Iteration')
plt.ylabel('F-measure')

In [None]:
def train(X, y, predict=predict, n_iterations=1000):
    """Fit linear-classifier weights by repeated error-driven updates.

    Parameters
    ----------
    X : 2-D NumPy array
        One row per sample, one column per feature.
    y : sequence of 0/1 labels
        True class of every row of ``X``.
    predict : callable, optional
        ``predict(row, weights)`` returns the predicted class for one row of
        the augmented matrix (``X`` with a trailing column of ones).
    n_iterations : int, optional
        Number of full passes over the data (default 1000).

    Returns
    -------
    weights : NumPy array of length ``X.shape[1] + 1``
        Trained feature weights followed by the bias.
    f_measures : list of float
        One F-measure per pass, recorded before that pass's weight updates.

    Notes
    -----
    The initial weights are drawn from NumPy's global random generator, so
    seed it beforehand for reproducible results. The update applied in pass
    ``k`` is divided by ``k``, so the step size shrinks as training proceeds.
    """
    M = np.column_stack([X, np.ones(X.shape[0])])
    weights = np.random.normal(size=X.shape[1] + 1)
    f_measures = []
    # k starts at 1 because it is used as a divisor; the upper bound is
    # n_iterations + 1 so that exactly n_iterations passes are made.
    for k in range(1, n_iterations + 1):
        # Same thresholding rule as linear_classifier, applied to the
        # augmented matrix that has already been built above.
        y_pred = (M @ weights > 0).astype(int)
        # f1_score takes the true labels first, then the predictions.
        f_measures.append(f1_score(y, y_pred))
        for i, actual in enumerate(y):
            predicted = predict(M[i], weights)
            bias_shift = get_bias_shift(predicted, actual)
            weights -= bias_shift * M[i] / k
    return weights, f_measures

In [None]:
weights, f_measures = train(X, y)

In [None]:
# f_measures[-1] is the score recorded at the start of train()'s final pass,
# i.e. before that pass's weight updates were applied.
print(f'The f-measure after 1000 iterations is {f_measures[-1]:.2f}')
# Learning curve: the F-measure recorded at the start of each training pass.
plt.plot(range(len(f_measures)), f_measures)
plt.xlabel('Iteration')
plt.ylabel('F-measure')

In [None]:
# Unpack the trained weights: one coefficient per feature, then the bias.
inches_coef, lbs_coef, bias = weights
def new_boundary(inches):
    """Return the pounds value on the trained decision line at ``inches``.

    Solves ``inches_coef*inches + lbs_coef*lbs + bias = 0`` for ``lbs``.
    The coefficients are read from the module-level names at call time.
    """
    return -(inches_coef * inches + bias) / lbs_coef
# Dashed line: boundary implied by the trained weights.
plt.plot(X[:,0], new_boundary(X[:,0]), color='k', linestyle='--', label='Trained Boundary', linewidth=2)
# Solid line: the hand-picked boundary, for comparison.
plt.plot(X[:,0], boundary(X[:,0]), color='k', label='Initial Boundary')
plot_customers(X, y)
plt.legend()

In [None]:
means = X.mean(axis=0)

In [None]:
stds = X.std(axis=0)

In [None]:
means

In [None]:
stds

In [None]:
def standardize(X):
    """Rescale every column of ``X`` to zero mean and unit standard deviation.

    The column means are subtracted and the result is divided by the column
    standard deviations, both computed along axis 0 of ``X`` itself.
    """
    column_means = X.mean(axis=0)
    column_stds = X.std(axis=0)
    return (X - column_means) / column_stds

In [None]:
X_s = standardize(X)

In [None]:
X_s

In [None]:
assert np.allclose(X_s.mean(axis=0), 0)

In [None]:
assert np.allclose(X_s.std(axis=0), 1.)

In [None]:
np.random.seed(0)

In [None]:
weights, f_measures = train(X_s, y)

In [None]:
print(f'After standardization, the f-measure is {f_measures[-1]:.2f}')

In [None]:
def plot_boundary(weights):
    """Plot the decision line implied by ``weights`` over the standardized data.

    ``weights`` unpacks into two feature coefficients and a bias. The line
    drawn is where ``x_coef*x + y_coef*y + bias == 0``, i.e.
    ``y = -(x_coef*x + bias) / y_coef``, evaluated at the first column of the
    module-level ``X_s``. The customers are then overlaid from ``X_s`` and
    the module-level labels ``y``, and the legend is drawn.
    """
    x_coef, y_coef, bias = weights
    xs = X_s[:, 0]
    ys = -(x_coef * xs + bias) / y_coef
    plt.plot(xs, ys, color='k', linestyle='--', label='Trained Boundary', linewidth=2)
    plot_customers(X_s, y,
                   xlabel='Standardized Inches',
                   ylabel='Standardized Pounds')
    plt.legend()

In [None]:
plot_boundary(weights)