# Binary Classification Using Perceptron

In [1]:
import numpy as np
import sklearn.datasets as skdata

In [2]:
# Load the breast cancer dataset and pull out its features (x) and
# labels (y)

breast_cancer_data = skdata.load_breast_cancer()
x, y = breast_cancer_data.data, breast_cancer_data.target

In [6]:
# Split the breast cancer dataset into 80% train, 10% val, 10% test

# Seed the shuffle so that Restart & Run All reproduces the same split
# (and therefore the same downstream scores) every time.
SPLIT_SEED = 0
rng = np.random.default_rng(SPLIT_SEED)

# Random permutation of all example indices
num_examples = x.shape[0]
idx = rng.permutation(num_examples)

# Positions in the permutation where the train and val partitions end
train_split_idx = int(0.8 * num_examples)
val_split_idx = int(0.9 * num_examples)

train_idx = idx[:train_split_idx]
val_idx = idx[train_split_idx:val_split_idx]
test_idx = idx[val_split_idx:]

# Select the examples from x and y to construct our training, validation
# and testing sets

x_train, y_train = x[train_idx, :], y[train_idx]
x_val, y_val = x[val_idx, :], y[val_idx]
x_test, y_test = x[test_idx, :], y[test_idx]

In [4]:
from sklearn.linear_model import Perceptron

# Set up our Perceptron model.
# - tol is the stopping tolerance: training stops once the loss no longer
#   improves by at least tol from one epoch to the next.
# - penalty and alpha relate to regularization; penalty = None means no
#   regularization term is applied here.

model = Perceptron(penalty = None, alpha = 0.0, tol = 1e-1)

In [7]:
# Train our perceptron model on the training split. Because the call is the
# last expression in the cell, the fitted estimator is displayed as output.

model.fit(x_train, y_train)

Perceptron(alpha=0.0, tol=0.1)

In [11]:
# Predict a class label for every validation example

predictions_val = model.predict(x_val)

# Inspect the predictions: the raw labels, the distinct values that occur
# (only 0's and 1's), and how many predictions were made
print(predictions_val)
print(np.unique(predictions_val))
predictions_val.shape

[0 0 0 1 1 1 1 1 1 1 0 1 1 0 1 1 1 0 1 1 0 1 0 0 1 0 1 0 1 1 1 1 1 0 0 0 0
 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 0 0 0 0]
[0 1]


(57,)

In [14]:
# Check our accuracy: mark each validation prediction 1 if it matches the
# label and 0 otherwise, then average the marks

scores_val = (predictions_val == y_val).astype(int)
mean_accuracy_val = scores_val.mean()
print(mean_accuracy_val)

0.9298245614035088


In [16]:
# We can also use scikit-learn's built-in score method; on the validation
# set it reports the same mean accuracy as the manual computation

mean_accuracy_val = model.score(x_val, y_val)
print(mean_accuracy_val)

0.9298245614035088


# Multiclass Classification Using Perceptron

In [17]:
# Load the wine dataset and pull out its features (x) and labels (y);
# note that this rebinds x and y from the binary section

wine_data = skdata.load_wine()
x, y = wine_data.data, wine_data.target

In [39]:
# Split the wine dataset into 80% train, 10% val, 10% test

# Seed the shuffle so that Restart & Run All reproduces the same split
# (and therefore the same downstream scores) every time.
SPLIT_SEED = 0
rng = np.random.default_rng(SPLIT_SEED)

# Random permutation of all example indices
num_examples = x.shape[0]
idx = rng.permutation(num_examples)

# Positions in the permutation where the train and val partitions end
train_split_idx = int(0.8 * num_examples)
val_split_idx = int(0.9 * num_examples)

train_idx = idx[:train_split_idx]
val_idx = idx[train_split_idx:val_split_idx]
test_idx = idx[val_split_idx:]

# Select the examples from x and y to construct our training, validation
# and testing sets

x_train, y_train = x[train_idx, :], y[train_idx]
x_val, y_val = x[val_idx, :], y[val_idx]
x_test, y_test = x[test_idx, :], y[test_idx]

In [40]:
# Scikit-learn's perceptron model can already handle multiple classes,
# so the same estimator is used unchanged for wine classification

# Train-Val Loop: fit one model per stopping tolerance and record its
# validation accuracy; models[i] and scores[i] belong to the same tol

models, scores = [], []

for tol in (0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1):
    # fit() returns the estimator itself, so construction and training chain
    model = Perceptron(penalty=None, alpha=0.0, tol=tol).fit(x_train, y_train)
    models.append(model)

    # Predict the class/labels
    predictions_val = model.predict(x_val)

    # Check our accuracy
    score = model.score(x_val, y_val)
    scores.append(score)

    print(score)

0.6111111111111112
0.6111111111111112
0.6111111111111112
0.6111111111111112
0.6111111111111112
0.6111111111111112
0.6111111111111112


In [41]:
# Test Loop: print the test-set accuracy of every model in `models`, in the
# order they were appended (one per tol value tried in the Train-Val Loop)
# NOTE(review): the recorded output shows the same score for every model —
# worth confirming whether tol has any effect on this data.

for model in models:
    score = model.score(x_test, y_test)
    print(score)

0.5555555555555556
0.5555555555555556
0.5555555555555556
0.5555555555555556
0.5555555555555556
0.5555555555555556
0.5555555555555556
