In [0]:
import numpy as np
from sklearn.datasets import load_breast_cancer 
import matplotlib.pyplot as plt

In [0]:
def perceptron_algo(x, t, w, alpha): 

  '''
    
    Implementation of perceptron algorithm:

    Function which takes arguments as x, t, w, alpha as inputs and returns the updated weights

    n: numpber of samples
    d: number of features per sample
    x: input matrix of size n*d 
    w: weight matrix of size d*1
    t: actual labels (size=n*1)
    alpha: learning rate 

    For each sample, model predicts the class, 
      - if the class matches with the actual label, it doesn't change the weights
      - if the class doesn't match with the true label, it updates the weights

    Finally, after repeating the above process for each sample, it return the updated weights
    
  '''

  n_samples=x.shape[0]

  # Model's prediction
  pred = np.dot(x,w)

  # Looping over all data samples and updating the weights if it's misclassified
  for i in range(n_samples):
    
    if pred[i,0]*t[i,0] <= 0:
      w = w + alpha*t[i,0]*x[i,:].reshape(-1,1)

  return w
  
  

In [0]:
# Loading the data

breast_cancer = load_breast_cancer() 
X = breast_cancer.data 
Y = breast_cancer.target 

# Changing label from 0 to -1
Y[Y==0] = -1


In [0]:
# Splitting data into train and test

train_X = X[:450] 
test_X = X[450:] 
train_Y = Y[:450] 
test_Y = Y[450:]


In [19]:
epochs=1000
features = X.shape[1]
samples = train_X.shape[0]
lr=0.7
train_Y = train_Y.reshape(-1,1)

# initializing weights
w = np.zeros((features,1))

# Training the model 
for epoch in range(epochs):
  w = perceptron_algo(train_X, train_Y, w, lr)
  pred = np.dot(train_X,w)
  pred[pred>=0] = 1
  pred[pred<0] = -1
  n_misclassified = np.sum(pred != train_Y)

  print('Epoch: {} | Number of misclassified data points: {} | Accuracy: {}%'.format(epoch, n_misclassified, (1-(n_misclassified/samples))*100))
  

Epoch: 0 | Number of misclassified data points: 265 | Accuracy: 41.11111111111111%
Epoch: 1 | Number of misclassified data points: 185 | Accuracy: 58.88888888888889%
Epoch: 2 | Number of misclassified data points: 265 | Accuracy: 41.11111111111111%
Epoch: 3 | Number of misclassified data points: 265 | Accuracy: 41.11111111111111%
Epoch: 4 | Number of misclassified data points: 185 | Accuracy: 58.88888888888889%
Epoch: 5 | Number of misclassified data points: 265 | Accuracy: 41.11111111111111%
Epoch: 6 | Number of misclassified data points: 181 | Accuracy: 59.77777777777777%
Epoch: 7 | Number of misclassified data points: 265 | Accuracy: 41.11111111111111%
Epoch: 8 | Number of misclassified data points: 265 | Accuracy: 41.11111111111111%
Epoch: 9 | Number of misclassified data points: 185 | Accuracy: 58.88888888888889%
Epoch: 10 | Number of misclassified data points: 265 | Accuracy: 41.11111111111111%
Epoch: 11 | Number of misclassified data points: 254 | Accuracy: 43.55555555555556%
Ep

In [20]:
# Validation test
test_Y = test_Y.reshape(-1,1)
samples = test_X.shape[0]
pred = np.dot(test_X,w)
pred[pred>=0] = 1
pred[pred<0] = -1
n_misclassified = np.sum(pred != test_Y)
print('Number of misclassified data points: {} | Accuracy: {}%'.format(n_misclassified, (1-(n_misclassified/samples))*100))

Number of misclassified data points: 11 | Accuracy: 90.75630252100841%


11
