# Perceptron Algorithm

### Learning Task 1

Build a classifier using the perceptron algorithm. Figure out if dataset is linearly seperable.

In [124]:
import numpy as np
import pandas as pd
import sys
sys.path.append("..")
from preprocessor import Preprocessor
from Models.Perceptron import Perceptron
import warnings 
warnings.filterwarnings(action="ignore")

In [125]:
dataset = pd.read_csv("../dataset.csv")

In [126]:
preprocessor = Preprocessor(dataset,"diagnosis")
splits = preprocessor.preprocess(n_splits=1) # splitting into training and testing

In [127]:
#PM1 is perceptron model 1 without shuffling training data
pm1 = Perceptron()
train, test = splits[0]
X_train, y_train = train.drop(columns=["diagnosis"]).to_numpy(), train["diagnosis"].to_numpy()
X_test, y_test = test.drop(columns=["diagnosis"]).to_numpy(), test["diagnosis"].to_numpy()

#### Using an infinite loop for the model

In [128]:
pm1.fit(X_train,y_train,True,n_threshold=1000)

0.28493150684931506
0.273972602739726
0.2876712328767123
0.2958904109589041
0.29315068493150687
0.29041095890410956
0.29041095890410956
0.29041095890410956
0.28493150684931506
0.29041095890410956
0.29315068493150687
0.29041095890410956
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29863013698630136
0.29

In [129]:
tp,tn,fp,fn=pm1.score(X_test,y_test)
acc = tp+tn/(tp+tn+fp+fn)
print(acc)

99.0


#### Using epochs instead of infinite model

In [130]:
pm1.fit(X_train,y_train,False,epochs=10)
## As epochs increases we break if if we get 100 percent accuracy

In [131]:
tp,tn,fp,fn=pm1.score(X_test,y_test)
acc = tp+tn/(tp+tn+fp+fn)
print(acc)

98.0


### Shuffling the training data

In [132]:
train = train.sample(random_state=31)
print(train.iloc[0])

id                        -0.191268
diagnosis                  1.000000
radius_mean                0.625252
texture_mean               0.271085
perimeter_mean             0.638353
area_mean                  0.457406
smoothness_mean            1.650318
compactness_mean           0.687542
concavity_mean             0.885467
concave points_mean        1.060928
symmetry_mean              0.672796
fractal_dimension_mean     0.096216
radius_se                  0.174531
texture_se                 0.051424
perimeter_se               0.247014
area_se                    0.111118
smoothness_se             -0.092318
compactness_se            -0.207916
concavity_se               0.112288
concave points_se          0.443830
symmetry_se               -0.635029
fractal_dimension_se      -0.366446
radius_worst               0.667347
texture_worst              0.728859
perimeter_worst            0.716606
area_worst                 0.449779
smoothness_worst           1.331646
compactness_worst          0

In [133]:
pm2 = Perceptron()
X_train, y_train = train.drop(columns=["diagnosis"]).to_numpy(), train["diagnosis"].to_numpy()
X_test, y_test = test.drop(columns=["diagnosis"]).to_numpy(), test["diagnosis"].to_numpy()

### Using an infinite loop with some threshold

In [134]:
pm2.fit(X_train,y_train,inf_loop=True)

0.0
Breaking as learnt perfect decision boundary


In [135]:
tp,tn,fp,fn=pm2.score(X_test,y_test)
acc = tp+tn/(tp+tn+fp+fn)
print(acc)

96.0


### Using epochs

In [136]:
pm2.fit(X_train,y_train,inf_loop=False,epochs = 100)
print(pm2.w)

[-1.91267777e-01  6.25252448e-01  2.71085293e-01  6.38352840e-01
  4.57405880e-01  1.65031836e+00  6.87542003e-01  8.85467148e-01
  1.06092791e+00  6.72795946e-01  9.62161130e-02  1.74530911e-01
  5.14238883e-02  2.47014262e-01  1.11117644e-01 -9.23182277e-02
 -2.07915674e-01  1.12287524e-01  4.43829973e-01 -6.35028990e-01
 -3.66446422e-01  6.67346795e-01  7.28859033e-01  7.16606396e-01
  4.49779249e-01  1.33164572e+00  4.10026391e-01  5.52829784e-01
  7.93336944e-01  5.51842714e-01  2.28739602e-04]


In [137]:
tp,tn,fp,fn=pm2.score(X_test,y_test)
acc = tp+tn/(tp+tn+fp+fn)
print(acc)

96.0
