# Perceptron Algorithm

## Learning Task 1

Build a classifier using the perceptron algorithm. Figure out if dataset is linearly seperable.

In [77]:
import numpy as np
import pandas as pd
import random
import sys
sys.path.append("..")
from preprocessor import Preprocessor
from Models.Perceptron import Perceptron
import warnings 
warnings.filterwarnings(action="ignore")

In [78]:
dataset = pd.read_csv("../dataset.csv")
dataset.drop(columns=["id"],inplace=True)

In [79]:
preprocessor = Preprocessor(dataset,"diagnosis")
splits = preprocessor.preprocess(drop_na=True,n_splits=10,standardize=False,labels=[-1,1]) # splitting into training and testing

### Using training data as given

In [80]:
#PM1 is perceptron model 1 without shuffling training data
pm1 = Perceptron()
train, test = splits[0]
X_train, y_train = train.drop(columns=["diagnosis"]).to_numpy(), train["diagnosis"].to_numpy()
X_test, y_test = test.drop(columns=["diagnosis"]).to_numpy(), test["diagnosis"].to_numpy()

#### Using an infinite loop for the model

In [81]:
pm1.fit(X_train,y_train,True)

Breaking as not improving


In [82]:
pm1.score(X_train,y_train,_print = True)

--------Results--------
Accuracy: 0.8986301369863013
----Class 1----
Precision: 0.7686567164179104
Recall: 0.944954128440367
----Class 0----
Precision: 0.974025974025974
Recall: 0.87890625


(103, 225, 31, 6)

In [83]:
tp,tn,fp,fn=pm1.score(X_test,y_test,_print=True)

--------Results--------
Accuracy: 0.9343434343434344
----Class 1----
Precision: 0.9587628865979382
Recall: 0.9117647058823529
----Class 0----
Precision: 0.9108910891089109
Recall: 0.9583333333333334


#### Using epochs instead of infinite model

In [84]:
pm1.fit(X_train,y_train,False,epochs=10000)
## As epochs increases we break if if we get 100 percent accuracy

In [85]:
tp,tn,fp,fn=pm1.score(X_test,y_test,_print=True)
acc = (tp+tn)/(tp+tn+fp+fn)
### Shuffling the training data

--------Results--------
Accuracy: 0.8383838383838383
----Class 1----
Precision: 1.0
Recall: 0.6862745098039216
----Class 0----
Precision: 0.75
Recall: 1.0


### Shuffling the training data

In [86]:
train = train.sample(random_state=42)

In [87]:
pm2 = Perceptron()
X_train, y_train = train.drop(columns=["diagnosis"]).to_numpy(), train["diagnosis"].to_numpy()
X_test, y_test = test.drop(columns=["diagnosis"]).to_numpy(), test["diagnosis"].to_numpy()
#### Using an infinite loop with some threshold

#### Using an infinite loop with some threshold

In [88]:
pm2.fit(X_train,y_train,inf_loop=True)

Breaking as learnt perfect decision boundary


In [89]:
tp,tn,fp,fn=pm1.score(X_test,y_test,_print=True)

--------Results--------
Accuracy: 0.8383838383838383
----Class 1----
Precision: 1.0
Recall: 0.6862745098039216
----Class 0----
Precision: 0.75
Recall: 1.0


#### Using epochs

In [90]:
pm2.fit(X_train,y_train,inf_loop=False,epochs = 1000)

In [91]:
tp,tn,fp,fn=pm1.score(X_test,y_test,_print=True)

--------Results--------
Accuracy: 0.8383838383838383
----Class 1----
Precision: 1.0
Recall: 0.6862745098039216
----Class 0----
Precision: 0.75
Recall: 1.0


## Learning Task 2

Build a perceptron model on normalized data

In [92]:
splits = preprocessor.preprocess(n_splits=1,standardize=True,labels=[-1,1]) # splitting into training and testing
train, test = splits[0]
X_train, y_train = train.drop(columns=["diagnosis"]).to_numpy(), train["diagnosis"].to_numpy()
X_test, y_test = test.drop(columns=["diagnosis"]).to_numpy(), test["diagnosis"].to_numpy()

### Using an infinite loop that terminates on some learning threshold

In [93]:
pm3 = Perceptron()
pm3.fit(X_train,y_train,inf_loop=True)

Breaking as learnt perfect decision boundary


In [94]:
tp,tn,fp,fn = pm3.score(X_test,y_test,_print=True)


--------Results--------
Accuracy: 0.9545454545454546
----Class 1----
Precision: 0.9696969696969697
Recall: 0.9411764705882353
----Class 0----
Precision: 0.9393939393939394
Recall: 0.96875


## Learning Task 3

Change the order of the features in the dataset randomly and build a perceptron model

In [105]:
splits = preprocessor.preprocess(drop_na=True,n_splits=10,standardize=False,labels=[-1,1]) # splitting into training and testing
train, test = splits[0]
seed = random.randint(0,100)
train = train.sample(frac=1,axis = 1,random_state=23)
test = test.sample(frac=1,axis = 1,random_state=23)
X_train, y_train = train.drop(columns=["diagnosis"]).to_numpy(), train["diagnosis"].to_numpy()
X_test, y_test = test.drop(columns=["diagnosis"]).to_numpy(), test["diagnosis"].to_numpy()

### Using an infinite loop 

In [107]:
pm4 = Perceptron()
pm4.fit(X_train,y_train,inf_loop=True)

Breaking as not improving


In [109]:
pm4.score(X_test,y_test,_print = True)

--------Results--------
Accuracy: 0.9343434343434344
----Class 1----
Precision: 0.9587628865979382
Recall: 0.9117647058823529
----Class 0----
Precision: 0.9108910891089109
Recall: 0.9583333333333334


(93, 92, 4, 9)