# Perceptron Algorithm

## Learning Task 1

Build a classifier using the perceptron algorithm. Figure out if dataset is linearly seperable.

In [216]:
import numpy as np
import pandas as pd
import random
import sys
sys.path.append("..")
from preprocessor import Preprocessor
from Models.Perceptron import Perceptron
import warnings 
warnings.filterwarnings(action="ignore")

In [217]:
dataset = pd.read_csv("../dataset.csv")
dataset.drop(columns=["id"],inplace=True)
# Creating a dataframe to store the results
results = pd.DataFrame(columns=["threshold","delta","method","accuracy","epochs"])

In [218]:
preprocessor = Preprocessor(dataset,"diagnosis")
splits = preprocessor.preprocess(drop_na=True,n_splits=10,standardize=False,labels=[-1,1]) # splitting into training and testing

### Using training data as given

In [219]:
#PM1 is perceptron model 1 without shuffling training data
pm1 = Perceptron()
train, test = splits[0]
X_train, y_train = train.drop(columns=["diagnosis"]).to_numpy(), train["diagnosis"].to_numpy()
X_test, y_test = test.drop(columns=["diagnosis"]).to_numpy(), test["diagnosis"].to_numpy()

#### Using an infinite loop for the model

In [220]:
pm1.fit(X_train,y_train,True)

Breaking as not improving


In [221]:
tp,tn,fp,fn=pm1.score(X_train,y_train,_print = True)


--------Results--------
Accuracy: 0.8986301369863013
----Class 1----
Precision: 0.7686567164179104
Recall: 0.944954128440367
----Class 0----
Precision: 0.974025974025974
Recall: 0.87890625


In [222]:
tp,tn,fp,fn=pm1.score(X_test,y_test,_print=True)
result_dict ={"threshold":100,"delta":0.001,"method":"PM1-Infinite loop",
              "accuracy":(tp+tn)/(tp+tn+fp+fn)
              ,"epochs":None}
results = results.append(result_dict,ignore_index=True)

--------Results--------
Accuracy: 0.9343434343434344
----Class 1----
Precision: 0.9587628865979382
Recall: 0.9117647058823529
----Class 0----
Precision: 0.9108910891089109
Recall: 0.9583333333333334


#### Using epochs instead of infinite model

In [223]:
pm1.fit(X_train,y_train,False,epochs=10000)
## As epochs increases we break if if we get 100 percent accuracy

In [224]:
tp,tn,fp,fn=pm1.score(X_test,y_test,_print=True)
acc = (tp+tn)/(tp+tn+fp+fn)
result_dict ={"threshold":None,"delta":None,"method":"PM1-Epochs",
              "accuracy":(tp+tn)/(tp+tn+fp+fn)
              ,"epochs":1000}
results = results.append(result_dict,ignore_index=True)
### Shuffling the training data

--------Results--------
Accuracy: 0.8383838383838383
----Class 1----
Precision: 1.0
Recall: 0.6862745098039216
----Class 0----
Precision: 0.75
Recall: 1.0


### Shuffling the training data

In [225]:
train = train.sample(random_state=42)

In [226]:
pm2 = Perceptron()
X_train, y_train = train.drop(columns=["diagnosis"]).to_numpy(), train["diagnosis"].to_numpy()
X_test, y_test = test.drop(columns=["diagnosis"]).to_numpy(), test["diagnosis"].to_numpy()
#### Using an infinite loop with some threshold

#### Using an infinite loop with some threshold

In [227]:
pm2.fit(X_train,y_train,inf_loop=True)

Breaking as learnt perfect decision boundary


In [228]:
tp,tn,fp,fn=pm2.score(X_test,y_test,_print=True)
result_dict ={"threshold":100,"delta":0.001,"method":"PM2-InfiniteLoop",
              "accuracy":(tp+tn)/(tp+tn+fp+fn)
              ,"epochs":None
              }
results = results.append(result_dict,ignore_index=True)

--------Results--------
Accuracy: 0.8383838383838383
----Class 1----
Precision: 1.0
Recall: 0.6862745098039216
----Class 0----
Precision: 0.75
Recall: 1.0


#### Using epochs

In [229]:
pm2.fit(X_train,y_train,inf_loop=False,epochs = 1000)

In [230]:
tp,tn,fp,fn=pm2.score(X_test,y_test,_print=True)
result_dict ={"threshold":None,"delta":None,"method":"PM2-Epochs",
              "accuracy":(tp+tn)/(tp+tn+fp+fn)
              ,"epochs":1000}
results = results.append(result_dict,ignore_index=True)

--------Results--------
Accuracy: 0.8383838383838383
----Class 1----
Precision: 1.0
Recall: 0.6862745098039216
----Class 0----
Precision: 0.75
Recall: 1.0


## Learning Task 2

Build a perceptron model on normalized data

In [231]:
splits = preprocessor.preprocess(n_splits=1,standardize=True,labels=[-1,1]) # splitting into training and testing
train, test = splits[0]
X_train, y_train = train.drop(columns=["diagnosis"]).to_numpy(), train["diagnosis"].to_numpy()
X_test, y_test = test.drop(columns=["diagnosis"]).to_numpy(), test["diagnosis"].to_numpy()

### Using an infinite loop that terminates on some learning threshold

In [232]:
pm3 = Perceptron()
pm3.fit(X_train,y_train,inf_loop=True)

Breaking as learnt perfect decision boundary


In [233]:
tp,tn,fp,fn = pm3.score(X_test,y_test,_print=True)
result_dict ={"threshold":100,"delta":0.001,"method":"PM3-Infinite",
              "accuracy":(tp+tn)/(tp+tn+fp+fn)
              ,"epochs":None
              }
results = results.append(result_dict,ignore_index=True)

--------Results--------
Accuracy: 0.9545454545454546
----Class 1----
Precision: 0.9696969696969697
Recall: 0.9411764705882353
----Class 0----
Precision: 0.9393939393939394
Recall: 0.96875


### Using Epochs

In [234]:
splits = preprocessor.preprocess(drop_na=True,n_splits=1,standardize=True,labels=[-1,1]) # splitting into training and testing
train, test = splits[0]
X_train, y_train = train.drop(columns=["diagnosis"]).to_numpy(), train["diagnosis"].to_numpy()
X_test, y_test = test.drop(columns=["diagnosis"]).to_numpy(), test["diagnosis"].to_numpy()
pm3.fit(X_train,y_train,inf_loop=False,epochs = 1000)

In [235]:
tp,tn,fp,fn=pm3.score(X_test,y_test,_print=True)
result_dict ={"threshold":None,"delta":None,"method":"PM3-Epochs",
              "accuracy":(tp+tn)/(tp+tn+fp+fn)
              ,"epochs":1000}
results = results.append(result_dict,ignore_index=True)

--------Results--------
Accuracy: 0.9545454545454546
----Class 1----
Precision: 0.9696969696969697
Recall: 0.9411764705882353
----Class 0----
Precision: 0.9393939393939394
Recall: 0.96875


## Learning Task 3

Change the order of the features in the dataset randomly and build a perceptron model

In [236]:
splits = preprocessor.preprocess(drop_na=True,n_splits=10,standardize=False,labels=[-1,1]) # splitting into training and testing
train, test = splits[0]
seed = random.randint(0,100)
train = train.sample(frac=1,axis = 1,random_state=23)
test = test.sample(frac=1,axis = 1,random_state=23)
X_train, y_train = train.drop(columns=["diagnosis"]).to_numpy(), train["diagnosis"].to_numpy()
X_test, y_test = test.drop(columns=["diagnosis"]).to_numpy(), test["diagnosis"].to_numpy()

### Using an infinite loop 

In [237]:
pm4 = Perceptron()
pm4.fit(X_train,y_train,inf_loop=True)

Breaking as not improving


In [238]:
tp,tn,fp,fn = pm4.score(X_test,y_test,_print = True)
result_dict ={"threshold":100,"delta":0.001,"method":"PM4-Infinite",
              "accuracy":(tp+tn)/(tp+tn+fp+fn)
              ,"epochs":None
              }
results = results.append(result_dict,ignore_index=True)

--------Results--------
Accuracy: 0.9343434343434344
----Class 1----
Precision: 0.9587628865979382
Recall: 0.9117647058823529
----Class 0----
Precision: 0.9108910891089109
Recall: 0.9583333333333334


### Using Epochs

In [239]:
seed = random.randint(0,100)
preprocessor.data.sample(frac=1,axis=1,random_state=seed)
splits = preprocessor.preprocess(drop_na=True,n_splits=10,standardize=False,labels=[-1,1]) # splitting into training and testing
train, test = splits[0]

# train = train.sample(frac=1,axis = 1,random_state=23)
# test = test.sample(frac=1,axis = 1,random_state=23)
X_train, y_train = train.drop(columns=["diagnosis"]).to_numpy(), train["diagnosis"].to_numpy()
X_test, y_test = test.drop(columns=["diagnosis"]).to_numpy(), test["diagnosis"].to_numpy()

pm4.fit(X_train,y_train,inf_loop=False,epochs=1000)

In [240]:
tp,tn,fp,fn=pm4.score(X_test,y_test,_print=True)
result_dict ={"threshold":None,"delta":None,"method":"PM4-Epochs",
              "accuracy":(tp+tn)/(tp+tn+fp+fn)
              ,"epochs":1000}
results = results.append(result_dict,ignore_index=True)

--------Results--------
Accuracy: 0.9191919191919192
----Class 1----
Precision: 0.8981481481481481
Recall: 0.9509803921568627
----Class 0----
Precision: 0.9444444444444444
Recall: 0.8854166666666666


## Results

In [241]:
display(results)

Unnamed: 0,threshold,delta,method,accuracy,epochs
0,100.0,0.001,Infinite loop without shuffling,0.934343,
1,,,Using epochs without shuffling training data(N...,0.838384,1000.0
2,100.0,0.001,Infinite loop (On shuffled training data),0.838384,
3,,,Epochs (shuffled training data),0.838384,1000.0
4,100.0,0.001,Infinite loop (On normalized data),0.954545,
5,,,Epochs (normalized data),0.954545,1000.0
6,100.0,0.001,Infinite loop (On shuffled attributes),0.934343,
7,,,Epochs (shuffled attributes),0.919192,1000.0
