# Perceptron
Load the `mnist` dataset. Split it into training and test sets. Train and test a perceptron model using scikit-learn. Check the documentation to identify the most important hyperparameters, attributes, and methods of the model. Use them in practice.

In [19]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import plotly.express as px


# Loading the Dataset

In [9]:
df = pd.read_csv('mnist.csv')
df = df.set_index('id')

x = df.drop(['class'], axis=1)
y = df['class']
 
print(df.shape)
df.head(3) 

(4000, 785)


Unnamed: 0_level_0,class,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
31953,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
34452,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
60897,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Spliting the data in train and test

In [3]:
x_train, x_test, y_train, y_test = train_test_split(x, y)

print(x_train.shape)
print(x_test.shape)

(3000, 784)
(1000, 784)


# Model Selection and Hyperparameter Tunning

In [13]:
parameters_grid = {
                  'penalty': ['l2','l1','elasticnet'],
                  'alpha': [0.000001, 0.00001, 0.0001, 0.0011, 0.01, 0.1, 1, 10],
                  'l1_ratio': [0.0, 0.25, 0.5, 0.75, 1.0]
}

model_1 = GridSearchCV(Perceptron(), parameters_grid,
                      scoring='accuracy', cv=5, n_jobs=-1)

model_1.fit(x_train, y_train)

print("Accuracy of best Random Forest classfier = {:.2f}".format(model_1.best_score_))
print("Best found hyperparameters of Random Forest classfier = {}".format(model_1.best_params_))




Accuracy of best Random Forest classfier = 0.86
Best found hyperparameters of Random Forest classfier = {'alpha': 1e-06, 'l1_ratio': 0.0, 'penalty': 'l2'}


# Testing The model

In [24]:
y_predicted = model_1.predict(x_test)
accuracy = accuracy_score(y_test, y_predicted)

cm = confusion_matrix(y_test, y_predicted)
precision, recall, f1, support = precision_recall_fscore_support(y_test, y_predicted)

print("Accuracy =", accuracy)
print("Precision = ", precision)
print("Recall = ", recall)
print('F1-Score', f1)
print("Confusion Matrix:\n", cm)
      
      
      
      

Accuracy = 0.832
Precision =  [0.94845361 1.         0.87912088 0.94594595 0.85436893 0.71681416
 0.83928571 0.8989899  0.68141593 0.66666667]
Recall =  [0.96842105 0.82857143 0.79207921 0.72916667 0.85436893 0.81
 0.8952381  0.79464286 0.77777778 0.88095238]
F1-Score [0.95833333 0.90625    0.83333333 0.82352941 0.85436893 0.76056338
 0.86635945 0.8436019  0.72641509 0.75897436]
Confusion Matrix:
 [[92  0  0  0  0  2  1  0  0  0]
 [ 0 87  0  1  1  3  0  2 11  0]
 [ 0  0 80  2  1  0  8  2  6  2]
 [ 2  0  3 70  0 10  2  1  6  2]
 [ 1  0  1  0 88  0  2  1  2  8]
 [ 1  0  0  0  2 81  5  0  6  5]
 [ 1  0  0  0  5  1 94  0  3  1]
 [ 0  0  4  0  2  0  0 89  2 15]
 [ 0  0  2  0  0 16  0  0 77  4]
 [ 0  0  1  1  4  0  0  4  0 74]]
