In [30]:
import pandas as pd
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report 

In [13]:
# Load the banknote-authentication data set.
# The file has no header row, so header=None keeps the first record as data.
# The data set can be downloaded from:
# https://archive.ics.uci.edu/ml/datasets/banknote+authentication#
data_set = pd.read_csv(
    "data_banknote_authentication.txt",
    header=None,
)

In [14]:
# Preview the first 5 rows of the freshly loaded frame.
data_set.head(5)

Unnamed: 0,0,1,2,3,4
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [15]:
# Replace the default integer column labels with descriptive names:
# four feature columns followed by the target column.
data_set.columns = [
    'variance of Wavelet Transformed image',
    'skewness of Wavelet Transformed image',
    'curtosis of Wavelet Transformed image',
    'entropy of image',
    'class',
]

In [16]:
# Preview the first 5 rows again to confirm the new column names.
data_set.head(5)

Unnamed: 0,variance of Wavelet Transformed image,skewness of Wavelet Transformed image,curtosis of Wavelet Transformed image,entropy of image,class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [17]:
# Features: the first four columns of the frame.
# Every row is selected (":" instead of a hard-coded row count) so no record
# is silently dropped if the file holds a different number of rows.
inputs = data_set.iloc[:, [0, 1, 2, 3]].values

# Target: the last column ('class'). Indexing with a list keeps the result
# 2-D (one column); it is flattened to 1-D later, before training.
outputs = data_set.iloc[:, [4]].values

In [18]:
# Show the first 5 rows of the feature array.
inputs[0:5]

array([[ 3.6216 ,  8.6661 , -2.8073 , -0.44699],
       [ 4.5459 ,  8.1674 , -2.4586 , -1.4621 ],
       [ 3.866  , -2.6383 ,  1.9242 ,  0.10645],
       [ 3.4566 ,  9.5228 , -4.0112 , -3.5944 ],
       [ 0.32924, -4.4552 ,  4.5718 , -0.9888 ]])

In [19]:
# Show the first 5 rows of the target array.
outputs[0:5]


array([[0],
       [0],
       [0],
       [0],
       [0]], dtype=int64)

In [24]:
# Split features and targets into training and testing parts.
# test_size=0.2 holds out 20% of the rows for testing; random_state is fixed
# so the same rows land in each part on every run.
X_train, X_test, y_train, y_test = train_test_split(
    inputs,
    outputs,
    test_size=0.2,
    random_state=0,
)

In [25]:
# Show the first 5 training feature rows.
X_train[0:5]

array([[-1.2943 ,  2.6735 , -0.84085, -2.0323 ],
       [-0.40857,  3.0977 , -2.9607 , -2.6892 ],
       [-1.5228 , -6.4789 ,  5.7568 ,  0.87325],
       [-0.278  ,  8.1881 , -3.1338 , -2.5276 ],
       [ 3.7022 ,  6.9942 , -1.8511 , -0.12889]])

In [26]:
# Show the first 5 training labels (still a 2-D column at this point).
y_train[0:5]

array([[1],
       [1],
       [1],
       [0],
       [0]], dtype=int64)

In [27]:
# Collapse the 2-D column of training labels into a 1-D array
# (row-major order, which is the default) before fitting.
y_train = y_train.flatten(order="C")
y_train

array([1, 1, 1, ..., 1, 0, 0], dtype=int64)

In [31]:
# Linear classifier trained with stochastic gradient descent.
# SGD shuffles the training data, so without a fixed seed each run can yield
# a slightly different model and report. random_state pins it, using the same
# seed value as the train/test split.
# NOTE(review): the features are passed in unscaled; SGD is usually sensitive
# to feature scale, so consider standardizing them first.
SGD_classifier = SGDClassifier(random_state=0)

# Train on the training split. fit() returns the fitted estimator, which is
# bound to a second name used for prediction below.
SGD_classifier_clf = SGD_classifier.fit(X_train, y_train)

# Predict class labels for the held-out test split.
y_pred = SGD_classifier_clf.predict(X_test)

In [32]:
# Print precision, recall, F1 and accuracy for the test-split predictions.
# The run recorded with this notebook reported an accuracy of about 99%.
print(
    classification_report(y_test, y_pred)
)

              precision    recall  f1-score   support

           0       0.99      0.98      0.99       157
           1       0.97      0.99      0.98       118

    accuracy                           0.99       275
   macro avg       0.98      0.99      0.99       275
weighted avg       0.99      0.99      0.99       275

