# Bank note classification using different algorithms

### Importing the libraries:

In [1]:
import pandas as pd
import numpy as np

### Loading the data set:

In [2]:
old = pd.read_csv("data_banknote_authentication.txt")

In [3]:
old.head()

Unnamed: 0,3.6216,8.6661,-2.8073,-0.44699,0
0,4.5459,8.1674,-2.4586,-1.4621,0
1,3.866,-2.6383,1.9242,0.10645,0
2,3.4566,9.5228,-4.0112,-3.5944,0
3,0.32924,-4.4552,4.5718,-0.9888,0
4,4.3684,9.6718,-3.9606,-3.1625,0


### Data preparation 

In [4]:
# We can see that th data set is not in a regular format. So edit it and make it more meaningful:

In [5]:
old.columns = ["variance", "skewness", "kurtosis", "entropy", "class"]

In [6]:
old.head()

Unnamed: 0,variance,skewness,kurtosis,entropy,class
0,4.5459,8.1674,-2.4586,-1.4621,0
1,3.866,-2.6383,1.9242,0.10645,0
2,3.4566,9.5228,-4.0112,-3.5944,0
3,0.32924,-4.4552,4.5718,-0.9888,0
4,4.3684,9.6718,-3.9606,-3.1625,0


In [7]:
old.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1371 entries, 0 to 1370
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   variance  1371 non-null   float64
 1   skewness  1371 non-null   float64
 2   kurtosis  1371 non-null   float64
 3   entropy   1371 non-null   float64
 4   class     1371 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 53.7 KB


In [8]:
old.describe()

Unnamed: 0,variance,skewness,kurtosis,entropy,class
count,1371.0,1371.0,1371.0,1371.0,1371.0
mean,0.43141,1.917434,1.400694,-1.1922,0.444931
std,2.842494,5.868359,4.310105,2.101683,0.497139
min,-7.0421,-13.7731,-5.2861,-8.5482,0.0
25%,-1.7747,-1.7113,-1.55335,-2.417,0.0
50%,0.49571,2.3134,0.61663,-0.58665,0.0
75%,2.81465,6.8131,3.1816,0.39481,1.0
max,6.8248,12.9516,17.9274,2.4495,1.0


In [9]:
old.corr()

Unnamed: 0,variance,skewness,kurtosis,entropy,class
variance,1.0,0.263333,-0.380358,0.276666,-0.724655
skewness,0.263333,1.0,-0.786729,-0.526896,-0.444281
kurtosis,-0.380358,-0.786729,1.0,0.319219,0.155346
entropy,0.276666,-0.526896,0.319219,1.0,-0.0232
class,-0.724655,-0.444281,0.155346,-0.0232,1.0


In [10]:
old.drop_duplicates(inplace=True)

In [11]:
old.isnull().sum()

variance    0
skewness    0
kurtosis    0
entropy     0
class       0
dtype: int64

In [12]:
# We can see that ther are no missing values.

In [13]:
# class is our target and rest is our features.

In [14]:
x = old.loc[:, ["variance", "skewness", "kurtosis", "entropy"]]

In [15]:
y = old["class"].values

In [16]:
x.shape

(1347, 4)

In [17]:
y.shape

(1347,)

### Splitting the data set into training and testing sets:

In [18]:
from sklearn.model_selection import train_test_split

In [19]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)

### Model building using random forest classifier

In [20]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import r2_score

In [21]:
rfc = RandomForestClassifier()

In [22]:
rfc.fit(x_train, y_train)

RandomForestClassifier()

In [23]:
y_pred_RFC = rfc.predict(x_test)

In [24]:
r2_score(y_test, y_pred_RFC)

0.9880927142958095

In [25]:
confusion_matrix(y_test, y_pred_RFC)

array([[177,   1],
       [  0, 159]], dtype=int64)

### Model building using Logistic regression

In [26]:
from sklearn.linear_model import LogisticRegression

In [27]:
LR = LogisticRegression()

In [28]:
LR.fit(x_train, y_train)

LogisticRegression()

In [29]:
y_pred_LR = LR.predict(x_test)

In [30]:
r2_score(y_test, y_pred_LR)

0.9761854285916189

In [31]:
confusion_matrix(y_test, y_pred_LR)

array([[176,   2],
       [  0, 159]], dtype=int64)

### Model building using Support Vector machine

In [32]:
from sklearn import svm

In [33]:
svc = svm.SVC()

In [34]:
svc.fit(x_train, y_train)

SVC()

In [35]:
y_pred_svc = svc.predict(x_test)

In [36]:
r2_score(y_test, y_pred_svc)

1.0

In [37]:
confusion_matrix(y_test, y_pred_svc)

array([[178,   0],
       [  0, 159]], dtype=int64)

### Model building using KNN

In [38]:
from sklearn.neighbors import KNeighborsClassifier

In [39]:
KNN = KNeighborsClassifier(n_neighbors=7)
KNN.fit(x_train, y_train)

KNeighborsClassifier(n_neighbors=7)

In [40]:
y_pred_KNN = KNN.predict(x_test)

In [41]:
r2_score(y_test,y_pred_KNN)

1.0

In [42]:
confusion_matrix(y_test, y_pred_KNN)

array([[178,   0],
       [  0, 159]], dtype=int64)