In [1]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


### Read dataset

In [2]:
# Location of the input CSV, relative to the notebook's working directory.
DATA_PATH = 'bill_authentication.csv'

df = pd.read_csv(DATA_PATH)
df.head()  # preview the first five rows

Unnamed: 0,Variance,Skewness,Curtosis,Entropy,Class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


### Check class balance

In [3]:
# Count how many rows carry each value of the target column.
class_counts = df["Class"].value_counts()
class_counts

Class
0    762
1    610
Name: count, dtype: int64

### Split train and test data

In [4]:
from sklearn.model_selection import train_test_split

# Predictor columns and the target column taken from the loaded frame.
feature_columns = ["Variance", "Skewness", "Curtosis", "Entropy"]
X = df[feature_columns]
y = df["Class"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

### Scale features as values are not in the same range

In [5]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-feature min/max from the training rows only, then apply the
# same transformation to the test rows so no test information leaks into the fit.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_scaled

array([[0.63034276, 0.63196554, 0.35283348, 0.80840001],
       [0.75006671, 0.89307821, 0.06311844, 0.4514084 ],
       [0.5746872 , 0.87904674, 0.1991716 , 0.64191883],
       ...,
       [0.69048598, 0.80692709, 0.2096543 , 0.55088836],
       [0.53186725, 0.64717136, 0.03351498, 0.4235204 ],
       [0.40619749, 0.4412451 , 0.22323777, 0.78014976]])

### Prediction using Decision Tree Classifier

In [6]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Baseline: a single decision tree with default hyperparameters.
# random_state is pinned (same seed as the train/test split) because the tree
# permutes features at each split and breaks ties between equally good splits
# at random; without a seed the report below can differ from run to run.
model = DecisionTreeClassifier(random_state=10)
model.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred = model.predict(X_test_scaled)

# Per-class precision / recall / F1 plus overall accuracy on the held-out rows.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.99      0.98      0.98       152
           1       0.98      0.98      0.98       123

    accuracy                           0.98       275
   macro avg       0.98      0.98      0.98       275
weighted avg       0.98      0.98      0.98       275



### Prediction using Ensemble Method (Logistic, DT, SVC)

In [7]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

# Seed shared by the stochastic base estimators so that re-running the
# notebook reproduces the same fitted ensemble (matches the split's seed).
ENSEMBLE_SEED = 10

# Create different models
log_model = LogisticRegression()
# The tree breaks ties between equally good splits at random -> seed it.
dt_model = DecisionTreeClassifier(random_state=ENSEMBLE_SEED)
# probability=True is only needed for soft voting; it is kept so that the
# voting mode below can be switched without touching this line. It makes SVC
# shuffle the data for an internal cross-validation, hence the seed.
svm_model = SVC(probability=True, random_state=ENSEMBLE_SEED)

# Create a voting classifier
voting_clf = VotingClassifier(
    estimators=[('lr', log_model), ('dt', dt_model), ('svm', svm_model)],
    voting='hard')  # Use 'soft' for soft voting

voting_clf.fit(X_train_scaled, y_train)

In [8]:
# Score the fitted ensemble on the held-out rows.
# NOTE: this rebinds y_pred and report, replacing the values from the
# decision-tree cell.
y_pred = voting_clf.predict(X_test_scaled)

report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       152
           1       1.00      1.00      1.00       123

    accuracy                           1.00       275
   macro avg       1.00      1.00      1.00       275
weighted avg       1.00      1.00      1.00       275



### Conclusion: The ensemble method achieved higher prediction accuracy than the single Decision Tree Classifier (1.00 vs. 0.98). An accuracy of 1.00 means the ensemble correctly classified all 275 instances in the test set. This suggests the model performs extremely well on this specific dataset; however, a perfect score on a single train/test split should be interpreted with caution, and further testing (e.g. cross-validation and evaluation on more diverse datasets) is necessary to validate its generalizability.