In [52]:
import numpy as np
import pandas as pd
from keras.layers import Dense, Input
from keras.models import Sequential, load_model
from keras.utils import set_random_seed
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [38]:
# Read the banknote measurements from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer="./data/banknotes.csv")

In [39]:
print(df)
# Data inspection: the "class" column is the label that says whether a bill is
# genuine (0 = fake, 1 = authentic); the remaining columns are the features.

      variace  skewness  curtosis  entropy  class
0     3.62160   8.66610   -2.8073 -0.44699      0
1     4.54590   8.16740   -2.4586 -1.46210      0
2     3.86600  -2.63830    1.9242  0.10645      0
3     3.45660   9.52280   -4.0112 -3.59440      0
4     0.32924  -4.45520    4.5718 -0.98880      0
...       ...       ...       ...      ...    ...
1367  0.40614   1.34920   -1.4501 -0.55949      1
1368 -1.38870  -4.87730    6.4774  0.34179      1
1369 -3.75030 -13.45860   17.5932 -2.77710      1
1370 -3.56370  -8.38270   12.3930 -1.28230      1
1371 -2.54190  -0.65804    2.6842  1.19520      1

[1372 rows x 5 columns]


In [40]:
# Pull the target column ("class") out of the frame as a standalone NumPy array.
labels = df["class"].to_numpy(copy=True)

In [43]:
# Every column except the label becomes the feature matrix (as a NumPy array).
features = np.array(df.drop(columns="class"))

In [42]:
# Conventional ML notation ahead of the train/test split:
# X is the feature matrix, y is the label vector.
X, y = features, labels

In [45]:
# Hold out 33% of the samples as the test set; the rest is used for training
# (X = X_train + X_test). random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.33,
)

In [44]:
# The network performs better when the inputs are scaled into a fixed range (e.g. 0..1).
# The scaler is fitted on X_train only: fitting on the whole data set X would use
# knowledge about the test samples that we should not have at training time.
scaler_object = MinMaxScaler().fit(X_train)  # learns the min and max values of the training data

# Apply the transformation learned from the training data to both splits.
scaled_X_train = scaler_object.transform(X_train)
scaled_X_test = scaler_object.transform(X_test)

In [46]:
# Build, compile and train a small fully connected binary classifier with Keras.

# Seed Python, NumPy and the Keras backend so that weight initialisation and
# batch shuffling (and therefore the training log) are repeatable across runs.
set_random_seed(42)

# The input width is declared with an explicit Input layer and taken from the
# training data. This replaces the `input_dim` argument on the first Dense layer,
# which Keras 3 flags with a UserWarning for Sequential models.
model = Sequential(
    [
        Input(shape=(scaled_X_train.shape[1],)),  # one input per feature column
        Dense(4, activation="relu"),
        Dense(8, activation="relu"),
        Dense(1, activation="sigmoid"),  # single output in 0..1, thresholded later
    ]
)

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# verbose=2 prints one summary line per epoch. The accuracy reported here is
# measured on the training data, not on the held-out test set.
model.fit(scaled_X_train, y_train, epochs=50, verbose=2)

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


29/29 - 1s - 19ms/step - accuracy: 0.5495 - loss: 0.6972
Epoch 2/50
29/29 - 0s - 923us/step - accuracy: 0.5495 - loss: 0.6835
Epoch 3/50
29/29 - 0s - 903us/step - accuracy: 0.5495 - loss: 0.6732
Epoch 4/50
29/29 - 0s - 886us/step - accuracy: 0.5495 - loss: 0.6636
Epoch 5/50
29/29 - 0s - 869us/step - accuracy: 0.5495 - loss: 0.6541
Epoch 6/50
29/29 - 0s - 875us/step - accuracy: 0.5626 - loss: 0.6401
Epoch 7/50
29/29 - 0s - 945us/step - accuracy: 0.6670 - loss: 0.6187
Epoch 8/50
29/29 - 0s - 944us/step - accuracy: 0.7356 - loss: 0.5947
Epoch 9/50
29/29 - 0s - 893us/step - accuracy: 0.7682 - loss: 0.5711
Epoch 10/50
29/29 - 0s - 860us/step - accuracy: 0.7671 - loss: 0.5496
Epoch 11/50
29/29 - 0s - 860us/step - accuracy: 0.7802 - loss: 0.5299
Epoch 12/50
29/29 - 0s - 906us/step - accuracy: 0.7933 - loss: 0.5110
Epoch 13/50
29/29 - 0s - 1ms/step - accuracy: 0.8020 - loss: 0.4937
Epoch 14/50
29/29 - 0s - 1ms/step - accuracy: 0.8150 - loss: 0.4654
Epoch 15/50
29/29 - 0s - 894us/step - accurac

<keras.src.callbacks.history.History at 0x7724287b7d90>

In [53]:
# Predict on unseen data: the network outputs a value between 0 and 1 per sample,
# which is turned into a hard 0/1 class by thresholding at 0.5.
probabilities = model.predict(scaled_X_test)
predictions = (probabilities > 0.5).astype("int32")

# Compare the predicted classes against the true test labels.
confusion_matrix(y_test, predictions)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 520us/step


array([[253,   4],
       [ 11, 185]])

In [54]:
# Precision, recall and F1 score per class on the test set.
report = classification_report(y_test, predictions)
print(report)
# Observation: the model classifies well, with roughly 97% across the board.

              precision    recall  f1-score   support

           0       0.96      0.98      0.97       257
           1       0.98      0.94      0.96       196

    accuracy                           0.97       453
   macro avg       0.97      0.96      0.97       453
weighted avg       0.97      0.97      0.97       453



In [55]:
# Persist the trained model to disk so it can be reloaded later without retraining.
model.save(filepath="data/trained-bill-classification-model.h5")



In [None]:
# Reload the trained model from the file written by the save cell above.
# The path must match the one passed to model.save(); an empty path cannot be loaded.
my_bill_classification_model = load_model("data/trained-bill-classification-model.h5")