In [1]:
import os

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the banknote-authentication records. The file is read as JSON Lines
# (`lines=True`): one JSON record per line.
df_banknote_auth = pd.read_json('../data/banknote_auth.json', orient='records', lines=True)

# Preview a handful of random rows. The fixed seed keeps the preview identical
# across re-runs, matching the `random_state=1` used elsewhere in the notebook.
df_banknote_auth.sample(5, random_state=1)

Unnamed: 0,variance,skewness,curtosis,entropy,not_genuine
0,3.6216,8.6661,-2.8073,-0.44699,0
1313,-1.5078,-7.3191,7.8981,1.2289,1
1314,-3.506,-12.5667,15.1606,-0.75216,1
21,0.3292,-4.4552,4.5718,-0.9888,0
892,0.21431,-0.69529,0.87711,0.29653,1


In [3]:
# Pull the target column and the four input columns out of the frame as plain
# NumPy arrays. The feature array is 2-D, with one column per listed feature
# in the order given here.
model_labels = df_banknote_auth.loc[:, 'not_genuine'].values
model_features = df_banknote_auth.loc[:, ['variance', 'skewness', 'curtosis', 'entropy']].values

In [4]:
# Candidate models to compare, stored as (label, unfitted estimator) pairs.
# Each label is the estimator's class name. Estimators that accept a
# `random_state` get it pinned to 1 so repeated runs are comparable.
classifiers = [
    (type(estimator).__name__, estimator)
    for estimator in (
        KNeighborsClassifier(n_neighbors=5),
        DecisionTreeClassifier(random_state=1),
        RandomForestClassifier(max_depth=8, random_state=1),
        SVC(gamma='auto', kernel='rbf', random_state=1),
        MLPClassifier(hidden_layer_sizes=(2,), solver='lbfgs', random_state=1),
    )
]

In [5]:
# Hold out 30% of the rows for evaluation and train on the remaining 70%.
# The fixed random_state makes the split identical on every run.
train_features, test_features, train_labels, test_labels = train_test_split(
    model_features,
    model_labels,
    test_size=0.3,
    train_size=0.7,
    random_state=1,
)

In [8]:
# Train every candidate on the training split and report its accuracy on the
# held-out split. Estimators that expose `feature_importances_` once fitted
# also get those importances appended to their report line.
for name, classifier in classifiers:
    fitted_classifier = classifier.fit(train_features, train_labels)
    accuracy = accuracy_score(
        y_true=test_labels,
        y_pred=fitted_classifier.predict(test_features),
    )
    report = f'accuracy={accuracy}'
    if hasattr(fitted_classifier, 'feature_importances_'):
        report += f', importances={str(fitted_classifier.feature_importances_)}'
    print(f'{name} [{report}]')

KNeighborsClassifier [accuracy=0.9975728155339806]
DecisionTreeClassifier [accuracy=0.9878640776699029, importances=[0.64483576 0.18895411 0.15110674 0.01510339]]
RandomForestClassifier [accuracy=0.9951456310679612, importances=[0.56066718 0.24058589 0.14022782 0.05851911]]
SVC [accuracy=1.0]
MLPClassifier [accuracy=1.0]


In [9]:
# Pick the classifier to persist. The name is a manual choice made from the
# accuracy report printed by the evaluation cell; revisit it whenever that
# report changes. A missing name raises KeyError with the offending label.
# NOTE(review): this relies on the evaluation cell having already fitted the
# estimators stored in `classifiers` -- confirm that cell ran first.
best_classifier = dict(classifiers)['MLPClassifier']

# joblib.dump does not create missing directories, so make sure the target
# folder exists first (a fresh checkout may not contain it).
classifier_path = '../output/classifier.pkl'
os.makedirs(os.path.dirname(classifier_path), exist_ok=True)

# Last expression: joblib.dump returns the list of written file names, which
# the notebook displays as the cell output.
joblib.dump(best_classifier, classifier_path)

['../output/classifier.pkl']