In [16]:
# Import dependencies
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

In [17]:
# Import the dataset
bankdata = pd.read_csv("bank_data.csv")

In [18]:
# Exploratory data analysis
bankdata.shape
bankdata.head

<bound method NDFrame.head of       Variance  Skewness  Curtosis  Entropy  Class
0      3.62160   8.66610   -2.8073 -0.44699      0
1      4.54590   8.16740   -2.4586 -1.46210      0
2      3.86600  -2.63830    1.9242  0.10645      0
3      3.45660   9.52280   -4.0112 -3.59440      0
4      0.32924  -4.45520    4.5718 -0.98880      0
...        ...       ...       ...      ...    ...
1367   0.40614   1.34920   -1.4501 -0.55949      1
1368  -1.38870  -4.87730    6.4774  0.34179      1
1369  -3.75030 -13.45860   17.5932 -2.77710      1
1370  -3.56370  -8.38270   12.3930 -1.28230      1
1371  -2.54190  -0.65804    2.6842  1.19520      1

[1372 rows x 5 columns]>

In [23]:
# Data preprocessing
X = bankdata.drop('Class', axis=1)  # X = features
y = bankdata['Class']               # y = target (label)

# divide into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [24]:
# Training the algorithm
svclassifier = SVC(kernel='linear')
svclassifier.fit(X_train, y_train)

SVC(kernel='linear')

In [25]:
# Predicting classes
y_pred = svclassifier.predict(X_test)
print(y_pred)

[1 1 1 1 0 0 0 1 0 1 0 0 1 0 1 1 0 1 0 1 0 1 1 0 0 0 1 1 0 0 1 1 0 1 0 1 0
 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 1 0 1 0 1 1 0 1 0 0 0 1 1 1 0 0 1 1 0 1 0 0
 1 1 0 1 1 0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 1 0 1 1 0 1 1 0 0 0 0 1 1 1 0 0
 0 1 1 0 0 1 1 0 0 0 0 1 0 1 1 0 0 1 0 1 0 1 0 1 0 1 1 0 0 0 1 1 1 0 0 0 0
 1 0 0 1 1 1 1 0 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 0 0 0 1 0 1 0 1 0
 1 1 0 0 1 0 0 0 1 1 0 0 1 0 1 1 0 0 0 0 1 1 1 0 1 1 1 0 0 1 1 0 1 1 0 0 0
 1 1 1 0 0 0 0 0 1 0 0 1 1 1 1 0 1 0 1 0 0 1 1 0 0 0 1 0 1 0 1 1 1 1 1 0 0
 0 0 1 0 0 0 0 0 0 1 0 1 0 0 1 0]


In [26]:
# Evaluate the model
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test, y_pred))

[[148   4]
 [  0 123]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.99       152
           1       0.97      1.00      0.98       123

    accuracy                           0.99       275
   macro avg       0.98      0.99      0.99       275
weighted avg       0.99      0.99      0.99       275



In [27]:
# Persist the model
from joblib import dump, load
dump(svclassifier, 'model.joblib')

['mike-test1.joblib']

In [28]:
# Load the model
myclassifier = load('model.joblib')

In [34]:
# Test loaded model
new_pred = myclassifier.predict(X_test)
print(new_pred)

print(classification_report(y_test, new_pred, output_dict=True)['accuracy'])

[1 1 1 1 0 0 0 1 0 1 0 0 1 0 1 1 0 1 0 1 0 1 1 0 0 0 1 1 0 0 1 1 0 1 0 1 0
 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 1 0 1 0 1 1 0 1 0 0 0 1 1 1 0 0 1 1 0 1 0 0
 1 1 0 1 1 0 0 0 1 1 1 0 0 0 0 1 1 1 0 0 0 1 0 1 1 0 1 1 0 0 0 0 1 1 1 0 0
 0 1 1 0 0 1 1 0 0 0 0 1 0 1 1 0 0 1 0 1 0 1 0 1 0 1 1 0 0 0 1 1 1 0 0 0 0
 1 0 0 1 1 1 1 0 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 0 0 0 1 0 1 0 1 0
 1 1 0 0 1 0 0 0 1 1 0 0 1 0 1 1 0 0 0 0 1 1 1 0 1 1 1 0 0 1 1 0 1 1 0 0 0
 1 1 1 0 0 0 0 0 1 0 0 1 1 1 1 0 1 0 1 0 0 1 1 0 0 0 1 0 1 0 1 1 1 1 1 0 0
 0 0 1 0 0 0 0 0 0 1 0 1 0 0 1 0]
0.9854545454545455


In [30]:
print(X_test)

      Variance  Skewness  Curtosis  Entropy
1134   -1.7063   2.79560  -2.37800 -2.34910
1195   -1.9725   2.88250  -2.30860 -2.37240
928    -6.1536   7.92950   0.61663 -3.26460
837    -6.2815   6.66510   0.52581 -7.01070
70      3.9529  -2.35480   2.37920  0.48274
...        ...       ...       ...      ...
1280   -3.3553   0.35591   2.64730 -0.37846
678     2.4673   1.39260   1.71250  0.41421
2       3.8660  -2.63830   1.92420  0.10645
1314   -3.5060 -12.56670  15.16060 -0.75216
333     3.0009   5.81260  -2.23060 -0.66553

[275 rows x 4 columns]


In [31]:
predict_start = time.process_time()
new_classifier = load('model.joblib')
customer = {'Variance': [-0.60975], 'Skewness': [-4.00200], 'Curtosis': [1.847100], 'Entropy': [0.60170]}
customer_df = pd.DataFrame(customer, columns = ['Variance', 'Skewness', 'Curtosis', 'Entropy'])
test1 = new_classifier.predict(customer_df)
predict_stop = time.process_time()

p1 = ['real' if val == 1 else 'fake' for val in test1]

predict2_start = time.process_time()
new_classifier2 = load('model.joblib')
test2 = new_classifier2.predict(np.array([-2.16680,1.59330,0.045122,-1.67800]).reshape(1, -1))
predict2_stop = time.process_time()

p2 = ['real' if val == 1 else 'fake' for val in test2]

print(customer_df)
print()
print(f"{'Result 1':25}: {p1}")
print(f"{'Result 2':25}: {p2}")
print(f"{'Prediction 1 took':25}: {predict_stop-predict_start}")
print(f"{'Prediction 2 took':25}: {predict2_stop-predict2_start}")

   Variance  Skewness  Curtosis  Entropy
0  -0.60975    -4.002    1.8471   0.6017

Result 1                 : ['real']
Result 2                 : ['real']
Prediction 1 took        : 0.007296000000000191
Prediction 2 took        : 0.001972000000000307
