In [1]:
# Import dependencies
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

In [5]:
# Import the dataset
bankdata = pd.read_csv("bank_data.csv")

In [2]:
# Import the starter dataset (expect to be less accurate)
bankdata = pd.read_csv("bank_data_light.csv")

In [8]:
# Exploratory data analysis
bankdata.shape
bankdata.head

<bound method NDFrame.head of    variance  skewness  curtosis  entropy  class
0    3.6216   8.66610   -2.8073 -0.44699      0
1    4.5459   8.16740   -2.4586 -1.46210      0
2    3.8660  -2.63830    1.9242  0.10645      0
3    1.4378   0.66837   -2.0267  1.02710      1
4    2.1943   4.55030   -4.9760 -2.72540      1
5    0.7376   4.85250   -4.7986 -5.66590      1>

In [9]:
# Data preprocessing
X = bankdata.drop('class', axis=1)  # X = features
y = bankdata['class']               # y = target (label)

# divide into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [10]:
# Training the algorithm
svclassifier = SVC(kernel='linear')
svclassifier.fit(X_train, y_train)

SVC(kernel='linear')

In [11]:
# Predicting classes
y_pred = svclassifier.predict(X_test)
print(y_pred)

[1 0]


In [12]:
# Evaluate the model
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test, y_pred))

[[1 0]
 [0 1]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



In [13]:
# Persist the model
from joblib import dump, load
dump(svclassifier, 'model.joblib')

['model.joblib']

In [14]:
# Persist the model
from joblib import dump, load
dump(svclassifier, 'model_light.joblib')

['model_light.joblib']

In [15]:
# Load the model
myclassifier = load('model.joblib')

In [16]:
# Test loaded model
new_pred = myclassifier.predict(X_test)
print(new_pred)

print(classification_report(y_test, new_pred, output_dict=True)['accuracy'])

[1 0]
1.0


In [17]:
print(X_test)

   variance  skewness  curtosis  entropy
4    2.1943    4.5503   -4.9760  -2.7254
1    4.5459    8.1674   -2.4586  -1.4621


In [18]:
# compare processing times using Pandas or Numpy
predict_start = time.process_time()
new_classifier = load('model.joblib')
customer = {'Variance': [-0.60975], 'Skewness': [-4.00200], 'Curtosis': [1.847100], 'Entropy': [0.60170]}
customer_df = pd.DataFrame(customer, columns = ['Variance', 'Skewness', 'Curtosis', 'Entropy'])
test1 = new_classifier.predict(customer_df)
predict_stop = time.process_time()

p1 = ['real' if val == 1 else 'fake' for val in test1]

predict2_start = time.process_time()
new_classifier2 = load('model.joblib')
test2 = new_classifier2.predict(np.array([-2.16680,1.59330,0.045122,-1.67800]).reshape(1, -1))
predict2_stop = time.process_time()

p2 = ['real' if val == 1 else 'fake' for val in test2]

print(customer_df)
print()
print(f"{'Result 1':25}: {p1}")
print(f"{'Result 2':25}: {p2}")
print(f"{'Prediction 1 took':25}: {predict_stop-predict_start}")
print(f"{'Prediction 2 took':25}: {predict2_stop-predict2_start}")

   Variance  Skewness  Curtosis  Entropy
0  -0.60975    -4.002    1.8471   0.6017

Result 1                 : ['real']
Result 2                 : ['real']
Prediction 1 took        : 0.005993000000000137
Prediction 2 took        : 0.002146999999999899
