In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix,accuracy_score,roc_curve,classification_report
from sklearn.svm import SVC
import pickle
import joblib

In [2]:
# Read heart.csv from the sibling data directory; the path is relative to the
# notebook's working directory.
data = pd.read_csv("../data/heart.csv")
# Display (n_rows, n_columns) as the cell output.
data.shape

(1000, 14)

In [3]:
# Preview the first rows to check the column names and typical values.
data.head()

Unnamed: 0,patientid,age,gender,chestpain,restingBP,serumcholestrol,fastingbloodsugar,restingrelectro,maxheartrate,exerciseangia,oldpeak,slope,noofmajorvessels,target
0,103368,53,1,2,171,0,0,1,147,0,5.3,3,3,1
1,119250,40,1,0,94,229,0,1,115,0,3.7,1,1,0
2,119372,49,1,2,133,142,0,0,202,1,5.0,1,0,0
3,132514,43,1,0,138,295,1,1,153,0,3.2,2,2,1
4,146211,31,1,1,199,0,0,2,136,0,5.3,3,2,1


In [4]:
# Separate the label from the predictors. 'patientid' is removed together with
# 'target' so that it is not used as a feature.
y = data['target']
X = data.drop(['patientid', 'target'], axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [5]:
# Row counts of the training and test splits.
len(X_train), len(X_test)

(800, 200)

In [6]:
# Standardize the features. The scaler's statistics are learned from the
# training split only and then applied unchanged to both splits.
# NOTE: this cell rebinds X_train / X_test to the scaled arrays, so re-running
# it without first re-running the split cell would fit the scaler on data that
# has already been scaled.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [7]:
# Shape of the scaled training matrix: (n_samples, n_features).
X_train.shape

(800, 12)

In [8]:
# Per-feature means the scaler learned from the training split.
scaler.mean_

array([4.9766250e+01, 7.6875000e-01, 1.0075000e+00, 1.5193750e+02,
       3.1404625e+02, 3.1250000e-01, 7.5000000e-01, 1.4558625e+02,
       5.0875000e-01, 2.7481250e+00, 1.5600000e+00, 1.2412500e+00])

In [9]:
# Spot-check the value range of two scaled training columns (indices 0 and 2).
# Displayed as (min of column 0, max of column 0, min of column 2, max of column 2).
col_0, col_2 = X_train[:, 0], X_train[:, 2]
col_0.min(), col_0.max(), col_2.min(), col_2.max()

(-1.6725143415733175,
 1.6987823617198097,
 -1.0532897487880573,
 2.08305689772725)

In [10]:
# First row of the scaled training matrix.
X_train[0]

array([ 0.40645196, -1.82327297, -1.05328975,  0.93522777,  1.53631292,
       -0.67419986,  0.32547228,  1.35413438, -1.01765584, -0.49016407,
        1.43899306,  1.79450419])

In [11]:
# Train a support vector classifier on the scaled training split and evaluate
# it on the held-out test split.
#
# The estimator is constructed exactly once. Building SVC(kernel='rbf', C=2)
# and then rebinding `svc` to SVC(probability=True) silently discards the C=2
# setting, so only the second configuration is ever fitted. The single
# constructor below is that effective configuration: 'rbf' is SVC's default
# kernel, C is left at its default, and probability=True enables
# predict_proba() on the fitted model.
# NOTE(review): if C=2 was the intended regularisation strength, add it here
# and re-run the evaluation; the recorded results correspond to the default C.
svc = SVC(kernel='rbf', probability=True)
svc.fit(X_train, y_train)

# Hard class predictions for the test split.
svc_predicted = svc.predict(X_test)

# Confusion matrix (rows: true class, columns: predicted class) and accuracy.
svc_conf_matrix = confusion_matrix(y_test, svc_predicted)
svc_acc_score = accuracy_score(y_test, svc_predicted)
print("confussion matrix")
print(svc_conf_matrix)
print("\n")
print("Accuracy of Support Vector Machine:",svc_acc_score*100,'\n')
# Per-class precision / recall / F1 summary.
print(classification_report(y_test,svc_predicted))

confussion matrix
[[ 91   4]
 [  1 104]]


Accuracy of Support Vector Machine: 97.5 

              precision    recall  f1-score   support

           0       0.99      0.96      0.97        95
           1       0.96      0.99      0.98       105

    accuracy                           0.97       200
   macro avg       0.98      0.97      0.97       200
weighted avg       0.98      0.97      0.97       200



In [12]:
# Display the predicted class label for every test row.
svc_predicted

array([1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1,
       1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1,
       1, 1], dtype=int64)

In [13]:
# Class-probability estimates for the test split; one column per class.
svc_predict_proba = svc.predict_proba(X_test)
# Display (n_test_rows, n_classes).
svc_predict_proba.shape

(200, 2)

In [14]:
# Keep only the second probability column (index 1) for each test row.
svc_predict_probability = svc_predict_proba[:,1]
svc_predict_probability

array([9.96623806e-01, 9.94453872e-01, 9.99996657e-01, 2.69408301e-04,
       9.81464696e-01, 5.79304823e-03, 9.85787386e-01, 6.06355435e-02,
       9.99999366e-01, 9.90904126e-01, 9.78285018e-01, 9.99996734e-01,
       5.51970553e-05, 9.93800011e-01, 1.28054512e-04, 9.42774076e-01,
       9.99999817e-01, 9.96806414e-01, 8.37115350e-03, 7.30968319e-03,
       1.51636131e-03, 6.73428701e-02, 4.21627486e-04, 9.99999501e-01,
       9.65431265e-01, 2.25299513e-04, 2.61672567e-02, 9.99994318e-01,
       9.95302864e-01, 8.35093371e-04, 2.72904204e-02, 4.84469447e-02,
       9.99998212e-01, 9.99999985e-01, 9.99998573e-01, 9.89114704e-01,
       9.99999999e-01, 2.00597107e-03, 4.22844211e-04, 9.80027145e-01,
       2.56292078e-02, 2.74682650e-01, 9.69993255e-01, 7.23534867e-03,
       9.99999892e-01, 9.99999996e-01, 3.80496921e-04, 1.76716518e-03,
       1.25765697e-02, 9.83793079e-01, 4.14274071e-05, 3.53368464e-02,
       9.51481493e-01, 9.94305557e-01, 2.21587012e-01, 9.99998511e-01,
      

In [15]:
# Confirm the result is a 1-D vector with one probability per test row.
svc_predict_probability.shape

(200,)

In [16]:
# Save the fitted model to disk.
# The file is opened in a `with` block so the handle is flushed and closed as
# soon as the pickle has been written, rather than being left open for the
# garbage collector to clean up.
filename = '../saved_models/support_vector_machine_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(svc, model_file)

In [17]:
# Save the fitted scaler to disk so that inference can apply the same
# standardization that was used for training.
scaler_filename = "../saved_models/support_vector_machine_model_scaler"
# joblib.dump returns the list of file names it wrote, which becomes the cell output.
joblib.dump(scaler, scaler_filename) 

['../saved_models/support_vector_machine_model_scaler']

# Front End

In [33]:
# Load the scaler that was saved after fitting on the training split.
# Note: this rebinds the notebook-level name `scaler` to the loaded object.
scaler_filename = "../saved_models/support_vector_machine_model_scaler"
scaler = joblib.load(scaler_filename)

In [34]:
# Load the model from disk.
# The file is opened in a `with` block so the handle is closed right after
# unpickling instead of being leaked.
# SECURITY: pickle.load can execute arbitrary code contained in the file; only
# load model files that were produced by this project / a trusted source.
filename = '../saved_models/support_vector_machine_model.sav'
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [35]:
# Prediction - based on values supplied by the front end.
# A single sample written directly as a one-row, 12-column matrix, which is
# the 2-D layout the scaler and the model expect. The values are in the same
# order as the training feature columns.
features_values = np.array(
    [[53, 1, 2, 171, 0, 0, 1, 147, 0, 5.3, 3, 3]]
)

In [36]:
# Confirm the input is a single row with 12 feature columns.
features_values.shape

(1, 12)

In [37]:
# Apply the loaded scaler to the raw front-end values before prediction.
# NOTE(review): the input here is a bare ndarray; if the saved scaler was fitted
# on a DataFrame, recent scikit-learn versions may warn about missing feature
# names — confirm against the installed version.
scaled_features_values = scaler.transform(features_values)
scaled_features_values.shape



(1, 12)

In [38]:
# Predict the class label for the single scaled sample with the loaded model.
fe_model_output = loaded_model.predict(scaled_features_values)
fe_model_output.shape

(1,)

In [39]:
# Display the predicted class label.
fe_model_output

array([1], dtype=int64)

In [40]:
# Class-probability estimates for the single sample; one column per class.
fe_model_proba_output = loaded_model.predict_proba(scaled_features_values)
fe_model_proba_output.shape

(1, 2)

In [41]:
# Keep only the second probability column (index 1) for the sample.
fe_predict_probability = fe_model_proba_output[:,1]
fe_predict_probability

array([0.99999792])