# Support Vector Machines

# Import

In [7]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

# Load Dataset

In [2]:
df = pd.read_csv('framingham_heart_disease.csv')
df

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.10,85.0,85.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4233,1,50,1.0,1,1.0,0.0,0,1,0,313.0,179.0,92.0,25.97,66.0,86.0,1
4234,1,51,3.0,1,43.0,0.0,0,0,0,207.0,126.5,80.0,19.71,65.0,68.0,0
4235,0,48,2.0,1,20.0,,0,0,0,248.0,131.0,72.0,22.00,84.0,86.0,0
4236,0,44,1.0,1,15.0,0.0,0,0,0,210.0,126.5,87.0,19.16,86.0,,0


In [3]:
df.dropna(inplace=True)

In [9]:
X = df[['male', 'age', 'education', 'currentSmoker', 'cigsPerDay', 'BPMeds', 'prevalentStroke', 'prevalentHyp', 'diabetes', 'totChol', 'sysBP', 'diaBP', 'BMI', 'heartRate', 'glucose']]
y = df['TenYearCHD']

# Split Data

In [10]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1)

# Feature Scalling (Standardization/normalization)

## Train

In [11]:
scaler = StandardScaler().fit(x_train)

In [13]:
#transform
x_train_scaled = scaler.transform(x_train)
x_train_scaled

array([[-8.95308256e-01, -9.96059320e-01,  2.81283510e-02, ...,
        -3.10943585e-01, -2.22200441e-01, -8.87061007e-01],
       [ 1.11693374e+00, -1.34810813e+00,  2.81283510e-02, ...,
        -1.32759504e+00, -1.30350514e+00, -2.99387120e-03],
       [-8.95308256e-01, -7.61360115e-01,  1.98073439e+00, ...,
        -3.15819611e-01,  1.02545883e+00, -8.02864137e-01],
       ...,
       [ 1.11693374e+00,  9.98883928e-01, -9.48174669e-01, ...,
         9.78765340e-01, -5.58458719e-02,  2.07498304e-01],
       [ 1.11693374e+00,  8.81534325e-01, -9.48174669e-01, ...,
         2.99987819e+00, -6.38086863e-01,  5.02187350e-01],
       [ 1.11693374e+00, -1.11340892e+00, -9.48174669e-01, ...,
         2.93683662e-01, -1.30350514e+00, -4.66076657e-01]])

# Test

In [14]:
scaler1 = StandardScaler().fit(x_test)

In [15]:
#transform
x_test_scaled = scaler1.transform(x_test)
x_test_scaled

array([[-0.88609043,  1.6416073 , -0.00538006, ...,  2.42750134,
         2.03220046, -0.09081215],
       [-0.88609043, -1.70293099, -0.00538006, ..., -1.26332124,
         1.18868038, -0.04974223],
       [ 1.12855299, -1.35694427, -0.00538006, ..., -1.32660695,
         0.3451603 ,  0.36095695],
       ...,
       [ 1.12855299, -0.0883263 , -0.00538006, ..., -1.61265836,
         0.17645628,  0.15560736],
       [ 1.12855299,  0.60364714, -0.98885541, ..., -0.37478988,
         0.09210427, -0.29616174],
       [-0.88609043, -0.54964192, -0.00538006, ...,  0.16187294,
        -0.6670638 , -0.58365117]])

# Build Model

## using scaled data

In [23]:
model = SVC()

In [24]:
model.fit(x_train_scaled, y_train)

In [25]:
model.score(x_train_scaled, y_train)

0.8581327498176513

## using unscaled data

In [21]:
model1 = SVC()

In [22]:
model1.fit(x_train, y_train)

In [26]:
model1.score(x_train, y_train)

0.8460977388767323

# Make Predictions

In [27]:
y_pred = model.predict(x_test_scaled)

#unscaled 
y_pred_un = model1.predict(x_test)

# Analysis

## scaled

In [28]:
confusion_matrix(y_test, y_pred)

array([[778,   2],
       [133,   1]], dtype=int64)

In [30]:
# scalled
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.85      1.00      0.92       780
           1       0.33      0.01      0.01       134

    accuracy                           0.85       914
   macro avg       0.59      0.50      0.47       914
weighted avg       0.78      0.85      0.79       914



## Unscaled

In [29]:
confusion_matrix(y_test, y_pred_un)

array([[780,   0],
       [133,   1]], dtype=int64)

In [31]:
# unscalled
print(classification_report(y_test,y_pred_un))

              precision    recall  f1-score   support

           0       0.85      1.00      0.92       780
           1       1.00      0.01      0.01       134

    accuracy                           0.85       914
   macro avg       0.93      0.50      0.47       914
weighted avg       0.88      0.85      0.79       914

