# 1. Install the EDA package (dataprep) and load the required packages

In [None]:
!pip install dataprep

In [None]:
import pandas as pd
import numpy as np

from sklearn import svm
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score

from dataprep.eda import create_report
import matplotlib.pyplot as plt

# 2. Read data & EDA

In [None]:
# Load the heart-disease CSV and print a quick textual overview of it:
# first rows, per-column missing-value flags, schema, and summary statistics.
heart = pd.read_csv('../input/heart-disease-uci/heart.csv')
print(heart.head(), '\n')
print(heart.isnull().any(), '\n')
# DataFrame.info() writes its summary itself and returns None, so wrapping it
# in print() emitted a stray "None". Call it directly, then print the spacer.
heart.info()
print('\n')
print(heart.describe().T)

In [None]:
# Generate dataprep's EDA report for the `heart` DataFrame.
create_report(heart)

In [None]:
# Standardize the listed columns of `heart` with StandardScaler; the other
# columns are left untouched.
sc_col = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
sc = StandardScaler()
# Replace the columns outright instead of writing through `.loc[:, sc_col]`.
# The scaler returns floats, and assigning them in place into columns that
# were parsed as integers (presumably most of these - confirm against the CSV)
# is the "incompatible dtype" assignment that pandas 2.1+ deprecates.
heart[sc_col] = sc.fit_transform(heart[sc_col])

In [None]:
# Separate predictors from the label: the last column of `heart` becomes y,
# every column before it becomes X.
last_col = heart.shape[1] - 1
X = heart.iloc[:, :last_col]
y = heart.iloc[:, last_col]

# 3. Support Vector Machine parameter setting
> Use the `GridSearchCV` function to grid-search the SVM hyper-parameters.

> You can check which parameters are best using 'clf.best_params_'

> In this case, the best parameters are {'C': 0.6, 'gamma': 'auto', 'kernel': 'rbf'}.

In [None]:
# 5-fold cross-validated grid search over SVC hyper-parameters, fitted on the
# full X / y.
param = {
    'C': [tenth / 10 for tenth in range(1, 10)],  # 0.1, 0.2, ..., 0.9
    'kernel': ['linear', 'poly', 'rbf'],
    'gamma': ['scale', 'auto'],
}
svm_clf = svm.SVC()

clf = GridSearchCV(
    estimator=svm_clf,
    param_grid=param,
    cv=5,
    verbose=5,
    n_jobs=3,
)
clf.fit(X, y)

In [None]:
# Display the parameter combination selected by the grid search.
clf.best_params_

In [None]:
# Turn the grid search's cv_results_ into a DataFrame so it can be
# inspected and filtered, then display it.
cv_result = pd.DataFrame(clf.cv_results_)
cv_result

In [None]:
# Show only the row(s) of the CV table whose test-score rank is 1.
top_ranked = cv_result['rank_test_score'] == 1
cv_result[top_ranked]

> Train / test data split (7:3)

> Train the model and check its accuracy on the test set: 84%

In [None]:
# Hold out 30% of the rows as a test set, stratified on y, with a fixed seed
# so the split is reproducible.
split_options = dict(test_size=0.3, random_state=123, stratify=y)
train_X, test_X, train_y, test_y = train_test_split(X, y, **split_options)

In [None]:
# Train an SVC with C=0.6, gamma='auto', kernel='rbf' on the training split
# and print its accuracy on the held-out test split.
svm_clf = svm.SVC(kernel='rbf', gamma='auto', C=0.6)
pred = svm_clf.fit(train_X, train_y).predict(test_X)  # fit() returns the estimator
print(accuracy_score(y_true=test_y, y_pred=pred))

In [None]:
# Display the support vectors of the fitted SVC.
svm_clf.support_vectors_

In [None]:
# Plot training rows on feature columns 0 and 7 (presumably age and thalach -
# confirm against the CSV column order), coloured by label, then overlay the
# SVC's support vectors in blue on the same two columns.
x_col, y_col = 0, 7
support = svm_clf.support_vectors_
plt.scatter(x=train_X.iloc[:, x_col], y=train_X.iloc[:, y_col], s=10, c=train_y, cmap='autumn')
plt.scatter(x=support[:, x_col], y=support[:, y_col], color='blue')

# 4. Visualization

In [None]:
from sklearn.decomposition import PCA

> n_components = 2

In [None]:
# Reduce the feature matrix to 2 principal components.
# NOTE: X is rebound to the PCA output here, so the original feature
# table is no longer reachable through X after this cell runs.
pca = PCA(n_components=2)
X = pca.fit_transform(X)

In [None]:
# Repeat the split / fit / score steps on the current X (the PCA-reduced
# features): same 70/30 stratified split, same seed, same SVC settings.
split_options = dict(test_size=0.3, random_state=123, stratify=y)
train_X, test_X, train_y, test_y = train_test_split(X, y, **split_options)

svm_clf = svm.SVC(kernel='rbf', gamma='auto', C=0.6)
pred = svm_clf.fit(train_X, train_y).predict(test_X)  # fit() returns the estimator
print(accuracy_score(y_true=test_y, y_pred=pred))

In [None]:
# Plot the training rows on columns 0 and 1 of train_X, coloured by label,
# and overlay the SVC's support vectors in blue.
support = svm_clf.support_vectors_
plt.scatter(x=train_X[:, 0], y=train_X[:, 1], s=10, c=train_y, cmap='autumn')
plt.scatter(x=support[:, 0], y=support[:, 1], color='blue')

> n_components = 3

In [None]:
# Rebuild X and y from `heart`: the last column is the label, every column
# before it is a predictor.
last_col = heart.shape[1] - 1
X = heart.iloc[:, :last_col]
y = heart.iloc[:, last_col]

In [None]:
# Reduce the feature matrix to 3 principal components.
# NOTE: X is rebound to the PCA output here, replacing the feature table.
pca = PCA(n_components=3)
X = pca.fit_transform(X)

In [None]:
# Split the current X / y 70/30 (stratified, fixed seed), fit the SVC with
# C=0.6, gamma='auto', kernel='rbf', and print accuracy on the test split.
split_options = dict(test_size=0.3, random_state=123, stratify=y)
train_X, test_X, train_y, test_y = train_test_split(X, y, **split_options)

svm_clf = svm.SVC(kernel='rbf', gamma='auto', C=0.6)
pred = svm_clf.fit(train_X, train_y).predict(test_X)  # fit() returns the estimator
print(accuracy_score(y_true=test_y, y_pred=pred))

In [None]:
# 3-D scatter of the training rows on columns 0-2 of train_X, coloured by
# label, with the SVC's support vectors overlaid in blue.
train_axes = [train_X[:, axis] for axis in range(3)]
support_axes = [svm_clf.support_vectors_[:, axis] for axis in range(3)]

fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(projection='3d')
ax.scatter(train_axes[0], train_axes[1], train_axes[2], c=train_y, s=10, cmap='autumn')
ax.scatter(support_axes[0], support_axes[1], support_axes[2], color='blue')
plt.show()