In [25]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

In [26]:
df = pd.read_csv('realheart.csv')

In [27]:
df.sample(3)

Unnamed: 0,age,sex,cp,tresbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
245,67.0,1.0,4.0,120.0,237.0,0.0,0.0,71.0,0.0,1.0,2.0,0,3,1
289,56.0,1.0,2.0,120.0,240.0,0.0,0.0,169.0,0.0,0.0,3.0,0,3,0
202,57.0,1.0,3.0,150.0,126.0,1.0,0.0,173.0,0.0,0.2,1.0,1,7,0


# Splitting Data

In [28]:
X = df.drop(columns = 'target')
y = df['target']

In [29]:
X_train,X_test,y_train,y_test = train_test_split(X,y , test_size = 0.2, random_state = 42)

In [30]:
sc = StandardScaler()
sc.fit_transform(X_train)
sc.transform(X_test)

array([[-1.98627069e-01,  7.22504380e-01, -9.20574618e-02,
        -1.25981747e-01,  7.89327927e-03,  2.51661148e+00,
         1.01249171e+00,  1.02124161e+00, -7.15891053e-01,
        -8.73572989e-01, -9.63431646e-01,  2.63716726e+00,
        -8.75754976e-01],
       [-8.86322160e-02,  7.22504380e-01,  9.20574618e-01,
        -1.22661650e+00, -8.22616975e-01, -3.97359707e-01,
         1.01249171e+00, -1.87737451e+00,  1.39686059e+00,
        -8.73572989e-01,  6.55668759e-01,  4.01810072e-01,
        -8.75754976e-01],
       [ 1.31357489e-01,  7.22504380e-01,  9.20574618e-01,
        -4.01140435e-01,  7.01815483e-02,  2.51661148e+00,
         1.01249171e+00, -2.71987120e-01,  1.39686059e+00,
         1.38739844e-01,  6.55668759e-01,  4.01810072e-01,
        -8.75754976e-01],
       [ 3.51347195e-01,  7.22504380e-01,  9.20574618e-01,
        -1.77693388e+00, -2.41259797e-01, -3.97359707e-01,
        -9.95893490e-01,  2.63142010e-01, -7.15891053e-01,
        -7.89213586e-01, -9.63431646e

In [51]:
svc = SVC(C = 1, gamma = 'auto')

In [52]:
svc.fit(X_train , y_train)

In [53]:
y_pred = svc.predict(X_test)

In [54]:
accuracy_score(y_pred, y_test)

0.47540983606557374

# Finding Best Parameters Using GSV

In [38]:
from sklearn.pipeline import Pipeline

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()

svm = SVC()

pipeline = Pipeline([
    ('scaler', scaler),
    ('svm', svm)
])

param_grid = {
    'svm__C': [1, 10, 50, 100],
    'svm__kernel': ['rbf', 'poly'],
    'svm__degree': [2, 3, 4, 5]
    }

grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='accuracy', n_jobs=-1)

grid_search.fit(X_train, y_train)

print(f"Best parameters found: {grid_search.best_params_}")
print(f"Best cross-validation accuracy: {grid_search.best_score_:.3f}")


best_model = grid_search.best_estimator_
training_accuracy = best_model.score(X_train, y_train)
print(f"Accuracy on the training dataset: {training_accuracy:.3f}")

test_accuracy = best_model.score(X_test, y_test)
print(f"Accuracy on the test dataset: {test_accuracy:.3f}")


Best parameters found: {'svm__C': 1, 'svm__degree': 3, 'svm__kernel': 'poly'}
Best cross-validation accuracy: 0.822
Accuracy on the training dataset: 0.917
Accuracy on the test dataset: 0.918


In [None]:
print(classification_report(y_pred, y_test))

In [37]:
print(confusion_matrix(y_pred, y_test))

[[26 17]
 [ 3 15]]
