In [20]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [21]:
df = pd.read_csv(r'C:\Users\priya\OneDrive\Desktop\greenAI\day-8\SVM\heart.csv')
df.head()


Unnamed: 0,Age,Sex,ChestPain,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,Thal,AHD
0,63,1,typical,145,233,1,2,150,0,2.3,3,0,fixed,No
1,67,1,asymptomatic,160,286,0,2,108,1,1.5,2,3,normal,Yes
2,67,1,asymptomatic,120,229,0,2,129,1,2.6,2,2,reversable,Yes
3,37,1,nonanginal,130,250,0,0,187,0,3.5,3,0,normal,No
4,41,0,nontypical,130,204,0,2,172,0,1.4,1,0,normal,No


In [22]:
# Convert boolean columns to int for compatibility
df_new = pd.get_dummies(df, columns=['ChestPain', 'Thal'], drop_first=True)
for col in df_new.select_dtypes(include='bool').columns:
    df_new[col] = df_new[col].astype(int)

In [23]:
x = df_new.drop('AHD', axis=1)
y = df_new['AHD']

In [24]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y = le.fit_transform(y)

In [25]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [26]:
df_new.head()

Unnamed: 0,Age,Sex,RestBP,Chol,Fbs,RestECG,MaxHR,ExAng,Oldpeak,Slope,Ca,AHD,ChestPain_nonanginal,ChestPain_nontypical,ChestPain_typical,Thal_normal,Thal_reversable
0,63,1,145,233,1,2,150,0,2.3,3,0,No,0,0,1,0,0
1,67,1,160,286,0,2,108,1,1.5,2,3,Yes,0,0,0,1,0
2,67,1,120,229,0,2,129,1,2.6,2,2,Yes,0,0,0,0,1
3,37,1,130,250,0,0,187,0,3.5,3,0,No,1,0,0,1,0
4,41,0,130,204,0,2,172,0,1.4,1,0,No,0,1,0,1,0


In [27]:
# Retrain SVM with scaled features
model = svm.SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
model.fit(x_train, y_train)

In [28]:
y_pred = model.predict(x_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.8852459016393442
              precision    recall  f1-score   support

           0       0.84      0.93      0.89        29
           1       0.93      0.84      0.89        32

    accuracy                           0.89        61
   macro avg       0.89      0.89      0.89        61
weighted avg       0.89      0.89      0.89        61



In [29]:
# Grid search with scaled features
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 0.01, 0.1, 1],
    'kernel': ['rbf', 'linear']
}
grid_search = GridSearchCV(svm.SVC(random_state=42), param_grid, cv=5)
grid_search.fit(x_train, y_train)
print("Best parameters:", grid_search.best_params_)

# Evaluate best estimator
best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(x_test)
print("Best Model Accuracy:", accuracy_score(y_test, y_pred_best))
print(classification_report(y_test, y_pred_best))

Best parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}
Best Model Accuracy: 0.8852459016393442
              precision    recall  f1-score   support

           0       0.87      0.90      0.88        29
           1       0.90      0.88      0.89        32

    accuracy                           0.89        61
   macro avg       0.88      0.89      0.89        61
weighted avg       0.89      0.89      0.89        61

