# Problem Statement
The goal of this project is to build a machine learning model to predict the likelihood of heart disease using the UCI Heart Disease dataset. This dataset includes various patient attributes such as age, gender, blood pressure, cholesterol levels, and other medical factors. The model will be trained to classify patients into categories of high or low risk for heart disease based on these features, providing valuable insights for early diagnosis and potential preventive measures.

# Participants
1. **Vivek Tripathi 2201330100292**
2. **Toshak Bhat 2201330100270**
3. **Shradha Baghel 2201330100292**

In [None]:
# Importing Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report

# Read the processed Cleveland file of the UCI Heart Disease data.
# Column names are supplied explicitly through `names`.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
columns = [
    "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
    "thalach", "exang", "oldpeak", "slope", "ca", "thal", "target",
]
# "?" is parsed as a missing value; every row containing one is discarded.
data = pd.read_csv(url, names=columns, na_values="?").dropna()

# Predictors: every column except the label.
X = data.drop(columns="target")
# Binary label: any target value above 0 becomes 1 (disease), otherwise 0.
y = np.where(data["target"] > 0, 1, 0)

# Hold out 20% of the rows for testing, stratified on the binary label.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardise the features; the scaler learns its statistics from the
# training split only and is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# RBF-kernel support vector classifier; probability=True is requested so the
# fitted model can also produce class probabilities.
model_svc = SVC(
    kernel="rbf",
    probability=True,
    random_state=42,
)
# Train on the standardised training features. The call is the last
# expression, so the fitted estimator is shown as the cell output.
model_svc.fit(X_train_scaled, y_train)

In [None]:
# Sanity check: predict the label of the first test row. The slice keeps the
# 2-D (1, n_features) shape that `predict` expects.
model_svc.predict(X_test_scaled[:1])

array([0])

In [12]:
# Predict every held-out row and report the share of correct labels.
y_pred = model_svc.predict(X_test_scaled)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
# Bare last expression so the notebook displays the predicted labels.
y_pred

Accuracy: 85.00%


array([0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1])

In [13]:
# Per-class precision, recall and F1 of the SVC on the test split.
svc_report = classification_report(y_test, y_pred)
print("Classification Report\n", svc_report)


Classification Report
               precision    recall  f1-score   support

           0       0.83      0.91      0.87        32
           1       0.88      0.79      0.83        28

    accuracy                           0.85        60
   macro avg       0.85      0.85      0.85        60
weighted avg       0.85      0.85      0.85        60



array([0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1])

In [14]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Preview the first five rows of the cleaned dataset.
data.head(n=5)


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0


In [None]:
# Search space for the SVC grid search: 4 values of C x 4 kernels x 2 gamma
# settings = 32 candidate combinations.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    kernel=['linear', 'rbf', 'poly', 'sigmoid'],
    gamma=['scale', 'auto'],
)


In [None]:
# Exhaustive 5-fold cross-validated search over `param_grid` for an SVC.
# The search is fitted on the standardised features (X_train_scaled), the same
# representation `model_svc` is trained on. SVC kernels are sensitive to
# feature scale, and fitting on the raw columns yields parameters that do not
# transfer to the scaled model and can make some kernels extremely slow.
# NOTE(review): the scaler was fitted on the full training split, so each CV
# fold sees statistics from its validation part. Wrapping scaler + SVC in a
# Pipeline would remove that, at the cost of renaming the grid keys.
grid_search = GridSearchCV(estimator=SVC(), param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train_scaled, y_train)
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)


Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
# Collect the cross-validation results of the grid search into a DataFrame.
results = grid_search.cv_results_
df_results = pd.DataFrame(results)

# Heatmap for C and gamma.
# Every (C, gamma) pair appears once per kernel in the grid, so a plain
# `DataFrame.pivot` fails on duplicate index/column entries (and recent pandas
# versions no longer accept its arguments positionally). `pivot_table`
# aggregates the duplicates instead; "max" keeps, for each (C, gamma) cell,
# the mean CV score of the best-performing kernel.
pivot_table = df_results.pivot_table(
    index='param_C',
    columns='param_gamma',
    values='mean_test_score',
    aggfunc='max',
)

plt.figure(figsize=(8, 6))
sns.heatmap(pivot_table, annot=True, cmap="YlGnBu", fmt=".3f")
plt.title('Hyperparameter Tuning for SVC')
plt.xlabel('Gamma')
plt.ylabel('C')
plt.show()


In [None]:
import pickle

# Persist the fitted SVC and the fitted scaler side by side, so that new
# inputs can be standardised the same way before prediction.
for filename, artifact in (("model_svc.pkl", model_svc), ("scaler.pkl", scaler)):
    with open(filename, "wb") as handle:
        pickle.dump(artifact, handle)


In [None]:
# Offer the saved artefacts as browser downloads when running on Google Colab.
# The `google.colab` package is only importable inside Colab; anywhere else the
# import would raise and stop a Restart-and-Run-All, so it is guarded here.
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available; skipping download of model_svc.pkl and scaler.pkl.")
else:
    files.download("model_svc.pkl")
    files.download("scaler.pkl")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Comparison with other algorithms

In [None]:
# Candidate classifiers to compare, all seeded with random_state=42.
# NOTE(review): `use_label_encoder` is kept as in the original; newer XGBoost
# releases may warn that it is unused — confirm against the installed version.
models = {
    "Logistic Regression": LogisticRegression(random_state=42),
    "SVM": SVC(kernel="rbf", probability=True, random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="logloss", random_state=42)
}

# Train and Evaluate Models
# Every model is fitted and scored on the standardised features. Logistic
# regression and the RBF SVM are scale-sensitive, and using the same inputs as
# `model_svc` keeps this comparison consistent with the SVC evaluated earlier.
results = {}
for model_name, model in models.items():
    print(f"Training {model_name}...")
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    # Positive-class score for ROC-AUC: prefer probabilities, fall back to the
    # decision function for estimators that do not expose predict_proba.
    if hasattr(model, "predict_proba"):
        y_prob = model.predict_proba(X_test_scaled)[:, 1]
    else:
        y_prob = model.decision_function(X_test_scaled)

    accuracy = accuracy_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_prob)

    print(f"{model_name} Results:")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"AUC-ROC: {roc_auc:.2f}")
    print(classification_report(y_test, y_pred))
    print("-" * 40)

    results[model_name] = {"Accuracy": accuracy, "AUC-ROC": roc_auc}

# Display Results
print("\nComparison of Models:")
for model_name, metrics in results.items():
    print(f"{model_name}: Accuracy = {metrics['Accuracy']:.2f}, AUC-ROC = {metrics['AUC-ROC']:.2f}")
