In [None]:
# Install necessary libraries
!pip install -U scikit-learn imbalanced-learn

Collecting scikit-learn
  Downloading scikit_learn-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.1/12.1 MB[0m [31m46.9 MB/s[0m eta [36m0:00:00[0m
Collecting imbalanced-learn
  Downloading imbalanced_learn-0.12.2-py3-none-any.whl (257 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.0/258.0 kB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: scikit-learn, imbalanced-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.2.2
    Uninstalling scikit-learn-1.2.2:
      Successfully uninstalled scikit-learn-1.2.2
  Attempting uninstall: imbalanced-learn
    Found existing installation: imbalanced-learn 0.10.1
    Uninstalling imbalanced-learn-0.10.1:
      Successfully uninstalled imbalanced-learn-0.10.1
Successfully installed imbalanced-learn-0.12.2 scikit-learn-1.4.2


In [None]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTE
from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
import warnings

In [None]:
# Read heart.csv into a DataFrame; every later cell derives its data from `df`.
df = pd.read_csv(filepath_or_buffer='/content/heart.csv')

In [None]:
# Suppress warnings
warnings.filterwarnings('ignore')

In [None]:
# Split the frame into the label column (`target`) and the predictor columns.
y = df['target']
X = df.drop(columns=['target'])

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Preprocessing pipeline: work out which training columns go down which branch.
# int64/float64 columns are treated as numeric, object columns as categorical.
numeric_features, categorical_features = (
    X_train.select_dtypes(include=dtype_names).columns
    for dtype_names in (['int64', 'float64'], ['object'])
)

In [None]:
# Numeric branch: fill gaps with the column median, then standardise.
numeric_transformer = make_pipeline(
    SimpleImputer(strategy='median'),
    StandardScaler(),
)

# Categorical branch: fill gaps with the literal 'missing', then one-hot encode.
# handle_unknown='ignore' keeps transform from failing on categories unseen during fit.
categorical_transformer = make_pipeline(
    SimpleImputer(strategy='constant', fill_value='missing'),
    OneHotEncoder(handle_unknown='ignore'),
)

# Route each column group through its own branch.
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

In [None]:
# Model Selection and Hyperparameter Tuning
# Candidate classifiers, keyed by the display name used in the printed reports.
# The SVCs are created with probability=True up front: soft voting and probability-based
# metrics need predict_proba, and enabling it here means the estimators do not have to be
# patched in place after construction.
models = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42),
    'Linear SVM': SVC(kernel='linear', probability=True, random_state=42),
    'Polynomial SVM': SVC(kernel='poly', probability=True, random_state=42),
    'RBF SVM': SVC(kernel='rbf', probability=True, random_state=42)
}

In [None]:
# Train models: wrap every candidate in preprocessing + estimator and fit it on the
# training split. Fitted pipelines are stored under the same display name.
trained_models = {}
for name, model in models.items():
    if isinstance(model, SVC):
        # Make sure the SVC exposes predict_proba before it is fitted.
        model.set_params(probability=True)
    trained_models[name] = make_pipeline(preprocessor, model).fit(X_train, y_train)


In [None]:
# Perform ensemble voting: combine all pipelines, averaging their predicted
# class probabilities (soft voting).
voting_clf = VotingClassifier(
    estimators=list(trained_models.items()),
    voting='soft',
)

In [None]:
# Fit the soft-voting ensemble on the training split.
voting_clf.fit(X=X_train, y=y_train)

In [None]:
# Evaluate models: accuracy, per-class report and ROC AUC for each fitted pipeline
# on the held-out test split.
for name, model in trained_models.items():
    y_pred = model.predict(X_test)
    # ROC AUC has to be computed from continuous scores. Passing hard 0/1 predictions
    # collapses the curve to a single operating point and merely reproduces balanced
    # accuracy. Column 1 is the probability of class 1, as in the ensemble evaluation.
    y_score = model.predict_proba(X_test)[:, 1]
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Test Accuracy: {accuracy:.2f}")
    print(classification_report(y_test, y_pred))
    print(f"{name} ROC AUC Score: {roc_auc_score(y_test, y_score):.2f}")

Random Forest Test Accuracy: 0.84
              precision    recall  f1-score   support

           0       0.83      0.83      0.83        29
           1       0.84      0.84      0.84        32

    accuracy                           0.84        61
   macro avg       0.84      0.84      0.84        61
weighted avg       0.84      0.84      0.84        61

Random Forest ROC AUC Score: 0.84
Gradient Boosting Test Accuracy: 0.79
              precision    recall  f1-score   support

           0       0.77      0.79      0.78        29
           1       0.81      0.78      0.79        32

    accuracy                           0.79        61
   macro avg       0.79      0.79      0.79        61
weighted avg       0.79      0.79      0.79        61

Gradient Boosting ROC AUC Score: 0.79
Linear SVM Test Accuracy: 0.87
              precision    recall  f1-score   support

           0       0.86      0.86      0.86        29
           1       0.88      0.88      0.88        32

    acc

In [None]:
# Rebuild the model registry with probability estimation enabled on every SVC.
# NOTE(review): in the cells shown, this reassignment comes after the training and
# ensemble-fit cells and `models` is not read again below it, so it only takes effect
# if the training cell is re-run afterwards - confirm the intended cell order.
models = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42),
}
models.update(
    (label, SVC(kernel=kernel, probability=True, random_state=42))
    for label, kernel in (
        ('Linear SVM', 'linear'),
        ('Polynomial SVM', 'poly'),
        ('RBF SVM', 'rbf'),
    )
)


In [None]:
# Evaluate ensemble voting classifier on the held-out test split.
# Class-1 probabilities feed the ROC AUC; thresholding them at 0.5 gives the hard labels
# used for accuracy and the classification report.
y_pred_voting = voting_clf.predict_proba(X_test)[:, 1]
y_pred_voting_binary = (y_pred_voting >= 0.5).astype(int)
accuracy_voting = accuracy_score(y_test, y_pred_voting_binary)
print(
    f"Ensemble Voting Test Accuracy: {accuracy_voting:.2f}",
    classification_report(y_test, y_pred_voting_binary),
    f"Ensemble Voting ROC AUC Score: {roc_auc_score(y_test, y_pred_voting):.2f}",
    sep="\n",
)


Ensemble Voting Test Accuracy: 0.87
              precision    recall  f1-score   support

           0       0.89      0.83      0.86        29
           1       0.85      0.91      0.88        32

    accuracy                           0.87        61
   macro avg       0.87      0.87      0.87        61
weighted avg       0.87      0.87      0.87        61

Ensemble Voting ROC AUC Score: 0.93


In [None]:
# Calculate average accuracy: mean test accuracy across every individual pipeline
# plus the voting ensemble.
all_accuracies = [
    accuracy_score(y_test, fitted.predict(X_test))
    for fitted in trained_models.values()
]
all_accuracies.append(accuracy_voting)
average_accuracy = np.mean(all_accuracies)
print(f"Average Accuracy: {average_accuracy:.2f}")

Average Accuracy: 0.86
