# Hard Voting

In [1]:
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pandas as pd

df = pd.read_csv("heart_tidy.csv")
df = df.drop(['Unnamed: 14','Unnamed: 15','Unnamed: 16','Unnamed: 17','Unnamed: 18'],axis=1)

# Assuming 'data' is your dataframe containing the dataset
X = df.drop('HeartDiseasePresent', axis=1)  # Features
y = df['HeartDiseasePresent']  # Target variable
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define base models
rf = RandomForestClassifier(n_estimators=100, random_state=42)
lr = LogisticRegression(random_state=42)
knn = KNeighborsClassifier()

# Create a voting classifier with hard voting
voting_clf = VotingClassifier(estimators=[('rf', rf), ('lr', lr), ('knn', knn)], voting='hard')

# Train the voting classifier
voting_clf.fit(X_train, y_train)

# Make predictions
y_pred = voting_clf.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy (Hard Voting):", accuracy)


Accuracy (Hard Voting): 0.85


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# Soft Voting

In [2]:
# Create a voting classifier with soft voting
voting_clf = VotingClassifier(estimators=[('rf', rf), ('lr', lr), ('knn', knn)], voting='soft')

# Train the voting classifier
voting_clf.fit(X_train, y_train)

# Make predictions
y_pred = voting_clf.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy (Soft Voting):", accuracy)


Accuracy (Soft Voting): 0.85


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# weighted voting

In [3]:
# Define weights for each model
weights = [0.4, 0.3, 0.3]

# Create a voting classifier with weighted voting
voting_clf = VotingClassifier(estimators=[('rf', rf), ('lr', lr), ('knn', knn)], voting='soft', weights=weights)

# Train the voting classifier
voting_clf.fit(X_train, y_train)

# Make predictions
y_pred = voting_clf.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy (Weighted Voting):", accuracy)


Accuracy (Weighted Voting): 0.85


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# Adaptive Voting

In [4]:
# Create a voting classifier with soft voting
voting_clf = VotingClassifier(estimators=[('rf', rf), ('lr', lr), ('knn', knn)], voting='soft')

# Train the voting classifier
voting_clf.fit(X_train, y_train)

# Make predictions
y_pred = voting_clf.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy (Adaptive Voting):", accuracy)


Accuracy (Adaptive Voting): 0.85


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# Stacked Voting

In [12]:
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_predict
import numpy as np
import warnings 
warnings.filterwarnings("ignore")

# Generate predictions from base models using cross-validation on the training data
rf_pred_train = cross_val_predict(rf, X_train, y_train, cv=5, method='predict_proba')
lr_pred_train = cross_val_predict(lr, X_train, y_train, cv=5, method='predict_proba')
knn_pred_train = cross_val_predict(knn, X_train, y_train, cv=5, method='predict_proba')

 #Fit base models on the training data
rf.fit(X_train, y_train)
lr.fit(X_train, y_train)
knn.fit(X_train, y_train)

# Generate predictions on the training data
rf_pred_train = rf.predict_proba(X_train)
lr_pred_train = lr.predict_proba(X_train)
knn_pred_train = knn.predict_proba(X_train)

# Concatenate predictions as features for training data
X_train_stacked = np.column_stack((rf_pred_train[:, 1], lr_pred_train[:, 1], knn_pred_train[:, 1]))

# Train meta-learner (Logistic Regression) on the training data
meta_learner = LogisticRegression()
meta_learner.fit(X_train_stacked, y_train)

# Generate predictions on the testing data
rf_pred_test = rf.predict_proba(X_test)
lr_pred_test = lr.predict_proba(X_test)
knn_pred_test = knn.predict_proba(X_test)

# Concatenate predictions as features for testing data
X_test_stacked = np.column_stack((rf_pred_test[:, 1], lr_pred_test[:, 1], knn_pred_test[:, 1]))

# Make predictions on the testing data using meta-learner
stacked_pred = meta_learner.predict(X_test_stacked)

# Evaluate accuracy
accuracy = accuracy_score(y_test, stacked_pred)
print("Accuracy (Stacked Voting):", accuracy)

Accuracy (Stacked Voting): 0.85


In [14]:
!pip install xgboost

Defaulting to user installation because normal site-packages is not writeable
Collecting xgboost
  Obtaining dependency information for xgboost from https://files.pythonhosted.org/packages/24/ec/ad387100fa3cc2b9b81af0829b5ecfe75ec5bb19dd7c19d4fea06fb81802/xgboost-2.0.3-py3-none-win_amd64.whl.metadata
  Using cached xgboost-2.0.3-py3-none-win_amd64.whl.metadata (2.0 kB)
Downloading xgboost-2.0.3-py3-none-win_amd64.whl (99.8 MB)
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB 262.6 kB/s eta 0:06:20
   ---------------------------------------- 0.0/99.8 MB 262.6 kB/s eta 0:06:20
   ---------------------------------------- 0.0/99.8 MB 178.6 kB/s eta 0:09:19
   ---------------------------------------- 0.1/99.8