# Hard Voting

In [13]:
import os
import warnings

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

warnings.filterwarnings("ignore")

In [10]:
# Load the tidy heart-disease dataset into `df`.
# The file location can be overridden with the HEART_CSV_PATH environment variable so
# the notebook is runnable on other machines; when the variable is unset the original
# author's local path is used, which keeps the previous behaviour.
DEFAULT_CSV_PATH = r"C:\Users\USER\Desktop\DATA  SCIENCE\Excel Workbooks Tsi\heart_tidy.csv"
df = pd.read_csv(os.environ.get("HEART_CSV_PATH", DEFAULT_CSV_PATH))

In [12]:
# Preview the first ten rows to sanity-check the loaded columns.
df.iloc[:10]

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,CholesterolLevel,FastingBloodSugar,RestingElectrocardiographicResult,MaxHeartRate,ExerciseAngina,STDepression,STSegmentSlope,NumMajorVessels,ThalliumStressRest,HeartDiseasePresent
0,63,1,1,145,233,1,2,150,0,2.3,3,0,6,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3,3,1
2,67,1,4,120,229,0,2,129,1,2.6,2,2,7,1
3,37,1,3,130,250,0,0,187,0,3.5,3,0,3,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0,3,0
5,56,1,2,120,236,0,0,178,0,0.8,1,0,3,0
6,62,0,4,140,268,0,2,160,0,3.6,3,2,3,1
7,57,0,4,120,354,0,0,163,1,0.6,1,0,3,0
8,63,1,4,130,254,0,2,147,0,1.4,2,1,7,1
9,53,1,4,140,203,1,2,155,1,3.1,3,0,7,1


In [11]:
# Remove every auto-named "Unnamed: N" column instead of a hard-coded list.
# - Selecting by prefix makes the cell idempotent: re-running it (or loading a CSV that
#   lacks some of these columns) no longer raises KeyError.
# - This also removes 'Unnamed: 0', which in the preview above holds the row number
#   (0, 1, 2, ...) and would otherwise be fed to the models as a feature.
unnamed_cols = [col for col in df.columns if str(col).startswith('Unnamed')]
df = df.drop(columns=unnamed_cols)

In [14]:
# Separate the label column of `df` from the predictor columns.
target_col = 'HeartDiseasePresent'
y = df[target_col]
X = df.drop(columns=[target_col])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Base learners; the later ensemble cells reuse these three objects.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
lr = LogisticRegression(random_state=42)
knn = KNeighborsClassifier()

# Combine the three base learners with voting='hard' and fit on the training split.
voting_clf = VotingClassifier(
    estimators=[('rf', rf), ('lr', lr), ('knn', knn)],
    voting='hard',
)
voting_clf.fit(X_train, y_train)

# Score the ensemble on the held-out split.
y_pred = voting_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy (Hard Voting):", accuracy)


Accuracy (Hard Voting): 0.85


# Soft Voting

In [15]:
# Soft voting: same three base learners, combined with voting='soft'.
soft_estimators = [('rf', rf), ('lr', lr), ('knn', knn)]
voting_clf = VotingClassifier(estimators=soft_estimators, voting='soft')

# Fit on the training split, then score on the held-out split.
voting_clf.fit(X_train, y_train)
y_pred = voting_clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy (Soft Voting):", accuracy)


Accuracy (Soft Voting): 0.8333333333333334


# Weighted Voting

In [16]:
# Relative importance given to rf, lr and knn (in that order) when combining them.
weights = [0.4, 0.3, 0.3]

# Soft-voting ensemble that applies the fixed weights above to the base learners.
weighted_estimators = [('rf', rf), ('lr', lr), ('knn', knn)]
voting_clf = VotingClassifier(
    estimators=weighted_estimators,
    voting='soft',
    weights=weights,
)

# Fit on the training split, then score on the held-out split.
voting_clf.fit(X_train, y_train)
y_pred = voting_clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy (Weighted Voting):", accuracy)


Accuracy (Weighted Voting): 0.8333333333333334


# Adaptive Voting

In [17]:
# Adaptive voting: instead of hand-picked weights, weight each base learner by how well
# it performs on the data. (Previously this cell was a copy of the plain soft-voting
# cell, so the "adaptive" score was just the soft-voting score again.)
adaptive_estimators = [('rf', rf), ('lr', lr), ('knn', knn)]

# Mean 5-fold cross-validated accuracy of each base learner, measured on the training
# split only so the held-out test split never influences the weights.
adaptive_weights = [
    cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()
    for _, model in adaptive_estimators
]

# Soft-voting ensemble using the data-driven weights (better learners count for more).
voting_clf = VotingClassifier(
    estimators=adaptive_estimators,
    voting='soft',
    weights=adaptive_weights,
)

# Train the voting classifier
voting_clf.fit(X_train, y_train)

# Make predictions
y_pred = voting_clf.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy (Adaptive Voting):", accuracy)


Accuracy (Adaptive Voting): 0.8333333333333334


# Stacked Voting

In [18]:
# Stacking: train a meta-learner on the base models' predicted probabilities.
#
# The meta-learner must be trained on OUT-OF-FOLD predictions, i.e. each training row is
# predicted by a model that did not see that row. Previously these predictions were
# computed and then overwritten by in-sample predict_proba output, which leaks the
# training labels into the meta-features.

# Out-of-fold class probabilities for every training row (5-fold cross-validation).
rf_pred_train = cross_val_predict(rf, X_train, y_train, cv=5, method='predict_proba')
lr_pred_train = cross_val_predict(lr, X_train, y_train, cv=5, method='predict_proba')
knn_pred_train = cross_val_predict(knn, X_train, y_train, cv=5, method='predict_proba')

# Meta-features for training: column 1 of each model's predict_proba output.
X_train_stacked = np.column_stack((rf_pred_train[:, 1], lr_pred_train[:, 1], knn_pred_train[:, 1]))

# Train meta-learner (Logistic Regression) on the out-of-fold meta-features.
meta_learner = LogisticRegression()
meta_learner.fit(X_train_stacked, y_train)

# cross_val_predict fits clones, so the base models themselves are fitted here on the
# full training split; these fitted models are only used to predict the test split.
rf.fit(X_train, y_train)
lr.fit(X_train, y_train)
knn.fit(X_train, y_train)

# Generate predictions on the testing data
rf_pred_test = rf.predict_proba(X_test)
lr_pred_test = lr.predict_proba(X_test)
knn_pred_test = knn.predict_proba(X_test)

# Meta-features for testing, built in the same column order as for training.
X_test_stacked = np.column_stack((rf_pred_test[:, 1], lr_pred_test[:, 1], knn_pred_test[:, 1]))

# Make predictions on the testing data using meta-learner
stacked_pred = meta_learner.predict(X_test_stacked)

# Evaluate accuracy
accuracy = accuracy_score(y_test, stacked_pred)
print("Accuracy (Stacked Voting):", accuracy)

Accuracy (Stacked Voting): 0.85
