# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, r2_score

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, StackingClassifier

# Load Datasets

In [4]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("johnsmith88/heart-disease-dataset")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/johnsmith88/heart-disease-dataset?dataset_version_number=2...


100%|██████████| 6.18k/6.18k [00:00<00:00, 2.16MB/s]

Extracting files...
Path to dataset files: /root/.cache/kagglehub/datasets/johnsmith88/heart-disease-dataset/versions/2





In [5]:
import os
os.listdir(path)

['heart.csv']

In [6]:
df = pd.read_csv(path + '/heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


# Train Test Split

In [7]:
X = df.drop('target', axis=1)
y = df['target']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Base Model

In [12]:
model1 = LogisticRegression(max_iter=1000)
model2 = DecisionTreeClassifier(max_depth=15)
model3 = KNeighborsClassifier()

In [13]:
model1.fit(X_train, y_train)
model2.fit(X_train, y_train)
model3.fit(X_train, y_train)

In [16]:
accuracy_score(y_test, model1.predict(X_test))

0.7951219512195122

In [17]:
accuracy_score(y_test, model2.predict(X_test))

0.9853658536585366

In [18]:
accuracy_score(y_test, model3.predict(X_test))

0.7317073170731707

# Stacking ALgorithm

## Model 1

In [20]:
estimators = [
    ('lr', LogisticRegression(max_iter=5000)),
    ('dt', DecisionTreeClassifier(max_depth=15)),
    ('knn', KNeighborsClassifier())
]

In [24]:
stacking1 = StackingClassifier(
    estimators = estimators,
    final_estimator = RandomForestClassifier(),
    cv = 10
)

In [25]:
stacking1.fit(X_train, y_train)

In [26]:
accuracy_score(y_test, stacking1.predict(X_test))

0.9804878048780488

## Model 2

In [27]:
stacking2 = StackingClassifier(
    estimators = estimators,
    final_estimator = GradientBoostingClassifier(),
    cv = 10
)

In [28]:
stacking2.fit(X_train, y_train)

In [29]:
accuracy_score(y_test, stacking2.predict(X_test))

0.9804878048780488