In [8]:
import pandas as pd
from scipy.stats import zscore
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [9]:
# Step 1: Load data
df = pd.read_csv('heart.csv')
print("Step 1: Data loaded")



Step 1: Data loaded


In [10]:
# Step 2: Remove outliers
z = df.select_dtypes(include='number').apply(zscore)
df = df[(z.abs() <= 3).all(axis=1)]
print("Step 2: Outliers removed")



Step 2: Outliers removed


In [11]:
# Step 3: Encode categorical columns
df = pd.get_dummies(df, columns=['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope'], drop_first=True)
print("Step 3: Encoding done")

Step 3: Encoding done


In [12]:
# Step 4: Scale data
X = df.drop('HeartDisease', axis=1)
y = df['HeartDisease']
X = StandardScaler().fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Step 4: Scaling done")


Step 4: Scaling done


In [6]:
# Step 5: SVM
svm = SVC().fit(X_train, y_train)
svm_score = accuracy_score(y_test, svm.predict(X_test))
print(f"Step 5a: SVM = {svm_score:.3f}")

bag_svm = BaggingClassifier(SVC(), n_estimators=10).fit(X_train, y_train)
bag_svm_score = accuracy_score(y_test, bag_svm.predict(X_test))
print(f"Step 5b: Bagging SVM = {bag_svm_score:.3f}")



Step 5a: SVM = 0.889
Step 5b: Bagging SVM = 0.889


In [7]:
# Step 6: Decision Tree
dt = DecisionTreeClassifier().fit(X_train, y_train)
dt_score = accuracy_score(y_test, dt.predict(X_test))
print(f"Step 6a: Decision Tree = {dt_score:.3f}")

bag_dt = BaggingClassifier(DecisionTreeClassifier(), n_estimators=10).fit(X_train, y_train)
bag_dt_score = accuracy_score(y_test, bag_dt.predict(X_test))
print(f"Step 6b: Bagging DT = {bag_dt_score:.3f}")

# Step 7: Compare
print("\nStep 7: Results")
print(f"SVM: {svm_score:.3f} -> Bagging: {bag_svm_score:.3f} (Improved: {bag_svm_score-svm_score:.3f})")
print(f"DT:  {dt_score:.3f} -> Bagging: {bag_dt_score:.3f} (Improved: {bag_dt_score-dt_score:.3f})")
print("\nBagging works better with Decision Trees because they overfit more!")

Step 6a: Decision Tree = 0.778
Step 6b: Bagging DT = 0.844

Step 7: Results
SVM: 0.889 -> Bagging: 0.889 (Improved: 0.000)
DT:  0.778 -> Bagging: 0.844 (Improved: 0.067)

Bagging works better with Decision Trees because they overfit more!
