In [1]:
import numpy as np 
import pandas as pd

In [2]:
df = pd.read_csv('heart.csv')
df.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [3]:
df.shape

(918, 12)

In [4]:
df = pd.get_dummies(df,columns=['Sex','ChestPainType','RestingECG','ExerciseAngina','ST_Slope'], drop_first=True)
df.sample(5)

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease,Sex_M,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_Normal,RestingECG_ST,ExerciseAngina_Y,ST_Slope_Flat,ST_Slope_Up
762,40,110,167,0,114,2.0,1,1,0,0,0,0,0,1,1,0
536,62,133,0,1,119,1.2,1,1,0,1,0,0,1,1,1,0
248,45,130,219,0,130,1.0,1,1,0,0,0,0,1,1,1,0
771,55,140,217,0,111,5.6,1,1,0,0,0,1,0,1,0,0
368,57,140,0,0,120,2.0,1,1,0,0,0,1,0,1,1,0


In [5]:
df.shape

(918, 16)

In [6]:
X = df[['Age','RestingBP','Cholesterol','FastingBS','MaxHR','Oldpeak','Sex_M','ChestPainType_ATA','ChestPainType_NAP','ChestPainType_TA','RestingECG_Normal','RestingECG_ST','ExerciseAngina_Y','ST_Slope_Flat','ST_Slope_Up']]
y = df['HeartDisease']

In [7]:
X

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,Sex_M,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_Normal,RestingECG_ST,ExerciseAngina_Y,ST_Slope_Flat,ST_Slope_Up
0,40,140,289,0,172,0.0,1,1,0,0,1,0,0,0,1
1,49,160,180,0,156,1.0,0,0,1,0,1,0,0,1,0
2,37,130,283,0,98,0.0,1,1,0,0,0,1,0,0,1
3,48,138,214,0,108,1.5,0,0,0,0,1,0,1,1,0
4,54,150,195,0,122,0.0,1,0,1,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,110,264,0,132,1.2,1,0,0,1,1,0,0,1,0
914,68,144,193,1,141,3.4,1,0,0,0,1,0,0,1,0
915,57,130,131,0,115,1.2,1,0,0,0,1,0,1,1,0
916,57,130,236,0,174,0.0,0,1,0,0,0,0,0,1,0


In [8]:
y

0      0
1      1
2      0
3      1
4      0
      ..
913    1
914    1
915    1
916    1
917    0
Name: HeartDisease, Length: 918, dtype: int64

In [9]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=4)

In [10]:
X_train.shape

(734, 15)

In [11]:
X_test.shape

(184, 15)

# Modelling

In [12]:
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# 1. Bagging

In [13]:
bag = BaggingClassifier(                            # bagging -> row sampling with replacement
    base_estimator=DecisionTreeClassifier(),
    n_estimators=100,
    max_samples=0.5,
    bootstrap=True,                      
    random_state=4
)

In [14]:
bag.fit(X_train,y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=0.5,
                  n_estimators=100, random_state=4)

In [15]:
y_pred = bag.predict(X_test)

In [16]:
accuracy_score(y_test,y_pred)

0.8695652173913043

# 2. Pasting

In [17]:
paste = BaggingClassifier(                           # pasting -> row sampling without replacement
    base_estimator=DecisionTreeClassifier(),
    n_estimators=100,
    max_samples=0.5,
    bootstrap=False,
    random_state=4
)

In [18]:
paste.fit(X_train,y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), bootstrap=False,
                  max_samples=0.5, n_estimators=100, random_state=4)

In [19]:
y_pred = paste.predict(X_test)

In [20]:
accuracy_score(y_test,y_pred)

0.8695652173913043

# 3. Random Subspaces

In [21]:
subspaces = BaggingClassifier(                          # random subspaces -> column sampling 
    base_estimator=DecisionTreeClassifier(),
    n_estimators=100,
    max_samples=1.0,
    bootstrap=False,
    max_features=0.5,
    bootstrap_features=True,
    random_state=4
)

In [22]:
subspaces.fit(X_train,y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), bootstrap=False,
                  bootstrap_features=True, max_features=0.5, n_estimators=100,
                  random_state=4)

In [23]:
y_pred = subspaces.predict(X_test)

In [24]:
accuracy_score(y_test,y_pred)

0.8913043478260869

# 4. Random Patches

In [25]:
patches = BaggingClassifier(                          # random patches -> row & column sampling 
    base_estimator=DecisionTreeClassifier(),
    n_estimators=100,
    max_samples=0.5,
    bootstrap=True,
    max_features=0.5,
    bootstrap_features=True,
    random_state=4
)

In [26]:
patches.fit(X_train,y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                  bootstrap_features=True, max_features=0.5, max_samples=0.5,
                  n_estimators=100, random_state=4)

In [27]:
y_pred = patches.predict(X_test)

In [28]:
accuracy_score(y_test,y_pred)

0.8967391304347826

# Applying GridSearchCV

In [29]:
from sklearn.model_selection import GridSearchCV

In [30]:
parameters = {
    'n_estimators': [100,200,300,400], 
    'max_samples': [0.25,0.4,0.5,0.6],
    'bootstrap' : [True,False],
    'max_features' : [0.25,0.4,0.5,0.7,1.0]
    }

In [40]:
model = BaggingClassifier(base_estimator= DecisionTreeClassifier(random_state= 4))

In [41]:
grid = GridSearchCV(model, parameters, cv=5, n_jobs=-1)

In [42]:
grid.fit(X_train,y_train)

GridSearchCV(cv=5,
             estimator=BaggingClassifier(base_estimator=DecisionTreeClassifier(random_state=4)),
             n_jobs=-1,
             param_grid={'bootstrap': [True, False],
                         'max_features': [0.25, 0.4, 0.5, 0.7, 1.0],
                         'max_samples': [0.25, 0.4, 0.5, 0.6],
                         'n_estimators': [100, 200, 300, 400]})

In [43]:
grid.best_score_

0.8719317864131954

In [44]:
grid.best_params_

{'bootstrap': True,
 'max_features': 0.7,
 'max_samples': 0.6,
 'n_estimators': 200}

In [45]:
model1 = BaggingClassifier(                          
    base_estimator=DecisionTreeClassifier(),
    n_estimators=200,
    max_samples=0.6,
    bootstrap=True,
    max_features=0.7,
    bootstrap_features=True,
    random_state=4
)

In [46]:
model1.fit(X_train,y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                  bootstrap_features=True, max_features=0.7, max_samples=0.6,
                  n_estimators=200, random_state=4)

In [47]:
y_pred = model1.predict(X_test)

In [48]:
accuracy_score(y_test,y_pred)

0.8967391304347826