# **Bagging**

## **1. For Classification**

In [52]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split , GridSearchCV
from sklearn.linear_model import LinearRegression , LogisticRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeClassifier , DecisionTreeRegressor
from sklearn.ensemble import BaggingClassifier,BaggingRegressor
from sklearn.metrics import r2_score , accuracy_score
from sklearn.svm import SVC 

In [18]:
X , y = make_classification(n_samples=10000,n_features=10,n_informative=3)

In [19]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

### **Using Decision Tree**

In [20]:
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train,y_train)
y_pred = dt.predict(X_test)
print("Accuracy Score of Decision Tree:",accuracy_score(y_test,y_pred))

Accuracy Score of Decision Tree: 0.8635


### **Using Bagging**

In [21]:
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators= 500,
    max_samples= 0.5,
    bootstrap=True,
    random_state=42
)

In [22]:
bag.fit(X_train,y_train)
y_pred = bag.predict(X_test)
print("Accuracy Score of Bagging:",accuracy_score(y_test,y_pred))

Accuracy Score of Bagging: 0.9085


### **Bagging Using SVM**

In [23]:
bag = BaggingClassifier(
    estimator=SVC(),
    n_estimators= 500,
    max_samples= 0.5,
    bootstrap=True,
    random_state=42
)

In [24]:
bag.fit(X_train,y_train)
y_pred = bag.predict(X_test)
print("Accuracy Score of Bagging using SVM:",accuracy_score(y_test,y_pred))

Accuracy Score of Bagging using SVM: 0.9055


### **Pasting**

In [27]:
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators= 500,
    max_samples= 0.5,
    bootstrap=False,
    random_state=42
)

In [28]:
bag.fit(X_train,y_train)
y_pred = bag.predict(X_test)
print("Accuracy Score of Pasting Classifier:",accuracy_score(y_test,y_pred))

Accuracy Score of Pasting Classifier: 0.909


### **Random Subspace**

In [None]:
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators= 500,
    max_samples= 0.5,
    bootstrap=False,
    bootstrap_features=True
    random_state=42
)

In [29]:
bag.fit(X_train,y_train)
y_pred = bag.predict(X_test)
print("Accuracy Score of Random Subspace Classifier:",accuracy_score(y_test,y_pred))

Accuracy Score of Random Subspace Classifier: 0.909


### **Random Patches**

In [31]:
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators= 500,
    max_samples= 0.5,
    max_features=0.5,
    bootstrap=True,
    bootstrap_features=True,
    random_state=42
)

In [32]:
bag.fit(X_train,y_train)
y_pred = bag.predict(X_test)
print("Random Patches classifier",accuracy_score(y_test,y_pred))

Random Patches classifier 0.9015


### **OOB Score**

In [33]:
bag = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators= 500,
    max_samples= 0.5,
    bootstrap=True,
    oob_score=True,
    random_state=42
)

In [34]:
bag.fit(X_train,y_train)

In [35]:
bag.oob_score_

0.906375

In [36]:
y_pred = bag.predict(X_test)
print("Accuracy",accuracy_score(y_test,y_pred))

Accuracy 0.9085


### **Applying GridSearchCV**

In [40]:
parameters = {
    'estimator':[DecisionTreeClassifier(),LogisticRegression(),SVC()],
    'n_estimators': [50,100,300,500],
    'max_samples': [0.25,0.5,0.75,1.0],
    'bootstrap':[True,False],
    'max_features':[0.3,0.5,0.7,1.0],
    'bootstrap_features':[True,False]
}

In [41]:
search = GridSearchCV(BaggingClassifier(),cv=3,param_grid=parameters)

In [None]:
# search.fit(X_train,y_train)

## **2. For Regression**

In [47]:
from sklearn import datasets

diabetes = datasets.load_diabetes()
X_diabetes, Y_diabetes = diabetes.data, diabetes.target
print('Dataset features names : '+ str(diabetes.feature_names))
print('Dataset features size : '+ str(diabetes.data.shape))
print('Dataset target size : '+ str(diabetes.target.shape))

Dataset features names : ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
Dataset features size : (442, 10)
Dataset target size : (442,)


In [50]:
X_train, X_test, Y_train, Y_test = train_test_split(X_diabetes, Y_diabetes , train_size=0.80, test_size=0.20, random_state=123)
print('Train/Test Sets Sizes : ',X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)

Train/Test Sets Sizes :  (353, 10) (89, 10) (353,) (89,)


In [53]:
lr = LinearRegression()
dt = DecisionTreeRegressor()
knn = KNeighborsRegressor()

In [54]:
lr.fit(X_train,Y_train)
dt.fit(X_train,Y_train)
knn.fit(X_train,Y_train)

In [55]:
y_pred1 = lr.predict(X_test)
y_pred2 = dt.predict(X_test)
y_pred3 = knn.predict(X_test)

In [56]:
print("R^2 score for LR",r2_score(Y_test,y_pred1))
print("R^2 score for DT",r2_score(Y_test,y_pred2))
print("R^2 score for KNN",r2_score(Y_test,y_pred3))

R^2 score for LR 0.5675895725793205
R^2 score for DT 0.13016757539822876
R^2 score for KNN 0.438839665879189


In [57]:
bag_regressor = BaggingRegressor(random_state=1)
bag_regressor.fit(X_train, Y_train)

In [58]:
Y_preds = bag_regressor.predict(X_test)

print('Training Coefficient of R^2 : %.3f'%bag_regressor.score(X_train, Y_train))
print('Test Coefficient of R^2 : %.3f'%bag_regressor.score(X_test, Y_test))

Training Coefficient of R^2 : 0.897
Test Coefficient of R^2 : 0.499
