# Import Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [15]:
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [5]:
import os
import kagglehub

# Import Datasets

In [11]:
# Download the latest version of the dataset, then list the files it contains.
path = kagglehub.dataset_download("schirmerchad/bostonhoustingmlnd")
print("Path to dataset files:", path)
for file_name in os.listdir(path):
    print(file_name)

Path to dataset files: /kaggle/input/bostonhoustingmlnd
housing.csv


In [12]:
# Load the housing data; os.path.join builds the path portably instead of
# hand-concatenating a "/" separator.
df = pd.read_csv(os.path.join(path, "housing.csv"))
df.head()

Unnamed: 0,RM,LSTAT,PTRATIO,MEDV
0,6.575,4.98,15.3,504000.0
1,6.421,9.14,17.8,453600.0
2,7.185,4.03,17.8,728700.0
3,6.998,2.94,18.7,701400.0
4,7.147,5.33,18.7,760200.0


# Train Test Split

In [13]:
# Features are every column except the target; MEDV is the value to predict.
X, y = df.drop(columns="MEDV"), df["MEDV"]

In [14]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Individual Model Creation

In [16]:
# Baseline regressors to compare against the bagging ensemble later on.
lr1 = LinearRegression()
# Tree construction is randomised (features are permuted at each split), so
# seed it with the same value used elsewhere in this notebook for repeatable scores.
dtr1 = DecisionTreeRegressor(random_state=42)
nbr1 = KNeighborsRegressor()

In [17]:
# Train each baseline on the training split. The final call is left as a bare
# expression so the notebook still renders the fitted KNN estimator.
for baseline in (lr1, dtr1):
    baseline.fit(X_train, y_train)
nbr1.fit(X_train, y_train)

## Prediction Check

In [18]:
# Test-set predictions from the three baselines, in the order lr1, dtr1, nbr1.
lr1_pred, dtr1_pred, nbr1_pred = (
    baseline.predict(X_test) for baseline in (lr1, dtr1, nbr1)
)


## Accuracy Check

In [19]:
# For regressors, .score() returns the coefficient of determination (R^2),
# not a classification accuracy, so the labels name the metric explicitly.
print("Linear Regression R^2 Score: ", lr1.score(X_test, y_test))
print("Decision Tree Regression R^2 Score: ", dtr1.score(X_test, y_test))
print("K Neighbors Regression R^2 Score: ", nbr1.score(X_test, y_test))

Linear Regression Accuracy:  0.691093400309851
Decision Tree Regression Accuracy:  0.7180865920270323
K Neighbors Regression Accuracy:  0.8173282035090312


# BaggingRegressor with Default Parameters
```python
class sklearn.ensemble.BaggingRegressor(
    estimator=None, n_estimators=10, *, max_samples=1.0,
    max_features=1.0, bootstrap=True, bootstrap_features=False,
    oob_score=False, warm_start=False, n_jobs=None,
    random_state=None, verbose=0
)
```

In [20]:
# Bagging ensemble with default hyper-parameters. Bootstrap sampling is random,
# so the seed is fixed (same value as the rest of the notebook) to make the
# reported score repeatable across kernel restarts.
bag_reg1 = BaggingRegressor(random_state=42)
bag_reg1.fit(X_train, y_train)

## Prediction

In [21]:
# Predict the test-set targets with the fitted bagging ensemble `bag_reg1`.
y_pred1 = bag_reg1.predict(X_test)

## Accuracy Check

In [26]:
# r2_score is the coefficient of determination, so label it as R^2 rather than accuracy.
print("Bagging Regressor R^2 Score: ", r2_score(y_test, y_pred1))

Bagging Regressor Accuracy:  0.8485946208049053


# Apply `GridSearchCV`

In [24]:

    # estimator=None, n_estimators=10, *, max_samples=1.0,
    # max_features=1.0, bootstrap=True, bootstrap_features=False,
    # oob_score=False, warm_start=False, n_jobs=None,
    # random_state=None, verbose=0

# Search space for the bagging ensemble: base learner, number of learners,
# the max_samples setting, and whether sampling uses replacement.
params = dict(
    estimator=[LinearRegression(), DecisionTreeRegressor(), KNeighborsRegressor()],
    n_estimators=[10, 50, 100, 500],
    max_samples=[0.5, 0.75, 1.0],
    bootstrap=[True, False],
)

In [29]:
# Seeded bagging ensemble whose hyper-parameters the grid search will vary.
estimators = BaggingRegressor(
    random_state=42,
    n_jobs=-1,
)

In [32]:
%%time
# Exhaustive 5-fold cross-validated search over `params`, using all available cores.
grid_bag_reg1 = GridSearchCV(
    estimator=estimators,
    param_grid=params,
    cv=5,
    n_jobs=-1,
)
grid_bag_reg1.fit(X_train, y_train)

CPU times: user 1.64 s, sys: 229 ms, total: 1.87 s
Wall time: 2min 13s


In [33]:
# Summarise the search: R^2 of the best estimator on the train and test splits,
# the best cross-validation score, and the winning hyper-parameters.
best_model = grid_bag_reg1.best_estimator_
train_r2 = best_model.score(X_train, y_train)
test_r2 = best_model.score(X_test, y_test)
print('Train R^2 Score : %.3f' % train_r2)
print('Test R^2 Score : %.3f' % test_r2)
print('Best R^2 Score Through Grid Search : %.3f' % grid_bag_reg1.best_score_)
print('Best Parameters : ', grid_bag_reg1.best_params_)

Train R^2 Score : 0.956
Test R^2 Score : 0.857
Best R^2 Score Through Grid Search : 0.832
Best Parameters :  {'bootstrap': False, 'estimator': DecisionTreeRegressor(), 'max_samples': 0.5, 'n_estimators': 100}


In [34]:
# Show the estimator selected by the grid search (rendered as the cell output).
grid_bag_reg1.best_estimator_

# Checks

In [35]:
# Rebuild the winning configuration by hand so its test score can be
# checked independently of the GridSearchCV object.
xd1 = BaggingRegressor(
    estimator=DecisionTreeRegressor(),
    n_estimators=100,
    max_samples=0.5,
    bootstrap=False,
    n_jobs=-1,
    random_state=42,
)

In [36]:
# Fit the hand-built ensemble and report its test-set R^2 in one expression
# (fit() returns the estimator itself, so the calls can be chained).
r2_score(y_test, xd1.fit(X_train, y_train).predict(X_test))

0.8566914626258881

In [46]:
# Draw one random row to use as a spot-check input for the model.
df.sample(n=1)

Unnamed: 0,RM,LSTAT,PTRATIO,MEDV
370,5.0,31.99,20.2,155400.0


In [50]:
# Spot-check the model on the sampled row (index 370: RM=5.0, LSTAT=31.99,
# PTRATIO=20.2). The input is a one-row DataFrame carrying the training column
# names: xd1 was fitted on a DataFrame, so a bare ndarray makes scikit-learn
# warn about missing feature names and silently depends on column order.
data = pd.DataFrame([[5.0, 31.99, 20.2]], columns=X_train.columns)
xd1.predict(data)



array([176337.])