In [1]:
# import required libraries and load dataset
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, BaggingRegressor, StackingRegressor, VotingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score

# load the Boston Housing dataset
boston = load_boston()
X = pd.DataFrame(boston.data, columns=boston.feature_names)
y = pd.Series(boston.target)

# initialize the regression models
lr = LinearRegression()
rf = RandomForestRegressor(random_state=42)
gb = GradientBoostingRegressor(random_state=42)
ab = AdaBoostRegressor(random_state=42)
bg = BaggingRegressor(random_state=42)
# stack the models and define the meta-regressor
stack = StackingRegressor(estimators=[('rf', rf), ('gb', gb), ('ab', ab), ('bg', bg)], final_estimator=lr)

# define the voting regressor
vote = VotingRegressor(estimators=[('lr', lr), ('rf', rf), ('gb', gb), ('ab', ab), ('bg', bg)])

# define the hyperparameter grid for each model
rf_param_grid = {'n_estimators': [50, 100, 200], 'max_depth': [5, 10, 15]}
gb_param_grid = {'n_estimators': [50, 100, 200], 'max_depth': [5, 10, 15], 'learning_rate': [0.1, 0.5, 1.0]}
ab_param_grid = {'n_estimators': [50, 100, 200], 'learning_rate': [0.1, 0.5, 1.0]}
bg_param_grid = {'n_estimators': [50, 100, 200], 'max_samples': [0.5, 0.8, 1.0]}
stack_param_grid = {
    'final_estimator__fit_intercept': [True, False],
    'final_estimator__normalize': [True, False]
}

vote_param_grid = {
    'weights': [[1, 1, 1, 1, 1], [1, 2, 1, 1, 1], [1, 1, 2, 1, 1], [1, 1, 1, 2, 1], [1, 1, 1, 1, 2]],
}
# perform hyperparameter tuning using GridSearchCV for each model
rf_grid = GridSearchCV(rf, rf_param_grid, cv=5, scoring='r2')
rf_grid.fit(X, y)

gb_grid = GridSearchCV(gb, gb_param_grid, cv=5, scoring='r2')
gb_grid.fit(X, y)

ab_grid = GridSearchCV(ab, ab_param_grid, cv=5, scoring='r2')
ab_grid.fit(X, y)

bg_grid = GridSearchCV(bg, bg_param_grid, cv=5, scoring='r2')
bg_grid.fit(X, y)

stack_grid = GridSearchCV(stack, stack_param_grid, cv=5, scoring='r2')
stack_grid.fit(X, y)

vote_grid = GridSearchCV(vote, vote_param_grid, cv=5, scoring='r2')
vote_grid.fit(X, y)
# print the best parameters and r2_score of each model after hyperparameter tuning
print("Simple Linear Regression r2_score:", r2_score(y, lr.fit(X, y).predict(X)))
print("Random Forest Regression best parameters:", rf_grid.best_params_)
print("Random Forest Regression r2_score:", rf_grid.best_score_)
print("Gradient Boosting Regression best parameters:", gb_grid.best_params_)
print("Gradient Boosting Regression r2_score:", gb_grid.best_score_)
print("AdaBoost Regression best parameters:", ab_grid.best_params_)
print("AdaBoost Regression r2_score:", ab_grid.best_score_)
print("Bagging Regression best parameters:", bg_grid.best_params_)
print("Bagging Regression r2_score:", bg_grid.best_score_)
print("Stacking Regression best parameters:", stack_grid.best_params_)
print("Stacking Regression r2_score:", stack_grid.best_score_)
print("Voting Regression best parameters:", vote_grid.best_params_)
print("Voting Regression r2_score:", vote_grid.best_score_)


    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np

        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_ho

Simple Linear Regression r2_score: 0.7406426641094095
Random Forest Regression best parameters: {'max_depth': 10, 'n_estimators': 200}
Random Forest Regression r2_score: 0.6285031509753464
Gradient Boosting Regression best parameters: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100}
Gradient Boosting Regression r2_score: 0.5580127580938832
AdaBoost Regression best parameters: {'learning_rate': 1.0, 'n_estimators': 100}
AdaBoost Regression r2_score: 0.608589752644294
Bagging Regression best parameters: {'max_samples': 0.8, 'n_estimators': 200}
Bagging Regression r2_score: 0.6394893326342523
Stacking Regression best parameters: {'final_estimator__fit_intercept': True, 'final_estimator__normalize': False}
Stacking Regression r2_score: 0.6346440505800522
Voting Regression best parameters: {'weights': [1, 1, 2, 1, 1]}
Voting Regression r2_score: 0.6654805878995067
