In [1]:
# import required libraries and load dataset
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, BaggingRegressor, StackingRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler

# load the dataset
# NOTE(review): load_boston is deprecated since scikit-learn 1.0 and removed in
# 1.2, so this cell needs scikit-learn < 1.2. The maintainers discourage this
# dataset and suggest fetch_california_housing as an alternative.
boston = load_boston()
X_raw = pd.DataFrame(boston.data, columns=boston.feature_names)
y = pd.Series(boston.target)

# split the dataset into training and testing sets BEFORE scaling, so the
# held-out rows cannot influence the min/max learned by the scaler
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.33, random_state=42
)

# rescale the features: fit the scaler on the training rows only, then apply
# that same transformation to the test rows
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# X remains the scaled full feature matrix (a NumPy array, as before), now
# expressed with the training-set min/max; rows outside the training split may
# therefore fall slightly outside [0, 1]
X = scaler.transform(X_raw)

# hyper-parameters shared by every base ensemble: 100 estimators, fixed seed
ensemble_params = {"n_estimators": 100, "random_state": 42}

# base ensemble regressors
rf = RandomForestRegressor(**ensemble_params)
gb = GradientBoostingRegressor(**ensemble_params)
ab = AdaBoostRegressor(**ensemble_params)
bg = BaggingRegressor(**ensemble_params)

# plain linear regression; also passed to the stack as its meta-regressor
lr = LinearRegression()

# (name, estimator) pairs handed to both combined models
base_estimators = [("rf", rf), ("gb", gb), ("ab", ab), ("bg", bg)]

# stack the base models, with the linear regression as meta-regressor
# (each combined model receives its own copy of the list)
stack = StackingRegressor(estimators=list(base_estimators), final_estimator=lr)

# voting regressor over the same base models
vote = VotingRegressor(estimators=list(base_estimators))

# every model, listed in the order in which it is trained
all_models = (rf, gb, ab, bg, stack, vote, lr)

# fit each model on the training data
for model in all_models:
    model.fit(X_train, y_train)

# predict on the testing data; one prediction array per model, same order
rf_pred, gb_pred, ab_pred, bg_pred, stack_pred, vote_pred, lr_pred = [
    model.predict(X_test) for model in all_models
]

# calculate the root mean squared error (RMSE) of each model
lr_rmse = np.sqrt(mean_squared_error(y_test, lr_pred))
rf_rmse = np.sqrt(mean_squared_error(y_test, rf_pred))
gb_rmse = np.sqrt(mean_squared_error(y_test, gb_pred))
ab_rmse = np.sqrt(mean_squared_error(y_test, ab_pred))
bg_rmse = np.sqrt(mean_squared_error(y_test, bg_pred))
stack_rmse = np.sqrt(mean_squared_error(y_test, stack_pred))
vote_rmse = np.sqrt(mean_squared_error(y_test, vote_pred))

# calculate the mean absolute error (MAE) of each model.
# No square root is taken here, so these are plain MAE values; the *_rmae
# variable names are kept only so code that already refers to them keeps working.
lr_rmae = mean_absolute_error(y_test, lr_pred)
rf_rmae = mean_absolute_error(y_test, rf_pred)
gb_rmae = mean_absolute_error(y_test, gb_pred)
ab_rmae = mean_absolute_error(y_test, ab_pred)
bg_rmae = mean_absolute_error(y_test, bg_pred)
stack_rmae = mean_absolute_error(y_test, stack_pred)
vote_rmae = mean_absolute_error(y_test, vote_pred)

# (label, RMSE, MAE) rows, in the order the report is printed
report_rows = [
    ("Linear Regression", lr_rmse, lr_rmae),
    ("Random Forest", rf_rmse, rf_rmae),
    ("Gradient Boosting", gb_rmse, gb_rmae),
    ("AdaBoost", ab_rmse, ab_rmae),
    ("Bagging", bg_rmse, bg_rmae),
    ("Stacking", stack_rmse, stack_rmae),
    ("Voting", vote_rmse, vote_rmae),
]

# print the RMSE of each model
for label, rmse_value, mae_value in report_rows:
    print(f"{label} RMSE:", rmse_value)

# print the MAE of each model (previously mislabelled "RMAE")
for label, rmse_value, mae_value in report_rows:
    print(f"{label} MAE:", mae_value)

# create a KFold object with 5 shuffled splits and a fixed seed
folds = KFold(n_splits=5, shuffle=True, random_state=100)


def cv_r2_scores(model, features, target, cv):
    """Return the per-fold R^2 scores of `model` under cross-validation.

    The model is wrapped in a pipeline with its own MinMaxScaler, so the
    scaler is refit on the training part of every fold and the validation
    fold never contributes to the scaling statistics.

    Parameters
    ----------
    model : estimator
        Regressor to evaluate; cross_val_score fits clones of the pipeline
        built around it.
    features, target : array-like
        Full feature matrix and target values.
    cv : cross-validation splitter
        Splitter that defines the folds.

    Returns
    -------
    array of float
        One R^2 score per fold.
    """
    scaled_model = make_pipeline(MinMaxScaler(), model)
    return cross_val_score(scaled_model, features, target, cv=cv, scoring='r2')


# perform cross-validation and calculate the r2_score for each model
rf_scores = cv_r2_scores(rf, X, y, folds)
gb_scores = cv_r2_scores(gb, X, y, folds)
ab_scores = cv_r2_scores(ab, X, y, folds)
bg_scores = cv_r2_scores(bg, X, y, folds)
stack_scores = cv_r2_scores(stack, X, y, folds)
vote_scores = cv_r2_scores(vote, X, y, folds)
lr_scores = cv_r2_scores(lr, X, y, folds)

# print the mean r2_score of each model across the folds
for label, scores in [
    ("Random Forest", rf_scores),
    ("Gradient Boosting", gb_scores),
    ("AdaBoost", ab_scores),
    ("Bagging", bg_scores),
    ("Stacking", stack_scores),
    ("Voting", vote_scores),
    ("Linear", lr_scores),
]:
    print(f"{label} r2_score:", np.mean(scores))


    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np

        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_ho

Linear Regression RMSE: 4.5523645984630665
Random Forest RMSE: 3.2579872712888616
Gradient Boosting RMSE: 2.9079805878194303
AdaBoost RMSE: 3.8548840050706428
Bagging RMSE: 3.2343899960744658
Stacking RMSE: 2.9046628135346033
Voting RMSE: 3.151664846645677
Linear Regression RMAE: 3.1482557548168324
Random Forest RMAE: 2.2318023952095802
Gradient Boosting RMAE: 1.9814280516443268
AdaBoost RMAE: 2.6872865513825976
Bagging RMAE: 2.2179640718562874
Stacking RMAE: 1.9676701388742954
Voting RMAE: 2.1348881014622734
Random Forest r2_score: 0.8528390975280444
Gradient Boosting r2_score: 0.8637280551774674
AdaBoost r2_score: 0.8000608867817437
Bagging r2_score: 0.8536096698379272
Stacking r2_score: 0.8590508511333066
Voting r2_score: 0.8557093740727252
Linear r2_score: 0.7136663740773465
