In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import VotingRegressor,GradientBoostingRegressor,HistGradientBoostingRegressor,StackingRegressor
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error,accuracy_score
from sklearn.ensemble import AdaBoostRegressor,BaggingRegressor,ExtraTreesRegressor
from xgboost import XGBRegressor

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.models import Sequential
from keras.metrics import RootMeanSquaredError

In [2]:
# Load the Boston housing dataset bundled with scikit-learn.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs on older scikit-learn releases — confirm the pinned version.
from sklearn.datasets import load_boston
boston_dataset = load_boston()

In [3]:
# Show which fields the loaded dataset object exposes.
print(boston_dataset.keys())

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])


In [4]:
# Put the raw feature matrix into a DataFrame labelled with the dataset's own column names.
boston = pd.DataFrame(
    data=boston_dataset.data,
    columns=boston_dataset.feature_names,
)

In [5]:
# Preview the first rows of the feature frame.
boston.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [6]:
# Features are the full DataFrame; the regression target comes from the dataset object.
X, y = boston, boston_dataset.target

In [7]:
# Hold out a test split; the fixed seed keeps the partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [8]:
# Three base regressors combined in a stack that uses StackingRegressor's default meta-learner.
lin_reg = LinearRegression()
rnd_reg = RandomForestRegressor(n_estimators=100, random_state=42)
svr_reg = SVR(gamma="scale")

base_learners = [('lr', lin_reg), ('rf', rnd_reg), ('svr', svr_reg)]
st_reg = StackingRegressor(estimators=list(base_learners))

In [9]:
# Fit the stacked ensemble on the training split; the value returned by fit() is shown as the cell output.
st_reg.fit(X_train,y_train)

StackingRegressor(cv=None,
                  estimators=[('lr',
                               LinearRegression(copy_X=True, fit_intercept=True,
                                                n_jobs=None, normalize=False)),
                              ('rf',
                               RandomForestRegressor(bootstrap=True,
                                                     ccp_alpha=0.0,
                                                     criterion='mse',
                                                     max_depth=None,
                                                     max_features='auto',
                                                     max_leaf_nodes=None,
                                                     max_samples=None,
                                                     min_impurity_decrease=0.0,
                                                     min_impurity_split=None,
                                                     min_samples_leaf=1,
            

In [11]:
from sklearn.metrics import accuracy_score

# Baseline: fit each base regressor on its own and print its test-set error
# (mean_squared_error with squared=False).
baseline_models = (lin_reg, rnd_reg, svr_reg)
for clf in baseline_models:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    print(clf.__class__.__name__, rmse)

LinearRegression 4.700924890603808
RandomForestRegressor 3.204375185696684
SVR 7.020270305836468


In [12]:
def build_nn(n_features=13):
    """Build and compile the dense regression network used as a base learner.

    Layer stack: Dense(50, selu) -> Dense(25, selu) -> Dropout(0.2) ->
    Dense(25, selu) -> Dense(1).

    Parameters
    ----------
    n_features : int, default=13
        Width of the input layer. The default matches the 13 feature columns
        of the DataFrame used in this notebook, so calling ``build_nn()`` with
        no arguments behaves exactly as before.

    Returns
    -------
    Sequential
        Model compiled with the Adam optimizer, mean-squared-error loss and
        the 'RootMeanSquaredError' metric.
    """
    model = Sequential([
        Dense(50, activation='selu', input_shape=[n_features]),
        Dense(25, activation='selu'),
        Dropout(0.2),
        Dense(25, activation='selu'),
        # Single linear output unit: the network predicts one continuous target.
        Dense(1),
    ])

    model.compile(
        optimizer='adam',
        loss='mean_squared_error',
        metrics=['RootMeanSquaredError'],
    )
    return model

In [13]:
# Wrap the network builder in the Keras scikit-learn adapter so it can be fitted
# and queried like the other regressors; training runs 1000 epochs without logging.
m2 = tf.keras.wrappers.scikit_learn.KerasRegressor(
    build_fn=build_nn,
    epochs=1000,
    verbose=False,
)

In [14]:
# Tag the Keras wrapper as a regressor.
# NOTE(review): presumably required so the scikit-learn Stacking/Voting ensembles
# that receive `m2` accept it as a regressor — confirm against the installed versions.
m2._estimator_type = "regressor"

1  Gradient boosting as the final estimator

In [15]:

# Fresh base regressors plus the Keras wrapper `m2`.
lin_reg = LinearRegression()
rnd_reg = RandomForestRegressor(n_estimators=100, random_state=42)
svr_reg = SVR(gamma="scale")

base_learners = [('lr', lin_reg), ('rf', rnd_reg), ('svr', svr_reg), ('Dense', m2)]

# Stack with a gradient-boosting meta-learner; the voting ensemble uses the same base learners.
st_reg = StackingRegressor(
    estimators=list(base_learners),
    final_estimator=GradientBoostingRegressor(random_state=42),
)
voting_reg = VotingRegressor(estimators=list(base_learners))

In [16]:
from sklearn.metrics import r2_score

In [17]:
# Fit every model, then report its test-set error and R2.
# fit() and predict() stay as separate statements rather than being chained.
models = (lin_reg, rnd_reg, svr_reg, m2, voting_reg, st_reg)
for clf in models:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    r2 = r2_score(y_test, y_pred)
    print(clf.__class__.__name__, rmse)
    print('R2 score: {:.2f}'.format(r2))

LinearRegression 4.700924890603808
R2 score: 0.68
RandomForestRegressor 3.204375185696684
R2 score: 0.85
SVR 7.020270305836468
R2 score: 0.30
KerasRegressor 4.556034277624393
R2 score: 0.70
VotingRegressor 3.9278771109005355
R2 score: 0.78
StackingRegressor 3.0975877492206823
R2 score: 0.86


**2  Default final estimator**

In [18]:
# Fresh base regressors plus the Keras wrapper `m2`.
lin_reg = LinearRegression()
rnd_reg = RandomForestRegressor(n_estimators=100, random_state=42)
svr_reg = SVR(gamma="scale")

base_learners = [('lr', lin_reg), ('rf', rnd_reg), ('svr', svr_reg), ('Dense', m2)]

# No final_estimator is passed, so StackingRegressor falls back to its default meta-learner.
st_reg = StackingRegressor(estimators=list(base_learners))
voting_reg = VotingRegressor(estimators=list(base_learners))

In [19]:
# Fit every model, then report its test-set error and R2.
# fit() and predict() stay as separate statements rather than being chained.
models = (lin_reg, rnd_reg, svr_reg, m2, voting_reg, st_reg)
for clf in models:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    r2 = r2_score(y_test, y_pred)
    print(clf.__class__.__name__, rmse)
    print('R2 score: {:.2f}'.format(r2))

LinearRegression 4.700924890603808
R2 score: 0.68
RandomForestRegressor 3.204375185696684
R2 score: 0.85
SVR 7.020270305836468
R2 score: 0.30
KerasRegressor 5.031192105631254
R2 score: 0.64
VotingRegressor 4.053213089837031
R2 score: 0.77
StackingRegressor 3.062933593379404
R2 score: 0.87


3  AdaBoost regressor

In [20]:
# Fresh base regressors plus the Keras wrapper `m2`.
lin_reg = LinearRegression()
rnd_reg = RandomForestRegressor(n_estimators=100, random_state=42)
svr_reg = SVR(gamma="scale")

base_learners = [('lr', lin_reg), ('rf', rnd_reg), ('svr', svr_reg), ('Dense', m2)]

# Stack with an AdaBoost meta-learner; the voting ensemble uses the same base learners.
st_reg = StackingRegressor(
    estimators=list(base_learners),
    final_estimator=AdaBoostRegressor(random_state=42),
)
voting_reg = VotingRegressor(estimators=list(base_learners))

In [21]:
# Fit every model, then report its test-set error and R2.
# fit() and predict() stay as separate statements rather than being chained.
models = (lin_reg, rnd_reg, svr_reg, m2, voting_reg, st_reg)
for clf in models:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    r2 = r2_score(y_test, y_pred)
    print(clf.__class__.__name__, rmse)
    print('R2 score: {:.2f}'.format(r2))

LinearRegression 4.700924890603808
R2 score: 0.68
RandomForestRegressor 3.204375185696684
R2 score: 0.85
SVR 7.020270305836468
R2 score: 0.30
KerasRegressor 5.088217763820352
R2 score: 0.63
VotingRegressor 3.873083862211521
R2 score: 0.79
StackingRegressor 3.068673608446953
R2 score: 0.87


4  Bagging regressor

In [22]:
# Fresh base regressors plus the Keras wrapper `m2`.
lin_reg = LinearRegression()
rnd_reg = RandomForestRegressor(n_estimators=100, random_state=42)
svr_reg = SVR(gamma="scale")

base_learners = [('lr', lin_reg), ('rf', rnd_reg), ('svr', svr_reg), ('Dense', m2)]

# Stack with a bagging meta-learner; the voting ensemble uses the same base learners.
st_reg = StackingRegressor(
    estimators=list(base_learners),
    final_estimator=BaggingRegressor(random_state=42),
)
voting_reg = VotingRegressor(estimators=list(base_learners))

In [23]:
# Fit every model, then report its test-set error and R2.
# fit() and predict() stay as separate statements rather than being chained.
models = (lin_reg, rnd_reg, svr_reg, m2, voting_reg, st_reg)
for clf in models:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    r2 = r2_score(y_test, y_pred)
    print(clf.__class__.__name__, rmse)
    print('R2 score: {:.2f}'.format(r2))

LinearRegression 4.700924890603808
R2 score: 0.68
RandomForestRegressor 3.204375185696684
R2 score: 0.85
SVR 7.020270305836468
R2 score: 0.30
KerasRegressor 4.733952446439036
R2 score: 0.68
VotingRegressor 4.130953493526157
R2 score: 0.76
StackingRegressor 3.442527225860008
R2 score: 0.83


5  Extra trees regressor

In [24]:
# Fresh base regressors plus the Keras wrapper `m2`.
lin_reg = LinearRegression()
rnd_reg = RandomForestRegressor(n_estimators=100, random_state=42)
svr_reg = SVR(gamma="scale")

base_learners = [('lr', lin_reg), ('rf', rnd_reg), ('svr', svr_reg), ('Dense', m2)]

# Stack with an extra-trees meta-learner; the voting ensemble uses the same base learners.
st_reg = StackingRegressor(
    estimators=list(base_learners),
    final_estimator=ExtraTreesRegressor(random_state=42),
)
voting_reg = VotingRegressor(estimators=list(base_learners))

In [25]:
# Fit every model, then report its test-set error and R2.
# fit() and predict() stay as separate statements rather than being chained.
models = (lin_reg, rnd_reg, svr_reg, m2, voting_reg, st_reg)
for clf in models:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    r2 = r2_score(y_test, y_pred)
    print(clf.__class__.__name__, rmse)
    print('R2 score: {:.2f}'.format(r2))

LinearRegression 4.700924890603808
R2 score: 0.68
RandomForestRegressor 3.204375185696684
R2 score: 0.85
SVR 7.020270305836468
R2 score: 0.30
KerasRegressor 4.948774088144174
R2 score: 0.65
VotingRegressor 4.109692299459575
R2 score: 0.76
StackingRegressor 2.90597836940755
R2 score: 0.88


6  Hist gradient boosting regressor

In [None]:
# Fresh base regressors plus the Keras wrapper `m2`.
lin_reg = LinearRegression()
rnd_reg = RandomForestRegressor(n_estimators=100, random_state=42)
svr_reg = SVR(gamma="scale")

base_learners = [('lr', lin_reg), ('rf', rnd_reg), ('svr', svr_reg), ('Dense', m2)]

# Stack with a histogram-based gradient-boosting meta-learner; the voting ensemble
# uses the same base learners.
st_reg = StackingRegressor(
    estimators=list(base_learners),
    final_estimator=HistGradientBoostingRegressor(random_state=42),
)
voting_reg = VotingRegressor(estimators=list(base_learners))

In [None]:
# Fit every model, then report its test-set error and R2.
# fit() and predict() stay as separate statements rather than being chained.
models = (lin_reg, rnd_reg, svr_reg, m2, voting_reg, st_reg)
for clf in models:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    r2 = r2_score(y_test, y_pred)
    print(clf.__class__.__name__, rmse)
    print('R2 score: {:.2f}'.format(r2))

In [None]:
import xgboost as xgb

In [None]:
# Base learners for this experiment: the usual three regressors, an XGBoost
# regressor, and the Keras wrapper `m2`.
# Fix: the linear model was assigned to the misspelled name `in_reg`, so the
# ensembles below silently reused the `lin_reg` object from an earlier cell
# (or raised NameError on a fresh kernel if that cell had not been run).
lin_reg = LinearRegression()
rnd_reg = RandomForestRegressor(n_estimators=100, random_state=42)
svr_reg = SVR(gamma="scale")
xgb_reg = xgb.XGBRegressor(random_state=42)

base_learners = [
    ('lr', lin_reg),
    ('rf', rnd_reg),
    ('svr', svr_reg),
    ('xgb', xgb_reg),
    ('Dense', m2),
]

# Stack with a histogram-based gradient-boosting meta-learner; the voting
# ensemble uses the same base learners.
st_reg = StackingRegressor(
    estimators=list(base_learners),
    final_estimator=HistGradientBoostingRegressor(random_state=42),
)
voting_reg = VotingRegressor(estimators=list(base_learners))

In [None]:
# Fit every model (now including XGBoost), then report its test-set error and R2.
# fit() and predict() stay as separate statements rather than being chained.
models = (lin_reg, rnd_reg, svr_reg, xgb_reg, m2, voting_reg, st_reg)
for clf in models:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    r2 = r2_score(y_test, y_pred)
    print(clf.__class__.__name__, rmse)
    print('R2 score: {:.2f}'.format(r2))

In [None]:
# Base learners for this experiment: the usual three regressors, an XGBoost
# regressor, and the Keras wrapper `m2`.
# Fix: the linear model was assigned to the misspelled name `in_reg`, so the
# ensembles below silently reused the `lin_reg` object from an earlier cell
# (or raised NameError on a fresh kernel if that cell had not been run).
lin_reg = LinearRegression()
rnd_reg = RandomForestRegressor(n_estimators=100, random_state=42)
svr_reg = SVR(gamma="scale")
xgb_reg = xgb.XGBRegressor(random_state=42)

base_learners = [
    ('lr', lin_reg),
    ('rf', rnd_reg),
    ('svr', svr_reg),
    ('xgb', xgb_reg),
    ('Dense', m2),
]

# Stack with an XGBoost meta-learner (a separate instance from the base-level
# `xgb_reg`); the voting ensemble uses the same base learners.
st_reg = StackingRegressor(
    estimators=list(base_learners),
    final_estimator=xgb.XGBRegressor(random_state=42),
)
voting_reg = VotingRegressor(estimators=list(base_learners))

In [None]:
# Fit every model (now including XGBoost), then report its test-set error and R2.
# fit() and predict() stay as separate statements rather than being chained.
models = (lin_reg, rnd_reg, svr_reg, xgb_reg, m2, voting_reg, st_reg)
for clf in models:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    rmse = mean_squared_error(y_test, y_pred, squared=False)
    r2 = r2_score(y_test, y_pred)
    print(clf.__class__.__name__, rmse)
    print('R2 score: {:.2f}'.format(r2))