In [1]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import LabelEncoder

In [2]:
X = pd.DataFrame({'LikesExercising': [False,False,False,True,False,True,True,True,True],
                'GotoGym': [True,True,True,True,True,False,True,False,False],
                 'DrivesCar': [True,False,False,True,True,False,True,False,True]})
Y = pd.Series(name = 'Age',data = [14,15,16,26,36,50,69,72,74])

In [3]:
LE = LabelEncoder()
X['LikesExercising'] = LE.fit_transform(X['LikesExercising'])
X['GotoGym'] = LE.fit_transform(X['GotoGym'])
X['DrivesCar'] = LE.fit_transform(X['DrivesCar'])

In [6]:
GB = GradientBoostingRegressor(n_estimators=3)
GB.fit(X,Y)
Y_predict = GB.predict(X)
Y_predict

array([36.907 , 34.3325, 34.3325, 43.0045, 36.907 , 46.663 , 43.0045,
       46.663 , 50.186 ])

In [7]:
MSE_3 = (sum((Y-Y_predict) ** 2)) / len(Y)
print('MSE for 3 estimators:', MSE_3)

MSE for 3 estimators: 380.05602055555556


MSE reduces as we increase estimator value. But as we increase more and more, the decrease in MSE becomes less significant  
To find best number of estimators, use GridSearchCV

In [8]:
from sklearn.model_selection import GridSearchCV

In [9]:
model = GradientBoostingRegressor()
params = {'n_estimators': range(1, 200)}
grid = GridSearchCV(estimator= model, cv=2, param_grid=params, scoring='neg_mean_squared_error')
grid.fit(X, Y)
print('The best estimator returned by GridSearchCV is', grid.best_estimator_)

The best estimator returned by GridSearchCV is GradientBoostingRegressor(n_estimators=19)


In [10]:
GB = grid.best_estimator_
GB.fit(X, Y)
Y_predict = GB.predict(X)
Y_predict

array([27.20639114, 18.98970027, 18.98970027, 46.66697477, 27.20639114,
       58.34332496, 46.66697477, 58.34332496, 69.58721772])

In [11]:
MSE_best = (sum((Y-Y_predict) ** 2)) / len(Y)
print('MSE for best no. of estimators:', MSE_best)
# although MSE value is not the best value, it is the most efficient and is close enough to the best it can be

MSE for best no. of estimators: 164.2298548605391
