In [36]:
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split

In [37]:
# Synthetic regression problem: 1000 samples, 10 features, Gaussian noise
# with standard deviation 10; seeded so the data set is reproducible.
X, y = make_regression(
    n_samples=1000,
    n_features=10,
    noise=10,
    random_state=1,
)

In [38]:
# Hold out 33% of the samples for the final evaluation; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=1,
)

In [39]:
# Baseline model: gradient boosting with default hyperparameters.
# The estimator is seeded like the data generation and the split
# (random_state=1) so re-running the notebook reproduces the same baseline.
regressor = GradientBoostingRegressor(random_state=1)
regressor.fit(X_train, y_train)

In [40]:
# Score the default-parameter model on the held-out test split.
y_pred = regressor.predict(X_test)

print("Current model performance")
print(f"r2 score: {r2_score(y_test, y_pred)}")
print(f"MAE: {mean_absolute_error(y_test, y_pred)}")
print(f"MSE: {mean_squared_error(y_test, y_pred)}")

Current model performace
r2 score: 0.9091317801650575
MAE: 40.59521530440614
MSE: 2922.147434463169


In [42]:
# Hyperparameter tuning: coarse grid over the number of boosting stages and
# the learning rate (3 x 5 = 15 candidate combinations).
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5, 1.0, 1.5],
    # Further axes, currently left out of the search; each axis that is
    # enabled multiplies the number of candidates (and therefore fits).
    # 'max_depth': [3, 4, 5],
    # 'subsample': [0.8, 0.9, 1.0],
    # 'min_samples_split': [2, 5, 10],
    # 'min_samples_leaf': [1, 2, 4],
}

In [43]:
# Template estimator that the grid search clones for every candidate/fold.
# Seeded (same seed as the data generation and split) so the selected
# parameters and scores are repeatable across runs.
gbr = GradientBoostingRegressor(random_state=1)

In [45]:
# Exhaustive search over param_grid with 5-fold cross-validation;
# verbose=3 logs the score and fit time of every individual fit.
grid_search = GridSearchCV(
    estimator=gbr,
    param_grid=param_grid,
    cv=5,
    verbose=3,
)

In [46]:
# Display the configured search object (it is fitted in a later cell).
grid_search

In [47]:
# Run the search on the training split only; the test split stays untouched.
grid_search.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 15 candidates, totalling 75 fits
[CV 1/5] END learning_rate=0.01, n_estimators=50;, score=0.316 total time=   0.1s
[CV 2/5] END learning_rate=0.01, n_estimators=50;, score=0.304 total time=   0.1s
[CV 3/5] END learning_rate=0.01, n_estimators=50;, score=0.350 total time=   0.1s
[CV 4/5] END learning_rate=0.01, n_estimators=50;, score=0.324 total time=   0.1s
[CV 5/5] END learning_rate=0.01, n_estimators=50;, score=0.342 total time=   0.2s
[CV 1/5] END learning_rate=0.01, n_estimators=100;, score=0.507 total time=   0.4s
[CV 2/5] END learning_rate=0.01, n_estimators=100;, score=0.496 total time=   0.3s
[CV 3/5] END learning_rate=0.01, n_estimators=100;, score=0.529 total time=   0.3s
[CV 4/5] END learning_rate=0.01, n_estimators=100;, score=0.512 total time=   0.3s
[CV 5/5] END learning_rate=0.01, n_estimators=100;, score=0.538 total time=   0.3s
[CV 1/5] END learning_rate=0.01, n_estimators=200;, score=0.691 total time=   0.7s
[CV 2/5] END learning_rate=0.01

In [48]:
# Parameter combination from param_grid that the search selected as best.
grid_search.best_params_

{'learning_rate': 0.1, 'n_estimators': 200}

In [49]:
# Mean cross-validated score of the selected combination. No `scoring` was
# passed to GridSearchCV, so this is the estimator's default score
# (presumably R^2 for a regressor -- confirm against the scikit-learn docs).
grid_search.best_score_

0.9342880054868784

In [50]:
# Evaluate the best estimator found by the grid search on the held-out
# test split, using the same three metrics as the baseline report.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)  # NOTE: overwrites the baseline predictions

# Heading names the tuned model so this report is distinguishable from the
# baseline report printed earlier in the notebook.
print("Tuned model performance")
print(f"r2 score: {r2_score(y_test, y_pred)}")
print(f"MAE: {mean_absolute_error(y_test, y_pred)}")
print(f"MSE: {mean_squared_error(y_test, y_pred)}")

Current model performace
r2 score: 0.9336160437064901
MAE: 34.986551605238326
MSE: 2134.780541810509
