## Sklearn Gradient Boosting Regressor 
### We will peform GridSearch to find the optimal parameters to be used in the Gradient Boosting Regressor model to predict sales. Tuning of the hyperparameters are based on the MSE value obtained

In [1]:
#Importing relevant libraries
from data import train
from sklearn.svm import SVR
import numpy as np
from multiprocessing import cpu_count
from utils import StandardizedGridSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error as MSE
from sklearn.model_selection import train_test_split

In [2]:
#Separating predictors from sales column
X=train.drop('sales', axis=1)
y=train.sales

### We will first fit the training data into the default Gradient Boosting Regressor model to see the MSE value before conducting GridSearch to find the optimal hyperparameters

In [7]:
# conducting a separate train test split to fit into default Gradient Boosting Regressor Model

X_train,X_test,y_train,y_test = train_test_split(X,y, test_size=0.2,random_state =20)

In [8]:
#default for Gradient Boosting Regressor 
from sklearn.ensemble import GradientBoostingRegressor
GB_default = GradientBoostingRegressor()
GB_default.fit(X_train,y_train)


GradientBoostingRegressor()

In [9]:
# MSE value for Gradient Boosting Regressor default model
GBR_default_MSE=np.mean((y_test - GB_default.predict(X_test))**2)

GBR_default_MSE

2886.4762722716077

### We will now conduct GridSearchCV to see if a better model can be obtained

In [10]:
#Retriving saved Decision Tree Models
# Gradient Boosting Regressor Run 1

GB_run1=StandardizedGridSearchCV.load('models/Boosting9.p')
GB_run1

GridSearchCV(cv=10, estimator=GradientBoostingRegressor(), n_jobs=1,
             param_grid={'ccp_alpha': (0.01, 0.1, 1),
                         'learning_rate': (0.05, 0.1, 1), 'max_depth': [10],
                         'max_features': (None, 'auto'), 'n_estimators': [50]},
             refit='neg_mean_squared_error', return_train_score=True,
             scoring=['neg_mean_squared_error', 'r2'], verbose=10)

In [11]:
# Top 10 model results for Gradient Boosting Regressor Run 1

GB_run1.results.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,split0_test_neg_mean_squared_error,split1_test_neg_mean_squared_error,split2_test_neg_mean_squared_error,split3_test_neg_mean_squared_error,split4_test_neg_mean_squared_error,split5_test_neg_mean_squared_error,...,split2_train_r2,split3_train_r2,split4_train_r2,split5_train_r2,split6_train_r2,split7_train_r2,split8_train_r2,split9_train_r2,mean_train_r2,std_train_r2
ccp_alpha,learning_rate,max_depth,max_features,n_estimators,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
1.0,0.1,10,,50,1.052179,0.038014,0.002802,0.000404,-1007.148977,-1488.989788,-3738.470681,-1503.467889,-1889.377608,-1065.253254,...,0.991782,0.991534,0.9914,0.991801,0.990892,0.991369,0.990158,0.991663,0.991323,0.000465
0.01,0.1,10,,50,1.066941,0.06677,0.003296,0.000645,-1075.71213,-1411.137113,-3529.921037,-1597.379636,-2081.005901,-993.318734,...,0.999668,0.999645,0.99967,0.999678,0.999647,0.999635,0.999577,0.999671,0.999649,3e-05
1.0,0.1,10,auto,50,1.05218,0.035107,0.003398,0.001025,-1016.48923,-1477.055659,-4352.517244,-1517.646468,-1995.571505,-1024.025095,...,0.991782,0.991534,0.9914,0.991839,0.990892,0.991369,0.990158,0.991663,0.991327,0.000469
1.0,0.05,10,,50,1.039662,0.044803,0.003095,0.000298,-1147.126324,-1741.344204,-3280.381714,-1519.161122,-1779.240936,-994.26543,...,0.984755,0.984525,0.984435,0.984575,0.983779,0.984229,0.98276,0.984586,0.984201,0.000549
0.1,0.1,10,auto,50,1.156711,0.110265,0.003584,0.00066,-1034.271474,-1747.934465,-3955.518351,-1526.296836,-1929.962863,-995.577883,...,0.998485,0.998414,0.998411,0.998458,0.998304,0.998413,0.998142,0.998447,0.998392,9.7e-05
0.1,0.1,10,,50,1.130972,0.137082,0.003697,0.000638,-1118.607157,-1721.072651,-3965.201871,-1576.828079,-1854.617892,-1026.31681,...,0.998485,0.998426,0.998411,0.998458,0.998304,0.998395,0.998142,0.998447,0.998392,9.7e-05
1.0,0.05,10,auto,50,1.049985,0.046751,0.003192,0.000399,-1164.83249,-1663.185747,-3578.807456,-1530.379737,-1896.041194,-1001.895637,...,0.984755,0.984525,0.984435,0.984575,0.983779,0.984229,0.98276,0.984586,0.984201,0.000549
0.01,0.1,10,auto,50,1.046305,0.037861,0.004184,0.001324,-1098.211643,-1368.043926,-4533.260078,-1598.687894,-1977.468093,-1023.424639,...,0.999657,0.999645,0.99967,0.999683,0.999647,0.999635,0.999541,0.99968,0.999646,4e-05
0.01,0.05,10,auto,50,0.951078,0.019256,0.003289,0.000459,-1300.842633,-1878.69437,-3177.202247,-1515.176486,-1912.128433,-1030.132525,...,0.99267,0.992603,0.992553,0.992595,0.99261,0.992299,0.992196,0.992585,0.992495,0.000147
0.01,0.05,10,,50,0.995993,0.041416,0.00329,0.000456,-1317.580421,-1756.008199,-3364.211433,-1512.687803,-1857.434061,-1055.676382,...,0.99267,0.992603,0.992502,0.992595,0.99261,0.992299,0.992196,0.992614,0.992486,0.000155


In [12]:
#Printing top 10 models for Gradient Boosting Regressor Run 1 based on test MSE value

GB_run1.results['mean_test_neg_mean_squared_error'].head(10)

ccp_alpha  learning_rate  max_depth  max_features  n_estimators
1.00       0.10           10         NaN           50             -2414.730690
0.01       0.10           10         NaN           50             -2476.764123
1.00       0.10           10         auto          50             -2500.674680
           0.05           10         NaN           50             -2519.584491
0.10       0.10           10         auto          50             -2521.825112
                                     NaN           50             -2535.384604
1.00       0.05           10         auto          50             -2552.715643
0.01       0.10           10         auto          50             -2556.335300
           0.05           10         auto          50             -2575.678639
                                     NaN           50             -2577.167875
Name: mean_test_neg_mean_squared_error, dtype: float64

In [13]:
#We aggreagted the train and test MSE scores of run1 to see if learning rate had a significant effect on the MSE values

(-GB_run1.results[['mean_train_neg_mean_squared_error', 'mean_test_neg_mean_squared_error']]).groupby('learning_rate').mean()

Unnamed: 0_level_0,mean_train_neg_mean_squared_error,mean_test_neg_mean_squared_error
learning_rate,Unnamed: 1_level_1,Unnamed: 2_level_1
0.05,108.105028,2573.163968
0.1,36.145153,2500.952418
1.0,44.441383,3022.182704


### From the table above, we can see that learning_rate has a significant impact on the MSE Value and we will conduct GridSearchCV on a wider range of learning_rate values to find the best value

In [15]:
#We aggreagted the train and test MSE scores of run1 to see if ccp_alpha had a significant effect on the MSE values

(-GB_run1.results[['mean_train_neg_mean_squared_error', 'mean_test_neg_mean_squared_error']]).groupby('ccp_alpha').mean()

Unnamed: 0_level_0,mean_train_neg_mean_squared_error,mean_test_neg_mean_squared_error
ccp_alpha,Unnamed: 1_level_1,Unnamed: 2_level_1
0.01,27.512708,2727.591215
0.1,40.454289,2723.417936
1.0,120.724568,2645.289939


In [16]:
#Retriving saved Decision Tree Models
# Gradient Boosting Regressor Run 2

GB_run2=StandardizedGridSearchCV.load('models/Boosting10.p')
GB_run2

GridSearchCV(cv=10, estimator=GradientBoostingRegressor(), n_jobs=1,
             param_grid={'ccp_alpha': (0.01, 0.1, 1),
                         'learning_rate': array([0.005     , 0.00693878, 0.00887755, 0.01081633, 0.0127551 ,
       0.01469388, 0.01663265, 0.01857143, 0.0205102 , 0.02244898,
       0.02438776, 0.02632653, 0.02826531, 0.03020408, 0.03214286,
       0.03408163, 0.03602041, 0.03795918, 0.03989796, 0.04183673,
       0.04377551,...
       0.06316327, 0.06510204, 0.06704082, 0.06897959, 0.07091837,
       0.07285714, 0.07479592, 0.07673469, 0.07867347, 0.08061224,
       0.08255102, 0.0844898 , 0.08642857, 0.08836735, 0.09030612,
       0.0922449 , 0.09418367, 0.09612245, 0.09806122, 0.1       ]),
                         'max_depth': [10], 'max_features': (None, 'auto'),
                         'n_estimators': [50]},
             refit='neg_mean_squared_error', return_train_score=True,
             scoring=['neg_mean_squared_error', 'r2'], verbose=10)

In [17]:
# Top 10 results for Gradient Boosting Regressor Run 2
GB_run2.results.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,split0_test_neg_mean_squared_error,split1_test_neg_mean_squared_error,split2_test_neg_mean_squared_error,split3_test_neg_mean_squared_error,split4_test_neg_mean_squared_error,split5_test_neg_mean_squared_error,...,split2_train_r2,split3_train_r2,split4_train_r2,split5_train_r2,split6_train_r2,split7_train_r2,split8_train_r2,split9_train_r2,mean_train_r2,std_train_r2
ccp_alpha,learning_rate,max_depth,max_features,n_estimators,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
1.0,0.090306,10,,50,1.293937,0.025117,0.003598,0.000497,-1024.567493,-1534.213518,-3268.100077,-1548.683799,-1979.384165,-1042.39112,...,0.991613,0.991558,0.99124,0.991641,0.990821,0.991187,0.989703,0.99154,0.99119,0.000552
1.0,0.074796,10,auto,50,1.347802,0.095758,0.003997,0.00161,-1039.739352,-1335.807032,-3247.80589,-1503.302933,-1841.886813,-984.864452,...,0.991172,0.990915,0.990697,0.991167,0.990147,0.990397,0.989012,0.990966,0.9906,0.000609
1.0,0.092245,10,,50,1.289564,0.051624,0.003981,0.00077,-947.318615,-1522.224734,-3460.695985,-1498.291099,-1565.341809,-1005.048436,...,0.991595,0.991556,0.991272,0.991798,0.99079,0.991083,0.989675,0.991366,0.991177,0.000569
1.0,0.082551,10,,50,1.317374,0.108088,0.004089,0.001041,-985.932004,-1563.637043,-3405.64805,-1464.628696,-1800.403,-979.788016,...,0.991492,0.991224,0.991104,0.991534,0.990591,0.991153,0.989448,0.991294,0.991023,0.000579
1.0,0.076735,10,auto,50,1.265526,0.049977,0.003383,0.000487,-992.457689,-1488.207809,-3135.506286,-1513.314401,-2241.055584,-997.163745,...,0.99137,0.991047,0.990831,0.991182,0.99052,0.990994,0.989381,0.991115,0.990798,0.000526
1.0,0.070918,10,auto,50,1.254745,0.030873,0.003791,0.000884,-1019.706984,-1632.680293,-3211.09967,-1541.27716,-1958.604473,-986.133287,...,0.990783,0.990687,0.990534,0.990767,0.989902,0.990263,0.9889,0.990698,0.99032,0.000542
1.0,0.1,10,auto,50,1.344489,0.085106,0.003485,0.000492,-987.019454,-1492.53605,-3459.304416,-1549.153294,-1944.968649,-1046.000386,...,0.991782,0.991534,0.9914,0.991801,0.990892,0.991369,0.990158,0.991663,0.991323,0.000465
1.0,0.074796,10,,50,1.281956,0.030507,0.00359,0.000489,-1040.652505,-1523.024355,-3395.072397,-1487.827581,-1841.171723,-997.195641,...,0.991172,0.990915,0.990697,0.991167,0.990147,0.990397,0.989012,0.990966,0.9906,0.000609
1.0,0.076735,10,,50,1.266113,0.032781,0.003387,0.0005,-1032.308355,-1505.733573,-3472.759326,-1461.383536,-2050.682325,-1008.248726,...,0.99137,0.991047,0.990831,0.991182,0.99052,0.990994,0.989381,0.991115,0.990798,0.000526
1.0,0.067041,10,,50,1.249062,0.021151,0.003887,0.001369,-1105.897619,-1573.728502,-3541.036259,-1552.172645,-1812.939384,-963.027132,...,0.990505,0.990155,0.990116,0.990351,0.989446,0.989921,0.988344,0.990156,0.989905,0.000587


In [18]:
#Printing top 10 models for Gradient Boosting Regressor Run 2 based on test MSE value

GB_run2.results['mean_test_neg_mean_squared_error'].head(10)

ccp_alpha  learning_rate  max_depth  max_features  n_estimators
1.0        0.090306       10         NaN           50             -2380.512852
           0.074796       10         auto          50             -2383.715041
           0.092245       10         NaN           50             -2396.952036
           0.082551       10         NaN           50             -2404.009310
           0.076735       10         auto          50             -2406.116954
           0.070918       10         auto          50             -2412.790000
           0.100000       10         auto          50             -2414.845873
           0.074796       10         NaN           50             -2416.821154
           0.076735       10         NaN           50             -2418.580002
           0.067041       10         NaN           50             -2421.494067
Name: mean_test_neg_mean_squared_error, dtype: float64

### Comparing the 2 GridSearchCV models as well as the default model, we see that GB_run2 is the best model as it gives the lower test MSE score of 2380.51

In [19]:
## Printing best parameters from GB_run2

GB_run2.best_estimator_

GradientBoostingRegressor(ccp_alpha=1, learning_rate=0.09030612244897959,
                          max_depth=10, n_estimators=50)