# Model Comparison & Selection

This notebook reviews the regression models produced in the other notebooks. The final model used for compressive strength prediction is then trained and prepared for deployment.

------

In [1]:
# Import necessary libraries

import ast

import pandas as pd

from sklearn.ensemble import GradientBoostingRegressor

In [2]:
# Load the raw concrete dataset from disk into a DataFrame
df = pd.read_csv(filepath_or_buffer='../data/Concrete_Data_Yeh.csv')

In [3]:
# Separate the target column (y) from the remaining predictor columns (X)
y = df['csMPa']
X = df.drop(columns='csMPa')

### Regression Models

In [4]:
# Load the saved summary of tuned models and list them from lowest to
# highest cross-validated training MSE (lowest error first)
model = pd.read_csv(filepath_or_buffer='../data/models.csv')
model.sort_values(by='train_score_cv_mse')

Unnamed: 0,model,params,train_score_cv_mse,test_score_mse
4,gradient_boosted_tree,"{'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': ...",19.844803,18.358834
5,light_gradient_boosted_tree,"{'boosting_type': 'gbdt', 'class_weight': None...",21.815849,21.86678
3,random_forest,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",27.351751,27.255496
6,bayesian_additive_regression_tree,"{'alpha': 0.95, 'beta': 2.0, 'initializer': No...",35.333413,34.901752
2,decision_tree,"{'ccp_alpha': 0.0, 'criterion': 'squared_error...",43.322388,45.856332
1,ridge_regression,"{'intercept': 28.73084095328784, 'parameters':...",104.597275,135.429931
0,ols_regression,"{'intercept': 29.231649835711803, 'parameters'...",104.603482,135.349416


### GBM Regression Model

In [5]:
# Keep only the row(s) whose cross-validated training MSE equals the
# minimum value in the table, then display the selection
best_model = model.loc[
    model['train_score_cv_mse'].eq(model['train_score_cv_mse'].min())
]
best_model

Unnamed: 0,model,params,train_score_cv_mse,test_score_mse
4,gradient_boosted_tree,"{'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': ...",19.844803,18.358834


In [6]:
# Parse the best model's hyperparameters into a dictionary.
# The 'params' column stores the dictionary as text, so it has to be parsed.
# ast.literal_eval only accepts Python literals (dicts, strings, numbers,
# None, ...), so unlike eval() it cannot execute code embedded in the CSV.
# .iloc[0] selects the first selected row by position rather than relying on
# a hard-coded index label, which would break if the model table changed.
model_params = ast.literal_eval(best_model['params'].iloc[0])

In [7]:
# Build the gradient boosting regressor from the tuned hyperparameters and
# train it on the entire dataset in one step (fit returns the estimator)
concrete_strength_gbm = GradientBoostingRegressor(**model_params).fit(X, y)

# Display the fitted estimator
concrete_strength_gbm