# Purpose
- A lightweight script that runs 10-fold cross-validation (CV) on regression models
- Scores each model with a broad set of sklearn regression metrics (mean squared log error is currently commented out)
- Modifiable inputs: `models`, `score_metrics`

In [41]:
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate
from sklearn.datasets import make_regression
from pandas import DataFrame
from sklearn.metrics import make_scorer,r2_score,explained_variance_score, mean_absolute_error, mean_squared_error,mean_squared_log_error,median_absolute_error 

In [11]:
# Candidate regressors to compare: gradient boosting models that differ only
# in their n_estimators setting.
models = [
    GradientBoostingRegressor(n_estimators=n_estimators)
    for n_estimators in (100, 250, 500, 750)
]

In [49]:
# Scorers handed to cross_validate, keyed by metric name. Each metric function
# is wrapped with make_scorer using its default settings.
score_metrics = {
    metric_name: make_scorer(metric_fn)
    for metric_name, metric_fn in (
        ("r2_score", r2_score),
        ("explained_variance_score", explained_variance_score),
        ("mean_absolute_error", mean_absolute_error),
        ("mean_squared_error", mean_squared_error),
        # ("mean_squared_log_error", mean_squared_log_error),  # currently disabled
        ("median_absolute_error", median_absolute_error),
    )
}

In [50]:
# Synthetic regression data set used by every cross-validation run below.
# A fixed random_state keeps the generated X and y identical across kernel
# restarts, so the data no longer changes from one run of the notebook to the next.
X, y = make_regression(random_state=42)

In [68]:
# Cross-validate the first candidate model over 10 folds (test scores only)
# and keep the per-fold results as a DataFrame with one row per fold.
cv_scores = DataFrame(
    cross_validate(
        models[0],
        X,
        y,
        scoring=score_metrics,
        cv=10,
        return_train_score=False,
    )
)
cv_scores.head()

Unnamed: 0,fit_time,score_time,test_explained_variance_score,test_mean_absolute_error,test_mean_squared_error,test_median_absolute_error,test_r2_score
0,0.051146,0.0,0.379544,153.336343,28591.139936,163.596148,0.347272
1,0.067108,0.0,0.486346,143.126848,25517.920352,158.471857,0.174031
2,0.064686,0.0,0.653247,108.36863,15657.724597,108.507788,0.466802
3,0.065665,0.001,0.360983,127.442376,25164.581183,100.708247,0.34843
4,0.047681,0.0,0.290482,123.642812,22626.479518,109.667072,0.276237


In [112]:
# Average every column (timings and test scores) over the folds and show the
# result as a plain dict. cv_scores is already a DataFrame at this point.
cv_scores.mean().to_dict()

{'fit_time': 0.060821700096130374,
 'score_time': 0.00039927959442138673,
 'test_explained_variance_score': 0.30757897043163435,
 'test_mean_absolute_error': 133.28648358090828,
 'test_mean_squared_error': 25419.435931960088,
 'test_median_absolute_error': 128.61392539519377,
 'test_r2_score': 0.17353683299697614}

In [66]:
from IPython.display import display

In [134]:
def cv_models(models, cv=10, scoring=None):
    """Cross-validate each model and display one summary row per model.

    Every model is evaluated with ``cross_validate`` on the notebook-level
    ``X`` and ``y``. The per-fold results (fit/score times and test scores)
    are averaged, merged with the model's ``get_params()`` values, and the
    combined table is displayed rounded to two decimals.

    Parameters
    ----------
    models : iterable of estimators
        Estimators to evaluate; each must provide ``get_params()``.
    cv : optional
        Passed straight through as ``cross_validate``'s ``cv`` argument.
        Defaults to 10, the value that was previously hard-coded.
    scoring : optional
        Passed as ``cross_validate``'s ``scoring`` argument. When ``None``,
        the notebook-level ``score_metrics`` dict is used, which matches the
        previous behaviour.

    Returns
    -------
    None
        The summary table is rendered with ``display``; nothing is returned.
    """
    if scoring is None:
        scoring = score_metrics

    rows = []
    for model in models:
        fold_results = cross_validate(
            model, X, y, cv=cv, return_train_score=False, scoring=scoring
        )
        # Collapse the per-fold arrays to one mean value per column.
        mean_scores = DataFrame(fold_results).mean().to_dict()
        # On a key clash the averaged scores overwrite the model parameters.
        rows.append({**model.get_params(), **mean_scores})
    display(DataFrame(rows).round(2))

In [135]:
# Cross-validate every candidate model and show the combined summary table.
cv_models(models=models)

Unnamed: 0,alpha,criterion,fit_time,init,learning_rate,loss,max_depth,max_features,max_leaf_nodes,min_impurity_decrease,...,random_state,score_time,subsample,test_explained_variance_score,test_mean_absolute_error,test_mean_squared_error,test_median_absolute_error,test_r2_score,verbose,warm_start
0,0.9,friedman_mse,0.06,,0.1,ls,3,,,0.0,...,,0.0,1.0,0.3,134.15,25917.87,127.82,0.16,0,False
1,0.9,friedman_mse,0.15,,0.1,ls,3,,,0.0,...,,0.0,1.0,0.31,134.02,25736.92,126.58,0.16,0,False
2,0.9,friedman_mse,0.23,,0.1,ls,3,,,0.0,...,,0.0,1.0,0.31,134.38,25593.95,131.6,0.17,0,False
3,0.9,friedman_mse,0.25,,0.1,ls,3,,,0.0,...,,0.0,1.0,0.31,134.04,25780.15,124.73,0.17,0,False
