In [12]:
from sklearn.linear_model import SGDRegressor, LinearRegression, ElasticNet, BayesianRidge
from sklearn.model_selection import ShuffleSplit
from catboost import CatBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from xgboost.sklearn import XGBRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.svm import SVR
from sklearn.model_selection import cross_validate
from tabular_data import AirbnbLoader
import pandas as pd

loader = AirbnbLoader()
X, y = loader.load_airbnb('Price_Night', normalized=True)

cv_split = ShuffleSplit(n_splits = 5, test_size = .3, train_size = .7, random_state = 40)

In [13]:

MLA = [
  GradientBoostingRegressor(),
  LinearRegression(),
  SGDRegressor(),
  RandomForestRegressor(),
  ElasticNet(),
  SVR(),
  BayesianRidge(),
  KernelRidge(),
  XGBRegressor(),
  CatBoostRegressor(silent=True)
]


MLA_columns = ['MLA Name', 'MLA Parameters','Train MSE', 'Test MSE', 'Train r2', 'Test r2']
MLA_compare = pd.DataFrame(columns = MLA_columns)

row_index = 0

for alg in MLA:
  MLA_name = alg.__class__.__name__
  MLA_compare.loc[row_index, 'MLA Name'] = MLA_name
  MLA_compare.loc[row_index, 'MLA Parameters'] = str(alg.get_params())

  cv_results = cross_validate(alg, X=X, y=y, cv = cv_split , scoring=['r2', 'neg_mean_squared_error'], return_train_score=True) 

  MLA_compare.loc[row_index, 'Train MSE'] = cv_results['train_neg_mean_squared_error'].mean()
  MLA_compare.loc[row_index, 'Test MSE'] = cv_results['test_neg_mean_squared_error'].mean()
  MLA_compare.loc[row_index, 'Train r2'] = cv_results['train_r2'].mean()
  MLA_compare.loc[row_index, 'Test r2'] = cv_results['test_r2'].mean()

  row_index += 1

MLA_compare.sort_values(by = ['Test MSE'], ascending = False, inplace = True)

MLA_compare

Unnamed: 0,MLA Name,MLA Parameters,Train MSE,Test MSE,Train r2,Test r2
7,KernelRidge,"{'alpha': 1, 'coef0': 1, 'degree': 3, 'gamma':...",-0.008281,-0.008937,0.354399,0.3367
6,BayesianRidge,"{'alpha_1': 1e-06, 'alpha_2': 1e-06, 'alpha_in...",-0.008113,-0.008973,0.367296,0.330479
9,CatBoostRegressor,"{'loss_function': 'RMSE', 'silent': True}",-0.001356,-0.009456,0.894232,0.298866
3,RandomForestRegressor,"{'bootstrap': True, 'ccp_alpha': 0.0, 'criteri...",-0.001388,-0.010123,0.89193,0.246414
0,GradientBoostingRegressor,"{'alpha': 0.9, 'ccp_alpha': 0.0, 'criterion': ...",-0.003447,-0.011108,0.730714,0.172579
8,XGBRegressor,"{'objective': 'reg:squarederror', 'base_score'...",-0.000116,-0.012678,0.990963,0.061523
5,SVR,"{'C': 1.0, 'cache_size': 200, 'coef0': 0.0, 'd...",-0.012276,-0.01299,0.044634,0.043804
4,ElasticNet,"{'alpha': 1.0, 'copy_X': True, 'fit_intercept'...",-0.012852,-0.013604,0.0,-0.001004
1,LinearRegression,"{'copy_X': True, 'fit_intercept': True, 'n_job...",-0.008087,-0.181675,0.369435,-11.20911
2,SGDRegressor,"{'alpha': 0.0001, 'average': False, 'early_sto...",-7.074351216771068e+17,-8.158003081753875e+17,-5.433297594896518e+19,-6.3036494392271766e+19
