In [None]:
import warnings


def warn(*args, **kwargs):
    """No-op replacement for ``warnings.warn``.

    Accepts any positional and keyword arguments, ignores them, and
    returns None.
    """
    return None


# Rebind the module attribute so that every later ``warnings.warn(...)`` call
# made through the ``warnings`` module resolves to the no-op above.
# This silences all such warnings process-wide, not just noisy ones.
warnings.warn = warn

# Regression

In [None]:
from sklearn.datasets import make_regression

# Synthetic regression problem: 100 samples, 100 features.
# random_state is pinned so that a kernel restart regenerates the same X and y;
# without it every run draws a different dataset and scores are not comparable.
X, y = make_regression(n_samples=100, n_features=100, random_state=0)

In [None]:
import time
import numpy as np
from sklearn.pipeline import Pipeline
from xgboost.sklearn import XGBRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.isotonic import IsotonicRegression
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.model_selection import cross_validate, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet

In [None]:
# Seed handed to every estimator below that draws random numbers, so repeated
# runs of the notebook produce the same fitted models and the same scores.
RANDOM_STATE = 0

# Candidate regressors, keyed by the display name that is used in the timing
# messages and as the key of the results dictionary.
models = {
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(),
    'Lasso': Lasso(),
    'Elastic Net': ElasticNet(),
    'Random Forest': RandomForestRegressor(random_state=RANDOM_STATE),
    # NOTE(review): scikit-learn documents IsotonicRegression as taking 1-D X
    # (or a single feature column), while the X built earlier in this notebook
    # uses n_features=100 -- this entry will presumably fail to fit; confirm.
    'Isotonic': IsotonicRegression(),
    'Gaussian Process': GaussianProcessRegressor(),
    # early_stopping holds out a random validation split, hence the seed.
    'Simpler Neural Network': MLPRegressor(early_stopping=True, random_state=RANDOM_STATE),
    'XGBoost': XGBRegressor(verbosity=0, random_state=RANDOM_STATE)
}

# Hyper-parameter grids, keyed by the same display names as `models`.
# A model that has no entry here is evaluated with its constructor defaults.
parameters = {
    'Ridge': {'alpha': [1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3]},
    'Lasso': {'alpha': [1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3]},
    'Elastic Net': {'alpha': [1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3]},
    'Random Forest': {'bootstrap':[True, False], 'n_estimators':[10, 30, 100, 300]},
    'Simpler Neural Network': {'hidden_layer_sizes': [30, 100, 300], 'activation': ['logistic', 'tanh', 'relu']}
}

In [None]:
import pickle

def experimental(X, y, filename=None):
    """Cross-validate every entry of `models` and collect the results.

    Models without an entry in `parameters` are evaluated first (in `models`
    order) with their current settings; models that do have an entry follow
    (in `parameters` order) wrapped in a GridSearchCV over that grid. Each
    estimator is placed behind a StandardScaler in a Pipeline and passed to
    cross_validate. One timing line is printed per model.

    Parameters
    ----------
    X, y
        Features and target, forwarded unchanged to cross_validate.
    filename : str or path-like, optional
        Where to pickle the results dictionary. When None (the default)
        nothing is written.

    Returns
    -------
    dict
        Maps each model's display name to the value returned by
        cross_validate for it.

    Raises
    ------
    KeyError
        If `parameters` contains a name that is missing from `models`.
    """
    untuned = [name for name in models if name not in parameters]

    ans = {}
    for name in untuned + list(parameters):
        # Wall-clock timer: process_time() would report CPU time summed over
        # all threads, which is not what "Elapsed time" in the message means.
        start = time.perf_counter()

        estimator = models[name]
        if name in parameters:
            # cross_validate fits clones of whatever it is given, so the grid
            # search is (re)run inside each outer fold; fitting it on the full
            # data beforehand would only be discarded.
            estimator = GridSearchCV(estimator, param_grid=parameters[name])

        pipeline = Pipeline([('transformer', StandardScaler()), ('estimator', estimator)])
        ans[name] = cross_validate(pipeline, X, y)
        print('Elapsed time of {} is {:.6f} seconds.'.format(name, time.perf_counter() - start))

    if filename is not None:
        # Context manager guarantees the file is flushed and closed.
        with open(filename, "wb") as handle:
            pickle.dump(ans, handle)
    return ans

In [None]:
# Run the full comparison on the synthetic data; results are also pickled to 'file'.
ans = experimental(X, y, filename='file')

Elapsed time of Linear Regression is 61.585555 seconds.
Elapsed time of XGBoost is 2.523960 seconds.
Elapsed time of Ridge is 438.911417 seconds.
Elapsed time of Lasso is 1273.599287 seconds.
Elapsed time of Elastic Net is 2010.287056 seconds.
