In [1]:
#Import non-sklearn packages
import numpy as np
import time

#import sklearn auxiliary packages
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler

#import sklearn regression models
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, Lars, OrthogonalMatchingPursuit, BayesianRidge, ARDRegression, SGDRegressor, PassiveAggressiveRegressor, RANSACRegressor, TheilSenRegressor, HuberRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor, RadiusNeighborsRegressor
from sklearn.svm import SVR, NuSVR, LinearSVR
from sklearn.kernel_ridge import KernelRidge
from sklearn.isotonic import IsotonicRegression

In [2]:
# ---- Notebook configuration: everything a reader might want to tune ----

# Parameters forwarded to make_regression when the synthetic dataset is built
sample_number = 100
feature_number = 1
n_informative = 1
effective_rank = 1
noise = 0
random_state = 1

# Value passed as test_size when the data is split into train and test sets
test_set_perc = 0.3

# When True, the features are standardized before any model is fitted
scale = True

# When True, the output labels are squared, giving them a quadratic
# (rather than linear) structure
make_quadratic = False

# When True, the coefficients of each fitted model are printed.
# Not all models have this attribute, so this may have to be set to False
# if certain models are being tested.
print_coef = False

In [3]:
# Build the synthetic regression dataset from the configured parameters
x, y = make_regression(
    n_samples=sample_number,
    n_features=feature_number,
    n_informative=n_informative,
    effective_rank=effective_rank,
    noise=noise,
    random_state=random_state,
)

# Optionally square the targets element-wise
if make_quadratic:
    y = np.square(y)

In [4]:
# Split the dataset into train and test sets.
# The split is seeded with the configured random_state so that the same rows
# land in each set on every run; without it the partition (and therefore every
# metric computed from it) changes each time the cell is executed.
train_x, test_x, train_y, test_y = train_test_split(
    x, y, test_size=test_set_perc, random_state=random_state
)

In [5]:
# Report the feature and target shapes of each split, one value per line
print('Train Set Shapes:', train_x.shape, train_y.shape, sep='\n')
print('Test Set Shapes:', test_x.shape, test_y.shape, sep='\n')
print('')

Train Set Shapes:
(70, 1)
(70,)
Test Set Shapes:
(30, 1)
(30,)



In [6]:
# Standardize the features when scaling is enabled
if scale:
    print('Scaling Data')
    print('')
    # The scaler is fitted on the training features only; the test features
    # are transformed with that same fitted scaler.
    scaler = StandardScaler()
    train_x = scaler.fit_transform(train_x)
    test_x = scaler.transform(test_x)
    print('Data Scaled')
    print('')

Scaling Data

Data Scaled



In [7]:
# Dictionary of all models to compare, keyed by the name used in the printed report.
# Models are initialized with default arguments, with one exception: estimators whose
# fitting is randomized by default (shuffling, subsampling, random feature order or
# random weight initialization) are seeded with random_state so that repeated runs
# of the notebook give the same numbers. Any entry can be modified to test other args.
# IsotonicRegression does not abide by the same fit() function and thus must be
# tested separately.
models = {
    'LinearRegression': LinearRegression(),
    'Ridge': Ridge(),
    'Lasso': Lasso(),
    'ElasticNet': ElasticNet(),
    'Lars': Lars(),
    'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
    'BayesianRidge': BayesianRidge(),
    'ARDRegression': ARDRegression(),
    'SGDRegressor': SGDRegressor(random_state=random_state),
    'PassiveAggressiveRegressor': PassiveAggressiveRegressor(random_state=random_state),
    'RANSACRegressor': RANSACRegressor(random_state=random_state),
    'TheilSenRegressor': TheilSenRegressor(random_state=random_state),
    'HuberRegressor': HuberRegressor(),
    'DecisionTreeRegressor': DecisionTreeRegressor(random_state=random_state),
    'GaussianProcessRegressor': GaussianProcessRegressor(),
    'MLPRegressor': MLPRegressor(random_state=random_state),
    'KNeighborsRegressor': KNeighborsRegressor(),
    'RadiusNeighborsRegressor': RadiusNeighborsRegressor(),
    'SVR': SVR(gamma='scale'),
    'NuSVR': NuSVR(gamma='scale'),
    'LinearSVR': LinearSVR(random_state=random_state),
    'KernelRidge': KernelRidge(),
}

In [8]:
# Fit, time and score every model in turn, printing one report per model.
for key, model in models.items():
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() is a wall clock that can jump if the system time changes.
    begin = time.perf_counter()
    model.fit(train_x, train_y)
    print(key + ' Train time: ' + str((time.perf_counter() - begin) / 60) + " minutes")

    # Hold-out performance on the test set
    preds = model.predict(test_x)
    mse = mean_squared_error(test_y, preds)
    r2 = r2_score(test_y, preds)

    # 5-fold cross-validation on the training set. No `scoring` is passed, so the
    # estimator's own score method is used, which for regressors is R^2 -- the
    # value is therefore reported as a CV R2, not as an accuracy.
    scores = cross_val_score(model, train_x, train_y, cv=5)

    print(key + ' MSE: ' + str(mse))
    print(key + ' R2 ' + str(r2))
    print("CV R2: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    if print_coef:
        # Not every model exposes coef_; report its absence instead of letting an
        # AttributeError abort the loop before the remaining models are evaluated.
        if hasattr(model, 'coef_'):
            print('Coefficients:')
            print(model.coef_)
        else:
            print('Coefficients: not available for this model')
    print('')

LinearRegression Train time: 0.00011525154113769531 minutes
LinearRegression MSE: 4.630141018834316e-30
LinearRegression R2 1.0
Accuracy: 1.00 (+/- 0.00)

Ridge Train time: 0.00010718504587809245 minutes
Ridge MSE: 0.012500554870811257
Ridge R2 0.9997597814944618
Accuracy: 1.00 (+/- 0.00)

Lasso Train time: 6.398359934488932e-05 minutes
Lasso MSE: 0.7834089967932735
Lasso R2 0.9849455211884787
Accuracy: 0.98 (+/- 0.01)

ElasticNet Train time: 1.8215179443359376e-05 minutes
ElasticNet MSE: 8.650111264942366
ElasticNet R2 0.8337740346505871
Accuracy: 0.83 (+/- 0.12)

Lars Train time: 3.7380059560139974e-05 minutes
Lars MSE: 2.0482650144547132e-30
Lars R2 1.0
Accuracy: 1.00 (+/- 0.00)

OrthogonalMatchingPursuit Train time: 2.206563949584961e-05 minutes
OrthogonalMatchingPursuit MSE: 2.0482650144547132e-30
OrthogonalMatchingPursuit R2 1.0
Accuracy: 1.00 (+/- 0.00)

BayesianRidge Train time: 0.0004682819048563639 minutes
BayesianRidge MSE: 1.6816107798899328e-21
BayesianRidge R2 1.0
Accurac



TheilSenRegressor Train time: 0.0008198817571004232 minutes
TheilSenRegressor MSE: 1.656001875008291e-29
TheilSenRegressor R2 1.0
Accuracy: 1.00 (+/- 0.00)

HuberRegressor Train time: 0.0005645473798116048 minutes
HuberRegressor MSE: 5.978687826712552e-24
HuberRegressor R2 1.0
Accuracy: 1.00 (+/- 0.00)

DecisionTreeRegressor Train time: 1.9919872283935546e-05 minutes
DecisionTreeRegressor MSE: 0.049699475241572
DecisionTreeRegressor R2 0.9990449437011438
Accuracy: 0.99 (+/- 0.01)

GaussianProcessRegressor Train time: 8.051395416259765e-05 minutes
GaussianProcessRegressor MSE: 2.0578106620461604e-08
GaussianProcessRegressor R2 0.9999999996045582
Accuracy: 1.00 (+/- 0.00)





MLPRegressor Train time: 0.0029527028401692707 minutes




MLPRegressor MSE: 15.655565595782091
MLPRegressor R2 0.6991528288431472
Accuracy: 0.73 (+/- 0.19)

KNeighborsRegressor Train time: 1.3736883799235026e-05 minutes
KNeighborsRegressor MSE: 0.973397653068811
KNeighborsRegressor R2 0.9812945799661579
Accuracy: 0.96 (+/- 0.09)

RadiusNeighborsRegressor Train time: 9.878476460774739e-06 minutes
RadiusNeighborsRegressor MSE: 4.146911374264704
RadiusNeighborsRegressor R2 0.9203103491628666
Accuracy: 0.92 (+/- 0.06)

SVR Train time: 2.1147727966308594e-05 minutes
SVR MSE: 12.270325040467991
SVR R2 0.7642057353332257
Accuracy: 0.67 (+/- 0.48)

NuSVR Train time: 1.7917156219482423e-05 minutes
NuSVR MSE: 11.905643502983162
NuSVR R2 0.7712136845672668
Accuracy: 0.66 (+/- 0.47)

LinearSVR Train time: 1.2886524200439452e-05 minutes
LinearSVR MSE: 1.078643479971161e-24
LinearSVR R2 1.0
Accuracy: 1.00 (+/- 0.00)

KernelRidge Train time: 3.26991081237793e-05 minutes
KernelRidge MSE: 2.372505332988692
KernelRidge R2 0.9544084489559103
Accuracy: 0.96 (+/-

