In [87]:
# imports
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [88]:
# Load the California housing dataset and separate the feature matrix from the target
housing = fetch_california_housing()
X = housing['data']
y = housing['target']

In [89]:
# Hold out 10% of the rows as a test set; the fixed random_state keeps the
# shuffled split identical from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, shuffle=True, random_state=43
)

In [90]:
# Evaluate an SVR baseline on the held-out test split.
#
# This cell used to be `scores(y_test, y_preds_svm)`, but neither `scores` nor
# `y_preds_svm` is defined anywhere in the notebook, so it raised NameError on
# Restart & Run All. The predictions are rebuilt here so the cell is
# self-contained and depends only on the imports and the train/test split.
#
# NOTE(review): the median-impute -> standardize -> SVR pipeline is assumed because
# it is the SVM configuration evaluated later in this notebook, whose test metrics
# equal the output recorded for this cell -- confirm this was the intent.
svm_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('model', SVR()),
])
svm_pipeline.fit(X_train, y_train)
y_preds_svm = svm_pipeline.predict(X_test)

# Print r2, mse, mae in that order, matching the layout of this cell's recorded output
print("r2: ", r2_score(y_test, y_preds_svm))
print("mse: ", mean_squared_error(y_test, y_preds_svm))
print("mae: ", mean_absolute_error(y_test, y_preds_svm))

r2:  0.7295080649899692
mse:  0.3477101776542994
mae:  0.3897680598426782


In [91]:
def score(y_true, y_pred):
    """Print the R^2, mean absolute error and mean squared error of a prediction.

    Parameters
    ----------
    y_true : ground-truth target values.
    y_pred : predicted target values, passed together with ``y_true`` to each metric.

    Returns
    -------
    None. The three metrics are written to stdout, one per line.
    """
    # (label, metric function) pairs, listed in the order they are printed
    labelled_metrics = (
        ("r2: ", r2_score),
        ("mae: ", mean_absolute_error),
        ("mse: ", mean_squared_error),
    )
    for label, metric in labelled_metrics:
        print(label, metric(y_true, y_pred))

In [104]:
def def_pipe(model, title):
    """Fit an impute -> scale -> model pipeline and print train/test metrics.

    The pipeline is fitted on the notebook-level ``X_train`` / ``y_train`` and
    evaluated on both the training split and the ``X_test`` / ``y_test`` split;
    these four names are read from the enclosing (global) scope, so the
    train/test split cell must have been run first.

    Parameters
    ----------
    model : estimator used as the final ``'model'`` step of the pipeline.
    title : heading printed above the metric report.

    Returns
    -------
    None. The report is written to stdout via ``score``.
    """
    steps = [
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
        ('model', model),
    ]
    estimator = Pipeline(steps)
    estimator.fit(X_train, y_train)

    # Compute both sets of predictions before anything is printed
    test_predictions = estimator.predict(X_test)
    train_predictions = estimator.predict(X_train)

    print(f"{title}:\n")
    report_sections = (
        ("Train: \n", y_train, train_predictions),
        ("\nTest: \n", y_test, test_predictions),
    )
    for heading, truth, predicted in report_sections:
        print(heading)
        score(truth, predicted)

In [105]:
# Baseline: ordinary least-squares regression with default settings
def_pipe(model=LinearRegression(), title="Linear Regression")

Linear Regression:

Train: 

r2:  0.6054131599242079
mae:  0.533092001261455
mse:  0.5273648371379568

Test: 

r2:  0.6128959462132963
mae:  0.5196420310323715
mse:  0.49761195027083815


In [106]:
# Support vector regression with scikit-learn's default hyperparameters
def_pipe(model=SVR(), title="SVM")

SVM:

Train: 

r2:  0.749610858293664
mae:  0.38356451633259886
mse:  0.3346447867133917

Test: 

r2:  0.7295080649899692
mae:  0.3897680598426782
mse:  0.3477101776542994


In [107]:
# Single decision tree; random_state is fixed so the fitted tree is reproducible
def_pipe(model=DecisionTreeRegressor(random_state=43), title="Decision Tree")

Decision Tree:

Train: 

r2:  1.0
mae:  4.221907539810565e-17
mse:  9.24499456646287e-32

Test: 

r2:  0.6228217144931267
mae:  0.4403051356589147
mse:  0.4848526395290697


In [108]:
# Random forest ensemble; random_state is fixed so the fitted forest is reproducible
def_pipe(model=RandomForestRegressor(random_state=43), title="Random Forest")

Random Forest:

Train: 

r2:  0.9741263135396302
mae:  0.12000198560508221
mse:  0.03458015083247723

Test: 

r2:  0.8119778189909694
mae:  0.3194169859011629
mse:  0.24169750554364758


In [109]:
# Gradient-boosted trees; random_state is fixed so the fitted ensemble is reproducible
def_pipe(model=GradientBoostingRegressor(random_state=43), title="Gradient Boosting")

Gradient Boosting:

Train: 

r2:  0.8042086499063386
mae:  0.35656543036682264
mse:  0.26167490389525294

Test: 

r2:  0.7895081234643192
mae:  0.36455447680396397
mse:  0.27058170064218096
