# Data

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

In [None]:
from sklearn.linear_model import Ridge, LinearRegression, Lasso

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell presumably needs scikit-learn < 1.2 — confirm the pinned version.
from sklearn.datasets import load_boston
# Dataset object; other cells read its data, target, feature_names and 'DESCR' entries.
boston = load_boston()

In [None]:
# Display the dataset's 'DESCR' entry (presumably its human-readable description — confirm).
boston['DESCR']

In [None]:
# Feature matrix and regression target used throughout the notebook.
X = boston.data
y = boston.target

In [None]:
# Dimensions of the feature matrix taken from boston.data.
X.shape

In [None]:
# Dimensions of the target array taken from boston.target.
y.shape

In [None]:
# Scatter every feature column against the target, one panel per feature.
# The panel cut-off is derived from X instead of a hard-coded column count, so
# unused panels in the 3x5 grid are hidden whatever the number of features is.
n_features = X.shape[1]
fig, axes = plt.subplots(3, 5, figsize=(20, 10))
for i, ax in enumerate(axes.ravel()):
    if i >= n_features:
        # No feature left for this panel: hide the empty axes.
        ax.set_visible(False)
        continue
    ax.plot(X[:, i], y, 'o', alpha=.5)
    ax.set_title("{}: {}".format(i, boston.feature_names[i]))
    ax.set_ylabel("MEDV")

# Ridge

## Scenario Without Polynomial Features

In [None]:
from sklearn.model_selection import train_test_split
# Hold out a test split; no test_size is given, so the library default applies.
# random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# Mean 10-fold cross-validation score of plain least squares on the training split.
cross_val_score(LinearRegression(), X_train, y_train, cv=10).mean()

In [None]:
# Mean 10-fold cross-validation score of Ridge with its default alpha.
cross_val_score(Ridge(), X_train, y_train, cv=10).mean()

### Tuning the Hyperparameter

In [None]:
# Print arrays with three decimals and without scientific notation.
np.set_printoptions(precision=3, suppress=True)

In [None]:
from sklearn.model_selection import GridSearchCV
# Candidate ridge penalties: 13 log-spaced values from 1e-3 to 1e3.
param_grid = dict(alpha=np.logspace(start=-3, stop=3, num=13))
print(param_grid)

In [None]:
# 10-fold grid search over alpha; train scores are kept so the
# train/validation curves can be compared afterwards.
grid = GridSearchCV(
    estimator=Ridge(),
    param_grid=param_grid,
    cv=10,
    return_train_score=True,
)
grid.fit(X_train, y_train)

In [None]:
import pandas as pd
# Tabulate the grid search's cv_results_ and display the frame.
results = pd.DataFrame(grid.cv_results_)
results

In [None]:
# Mean train/validation score versus alpha, with a +/- one-std band per curve.
# The x-values are cast with the builtin `float`: the `np.float` alias used
# previously was deprecated in NumPy 1.20 and removed in 1.24.
alpha_values = results['param_alpha'].astype(float)

results.plot('param_alpha', 'mean_train_score')
results.plot('param_alpha', 'mean_test_score', ax=plt.gca())
for split in ('train', 'test'):
    band_mean = results['mean_{}_score'.format(split)]
    band_std = results['std_{}_score'.format(split)]
    plt.fill_between(alpha_values, band_mean + band_std, band_mean - band_std, alpha=0.2)
plt.legend()
plt.xscale("log")

In [None]:
# Parameter setting that the grid search selected as best.
grid.best_params_

In [None]:
# Cross-validated score recorded for the best parameter setting.
grid.best_score_

## Scenario With Polynomial Features

In [None]:
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline

In [None]:
# Learn the degree-2 polynomial expansion on the training split only, then
# apply that same fitted transformer to both splits.
poly = PolynomialFeatures(degree=2)
poly.fit(X_train)
X_train_poly, X_test_poly = poly.transform(X_train), poly.transform(X_test)

# Compare the column count before and after the expansion.
print("X_train.shape: {}".format(X_train.shape))
print("X_train_poly.shape: {}".format(X_train_poly.shape))

In [None]:
# `get_feature_names` was deprecated in scikit-learn 1.0 and removed in 1.2 in
# favour of `get_feature_names_out`; use the new accessor when it exists and
# fall back to the old one so older installs keep working.
if hasattr(poly, "get_feature_names_out"):
    # Convert to a plain list of str so the printout matches the old list form.
    poly_feature_names = [str(name) for name in poly.get_feature_names_out()]
else:
    poly_feature_names = poly.get_feature_names()
print("Polynomial feature names:\n{}".format(poly_feature_names))

In [None]:
# `pipe` stays an unfitted scaler+ridge template for the cross-validation cells.
pipe = make_pipeline(StandardScaler(), Ridge())

# Fit two independent pipelines. Previously both names were bound to the same
# `pipe` object (fit returns self), so `ridge` silently became the
# polynomial-feature model as soon as the second fit ran.
ridge = make_pipeline(StandardScaler(), Ridge()).fit(X_train, y_train)
print("Score without interactions: {:.3f}".format(
    ridge.score(X_test, y_test)))

ridge_poly = make_pipeline(StandardScaler(), Ridge()).fit(X_train_poly, y_train)
print("Score with interactions: {:.3f}".format(
    ridge_poly.score(X_test_poly, y_test)))

In [None]:
# Mean 10-fold CV score of the pipeline on the original features.
cross_val_score(pipe, X_train, y_train, cv=10).mean()

In [None]:
# Mean 10-fold CV score of the pipeline on the polynomial features.
cross_val_score(pipe, X_train_poly, y_train, cv=10).mean()

### Tuning the Hyperparameter

In [None]:
# Polynomial expansion and scaling live inside the pipeline, so the grid
# search fits them together with the ridge model.
pipe = make_pipeline(
    PolynomialFeatures(degree=2),
    StandardScaler(),
    Ridge(),
)

# Same 13 log-spaced alphas, addressed through the pipeline step name.
param_grid = dict(ridge__alpha=np.logspace(-3, 3, 13))

grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=10,
    return_train_score=True,
)
grid.fit(X_train, y_train)

In [None]:
# Tabulate the grid search's cv_results_ (rebinds `results`).
results = pd.DataFrame(grid.cv_results_)

In [None]:
# Display the cross-validation results table.
results

In [None]:
# Mean train/validation score versus alpha, with a +/- one-std band per curve.
# The x-values are cast with the builtin `float`: the `np.float` alias used
# previously was deprecated in NumPy 1.20 and removed in 1.24.
alpha_values = results['param_ridge__alpha'].astype(float)

results.plot('param_ridge__alpha', 'mean_train_score')
results.plot('param_ridge__alpha', 'mean_test_score', ax=plt.gca())
for split in ('train', 'test'):
    band_mean = results['mean_{}_score'.format(split)]
    band_std = results['std_{}_score'.format(split)]
    plt.fill_between(alpha_values, band_mean + band_std, band_mean - band_std, alpha=0.2)
plt.legend()
plt.xscale("log")

In [None]:
# Best parameter setting and its cross-validated score, one per line.
print(grid.best_params_, grid.best_score_, sep="\n")

In [None]:
# Evaluate the fitted grid search on the held-out test split.
grid.score(X_test, y_test)

## Ridge Regression Coefficients

In [None]:
# Coefficients of the best pipeline's ridge step, coloured by their sign.
ridge = grid.best_estimator_
ridge_coef = ridge["ridge"].coef_
plt.scatter(
    range(X_train_poly.shape[1]),
    ridge_coef,
    c=np.sign(ridge_coef),
    cmap="bwr_r",
)

In [None]:
# Shape of the expanded training matrix, then the number of non-zero ridge coefficients.
print(X_train_poly.shape)
(ridge["ridge"].coef_ != 0).sum()

In [None]:
# Fit ridge at three penalty strengths on the polynomial features.
ridge1 = Ridge(alpha=1).fit(X_train_poly, y_train)
ridge10 = Ridge(alpha=10).fit(X_train_poly, y_train)
ridge100 = Ridge(alpha=100).fit(X_train_poly, y_train)

# Overlay the three coefficient vectors, weakest penalty first.
plt.figure(figsize=(8, 4))
for fitted, legend_label in ((ridge1, "alpha=1"), (ridge10, "alpha=10"), (ridge100, "alpha=100")):
    plt.plot(fitted.coef_, 'o', label=legend_label)
plt.legend()

In [None]:
# Ridge coefficient paths over a wide log-spaced range of alpha.
n_alphas = 200
alphas = np.logspace(-5, 6, n_alphas)

# The scaled design matrix does not depend on alpha, so compute it once
# instead of refitting the scaler on every iteration.
X_train_poly_scaled = StandardScaler().fit_transform(X_train_poly)

# One coefficient vector per alpha. No `ridge` name is rebound here, so the
# best-estimator pipeline held by `ridge` is not clobbered by this cell.
coefs = [Ridge(alpha=a).fit(X_train_poly_scaled, y_train).coef_ for a in alphas]

plt.figure(figsize=(8, 4))
plt.plot(alphas, coefs, c='k', alpha=.1)
plt.xscale("log")
plt.xlabel("alpha")
plt.ylabel("coefficient")

## Learning Curve

In [None]:
from sklearn.model_selection import learning_curve
def plot_learning_curve(est, name):
    """Draw mean train (dashed) and test (solid) learning curves for one estimator.

    Scores come from ``learning_curve`` on the notebook-level ``X`` and ``y``
    with ``cv=10``, at the training-set fractions ``np.linspace(0, 1, 20)[1:]``.
    Both curves are drawn with ``plt.plot`` in the same colour.

    Parameters
    ----------
    est : estimator handed to ``learning_curve``.
    name : suffix used in the two legend labels.
    """
    fractions = np.linspace(0, 1, 20)[1:]
    sizes, scores_train, scores_test = learning_curve(est, X, y, cv=10, train_sizes=fractions)
    # Average over the cross-validation folds (axis 1).
    mean_train = np.mean(scores_train, axis=1)
    mean_test = np.mean(scores_test, axis=1)
    train_line = plt.plot(sizes, mean_train, linestyle="--", label="train score {}".format(name))[0]
    # Reuse the train curve's colour so the pair reads as one estimator.
    plt.plot(sizes, mean_test, label="test score {}".format(name),
             c=train_line.get_color())
    
# Learning curves for three ridge penalties, then unregularized least squares.
for penalty, legend_name in ((1, "alpha=1"), (10, "alpha=10"), (100, "alpha=100")):
    plot_learning_curve(Ridge(alpha=penalty), legend_name)
plot_learning_curve(LinearRegression(), "lr")

plt.legend(loc=(1, 0))
plt.xlabel("training set size")
plt.ylabel("R^2")
plt.ylim(-1, 1)

## Grid-searching preprocessing steps and model parameters

In [None]:
# Pipeline: polynomial expansion (default settings) -> scaling -> ridge.
pipe = make_pipeline(PolynomialFeatures(), StandardScaler(),Ridge())

In [None]:
# Search jointly over the polynomial degree and the ridge penalty.
param_grid = dict(
    polynomialfeatures__degree=[1, 2, 3],
    ridge__alpha=[0.001, 0.01, 0.1, 1, 10, 100],
)

In [None]:
# 5-fold grid search over the joint grid, using all available cores (n_jobs=-1).
grid = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
)
grid.fit(X_train, y_train)

In [None]:
import pandas as pd
# Show three decimals in displayed frames.
pd.set_option("display.precision", 3)

# Pivot mean validation scores: rows are alpha values, columns are degrees.
cv_frame = pd.DataFrame(grid.cv_results_)
res = cv_frame.pivot_table(
    values='mean_test_score',
    index='param_ridge__alpha',
    columns='param_polynomialfeatures__degree',
)
# Round the alpha index labels to four decimals for display.
res = res.set_index(res.index.values.round(4))

In [None]:
# Display the pivoted score table.
res

In [None]:
import seaborn as sns
# Annotated heatmap of the pivoted scores; vmin=0.6 fixes the lower end of the colour scale.
sns.heatmap(res, annot=True, fmt=".3g", vmin=0.6)

In [None]:
# Tick labels come from the searched grid itself.
alphas = param_grid['ridge__alpha']
polynomial_degree = np.array(param_grid['polynomialfeatures__degree'])

# Image view of the score table: rows are alphas, columns are degrees.
plt.figure(dpi=100)
plt.imshow(res)
plt.colorbar()
plt.xlabel("polynomial_degree")
plt.ylabel("alpha")
plt.yticks(range(len(alphas)), list(map("{:.4f}".format, alphas)))
plt.xticks(range(len(polynomial_degree)), polynomial_degree);

In [None]:
# Report the parameter combination selected by the grid search.
print("Best parameters: {}".format(grid.best_params_))

In [None]:
# Report the fitted grid search's score on the held-out test split (two decimals).
print("Test-set score: {:.2f}".format(grid.score(X_test, y_test)))

In [None]:
# Baseline: the same alpha search without any polynomial expansion.
pipe = make_pipeline(StandardScaler(), Ridge())
param_grid = dict(ridge__alpha=[0.001, 0.01, 0.1, 1, 10, 100])
grid = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=5)
grid.fit(X_train, y_train)
baseline_score = grid.score(X_test, y_test)
print("Score without poly features: {:.2f}".format(baseline_score))

# Lasso

In [None]:
# Lasso on scaled degree-2 polynomial features.
# max_iter must be an integer: the float literal 1e6 used previously is
# rejected by scikit-learn's parameter validation in recent releases.
pipe2 = make_pipeline(PolynomialFeatures(degree=2), StandardScaler(), Lasso(max_iter=1000000))

# 13 log-spaced penalties from 1e-3 to 1.
param_grid = {'lasso__alpha': np.logspace(-3, 0, 13)}

# 10-fold search; train scores are kept for the train/validation curves.
grid2 = GridSearchCV(pipe2, param_grid, cv=10, return_train_score=True)
grid2.fit(X_train, y_train)

In [None]:
# Tabulate the lasso grid search's cv_results_ (rebinds `results`) and display it.
results = pd.DataFrame(grid2.cv_results_)
results

In [None]:
# Mean train/validation score versus alpha, with a +/- one-std band per curve.
# The x-values are cast with the builtin `float`: the `np.float` alias used
# previously was deprecated in NumPy 1.20 and removed in 1.24.
alpha_values = results['param_lasso__alpha'].astype(float)

results.plot('param_lasso__alpha', 'mean_train_score')
results.plot('param_lasso__alpha', 'mean_test_score', ax=plt.gca())
for split in ('train', 'test'):
    band_mean = results['mean_{}_score'.format(split)]
    band_std = results['std_{}_score'.format(split)]
    plt.fill_between(alpha_values, band_mean + band_std, band_mean - band_std, alpha=0.2)
plt.legend()
plt.xscale("log")

In [None]:
# Best lasso setting and its cross-validated score, one per line.
print(grid2.best_params_, grid2.best_score_, sep="\n")

In [None]:
# Evaluate the fitted lasso grid search on the held-out test split.
grid2.score(X_test, y_test)

In [None]:
# Coefficients of the best pipeline's lasso step, coloured by their sign.
lasso = grid2.best_estimator_
lasso_coef = lasso['lasso'].coef_
plt.scatter(
    range(X_train_poly.shape[1]),
    lasso_coef,
    c=np.sign(lasso_coef),
    cmap="bwr_r",
    edgecolor='k',
)

In [None]:
# Shape of the expanded training matrix, then the number of non-zero lasso coefficients.
print(X_train_poly.shape)
(lasso['lasso'].coef_ != 0).sum()

In [None]:
from sklearn.linear_model import lars_path
# lars_path computes the exact regularization path, which is piecewise linear.
# The split is redone with the same seed as before, so this cell runs on its own.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
alphas, active, coefs = lars_path(
    X_train,
    y_train,
    method="lasso",
    eps=1e-5,
)

In [None]:
# Plot the transposed lars_path coefficient array against alphas, on a log x-axis.
plt.plot(alphas, coefs.T, alpha=.5)
plt.xscale("log")

# Elastic Net

In [None]:
from sklearn.linear_model import ElasticNet

In [None]:
# Elastic net on scaled degree-2 polynomial features.
pipe3 = make_pipeline(
    PolynomialFeatures(degree=2),
    StandardScaler(),
    ElasticNet(max_iter=100000),
)

# Joint grid over the penalty strength and the l1_ratio mixing parameter.
param_grid = dict(
    elasticnet__alpha=np.logspace(-3, -1, 5),
    elasticnet__l1_ratio=[0.01, .1, .5, .9, 1],
)

grid3 = GridSearchCV(
    estimator=pipe3,
    param_grid=param_grid,
    cv=10,
    return_train_score=True,
)
grid3.fit(X_train, y_train)

In [None]:
# List the column names available in the elastic-net cv_results_ table.
pd.DataFrame(grid3.cv_results_).columns

In [None]:
import pandas as pd
# Show three decimals in displayed frames.
pd.set_option("display.precision", 3)

# Pivot mean validation scores: rows are alpha values, columns are l1_ratio values.
cv_frame = pd.DataFrame(grid3.cv_results_)
res = cv_frame.pivot_table(
    values='mean_test_score',
    index='param_elasticnet__alpha',
    columns='param_elasticnet__l1_ratio',
)
# Round the alpha index labels to four decimals for display.
res = res.set_index(res.index.values.round(4))

In [None]:
# Display the pivoted elastic-net score table.
res

In [None]:
import seaborn as sns
# Annotated heatmap of the pivoted scores; vmin=0.6 fixes the lower end of the colour scale.
sns.heatmap(res, annot=True, fmt=".3g", vmin=0.6)

In [None]:
# Tick labels come from the searched grid itself.
alphas = param_grid['elasticnet__alpha']
l1_ratio = np.array(param_grid['elasticnet__l1_ratio'])

# Image view of the score table: rows are alphas, columns are l1_ratio values.
plt.figure(dpi=100)
plt.imshow(res)
plt.colorbar()
plt.xlabel("l1_ratio")
plt.ylabel("alpha")
plt.yticks(range(len(alphas)), list(map("{:.4f}".format, alphas)))
plt.xticks(range(len(l1_ratio)), l1_ratio);

In [None]:
# Best elastic-net setting and its cross-validated score, one per line.
print(grid3.best_params_, grid3.best_score_, sep="\n")

In [None]:
# Coefficients of the best pipeline's elastic-net step, coloured by their sign.
en = grid3.best_estimator_
en_coef = en['elasticnet'].coef_
plt.scatter(
    range(X_train_poly.shape[1]),
    en_coef,
    c=np.sign(en_coef),
    cmap="bwr_r",
    edgecolor='k',
)

In [None]:
# Shape of the expanded training matrix, then the number of non-zero elastic-net coefficients.
print(X_train_poly.shape)
(en['elasticnet'].coef_ != 0).sum()

In [None]:
# Evaluate the fitted elastic-net grid search on the held-out test split.
grid3.score(X_test, y_test)

# Random Forest Regressor

In [None]:
from sklearn.ensemble import RandomForestRegressor
# Random forest with and without the polynomial interaction features.
# random_state is fixed (same seed as the train/test split) so the two
# reported scores are reproducible; unseeded forests differ run to run.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
print("Score without interactions: {:.3f}".format(
    rf.score(X_test, y_test)))

# `rf` is rebound to the forest trained on the expanded features.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train_poly, y_train)
print("Score with interactions: {:.3f}".format(rf.score(X_test_poly, y_test)))