pip install mglearn

In [None]:
# NOTE: %pylab inline pulls numpy and matplotlib into the interactive
# namespace; the `np` and `plt` names used by later cells come from here
# rather than from explicit imports.
%pylab inline
import mglearn

#### Linear Models
##### Linear models for regression

##### Linear regression aka ordinary least squares

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [None]:
# Extended Boston Housing data: 506 samples, 104 derived features.
X, y = mglearn.datasets.load_extended_boston()

# Hold out a test split (seeded so the split is reproducible), then fit
# an ordinary least squares model on the training part.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
lr = LinearRegression()
lr = lr.fit(X_train, y_train)

In [None]:
# Report the linear model's score on both splits.
lr_train_score = lr.score(X_train, y_train)
lr_test_score = lr.score(X_test, y_test)
print("Training set score: {:.2f}".format(lr_train_score))
print("Test set score: {:.2f}".format(lr_test_score))

##### Ridge regression

In [None]:
from sklearn.linear_model import Ridge

# Ridge with its default settings.
ridge = Ridge()
ridge.fit(X_train, y_train)
for template, split_X, split_y in (
    ("Training set score: {:.2f}", X_train, y_train),
    ("Test set score: {:.2f}", X_test, y_test),
):
    print(template.format(ridge.score(split_X, split_y)))

In [None]:
# Ridge with alpha=10.
ridge10 = Ridge(alpha=10)
ridge10.fit(X_train, y_train)
ridge10_train_score = ridge10.score(X_train, y_train)
ridge10_test_score = ridge10.score(X_test, y_test)
print("Training set score: {:.2f}".format(ridge10_train_score))
print("Test set score: {:.2f}".format(ridge10_test_score))

In [None]:
# Ridge with alpha=0.1.
ridge01 = Ridge(alpha=0.1)
ridge01.fit(X_train, y_train)
for template, split_X, split_y in (
    ("Training set score: {:.2f}", X_train, y_train),
    ("Test set score: {:.2f}", X_test, y_test),
):
    print(template.format(ridge01.score(split_X, split_y)))

In [None]:
# Ridge with alpha=0.01.
ridge001 = Ridge(alpha=0.01)
ridge001.fit(X_train, y_train)
ridge001_train_score = ridge001.score(X_train, y_train)
ridge001_test_score = ridge001.score(X_test, y_test)
print("Training set score: {:.2f}".format(ridge001_train_score))
print("Test set score: {:.2f}".format(ridge001_test_score))

In [None]:
# Overlay the coefficients of every fitted model, one marker style each.
for fitted, marker_code, legend_text in (
    (ridge, 's', "Ridge alpha=1"),
    (ridge10, '^', "Ridge alpha=10"),
    (ridge01, 'v', "Ridge alpha=0.1"),
    (lr, 'o', "LinearRegression"),
):
    plt.plot(fitted.coef_, marker_code, label=legend_text)

plt.xlabel("Coefficient index")
plt.ylabel("Coefficient magnitude")

# Draw a zero reference line across the current x-range, then re-apply
# that range in case drawing the line changed it.
xlims = plt.xlim()
plt.hlines(0, *xlims)
plt.xlim(xlims)
plt.ylim(-25, 25)
plt.legend()

In [None]:
# NOTE(review): mglearn helper; presumably plots Ridge vs. LinearRegression
# scores as the number of training samples grows. Confirm in mglearn.plots.
mglearn.plots.plot_ridge_n_samples()

##### Lasso

In [None]:
from sklearn.linear_model import Lasso

# Lasso with its default settings.
lasso = Lasso()
lasso.fit(X_train, y_train)
lasso_train_score = lasso.score(X_train, y_train)
lasso_test_score = lasso.score(X_test, y_test)
print("Training set score: {:.2f}".format(lasso_train_score))
print("Test set score: {:.2f}".format(lasso_test_score))
# Count the coefficients that are not exactly zero.
lasso_used = lasso.coef_ != 0
print("Number of features used:", np.sum(lasso_used))

In [None]:
# max_iter is raised above its default; otherwise the model would warn
# that max_iter should be increased.
lasso001 = Lasso(alpha=0.01, max_iter=100000)
lasso001.fit(X_train, y_train)
for template, split_X, split_y in (
    ("Training set score: {:.2f}", X_train, y_train),
    ("Test set score: {:.2f}", X_test, y_test),
):
    print(template.format(lasso001.score(split_X, split_y)))
# Count the coefficients that are not exactly zero.
lasso001_used = lasso001.coef_ != 0
print("Number of features used:", np.sum(lasso001_used))

In [None]:
# Lasso with alpha=0.0001 and a raised iteration cap.
lasso00001 = Lasso(alpha=0.0001, max_iter=100000)
lasso00001.fit(X_train, y_train)
lasso00001_train_score = lasso00001.score(X_train, y_train)
lasso00001_test_score = lasso00001.score(X_test, y_test)
print("Training set score: {:.2f}".format(lasso00001_train_score))
print("Test set score: {:.2f}".format(lasso00001_test_score))
# Count the coefficients that are not exactly zero.
lasso00001_used = lasso00001.coef_ != 0
print("Number of features used:", np.sum(lasso00001_used))

In [None]:
# Overlay the Lasso coefficients for each alpha, with Ridge alpha=0.1
# drawn last as a reference.
for fitted, marker_code, legend_text in (
    (lasso, 's', "Lasso alpha=1"),
    (lasso001, '^', "Lasso alpha=0.01"),
    (lasso00001, 'v', "Lasso alpha=0.0001"),
    (ridge01, 'o', "Ridge alpha=0.1"),
):
    plt.plot(fitted.coef_, marker_code, label=legend_text)

# Two-column legend placed just above the axes.
plt.legend(ncol=2, loc=(0, 1.05))
plt.ylim(-25, 25)
plt.xlabel("Coefficient index")
plt.ylabel("Coefficient magnitude")

<img src="images/l2_l1_l0.png">
<img src="images/l1l2ball.png">
<img src="images/l1_kink.png">
<img src="images/l1l2ball_intersect.png">
<img src="images/l1l2_elasticnet.png">

##### ElasticNet

In [None]:
from sklearn.linear_model import ElasticNet

# ElasticNet with l1_ratio=0.1; every other setting is left at its default.
en = ElasticNet(l1_ratio=0.1)
en.fit(X_train, y_train)
for template, split_X, split_y in (
    ("Training set score: {:.2f}", X_train, y_train),
    ("Test set score: {:.2f}", X_test, y_test),
):
    print(template.format(en.score(split_X, split_y)))
# Count the coefficients that are not exactly zero.
en_used = en.coef_ != 0
print("Number of features used:", np.sum(en_used))

In [None]:
# max_iter is raised above its default; otherwise the model would warn
# that max_iter should be increased.
en001 = ElasticNet(alpha=0.01, l1_ratio=0.1, max_iter=100000)
en001.fit(X_train, y_train)
en001_train_score = en001.score(X_train, y_train)
en001_test_score = en001.score(X_test, y_test)
print("Training set score: {:.2f}".format(en001_train_score))
print("Test set score: {:.2f}".format(en001_test_score))
# Count the coefficients that are not exactly zero.
en001_used = en001.coef_ != 0
print("Number of features used:", np.sum(en001_used))

In [None]:
# ElasticNet with alpha=0.1 and l1_ratio=0, i.e. the L1 part of the penalty
# is switched off and only the L2 part remains.
# max_iter is raised far above its default; otherwise the model would warn
# that max_iter should be increased.
# The model gets its own name so that it does not overwrite `en001`
# (alpha=0.01, l1_ratio=0.1), which the coefficient plot further down
# labels "ElasticNet alpha=0.01".
en01_l2 = ElasticNet(alpha=0.1, l1_ratio=0, max_iter=100000000).fit(X_train, y_train)
print("Training set score: {:.2f}".format(en01_l2.score(X_train, y_train)))
print("Test set score: {:.2f}".format(en01_l2.score(X_test, y_test)))
print("Number of features used:", np.sum(en01_l2.coef_ != 0))

In [None]:
# ElasticNet with alpha=0.0001, l1_ratio=0.1 and a raised iteration cap.
en00001 = ElasticNet(alpha=0.0001, l1_ratio=0.1, max_iter=100000)
en00001.fit(X_train, y_train)
for template, split_X, split_y in (
    ("Training set score: {:.2f}", X_train, y_train),
    ("Test set score: {:.2f}", X_test, y_test),
):
    print(template.format(en00001.score(split_X, split_y)))
# Count the coefficients that are not exactly zero.
en00001_used = en00001.coef_ != 0
print("Number of features used:", np.sum(en00001_used))

In [None]:
# Overlay the ElasticNet coefficients of each fitted model, with Ridge
# alpha=0.1 drawn last as a reference.
for fitted, marker_code, legend_text in (
    (en, 's', "ElasticNet alpha=1"),
    (en001, '^', "ElasticNet alpha=0.01"),
    (en00001, 'v', "ElasticNet alpha=0.0001"),
    (ridge01, 'o', "Ridge alpha=0.1"),
):
    plt.plot(fitted.coef_, marker_code, label=legend_text)

# Two-column legend placed just above the axes.
plt.legend(ncol=2, loc=(0, 1.05))
plt.ylim(-25, 25)
plt.xlabel("Coefficient index")
plt.ylabel("Coefficient magnitude")