# Training Models

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

%matplotlib widget

## Polynomial regression

In [2]:
# Seed the global NumPy RNG so the synthetic data set is identical on every run.
np.random.seed(42)

m = 100 # number of observations
# Single feature, drawn uniformly from [-3, 3); shape (m, 1).
X = 6 * np.random.rand(m, 1) - 3
# Quadratic signal 0.5*x^2 + x + 2 plus standard-normal noise; shape (m, 1).
y = 0.5 * X**2 + X + 2 + np.random.randn(m, 1)

#### Figure 4.13

In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Expand the single feature x into [x, x^2] (no bias column; LinearRegression
# fits its own intercept) so a linear model can capture the quadratic trend.
poly2 = PolynomialFeatures(degree=2, include_bias=False)
X_poly2 = poly2.fit_transform(X)
lm = LinearRegression()
lm.fit(X_poly2, y)

# Evenly spaced grid over [-3, 3], used only to draw the fitted curve.
# It goes through the same (already fitted) feature expansion as the data.
X_test = np.linspace(-3, 3, 100)
X_test_poly2 = poly2.transform(X_test.reshape(-1, 1))
y_pred = lm.predict(X_test_poly2)

fig, ax = plt.subplots(1, 1, figsize=(6, 6))
ax.plot(X.flatten(), y, 'b.', label='truth')
ax.plot(X_test, y_pred, 'r--', label='predictions')
ax.set_title('Polynomial (deg=2) regression')
# Axis labels so the figure can be read on its own.
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_xlim([-4, 4])
ax.set_ylim([-1, 11])
ax.legend();

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Learning curves

#### Figure 4.14

In [4]:
# Polynomial degrees to compare and the matplotlib format string used for each.
degrees = [1, 2, 30]
formats = ['r-+', 'b--', 'g-']

def poly_regr(X, y, degrees, ax=None, line_formats=None):
    """Fit one polynomial regression per degree and draw each fitted curve.

    For every degree in ``degrees`` the single feature in ``X`` is expanded with
    ``PolynomialFeatures`` (no bias column), a ``LinearRegression`` is fitted on
    the expanded features, and its predictions over 100 evenly spaced points
    between ``X.min()`` and ``X.max()`` are plotted.

    Parameters
    ----------
    X : ndarray of shape (n_samples, 1)
        Training feature. The prediction grid is reshaped to a single column,
        so ``X`` must have exactly one feature.
    y : ndarray
        Training targets, one row per row of ``X``.
    degrees : sequence of int
        Polynomial degrees to fit, one curve per entry.
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current axes (``plt.gca()``), which is
        the axes a caller has just created with ``plt.subplots``.
    line_formats : sequence of str, optional
        Matplotlib format strings, matched to ``degrees`` by position and
        cycled if there are more degrees than formats. Defaults to the
        notebook-level ``formats`` list so existing three-argument calls keep
        their appearance.

    Returns
    -------
    None
        The curves are added to ``ax`` as a side effect.
    """
    if ax is None:
        ax = plt.gca()
    if line_formats is None:
        line_formats = formats

    # The prediction grid does not depend on the degree, so build it once.
    x_grid = np.linspace(X.min(), X.max(), 100)
    x_grid_col = x_grid.reshape(-1, 1)

    for i, d in enumerate(degrees):
        poly = PolynomialFeatures(degree=d, include_bias=False)
        lm = LinearRegression()
        lm.fit(poly.fit_transform(X), y)
        # Reuse the fitted expansion so the grid gets the same feature columns.
        y_pred = lm.predict(poly.transform(x_grid_col))
        # Modulo keeps this from raising IndexError when degrees outnumber formats.
        fmt = line_formats[i % len(line_formats)]
        ax.plot(x_grid, y_pred, fmt, label='deg= ' + str(d))

fig, ax = plt.subplots(1, 1, figsize=(6, 6))
ax.plot(X.flatten(), y, 'b.', label='truth')
# Pass the axes and formats explicitly instead of relying on notebook globals.
poly_regr(X, y, degrees, ax=ax, line_formats=formats)
ax.set_title('Polynomial regression')
ax.set_xlabel('x')
ax.set_ylabel('y')
# Fixed limits keep the high-degree curve's swings from rescaling the plot.
ax.set_xlim([-4, 4])
ax.set_ylim([-1, 11])
ax.legend();

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [5]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

def plot_learning_curves(model, X, y, test_size=0.2, random_state=None):
    """Plot training and validation RMSE as the training subset grows.

    The data is split once into a training and a validation part. For every
    size ``i`` from 1 up to the full training set, ``model`` is refitted on the
    first ``i`` training rows and scored (RMSE) on those same rows and on the
    whole validation part. Both curves are drawn on a new figure.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is refitted in
        place and is left fitted on the full training split.
    X, y : array-like
        Features and targets with matching first dimension; they are sliced
        with ``[:i]`` after splitting.
    test_size : float, default 0.2
        Passed to ``train_test_split`` as the validation share.
    random_state : int or None, default None
        Passed to ``train_test_split``. ``None`` keeps the previous behaviour of
        drawing the shuffle from NumPy's global random state.

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    # Sizes 1..n inclusive, so the last point uses the whole training split.
    train_sizes = np.arange(1, len(X_train) + 1)
    train_rmse, valid_rmse = [], []
    for i in train_sizes:
        # Features and targets must both come from the shuffled split; slicing
        # the unsplit `y` here would pair rows with the wrong labels.
        model.fit(X_train[:i], y_train[:i])
        y_train_pred = model.predict(X_train[:i])
        y_val_pred = model.predict(X_val)
        train_rmse.append(np.sqrt(mean_squared_error(y_train[:i], y_train_pred)))
        valid_rmse.append(np.sqrt(mean_squared_error(y_val, y_val_pred)))

    fig, ax = plt.subplots()
    # Plot against the real subset sizes so the x-axis matches its label.
    ax.plot(train_sizes, train_rmse, 'r-+', label='train')
    ax.plot(train_sizes, valid_rmse, 'b-.', label='val')
    ax.set_xlabel('Training set size')
    ax.set_ylabel('RMSE')
    ax.set_title('Learning curve')
    # Clip the y-axis: validation error from very small subsets can be huge.
    ax.set_ylim([0, 5])
    ax.legend()

plot_learning_curves(LinearRegression(), X, y)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [6]:
from sklearn.pipeline import Pipeline
# Degree-10 polynomial model: expand the feature, then fit ordinary least squares.
# Wrapping both steps in a Pipeline lets plot_learning_curves refit them together.
poly_reg = Pipeline(
    steps=[
        ("poly_features", PolynomialFeatures(degree=10, include_bias=False)),
        ("lin_reg", LinearRegression()),
    ]
)
plot_learning_curves(model=poly_reg, X=X, y=y)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …