In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import validation_curve
from sklearn.tree import DecisionTreeRegressor

In [None]:
# NOTE(review): n_samples is not referenced by any cell visible here
# (make_data is called with literal sizes below) — confirm it is still needed.
n_samples = 1000

def f(X, noise=0.2, random_state=0):
    """Evaluate a sine wave with additive Gaussian noise on column 0 of ``X``.

    Parameters
    ----------
    X : 2-D array
        Input matrix; only its first column is read.
    noise : float, default=0.2
        Factor the standard-normal draws are multiplied by.
    random_state : default=0
        Seed passed to ``np.random.RandomState`` for the noise draws, so the
        same seed always yields the same noise.

    Returns
    -------
    1-D array with one entry per row of ``X``:
    ``sin(2 * pi * x) + noise * standard_normal``.
    """
    first_column = X[:, 0]
    signal = np.sin(2 * np.pi * first_column)
    gaussian = np.random.RandomState(random_state).randn(first_column.shape[0])
    return signal + gaussian * noise


def make_data(n, random_state=0):
    """Generate ``n`` uniformly distributed inputs and their noisy targets.

    Parameters
    ----------
    n : int
        Number of samples to draw.
    random_state : default=0
        Seed for ``np.random.RandomState``; the same seed always reproduces
        the same ``(X, y)`` pair.

    Returns
    -------
    X : array of shape (n, 1)
        Inputs drawn uniformly from [0, 1.5).
    y : array of shape (n,)
        ``f(X)`` evaluated with a noise seed derived from ``random_state``.
    """
    rng = np.random.RandomState(random_state)
    X = rng.uniform(0, 1.5, size=(n, 1))
    # f() builds its own RandomState from the seed it receives.  Passing it the
    # seed that produced X would replay the very stream X was drawn from, making
    # the "noise" a deterministic function of the inputs.  Derive a separate
    # seed from rng (after X has been drawn) so the noise is decoupled from X
    # while the whole dataset stays reproducible.
    noise_seed = rng.randint(np.iinfo(np.int32).max)
    y = f(X, random_state=noise_seed)
    return X, y


# Small training sample, plus a larger sample drawn with a different seed.
# NOTE(review): X_test / y_test are not used by the cells visible here.
X_train, y_train = make_data(100, random_state=0)
X_test, y_test = make_data(10000, random_state=1)

# Title and axis labels let the figure stand on its own when the notebook is
# skimmed; the trailing semicolon keeps the artist repr out of the cell output.
fig, ax = plt.subplots()
ax.scatter(X_train[:, 0], y_train)
ax.set(title="Training data", xlabel="x", ylabel="y");

In [None]:
# Sweep max_depth over 2..14 with 5-fold cross-validation.
param_range = np.arange(2, 15)
train_scores, test_scores = validation_curve(
    DecisionTreeRegressor(), X_train, y_train,
    param_name="max_depth", param_range=param_range,
    cv=5, scoring="neg_mean_squared_error", n_jobs=8)

# The scorer returns *negated* MSE (one row per parameter value, one column per
# fold).  Flipping the sign of the fold-mean gives plain MSE, where lower is
# better; the fold standard deviation is unaffected by the sign.
train_scores_mean = -np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = -np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)

fig, ax = plt.subplots()
ax.set_title("Validation Curve with Decision Tree")
ax.set_xlabel("max_depth")
# The plotted quantity is an error, not a score, so label it as such.
ax.set_ylabel("Mean squared error")
lw = 2
ax.plot(param_range, train_scores_mean, label="Training error",
        color="darkorange", lw=lw)
# Shaded band: mean +/- one standard deviation across the CV folds.
ax.fill_between(param_range, train_scores_mean - train_scores_std,
                train_scores_mean + train_scores_std, alpha=0.2,
                color="darkorange", lw=lw)
ax.plot(param_range, test_scores_mean, label="Cross-validation error",
        color="navy", lw=lw)
ax.fill_between(param_range, test_scores_mean - test_scores_std,
                test_scores_mean + test_scores_std, alpha=0.2,
                color="navy", lw=lw)
# Trailing semicolon suppresses the Legend repr in the cell output.
ax.legend(loc="best");

In [None]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

# Polynomial feature expansion followed by ordinary least squares.
poly = Pipeline([
    ('poly_features', PolynomialFeatures()),
    ('ols', LinearRegression()),
])

# Sweep the polynomial degree over 0, 5, 10, 15, 20 with 5-fold cross-validation.
param_range = np.arange(0, 25, 5)
train_scores, test_scores = validation_curve(
    poly, X_train, y_train, param_name="poly_features__degree",
    param_range=param_range, cv=5, scoring="neg_mean_squared_error", n_jobs=8)

# The scorer returns *negated* MSE (one row per parameter value, one column per
# fold).  Flipping the sign of the fold-mean gives plain MSE, where lower is
# better; the fold standard deviation is unaffected by the sign.
train_scores_mean = -np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = -np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)

fig, ax = plt.subplots()
ax.set_title("Validation Curve with Polynomial Regression")
ax.set_xlabel("degree")
# The plotted quantity is an error, not a score, so label it as such.
ax.set_ylabel("Mean squared error")
lw = 2
ax.plot(param_range, train_scores_mean, label="Training error",
        color="darkorange", lw=lw)
# Shaded band: mean +/- one standard deviation across the CV folds.
ax.fill_between(param_range, train_scores_mean - train_scores_std,
                train_scores_mean + train_scores_std, alpha=0.2,
                color="darkorange", lw=lw)
ax.plot(param_range, test_scores_mean, label="Cross-validation error",
        color="navy", lw=lw)
ax.fill_between(param_range, test_scores_mean - test_scores_std,
                test_scores_mean + test_scores_std, alpha=0.2,
                color="navy", lw=lw)
# Trailing semicolon suppresses the Legend repr in the cell output.
ax.legend(loc="best");