In [None]:
import numpy as np
import matplotlib.pyplot as plt
from networkx.algorithms.bipartite.basic import color

In [None]:
# Synthetic sample: 100 x-values drawn uniformly between -4 and 2, with y on
# the parabola x^2 + 4x + 3 plus standard-normal noise.
np.random.seed(233)  # fixed seed so every run draws the same sample
x = np.random.uniform(low=-4, high=2, size=100)
noise = np.random.randn(100)
y = x ** 2 + 4 * x + 3 + noise

# Column-vector view of x (one row per sample), used as the feature matrix.
X = x[:, np.newaxis]

In [None]:
# Quick look at the raw sample before fitting anything.
fig, ax = plt.subplots()
ax.scatter(x, y)
plt.show()

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Default size (in inches) for every figure created from here on.
plt.rcParams.update({'figure.figsize': (12, 8)})

In [None]:
# Fit one polynomial regression per degree on the full sample and overlay the
# fitted curve (red) on the scatter, one panel per degree in a 2x2 grid.
degrees = [1, 2, 5, 20]
order = np.argsort(x)  # draw the curve left to right so the line does not zig-zag
for i, degree in enumerate(degrees):
    # Expand x into polynomial features, then fit a linear model on them.
    poly = PolynomialFeatures(degree=degree)
    X_poly = poly.fit_transform(X)
    reg = LinearRegression().fit(X_poly, y)
    y_pred = reg.predict(X_poly)
    # The score is computed on the same data the model was fitted on.
    print('Degree = %d, score = %f' % (degree, reg.score(X_poly, y)))

    plt.subplot(2, 2, i + 1)
    plt.title('degree = %d' % degree)
    plt.scatter(x, y)
    plt.plot(x[order], y_pred[order], color='r')
    plt.ylim(-5, 25)  # same y-range in every panel so the fits are comparable
plt.show()

### 划分训练集和测试集

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; the fixed random_state keeps the
# partition identical across runs.
split = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=233,
)
X_train, X_test, y_train, y_test = split

In [None]:
from sklearn.metrics import mean_squared_error

plt.rcParams['figure.figsize'] = (12, 8)

# Learning curves (mean squared error).
# For each polynomial degree, refit the model on the first 1, 2, ..., n training
# samples and record the error on the samples used for fitting and on the whole
# held-out test set. One panel per degree in a 2x2 grid.
degrees = [1, 2, 5, 20]
for i, degree in enumerate(degrees):
    poly = PolynomialFeatures(degree=degree)
    X_poly_train = poly.fit_transform(X_train)
    # The transformer is fitted on the training data only; the test set is
    # merely transformed, never used for fitting.
    X_poly_test = poly.transform(X_test)

    # x-axis values: number of training samples used for each refit.
    train_sizes = list(range(1, len(X_train) + 1))

    train_error, test_error = [], []
    for k in range(len(X_train)):
        # Slice [:k + 1] selects the first k + 1 training samples.
        reg = LinearRegression()
        reg.fit(X_poly_train[:k + 1], y_train[:k + 1])

        y_pred_train = reg.predict(X_poly_train[:k + 1])
        y_pred_test = reg.predict(X_poly_test)
        train_error.append(mean_squared_error(y_train[:k + 1], y_pred_train))
        test_error.append(mean_squared_error(y_test, y_pred_test))

    plt.subplot(2, 2, i + 1)
    plt.title('Degree = %d' % degree)
    plt.ylim(-5, 25)
    plt.plot(train_sizes, train_error, label='train', color='r')
    plt.plot(train_sizes, test_error, label='test', color='b')
    plt.legend()  # without this the label= arguments above are never displayed

plt.show()

### $R^2$值

In [None]:
from sklearn.metrics import mean_squared_error

plt.rcParams['figure.figsize'] = (12, 8)

# Learning curves (R^2 score).
# For each polynomial degree, refit the model on the first 2, 3, ..., n training
# samples and record reg.score on the samples used for fitting and on the whole
# held-out test set. One panel per degree in a 2x2 grid.
degrees = [1, 2, 5, 20]
for i, degree in enumerate(degrees):
    poly = PolynomialFeatures(degree=degree)
    X_poly_train = poly.fit_transform(X_train)
    # The transformer is fitted on the training data only; the test set is
    # merely transformed, never used for fitting.
    X_poly_test = poly.transform(X_test)

    # x-axis values: number of training samples used for each refit.
    train_sizes = list(range(2, len(X_train) + 1))

    # NOTE: despite their names, these lists hold R^2 scores (higher is
    # better), not errors.
    train_error, test_error = [], []
    # Starts at k = 1, i.e. two samples -- presumably because R^2 is not
    # meaningful for a single sample.
    for k in range(1, len(X_train)):
        reg = LinearRegression()
        reg.fit(X_poly_train[:k + 1], y_train[:k + 1])

        train_error.append(reg.score(X_poly_train[:k + 1], y_train[:k + 1]))
        test_error.append(reg.score(X_poly_test, y_test))

    plt.subplot(2, 2, i + 1)
    plt.title('Degree = %d' % degree)
    plt.ylim(-1, 1)
    plt.plot(train_sizes, train_error, label='train', color='r')
    plt.plot(train_sizes, test_error, label='test', color='b')
    plt.legend()  # without this the label= arguments above are never displayed

plt.show()