In [None]:
from copy import deepcopy

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

### Polynomial Regression

In [None]:
# Synthetic quadratic data set: m samples of y = 0.5*x^2 + x + 2 + noise,
# with x drawn uniformly from [-3, 3).
# Seed the legacy global RNG so the data (and anything downstream that draws
# from the same global state) is identical on every Restart & Run All.
np.random.seed(42)
m=100
X=6*np.random.rand(m,1)-3
# NOTE(review): np.random.rand draws uniform noise on [0, 1) (mean 0.5), so a
# fitted intercept will land near 2.5 rather than 2. If zero-mean Gaussian
# noise was intended, np.random.randn is the call to use -- confirm.
y=0.5*X**2+X+2+np.random.rand(m,1)

In [None]:
# Scatter plot of the generated samples on a fixed window:
# x in [-3, 3], y in [0, 10].
plt.xlabel("x1")
plt.ylabel("y")
plt.scatter(X, y)
plt.axis([-3, 3, 0, 10])

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Transformer that adds the squared feature (degree 2). include_bias=False
# leaves out the constant column of ones.
poly_features = PolynomialFeatures(
    degree=2,
    include_bias=False,
)

In [None]:
# Fit the transformer on X and expand it in one step.
X_poly = poly_features.fit_transform(X)

In [None]:
# Display the first raw sample (X was created with a single feature column).
X[0]

In [None]:
# Display the same sample after the degree-2 expansion, for comparison with X[0].
X_poly[0]

In [None]:
from sklearn.linear_model import LinearRegression

# Least-squares fit on the polynomial features; fit() returns the estimator,
# so construction and fitting can be chained.
lin_reg = LinearRegression().fit(X_poly, y)

# Show the learned intercept and coefficients as the cell output.
lin_reg.intercept_, lin_reg.coef_

### Learning Curves

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [None]:
def plot_learning_curves(model,X,y,test_size=0.2,random_state=None):
    """Plot training and validation RMSE against the training-set size.

    The data is split into a training part and a validation part. The model is
    then re-fit on the first 1, 2, ..., len(X_train) training samples; after
    each fit the mean squared error is recorded on the samples it was trained
    on and on the whole validation part. The square roots of both error series
    are drawn on the current matplotlib axes, together with a legend.

    Parameters
    ----------
    model
        Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is re-fit in
        place at every step, so on return it holds the fit on the full
        training part.
    X, y
        Features and targets; passed to ``train_test_split`` and sliced with
        ``[:m]``.
    test_size
        Forwarded to ``train_test_split``. Defaults to 0.2, the value that was
        previously hard-coded.
    random_state
        Forwarded to ``train_test_split``. The default ``None`` keeps the
        previous unseeded behaviour; pass an int for a reproducible split.

    Returns
    -------
    None
    """
    X_train,X_val,y_train,y_val=train_test_split(
        X,y,test_size=test_size,random_state=random_state)
    train_errors,val_errors=[], []
    # +1 so that the last point is trained on the *whole* training part;
    # range(1, len(X_train)) would stop one sample short.
    train_sizes=np.arange(1,len(X_train)+1)
    for m in train_sizes:
        model.fit(X_train[:m],y_train[:m])
        y_train_predict= model.predict(X_train[:m])
        y_val_predict= model.predict(X_val)
        train_errors.append(mean_squared_error(y_train[:m],y_train_predict))
        val_errors.append(mean_squared_error(y_val,y_val_predict))
    # Plot against the actual sizes so the x axis really is the training-set
    # size, not the 0-based list index.
    plt.plot(train_sizes,np.sqrt(train_errors),'r-+',linewidth=2,label='train')
    plt.plot(train_sizes,np.sqrt(val_errors),'b-',linewidth=3,label="val")
    # The labels above are only shown once a legend is requested.
    plt.legend()

In [None]:
# Learning curves for a plain linear model trained on the raw feature.
lin_reg = LinearRegression()
plot_learning_curves(lin_reg, X, y)

# Fixed window so the curves of different models can be compared by eye.
plt.axis([0, 80, 0.0, 3.0])
plt.xlabel("Training set size")
plt.ylabel("RMSE")

### 10th-degree polynomial model

In [None]:
from sklearn.pipeline import Pipeline

# Pipeline that first expands the input into polynomial features up to
# degree 10 and then fits a linear regression on the expanded features.
polynomial_regression=Pipeline([
    ('poly_features', PolynomialFeatures(degree=10, include_bias=False)),
    ('lin_reg',LinearRegression())
])

# Plot the learning curves of the degree-10 pipeline itself. Passing the plain
# `lin_reg` model here would just redraw the linear model's curves and leave
# the pipeline unused.
plot_learning_curves(polynomial_regression,X,y)
plt.axis([0,80,0.0,3.0])
plt.xlabel('Training set size')
plt.ylabel('RMSE')

### Early Stopping

In [None]:
from sklearn.base import clone
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler

In [None]:
# Early stopping: train one epoch at a time and keep a snapshot of the model
# at the epoch with the lowest validation error.
X_train,X_val,y_train,y_val=train_test_split(X,y,test_size=0.2,random_state=42)

# Prepare data: expand to high-degree polynomial features, then standardize.
# The pipeline is fit on the training split only and merely applied to the
# validation split.
poly_scaler=Pipeline([
    ('poly_features', PolynomialFeatures(degree=90,include_bias=False)),
    ('std_scaler', StandardScaler())
])

X_train_poly_scaled=poly_scaler.fit_transform(X_train)
X_val_poly_scaled= poly_scaler.transform(X_val)

# SGDRegressor works with 1-D targets; flatten the targets once, outside the loop.
y_train_flat=np.ravel(y_train)
y_val_flat=np.ravel(y_val)

# max_iter=1 with warm_start=True: every fit() call continues from the current
# weights and runs a single epoch.
# tol=None switches off the estimator's own stopping criterion. It replaces
# tol=-np.infty, which no longer works because np.infty was removed in NumPy 2.0.
sgd_reg= SGDRegressor(max_iter=1, tol=None, warm_start=True, penalty=None,
                      learning_rate='constant', eta0=0.0005, random_state=42)

minimum_val_error= float("inf")
best_epoch= None
best_model= None
for epoch in range(1000):
    sgd_reg.fit(X_train_poly_scaled,y_train_flat)
    y_val_predict=sgd_reg.predict(X_val_poly_scaled)
    val_error=mean_squared_error(y_val_flat,y_val_predict)
    if val_error < minimum_val_error:
        minimum_val_error= val_error
        best_epoch= epoch
        # deepcopy keeps the learned weights. sklearn's clone() only copies the
        # hyper-parameters and returns an unfitted estimator, so the "best"
        # model could not be used for prediction.
        best_model= deepcopy(sgd_reg)