### Import libraries

In [1]:
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

# Seed NumPy's global random number generator so that the random draws made
# later in the notebook (np.random.rand / np.random.randn) are repeatable
# every time the notebook is run from a fresh kernel.
np.random.seed(43)

### Create synthetic data

In [2]:
# Number of synthetic samples to generate.
m = 100

# Single feature drawn uniformly from [-3, 3), stored as an (m, 1) column.
X = np.random.rand(m, 1) * 6 - 3

# Quadratic ground truth 0.5*x^2 + x + 2 corrupted by standard Gaussian noise.
noise = np.random.randn(m, 1)
y = 0.5 * np.square(X) + X + 2 + noise

In [3]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the single feature x into [x, x^2]; the constant (bias) column is
# left out here.
poly_features = PolynomialFeatures(include_bias=False, degree=2)
X_poly = poly_features.fit(X).transform(X)

### Split into train and validation sets

In [4]:
# Hold out 20% of the samples as a validation set.
x_train_poly, x_val_poly, y_train, y_val = train_test_split(
    X_poly, y, test_size=0.2
)

# Flatten the (n, 1) column targets into 1-D (n,) arrays to avoid the
# warning message from sklearn.
y_train = y_train.ravel()
y_val = y_val.ravel()

# Show the shape of each split, one per line.
print(
    x_train_poly.shape,
    x_val_poly.shape,
    y_train.shape,
    y_val.shape,
    sep="\n",
)

(80, 2)
(20, 2)
(80,)
(20,)


### Rescale the data (important for the Gradient Descent algorithm)

In [5]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same
# transformation to both splits so no validation statistics leak into training.
scaler = StandardScaler().fit(x_train_poly)
X_train_poly_scaled, X_val_poly_scaled = (
    scaler.transform(part) for part in (x_train_poly, x_val_poly)
)

### Model

In [11]:
from sklearn.base import clone
from sklearn.linear_model import SGDRegressor

# Unregularised linear regressor trained by SGD with a small constant step
# size. warm_start=True makes repeated fit() calls resume from the current
# weights. (An earlier draft of this cell used n_iter=1 in place of
# max_iter / tol.)
sgd_reg = SGDRegressor(
    penalty=None,
    learning_rate="constant",
    eta0=0.0005,
    max_iter=500,
    tol=1e-3,
    warm_start=True,
)

__Note:__ with `warm_start=True`, when the `fit()` method is called, it just continues training where it left off instead of restarting from scratch.

In [12]:
from copy import deepcopy

# Early stopping: train one epoch at a time, score the model on the validation
# split after every epoch, and keep a snapshot of the best model seen so far.

# Start from +inf so the very first epoch always becomes the initial best.
minimum_val_error = float("inf")
best_epoch = None
best_model = None

for epoch in range(1000):
    # partial_fit() performs exactly one pass over the training data and keeps
    # the current weights, so each loop iteration really is a single epoch.
    # (fit() would run up to max_iter passes per call, making `epoch` and
    # `best_epoch` meaningless as epoch counters.)
    sgd_reg.partial_fit(X_train_poly_scaled, y_train)

    # Evaluate on the held-out validation data.
    y_val_predict = sgd_reg.predict(X_val_poly_scaled)
    val_error = mean_squared_error(y_val, y_val_predict)

    if val_error < minimum_val_error:
        minimum_val_error = val_error
        best_epoch = epoch
        # deepcopy() snapshots the hyper-parameters AND the learned weights.
        # sklearn.base.clone() would return a fresh, unfitted estimator, so
        # the "best" model could not be used for prediction.
        best_model = deepcopy(sgd_reg)