In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Apply seaborn's default theme to every figure drawn below.
sns.set_theme()

# Generate a noisy sine dataset: 80 evenly spaced x values in [-5, 5], stored
# as a column vector, with y = sin(x) plus Gaussian noise (std 0.2).
np.random.seed(40)  # seed NumPy's global RNG so the noise is reproducible

x = np.linspace(-5, 5, 80).reshape(-1, 1)
y = np.sin(x) + np.random.normal(scale=0.2, size=x.shape)

# Split into training and test sets (half of the samples each; the fixed
# random_state keeps the split identical between runs)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=42)

In [None]:
# Scatter of the training split: the single feature column against the target.
# Title and labels are set on the returned Axes, and plt.show() ends the cell
# so that no stray Text(...) repr is echoed as cell output.
sns.scatterplot(x=X_train[:, 0], y=np.ravel(y_train), color='blue').set(
    title='Train dataset', xlabel='X', ylabel='y'
)
plt.show()

In [None]:
# Scatter of the held-out test split: the single feature column against the target.
# Title and labels are set on the returned Axes, and plt.show() ends the cell
# so that no stray Text(...) repr is echoed as cell output.
sns.scatterplot(x=X_test[:, 0], y=np.ravel(y_test), color='blue').set(
    title='Test dataset', xlabel='X', ylabel='y'
)
plt.show()

In [5]:
def plot_results(X_train, y_train, X_test, y_test, model, title):
    """Print fit metrics for a fitted model and plot its predictions per split.

    RMSE and R² are printed for the training and the test split, then one
    figure is drawn for each split showing the observed targets as points and
    the model's predictions as a red line. The horizontal axis of both figures
    is the first column of the corresponding feature matrix.

    Parameters
    ----------
    X_train, X_test : 2-D array-like
        Feature matrices in the form expected by ``model.predict``.
    y_train, y_test : array-like
        Observed targets for the two splits.
    model : object
        Already-fitted estimator exposing ``predict(X)``.
    title : str
        Heading printed above the metrics and used as the prefix of both
        figure titles.

    Returns
    -------
    None
    """
    # Predict
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Calculate metrics
    train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))
    test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))
    train_r2 = r2_score(y_train, y_train_pred)
    test_r2 = r2_score(y_test, y_test_pred)

    # Print out metrics
    print(f"{title}")
    print(f"Train RMSE: {train_rmse:.4f}, Test RMSE: {test_rmse:.4f}")
    print(f"Train R²: {train_r2:.4f}, Test R²: {test_r2:.4f}\n")

    # One figure per split, sharing the same layout
    _plot_split(X_train, y_train, y_train_pred, 'blue', 'Train Data', f"{title} - Training Set")
    _plot_split(X_test, y_test, y_test_pred, 'orange', 'Test Data', f"{title} - Test Set")


def _plot_split(X, y, y_pred, point_color, point_label, figure_title):
    """Draw one split: observed targets as points, predictions as a red line."""
    # Both layers share the first feature column as their horizontal axis.
    x_axis = np.asarray(X)[:, 0]

    fig, ax = plt.subplots(figsize=(6, 5))
    # np.ravel flattens column-vector targets/predictions to the 1-D form seaborn expects.
    sns.scatterplot(x=x_axis, y=np.ravel(y), color=point_color, label=point_label, ax=ax)
    sns.lineplot(x=x_axis, y=np.ravel(y_pred), color='red', label='Model Fit', ax=ax)
    ax.set(title=figure_title, xlabel='X (first feature column)', ylabel='y')
    ax.legend()
    plt.show()

In [None]:
# Determine polynomial degree
poly_degree = 2

# Create polynomial features. The transformer is fitted on the training split
# only; the test split is transformed with that already-fitted transformer
# rather than being refitted on test data.
poly_tranformer = PolynomialFeatures(degree=poly_degree, include_bias=False)
X_train_poly = poly_tranformer.fit_transform(X_train)
X_test_poly = poly_tranformer.transform(X_test)

# Scale the data with statistics computed from the training features only
scaler = StandardScaler().fit(X_train_poly)
X_train_poly = scaler.transform(X_train_poly)
X_test_poly = scaler.transform(X_test_poly)

# Fit the model and plot model predictions and data
poly_reg = LinearRegression()
poly_reg.fit(X_train_poly, y_train)
plot_results(X_train_poly, y_train, X_test_poly, y_test, poly_reg, f"Polynomial Regression (Degree {poly_degree})")

In [None]:
# Determine polynomial degree
poly_degree = 5

# Create polynomial features. The transformer is fitted on the training split
# only; the test split is transformed with that already-fitted transformer
# rather than being refitted on test data.
poly_tranformer = PolynomialFeatures(degree=poly_degree, include_bias=False)
X_train_poly = poly_tranformer.fit_transform(X_train)
X_test_poly = poly_tranformer.transform(X_test)

# Scale the data with statistics computed from the training features only
scaler = StandardScaler().fit(X_train_poly)
X_train_poly = scaler.transform(X_train_poly)
X_test_poly = scaler.transform(X_test_poly)

# Fit the model and plot model predictions and data
poly_reg = LinearRegression()
poly_reg.fit(X_train_poly, y_train)
plot_results(X_train_poly, y_train, X_test_poly, y_test, poly_reg, f"Polynomial Regression (Degree {poly_degree})")

In [None]:
# Determine polynomial degree
poly_degree = 50

# Create polynomial features. The transformer is fitted on the training split
# only; the test split is transformed with that already-fitted transformer
# rather than being refitted on test data.
poly_tranformer = PolynomialFeatures(degree=poly_degree, include_bias=False)
X_train_poly = poly_tranformer.fit_transform(X_train)
X_test_poly = poly_tranformer.transform(X_test)

# Scale the data with statistics computed from the training features only
scaler = StandardScaler().fit(X_train_poly)
X_train_poly = scaler.transform(X_train_poly)
X_test_poly = scaler.transform(X_test_poly)

# Fit the model and plot model predictions and data
poly_reg = LinearRegression()
poly_reg.fit(X_train_poly, y_train)
plot_results(X_train_poly, y_train, X_test_poly, y_test, poly_reg, f"Polynomial Regression (Degree {poly_degree})")

In [65]:
from sklearn.linear_model import Lasso

In [None]:
# Determine polynomial degree and L1 regularisation strength
poly_degree = 50
lasso_alpha = 0.001

# Create polynomial features. The transformer is fitted on the training split
# only; the test split is transformed with that already-fitted transformer
# rather than being refitted on test data.
poly_tranformer = PolynomialFeatures(degree=poly_degree, include_bias=False)
X_train_poly = poly_tranformer.fit_transform(X_train)
X_test_poly = poly_tranformer.transform(X_test)

# Scale the data with statistics computed from the training features only
scaler = StandardScaler().fit(X_train_poly)
X_train_poly = scaler.transform(X_train_poly)
X_test_poly = scaler.transform(X_test_poly)

# Fit the model and plot model predictions and data. The title names the
# estimator and its alpha so this output is distinguishable from the
# unregularised degree-50 fit.
poly_reg = Lasso(alpha=lasso_alpha)
poly_reg.fit(X_train_poly, y_train)
plot_results(X_train_poly, y_train, X_test_poly, y_test, poly_reg, f"Lasso Polynomial Regression (Degree {poly_degree}, alpha={lasso_alpha})")

In [None]:
# Distribution of the fitted coefficients of the current `poly_reg`.
# np.ravel flattens coef_ so a single histogram is drawn whether the estimator
# stores it as a 1-D vector or as a (1, n_features) row.
plt.hist(np.ravel(poly_reg.coef_), bins=20)
plt.title(f"{type(poly_reg).__name__} coefficient distribution")
plt.xlabel('Coefficient value')
plt.ylabel('Count')
plt.show()  # ends the cell so the raw (counts, bins, patches) tuple is not echoed

In [58]:
from sklearn.linear_model import Ridge

In [None]:
# Determine polynomial degree and L2 regularisation strength
poly_degree = 50
ridge_alpha = 0.1

# Create polynomial features. The transformer is fitted on the training split
# only; the test split is transformed with that already-fitted transformer
# rather than being refitted on test data.
poly_tranformer = PolynomialFeatures(degree=poly_degree, include_bias=False)
X_train_poly = poly_tranformer.fit_transform(X_train)
X_test_poly = poly_tranformer.transform(X_test)

# Scale the data with statistics computed from the training features only
scaler = StandardScaler().fit(X_train_poly)
X_train_poly = scaler.transform(X_train_poly)
X_test_poly = scaler.transform(X_test_poly)

# Fit the model and plot model predictions and data. The title names the
# estimator and its alpha so this output is distinguishable from the
# unregularised degree-50 fit.
poly_reg = Ridge(alpha=ridge_alpha)
poly_reg.fit(X_train_poly, y_train)
plot_results(X_train_poly, y_train, X_test_poly, y_test, poly_reg, f"Ridge Polynomial Regression (Degree {poly_degree}, alpha={ridge_alpha})")

In [None]:
# Distribution of the fitted coefficients of the current `poly_reg`.
# The targets used in this notebook are column vectors, and an estimator fitted
# on a 2-D y may store coef_ as a (1, n_features) row. plt.hist would read that
# row as n_features separate one-sample datasets, so it is flattened first to
# get a single histogram over all coefficients.
plt.hist(np.ravel(poly_reg.coef_), bins=20)
plt.title(f"{type(poly_reg).__name__} coefficient distribution")
plt.xlabel('Coefficient value')
plt.ylabel('Count')
plt.show()  # ends the cell so the raw (counts, bins, patches) tuple is not echoed