# In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression
from ipywidgets import interactive, IntSlider, FloatSlider, VBox, Label, Layout
from IPython.display import display

# Seed NumPy's legacy global random generator once at import time so that
# random draws made through ``np.random.*`` are reproducible across runs.
np.random.seed(42)


def plot_description(text):
    """Print ``text`` beneath a "Description:" heading.

    The output begins with an empty line and ends with an extra empty line,
    which visually separates the description from surrounding output.
    """
    message = "\nDescription:\n{}\n".format(text)
    print(message)


def polynomial_overfitting_demo(poly_degree=1, num_samples=60):
    """Fit a polynomial to noisy samples of sin(x) and plot the result.

    ``num_samples`` evenly spaced points on [-3, 3] are taken from sin(x) and
    perturbed with Gaussian noise (fixed seed, so the same inputs always give
    the same figure). A polynomial of degree ``poly_degree`` is then fitted by
    ordinary least squares on standardized polynomial features, and the
    samples, the true function and the fitted curve are drawn in one figure.

    Args:
        poly_degree: Degree of the polynomial model.
        num_samples: Number of noisy training samples to generate.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    # ---------------------------------------------------
    # 1. Create synthetic data from a known nonlinear function
    # ---------------------------------------------------
    noise_level = 10.0
    noise_std = noise_level / 10  # noise_level is expressed in tenths

    # Use a private, fixed-seed generator instead of np.random.seed(42):
    # it produces the same noise values as the legacy global generator would,
    # but does not reset NumPy's global random state every time this function
    # is re-run (e.g. on each widget change).
    rng = np.random.RandomState(42)

    X = np.linspace(-3, 3, num_samples).reshape(-1, 1)
    y_true = np.sin(X).ravel()

    # Add Gaussian noise
    y = y_true + rng.randn(num_samples) * noise_std

    # ---------------------------------------------------
    # 2. Build and fit polynomial regression model
    # ---------------------------------------------------
    # The bias column is omitted because LinearRegression fits its own
    # intercept; the features are standardized before fitting.
    poly = PolynomialFeatures(degree=poly_degree, include_bias=False)
    X_poly = poly.fit_transform(X)

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_poly)

    model = LinearRegression().fit(X_scaled, y)

    # ---------------------------------------------------
    # 3. Generate smooth prediction curve
    # ---------------------------------------------------
    # Reuse the fitted feature expansion and scaler (transform only) so the
    # dense grid is mapped exactly like the training data.
    X_line = np.linspace(-3, 3, 300).reshape(-1, 1)
    X_line_poly = poly.transform(X_line)
    X_line_scaled = scaler.transform(X_line_poly)
    y_pred = model.predict(X_line_scaled)

    # ---------------------------------------------------
    # 4. Plot results
    # ---------------------------------------------------
    plt.figure(figsize=(8, 6))
    plt.scatter(X, y, color="gray", alpha=0.7, label="Training Data")
    plt.plot(X_line, np.sin(X_line), color="green", linewidth=2, label="True Function (sin x)")
    plt.plot(X_line, y_pred, color="red", linewidth=2, label=f"Polynomial Fit (degree={poly_degree})")

    plt.title("Effect of Model Complexity (Overfitting Demonstration)", fontsize=16)
    plt.xlabel("X", fontsize=14)
    plt.ylabel("y", fontsize=14)
    plt.legend()
    plt.grid(True)
    plt.show()


# ---------------------------------------------------
# 5. Interactive controls
# ---------------------------------------------------
def polynomial_overfitting_demo_interact():
    """Show the overfitting demo with interactive slider controls.

    Prints an explanatory description, then displays a "Controls" label
    followed by an ``ipywidgets.interactive`` widget that re-runs
    ``polynomial_overfitting_demo`` whenever the polynomial degree (1-20) or
    the number of samples (20-200, step 10) is changed.

    Returns:
        None. The widgets are rendered with ``IPython.display.display``.
    """
    plot_description("Demonstration of overfitting with a polynomial model. The true function to be fitted is a sinus curve (green line)."
                     " The samples (gray dots) are the (noisy) measurements taken from the true function. The task here is to "
                     "create a polynomial model (red line) that resembles the true function as closely as possible, based on the measured"
                     " samples. Increase the polynomial degree of the model and notice how the model is overfitting the samples "
                     "starting from degree > 4.")

    degree_slider = IntSlider(
        value=1, min=1, max=20, step=1,
        description="Polynomial Degree",
        style={'description_width': '150px'},
        layout=Layout(width='500px')
    )
    sample_slider = IntSlider(
        value=60, min=20, max=200, step=10,
        description="Number of Samples",
        style={'description_width': '150px'},
        layout=Layout(width='500px')
    )

    # The heading previously contained mojibake (the UTF-8 bytes of the
    # bar-chart emoji decoded with a single-byte codec). The explicit escape
    # keeps the emoji (U+1F4CA) intact regardless of the file's encoding.
    ui_box = VBox([
        Label(value="\U0001F4CA Controls", layout=Layout(margin="0 0 0 0")),
    ])

    interactive_plot = interactive(
        polynomial_overfitting_demo,
        poly_degree=degree_slider,
        num_samples=sample_slider
    )

    display(ui_box, interactive_plot)