# 1.1 Code Brief: Introduction to Regularization

Quick reference for regularization concepts and visualizations.

## Setup

In [None]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## Visualize Overfitting

In [None]:
# Synthetic data set: a straight line (slope 2, intercept 1) observed with
# Gaussian noise of standard deviation 3. The seed keeps the figure reproducible.
np.random.seed(42)
X = np.linspace(0, 10, 20)
y_true = 2 * X + 1
y_noisy = y_true + np.random.normal(0, 3, len(X))

# Dense grid on which the three candidate models are evaluated.
X_smooth = np.linspace(0, 10, 100)

# Underfit: a constant at the sample mean. Good fit: degree-1 polynomial.
# Overfit: degree-15 polynomial fitted to only 20 points.
y_underfit = np.full_like(X_smooth, np.mean(y_noisy))
linear_coeffs = np.polyfit(X, y_noisy, 1)
y_goodfit = np.polyval(linear_coeffs, X_smooth)
wiggly_coeffs = np.polyfit(X, y_noisy, 15)
y_overfit = np.polyval(wiggly_coeffs, X_smooth)

fig = make_subplots(
    rows=1,
    cols=3,
    subplot_titles=('Underfitting', 'Good Fit', 'Overfitting'),
)

# The same noisy observations go in every panel; only the first one
# contributes a legend entry.
for col in (1, 2, 3):
    observations = go.Scatter(
        x=X,
        y=y_noisy,
        mode='markers',
        marker=dict(color='blue', size=8),
        showlegend=(col == 1),
        name='Data',
    )
    fig.add_trace(observations, row=1, col=col)

# One fitted curve per panel, left to right.
fitted_curves = [(y_underfit, 'red'), (y_goodfit, 'green'), (y_overfit, 'red')]
for col, (curve, curve_color) in enumerate(fitted_curves, start=1):
    fig.add_trace(
        go.Scatter(
            x=X_smooth,
            y=curve,
            mode='lines',
            line=dict(color=curve_color, width=2),
            showlegend=False,
        ),
        row=1,
        col=col,
    )

fig.update_layout(height=400, title_text="The Fitting Spectrum")
fig.show()

## Bias-Variance Trade-off

In [None]:
# Stylized error curves (illustrative formulas, not fitted to data):
# bias² falls as 1/complexity, variance grows quadratically, and a constant
# 0.1 is added to their sum to form the total error.
complexity = np.linspace(0.1, 3, 100)
bias_squared = 1 / complexity
variance = 0.3 * complexity ** 2
total_error = bias_squared + variance + 0.1

# Complexity value at which the total error curve is lowest.
optimal_idx = np.argmin(total_error)
optimal_complexity = complexity[optimal_idx]

# (values, legend name, colour, line width) for each curve, in draw order.
curve_specs = [
    (bias_squared, 'Bias²', 'blue', 2),
    (variance, 'Variance', 'orange', 2),
    (total_error, 'Total Error', 'red', 3),
]
fig = go.Figure(
    data=[
        go.Scatter(
            x=complexity,
            y=values,
            mode='lines',
            name=label,
            line=dict(color=curve_color, width=curve_width),
        )
        for values, label, curve_color, curve_width in curve_specs
    ]
)

# Mark the minimum of the total error with a dashed vertical line.
fig.add_vline(
    x=optimal_complexity,
    line_dash="dash",
    line_color="green",
    annotation_text="Optimal",
)
fig.update_layout(
    title='Bias-Variance Trade-off',
    xaxis_title='Model Complexity',
    yaxis_title='Error',
    height=400,
)
fig.show()

## L1 vs L2 Regularization Geometry

In [None]:
# Boundaries of the unit constraint regions for the two penalties.

# L2: the unit circle β₁² + β₂² = 1, traced parametrically.
theta = np.linspace(0, 2*np.pi, 100)
x_l2 = np.cos(theta)
y_l2 = np.sin(theta)

# L1: the diamond |β₁| + |β₂| = 1, assembled from its four edges in
# counter-clockwise order starting at (1, 0).
t = np.linspace(0, 1, 25)
x_l1 = np.concatenate([1-t, -t, -1+t, t])
y_l1 = np.concatenate([t, 1-t, -t, -1+t])

fig = make_subplots(rows=1, cols=2, subplot_titles=('L2 (Ridge) Constraint', 'L1 (Lasso) Constraint'))
fig.add_trace(go.Scatter(x=x_l2, y=y_l2, mode='lines', fill='toself', fillcolor='rgba(0,100,200,0.3)', line=dict(color='blue', width=2)), row=1, col=1)
fig.add_trace(go.Scatter(x=x_l1, y=y_l1, mode='lines', fill='toself', fillcolor='rgba(200,100,0,0.3)', line=dict(color='orange', width=2)), row=1, col=2)
fig.update_layout(height=400, title_text="Geometry of Regularization", showlegend=False)

# constrain="domain" keeps the requested [-1.5, 1.5] ranges and shrinks the
# plotting area instead when the aspect ratio is enforced below.
fig.update_xaxes(title_text="β₁", range=[-1.5, 1.5], constrain="domain")
fig.update_yaxes(title_text="β₂", range=[-1.5, 1.5], constrain="domain")

# Lock each subplot to a 1:1 aspect ratio. Without this the panels are wider
# than tall, so the circle is drawn as an ellipse and the diamond is flattened,
# which misrepresents the geometry the figure is meant to show.
fig.update_yaxes(scaleanchor="x", scaleratio=1, row=1, col=1)
fig.update_yaxes(scaleanchor="x2", scaleratio=1, row=1, col=2)
fig.show()

## Key Takeaways

| Type | Penalty | Feature Selection | Best For |
|:-----|:--------|:------------------|:---------|
| L2 (Ridge) | Sum of squared coefficients | No | Many small effects |
| L1 (Lasso) | Sum of absolute coefficients | Yes | Few large effects |
| ElasticNet | Weighted combination of the L1 and L2 penalties | Yes | Correlated features |