# 🎯 Overfitting vs Underfitting
In this notebook, we'll:
- Generate a noisy polynomial dataset
- Fit models of different complexity (linear, medium, high-degree)
- Visualize how model complexity affects performance

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

In [None]:
# Build a small synthetic regression problem.
# Seeding the legacy global RNG makes both random draws below repeatable.
np.random.seed(42)
# 30 inputs drawn uniformly from [-1, 1), sorted ascending, kept as a column vector
X = np.sort(2 * np.random.random_sample((30, 1)) - 1, axis=0)
# Targets: Gaussian noise (std 0.2) added to the underlying curve x^5 - x^3
y = (X**5 - X**3) + 0.2 * np.random.standard_normal((30, 1))

In [None]:
# Scatter the noisy samples to see the raw data before any model is fitted
fig, ax = plt.subplots()
ax.scatter(X, y, color='black', label='Noisy Data')
ax.set(xlabel='X', ylabel='y', title='Noisy Polynomial Data')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
def plot_model(degree, ax, X_train=None, y_train=None):
    """Fit a polynomial regression of the given degree and draw it on ``ax``.

    A ``PolynomialFeatures(degree)`` + ``LinearRegression`` pipeline is fitted
    to the training points and then evaluated on 200 evenly spaced inputs
    across [-1, 1]. The training points are drawn as black dots and the
    fitted curve as a red line.

    Parameters
    ----------
    degree : int
        Degree passed to ``PolynomialFeatures``.
    ax : matplotlib Axes
        Axes to draw on.
    X_train, y_train : array-like, optional
        Training inputs and targets. ``X_train`` must have a single feature
        column, because the evaluation grid is built with shape (200, 1).
        When omitted, the notebook-level ``X`` and ``y`` are used, which is
        what a two-argument ``plot_model(degree, ax)`` call has always done.
    """
    # Fall back to the notebook's dataset so existing two-argument calls keep working.
    if X_train is None:
        X_train = X
    if y_train is None:
        y_train = y

    model = make_pipeline(PolynomialFeatures(degree), LinearRegression())
    model.fit(X_train, y_train)

    # Dense, evenly spaced grid so the fitted curve is drawn between training points too.
    X_test = np.linspace(-1, 1, 200).reshape(-1, 1)
    y_pred = model.predict(X_test)

    ax.scatter(X_train, y_train, color='black', s=20)
    ax.plot(X_test, y_pred, color='red')
    ax.set_title(f"Degree = {degree}")
    # Fixed y-limits give every call the same vertical scale; anything outside is not shown.
    ax.set_ylim(-1, 1.5)

In [None]:
# One panel per polynomial degree, left to right: 1, 4, 15
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 5))
for idx, degree in enumerate([1, 4, 15]):
    plot_model(degree, axes[idx])
fig.suptitle('Underfitting vs Good Fit vs Overfitting')
plt.show()

### 🔍 Summary:
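- **Degree 1**: Underfits (high bias: a straight line is too simple and misses the curvature in the data)
- **Degree 4**: Fits well (flexible enough to follow the underlying curve without chasing the noise)
- **Degree 15**: Overfits (high variance: the curve bends to pass near individual noisy points, i.e. it memorizes noise)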

This visually demonstrates the **bias-variance tradeoff** in machine learning.