In [8]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error

# --- Reproducibility ---------------------------------------------------------
# Seed NumPy's legacy global generator so every run draws the same samples.
np.random.seed(0)

# --- Experiment settings -----------------------------------------------------
best_m = 3              # polynomial degree (the generating function below is cubic)
number_of_samples = 50  # how many (x, y) pairs to draw
noise_scale = 100       # multiplier applied to the standard-normal noise

# --- Synthetic data ----------------------------------------------------------
# Inputs: uniform draws from [0, 1), shifted and stretched to [-15, 15),
# stored as a column vector of shape (number_of_samples, 1).
sample_shape = (number_of_samples, 1)
x = 30 * (np.random.rand(*sample_shape) - 0.5)

# Targets: a cubic in x plus Gaussian noise. The noise is drawn after x, so the
# global random stream is consumed in a fixed order (uniform first, normal second).
cubic_signal = 6 * x + 7 * x**2 + 3 * x**3
gaussian_noise = noise_scale * np.random.randn(*sample_shape)
y = cubic_signal + gaussian_noise

# Expand x into polynomial features up to degree best_m.
# Fitting and transforming are done as two explicit steps; the fitted
# transformer stays in `poly` so other inputs can be expanded the same way.
poly = PolynomialFeatures(degree=best_m)
poly.fit(x)
x_poly = poly.transform(x)

# Estimators to compare, keyed by a human-readable label.
ols_model = LinearRegression()   # no penalty
ridge_model = Ridge(alpha=1.0)   # L2 penalty
lasso_model = Lasso(alpha=0.1)   # L1 penalty
models = {
    "Unregularized (Linear Regression)": ols_model,
    "Ridge Regression (L2)": ridge_model,
    "Lasso Regression (L1)": lasso_model,
}

# Fit every model on the full dataset and score it on that same data,
# so these are training-set errors rather than held-out errors.
mse_results = {}
for name, model in models.items():
    train_pred = model.fit(x_poly, y).predict(x_poly)  # fit() returns the estimator
    mse_results[name] = mean_squared_error(y, train_pred)

# Report one line per model, formatted to two decimals.
for name, mse in mse_results.items():
    print(f"MSE ({name}): {mse:.2f}")

# Build a dense, evenly spaced grid across the observed x-range so the fitted
# curves can be drawn smoothly. ndarray.min()/max() reduce over the whole array
# and return scalars; the Python built-ins min()/max() would instead compare
# the rows of the 2-D array and only work when x has exactly one column.
x_range = np.linspace(x.min(), x.max(), 100).reshape(-1, 1)  # column of 100 points
x_poly_range = poly.transform(x_range)  # expand the grid with the already-fitted transformer

# Evaluate each model on the grid, keyed by model label.
predictions = {name: model.predict(x_poly_range) for name, model in models.items()}

# Draw the noisy samples and overlay each model's curve on a single axes,
# using the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(x, y, color='red', label="Noisy Data")

# One line per model; the dictionary key becomes the legend entry.
for name, y_pred in predictions.items():
    ax.plot(x_range, y_pred, label=name)

ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title(f'Polynomial Regression (Degree {best_m}) with Regularization')
ax.legend()
ax.grid(True)
plt.show()

# Written interpretation of the experiment, printed as plain text.
# Adjacent string literals are concatenated into one string; the explicit
# leading newline and the blank "\n" entry reproduce the blank lines
# surrounding and separating the two paragraphs.
analysis = (
    "\n"
    "The MSE values for the unregularized, Ridge, and Lasso models are very similar. "
    "This is expected because the data is generated from a cubic function, "
    "which matches the chosen polynomial degree (3). "
    "Therefore, regularization doesn't significantly impact the model performance.\n"
    "\n"
    "Regularization techniques like Ridge (L2) and Lasso (L1) are more beneficial "
    "when dealing with higher-order polynomials prone to overfitting. "
    "Ridge helps stabilize predictions by shrinking coefficients, "
    "while Lasso can reduce complexity by eliminating less important features.\n"
)

print(analysis)



ModuleNotFoundError: No module named 'sklearn'