In [None]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Build a reproducible synthetic data set: one feature X and a noisy cubic
# response y = X^3 + X^2 + X + noise.
np.random.seed(42)
base_draws = np.random.normal(loc=0, scale=1, size=100)
X = 2 - 3 * base_draws
# The noise is drawn after X so the random stream is consumed in the same
# order; it has mean -3 and standard deviation 3.
noise = np.random.normal(loc=-3, scale=3, size=100)
y = X**3 + X**2 + X + noise
# scikit-learn estimators expect a 2-D feature matrix: (n_samples, 1).
X = X[:, np.newaxis]

# Train-test split: hold out 20% of the samples; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create polynomial features (X, X^2, X^3).
# include_bias=False: the constant column PolynomialFeatures adds by default
# would be turned into an all-zero feature by StandardScaler and is redundant
# with the intercept that LinearRegression fits on its own.
poly = PolynomialFeatures(degree=3, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Apply StandardScaler after the polynomial transformation. The scaler is
# fitted on the training features only and then reused for the test features,
# so no test-set statistics leak into the model.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_poly)
X_test_scaled = scaler.transform(X_test_poly)

# Fit Linear Regression on scaled polynomial features
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Predict on the held-out test set
y_pred = model.predict(X_test_scaled)

# Evaluation on the held-out test set. The MSE is computed once and reused
# for the RMSE; the metrics are then printed in a fixed order.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

for label, value in (
    ("MAE:", mae),
    ("MSE:", mse),
    ("RMSE:", rmse),
    ("R² Score:", r2),
):
    print(label, value)

# Plot the results: the raw samples plus the fitted curve evaluated on an
# evenly spaced grid spanning the observed X range.
plt.scatter(X, y, color='blue', label='Original Data')
# Use the array's own min()/max() reductions. The Python builtins iterate over
# the rows of the 2-D array and only work because each row has one element.
X_range = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
# The grid must go through the same fitted transformers as the training data.
X_range_poly = poly.transform(X_range)
X_range_scaled = scaler.transform(X_range_poly)
y_range_pred = model.predict(X_range_scaled)
plt.plot(X_range, y_range_pred, color='red', label='Polynomial Fit')
plt.xlabel("X")
plt.ylabel("y")
plt.title("Polynomial Regression with StandardScaler")
plt.legend()
plt.show()


PolynomialFeatures → expands the input feature into its powers up to the chosen degree (here degree 3), so that a linear model can fit a non-linear curve.

StandardScaler → standardizes each (non-constant) polynomial feature to zero mean and unit variance, using the mean and standard deviation learned from the training set only.

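Scaling matters especially with polynomial terms, because X, X² and X³ can differ by orders of magnitude: putting them on a comparable scale improves numerical conditioning and helps prevent numerical instability. It improves performance mainly for regularized or gradient-based models; for plain least-squares LinearRegression the predictions themselves are essentially unchanged.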

