## OLS - Simple Linear Regression (SLR) with StandardScaler

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error, r2_score

# Sample data (SLR): predict y from one feature x, generated as
# y = 4 + 3*x + standard-normal noise. The seed is fixed for reproducibility.
np.random.seed(42)
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

# Train-test split (80/20, fixed random_state so the partition is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features: fit the scaler on the training split only, then apply the
# same training statistics to the test split so nothing leaks from test data.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# OLS using statsmodels
# has_constant="add" always adds the intercept column. The default ("skip")
# silently leaves it out when the input already contains a constant column
# (for example a one-row batch), which would make the design matrix disagree
# with the fitted parameters.
X_train_ols = sm.add_constant(X_train_scaled, has_constant="add")  # add intercept
model = sm.OLS(y_train, X_train_ols).fit()
print(model.summary())

# Predict: build the test design matrix exactly like the training one
X_test_ols = sm.add_constant(X_test_scaled, has_constant="add")
y_pred = model.predict(X_test_ols)

# Evaluation on the held-out split
print("MSE:", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))


## OLS - Multiple Linear Regression (MLR) with StandardScaler

In [None]:
# Simulate MLR data: y = 1.5 + 2*x1 + 3*x2 - 1*x3 + standard-normal noise.
# The seed is fixed for reproducibility.
np.random.seed(42)
X = np.random.rand(100, 3)
y = 1.5 + 2 * X[:, 0] + 3 * X[:, 1] - 1 * X[:, 2] + np.random.randn(100)

# Train-test split (80/20, fixed random_state so the partition is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling: fit on the training split only, then reuse those statistics for
# the test split so nothing leaks from test data.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# OLS with statsmodels
# has_constant="add" always adds the intercept column. The default ("skip")
# silently leaves it out when the input already contains a constant column
# (for example a one-row batch, or a feature that is constant in the batch),
# which would make the design matrix disagree with the fitted parameters.
X_train_ols = sm.add_constant(X_train_scaled, has_constant="add")
model = sm.OLS(y_train, X_train_ols).fit()
print(model.summary())

# Predict: build the test design matrix exactly like the training one
X_test_ols = sm.add_constant(X_test_scaled, has_constant="add")
y_pred = model.predict(X_test_ols)

# Evaluation on the held-out split
print("MSE:", mean_squared_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))


## Simple Linear Regression (SLR) with scikit-learn

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Synthetic SLR data: y = 5 + 3*x + standard-normal noise (seeded for reproducibility)
np.random.seed(42)
X = np.random.rand(100, 1) * 2
y = 5 + 3 * X.ravel() + np.random.randn(100)

# Hold out 20% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create the estimator and fit it on the training split (fit returns the estimator)
slr_model = LinearRegression().fit(X_train, y_train)

# Pull out the single slope and the intercept
(coefficient,) = slr_model.coef_
intercept = slr_model.intercept_

# Report the fitted line
print(f"SLR Model Equation: y = {intercept:.3f} + {coefficient:.3f} * x")


## Multiple Linear Regression (MLR) with scikit-learn

In [None]:
# Simulate data for MLR: y = 1.5 + 2*x1 + 3*x2 - 1*x3 + standard-normal noise.
# The seed is fixed for reproducibility.
np.random.seed(42)
X_mlr = np.random.rand(100, 3)
y_mlr = 1.5 + 2 * X_mlr[:, 0] + 3 * X_mlr[:, 1] - 1 * X_mlr[:, 2] + np.random.randn(100)

# Train-test split (80/20, fixed random_state so the partition is repeatable)
X_train_mlr, X_test_mlr, y_train_mlr, y_test_mlr = train_test_split(X_mlr, y_mlr, test_size=0.2, random_state=42)

# Fit the model
mlr_model = LinearRegression()
mlr_model.fit(X_train_mlr, y_train_mlr)

# Extract coefficients
intercept_mlr = mlr_model.intercept_
coefficients_mlr = mlr_model.coef_

# Model Equation
# Put each coefficient's sign into the joining operator so a negative
# coefficient reads "- 1.000 * x3" instead of "+ -1.000 * x3". Output is
# unchanged when every coefficient is non-negative.
equation_terms = [
    f"{'-' if coef < 0 else '+'} {abs(coef):.3f} * x{i}"
    for i, coef in enumerate(coefficients_mlr, start=1)
]
mlr_equation = f"MLR Model Equation: y = {intercept_mlr:.3f} " + " ".join(equation_terms)
print(mlr_equation)
