<a href="https://colab.research.google.com/github/nazimulrahmann/machine_learning/blob/main/regression_algorithms.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split

# Seed NumPy's global RNG so every draw below is repeatable
np.random.seed(42)

# Dataset configuration
n_samples, n_features = 1000, 5  # rows / columns of the feature matrix
noise_level = 2.0                # multiplier applied to the standard-normal noise

# Feature matrix of standard-normal draws, shape (n_samples, n_features)
X = np.random.randn(n_samples, n_features)

# Ground-truth linear model: random slopes scaled by 3 plus a fixed offset
true_coefficients = 3 * np.random.randn(n_features)
true_intercept = 2.5

# Target = linear signal + intercept + scaled Gaussian noise ...
y = (X @ true_coefficients + true_intercept) + noise_level * np.random.randn(n_samples)

# ... plus two non-linear contributions so a purely linear fit is imperfect
y = y + 0.5 * np.square(X[:, 0])  # quadratic in the first feature
y = y + 0.8 * np.sin(X[:, 1])     # sinusoidal in the second feature

# Hold out 20% of the rows for testing; the split itself is seeded
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

def evaluate_regression(model, X_test, y_test):
    """
    Evaluate a fitted regression model on held-out data and print the metrics.

    Parameters
    ----------
    model : estimator
        Fitted object exposing ``predict(X)``.
    X_test : array-like
        Features passed to ``model.predict``.
    y_test : array-like
        Ground-truth targets the predictions are compared against.

    Returns
    -------
    dict
        Keys ``'model'`` (``str(model)``), ``'mae'``, ``'mse'``, ``'rmse'``
        and ``'r2'``. ``'r2'`` is the value returned by ``r2_score``
        unchanged (a fraction, not a percentage), matching the R² printed by
        the per-model cells in this notebook.
    """
    # Make predictions
    y_pred = model.predict(X_test)

    # Calculate metrics
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    # Keep R² on its native scale; multiplying by 100 while still labelling it
    # "R²" made this helper disagree with every other cell's output.
    r2 = r2_score(y_test, y_pred)

    # Print results
    print(f"MAE: {mae:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R²: {r2:.4f}")

    return {
        'model': str(model),
        'mae': mae,
        'mse': mse,
        'rmse': rmse,
        'r2': r2
    }

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV

# Ordinary least squares baseline
linear_reg = LinearRegression()

# Candidate settings: with/without intercept, with/without non-negative coefficients
param_grid = dict(
    fit_intercept=[True, False],
    positive=[True, False],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
linear_grid = GridSearchCV(
    estimator=linear_reg,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_linear = linear_grid.best_estimator_
print(f"Best parameters: {linear_grid.best_params_}")

# Report held-out test metrics
y_pred = best_linear.predict(X_test)
print(
    "Linear Regression Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

In [7]:
from sklearn.linear_model import Ridge

# Ridge regression; the seed is fixed on the estimator
ridge = Ridge(random_state=42)

# Search over regularization strength and solver
param_grid = dict(
    alpha=[0.1, 0.5, 1.0, 2.0, 5.0],
    solver=['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
ridge_grid = GridSearchCV(
    estimator=ridge,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_ridge = ridge_grid.best_estimator_
print(f"Best parameters: {ridge_grid.best_params_}")

# Report held-out test metrics
y_pred = best_ridge.predict(X_test)
print(
    "\nRidge Regression Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

Best parameters: {'alpha': 1.0, 'solver': 'sag'}

Ridge Regression Evaluation:
MAE: 1.6497
MSE: 4.3721
RMSE: 2.0910
R²: 0.8937


In [8]:
from sklearn.linear_model import Lasso

# Lasso (L1-penalized) regression; the seed is fixed on the estimator
lasso = Lasso(random_state=42)

# Search over penalty strength and coordinate-update order
param_grid = dict(
    alpha=[0.0001, 0.001, 0.01, 0.1, 1.0],
    selection=['cyclic', 'random'],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
lasso_grid = GridSearchCV(
    estimator=lasso,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_lasso = lasso_grid.best_estimator_
print(f"Best parameters: {lasso_grid.best_params_}")

# Report held-out test metrics
y_pred = best_lasso.predict(X_test)
print(
    "\nLasso Regression Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

Best parameters: {'alpha': 0.0001, 'selection': 'random'}

Lasso Regression Evaluation:
MAE: 1.6497
MSE: 4.3726
RMSE: 2.0911
R²: 0.8937


In [9]:
from sklearn.linear_model import ElasticNet

# Elastic net (mixed L1/L2 penalty); the seed is fixed on the estimator
elastic = ElasticNet(random_state=42)

# Search over penalty strength, L1/L2 mix and coordinate-update order
param_grid = dict(
    alpha=[0.0001, 0.001, 0.01, 0.1, 1.0],
    l1_ratio=[0.1, 0.3, 0.5, 0.7, 0.9],
    selection=['cyclic', 'random'],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
elastic_grid = GridSearchCV(
    estimator=elastic,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_elastic = elastic_grid.best_estimator_
print(f"Best parameters: {elastic_grid.best_params_}")

# Report held-out test metrics
y_pred = best_elastic.predict(X_test)
print(
    "\nElasticNet Regression Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

Best parameters: {'alpha': 0.0001, 'l1_ratio': 0.9, 'selection': 'random'}

ElasticNet Regression Evaluation:
MAE: 1.6497
MSE: 4.3726
RMSE: 2.0911
R²: 0.8937


In [None]:
from sklearn.tree import DecisionTreeRegressor

# Single decision tree; the seed is fixed on the estimator
dt = DecisionTreeRegressor(random_state=42)

# The 'poisson' criterion is deliberately left out: it rejects negative
# targets, and y in this notebook takes negative values, so every 'poisson'
# candidate would fail to fit and only contribute NaN scores and warnings.
param_grid = {
    'criterion': ['squared_error', 'friedman_mse', 'absolute_error'],
    'max_depth': [None, 5, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2', None]
}

# 5-fold cross-validated search scored by negative MSE, run on all cores
dt_grid = GridSearchCV(dt, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
dt_grid.fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_dt = dt_grid.best_estimator_
print(f"Best parameters: {dt_grid.best_params_}")

# Report held-out test metrics
y_pred = best_dt.predict(X_test)
print("\nDecision Tree Regressor Evaluation:")
print(f"MAE: {mean_absolute_error(y_test, y_pred):.4f}")
print(f"MSE: {mean_squared_error(y_test, y_pred):.4f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}")
print(f"R²: {r2_score(y_test, y_pred):.4f}")

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest; the seed is fixed on the estimator
rf = RandomForestRegressor(random_state=42)

# Search over forest size, tree shape limits, feature subsampling and bootstrapping
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    max_features=['sqrt', 'log2', None],
    bootstrap=[True, False],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
rf_grid = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_rf = rf_grid.best_estimator_
print(f"Best parameters: {rf_grid.best_params_}")

# Report held-out test metrics
y_pred = best_rf.predict(X_test)
print(
    "\nRandom Forest Regressor Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient-boosted trees; the seed is fixed on the estimator
gb = GradientBoostingRegressor(random_state=42)

# Search over ensemble size, shrinkage, tree depth, row subsampling and split threshold
param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 5, 7],
    subsample=[0.8, 1.0],
    min_samples_split=[2, 5],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
gb_grid = GridSearchCV(
    estimator=gb,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_gb = gb_grid.best_estimator_
print(f"Best parameters: {gb_grid.best_params_}")

# Report held-out test metrics
y_pred = best_gb.predict(X_test)
print(
    "\nGradient Boosting Regressor Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

In [None]:
from sklearn.svm import SVR

# Support vector regression
svr = SVR()

# Per-kernel grids: `degree` only affects the 'poly' kernel and `gamma` is
# unused by the 'linear' kernel, so crossing every option with every kernel
# refits identical models many times. Listing one dict per kernel family
# covers the same distinct models with far fewer fits.
param_grid = [
    {
        'kernel': ['linear'],
        'C': [0.1, 1, 10, 100],
        'epsilon': [0.01, 0.1, 0.5]
    },
    {
        'kernel': ['rbf', 'sigmoid'],
        'C': [0.1, 1, 10, 100],
        'gamma': ['scale', 'auto', 0.1, 1],
        'epsilon': [0.01, 0.1, 0.5]
    },
    {
        'kernel': ['poly'],
        'C': [0.1, 1, 10, 100],
        'gamma': ['scale', 'auto', 0.1, 1],
        'degree': [2, 3, 4],
        'epsilon': [0.01, 0.1, 0.5]
    }
]

# 5-fold cross-validated search scored by negative MSE, run on all cores
svr_grid = GridSearchCV(svr, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
svr_grid.fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_svr = svr_grid.best_estimator_
print(f"Best parameters: {svr_grid.best_params_}")

# Report held-out test metrics
y_pred = best_svr.predict(X_test)
print("\nSupport Vector Regressor Evaluation:")
print(f"MAE: {mean_absolute_error(y_test, y_pred):.4f}")
print(f"MSE: {mean_squared_error(y_test, y_pred):.4f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}")
print(f"R²: {r2_score(y_test, y_pred):.4f}")

In [None]:
from xgboost import XGBRegressor

# XGBoost regressor; the seed is fixed on the estimator
xgb = XGBRegressor(random_state=42)

# Search over ensemble size, shrinkage, depth, row/column subsampling,
# split-gain threshold and L1/L2 regularization
param_grid = dict(
    n_estimators=[100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 6, 9],
    subsample=[0.8, 1.0],
    colsample_bytree=[0.8, 1.0],
    gamma=[0, 0.1, 0.2],
    reg_alpha=[0, 0.1, 1],
    reg_lambda=[0, 0.1, 1],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
xgb_grid = GridSearchCV(
    estimator=xgb,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_xgb = xgb_grid.best_estimator_
print(f"Best parameters: {xgb_grid.best_params_}")

# Report held-out test metrics
y_pred = best_xgb.predict(X_test)
print(
    "\nXGBoost Regressor Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

In [None]:
from lightgbm import LGBMRegressor

# LightGBM regressor; the seed is fixed on the estimator
lgbm = LGBMRegressor(random_state=42)

# Search over ensemble size, shrinkage, leaf count, depth cap,
# minimum leaf population and L1/L2 regularization
param_grid = dict(
    n_estimators=[100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    num_leaves=[31, 63, 127],
    max_depth=[-1, 10, 20],
    min_child_samples=[20, 50],
    reg_alpha=[0, 0.1, 1],
    reg_lambda=[0, 0.1, 1],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
lgbm_grid = GridSearchCV(
    estimator=lgbm,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_lgbm = lgbm_grid.best_estimator_
print(f"Best parameters: {lgbm_grid.best_params_}")

# Report held-out test metrics
y_pred = best_lgbm.predict(X_test)
print(
    "\nLightGBM Regressor Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

In [None]:
from catboost import CatBoostRegressor

# CatBoost regressor with a fixed seed and training output turned off (verbose=0)
cat = CatBoostRegressor(random_state=42, verbose=0)

# Search over boosting rounds, shrinkage, tree depth and L2 leaf regularization
param_grid = dict(
    iterations=[100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    depth=[4, 6, 8],
    l2_leaf_reg=[1, 3, 5],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
cat_grid = GridSearchCV(
    estimator=cat,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_cat = cat_grid.best_estimator_
print(f"Best parameters: {cat_grid.best_params_}")

# Report held-out test metrics
y_pred = best_cat.predict(X_test)
print(
    "\nCatBoost Regressor Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

In [None]:
from sklearn.neural_network import MLPRegressor

# Multi-layer perceptron with a fixed seed; early_stopping=True is set on the estimator
mlp = MLPRegressor(random_state=42, early_stopping=True)

# The `learning_rate` schedule is intentionally not searched: scikit-learn
# only uses it when solver='sgd', and this estimator keeps the default solver,
# so varying it would triple the number of fits without changing any model.
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],
    'activation': ['relu', 'tanh', 'logistic'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.001, 0.01]
}

# 5-fold cross-validated search scored by negative MSE, run on all cores
mlp_grid = GridSearchCV(mlp, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
mlp_grid.fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_mlp = mlp_grid.best_estimator_
print(f"Best parameters: {mlp_grid.best_params_}")

# Report held-out test metrics
y_pred = best_mlp.predict(X_test)
print("\nMLP Regressor Evaluation:")
print(f"MAE: {mean_absolute_error(y_test, y_pred):.4f}")
print(f"MSE: {mean_squared_error(y_test, y_pred):.4f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}")
print(f"R²: {r2_score(y_test, y_pred):.4f}")

In [None]:
from sklearn.linear_model import BayesianRidge

# Bayesian ridge regression
bayesian = BayesianRidge()

# Search over the four prior hyperparameters (alpha_1/alpha_2, lambda_1/lambda_2)
param_grid = dict(
    alpha_1=[1e-6, 1e-5, 1e-4],
    alpha_2=[1e-6, 1e-5, 1e-4],
    lambda_1=[1e-6, 1e-5, 1e-4],
    lambda_2=[1e-6, 1e-5, 1e-4],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
bayesian_grid = GridSearchCV(
    estimator=bayesian,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_bayesian = bayesian_grid.best_estimator_
print(f"Best parameters: {bayesian_grid.best_params_}")

# Report held-out test metrics
y_pred = best_bayesian.predict(X_test)
print(
    "\nBayesian Ridge Regression Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel

# Gaussian process with a constant-scaled RBF kernel; the seed is fixed on the estimator
kernel = ConstantKernel() * RBF()
gp = GaussianProcessRegressor(kernel=kernel, random_state=42)

# Search over the `alpha` values and whether targets are normalized
param_grid = dict(
    alpha=[1e-10, 1e-5, 1e-2],
    normalize_y=[True, False],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
gp_grid = GridSearchCV(
    estimator=gp,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_gp = gp_grid.best_estimator_
print(f"Best parameters: {gp_grid.best_params_}")

# Report held-out test metrics
y_pred = best_gp.predict(X_test)
print(
    "\nGaussian Process Regressor Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

In [None]:
from sklearn.neighbors import KNeighborsRegressor

# k-nearest-neighbours regressor
knn = KNeighborsRegressor()

# Search over neighbourhood size, weighting, search algorithm and Minkowski power
param_grid = dict(
    n_neighbors=[3, 5, 7, 9, 11],
    weights=['uniform', 'distance'],
    algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'],
    p=[1, 2],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
knn_grid = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_knn = knn_grid.best_estimator_
print(f"Best parameters: {knn_grid.best_params_}")

# Report held-out test metrics
y_pred = best_knn.predict(X_test)
print(
    "\nK-Nearest Neighbors Regressor Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

In [None]:
from sklearn.ensemble import AdaBoostRegressor

# AdaBoost regressor; the seed is fixed on the estimator
ada = AdaBoostRegressor(random_state=42)

# Search over ensemble size, shrinkage and the loss used to update sample weights
param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 1.0],
    loss=['linear', 'square', 'exponential'],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
ada_grid = GridSearchCV(
    estimator=ada,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_ada = ada_grid.best_estimator_
print(f"Best parameters: {ada_grid.best_params_}")

# Report held-out test metrics
y_pred = best_ada.predict(X_test)
print(
    "\nAdaBoost Regressor Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

In [None]:
from sklearn.linear_model import BayesianRidge

# Bayesian ridge regression (this cell repeats the same estimator and grid
# as the earlier Bayesian Ridge cell and rebinds the same variable names)
bayesian = BayesianRidge()

# Candidate values for the four prior hyperparameters
param_grid = dict(
    alpha_1=[1e-6, 1e-5, 1e-4],
    alpha_2=[1e-6, 1e-5, 1e-4],
    lambda_1=[1e-6, 1e-5, 1e-4],
    lambda_2=[1e-6, 1e-5, 1e-4],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
print("Training Bayesian Ridge Regression...")
bayesian_grid = GridSearchCV(
    estimator=bayesian,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_bayesian = bayesian_grid.best_estimator_
print(f"Best parameters: {bayesian_grid.best_params_}")

# Report held-out test metrics
y_pred = best_bayesian.predict(X_test)
print(
    "\nBayesian Ridge Regression Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

In [None]:
from sklearn.linear_model import SGDRegressor

# Linear model trained by stochastic gradient descent; the seed is fixed on the estimator
sgd = SGDRegressor(random_state=42)

# Candidate losses, penalties, penalty strengths, step-size schedules
# and initial learning rates (eta0)
param_grid = dict(
    loss=['squared_error', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'],
    penalty=['l1', 'l2', 'elasticnet'],
    alpha=[0.0001, 0.001, 0.01],
    learning_rate=['constant', 'optimal', 'invscaling', 'adaptive'],
    eta0=[0.01, 0.1],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
print("Training SGD Regressor...")
sgd_grid = GridSearchCV(
    estimator=sgd,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_sgd = sgd_grid.best_estimator_
print(f"Best parameters: {sgd_grid.best_params_}")

# Report held-out test metrics
y_pred = best_sgd.predict(X_test)
print(
    "\nSGD Regressor Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

In [None]:
from sklearn.linear_model import HuberRegressor

# Huber regressor (outlier-robust linear model)
huber = HuberRegressor()

# Candidate outlier thresholds, regularization strengths and iteration caps
param_grid = dict(
    epsilon=[1.1, 1.35, 1.5],
    alpha=[0.0001, 0.001, 0.01],
    max_iter=[100, 200, 300],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
print("Training Huber Regressor...")
huber_grid = GridSearchCV(
    estimator=huber,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_huber = huber_grid.best_estimator_
print(f"Best parameters: {huber_grid.best_params_}")

# Report held-out test metrics
y_pred = best_huber.predict(X_test)
print(
    "\nHuber Regressor Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

In [None]:
from sklearn.linear_model import TheilSenRegressor

# Theil-Sen regressor; the seed is fixed on the estimator
theil = TheilSenRegressor(random_state=42)

# Candidate subset caps, per-subset sample counts and iteration caps
param_grid = dict(
    max_subpopulation=[1000, 5000, 10000],
    n_subsamples=[None, 100, 200],
    max_iter=[100, 300, 500],
)

# 5-fold cross-validated search scored by negative MSE, run on all cores
print("Training Theil-Sen Regressor...")
theil_grid = GridSearchCV(
    estimator=theil,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Estimator refit with the best-scoring parameter combination
best_theil = theil_grid.best_estimator_
print(f"Best parameters: {theil_grid.best_params_}")

# Report held-out test metrics
y_pred = best_theil.predict(X_test)
print(
    "\nTheil-Sen Regressor Evaluation:",
    f"MAE: {mean_absolute_error(y_test, y_pred):.4f}",
    f"MSE: {mean_squared_error(y_test, y_pred):.4f}",
    f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}",
    f"R²: {r2_score(y_test, y_pred):.4f}",
    sep="\n",
)

In [None]:
from sklearn.linear_model import RANSACRegressor

# Inner model that RANSAC refits on each sampled subset
# (LinearRegression comes from the linear-regression cell's import)
base_estimator = LinearRegression()

# Initialize RANSAC model.
# The inner estimator is passed positionally because its keyword was renamed
# from `base_estimator` to `estimator` across scikit-learn releases, and the
# old keyword raises a TypeError on current versions; the positional form is
# accepted by both.
ransac = RANSACRegressor(base_estimator, random_state=42)

# Parameter grid
param_grid = {
    'min_samples': [None, 0.1, 0.5, 0.9],  # Min samples to fit
    'residual_threshold': [None, 1.0, 2.0],  # Threshold for inliers
    'max_trials': [50, 100, 200],  # Max iterations
    'stop_probability': [0.95, 0.99]  # Probability to stop
}

# Grid search: 5-fold CV scored by negative MSE, run on all cores
print("Training RANSAC Regressor...")
ransac_grid = GridSearchCV(ransac, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
ransac_grid.fit(X_train, y_train)

# Best model
best_ransac = ransac_grid.best_estimator_
print(f"Best parameters: {ransac_grid.best_params_}")

# Evaluation on the held-out test split
y_pred = best_ransac.predict(X_test)
print("\nRANSAC Regressor Evaluation:")
print(f"MAE: {mean_absolute_error(y_test, y_pred):.4f}")
print(f"MSE: {mean_squared_error(y_test, y_pred):.4f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.4f}")
print(f"R²: {r2_score(y_test, y_pred):.4f}")

In [None]:
import pandas as pd

# Tuned estimators produced by the cells above, keyed by display name.
# The metrics are computed here from these fitted models because no earlier
# cell stores per-model metric dictionaries. Bayesian Ridge is listed once
# even though its cell appears twice in the notebook.
fitted_models = {
    'Linear Regression': best_linear,
    'Ridge Regression': best_ridge,
    'Lasso Regression': best_lasso,
    'ElasticNet': best_elastic,
    'Decision Tree': best_dt,
    'Random Forest': best_rf,
    'Gradient Boosting': best_gb,
    'XGBoost': best_xgb,
    'LightGBM': best_lgbm,
    'CatBoost': best_cat,
    'SVR': best_svr,
    'MLP': best_mlp,
    'Bayesian Ridge': best_bayesian,
    'Gaussian Process': best_gp,
    'KNN': best_knn,
    'AdaBoost': best_ada,
    'SGD Regressor': best_sgd,
    'Huber Regressor': best_huber,
    'Theil-Sen Regressor': best_theil,
    'RANSAC Regressor': best_ransac
}


def _test_set_metrics(name, model):
    """Return one comparison-table row: test-split MAE, MSE, RMSE and R² for `model`."""
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    return {
        'model': name,
        'MAE': mean_absolute_error(y_test, predictions),
        'MSE': mse,
        'RMSE': np.sqrt(mse),
        'R²': r2_score(y_test, predictions)
    }


# Collect all metrics (one row per model)
all_metrics = [_test_set_metrics(name, model) for name, model in fitted_models.items()]

# Create DataFrame
results_df = pd.DataFrame(all_metrics)

# Sort by RMSE (lower is better)
results_df = results_df.sort_values('RMSE').reset_index(drop=True)

print("\nFinal Model Comparison:")
print(results_df[['model', 'RMSE', 'R²', 'MAE', 'MSE']])