In [4]:
import numpy as np
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.metrics import make_scorer, mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

# Candidate regressors, keyed by the display name used in the printed report.
# Insertion order is the order in which the models are evaluated and printed.

# Degree-2 polynomial feature expansion feeding an ordinary linear regression.
polynomial_regression = Pipeline(steps=[
    ('poly', PolynomialFeatures(degree=2)),
    ('linear', LinearRegression()),
])

models = {
    'Linear Regression': LinearRegression(),
    'Lasso Regression': Lasso(alpha=1.0),
    # Estimators that accept a seed are pinned to random_state=0.
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=0),
    'Decision Tree': DecisionTreeRegressor(random_state=0),
    'Polynomial Regression': polynomial_regression,
    'XGBoost': XGBRegressor(random_state=0),
    'AdaBoost': AdaBoostRegressor(random_state=0),
    'SVR': SVR(kernel='rbf'),
    'KNN': KNeighborsRegressor(n_neighbors=5),
    'Gradient Boosting': GradientBoostingRegressor(random_state=0),
}

# Custom metric: root mean squared error
def rmse_scorer(y_true, y_pred):
    """Return the root mean squared error of the predictions.

    Args:
        y_true: Ground-truth target values, forwarded to
            ``mean_squared_error``.
        y_pred: Predicted target values, forwarded to
            ``mean_squared_error``.

    Returns:
        The square root of ``mean_squared_error(y_true, y_pred)``.
    """
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

# Scorers passed to cross-validation, keyed by the label shown in the report.
# The three error metrics are registered with greater_is_better=False, so the
# scores they produce come back negated; R2 is registered as-is.
scorers = dict(
    MSE=make_scorer(mean_squared_error, greater_is_better=False),
    MAE=make_scorer(mean_absolute_error, greater_is_better=False),
    RMSE=make_scorer(rmse_scorer, greater_is_better=False),
    R2=make_scorer(r2_score),
)

# Perform cross-validation for each model.
#
# `results` maps model name -> metric name -> {'Mean': ..., 'Std Dev': ...},
# where the statistics are taken over the per-fold test scores. Scores keep
# the sign produced by the scorer, so metrics registered with
# greater_is_better=False appear negated.
results = {}
cv = 5  # Number of folds for cross-validation

for name, model in models.items():
    # Score every metric in one cross_validate call so each model is fitted
    # once per fold, instead of once per fold *per metric*.
    cv_output = cross_validate(model, X_train, Y_train, cv=cv, scoring=scorers, n_jobs=-1)

    model_results = {}
    for metric_name in scorers:
        # cross_validate reports each scorer's per-fold scores under 'test_<name>'.
        scores = cv_output[f'test_{metric_name}']
        model_results[metric_name] = {
            'Mean': np.mean(scores),
            'Std Dev': np.std(scores),
        }
    results[name] = model_results

# Report: one header line per model, then one indented line per metric giving
# the mean and standard deviation of its cross-validation scores.
for model_name, metrics in results.items():
    report_lines = [f"\nModel: {model_name}"]
    report_lines.extend(
        f"  {metric}: Mean = {values['Mean']:.4f}, Std Dev = {values['Std Dev']:.4f}"
        for metric, values in metrics.items()
    )
    print("\n".join(report_lines))




Model: Linear Regression
  MSE: Mean = -56904764.6905, Std Dev = 72638334.6457
  MAE: Mean = -3089.0003, Std Dev = 471.0987
  RMSE: Mean = -6392.4754, Std Dev = 4005.1246
  R2: Mean = -0.7061, Std Dev = 1.0216

Model: Lasso Regression
  MSE: Mean = -56873898.5824, Std Dev = 72877894.4528
  MAE: Mean = -3050.2931, Std Dev = 462.7122
  RMSE: Mean = -6380.1898, Std Dev = 4020.8304
  R2: Mean = -0.6849, Std Dev = 0.9911

Model: Random Forest
  MSE: Mean = -75870442.9816, Std Dev = 62968513.7992
  MAE: Mean = -3278.3932, Std Dev = 164.1629
  RMSE: Mean = -8140.5831, Std Dev = 3098.6045
  R2: Mean = -4.7153, Std Dev = 7.4702

Model: Decision Tree
  MSE: Mean = -203718813.1500, Std Dev = 166187653.5837
  MAE: Mean = -4184.8543, Std Dev = 1503.1211
  RMSE: Mean = -12898.8957, Std Dev = 6110.4258
  R2: Mean = -32.7132, Std Dev = 60.1560

Model: Polynomial Regression
  MSE: Mean = -847539122754558887985152.0000, Std Dev = 1321324612375856937959424.0000
  MAE: Mean = -360204509652.4116, Std Dev 