# Bagging for car price prediction

In [9]:
from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, median_absolute_error, r2_score
from sklearn.model_selection import KFold, cross_val_score
import pandas as pd
import numpy as np

In [1]:
def bagging(X_train, y_train, estimator_range, cv_folds=5, decimal_places=2, X_test=None):
    """Cross-validate bagging regressors of several sizes and predict on test data.

    For every value in ``estimator_range`` a ``BaggingRegressor`` with that
    many estimators is evaluated with shuffled K-fold cross-validation on the
    training data, recording MAE, median absolute error and R2 for each fold.
    A regressor using the last value of ``estimator_range`` is then fitted on
    the whole training set and used to predict ``X_test``.

    Args:
        X_train: Training features.
        y_train: Training targets.
        estimator_range: Iterable of ``n_estimators`` values to evaluate.
        cv_folds: Number of cross-validation folds.
        decimal_places: Number of decimals the fold scores are rounded to.
        X_test: Features to predict on. When omitted, a module-level variable
            named ``X_test`` is used, which is what earlier versions of this
            function relied on implicitly.

    Returns:
        A ``(y_pred, df_errors)`` tuple. ``y_pred`` holds the predictions of
        the model built with the last ``n_estimators`` value (``None`` when
        ``estimator_range`` is empty). ``df_errors`` is a DataFrame with one
        row per (n_estimators, fold) and the columns ``n_estimators``,
        ``Fold``, ``MAE``, ``medAE`` and ``R2``.

    Raises:
        NameError: If ``X_test`` is neither passed nor defined at module level.
    """
    # Imported locally because the file-level imports only provide
    # cross_val_score, which can evaluate a single metric per call.
    from sklearn.model_selection import cross_validate

    if X_test is None:
        # Backward compatibility: the function used to read X_test as a global.
        try:
            X_test = globals()["X_test"]
        except KeyError:
            raise NameError(
                "X_test must be passed to bagging() or defined at module level"
            ) from None

    metrics = ('neg_mean_absolute_error', 'neg_median_absolute_error', 'r2')

    # The splitter does not depend on n_estimators; the fixed seed yields the
    # same folds for every ensemble size, so rows are comparable across sizes.
    kf = KFold(n_splits=cv_folds, shuffle=True, random_state=42)

    results = []
    last_n_estimators = None

    for n_estimators in estimator_range:
        clf = BaggingRegressor(n_estimators=n_estimators, random_state=22)

        # One pass over the folds scores all three metrics, instead of
        # refitting the ensemble on every fold once per metric.
        scores = cross_validate(clf, X_train, y_train, cv=kf, scoring=metrics)

        # Error scorers are negated by scikit-learn so that greater is
        # better; flip the sign back to report plain errors.
        mae_scores = np.round(-scores['test_neg_mean_absolute_error'], decimal_places)
        medae_scores = np.round(-scores['test_neg_median_absolute_error'], decimal_places)
        r2_scores = np.round(scores['test_r2'], decimal_places)

        # Store results for each fold
        for fold_idx, (mae, medae, r2) in enumerate(zip(mae_scores, medae_scores, r2_scores)):
            results.append({
                'n_estimators': n_estimators,
                'Fold': fold_idx + 1,
                'MAE': mae,
                'medAE': medae,
                'R2': r2
            })

        last_n_estimators = n_estimators

    # Only the predictions of the last ensemble size are returned, so fit the
    # full-data model once here rather than once per loop iteration.
    y_pred = None
    if last_n_estimators is not None:
        final_model = BaggingRegressor(n_estimators=last_n_estimators, random_state=22)
        final_model.fit(X_train, y_train)
        y_pred = final_model.predict(X_test)

    # Explicit columns keep the frame's schema stable even with no rows.
    df_errors = pd.DataFrame(results, columns=['n_estimators', 'Fold', 'MAE', 'medAE', 'R2'])

    return y_pred, df_errors