# Bagging for car price prediction

In [9]:
import numpy as np
import pandas as pd
from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, median_absolute_error, r2_score
from sklearn.model_selection import KFold, cross_val_score, cross_validate

In [12]:
def bagging(X_train, y_train, estimator_range, cv_folds=5, decimal_places=2):
    """Cross-validate a ``BaggingRegressor`` for each ensemble size.

    For every value in ``estimator_range`` a ``BaggingRegressor`` with that
    many estimators (``random_state=22``) is evaluated with shuffled K-fold
    cross-validation (``random_state=42``), and the fold-averaged metrics are
    collected.

    Args:
        X_train: Training features, passed through to scikit-learn.
        y_train: Training targets, passed through to scikit-learn.
        estimator_range: Iterable of ``n_estimators`` values to evaluate.
        cv_folds: Number of cross-validation folds.
        decimal_places: Number of decimals the averaged metrics are rounded to.

    Returns:
        A ``pandas.DataFrame`` with one row per ensemble size and the columns
        ``n_estimators``, ``MAE``, ``medAE`` and ``R2``. The columns are
        present even when ``estimator_range`` is empty.
    """
    columns = ['n_estimators', 'MAE', 'medAE', 'R2']
    scoring = ('neg_mean_absolute_error', 'neg_median_absolute_error', 'r2')

    # The splitter depends only on cv_folds and a fixed seed, so a single
    # instance is shared by every ensemble size instead of being rebuilt
    # on each loop iteration.
    kf = KFold(n_splits=cv_folds, shuffle=True, random_state=42)

    results = []
    for n_estimators in estimator_range:
        model = BaggingRegressor(n_estimators=n_estimators, random_state=22)

        # cross_validate fits the ensemble once per fold and scores every
        # metric on that fit; separate cross_val_score calls would refit the
        # identical (seeded) model once per metric.
        scores = cross_validate(model, X_train, y_train, cv=kf, scoring=scoring)

        # The "neg_*" scorers return negated errors; flip the sign back so
        # MAE and medAE are reported as positive error values.
        results.append({
            'n_estimators': n_estimators,
            'MAE': round(np.mean(-scores['test_neg_mean_absolute_error']), decimal_places),
            'medAE': round(np.mean(-scores['test_neg_median_absolute_error']), decimal_places),
            'R2': round(np.mean(scores['test_r2']), decimal_places),
        })

    # Explicit columns keep the frame's schema stable for an empty range.
    return pd.DataFrame(results, columns=columns)

In [None]:
def visualize_predictions(y_test, y_pred):
    """Scatter-plot predicted prices against actual prices.

    A dashed black diagonal spanning the range of ``y_test`` is drawn as the
    "predicted == actual" reference line. The figure is displayed with
    ``plt.show()``; nothing is returned.

    Args:
        y_test: Actual target values; must provide ``min()`` and ``max()``.
        y_pred: Predicted values, plotted on the y-axis against ``y_test``.
    """
    # NOTE(review): `plt` is not imported in the visible part of this file;
    # confirm `import matplotlib.pyplot as plt` runs before this is called.
    plt.figure(figsize=(10, 6))
    plt.scatter(y_test, y_pred, alpha=0.5)

    # Endpoints of the reference diagonal, computed once.
    lowest, highest = y_test.min(), y_test.max()
    plt.plot([lowest, highest], [lowest, highest], 'k--', lw=2)

    plt.xlabel('Actual Price')
    plt.ylabel('Predicted Price')
    plt.title('Bagging Regressor: Actual vs. Predicted Prices')
    plt.show()