# AdaBoost

In [10]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import (
    KFold,
    cross_val_predict,
    cross_val_score,
    cross_validate,
)

In [11]:
def adaboost(X_train, X_test, y_train, y_test, estimator_range, decimal_places=2):
    """Evaluate AdaBoost regressors over a range of ensemble sizes.

    For every value in ``estimator_range`` an ``AdaBoostRegressor`` is scored
    with shuffled 5-fold cross-validation on the training data, then refitted
    on the whole training set and scored on the test set.

    Parameters
    ----------
    X_train, y_train
        Training features and targets; used for cross-validation and for the
        final fit of each model.
    X_test, y_test
        Held-out features and targets; used only for the test metrics.
    estimator_range
        Iterable of ``n_estimators`` values to try.
    decimal_places
        Number of decimals the result table is rounded to.

    Returns
    -------
    y_pred_test
        Test-set predictions of the model fitted for the LAST value of
        ``estimator_range`` (earlier predictions are discarded), or ``None``
        when ``estimator_range`` is empty.
    df_errors
        ``pandas.DataFrame`` with one row per ``n_estimators`` value:

        * ``MAE CV`` / ``medAE CV`` -- errors of the pooled out-of-fold
          predictions on the training set.
        * ``R2 CV``, ``MAE Train (mean)``, ``medAE Train (mean)`` -- means of
          the per-fold cross-validation scores.  Despite the "Train" label
          these are validation-fold scores; the keys are kept unchanged so
          existing consumers of the table keep working.
        * ``MAE Test`` / ``medAE Test`` / ``R2 Test`` -- metrics on the test set.
    """
    results = []
    # Stays None when estimator_range is empty, so the return below cannot fail.
    y_pred_test = None

    # All three per-fold metrics are collected in a single cross_validate run
    # instead of refitting the same folds once per metric.
    scoring = ('neg_mean_absolute_error', 'neg_median_absolute_error', 'r2')

    for n_estimators in estimator_range:
        # AdaBoostRegressor takes no `algorithm` argument (that option belongs
        # to AdaBoostClassifier); passing it raises a TypeError.
        reg = AdaBoostRegressor(n_estimators=n_estimators, random_state=62)

        # The fixed random_state yields the same shuffled folds for every
        # n_estimators value and for both cross-validation calls below.
        kf = KFold(n_splits=5, shuffle=True, random_state=42)

        # Per-fold scores; the error scorers are negated by scikit-learn, so
        # flip the sign to get positive error values.
        cv_scores = cross_validate(reg, X_train, y_train, cv=kf, scoring=scoring)
        mae_scores = -cv_scores['test_neg_mean_absolute_error']
        medae_scores = -cv_scores['test_neg_median_absolute_error']
        r2_scores = cv_scores['test_r2']

        # Out-of-fold predictions for every training sample
        y_pred_cv = cross_val_predict(reg, X_train, y_train, cv=kf)

        # Cross-validation errors computed on the pooled out-of-fold predictions
        mae_cv = metrics.mean_absolute_error(y_train, y_pred_cv)
        medae_cv = metrics.median_absolute_error(y_train, y_pred_cv)

        # Refit on the full training set and predict the held-out test set
        reg.fit(X_train, y_train)
        y_pred_test = reg.predict(X_test)

        # Errors on the test set
        mae_test = metrics.mean_absolute_error(y_test, y_pred_test)
        medae_test = metrics.median_absolute_error(y_test, y_pred_test)
        r2_test = metrics.r2_score(y_test, y_pred_test)

        # One summary row per n_estimators (per-fold values are averaged)
        results.append({
            'n_estimators': n_estimators,
            'MAE CV': mae_cv,
            'medAE CV': medae_cv,
            'R2 CV': r2_scores.mean(),
            'MAE Train (mean)': mae_scores.mean(),
            'medAE Train (mean)': medae_scores.mean(),
            'MAE Test': mae_test,
            'medAE Test': medae_test,
            'R2 Test': r2_test,
        })

    # Convert the results list to a pandas DataFrame
    df_errors = pd.DataFrame(results).round(decimal_places)

    return y_pred_test, df_errors