In [6]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_percentage_error

def generate_data():
    X = np.random.rand(500, 2)
    y = np.random.rand(500)
    return X, y

def fit_adaboost_regressor(X, y, num_stumps):
    n_samples = X.shape[0]
    # Initialize weights to 1/n_samples
    sample_weights = np.full(n_samples, (1 / n_samples))
    stumps = []
    stump_weights = []
    for _ in range(num_stumps):
        # Fit a decision tree to the data using the current weights
        stump = DecisionTreeRegressor(max_depth=1, max_leaf_nodes=2)
        stump.fit(X, y, sample_weight=sample_weights)
        # Predict the labels and calculate the loss
        y_pred = stump.predict(X)
        loss = np.abs(y - y_pred) / np.max(np.abs(y - y_pred))
        # Calculate the weighted error rate and stump weight
        weighted_error = np.sum(sample_weights * loss)
        stump_weight = np.log((1 - weighted_error) / weighted_error)
        # Update the sample weights for the next iteration
        sample_weights *= np.exp(stump_weight * loss)
        sample_weights /= np.sum(sample_weights)
        # Save the stump and its weight
        stumps.append(stump)
        stump_weights.append(stump_weight)
    return stumps, stump_weights

def predict_adaboost_regressor(X, stumps, stump_weights):
    n_samples = X.shape[0]
    y_pred = np.zeros(n_samples)
    for i in range(len(stumps)):
        y_pred += stump_weights[i] * stumps[i].predict(X)
    return y_pred

def evaluate_mape(y_true, y_pred):
    return mean_absolute_percentage_error(y_true, y_pred)

# Generate the data and split into train and test sets
X, y = generate_data()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the adaboost regressor and make predictions
adaboost_stumps, adaboost_stump_weights = fit_adaboost_regressor(X_train, y_train, num_stumps=50)
adaboost_y_train_pred = predict_adaboost_regressor(X_train, adaboost_stumps, adaboost_stump_weights)
adaboost_y_test_pred = predict_adaboost_regressor(X_test, adaboost_stumps, adaboost_stump_weights)

# Evaluate the performance of the adaboost regressor using MAPE
adaboost_train_mape = evaluate_mape(y_train, adaboost_y_train_pred)
adaboost_test_mape = evaluate_mape(y_test, adaboost_y_test_pred)

print("Adaboost Regression Train MAPE:", adaboost_train_mape)
print("Adaboost Regression Test MAPE:", adaboost_test_mape)

Adaboost Regression Train MAPE: 2.173933907471666
Adaboost Regression Test MAPE: 2.944399207297575


In [7]:
# Use the sklearn AdaboostRegressor as a comparison
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_absolute_percentage_error

# Train the sklearn AdaboostRegressor and make predictions
sklearn_adaboost = AdaBoostRegressor(n_estimators=50, learning_rate=1)
sklearn_adaboost.fit(X_train, y_train)
sklearn_y_train_pred = sklearn_adaboost.predict(X_train)
sklearn_y_test_pred = sklearn_adaboost.predict(X_test)

# Evaluate the performance of the sklearn AdaboostRegressor using MAPE
sklearn_train_mape = mean_absolute_percentage_error(y_train, sklearn_y_train_pred)
sklearn_test_mape = mean_absolute_percentage_error(y_test, sklearn_y_test_pred)

print("Sklearn AdaboostRegressor Train MAPE: {:.2f}%".format(sklearn_train_mape * 100))
print("Sklearn AdaboostRegressor Test MAPE: {:.2f}%".format(sklearn_test_mape * 100))


Sklearn AdaboostRegressor Train MAPE: 256.42%
Sklearn AdaboostRegressor Test MAPE: 343.80%


In [8]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid to search over
param_grid = {'n_estimators': [10, 50, 100],
              'learning_rate': [0.1, 0.5, 1]}

# Create the grid search object
grid_search = GridSearchCV(AdaBoostRegressor(), param_grid, scoring='neg_mean_absolute_percentage_error', cv=5)

# Fit the grid search to the training data
grid_search.fit(X_train, y_train)

# Print the best hyperparameters found
print("Best hyperparameters:", grid_search.best_params_)

# Make predictions using the best estimator found
sklearn_adaboost_tuned = grid_search.best_estimator_
sklearn_y_train_pred_tuned = sklearn_adaboost_tuned.predict(X_train)
sklearn_y_test_pred_tuned = sklearn_adaboost_tuned.predict(X_test)

# Evaluate the performance of the tuned sklearn AdaboostRegressor using MAPE
sklearn_train_mape_tuned = mean_absolute_percentage_error(y_train, sklearn_y_train_pred_tuned)
sklearn_test_mape_tuned = mean_absolute_percentage_error(y_test, sklearn_y_test_pred_tuned)
print("Tuned sklearn AdaboostRegressor MAPE - Train:", sklearn_train_mape_tuned)
print("Tuned sklearn AdaboostRegressor MAPE - Test:", sklearn_test_mape_tuned)


Best hyperparameters: {'learning_rate': 1, 'n_estimators': 50}
Tuned sklearn AdaboostRegressor MAPE - Train: 2.518053641005037
Tuned sklearn AdaboostRegressor MAPE - Test: 3.423859236833386


In [33]:
import numpy as np

def fit_adaboost_regressor(X_train, y_train, n_estimators, eta):
    n_samples, n_features = X_train.shape
    weights = np.ones(n_samples) / n_samples
    stumps = []
    alphas = []
    
    for _ in range(n_estimators):
        stump = DecisionTreeRegressor(max_depth=1)
        stump.fit(X_train, y_train, sample_weight=weights)
        y_train_pred = stump.predict(X_train)
        error = np.mean(weights * np.exp(-y_train * y_train_pred))
        alpha = 0.5 * np.log((1 - error) / error)
        weights = weights * np.exp(-alpha * (y_train * y_train_pred))
        weights /= np.sum(weights)
        stumps.append(stump)
        alphas.append(alpha)
    
    return stumps, alphas

def predict_adaboost_regressor(X, stumps, alphas):
    y_pred = np.zeros(len(X))
    for stump, alpha in zip(stumps, alphas):
        y_pred += alpha * stump.predict(X)
    return y_pred

def mape(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true))

# Generate synthetic dataset
np.random.seed(42)
X = np.random.rand(500, 2)
y = np.random.rand(500)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit adaboost regression using exponential loss
n_estimators = 50
adaboost_fit = fit_adaboost_regressor(X_train, y_train, n_estimators, eta)

# Make predictions on train and test sets
y_train_pred = predict_adaboost_regressor(X_train, *adaboost_fit)
y_test_pred = predict_adaboost_regressor(X_test, *adaboost_fit)

# Evaluate performance using MAPE
train_mape = mape(y_train, y_train_pred)
test_mape = mape(y_test, y_test_pred)

print("Train MAPE:", train_mape)
print("Test MAPE:", test_mape)


Train MAPE: 8.892400504139253
Test MAPE: 7.950289928679891


In [37]:
def grid_search_adaboost_regressor(X_train, y_train, X_val, y_val, n_estimators_list, eta_list):
    best_hyperparams = None
    best_val_mape = float('inf')
    adaboost_model = None
    
    for n_estimators in n_estimators_list:
        for eta in eta_list:
            # Fit adaboost regression using exponential loss
            adaboost_fit = fit_adaboost_regressor(X_train, y_train, n_estimators, eta)

            # Make predictions on train and validation sets
            y_train_pred = predict_adaboost_regressor(X_train, *adaboost_fit)
            y_val_pred = predict_adaboost_regressor(X_val, *adaboost_fit)

            # Evaluate performance using MAPE
            train_mape = mape(y_train, y_train_pred)
            val_mape = mape(y_val, y_val_pred)

            # Update best hyperparameters if we have found a better model
            if val_mape < best_val_mape:
                best_hyperparams = {'n_estimators': n_estimators, 'eta': eta}
                best_val_mape = val_mape
                adaboost_model = adaboost_fit
                
    return best_hyperparams, train_mape, best_val_mape, adaboost_model


In [38]:
from sklearn.model_selection import train_test_split

# Split into train, validation, and test sets
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.25, random_state=42)

# Define hyperparameters to search over
n_estimators_list = [50, 100, 200]
eta_list = [0.1, 0.5, 1.0]

# Perform grid search
best_hyperparams, train_mape, val_mape, adaboost_model = grid_search_adaboost_regressor(X_train, y_train, X_val, y_val, n_estimators_list, eta_list)

# Evaluate performance on test set using best model
test_mape = mape(y_test, predict_adaboost_regressor(X_test, *adaboost_model))
print("\nBest hyperparameters:", best_hyperparams)
print("Train MAPE:", train_mape)
print("Validation MAPE:", val_mape)
print("Test MAPE:", test_mape)



Best hyperparameters: {'n_estimators': 50, 'eta': 1.0}
Train MAPE: 208.44131540397265
Validation MAPE: 89.5301253449452
Test MAPE: 91.74265496239677
