## XGBoost Regressor

In [67]:


# Search space for the XGBoost regressor: three candidate values for each of
# four hyperparameters, i.e. 3**4 = 81 combinations.
param_grid = dict(
    n_estimators=[100, 200, 300],      # number of boosting rounds
    max_depth=[3, 4, 5],               # maximum depth of each tree
    learning_rate=[0.01, 0.1, 0.2],    # step size applied at every round
    min_child_weight=[1, 2, 3],        # minimum sum of instance weight (hessian) in a child
)

# Default-configured regressor used as the template for the search
xgb_gs = XGBRegressor()

# Exhaustive 5-fold cross-validated search, candidates ranked by R2,
# with n_jobs=-1 so the fits run on all available cores.
grid_search = GridSearchCV(
    estimator=xgb_gs,
    param_grid=param_grid,
    scoring='r2',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(x_train, y_train)

# Report the winning parameter combination
print("Best Hyperparameters: ", grid_search.best_params_)

# Keep both the winning parameters and the estimator fitted with them
best_xgb_param = grid_search.best_params_
best_xgb = grid_search.best_estimator_

# Predict on the training split and on the held-out split
y_train_pred, y_test_pred = (
    best_xgb.predict(features) for features in (x_train, x_test)
)

# R-squared on each split
r2_train_xgb, r2_test_xgb = (
    r2_score(y_train, y_train_pred),
    r2_score(y_test, y_test_pred),
)

# Mean squared error on each split
mse_train_xgb, mse_test_xgb = (
    mean_squared_error(y_train, y_train_pred),
    mean_squared_error(y_test, y_test_pred),
)

# Summary of train vs. test performance
print("R-squared (R2) for Training Data:", r2_train_xgb)
print("R-squared (R2) for Testing Data:", r2_test_xgb)
print("Mean Squared Error (MSE) for Training Data:", mse_train_xgb)
print("Mean Squared Error (MSE) for Testing Data:", mse_test_xgb)

Best Hyperparameters:  {'learning_rate': 0.01, 'max_depth': 3, 'min_child_weight': 3, 'n_estimators': 300}
R-squared (R2) for Training Data: 0.6531205659875161
R-squared (R2) for Testing Data: 0.5930683308017327
Mean Squared Error (MSE) for Training Data: 56714043.28503834
Mean Squared Error (MSE) for Testing Data: 58389552.327463485


## Bagging Regressor

In [71]:
from sklearn.tree import DecisionTreeRegressor

# Decision tree that every bagged ensemble member is built from
base_model = DecisionTreeRegressor()

# Bagging ensemble around the tree; the fixed random_state seeds the ensemble.
# The base model is passed positionally, exactly as before.
bagging_reg = BaggingRegressor(base_model, random_state=0)

# Only the ensemble size is tuned: four candidate numbers of base models
param_grid = dict(n_estimators=[50, 100, 150, 200])

# 5-fold cross-validated grid search scored by R2, with n_jobs=-1 for all cores
grid_search = GridSearchCV(
    estimator=bagging_reg,
    param_grid=param_grid,
    scoring='r2',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(x_train, y_train)

# Report the winning ensemble size
print("Best Hyperparameters: ", grid_search.best_params_)

# Estimator fitted with the best hyperparameters
best_bagging_reg = grid_search.best_estimator_

# Predict on the training split and on the held-out split
y_train_pred, y_test_pred = (
    best_bagging_reg.predict(features) for features in (x_train, x_test)
)

# R-squared on each split
r2_train_bagging_reg, r2_test_bagging_reg = (
    r2_score(y_train, y_train_pred),
    r2_score(y_test, y_test_pred),
)

# Mean squared error on each split
mse_train_bagging_reg, mse_test_bagging_reg = (
    mean_squared_error(y_train, y_train_pred),
    mean_squared_error(y_test, y_test_pred),
)

# Summary of train vs. test performance
print("R-squared (R2) for Training Data:", r2_train_bagging_reg)
print("R-squared (R2) for Testing Data:", r2_test_bagging_reg)
print("Mean Squared Error (MSE) for Training Data:", mse_train_bagging_reg)
print("Mean Squared Error (MSE) for Testing Data:", mse_test_bagging_reg)


Best Hyperparameters:  {'n_estimators': 200}
R-squared (R2) for Training Data: 0.9452423921939579
R-squared (R2) for Testing Data: 0.5928202372696956
Mean Squared Error (MSE) for Training Data: 8952751.40233669
Mean Squared Error (MSE) for Testing Data: 58425150.61427052
