-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #100 from richford/enh/bagging
ENH: Allow forestci to work on general Bagging estimators
- Loading branch information
Showing
4 changed files
with
229 additions
and
97 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
""" | ||
====================================== | ||
Plotting Bagging Regression Error Bars | ||
====================================== | ||
This example demonstrates using `forestci` to calculate the error bars of | ||
the predictions of a :class:`sklearn.ensemble.BaggingRegressor` object. | ||
The data used here are a classical machine learning data-set, describing | ||
various features of different cars, and their MPG. | ||
""" | ||
|
||
# Bagging Regression Example | ||
import numpy as np | ||
from matplotlib import pyplot as plt | ||
from sklearn.ensemble import BaggingRegressor | ||
from sklearn.svm import SVR | ||
import sklearn.model_selection as xval | ||
from sklearn.datasets import fetch_openml | ||
import forestci as fci | ||
|
||
# retrieve mpg data from machine learning library | ||
mpg_data = fetch_openml(data_id=196) | ||
|
||
# separate mpg data into predictors and outcome variable | ||
mpg_X = mpg_data["data"] | ||
mpg_y = mpg_data["target"] | ||
|
||
# remove rows where the data is nan | ||
not_null_sel = np.invert(np.sum(np.isnan(mpg_data["data"]), axis=1).astype(bool)) | ||
mpg_X = mpg_X[not_null_sel] | ||
mpg_y = mpg_y[not_null_sel] | ||
|
||
# split mpg data into training and test set | ||
mpg_X_train, mpg_X_test, mpg_y_train, mpg_y_test = xval.train_test_split( | ||
mpg_X, mpg_y, test_size=0.25, random_state=42 | ||
) | ||
|
||
# Create BaggingRegressor with SVR base estimators | ||
n_estimators = 1000 | ||
mpg_bagger = BaggingRegressor( | ||
base_estimator=SVR(), n_estimators=n_estimators, random_state=42 | ||
) | ||
mpg_bagger.fit(mpg_X_train, mpg_y_train) | ||
mpg_y_hat = mpg_bagger.predict(mpg_X_test) | ||
|
||
# Plot predicted MPG without error bars | ||
plt.scatter(mpg_y_test, mpg_y_hat) | ||
plt.plot([5, 45], [5, 45], "k--") | ||
plt.xlabel("Reported MPG") | ||
plt.ylabel("Predicted MPG") | ||
plt.show() | ||
|
||
# Calculate the variance | ||
mpg_V_IJ_unbiased = fci.random_forest_error(mpg_bagger, mpg_X_train, mpg_X_test) | ||
|
||
# Plot error bars for predicted MPG using unbiased variance | ||
plt.errorbar(mpg_y_test, mpg_y_hat, yerr=np.sqrt(mpg_V_IJ_unbiased), fmt="o") | ||
plt.plot([5, 45], [5, 45], "k--") | ||
plt.xlabel("Reported MPG") | ||
plt.ylabel("Predicted MPG") | ||
plt.show() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.