Skip to content
This repository has been archived by the owner on Feb 28, 2024. It is now read-only.

[WIP] GBTRegressor does not provide uncertainty estimates #9

Merged
merged 3 commits into from Mar 28, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
53 changes: 53 additions & 0 deletions skopt/gbt.py
@@ -0,0 +1,53 @@
import numpy as np

from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.utils import check_random_state


class GBTQuantiles(BaseEstimator, RegressorMixin):
    """Predict several quantiles with one estimator.

    This is a wrapper around `GradientBoostingRegressor`'s quantile
    regression that allows you to predict several `quantiles` in
    one go.

    Parameters
    ----------
    quantiles : array-like, optional
        Quantiles to predict. By default the 16, 50 and 84%
        quantiles are predicted.

    random_state : int, RandomState instance, or None (default)
        Set random state to something other than None for reproducible
        results.

    Attributes
    ----------
    regressors_ : list of GradientBoostingRegressor
        One regressor per entry of `quantiles`, in the same order.
        Only set once `fit` has been called.
    """

    # The default is a tuple rather than a list so the default object
    # cannot be mutated through one instance and leak into the others.
    def __init__(self, quantiles=(0.16, 0.5, 0.84), random_state=None):
        self.quantiles = quantiles
        self.random_state = random_state

    def fit(self, X, y):
        """Fit one regressor for each quantile.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape = [n_samples]
            Target values (real numbers in regression)

        Returns
        -------
        self : GBTQuantiles
            The fitted estimator, so calls can be chained.
        """
        # A single RandomState is resolved once and handed to every
        # regressor, so the regressors draw from the same stream in turn.
        rng = check_random_state(self.random_state)
        self.regressors_ = [
            GradientBoostingRegressor(loss='quantile',
                                      alpha=a,
                                      random_state=rng)
            for a in self.quantiles
        ]
        for rgr in self.regressors_:
            rgr.fit(X, y)

        return self

    def predict(self, X):
        """Predictions for each quantile.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Samples to predict for.

        Returns
        -------
        predictions : array, shape = [n_quantiles, n_samples]
            Row ``i`` holds the predictions of the regressor fitted for
            ``quantiles[i]``.
        """
        return np.vstack([rgr.predict(X) for rgr in self.regressors_])
55 changes: 55 additions & 0 deletions skopt/tests/test_gbt.py
@@ -0,0 +1,55 @@
import numpy as np
from scipy import stats

from sklearn.utils import check_random_state
from sklearn.utils.testing import assert_equal, assert_almost_equal

from skopt.gbt import GBTQuantiles


def truth(X):
    """Noise-free target: 0.5 * sin(1.75 * x) on the first column of X."""
    first_feature = X[:, 0]
    return 0.5 * np.sin(1.75 * first_feature)

def constant_noise(X):
    """Noise profile that is one everywhere, shaped and typed like X."""
    unit_profile = np.ones_like(X)
    return unit_profile

def sample_noise(X, std=0.2, noise=constant_noise,
                 random_state=None):
    """Uncertainty inherent to the process

    The regressor should try and model this.

    For every row ``x`` of `X`, in order, one draw is taken from a normal
    distribution with mean 0 and scale ``std * noise(x)``; the draws are
    collected into a single array.
    """
    rng = check_random_state(random_state)
    draws = []
    for row in X:
        scale = std * noise(row)
        draws.append(rng.normal(0, scale))
    return np.array(draws)

def test_gbt_gaussian():
    # Estimate quantiles of the standard normal distribution: every sample
    # has the same constant feature, so the averaged predictions are
    # compared with the analytic quantiles.
    rng = np.random.RandomState(1)
    n_samples = 10000
    features = np.ones((n_samples, 1))
    targets = rng.normal(size=n_samples)

    model = GBTQuantiles()
    model.fit(features, targets)

    predicted = model.predict(features)
    expected = stats.norm.ppf(model.quantiles)
    # One row of predictions per quantile, hence the mean over axis 1.
    assert_almost_equal(expected,
                        np.mean(predicted, axis=1),
                        decimal=2)

def test_gbt_with_std():
    # Simple test of the interface: fit on noisy data and check that the
    # three predicted quantile curves have the expected shapes.
    rng = np.random.RandomState(1)
    X = rng.uniform(0, 5, 500)[:, np.newaxis]

    noise_level = 0.5
    # sample_noise draws once per row of X, which for this 2-D X gives an
    # (n_samples, 1) array. Flatten it so that adding the 1-D truth(X)
    # stays 1-D instead of broadcasting to (n_samples, n_samples).
    noise = np.ravel(sample_noise(X, noise_level, random_state=rng))
    y = truth(X) + noise

    X_ = np.linspace(0, 5, 1000)[:, np.newaxis]

    model = GBTQuantiles()
    model.fit(X, y)

    low, center, high = model.predict(X_)
    # assert_equal compares only its first two arguments (a third one is
    # taken as the failure message), so the shapes are checked pairwise.
    assert_equal(low.shape, center.shape)
    assert_equal(center.shape, high.shape)
    assert_equal(low.shape[0], X_.shape[0])