Test by estimating the quantiles of the normal

scikit-optimize · Mar 23, 2016 · 2fbb4ba · 2fbb4ba
1 parent 0dcab69
commit 2fbb4ba
Show file tree

Hide file tree

Showing 2 changed files with 46 additions and 32 deletions.
diff --git a/skopt/gbt.py b/skopt/gbt.py
@@ -1,28 +1,25 @@
+import numpy as np
+
 from sklearn.base import BaseEstimator, RegressorMixin
 from sklearn.ensemble import GradientBoostingRegressor
+from sklearn.utils import check_random_state
 
 
-class GradientBoostingRegressorWithStd(BaseEstimator, RegressorMixin):
-    def __init__(self, alpha):
-        self.alpha = alpha
+class GBTQuantiles(BaseEstimator, RegressorMixin):
+    def __init__(self, quantiles=[0.16, 0.5, 0.84], random_state=None):
+        self.quantiles = quantiles
+        self.random_state = random_state
 
     def fit(self, X, y):
-        """Fit regressor"""
-        self.regressor_ = GradientBoostingRegressor(loss='quantile')
-        self.rgr_up_ = GradientBoostingRegressor(loss='quantile',
-                                                 alpha=0.5 + self.alpha/2.)
-        self.rgr_down_ = GradientBoostingRegressor(loss='quantile',
-                                                   alpha=0.5 - self.alpha/2.)
-
-        self.regressor_.fit(X, y)
-        self.rgr_up_.fit(X, y)
-        self.rgr_down_.fit(X, y)
-
-    def predict(self, X, return_std=False):
-        """Prediction with uncertainties"""
-        up = self.rgr_up_.predict(X)
-        down = self.rgr_down_.predict(X)
-        std = up - down
-        central = self.regressor_.predict(X)
+        """Fit one regressor for each quantile"""
+        rng = check_random_state(self.random_state)
+        self.regressors_ = [GradientBoostingRegressor(loss='quantile',
+                                                      alpha=a,
+                                                      random_state=rng)
+                            for a in self.quantiles]
+        for rgr in self.regressors_:
+            rgr.fit(X, y)
 
-        return (central, std)
+    def predict(self, X):
+        """Predictions for each quantile"""
+        return np.vstack([rgr.predict(X) for rgr in self.regressors_])
diff --git a/skopt/tests/test_gbt.py b/skopt/tests/test_gbt.py
@@ -1,38 +1,55 @@
 import numpy as np
+from scipy import stats
 
-from sklearn.utils.testing import assert_equal
+from sklearn.utils import check_random_state
+from sklearn.utils.testing import assert_equal, assert_almost_equal
 
-from skopt.gbt import GradientBoostingRegressorWithStd
+from skopt.gbt import GBTQuantiles
 
 
-rng = np.random.RandomState(324)
-
 def truth(X):
     return 0.5 * np.sin(1.75*X[:, 0])
 
 def constant_noise(X):
     return np.ones_like(X)
 
-def sample_noise(X, std=0.2, noise=constant_noise):
+def sample_noise(X, std=0.2, noise=constant_noise,
+                 random_state=None):
     """Uncertainty inherent to the process
 
     The regressor should try and model this.
     """
+    rng = check_random_state(random_state)
     return np.array([rng.normal(0, std*noise(x)) for x in X])
 
+def test_gbt_gaussian():
+    # estimate quantiles of the normal distribution
+    rng = np.random.RandomState(1)
+    N = 10000
+    X = np.ones((N, 1))
+    y = rng.normal(size=N)
+
+    rgr = GBTQuantiles()
+    rgr.fit(X, y)
+
+    estimates = rgr.predict(X)
+    assert_almost_equal(stats.norm.ppf(rgr.quantiles),
+                        np.mean(estimates, axis=1),
+                        decimal=2)
+
 def test_gbt_with_std():
+    # simple test of the interface
     rng = np.random.RandomState(1)
-    # simple test of interface
     X = rng.uniform(0, 5, 500)[:, np.newaxis]
 
     noise_level = 0.5
-    y = truth(X) + sample_noise(X, noise_level)
+    y = truth(X) + sample_noise(X, noise_level, random_state=rng)
 
     X_ = np.linspace(0, 5, 1000)[:, np.newaxis]
 
-    model = GradientBoostingRegressorWithStd(alpha=0.68)
+    model = GBTQuantiles()
     model.fit(X, y)
 
-    p, s = model.predict(X_)
-    assert_equal(p.shape, s.shape)
-    assert_equal(p.shape[0], X_.shape[0])
+    l, c, h = model.predict(X_)
+    assert_equal(l.shape, c.shape, h.shape)
+    assert_equal(l.shape[0], X_.shape[0])