-
-
Notifications
You must be signed in to change notification settings - Fork 25.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MRG+2] Adding return_std options for models in linear_model/bayes.py #7838
Changes from 13 commits
c19e2c9
2fad2d5
a6c0bf3
f92a860
4bae33d
ea9fad4
25c457e
b905a23
0a3ccd2
5634ee2
e817de3
806818a
2f0bd32
21ba9d5
df3038a
5d9739d
542de0b
a552022
b9c55df
b9f7319
0cd9f5c
8eaa4c7
ba1c2c6
3599b57
6a615f1
0ded8b7
1e1392c
f7e31f1
039ae83
092a569
561ef01
5bb4080
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,6 +91,9 @@ class BayesianRidge(LinearModel, RegressorMixin): | |
lambda_ : float | ||
estimated precision of the weights. | ||
|
||
sigma_ : array, shape = (n_features, n_features) | ||
estimated variance-covariance matrix of the weights | ||
|
||
scores_ : float | ||
if computed, value of the objective function (to be maximized) | ||
|
||
|
@@ -144,6 +147,8 @@ def fit(self, X, y): | |
X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True) | ||
X, y, X_offset, y_offset, X_scale = self._preprocess_data( | ||
X, y, self.fit_intercept, self.normalize, self.copy_X) | ||
self.X_offset = X_offset | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should be |
||
self.X_scale = X_scale | ||
n_samples, n_features = X.shape | ||
|
||
# Initialization of the values of the parameters | ||
|
@@ -216,10 +221,48 @@ def fit(self, X, y): | |
self.alpha_ = alpha_ | ||
self.lambda_ = lambda_ | ||
self.coef_ = coef_ | ||
sigma_ = np.dot(Vh.T, | ||
Vh / (eigen_vals_ + lambda_ / alpha_)[:, None]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I prefer |
||
self.sigma_ = (1. / alpha_) * sigma_ | ||
|
||
self._set_intercept(X_offset, y_offset, X_scale) | ||
return self | ||
|
||
def predict(self, X, return_std=False):
    """Predict using the linear model.

    In addition to the mean of the predictive distribution, its
    standard deviation can also be returned.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = (n_samples, n_features)
        Samples.

    return_std : boolean, optional
        Whether to return the standard deviation of posterior prediction.

    Returns
    -------
    y_mean : array, shape = (n_samples,)
        Mean of predictive distribution of query points.

    y_std : array, shape = (n_samples,)
        Standard deviation of predictive distribution of query points.
        Only returned when ``return_std`` is True.

    Notes
    -----
    The predictive variance is the standard Bayesian linear regression
    result: var(y*) = 1 / alpha_ + x*^T sigma_ x* (see e.g. Bishop,
    "Pattern Recognition and Machine Learning", Eq. 3.59; here the
    noise precision is ``alpha_`` and the weight precision ``lambda_``).
    """
    y_mean = self._decision_function(X)
    if not return_std:
        return y_mean
    # sigma_ was estimated in the preprocessed feature space, so apply
    # the same centering/scaling as fit() before computing the variance.
    if self.normalize:
        X = (X - self.X_offset) / self.X_scale
    sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)
    # Total predictive variance = noise variance (1/alpha_) + model
    # uncertainty (x^T sigma_ x).
    y_std = np.sqrt(sigmas_squared_data + (1. / self.alpha_))
    return y_mean, y_std
|
||
|
||
############################################################################### | ||
# ARD (Automatic Relevance Determination) regression | ||
|
@@ -417,7 +460,7 @@ def fit(self, X, y): | |
s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum() | ||
s += alpha_1 * log(alpha_) - alpha_2 * alpha_ | ||
s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_) + | ||
np.sum(np.log(lambda_))) | ||
np.sum(np.log(lambda_))) | ||
s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_ ** 2).sum()) | ||
self.scores_.append(s) | ||
|
||
|
@@ -434,3 +477,42 @@ def fit(self, X, y): | |
self.lambda_ = lambda_ | ||
self._set_intercept(X_offset, y_offset, X_scale) | ||
return self | ||
|
||
def predict(self, X, return_std=False):
    """Predict using the ARD linear model.

    In addition to the mean of the predictive distribution, its
    standard deviation can also be returned.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = (n_samples, n_features)
        Samples.

    return_std : boolean, optional
        Whether to return the standard deviation of posterior prediction.

    Returns
    -------
    y_mean : array, shape = (n_samples,)
        Mean of predictive distribution of query points.

    y_std : array, shape = (n_samples,)
        Standard deviation of predictive distribution of query points.
        Only returned when ``return_std`` is True.

    Notes
    -----
    Same predictive-distribution formula as ``BayesianRidge.predict``
    (var(y*) = 1 / alpha_ + x*^T sigma_ x*), except that ARD keeps only
    the features whose precision ``lambda_`` is below
    ``threshold_lambda``; the remaining dimensions are discarded.
    """
    y_mean = self._decision_function(X)
    if not return_std:
        return y_mean
    # sigma_ was estimated in the preprocessed feature space, so apply
    # the same centering/scaling as fit() before computing the variance.
    if self.normalize:
        X = (X - self.X_offset) / self.X_scale
    # ARD pruned features with lambda_ >= threshold_lambda during fit;
    # sigma_ only covers the retained features, so filter X to match.
    X = X[:, self.lambda_ < self.threshold_lambda]
    sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)
    # Total predictive variance = noise variance (1/alpha_) + model
    # uncertainty (x^T sigma_ x).
    y_std = np.sqrt(sigmas_squared_data + (1. / self.alpha_))
    return y_mean, y_std
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -56,3 +56,55 @@ def test_toy_ard_object(): | |
# Check that the model could approximately learn the identity function | ||
test = [[1], [3], [4]] | ||
assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2) | ||
|
||
|
||
def test_return_std_bayesian():
    # Check that BayesianRidge.predict(return_std=True) returns a
    # standard deviation close to the noise level injected into y.

    def f(X):
        return np.dot(X, w) + b

    def f_noise(X):
        return f(X) + rng.randn(X.shape[0]) * noise_mult

    d = 5
    n_train = 50
    n_test = 10

    # Seed the RNG so the test is deterministic rather than flaky.
    rng = np.random.RandomState(42)

    noise_mult = 0.1
    w = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
    b = 1.0

    X = rng.random_sample((n_train, d))
    y = f_noise(X)

    m1 = BayesianRidge()
    m1.fit(X, y)
    # Note: X_test is drawn once (the original assigned it twice; the
    # first assignment was dead code).
    X_test = rng.random_sample((n_test, d))
    y_mean, y_std = m1.predict(X_test, return_std=True)
    # The estimated predictive std should approximate the injected noise.
    assert_array_almost_equal(y_std, noise_mult, decimal=1)
|
||
|
||
def test_return_std_ard():
    # Check that ARDRegression.predict(return_std=True) returns a
    # standard deviation close to the noise level injected into y.

    def f(X):
        return np.dot(X, w) + b

    def f_noise(X):
        return f(X) + rng.randn(X.shape[0]) * noise_mult

    d = 5
    n_train = 50
    n_test = 10

    # Seed the RNG so the test is deterministic rather than flaky.
    rng = np.random.RandomState(42)

    noise_mult = 0.1
    w = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
    b = 1.0

    X = rng.random_sample((n_train, d))
    y = f_noise(X)

    m1 = ARDRegression()
    m1.fit(X, y)
    # Note: X_test is drawn once (the original assigned it twice; the
    # first assignment was dead code).
    X_test = rng.random_sample((n_test, d))
    y_mean, y_std = m1.predict(X_test, return_std=True)
    # The estimated predictive std should approximate the injected noise.
    assert_array_almost_equal(y_std, noise_mult, decimal=1)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Incorrect label for ARD