/
feature_scaled.py
141 lines (126 loc) · 5.91 KB
/
feature_scaled.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# Copyright 2018 The TensorFlow Probability Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Feature scaled kernel."""
import tensorflow.compat.v2 as tf
from tensorflow_probability.python.internal import assert_util
from tensorflow_probability.python.internal import dtype_util
from tensorflow_probability.python.internal import parameter_properties
from tensorflow_probability.python.internal import tensor_util
from tensorflow_probability.python.math.psd_kernels import feature_transformed
from tensorflow_probability.python.math.psd_kernels.internal import util
__all__ = ['FeatureScaled']
# TODO(b/132103412): Support more general scaling via LinearOperator, along with
# scaling all feature dimensions.
class FeatureScaled(feature_transformed.FeatureTransformed):
  """Kernel wrapper that rescales the feature dimensions of its inputs.

  For a base kernel `k`, a `scale_diag`, and inputs `x` and `y`, the inputs
  are first mapped to `x / scale_diag` and `y / scale_diag`, and the rescaled
  values are then handed to `k`. The scaling may alternatively be supplied as
  `inverse_scale_diag`, which multiplies the inputs directly.

  With 1 feature dimension, this is also called Automatic Relevance
  Determination (ARD) [1].

  #### References

  [1]: Carl Edward Rasmussen and Christopher K. I. Williams. Gaussian
       Processes for Machine Learning. Section 5.1 2006.
       http://www.gaussianprocess.org/gpml/chapters/RW5.pdf
  """

  def __init__(
      self,
      kernel,
      scale_diag=None,
      inverse_scale_diag=None,
      validate_args=False,
      name='FeatureScaled'):
    """Construct a FeatureScaled kernel instance.

    Args:
      kernel: `PositiveSemidefiniteKernel` instance that receives the rescaled
        inputs. Parameters to `kernel` must be broadcastable with
        `scale_diag`.
      scale_diag: Floating point `Tensor` that controls how sharp or wide the
        kernel shape is. `scale_diag` must have at least `kernel.feature_ndims`
        dimensions, and extra dimensions must be broadcastable with parameters
        of `kernel`. This is a "diagonal" in the sense that if all the feature
        dimensions were flattened, `scale_diag` acts as the inverse of a
        diagonal matrix.
        Default value: None
      inverse_scale_diag: Non-negative floating point `Tensor` that is treated
        as `1 / scale_diag`. Only one of `scale_diag` or `inverse_scale_diag`
        should be provided.
        Default value: None
      validate_args: If `True`, parameters are checked for validity despite
        possibly degrading runtime performance.
      name: Python `str` name prefixed to Ops created by this class.

    Raises:
      ValueError: if both or neither of `scale_diag` and `inverse_scale_diag`
        are given.
    """
    # Snapshot the constructor arguments before any other local is bound.
    parameters = dict(locals())

    scale_given = scale_diag is not None
    inverse_given = inverse_scale_diag is not None
    if scale_given == inverse_given:
      raise ValueError(
          'Must specify exactly one of `scale_diag` and `inverse_scale_diag`.')

    with tf.name_scope(name):
      common_dtype = util.maybe_get_common_dtype(
          [kernel, scale_diag, inverse_scale_diag])
      self._scale_diag = tensor_util.convert_nonref_to_tensor(
          scale_diag, dtype=common_dtype, name='scale_diag')
      self._inverse_scale_diag = tensor_util.convert_nonref_to_tensor(
          inverse_scale_diag, dtype=common_dtype, name='inverse_scale_diag')

    def rescale_input(x, feature_ndims, example_ndims):
      """Returns `x` multiplied by the (possibly derived) inverse scale."""
      stored_inverse = self.inverse_scale_diag
      # Fall back to the reciprocal of `scale_diag` when no inverse was given.
      multiplier = tf.convert_to_tensor(
          tf.math.reciprocal(self.scale_diag)
          if stored_inverse is None else stored_inverse)
      # Insert the padding just before the first feature dimension; the scale
      # is assumed to have at least `feature_ndims` dimensions.
      multiplier = util.pad_shape_with_ones(
          multiplier,
          example_ndims,
          start=-(feature_ndims + 1))
      return x * multiplier

    super(FeatureScaled, self).__init__(
        kernel,
        transformation_fn=rescale_input,
        validate_args=validate_args,
        name=name,
        parameters=parameters)

  @property
  def scale_diag(self):
    """The `scale_diag` parameter, or `None` if it was not supplied."""
    return self._scale_diag

  @property
  def inverse_scale_diag(self):
    """The `inverse_scale_diag` parameter, or `None` if it was not supplied."""
    return self._inverse_scale_diag

  @classmethod
  def _parameter_properties(cls, dtype):
    """Returns the per-parameter property descriptors for this kernel."""
    from tensorflow_probability.python.bijectors import softplus  # pylint:disable=g-import-not-at-top

    # Both scale parameterizations take their event rank from the wrapped
    # kernel's `feature_ndims`.
    feature_event_ndims = lambda self: self.kernel.feature_ndims

    return {
        'kernel': parameter_properties.BatchedComponentProperties(),
        'scale_diag': parameter_properties.ParameterProperties(
            event_ndims=feature_event_ndims,
            default_constraining_bijector_fn=(
                lambda: softplus.Softplus(low=dtype_util.eps(dtype)))),
        'inverse_scale_diag': parameter_properties.ParameterProperties(
            event_ndims=feature_event_ndims,
            default_constraining_bijector_fn=softplus.Softplus),
    }

  def _parameter_control_dependencies(self, is_init):
    """Builds the validity assertions for the scale parameters.

    Returns an empty list unless `validate_args` is set. A parameter is
    skipped when it is `None` or when `is_init` equals
    `tensor_util.is_ref(parameter)`.
    """
    if not self.validate_args:
      return []

    # (parameter, assertion function, failure message), in evaluation order.
    checks = (
        (self._inverse_scale_diag,
         assert_util.assert_non_negative,
         '`inverse_scale_diag` must be non-negative.'),
        (self._scale_diag,
         assert_util.assert_positive,
         '`scale_diag` must be positive.'),
    )

    assertions = []
    for param, assert_fn, failure_message in checks:
      if param is None or is_init == tensor_util.is_ref(param):
        continue
      assertions.append(assert_fn(param, message=failure_message))
    return assertions