-
Notifications
You must be signed in to change notification settings - Fork 1.1k
/
autoregressive.py
412 lines (350 loc) · 16.9 KB
/
autoregressive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
# Copyright 2018 The TensorFlow Probability Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Autoregressive model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Dependency imports
import tensorflow.compat.v2 as tf
from tensorflow_probability.python import bijectors as tfb
from tensorflow_probability.python import distributions as tfd
from tensorflow_probability.python.internal import distribution_util as dist_util
from tensorflow_probability.python.internal import dtype_util
from tensorflow_probability.python.sts.internal import util as sts_util
from tensorflow_probability.python.sts.structural_time_series import Parameter
from tensorflow_probability.python.sts.structural_time_series import StructuralTimeSeries
class AutoregressiveStateSpaceModel(tfd.LinearGaussianStateSpaceModel):
"""State space model for an autoregressive process.
A state space model (SSM) posits a set of latent (unobserved) variables that
evolve over time with dynamics specified by a probabilistic transition model
`p(z[t+1] | z[t])`. At each timestep, we observe a value sampled from an
observation model conditioned on the current state, `p(x[t] | z[t])`. The
special case where both the transition and observation models are Gaussians
with mean specified as a linear function of the inputs, is known as a linear
Gaussian state space model and supports tractable exact probabilistic
calculations; see `tfp.distributions.LinearGaussianStateSpaceModel` for
details.
In an autoregressive process, the expected level at each timestep is a linear
function of previous levels, with added Gaussian noise:
```python
level[t+1] = (sum(coefficients * levels[t:t-order:-1]) +
Normal(0., level_scale))
```
The process is characterized by a vector `coefficients` whose size determines
the order of the process (how many previous values it looks at), and by
`level_scale`, the standard deviation of the noise added at each step.
This is formulated as a state space model by letting the latent state encode
the most recent values; see 'Mathematical Details' below.
The parameters `level_scale` and `observation_noise_scale` are each (a batch
of) scalars, and `coefficients` is a (batch) vector of size `[order]`. The
batch shape of this `Distribution` is the broadcast batch
shape of these parameters and of the `initial_state_prior`.
#### Mathematical Details
The autoregressive model implements a
`tfp.distributions.LinearGaussianStateSpaceModel` with `latent_size = order`
and `observation_size = 1`. The latent state vector encodes the recent history
of the process, with the current value in the topmost dimension. At each
timestep, the transition sums the previous values to produce the new expected
value, shifts all other values down by a dimension, and adds noise to the
current value. This is formally encoded by the transition model:
```
transition_matrix = [ coefs[0], coefs[1], ..., coefs[order]
1., 0 , ..., 0.
0., 1., ..., 0.
...
0., 0., ..., 1., 0. ]
transition_noise ~ N(loc=0., scale=diag([level_scale, 0., 0., ..., 0.]))
```
The observation model simply extracts the current (topmost) value, and
optionally adds independent noise at each step:
```
observation_matrix = [[1., 0., ..., 0.]]
observation_noise ~ N(loc=0, scale=observation_noise_scale)
```
Models with `observation_noise_scale = 0.` are AR processes in the formal
sense. Setting `observation_noise_scale` to a nonzero value corresponds to a
latent AR process observed under an iid noise model.
#### Examples
A simple model definition:
```python
ar_model = AutoregressiveStateSpaceModel(
num_timesteps=50,
coefficients=[0.8, -0.1],
level_scale=0.5,
initial_state_prior=tfd.MultivariateNormalDiag(
scale_diag=[1., 1.]))
y = ar_model.sample() # y has shape [50, 1]
lp = ar_model.log_prob(y) # log_prob is scalar
```
Passing additional parameter dimensions constructs a batch of models. The
overall batch shape is the broadcast batch shape of the parameters:
```python
ar_model = AutoregressiveStateSpaceModel(
num_timesteps=50,
coefficients=[0.8, -0.1],
level_scale=tf.ones([10]),
initial_state_prior=tfd.MultivariateNormalDiag(
scale_diag=tf.ones([10, 10, 2])))
y = ar_model.sample(5) # y has shape [5, 10, 10, 50, 1]
lp = ar_model.log_prob(y) # has shape [5, 10, 10]
```
"""
def __init__(self,
num_timesteps,
coefficients,
level_scale,
initial_state_prior,
observation_noise_scale=0.,
initial_step=0,
validate_args=False,
name=None):
"""Build a state space model implementing an autoregressive process.
Args:
num_timesteps: Scalar `int` `Tensor` number of timesteps to model
with this distribution.
coefficients: `float` `Tensor` of shape `concat(batch_shape, [order])`
defining the autoregressive coefficients. The coefficients are defined
backwards in time: `coefficients[0] * level[t] + coefficients[1] *
level[t-1] + ... + coefficients[order-1] * level[t-order+1]`.
level_scale: Scalar (any additional dimensions are treated as batch
dimensions) `float` `Tensor` indicating the standard deviation of the
transition noise at each step.
initial_state_prior: instance of `tfd.MultivariateNormal`
representing the prior distribution on latent states. Must have
event shape `[order]`.
observation_noise_scale: Scalar (any additional dimensions are
treated as batch dimensions) `float` `Tensor` indicating the standard
deviation of the observation noise.
Default value: 0.
initial_step: Optional scalar `int` `Tensor` specifying the starting
timestep.
Default value: 0.
validate_args: Python `bool`. Whether to validate input
with asserts. If `validate_args` is `False`, and the inputs are
invalid, correct behavior is not guaranteed.
Default value: `False`.
name: Python `str` name prefixed to ops created by this class.
Default value: "AutoregressiveStateSpaceModel".
"""
with tf.name_scope(name or 'AutoregressiveStateSpaceModel') as name:
# The initial state prior determines the dtype of sampled values.
# Other model parameters must have the same dtype.
dtype = initial_state_prior.dtype
coefficients = tf.convert_to_tensor(
value=coefficients, name='coefficients', dtype=dtype)
level_scale = tf.convert_to_tensor(
value=level_scale, name='level_scale', dtype=dtype)
observation_noise_scale = tf.convert_to_tensor(
value=observation_noise_scale,
name='observation_noise_scale', dtype=dtype)
order = tf.compat.dimension_value(coefficients.shape[-1])
if order is None:
raise ValueError('Autoregressive coefficients must have static shape.')
self._order = order
self._coefficients = coefficients
self._level_scale = level_scale
super(AutoregressiveStateSpaceModel, self).__init__(
num_timesteps=num_timesteps,
transition_matrix=make_ar_transition_matrix(coefficients),
transition_noise=tfd.MultivariateNormalDiag(
scale_diag=tf.stack([level_scale] +
[tf.zeros_like(level_scale)] * (
self.order - 1), axis=-1)),
observation_matrix=tf.concat([tf.ones([1, 1], dtype=dtype),
tf.zeros([1, self.order - 1],
dtype=dtype)],
axis=-1),
observation_noise=tfd.MultivariateNormalDiag(
scale_diag=observation_noise_scale[..., tf.newaxis]),
initial_state_prior=initial_state_prior,
initial_step=initial_step,
validate_args=validate_args,
name=name)
@property
def order(self):
return self._order
@property
def coefficients(self):
return self._coefficients
@property
def level_scale(self):
return self._level_scale
def make_ar_transition_matrix(coefficients):
"""Build transition matrix for an autoregressive StateSpaceModel.
When applied to a vector of previous values, this matrix computes
the expected new value (summing the previous states according to the
autoregressive coefficients) in the top dimension of the state space,
and moves all previous values down by one dimension, 'forgetting' the
final (least recent) value. That is, it looks like this:
```
ar_matrix = [ coefs[0], coefs[1], ..., coefs[order]
1., 0 , ..., 0.
0., 1., ..., 0.
...
0., 0., ..., 1., 0. ]
```
Args:
coefficients: float `Tensor` of shape `concat([batch_shape, [order]])`.
Returns:
ar_matrix: float `Tensor` with shape `concat([batch_shape,
[order, order]])`.
"""
top_row = tf.expand_dims(coefficients, -2)
coef_shape = dist_util.prefer_static_shape(coefficients)
batch_shape, order = coef_shape[:-1], coef_shape[-1]
remaining_rows = tf.concat([
tf.eye(order - 1, dtype=coefficients.dtype, batch_shape=batch_shape),
tf.zeros(tf.concat([batch_shape, (order - 1, 1)], axis=0),
dtype=coefficients.dtype)
], axis=-1)
ar_matrix = tf.concat([top_row, remaining_rows], axis=-2)
return ar_matrix
class Autoregressive(StructuralTimeSeries):
"""Formal representation of an autoregressive model.
An autoregressive (AR) model posits a latent `level` whose value at each step
is a noisy linear combination of previous steps:
```python
level[t+1] = (sum(coefficients * levels[t:t-order:-1]) +
Normal(0., level_scale))
```
The latent state is `levels[t:t-order:-1]`. We observe a noisy realization of
the current level: `f[t] = level[t] + Normal(0., observation_noise_scale)` at
each timestep.
If `coefficients=[1.]`, the AR process is a simple random walk, equivalent to
a `LocalLevel` model. However, a random walk's variance increases with time,
while many AR processes (in particular, any first-order process with
`abs(coefficient) < 1`) are *stationary*, i.e., they maintain a constant
variance over time. This makes AR processes useful models of uncertainty.
See the [Wikipedia article](
https://en.wikipedia.org/wiki/Autoregressive_model#Definition) for details on
stationarity and other mathematical properties of autoregressive processes.
"""
def __init__(self,
order,
coefficients_prior=None,
level_scale_prior=None,
initial_state_prior=None,
coefficient_constraining_bijector=None,
observed_time_series=None,
name=None):
"""Specify an autoregressive model.
Args:
order: scalar Python positive `int` specifying the number of past
timesteps to regress on.
coefficients_prior: optional `tfd.Distribution` instance specifying a
prior on the `coefficients` parameter. If `None`, a default standard
normal (`tfd.MultivariateNormalDiag(scale_diag=tf.ones([order]))`) prior
is used.
Default value: `None`.
level_scale_prior: optional `tfd.Distribution` instance specifying a prior
on the `level_scale` parameter. If `None`, a heuristic default prior is
constructed based on the provided `observed_time_series`.
Default value: `None`.
initial_state_prior: optional `tfd.Distribution` instance specifying a
prior on the initial state, corresponding to the values of the process
at a set of size `order` of imagined timesteps before the initial step.
If `None`, a heuristic default prior is constructed based on the
provided `observed_time_series`.
Default value: `None`.
coefficient_constraining_bijector: optional `tfb.Bijector` instance
representing a constraining mapping for the autoregressive coefficients.
For example, `tfb.Tanh()` constrains the coefficients to lie in
`(-1, 1)`, while `tfb.Softplus()` constrains them to be positive, and
`tfb.Identity()` implies no constraint. If `None`, the default behavior
constrains the coefficients to lie in `(-1, 1)` using a `Tanh` bijector.
Default value: `None`.
observed_time_series: optional `float` `Tensor` of shape
`batch_shape + [T, 1]` (omitting the trailing unit dimension is also
supported when `T > 1`), specifying an observed time series.
Any priors not explicitly set will be given default values according to
the scale of the observed time series (or batch of time series). May
optionally be an instance of `tfp.sts.MaskedTimeSeries`, which includes
a mask `Tensor` to specify timesteps with missing observations.
Default value: `None`.
name: the name of this model component.
Default value: 'Autoregressive'.
"""
with tf.name_scope(name or 'Autoregressive') as name:
masked_time_series = None
if observed_time_series is not None:
masked_time_series = (
sts_util.canonicalize_observed_time_series_with_mask(
observed_time_series))
dtype = dtype_util.common_dtype(
[(masked_time_series.time_series
if masked_time_series is not None else None),
coefficients_prior,
level_scale_prior,
initial_state_prior], dtype_hint=tf.float32)
if observed_time_series is not None:
_, observed_stddev, observed_initial = sts_util.empirical_statistics(
masked_time_series)
else:
observed_stddev, observed_initial = (
tf.convert_to_tensor(value=1., dtype=dtype),
tf.convert_to_tensor(value=0., dtype=dtype))
batch_ones = tf.ones(tf.concat([
tf.shape(observed_initial), # Batch shape
[order]], axis=0), dtype=dtype)
# Heuristic default priors. Overriding these may dramatically
# change inference performance and results.
if coefficients_prior is None:
coefficients_prior = tfd.MultivariateNormalDiag(
scale_diag=batch_ones)
if level_scale_prior is None:
level_scale_prior = tfd.LogNormal(
loc=tf.math.log(0.05 * observed_stddev), scale=3.)
if (coefficients_prior.event_shape.is_fully_defined() and
order != coefficients_prior.event_shape[0]):
raise ValueError("Prior dimension {} doesn't match order {}.".format(
coefficients_prior.event_shape[0], order))
if initial_state_prior is None:
initial_state_prior = tfd.MultivariateNormalDiag(
loc=observed_initial[..., tf.newaxis] * batch_ones,
scale_diag=(tf.abs(observed_initial) +
observed_stddev)[..., tf.newaxis] * batch_ones)
self._order = order
self._coefficients_prior = coefficients_prior
self._level_scale_prior = level_scale_prior
self._initial_state_prior = initial_state_prior
if coefficient_constraining_bijector is None:
coefficient_constraining_bijector = tfb.Tanh()
super(Autoregressive, self).__init__(
parameters=[
Parameter('coefficients',
coefficients_prior,
coefficient_constraining_bijector),
Parameter('level_scale', level_scale_prior,
tfb.Chain([tfb.AffineScalar(scale=observed_stddev),
tfb.Softplus()]))
],
latent_size=order,
name=name)
@property
def initial_state_prior(self):
return self._initial_state_prior
def _make_state_space_model(self,
num_timesteps,
param_map=None,
initial_state_prior=None,
initial_step=0):
if initial_state_prior is None:
initial_state_prior = self.initial_state_prior
return AutoregressiveStateSpaceModel(
num_timesteps=num_timesteps,
initial_state_prior=initial_state_prior,
initial_step=initial_step,
name=self.name,
**param_map)