This repository has been archived by the owner on Jun 10, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
time_series_mcmc.py
402 lines (341 loc) · 14.2 KB
/
time_series_mcmc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
from os import path
import matplotlib.pyplot as plt
import numpy as np
from compound_poisson import mcmc
from compound_poisson import time_series
from compound_poisson.mcmc import target_time_series
class TimeSeriesMcmc(time_series.TimeSeries):
"""Fit Compound Poisson time series using Rwmh from a Bayesian setting
Method uses Metropolis Hastings within Gibbs. Sample either the z or the
regression parameters. Uniform prior on z, Normal prior on the
regression parameters. Adaptive MCMC from Roberts and Rosenthal (2009)
For more attributes, see the superclass
Attributes:
n_sample: number of MCMC samples
parameter_target: wrapper Target object to evaluate the posterior of
the parameters
parameter_mcmc: Mcmc object which does MCMC using parameter_target
z_target: wrapper Target object to evaluate the posterior of z
z_mcmc: Mcmc object which does MCMC using z_target
gibbs_weight: up to a constant, probability of sampling each mcmc in
self.get_mcmc_array()
burn_in: integer, which samples to discard when doing posterior
sampling which is used for forecasting
memmap_dir: directory to store the MCMC samples
set_from_mcmc: boolean, True if to automatically randomly set from
MCMC samples
"""
def __init__(self,
x,
rainfall=None,
poisson_rate_n_arma=None,
gamma_mean_n_arma=None,
cp_parameter_array=None):
super().__init__(x,
rainfall,
poisson_rate_n_arma,
gamma_mean_n_arma,
cp_parameter_array)
self.n_sample = 50000
self.parameter_target = target_time_series.TargetParameter(self)
self.parameter_mcmc = None
self.z_target = target_time_series.TargetZ(self)
self.z_mcmc = None
self.gibbs_weight = [0.003*len(self), 1]
self.burn_in = 0
self.memmap_dir = ""
self.set_from_mcmc = True
def fit(self):
"""Fit using Gibbs sampling
Override - Gibbs sample either z, regression parameters or the
precision.
"""
self.initalise_z()
self.instantiate_mcmc()
mcmc_array = self.get_mcmc_array()
mcmc.do_gibbs_sampling(
mcmc_array, self.n_sample, self.rng, self.gibbs_weight)
def resume_fitting(self, n_sample):
"""Run more MCMC samples
Args:
n_sample: new number of mcmc samples
"""
if n_sample > self.n_sample:
mcmc_array = self.get_mcmc_array()
for mcmc_i in mcmc_array:
mcmc_i.extend_memmap(n_sample)
# in resume, do not use initial value as sample (False in arg 3)
mcmc.do_gibbs_sampling(
mcmc_array, n_sample - self.n_sample, self.rng,
self.gibbs_weight, False)
self.n_sample = n_sample
self.delete_old_memmap()
def initalise_z(self):
"""Initalise all z in self.z_array and update all parameters
Initalise all z in self.z_array using e_step() and update all
parameters using update_all_cp_parameters(). Required for e.g.
likelihood evaluation because z=0 if and only if y=0.
"""
self.e_step() # initalise the z using the E step
self.z_array = self.z_array.round() # round it to get integer
# z cannot be 0 if y is not 0
self.z_array[np.logical_and(self.z_array == 0, self.y_array > 0)] = 1
self.update_all_cp_parameters() # initalse cp parameters
def instantiate_mcmc(self):
"""Instantiate all MCMC objects
Instantiate all MCMC objects by passing the corresponding Target
objects and random number generators
"""
self.parameter_mcmc = mcmc.Rwmh(
self.parameter_target, self.rng, self.n_sample, self.memmap_dir)
self.z_mcmc = mcmc.ZRwmh(
self.z_target, self.rng, self.n_sample, self.memmap_dir)
def get_mcmc_array(self):
"""Return array of Mcmc objects
Each element in this array can be called to do a Gibbs step for
different components
"""
mcmc_array = [
self.z_mcmc,
self.parameter_mcmc,
]
return mcmc_array
def set_burn_in(self, burn_in):
self.burn_in = burn_in
def set_parameter_from_sample(self, rng):
"""Set parameter from MCMC sample
Set the regression parameters and latent variables z from the MCMC
samples in parameter_sample and z_sample.
"""
index = rng.randint(self.burn_in, len(self.parameter_mcmc))
self.set_parameter_from_sample_i(index)
self.update_all_cp_parameters()
def set_parameter_from_sample_i(self, index):
"""Set parameter for a specified MCMC sample
"""
self.set_parameter_vector(self.parameter_mcmc[index])
self.z_array = self.z_mcmc[index]
# override
def forecast_self(self, n_simulation):
self.read_memmap()
super().forecast_self(n_simulation)
self.del_memmap()
def instantiate_forecast_self(self):
"""Override - Set the parameter from the MCMC sample
"""
if self.set_from_mcmc:
self.set_parameter_from_sample(self.self_forecaster_rng)
forecast = super().instantiate_forecast_self()
return forecast
# override
def forecast(self, x, n_simulation):
self.read_memmap()
super().forecast(x, n_simulation)
self.del_memmap()
# override
def instantiate_forecast(self, x):
"""Override - Set the parameter from the MCMC sample
"""
if self.set_from_mcmc:
self.set_parameter_from_sample(self.forecaster_rng)
forecast = super().instantiate_forecast(x)
return forecast
def simulate_from_prior(self):
"""Simulate using a parameter from the prior
MODIFIES ITSELF
Replaces the parameter with a sample from the prior. The prior mean and
prior covariance unmodified.
"""
# keep sampling until the sampled parameter does not have numerical
# problems
while True:
try:
# sample from the prior and set it
prior_parameter = self.simulate_parameter_from_prior()
self.set_parameter_vector(prior_parameter)
self.simulate()
# check if any of the parameters are not nan
if np.any(np.isnan(self.poisson_rate.value_array)):
pass
elif np.any(np.isnan(self.gamma_mean.value_array)):
pass
elif np.any(np.isnan(self.gamma_dispersion.value_array)):
pass
else:
break
# try again if there are numerical problems
except(ValueError, OverflowError):
pass
def simulate_parameter_from_prior(self):
"""Simulate parameter from the prior
Return a sample from the prior
"""
return self.parameter_target.simulate_from_prior(self.rng)
def read_memmap(self):
"""Read all memmap file handling from all MCMCs
"""
for mcmc_i in self.get_mcmc_array():
mcmc_i.read_memmap()
def read_to_write_memmap(self):
for mcmc_i in self.get_mcmc_array():
mcmc_i.read_to_write_memmap()
def del_memmap(self):
for mcmc_i in self.get_mcmc_array():
mcmc_i.del_memmap()
def delete_old_memmap(self):
for mcmc_i in self.get_mcmc_array():
mcmc_i.delete_old_memmap()
def print_mcmc(self, directory, true_parameter=None):
parameter_name = self.get_parameter_vector_name()
self.read_memmap()
chain = np.asarray(self.parameter_mcmc[:])
for i in range(self.n_parameter):
chain_i = chain[:, i]
plt.figure()
plt.plot(chain_i)
if true_parameter is not None:
plt.hlines(true_parameter[i], 0, len(chain)-1)
plt.ylabel(parameter_name[i])
plt.xlabel("Sample number")
plt.savefig(
path.join(directory, "chain_parameter_" + str(i) + ".pdf"))
plt.close()
chain = []
z_chain = np.asarray(self.z_mcmc[:])
chain = np.mean(z_chain, 1)
plt.figure()
plt.plot(chain)
plt.ylabel("Mean of latent variables")
plt.xlabel("Sample number")
plt.savefig(path.join(directory, "chain_z.pdf"))
plt.close()
self.print_chain_property(directory)
self.del_memmap()
def print_chain_property(self, directory):
plt.figure()
plt.plot(np.asarray(self.parameter_mcmc.accept_array))
plt.ylabel("Acceptance rate of parameters")
plt.xlabel("Parameter sample number")
plt.savefig(path.join(directory, "accept_parameter.pdf"))
plt.close()
plt.figure()
plt.plot(np.asarray(self.z_mcmc.accept_array))
plt.ylabel("Acceptance self of latent variables")
plt.xlabel("Latent variable sample number")
plt.savefig(path.join(directory, "accept_z.pdf"))
plt.close()
class TimeSeriesSlice(TimeSeriesMcmc):
"""Fit Compound Poisson time series using slice sampling from a Bayesian
setting
Method uses slice within Gibbs. Sample either the z or the
regression parameters. Uniform prior on z, Normal prior on the
regression parameters. Sample z using slice sampling (Neal, 2003).
Sampling the parameters using elliptical slice sampling (Murray 2010).
For more attributes, see the superclass
"""
def __init__(self,
x,
rainfall=None,
poisson_rate_n_arma=None,
gamma_mean_n_arma=None,
cp_parameter_array=None):
super().__init__(x,
rainfall,
poisson_rate_n_arma,
gamma_mean_n_arma,
cp_parameter_array)
self.n_sample = 10000
def instantiate_mcmc(self):
"""Instantiate all MCMC objects
Override
Instantiate slice sampling for the parameter and z
"""
self.parameter_mcmc = mcmc.Elliptical(
self.parameter_target, self.rng, self.n_sample, self.memmap_dir)
self.z_mcmc = mcmc.ZSlice(
self.z_target, self.rng, self.n_sample, self.memmap_dir)
def print_chain_property(self, directory):
plt.figure()
plt.plot(np.asarray(self.parameter_mcmc.n_reject_array))
plt.ylabel("Number of rejects in parameter slicing")
plt.xlabel("Parameter sample number")
plt.savefig(path.join(directory, "n_reject_parameter.pdf"))
plt.close()
plt.figure()
plt.plot(np.asarray(self.z_mcmc.slice_width_array))
plt.ylabel("Latent variable slice width")
plt.xlabel("Latent variable sample number")
plt.savefig(path.join(directory, "slice_width_z.pdf"))
plt.close()
class TimeSeriesHyperSlice(TimeSeriesSlice):
"""Fit Compound Poisson time series using slice sampling with a prior on
the precision
Method uses slice within Gibbs. Uniform prior on z, Normal prior on the
regression parameters, Gamma prior on the precision of the covariance
of the Normal prior. Gibbs sample either z, regression parameters or
the precision. Sample z using slice sampling (Neal, 2003). Sampling the
parameters using elliptical slice sampling (Murray 2010).
For more attributes, see the superclass
Attributes:
precision_target: wrapper Target object with evaluates the posterior of
the precision
precision_mcmc: Mcmc object for precision_target
"""
def __init__(self,
x,
rainfall=None,
poisson_rate_n_arma=None,
gamma_mean_n_arma=None,
cp_parameter_array=None):
super().__init__(x,
rainfall,
poisson_rate_n_arma,
gamma_mean_n_arma,
cp_parameter_array)
self.precision_target = target_time_series.TargetPrecision(self)
# mcmc object evaluated at instantiate_mcmc
self.precision_mcmc = None
self.gibbs_weight = [0.003*len(self), 1, 0.2]
def instantiate_mcmc(self):
"""Instantiate all MCMC objects
Override - instantiate the MCMC for the precision
"""
super().instantiate_mcmc()
self.precision_target.prograte_precision()
self.precision_mcmc = mcmc.Rwmh(
self.precision_target, self.rng, self.n_sample, self.memmap_dir)
def get_mcmc_array(self):
mcmc_array = super().get_mcmc_array()
mcmc_array.append(self.precision_mcmc)
return mcmc_array
def simulate_parameter_from_prior(self):
"""Simulate parameter from the prior
Override - Sample the precision from the prior and then sample the
parameter from the prior
"""
self.precision_target.set_from_prior(self.rng)
self.precision_target.prograte_precision()
return super().simulate_parameter_from_prior()
def print_chain_property(self, directory):
super().print_chain_property(directory)
precision_chain = np.asarray(self.precision_mcmc[:])
for i in range(2):
chain_i = precision_chain[:, i]
plt.figure()
plt.plot(chain_i)
plt.ylabel("precision" + str(i))
plt.xlabel("Sample number")
plt.savefig(
path.join(directory, "chain_precision_" + str(i) + ".pdf"))
plt.close()
plt.figure()
plt.plot(np.asarray(self.precision_mcmc.accept_array))
plt.ylabel("Acceptance rate of parameters")
plt.xlabel("Parameter sample number")
plt.savefig(path.join(directory, "accept_precision.pdf"))
plt.close()
def static_initalise_z(time_series):
"""Static version of initalise_z(). Used for parallel programming.
"""
time_series.initalise_z()
return time_series