forked from brandonshensley/SinglePixel
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_many_mcmc.py
executable file
·251 lines (201 loc) · 8.56 KB
/
run_many_mcmc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
#!/usr/bin/python
"""
Do MCMC runs to fit FG models to simulated data, over a grid of
(nu_min, nu_max) values.
"""
import numpy as np
import models
import model_list
import fitting
from utils import rj2cmb, bands_log
import sys, time, os, copy
#from multiprocessing import Pool
from mpi4py import MPI
# Set-up MPI
comm = MPI.COMM_WORLD
myid = comm.Get_rank()
nproc = comm.Get_size()
# Prefix for output files
PREFIX = "xxx"
NBURN = 500
NSTEPS = 10000
NWALKERS = 100
# Reference noise curve (assumes noise file contains sigma_P=sigma_Q=sigma_U
# in uK_CMB.deg for a given experiment)
NOISE_FILE = "data/noise_coreplus_extended.dat"
#NOISE_FILE = "data/core_plus_extended_noise.dat"
# Band parameter definitions
nbands = 7
nthreads = 2
# Frequency ranges
numin_vals = [20., 30., 40.]
numax_vals = [300., 400., 500., 600., 700., 800.]
# Temperature/polarisation noise rms for all bands, as a fraction of T_cmb
fsigma_T = 1. / np.sqrt(2.)
fsigma_P = 1.
# Lists of input and output models
in_list = ['cmb', ]; fit_list = ['cmb', ]
# Define input models and their amplitudes/parameters
allowed_comps = model_list.model_dict
cmb_model = model_list.cmb_model
# Parse seed specification. This can be either a range, specified like "32-41",
# or a single number, like "32".
if len(sys.argv) < 2:
print "Must specify random seed range and models to fit"
sys.exit(1)
if "-" in sys.argv[1]:
seed_min, seed_max = sys.argv[1].split("-")
seeds = np.arange(int(seed_min), int(seed_max)+1)
else:
seeds = int(sys.argv[1])
# Parse args to define input and output models
if len(sys.argv) > 2:
in_list += sys.argv[2].split(",")
fit_list += sys.argv[3].split(",")
else:
in_list = ['cmb', 'synch', 'mbb']
fit_list = ['cmb', 'synch', 'mbb']
# Make sure models are of known types
for item in in_list:
if item not in allowed_comps.keys():
raise ValueError("Unknown component type '%s'" % item)
for item in fit_list:
if item not in allowed_comps.keys():
raise ValueError("Unknown component type '%s'" % item)
# Print recognised models and specify name
print "Input components:", in_list
print "Fitting components:", fit_list
name_in = "-".join(in_list)
name_fit = "-".join(fit_list)
# Collect components into lists and set input amplitudes
mods_in = [allowed_comps[comp] for comp in in_list]
mods_fit = [allowed_comps[comp] for comp in fit_list]
amps_in = np.array([m.amps() for m in mods_in])
params_in = np.array([m.params() for m in mods_in])
def model_test(nu, D_vec, Ninv, models_fit, initial_vals=None, burn=500,
steps=1000, nwalkers=100, cmb_amp_in=None, sample_file=None):
"""
Generate simulated data given an input model, and perform MCMC fit using
another model.
"""
# Collect together data and noise/instrument model
beam_mat = np.identity(3*len(nu)) # Beam model
data_spec = (nu, D_vec, Ninv, beam_mat)
# Get a list of amplitude/parameter names and initial values
amp_names = []; amp_vals = []; param_names = []; param_vals = []
amp_parent_model = []; param_parent_model = []
for mod in models_fit:
# Parameter names
amp_names += ["%s_%s" % (mod.model, pol) for pol in "IQU"]
param_names += mod.param_names
# Parameter values
amp_vals = np.concatenate( (amp_vals, mod.amps()) )
param_vals = np.concatenate( (param_vals, mod.params()) )
# Parent model list
amp_parent_model.append(mod)
param_parent_model.append(mod)
# Concatenate parameter lists
pnames = amp_names + param_names
pvals = np.concatenate((amp_vals, param_vals))
parent_model = amp_parent_model + param_parent_model
# Use 'guess' as the initial point for the MCMC if specified
if initial_vals is None: initial_vals = pvals
# Collect names, initial values, and parent components for the parameters
param_spec = (pnames, initial_vals, parent_model)
# Run MCMC sampler on this model
t0 = time.time()
pnames, samples, logp = fitting.joint_mcmc(data_spec, models_fit, param_spec,
burn=burn, steps=steps,
nwalkers=nwalkers,
nthreads=nthreads,
sample_file=sample_file)
print "MCMC run in %d sec." % (time.time() - t0)
# Return parameter names and samples
return pnames, samples, logp, initial_vals
def run_model(nu_params, filename, cut_range):
# Get band definition
nu_min, nu_max = nu_params
print "nu_min = %d GHz, nu_max = %d GHz" % (nu_min, nu_max)
nu = bands_log(nu_min, nu_max, nbands)
label = str(nu_min) + '_' + str(nu_max)
# Make copies of models
my_mods_in = mods_in
my_mods_fit = mods_fit
# Name of sample file
#fname_samples = "output/%s_samples_%s.%s_nb%d_seed%d_%s.dat" \
# % (PREFIX, name_in, name_fit, nbands, SEED, label)
fname_samples = None
# Simulate data and run MCMC fit
D_vec, Ninv = fitting.generate_data(nu, fsigma_T, fsigma_P,
components=my_mods_in,
noise_file=NOISE_FILE)
pnames, samples, logp, ini = model_test(nu, D_vec, Ninv, my_mods_fit,
burn=NBURN, steps=NSTEPS,
nwalkers=NWALKERS,
cmb_amp_in=cmb_model.amps(),
sample_file=fname_samples)
# Calculate best-fit chisq.
chisq = -2.*logp
dof = D_vec.size - len(pnames)
# Reshape sample array into (Nparams, Nwalkers, Nsamples)
samples = samples.reshape((samples.shape[0],
NWALKERS,
samples.shape[1]/NWALKERS))
# Loop over different burn-in cuts to produce summary stats
for cut in cut_range:
# Set output filename
fname = filename + "_cut%d.dat" % cut
# Output mean and bias
summary_str = "%4.4e %4.4e %4.4e " % (nu_min, nu_max, np.min(chisq))
summary_data = [nu_min, nu_max, np.min(chisq)]
header = "nu_min nu_max chi2_min "
# Loop over parameter names
for i in range(len(pnames)):
# Mean, std. dev., and fractional shift from true value
_mean = np.mean(samples[i,:,cut:])
_std = np.std(samples[i,:,cut:])
_fracbias = (np.mean(samples[i,:,cut:]) - ini[i]) \
/ np.std(samples[i,:,cut:])
stats = [_mean, _std, _fracbias]
# Keep summary stats, to be written to file
summary_data += stats
header += "mean_%s std_%s Delta_%s " \
% (pnames[i], pnames[i], pnames[i])
print "%14s: %+3.3e +/- %3.3e [Delta = %+3.3f]" \
% (pnames[i], stats[0], stats[1], stats[2])
# If file is empty, set flag to write header when saving output
has_header = False if os.stat(fname).st_size == 0 else True
# Append summary statistics to file
f = open(fname, 'a')
if has_header:
np.savetxt(f, np.atleast_2d(summary_data))
else:
np.savetxt(f, np.atleast_2d(summary_data), header=header[:-1])
f.close()
# Prep output files for writing
for j in range(len(seeds)):
filename = "output/%s_summary_%s.%s_nb%d_seed%d" \
% (PREFIX, name_in, name_fit, nbands, seeds[j])
cut_range = np.arange(NSTEPS, step=200)
for cut in cut_range:
fname = filename + "_cut%d.dat" % cut
f = open(fname, 'w')
f.close()
# Loop over noise realisations (different random seed each time)
for j in range(len(seeds)):
# Expand into all combinations of nu_min,max
nu_min, nu_max = np.meshgrid(numin_vals, numax_vals)
nu_params = np.column_stack((nu_min.flatten(), nu_max.flatten()))
# Set random seed
seed = seeds[j]
print "SEED =", seed
np.random.seed(myid*1000 + seed)
# Build filename for this seed
filename = "output/%s_summary_%s.%s_nb%d_seed%d" \
% (PREFIX, name_in, name_fit, nbands, seeds[j])
# Loop over all band specs and run MCMC
for i in range(len(nu_params)):
k = j * len(nu_params) + i
if k % nproc != myid: continue
# Run the model for this set of params
run_model(nu_params[i], filename, cut_range)