/
Statistics.h
370 lines (293 loc) · 12.8 KB
/
Statistics.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
/**
* \file npctransport/Statistics.h
* \brief statistics and order parameters about the simulation
* that is associated with a SimulationData object
*
* \author Barak Raveh, Daniel Russell
* Copyright 2007-2022 IMP Inventors. All rights reserved.
*/
#ifndef IMPNPCTRANSPORT_STATISTICS_H
#define IMPNPCTRANSPORT_STATISTICS_H
#include "npctransport_config.h"
#include <IMP/Model.h>
#include <IMP/PairContainer.h>
#include <IMP/atom/BrownianDynamics.h>
#include <IMP/atom/Hierarchy.h>
#include <IMP/container/CloseBipartitePairContainer.h>
#include <IMP/container/ListSingletonContainer.h>
#include <IMP/container/PairContainerSet.h>
#include <IMP/container/PredicatePairsRestraint.h>
#include <IMP/core/pair_predicates.h>
#include <IMP/core/BoundingBox3DSingletonScore.h>
#include <IMP/core/Typed.h>
#include <IMP/display/declare_Geometry.h>
#include <IMP/rmf/SaveOptimizerState.h>
#include <IMP/Pointer.h>
#include <IMP/set_map_macros.h>
#include <RMF/HDF5/File.h>
#include "io.h"
#include "BodyStatisticsOptimizerState.h"
#include "GlobalStatisticsOptimizerState.h"
#include "ParticleTransportStatisticsOptimizerState.h"
#include "ChainStatisticsOptimizerState.h"
#include "BipartitePairsStatisticsOptimizerState.h"
#include "Parameter.h"
#include "Scoring.h"
#include "typedefs.h"
#include <IMP/internal/SimpleTimer.h>
#include "boost/tuple/tuple.hpp"
#include <boost/utility/value_init.hpp>
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
#include <stdint.h>
#include <string>
// When this header is processed by SWIG, declare an empty placeholder for
// IMP::internal::SimpleTimer, the type used below by Statistics::update()
// and create_boost_timer().
// NOTE(review): presumably needed because SWIG does not see the real
// definition from <IMP/internal/SimpleTimer.h> - confirm.
#ifdef SWIG
namespace IMP {
namespace internal {
struct SimpleTimer {};
}
}
#endif
IMPNPCTRANSPORT_BEGIN_NAMESPACE
class SimulationData; // fwd incomplete declaration
//! Statistics and order parameters about the simulations
/**
   A Statistics object is tied to the SimulationData object passed to its
   constructor. Statistics are registered per FG chain, per floater and
   per interaction type (add_fg_chain_stats(), add_floater_stats(),
   add_interaction_stats()), attached to an optimizer with
   add_optimizer_states(), and written to the output file with update().
*/
class IMPNPCTRANSPORTEXPORT Statistics : public Object {
 private:
  // weak pointer to the SimulationData object that owns and uses this
  // statistics object (see constructor)
  UncheckedWeakPointer<SimulationData> owner_sd_;

  // Whether statistics gathering has been activated
  // (= optimizer states added to optimizer)
  bool is_activated_;

  // interval of simulation frames for gathering stats
  Parameter<int> statistics_interval_frames_;

  // the file to which simulation statistics are dumped:
  std::string output_file_name_;

  // optimizer state for global statistics
  IMP::PointerMember<GlobalStatisticsOptimizerState> global_stats_;

  // statistics about all fgs, per particle, per chain, per particle type
  typedef std::vector< BodyStatisticsOptimizerStates >
    FGsBodyStatisticsOSs;
  typedef IMP_KERNEL_LARGE_UNORDERED_MAP<core::ParticleType, FGsBodyStatisticsOSs>
    FGsBodyStatisticsOSsMap;
  FGsBodyStatisticsOSsMap fgs_bodies_stats_map_;

  // statistics about all floaters (kaps etc.), per particle type
  typedef IMP_KERNEL_LARGE_UNORDERED_MAP<core::ParticleType, BodyStatisticsOptimizerStates>
    BodyStatisticsOSsMap;
  BodyStatisticsOSsMap floaters_stats_map_;

  // transport statistics about all floaters (kaps etc.) per particle type
  typedef IMP_KERNEL_LARGE_UNORDERED_MAP< core::ParticleType,
    ParticleTransportStatisticsOptimizerStates>
    ParticleTransportStatisticsOSsMap;
  ParticleTransportStatisticsOSsMap floaters_transport_stats_map_;

  // counter that starts at zero
  // NOTE(review): presumably the number of calls made to update(), used
  // together with full_output_statistics_interval_factor - confirm in the
  // implementation file
  int update_calls_ = 0;

#ifndef SWIG
  // distributions

  // per particle type, a 2D grid of integer counts used for the z-r
  // distribution (see update_particle_type_zr_distribution_map())
  typedef IMP_KERNEL_LARGE_UNORDERED_MAP< core::ParticleType,
    std::vector< std::vector<int> > >
    ParticleTypeZRDistributionMap;
  ParticleTypeZRDistributionMap particle_type_zr_distribution_map_;

  // sparse 3D matrix of counts: three nested maps keyed by 8-bit (or wider)
  // bin indexes; counts are value-initialized (zero) on first access
  typedef IMP_KERNEL_LARGE_UNORDERED_MAP< uint_fast8_t,
    IMP_KERNEL_LARGE_UNORDERED_MAP< uint_fast8_t,
      IMP_KERNEL_LARGE_UNORDERED_MAP< uint_fast8_t,
        boost::value_initialized<unsigned int> > > >
    t_sparse_3d_matrix;

  // per particle type, a sparse 3D matrix used for the x-y-z distribution
  // (see update_particle_type_xyz_distribution_map())
  typedef IMP_KERNEL_LARGE_UNORDERED_MAP< core::ParticleType,
    t_sparse_3d_matrix >
    ParticleTypeXYZDistributionMap;
  ParticleTypeXYZDistributionMap particle_type_xyz_distribution_map_;

  // sizes of a 3D matrix along each of its three dimensions
  struct t_size_3d_matrix{
    uint_fast8_t d0;
    uint_fast8_t d1;
    uint_fast8_t d2;
  };
  t_size_3d_matrix xyz_distribution_sizes_;
#endif

  // statistics about entire FG chains, for each FG type
  typedef IMP_KERNEL_LARGE_UNORDERED_MAP<core::ParticleType, ChainStatisticsOptimizerStates>
    ChainStatisticsOSsMap;
  ChainStatisticsOSsMap chains_stats_map_;

  // statistics of pairs of interactions for each interaction type
  typedef IMP_KERNEL_LARGE_UNORDERED_MAP< npctransport::InteractionType,
    PointerMember<BipartitePairsStatisticsOptimizerState> >
    BipartitePairsStatisticsOSMap;
  BipartitePairsStatisticsOSMap interaction_stats_map_;

  // true if statistics were recently reset, so that update()
  // should restart averaging from 0 frames
  // (mutable, so it can also be modified from const methods)
  mutable bool is_stats_reset_;

 public:
  //! Remove all statistics related to particle type pt
  /**
     Removes statistics of particle type pt from all internal maps
     and from the optimizer states of get_sd()->get_bd()

     @param pt the particle type to be removed
  */
  void remove_particle_type(core::ParticleType pt);

  //! Constructor
  /**
     @param sd the sd that owns and uses this statistics object
     @param statistics_interval_frames the interval of simulation frames
            for gathering statistics
     @param output_file_name name of output file to which to dump
            statistics (or update if it already exists) when calling
            update()
  */
  Statistics(SimulationData* sd,
             unsigned int statistics_interval_frames,
             std::string output_file_name);

  //! add statistics about an FG chain
  /** add statistics about an FG chain

      @param fg_chain the chain
  */
  void add_fg_chain_stats(FGChain* fg_chain);

  //! add statistics about a floater particle
  /** add statistics about a floater particle

      @param p the particle
  */
  void add_floater_stats(IMP::Particle* p);

  //! add statistics about interactions between particles of type 0 and 1
  /** add statistics about interactions between particles of type 0 and 1
      (order does not matter)

      @param type0 type of first interacting particles
      @param type1 type of other interacting particles
  */
  void add_interaction_stats
    ( core::ParticleType type0, core::ParticleType type1);

  //! add all statistics-related optimizer states to o
  /**
     @param o optimizer to which optimizer states are added,
              use get_sd()->get_bd() if nullptr

     @return the list of optimizer states that were added

     @note If called for more than one optimizer, only the last
           optimizer will be guaranteed to work well.
  */
  OptimizerStates add_optimizer_states(Optimizer* o = nullptr);

  /**
     updates the map of z-r distributions of particle coordinates
     with p's binned position counts (if z-symmetry flag is on,
     z is absolute vertical location; r is distance from pore axis)

     Comment: assumes a pore geometry, no checks are made that it is so

     @param p the particle whose position is added to the distribution
  */
  void update_particle_type_zr_distribution_map(Particle* p);

  /**
     updates the map of x-y-z distributions of particle coordinates
     with p's binned position counts (if z-symmetry flag is on,
     z is absolute vertical location)

     Comment: assumes a pore geometry, no checks are made that it is so

     @param p the particle whose position is added to the distribution
  */
  void update_particle_type_xyz_distribution_map(Particle* p);

  /**
     opens / creates statistics protobuf file, and updates it
     with appropriate statistics, using the statistics file
     originally specified in the constructor, and based on statistics
     gathered from the optimizer that was specified by
     add_optimizer_states().

     @throw UsageException If add_optimizer_states() was not called yet
                           (= get_is_activated() is false)

     @param timer the timer that was used to measure the time
                  that has elapsed for statistics
     @param nf_new the number of frames by which the statistics file
                   should be advanced. This is used to weight the
                   contribution of average statistics over time.
     @param force_full_output if true, then full statistics are dumped
                              to the output file, even if the number
                              of calls to update does not divide by the
                              value of
                              full_output_statistics_interval_factor

     @note this method is not const because it may invoke e.g., energy
           evaluation, though it does not substantially change anything
           in the state of the object

     @note if configuration file full_output_statistics_interval_factor
           is larger than 1, then full statistics are dumped every N calls
           to update(), where N is the value of
           full_output_statistics_interval_factor, and only the HDF5 file
           is updated at each call.
  */
  void update(const IMP::internal::SimpleTimer &timer,
              unsigned int nf_new = 1,
              bool force_full_output = false);

  /** resets all the counters of any statistics counters,
      and the simulation time to zero */
  void reset_statistics_optimizer_states();

  // NOTE(review): the original comment said the flag is set "to true";
  // given the tf parameter it is presumably set to tf - confirm against
  // the implementation.
  //! Loads the stats file and sets the interrupted flag to tf
  void set_interrupted(bool tf);

  /************************************************************/
  /************* various simple getters and setters *******************/
  /************************************************************/

  /** returns the model associated with the owned SimulationData */
  Model* get_model();

#ifndef SWIG
  /** returns the model associated with the owned SimulationData */
  Model* get_model() const;
#endif

  /** return the SimulationData object that owns this Statistics object */
  SimulationData* get_sd() {
    return owner_sd_;
  }

  //! if true, statistics have been activated, so add_optimizer_states()
  //! was called such that statistics are being tracked
  // (const-qualified so it can also be queried through a const
  //  reference, consistently with the other const getters of this class)
  bool get_is_activated() const {
    return is_activated_;
  }

#ifndef SWIG
  /** return the SimulationData object that owns this Statistics object */
  SimulationData const* get_sd() const{
    return owner_sd_;
  }
#endif

  /** return the name of the output file that was specified in the
      constructor (returned by value) */
  std::string get_output_file_name() const{
    return output_file_name_;
  }

 private:
  //! update the xyz distribution of type p_type to a dataset in
  //! hdf5_group, with name p_type.get_string()
  bool update_xyz_distribution_to_hdf5
    (RMF::HDF5::Group hdf5_group,
     core::ParticleType p_type);

  //! updates pStats with all statistics related to fgs, averaged over
  //! nf_new additional frames
  //! @param pStats the statistics message to be updated
  //! @param nf_new number of additional frames over which to average
  //! @param zr_hist a grid on z / (x,y)-radial axis relevant only if not outputting xyz stats to hdf5
  void update_fg_stats( ::npctransport_proto::Statistics* pStats,
                        unsigned int nf_new,
                        unsigned int zr_hist[4][3]);

  //! updates pStats with all statistics related to floaters, averaged over
  //! nf_new additional frames
  //!
  //! @param pStats the statistics message to be updated
  //! @param nf_new number of additional frames over which to average
  //!
  //! @return for historical reasons, returns a map of diffusion coefficients for each particle type
  //!         to be used in order params later on
  std::map<IMP::core::ParticleType, double> update_floater_stats( ::npctransport_proto::Statistics* pStats,
                                                                  unsigned int nf_new);

  // TODO: move to util.h, possibly internal
  // @param floaters a list of floater particles
  // @param chain_roots a list of hierarchy particles that are the root of an
  //                    fg chain (= all their children are fg typed particles)
  // @return a 4-tuple (1,2,3,4) with:
  //         1 - total # of individual site-site interactions between the specified
  //             floaters and chains
  //         2 - total # of floaters that site-interact with any specified chain
  //         3 - total # of fg bead-floater pairs that site-interact
  //         4 - sum of # of chain-floater pairs that site-interact
  boost::tuple<double, double, double, double> get_interactions_and_interacting
    ( const ParticlesTemp &floaters, const atom::Hierarchies &chain_roots) const;

  //! @return the top of a z-axis bin for various stats
  double get_z_distribution_top() const;

  //! @return the radius of the outermost radial (x,y) bin for various stats
  double get_r_distribution_max() const;

  // NOTE(review): the z2 and z3 intervals below are copied verbatim from
  // the original comment; as written they overlap z1 and z0, so they
  // presumably were meant to be [-top..0) and (...-top) - confirm against
  // the implementation.
  /**
     for particles ps, returns the distribution along the z axis, in regions
     z0 = [top...) ; z1 = [0..top) ; z2 = [-top..top) ; z3 = (...top)
     with top being the return value of get_z_distribution_top()

     @param ps the particles

     @return a tuple <z0,z1,z2,z3> with counts of particles from ps
             in z0, z1, z2 and z3 regions
  */
  boost::tuple<int, int, int, int>
    get_z_distribution(const ParticlesTemp& ps) const;

  /** add the z-axis / (x,y)-radial distribution of particles ps
      to zr_hist, a grid with z-axis bins on the first dimension and
      radial bins on the second dimension.

      @param zr_hist a grid on z / (x,y)-radial axis
      @param ps the particles
  */
  void fill_in_zr_hist(unsigned int zr_hist[4][3],
                       ParticlesTemp ps) const;

  void update_hdf5_statistics(); // output HDF5 statistics

 public:
  IMP_OBJECT_METHODS(Statistics);
};
//! Creates a new timer object
/** @return a value-initialized IMP::internal::SimpleTimer, e.g. for
            passing to Statistics::update()
    @note despite the function's name, the returned type is
          IMP::internal::SimpleTimer
*/
inline IMPNPCTRANSPORTEXPORT IMP::internal::SimpleTimer create_boost_timer() {
  typedef IMP::internal::SimpleTimer TimerType;
  return TimerType();
}
IMPNPCTRANSPORT_END_NAMESPACE
#endif /* IMPNPCTRANSPORT_STATISTICS_H */