/
BinMD.cpp
429 lines (373 loc) · 16.4 KB
/
BinMD.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
// Mantid Repository : https://github.com/mantidproject/mantid
//
// Copyright © 2018 ISIS Rutherford Appleton Laboratory UKRI,
// NScD Oak Ridge National Laboratory, European Spallation Source,
// Institut Laue - Langevin & CSNS, Institute of High Energy Physics, CAS
// SPDX - License - Identifier: GPL - 3.0 +
#include "MantidMDAlgorithms/BinMD.h"
#include "MantidAPI/ImplicitFunctionFactory.h"
#include "MantidDataObjects/CoordTransformAffine.h"
#include "MantidDataObjects/CoordTransformAffineParser.h"
#include "MantidDataObjects/CoordTransformAligned.h"
#include "MantidDataObjects/MDBox.h"
#include "MantidDataObjects/MDBoxBase.h"
#include "MantidDataObjects/MDEventFactory.h"
#include "MantidDataObjects/MDEventWorkspace.h"
#include "MantidDataObjects/MDHistoWorkspace.h"
#include "MantidGeometry/MDGeometry/MDBoxImplicitFunction.h"
#include "MantidGeometry/MDGeometry/MDHistoDimension.h"
#include "MantidKernel/CPUTimer.h"
#include "MantidKernel/EnabledWhenProperty.h"
#include "MantidKernel/Strings.h"
#include "MantidKernel/System.h"
#include "MantidKernel/Utils.h"
#include <boost/algorithm/string.hpp>
namespace Mantid::MDAlgorithms {
// Register the algorithm into the AlgorithmFactory
DECLARE_ALGORITHM(BinMD)
using namespace Mantid::Kernel;
using namespace Mantid::API;
using namespace Mantid::Geometry;
using namespace Mantid::DataObjects;
//----------------------------------------------------------------------------------------------
/** Constructor
*/
BinMD::BinMD()
: SlicingAlgorithm(), outWS(), implicitFunction(), indexMultiplier(), signals(nullptr), errors(nullptr),
numEvents(nullptr) {}
//----------------------------------------------------------------------------------------------
/** Initialize the algorithm's properties.
*/
void BinMD::init() {
declareProperty(std::make_unique<WorkspaceProperty<IMDWorkspace>>("InputWorkspace", "", Direction::Input),
"An input MDWorkspace.");
// Properties for specifying the slice to perform.
this->initSlicingProps();
// --------------- Processing methods and options
// ---------------------------------------
std::string grp = "Methods";
declareProperty(std::make_unique<PropertyWithValue<std::string>>("ImplicitFunctionXML", "", Direction::Input),
"XML string describing the implicit function determining "
"which bins to use.");
setPropertyGroup("ImplicitFunctionXML", grp);
declareProperty(std::make_unique<PropertyWithValue<bool>>("IterateEvents", true, Direction::Input),
"Alternative binning method where you iterate through every event, "
"placing them in the proper bin.\n"
"This may be faster for workspaces with few events and lots of output "
"bins.");
setPropertyGroup("IterateEvents", grp);
declareProperty(std::make_unique<PropertyWithValue<bool>>("Parallel", false, Direction::Input),
"Temporary parameter: true to run in parallel. This is ignored for "
"file-backed workspaces, where running in parallel makes things slower "
"due to disk thrashing.");
setPropertyGroup("Parallel", grp);
declareProperty(std::make_unique<WorkspaceProperty<IMDHistoWorkspace>>("TemporaryDataWorkspace", "", Direction::Input,
PropertyMode::Optional),
"An input MDHistoWorkspace used to accumulate results from "
"multiple MDEventWorkspaces. If unspecified a blank "
"MDHistoWorkspace will be created.");
declareProperty(std::make_unique<WorkspaceProperty<Workspace>>("OutputWorkspace", "", Direction::Output),
"A name for the output MDHistoWorkspace.");
}
//----------------------------------------------------------------------------------------------
/** Bin the contents of a MDBox
 *
 * Two strategies are used:
 *  - If the box holds more than 2 * 2^nd events, its vertexes are transformed
 *    first. When every vertex lands in the same output bin (and inside the
 *    chunk), the box's cached signal, squared error and event count are added
 *    to that bin and the events themselves are never touched.
 *  - Otherwise each event is transformed and accumulated individually, and the
 *    box's events are released afterwards.
 *
 * @param box :: pointer to the MDBox to bin
 * @param chunkMin :: the minimum index in each dimension to consider "valid"
 *(inclusive)
 * @param chunkMax :: the maximum index in each dimension to consider "valid"
 *(exclusive)
 */
template <typename MDE, size_t nd>
inline void BinMD::binMDBox(MDBox<MDE, nd> *box, const size_t *const chunkMin, const size_t *const chunkMax) {
  // Scratch buffer receiving the rotated/transformed coordinates
  auto outCenter = std::vector<coord_t>(m_outD);

  // Transforms one input-space point to the output dimensions and builds its
  // linear bin index. Returns false as soon as one dimension falls outside
  // [chunkMin, chunkMax); `linearIndex` is only meaningful when true is
  // returned.
  const auto findBin = [&](const coord_t *inCenter, size_t &linearIndex) -> bool {
    m_transform->apply(inCenter, outCenter.data());
    linearIndex = 0;
    for (size_t bd = 0; bd < m_outD; bd++) {
      const coord_t x = outCenter[bd];
      // Reject negative (and NaN) coordinates BEFORE converting: a
      // floating-point value that cannot be represented in the unsigned
      // destination type makes the conversion undefined behaviour.
      if (!(x >= 0))
        return false;
      // Bin index in this dimension
      const auto ix = size_t(x);
      // Within range (for this chunk)?
      if ((ix < chunkMin[bd]) || (ix >= chunkMax[bd]))
        return false;
      linearIndex += indexMultiplier[bd] * ix;
    }
    return true;
  };

  // Evaluate whether the entire box is in the same bin. Only worth doing when
  // the box holds enough events to pay for transforming every vertex.
  if (box->getNPoints() > (1 << nd) * 2) {
    size_t numVertexes = 0;
    auto vertexes = box->getVertexesArray(numVertexes);

    // All vertexes have to be within THE SAME BIN = have the same linear index.
    size_t commonIndex = 0;
    bool singleBin = true;
    for (size_t i = 0; i < numVertexes; i++) {
      size_t linearIndex = 0;
      const bool inRange = findBin(vertexes.get() + i * nd, linearIndex);
      // A vertex outside the chunk, or in a different bin than the previous
      // ones, means the shortcut cannot be used.
      if (!inRange || ((i > 0) && (linearIndex != commonIndex))) {
        singleBin = false;
        break;
      }
      commonIndex = linearIndex;
    }

    if (singleBin) {
      // The entire box is within a single bin: add the CACHED totals.
      signals[commonIndex] += box->getSignal();
      errors[commonIndex] += box->getErrorSquared();
      // TODO: If DataObjects get a weight, this would need to get the summed
      // weight.
      numEvents[commonIndex] += static_cast<signal_t>(box->getNPoints());
      // Don't bother looking at each event. This may save lots of time
      // loading from disk.
      return;
    }
  }

  // Could not establish that the whole box is in one bin, so iterate through
  // the events.
  const std::vector<MDE> &events = box->getConstEvents();
  for (const auto &event : events) {
    size_t linearIndex = 0;
    if (findBin(event.getCenter(), linearIndex)) {
      // Sum the signals as signal_t to preserve precision
      signals[linearIndex] += static_cast<signal_t>(event.getSignal());
      errors[linearIndex] += static_cast<signal_t>(event.getErrorSquared());
      // TODO: If DataObjects get a weight, this would need to get the summed
      // weight.
      numEvents[linearIndex] += 1.0;
    }
  }
  // Done with the events list
  box->releaseEvents();
}
//----------------------------------------------------------------------------------------------
/** Perform binning by iterating through every event and placing them in the
 * output workspace.
 *
 * The output is cut into chunks along one output dimension. For each chunk
 * the boxes selected by that chunk's implicit function are collected and
 * handed to binMDBox(). Chunks may be processed in parallel unless the input
 * is file-backed. After all chunks are done, the user-supplied implicit
 * function (if any) is applied to the output with NaN as the replacement
 * value.
 *
 * @param ws :: MDEventWorkspace of the given type.
 */
template <typename MDE, size_t nd> void BinMD::binByIterating(typename MDEventWorkspace<MDE, nd>::sptr ws) {
  BoxController_sptr bc = ws->getBoxController();
  // NOTE: an earlier revision saved and disabled the disk write buffer here
  // (bc->setCacheParameters(1,0)); that is intentionally not done any more.

  // Cache the per-dimension strides used to build a linear bin index:
  // dimension 0 has stride 1, dimension d uses the workspace multiplier d-1.
  indexMultiplier.resize(m_outD);
  for (size_t d = 0; d < m_outD; d++)
    indexMultiplier[d] = (d == 0) ? 1 : outWS->getIndexMultiplier()[d - 1];

  // Cache the raw output arrays that binMDBox() accumulates into.
  signals = outWS->mutableSignalArray();
  errors = outWS->mutableErrorSquaredArray();
  numEvents = outWS->mutableNumEventsArray();

  // A fresh (non-accumulating) run starts with signal/error/numEvents at 0.0
  if (!m_accumulate)
    outWS->setTo(0.0, 0.0, 0.0);

  // The dimension (in the output workspace) along which we chunk for parallel
  // processing
  // TODO: Find the smartest dimension to chunk against
  size_t chunkDimension = 0;
  const int totalBins = int(m_binDimensions[chunkDimension]->getNBins());

  // Parallel only when requested AND the input is not file-backed.
  bool doParallel = getProperty("Parallel");
  if (bc->isFileBacked())
    doParallel = false;

  // Bins (in the chunk dimension) per chunk. Serial runs use one chunk that
  // spans everything; parallel runs aim for two tasks per thread, with at
  // least one bin per chunk.
  int chunkNumBins = totalBins;
  if (doParallel) {
    chunkNumBins = int(m_binDimensions[chunkDimension]->getNBins() / (PARALLEL_GET_MAX_THREADS * 2));
    if (chunkNumBins < 1)
      chunkNumBins = 1;
  }

  // Running total of boxes found so far, used as the number of progress steps
  size_t progNumSteps = 0;
  if (prog) {
    prog->setNotifyStep(0.1);
    prog->resetNumSteps(100, 0.00, 1.0);
  }

  // Run the chunks in parallel. Each chunk only writes bins inside its own
  // index range, so there is no overlap in the output workspace.
  PRAGMA_OMP( parallel for schedule(dynamic,1) if (doParallel) )
  for (int chunk = 0; chunk < totalBins; chunk += chunkNumBins) {
    PARALLEL_START_INTERUPT_REGION
    // Region of interest for this chunk: the full extent in every dimension...
    std::vector<size_t> chunkMin(m_outD); // value-initialized, i.e. all zero
    std::vector<size_t> chunkMax(m_outD);
    for (size_t bd = 0; bd < m_outD; bd++)
      chunkMax[bd] = m_binDimensions[bd]->getNBins();

    // ...except along the chunk dimension, where it is [chunk, chunk +
    // chunkNumBins) clamped to the number of bins.
    const size_t chunkEnd = size_t(chunk + chunkNumBins);
    const size_t binsInChunkDim = m_binDimensions[chunkDimension]->getNBins();
    chunkMin[chunkDimension] = size_t(chunk);
    chunkMax[chunkDimension] = (chunkEnd > binsInChunkDim) ? binsInChunkDim : chunkEnd;

    // Build an implicit function (it needs to be in the space of the
    // MDEventWorkspace)
    auto function = this->getImplicitFunctionForChunk(chunkMin.data(), chunkMax.data());

    // Collect a pointer to each box selected by the implicit function
    // (leaf-only; the depth argument of 1000 acts as "no depth limit").
    std::vector<API::IMDNode *> boxes;
    ws->getBox()->getBoxes(boxes, 1000, true, function.get());

    // Sort boxes by file position IF file backed. This reduces seeking time,
    // hopefully.
    if (bc->isFileBacked())
      API::IMDNode::sortObjByID(boxes);

    // For progress reporting, add this chunk's boxes to the step count
    if (prog) {
      PARALLEL_CRITICAL(BinMD_progress) {
        g_log.debug() << "Chunk " << chunk << ": found " << boxes.size() << " boxes within the implicit function.\n";
        progNumSteps += boxes.size();
        prog->setNumSteps(progNumSteps);
      }
    }

    // Go through every box for this chunk.
    for (auto &node : boxes) {
      // Nodes that are not MDBox<MDE, nd>, or that are masked, are skipped.
      auto *leaf = dynamic_cast<MDBox<MDE, nd> *>(node);
      if (leaf && !leaf->getIsMasked())
        this->binMDBox(leaf, chunkMin.data(), chunkMax.data());

      // Progress reporting
      if (prog)
        prog->report();

      // For early cancelling of the loop
      if (this->m_cancel)
        break;
    } // for each box in the vector
    PARALLEL_END_INTERUPT_REGION
  } // for each chunk in parallel
  PARALLEL_CHECK_INTERUPT_REGION

  // Now the implicit function, if one was given
  if (!implicitFunction)
    return;

  if (prog)
    prog->report("Applying implicit function.");
  const signal_t nan = std::numeric_limits<signal_t>::quiet_NaN();
  outWS->applyImplicitFunction(implicitFunction.get(), nan, nan);
}
//----------------------------------------------------------------------------------------------
/** Execute the algorithm.
*/
void BinMD::exec() {
// Input MDEventWorkspace/MDHistoWorkspace
m_inWS = getProperty("InputWorkspace");
// Look at properties, create either axis-aligned or general transform.
// This (can) change m_inWS
this->createTransform();
// De serialize the implicit function
std::string ImplicitFunctionXML = getPropertyValue("ImplicitFunctionXML");
implicitFunction = nullptr;
if (!ImplicitFunctionXML.empty())
implicitFunction = std::unique_ptr<MDImplicitFunction>(
Mantid::API::ImplicitFunctionFactory::Instance().createUnwrapped(ImplicitFunctionXML));
// This gets deleted by the thread pool; don't delete it in here.
prog = std::make_unique<Progress>(this, 0.0, 1.0, 1);
// Create the dense histogram. This allocates the memory
std::shared_ptr<IMDHistoWorkspace> tmp = this->getProperty("TemporaryDataWorkspace");
outWS = std::dynamic_pointer_cast<MDHistoWorkspace>(tmp);
if (!outWS) {
outWS = std::make_shared<MDHistoWorkspace>(m_binDimensions);
} else {
m_accumulate = true;
}
// Saves the geometry transformation from original to binned in the workspace
outWS->setTransformFromOriginal(this->m_transformFromOriginal.release(), 0);
outWS->setTransformToOriginal(this->m_transformToOriginal.release(), 0);
for (size_t i = 0; i < m_bases.size(); i++)
outWS->setBasisVector(i, m_bases[i]);
outWS->setOrigin(this->m_translation);
outWS->setOriginalWorkspace(m_inWS, 0);
// And the intermediate WS one too, if any
if (m_intermediateWS) {
outWS->setOriginalWorkspace(m_intermediateWS, 1);
outWS->setTransformFromOriginal(m_transformFromIntermediate.release(), 1);
outWS->setTransformToOriginal(m_transformToIntermediate.release(), 1);
}
// Wrapper to cast to MDEventWorkspace then call the function
bool IterateEvents = getProperty("IterateEvents");
if (!IterateEvents) {
g_log.warning() << "IterateEvents=False is no longer supported. Setting "
"IterateEvents=True.\n";
IterateEvents = true;
}
/*
We should fail noisily here. CALL_MDEVENT_FUNCTION will silently allow
IMDHistoWorkspaces to cascade through to the end
and result in an empty output. The only way we allow InputWorkspaces to be
IMDHistoWorkspaces is if they also happen to contain original workspaces
that are MDEventWorkspaces.
*/
if (m_inWS->isMDHistoWorkspace()) {
throw std::runtime_error("Cannot rebin a workspace that is histogrammed and has no original "
"workspace that is an MDEventWorkspace. "
"Reprocess the input so that it contains full MDEvents.");
}
CALL_MDEVENT_FUNCTION(this->binByIterating, m_inWS);
// Copy the coordinate system & experiment infos to the output
IMDEventWorkspace_sptr inEWS = std::dynamic_pointer_cast<IMDEventWorkspace>(m_inWS);
if (inEWS) {
outWS->setCoordinateSystem(inEWS->getSpecialCoordinateSystem());
try {
outWS->copyExperimentInfos(*inEWS);
} catch (std::runtime_error &) {
g_log.warning() << this->name() << " was not able to copy experiment info to output workspace "
<< outWS->getName() << '\n';
}
}
// Pass on the display normalization from the input workspace
outWS->setDisplayNormalization(m_inWS->displayNormalizationHisto());
outWS->updateSum();
// Save the output
setProperty("OutputWorkspace", std::dynamic_pointer_cast<Workspace>(outWS));
}
} // namespace Mantid::MDAlgorithms