/*
* Copyright (c) 2017, Miroslav Stoyanov
*
* This file is part of
* Toolkit for Adaptive Stochastic Modeling And Non-Intrusive ApproximatioN: TASMANIAN
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
* and the following disclaimer in the documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse
* or promote products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
* OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* UT-BATTELLE, LLC AND THE UNITED STATES GOVERNMENT MAKE NO REPRESENTATIONS AND DISCLAIM ALL WARRANTIES, BOTH EXPRESSED AND IMPLIED.
* THERE ARE NO EXPRESS OR IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY PATENT,
* COPYRIGHT, TRADEMARK, OR OTHER PROPRIETARY RIGHTS, OR THAT THE SOFTWARE WILL ACCOMPLISH THE INTENDED RESULTS OR THAT THE SOFTWARE OR ITS USE WILL NOT RESULT IN INJURY OR DAMAGE.
* THE USER ASSUMES RESPONSIBILITY FOR ALL LIABILITIES, PENALTIES, FINES, CLAIMS, CAUSES OF ACTION, AND COSTS AND EXPENSES, CAUSED BY, RESULTING FROM OR ARISING OUT OF,
* IN WHOLE OR IN PART THE USE, STORAGE OR DISPOSAL OF THE SOFTWARE.
*/
#ifndef __TASMANIAN_SPARSE_GRID_LPOLY_HPP
#define __TASMANIAN_SPARSE_GRID_LPOLY_HPP
#include "tsgGridCore.hpp"
namespace TasGrid{
#ifndef __TASMANIAN_DOXYGEN_SKIP
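// Sparse grid based on the hierarchical local polynomial basis (the localp family of rules).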
class GridLocalPolynomial : public BaseCanonicalGrid{
public:
GridLocalPolynomial(AccelerationContext const *acc) : BaseCanonicalGrid(acc), order(1), top_level(0){}
friend struct GridReaderVersion5<GridLocalPolynomial>;
GridLocalPolynomial(AccelerationContext const *acc, const GridLocalPolynomial *pwpoly, int ibegin, int iend);
GridLocalPolynomial(AccelerationContext const *acc, int cnum_dimensions, int cnum_outputs, int depth, int corder, TypeOneDRule crule, const std::vector<int> &level_limits);
~GridLocalPolynomial() = default;
bool isLocalPolynomial() const override{ return true; }
void write(std::ostream &os, bool iomode) const override{ if (iomode == mode_ascii) write<mode_ascii>(os); else write<mode_binary>(os); }
template<bool iomode> void write(std::ostream &os) const;
TypeOneDRule getRule() const override{ return RuleLocal::getRule(effective_rule); }
int getOrder() const{ return order; }
template<RuleLocal::erule, typename points_mode>
void getPoints(double *x) const;
void getLoadedPoints(double *x) const override;
void getNeededPoints(double *x) const override;
void getPoints(double *x) const override; // returns the loaded points, or the needed points if no points have been loaded yet
template<RuleLocal::erule effrule>
void getQuadratureWeights(double weights[]) const;
void getQuadratureWeights(double weights[]) const override;
void getInterpolationWeights(const double x[], double weights[]) const override;
void getDifferentiationWeights(const double x[], double weights[]) const override;
void loadNeededValues(const double *vals) override;
void evaluate(const double x[], double y[]) const override;
void integrate(double q[], double *conformal_correction) const override;
void differentiate(const double x[], double jacobian[]) const override;
void evaluateBatchOpenMP(const double x[], int num_x, double y[]) const;
void evaluateBatch(const double x[], int num_x, double y[]) const override;
void loadNeededValuesGPU(const double *vals);
void evaluateGpuMixed(const double x[], int num_x, double y[]) const;
void evaluateBatchGPU(const double gpu_x[], int cpu_num_x, double gpu_y[]) const override;
void evaluateBatchGPU(const float gpu_x[], int cpu_num_x, float gpu_y[]) const override;
template<typename T> void evaluateBatchGPUtempl(const T gpu_x[], int cpu_num_x, T gpu_y[]) const;
void evaluateHierarchicalFunctionsGPU(const double gpu_x[], int cpu_num_x, double *gpu_y) const override;
void buildSparseBasisMatrixGPU(const double gpu_x[], int cpu_num_x, GpuVector<int> &gpu_spntr, GpuVector<int> &gpu_sindx, GpuVector<double> &gpu_svals) const;
void evaluateHierarchicalFunctionsGPU(const float gpu_x[], int cpu_num_x, float *gpu_y) const override;
void buildSparseBasisMatrixGPU(const float gpu_x[], int cpu_num_x, GpuVector<int> &gpu_spntr, GpuVector<int> &gpu_sindx, GpuVector<float> &gpu_svals) const;
void setSurplusRefinement(double tolerance, TypeRefinement criteria, int output, const std::vector<int> &level_limits, const double *scale_correction);
void clearRefinement() override;
void mergeRefinement() override;
/*!
* \brief Returns a vector with the normalized and rescaled coefficients.
*
* \param output indicates which output to use, or -1 to use all outputs (returns the max for each output)
* \param scale_correction user-provided corrections that rescale the coefficients by spatial or other importance
*/
std::vector<double> getScaledCoefficients(int output, const double *scale_correction);
int removePointsByHierarchicalCoefficient(double tolerance, int output, const double *scale_correction); // returns the number of points kept
void removePointsByHierarchicalCoefficient(int new_num_points, int output, const double *scale_correction);
/*!
* \brief Remove all points marked as \b false in the \b pmap.
*
* \param pmap is a vector with size equal to the loaded points and flags as \b false each point that should be removed.
*/
int removeMappedPoints(std::vector<bool> const &pmap);
void beginConstruction() override;
void writeConstructionData(std::ostream &os, bool) const override;
void readConstructionData(std::istream &is, bool) override;
template<RuleLocal::erule effrule>
std::vector<double> getCandidateConstructionPoints(double tolerance, TypeRefinement criteria, int output, std::vector<int> const &level_limits, double const *scale_correction);
std::vector<double> getCandidateConstructionPoints(double tolerance, TypeRefinement criteria, int output, std::vector<int> const &level_limits, double const *scale_correction);
template<RuleLocal::erule effrule>
void loadConstructedPoint(const double x[], const std::vector<double> &y);
void loadConstructedPoint(const double x[], const std::vector<double> &y) override;
template<RuleLocal::erule effrule>
void loadConstructedPoint(const double x[], int numx, const double y[]);
void loadConstructedPoint(const double x[], int numx, const double y[]) override;
void finishConstruction() override;
void evaluateHierarchicalFunctions(const double x[], int num_x, double y[]) const override;
std::vector<double> getSupport() const override final;
void setHierarchicalCoefficients(const double c[]) override;
void integrateHierarchicalFunctions(double integrals[]) const override;
void updateAccelerationData(AccelerationContext::ChangeType change) const override;
const double* getSurpluses() const;
const int* getNeededIndexes() const;
void buildSpareBasisMatrix(const double x[], int num_x, int num_chunk, std::vector<int> &spntr, std::vector<int> &sindx, std::vector<double> &svals) const;
void buildSpareBasisMatrixStatic(const double x[], int num_x, int num_chunk, int *spntr, int *sindx, double *svals) const;
int getSpareBasisMatrixNZ(const double x[], int num_x) const;
protected:
//! \brief Create a new grid with the given parameters, moving the data out of the vectors and sets.
GridLocalPolynomial(AccelerationContext const *acc, int cnum_dimensions, int cnum_outputs, int corder, TypeOneDRule crule, std::vector<int> &&pnts, std::vector<double> &&vals, std::vector<double> &&surps);
//! \brief Used as part of the loadNeededPoints() algorithm; updates the values and the CUDA cache, but does not touch the surpluses.
void updateValues(double const *vals);
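//! \brief Builds the evaluation tree (\b roots, \b pntr, \b indx) that walkTree() traverses to find the basis functions supported at a point.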
void buildTree();
//! \brief Returns a list of indexes of the nodes in \b points that are descendants of the \b point.
template<RuleLocal::erule effrule>
std::vector<int> getSubGraph(std::vector<int> const &point) const;
//! \brief Add the \b point to the grid using the \b values.
template<RuleLocal::erule effrule>
void expandGrid(std::vector<int> const &point, std::vector<double> const &value);
//! \brief Return the multi-index of canonical point \b x.
template<RuleLocal::erule effrule>
std::vector<int> getMultiIndex(const double x[]);
//! \brief Looks for a batch of constructed points and processes all that will result in a connected graph.
template<RuleLocal::erule effrule>
void loadConstructedPoints();
//! \brief Fast method that uses a global Kronecker-type algorithm to recompute all surpluses.
template<RuleLocal::erule effrule>
void recomputeSurpluses();
void recomputeSurpluses();
/*!
* \brief Update the surpluses for a portion of the graph.
*
* Updates the surpluses (i.e., hierarchical coefficients) for a portion of the DAG graph.
* - \b work is the point set to consider
* - \b max_level is the largest element in \b level
* - \b level is the hierarchical level of the points in \b work (i.e., not the sum of multi-index entries);
* points where \b level is zero will not be computed
* - \b dagUp must have been computed using \b MultiIndexManipulations::computeDAGup(\b work, \b rule, \b dagUp)
*
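* As a hedged sketch (not the literal loop structure used here), each surplus satisfies the hierarchical relation
* \f$ s_i = f(x_i) - \sum_{j \in \mathrm{ancestors}(i)} s_j \, \phi_j(x_i) \f$,
* i.e., the loaded value at the node minus the contribution of all ancestor basis functions,
* and this method resolves that relation by sweeping the levels of the DAG in order.
*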
* Note: adjusting the \b level vector makes it possible to update the surpluses for only a portion of the graph.
*
* Note: see the comments inside recomputeSurpluses() for a performance comparison between the different algorithms;
* this method can also be used for a partial update, i.e., to update the surpluses for only some of the indexes.
*/
template<RuleLocal::erule effrule>
void updateSurpluses(MultiIndexSet const &work, int max_level, std::vector<int> const &level, Data2D<int> const &dagUp);
// Same idea as in GridSequence::applyTransformationTransposed().
template<int mode>
void applyTransformationTransposed(double weights[], const MultiIndexSet &work, const std::vector<int> &active_points) const;
template<int mode, RuleLocal::erule effrule>
void applyTransformationTransposed(double weights[], const MultiIndexSet &work, const std::vector<int> &active_points) const;
void buildSparseMatrixBlockForm(const double x[], int num_x, int num_chunk, std::vector<int> &numnz,
std::vector<std::vector<int>> &tindx, std::vector<std::vector<double>> &tvals) const;
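//! \brief Evaluates the multi-dimensional basis function associated with \b point at the canonical \b x; sets \b isSupported to \b false and returns 0.0 when \b x lies outside the support.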
template<RuleLocal::erule eff_rule>
double evalBasisSupported(const int point[], const double x[], bool &isSupported) const{
double f = RuleLocal::evalSupport<eff_rule>(order, point[0], x[0], isSupported);
if (!isSupported) return 0.0;
for(int j=1; j<num_dimensions; j++){
f *= RuleLocal::evalSupport<eff_rule>(order, point[j], x[j], isSupported);
if (!isSupported) return 0.0;
}
return f;
}
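//! \brief Computes the gradient of the basis function associated with \b point at \b x, writing the \b num_dimensions partial derivatives into \b diff_values; \b isSupported is set to \b true if any dimension reports support.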
template<RuleLocal::erule effrule>
void diffBasisSupported(const int point[], const double x[], double diff_values[], bool &isSupported) const{
isSupported = false;
for(int i=0; i<num_dimensions; i++) diff_values[i] = 1.0;
bool isDimSupported = false;
for(int k=0; k<num_dimensions; k++) {
double fval = RuleLocal::evalSupport<effrule>(order, point[k], x[k], isDimSupported);
isSupported = isDimSupported or isSupported;
for(int j=0; j<k; j++) diff_values[j] *= fval;
for(int j=k+1; j<num_dimensions; j++) diff_values[j] *= fval;
}
for (int k=0; k<num_dimensions; k++) {
diff_values[k] *= RuleLocal::diffSupport<effrule>(order, point[k], x[k], isDimSupported);
isSupported = isDimSupported or isSupported;
}
}
/*!
* \brief Walks through all the nodes of the tree, touching only the nodes supported at \b x.
*
* The template is instantiated in different modes:
* - \b mode \b 0, ignore \b sindx and \b svals; find the non-zero basis functions, multiply them by the surpluses, and add the result to \b y
* - \b mode \b 1, ignore \b y; form a sparse vector by calling std::vector::push_back() on \b sindx and \b svals
* - \b mode \b 2, same as \b mode \b 1, but the entries are also sorted by index (a requirement of Nvidia cusparseDgemvi)
* - \b mode \b 3, same as \b mode \b 0, but replaces the non-zero basis function values with their gradient vectors
* - \b mode \b 4, same as \b mode \b 1, but replaces the non-zero basis function values with their gradient vectors
*
* For mode 4, the i-th entry of \b sindx maps to the set of \b num_dimensions values in \b svals starting at index (i * num_dimensions). Hence,
* we have (sindx.size() * num_dimensions == svals.size()).
*
* In all cases, \b work is the \b points or \b needed set that has been used to construct the tree.
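*
* A hedged usage sketch for mode 0 (e.g., as it might be invoked from evaluate()), assuming \b y is pre-filled with zeros:
* \code
*   std::vector<int> sindx;    // ignored in mode 0
*   std::vector<double> svals; // ignored in mode 0
*   walkTree<0>(points, x, sindx, svals, y); // accumulates surplus[k] * basis(x) into y[k]
* \endcode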
*/
template<int mode, RuleLocal::erule effrule>
void walkTree(const MultiIndexSet &work, const double x[], std::vector<int> &sindx, std::vector<double> &svals, double *y) const{
    std::vector<int> monkey_count(top_level+1); // traverse the tree, counts the branches of the current node
    std::vector<int> monkey_tail(top_level+1); // traverse the tree, keeps track of the previous node (history)
    bool isSupported;
    double basis_value;
    std::vector<double> basis_derivative(num_dimensions);
    for(const auto &r : roots){
        if (mode == 3 or mode == 4) {
            diffBasisSupported<effrule>(work.getIndex(r), x, basis_derivative.data(), isSupported);
        } else {
            basis_value = evalBasisSupported<effrule>(work.getIndex(r), x, isSupported);
        }
        if (isSupported){
            if (mode == 0){
                double const *s = surpluses.getStrip(r);
                for(int k=0; k<num_outputs; k++)
                    y[k] += basis_value * s[k];
            }else if (mode == 1 or mode == 2){
                sindx.push_back(r);
                svals.push_back(basis_value);
            }else if (mode == 3){
                double const *s = surpluses.getStrip(r);
                for(int k=0; k<num_outputs; k++)
                    for (int d=0; d<num_dimensions; d++)
                        y[k * num_dimensions + d] += basis_derivative[d] * s[k];
            }else{
                sindx.push_back(r);
                for (auto dx : basis_derivative)
                    svals.push_back(dx);
            }
            int current = 0;
            monkey_tail[0] = r;
            monkey_count[0] = pntr[r];
            while(monkey_count[0] < pntr[monkey_tail[0]+1]){
                if (monkey_count[current] < pntr[monkey_tail[current]+1]){
                    int p = indx[monkey_count[current]];
                    if (mode == 3 or mode == 4){
                        diffBasisSupported<effrule>(work.getIndex(p), x, basis_derivative.data(), isSupported);
                    }else{
                        basis_value = evalBasisSupported<effrule>(work.getIndex(p), x, isSupported);
                    }
                    if (isSupported){
                        if (mode == 0){
                            double const *s = surpluses.getStrip(p);
                            for(int k=0; k<num_outputs; k++)
                                y[k] += basis_value * s[k];
                        }else if (mode == 1 or mode == 2){
                            sindx.push_back(p);
                            svals.push_back(basis_value);
                        }else if (mode == 3){
                            double const *s = surpluses.getStrip(p);
                            for(int k=0; k<num_outputs; k++)
                                for (int d=0; d<num_dimensions; d++)
                                    y[k * num_dimensions + d] += basis_derivative[d] * s[k];
                        }else{
                            sindx.push_back(p);
                            for (auto dx : basis_derivative)
                                svals.push_back(dx);
                        }
                        monkey_tail[++current] = p;
                        monkey_count[current] = pntr[p];
                    }else{
                        monkey_count[current]++;
                    }
                }else{
                    monkey_count[--current]++;
                }
            }
        }
    }
    // according to https://docs.nvidia.com/cuda/cusparse/index.html#sparse-format
    // "... it is assumed that the indices are provided in increasing order and that each index appears only once."
    // This may not be a requirement for cusparseDgemvi(), but it may be that I have not tested it sufficiently
    // Also, see AccelerationDataGPUFull::cusparseMatveci() for inaccuracies in Nvidia documentation
    if (mode == 2){
        std::vector<int> map(sindx);
        std::iota(map.begin(), map.end(), 0);
        std::sort(map.begin(), map.end(), [&](int a, int b)->bool{ return (sindx[a] < sindx[b]); });
        std::vector<int> idx = sindx;
        std::vector<double> vls = svals;
        std::transform(map.begin(), map.end(), sindx.begin(), [&](int i)->int{ return idx[i]; });
        std::transform(map.begin(), map.end(), svals.begin(), [&](int i)->double{ return vls[i]; });
    }
}
// Dispatches to the explicit instantiation matching the current effective_rule
template<int mode>
void walkTree(const MultiIndexSet &work, const double x[], std::vector<int> &sindx, std::vector<double> &svals, double *y) const{
    switch(effective_rule) {
        case RuleLocal::erule::pwc:
            walkTree<mode, RuleLocal::erule::pwc>(work, x, sindx, svals, y);
            break;
        case RuleLocal::erule::localp:
            walkTree<mode, RuleLocal::erule::localp>(work, x, sindx, svals, y);
            break;
        case RuleLocal::erule::semilocalp:
            walkTree<mode, RuleLocal::erule::semilocalp>(work, x, sindx, svals, y);
            break;
        case RuleLocal::erule::localp0:
            walkTree<mode, RuleLocal::erule::localp0>(work, x, sindx, svals, y);
            break;
        default: // case RuleLocal::erule::localpb:
            walkTree<mode, RuleLocal::erule::localpb>(work, x, sindx, svals, y);
            break;
    };
}
template<RuleLocal::erule effrule>
double evalBasisRaw(const int point[], const double x[]) const {
double f = RuleLocal::evalRaw<effrule>(order, point[0], x[0]);
for(int j=1; j<num_dimensions; j++) f *= RuleLocal::evalRaw<effrule>(order, point[j], x[j]);
return f;
}
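//! \brief Returns one normalization constant per output, used when rescaling the surpluses for refinement (see getScaledCoefficients()).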
std::vector<double> getNormalization() const;
template<RuleLocal::erule effrule>
Data2D<int> buildUpdateMap(double tolerance, TypeRefinement criteria, int output, const double *scale_correction) const;
template<RuleLocal::erule effrule>
MultiIndexSet getRefinementCanidates(double tolerance, TypeRefinement criteria, int output, const std::vector<int> &level_limits, const double *scale_correction) const;
template<RuleLocal::erule effrule>
bool addParent(const int point[], int direction, const MultiIndexSet &exclude, Data2D<int> &destination) const;
template<RuleLocal::erule effrule>
void addChild(const int point[], int direction, const MultiIndexSet &exclude, Data2D<int> &destination) const;
template<RuleLocal::erule effrule>
void addChildLimited(const int point[], int direction, const MultiIndexSet &exclude, const std::vector<int> &level_limits, Data2D<int> &destination) const;
void clearGpuSurpluses();
void clearGpuBasisHierarchy();
private:
int order, top_level;
Data2D<double> surpluses;
Data2D<int> parents;
// tree for evaluation
std::vector<int> roots;
std::vector<int> pntr;
std::vector<int> indx;
RuleLocal::erule effective_rule;
std::unique_ptr<SimpleConstructData> dynamic_values;
// synchronize with tasgpu_devalpwpoly_feval
template<int ord, TypeOneDRule crule, typename T>
Data2D<T> encodeSupportForGPU(const MultiIndexSet &work) const{
    Data2D<T> cpu_support(num_dimensions, work.getNumIndexes());
    for(int i=0; i<work.getNumIndexes(); i++){
        const int* p = work.getIndex(i);
        T *s = cpu_support.getStrip(i);
        for(int j=0; j<num_dimensions; j++){
            if (ord == 0){
                s[j] = static_cast<T>(RuleLocal::getSupport<RuleLocal::erule::pwc>(p[j]));
            } else {
                switch(crule) {
                    case rule_localp:
                        s[j] = static_cast<T>(RuleLocal::getSupport<RuleLocal::erule::localp>(p[j]));
                        break;
                    case rule_semilocalp:
                        s[j] = static_cast<T>(RuleLocal::getSupport<RuleLocal::erule::semilocalp>(p[j]));
                        break;
                    case rule_localp0:
                        s[j] = static_cast<T>(RuleLocal::getSupport<RuleLocal::erule::localp0>(p[j]));
                        break;
                    case rule_localpb:
                        s[j] = static_cast<T>(RuleLocal::getSupport<RuleLocal::erule::localpb>(p[j]));
                        break;
                };
                if (ord == 2) s[j] *= s[j];
                if ((crule == rule_localp) || (crule == rule_semilocalp)) if (p[j] == 0) s[j] = static_cast<T>(-1.0); // constant function
                if ((crule == rule_localp) && (ord == 2)){
                    if (p[j] == 1) s[j] = static_cast<T>(-2.0);
                    else if (p[j] == 2) s[j] = static_cast<T>(-3.0);
                }
                if ((crule == rule_semilocalp) && (ord == 2)){
                    if (p[j] == 1) s[j] = static_cast<T>(-4.0);
                    else if (p[j] == 2) s[j] = static_cast<T>(-5.0);
                }
                if ((crule == rule_localpb) && (ord == 2)){
                    if (p[j] < 2) s[j] = static_cast<T>(-2.0); // linear functions on level 0
                }
            }
        }
    }
    return cpu_support;
}
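// Overload-based tag dispatch: getGpuCache<T>() selects the double or float precision GPU cache.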
std::unique_ptr<CudaLocalPolynomialData<double>>& getGpuCacheOverload(double) const{ return gpu_cache; }
std::unique_ptr<CudaLocalPolynomialData<float>>& getGpuCacheOverload(float) const{ return gpu_cachef; }
template<typename T> std::unique_ptr<CudaLocalPolynomialData<T>>& getGpuCache() const{
return getGpuCacheOverload(static_cast<T>(0.0));
}
template<typename T> void loadGpuBasis() const;
template<typename T> void loadGpuHierarchy() const;
template<typename T> void loadGpuSurpluses() const;
mutable std::unique_ptr<CudaLocalPolynomialData<double>> gpu_cache;
mutable std::unique_ptr<CudaLocalPolynomialData<float>> gpu_cachef;
};
// Reader for the old (version 5) file format
template<> struct GridReaderVersion5<GridLocalPolynomial>{
template<typename iomode> static std::unique_ptr<GridLocalPolynomial> read(AccelerationContext const *acc, std::istream &is){
std::unique_ptr<GridLocalPolynomial> grid = Utils::make_unique<GridLocalPolynomial>(acc);
grid->num_dimensions = IO::readNumber<iomode, int>(is);
grid->num_outputs = IO::readNumber<iomode, int>(is);
grid->order = IO::readNumber<iomode, int>(is);
grid->top_level = IO::readNumber<iomode, int>(is);
TypeOneDRule rule = IO::readRule<iomode>(is);
grid->effective_rule = RuleLocal::getEffectiveRule(grid->order, rule);
if (IO::readFlag<iomode>(is)) grid->points = MultiIndexSet(is, iomode());
if (std::is_same<iomode, IO::mode_ascii_type>::value){ // backwards compatible: surpluses and needed, or needed and surpluses
if (IO::readFlag<iomode>(is))
grid->surpluses = IO::readData2D<iomode, double>(is, grid->num_outputs, grid->points.getNumIndexes());
if (IO::readFlag<iomode>(is)) grid->needed = MultiIndexSet(is, iomode());
}else{
if (IO::readFlag<iomode>(is)) grid->needed = MultiIndexSet(is, iomode());
if (IO::readFlag<iomode>(is))
grid->surpluses = IO::readData2D<iomode, double>(is, grid->num_outputs, grid->points.getNumIndexes());
}
int max_parents = [&]()->int {
switch(grid->effective_rule) {
case RuleLocal::erule::pwc: return RuleLocal::getMaxNumParents<RuleLocal::erule::pwc>();
case RuleLocal::erule::localp: return RuleLocal::getMaxNumParents<RuleLocal::erule::localp>();
case RuleLocal::erule::semilocalp: return RuleLocal::getMaxNumParents<RuleLocal::erule::semilocalp>();
case RuleLocal::erule::localp0: return RuleLocal::getMaxNumParents<RuleLocal::erule::localp0>();
default: // case RuleLocal::erule::localpb:
return RuleLocal::getMaxNumParents<RuleLocal::erule::localpb>();
};
}();
if (IO::readFlag<iomode>(is))
grid->parents = IO::readData2D<iomode, int>(is, max_parents * grid->num_dimensions, grid->points.getNumIndexes());
size_t num_points = (size_t) ((grid->points.empty()) ? grid->needed.getNumIndexes() : grid->points.getNumIndexes());
grid->roots = std::vector<int>((size_t) IO::readNumber<iomode, int>(is));
if (grid->roots.size() > 0){
IO::readVector<iomode>(is, grid->roots);
grid->pntr = IO::readVector<iomode, int>(is, num_points + 1);
if (grid->pntr[num_points] > 0){
grid->indx = IO::readVector<iomode, int>(is, grid->pntr[num_points]);
}else{
grid->indx = IO::readVector<iomode, int>(is, 1);
}
}
if (grid->num_outputs > 0) grid->values = StorageSet(is, iomode());
return grid;
}
};
#endif // __TASMANIAN_DOXYGEN_SKIP
}
#endif