-
Notifications
You must be signed in to change notification settings - Fork 299
/
CudaScan.h
101 lines (81 loc) · 3.56 KB
/
CudaScan.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/******************************************************************************
* SOFA, Simulation Open-Framework Architecture *
* (c) 2006 INRIA, USTL, UJF, CNRS, MGH *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU Lesser General Public License as published by *
* the Free Software Foundation; either version 2.1 of the License, or (at *
* your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, but WITHOUT *
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
* for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
*******************************************************************************
* Authors: The SOFA Team and external contributors (see Authors.txt) *
* *
* Contact information: contact@sofa-framework.org *
******************************************************************************/
#ifndef SOFA_GPU_CUDA_CUDASCAN_H
#define SOFA_GPU_CUDA_CUDASCAN_H
#include <sofa/gpu/cuda/mycuda.h>
#if defined(__cplusplus)
#include <sofa/gpu/cuda/CudaTypes.h>
#include <functional>
#include <numeric> // for std::partial_sum
namespace sofa::gpu::cuda
{
#endif
/// Selects which flavour of prefix sum the scan functions of this header compute.
/// The descriptions below follow the CPU reference implementation (CudaScanCPU).
enum ScanType
{
/// output[i] is the sum of input[0] .. input[i] (the current element is included).
SCAN_INCLUSIVE = 0,
/// output[0] is 0 and output[i] is the sum of input[0] .. input[i-1]
/// (the current element is excluded).
SCAN_EXCLUSIVE = 1,
};
// GPU entry points, declared with C linkage. Their implementation is not part of
// this header.
extern "C" {
// Queried by CudaScanPrepare(), which logs an error when this returns false for
// the given size and scan type.
extern bool SOFA_GPU_CUDA_API CudaScanGPUAvailable(unsigned int size, ScanType type);
// Runs the scan on the GPU. CudaScan() passes the pointers obtained from
// CudaVector::deviceReadAt() / deviceWriteAt() and falls back to CudaScanCPU()
// when this returns false.
// NOTE(review): the element type behind the void* buffers is not visible here -
// confirm which types the implementation supports.
extern bool SOFA_GPU_CUDA_API CudaScanGPU(const void* input, void* output, unsigned int size, ScanType type);
} // "C"
#if defined(__cplusplus)
/// CPU reference implementation of the prefix sum (scan).
///
/// @param input  first of the @a size elements to read
/// @param output first of the @a size elements to write
/// @param size   number of elements to scan; 0 is accepted and leaves @a output untouched
/// @param type   SCAN_INCLUSIVE: output[i] = input[0] + ... + input[i]
///               SCAN_EXCLUSIVE: output[0] = 0, output[i] = input[0] + ... + input[i-1]
///
/// For SCAN_EXCLUSIVE the two ranges must not overlap: output[0] is written
/// before input[0] is read, so an in-place call would lose the first element.
template<class TData>
void CudaScanCPU(const TData* input, TData* output, unsigned int size, ScanType type)
{
    // Nothing to do for an empty range. Without this guard the exclusive branch
    // would write output[0] out of bounds and `size-1` would wrap around to
    // UINT_MAX, sending std::partial_sum far past the end of both buffers.
    if (size == 0)
        return;

    switch(type)
    {
    case SCAN_INCLUSIVE:
        std::partial_sum(input, input+size, output);
        break;
    case SCAN_EXCLUSIVE:
        // The exclusive scan is the inclusive scan of the first size-1 elements,
        // shifted by one slot, with 0 in the first slot.
        output[0] = 0;
        std::partial_sum(input, input+(size-1), output+1);
        break;
    }
}
#endif
/// Checks whether the GPU scan can handle a scan of the given size and type,
/// and emits an error message through msg_error when it cannot.
/// Nothing is returned: the only visible effect is the log message.
static inline void CudaScanPrepare(unsigned int size, ScanType type)
{
    const bool gpuScanUsable = CudaScanGPUAvailable(size, type);
    if (gpuScanUsable)
    {
        return;
    }

    msg_error("SofaCUDA") << "CUDA: GPU scan implementation not available (size=" << size << ")";
}
/// Scans @a size elements of @a input into @a output, trying the GPU first.
///
/// @param input    vector to read from
/// @param input0   position handed to input->deviceReadAt() / hostReadAt()
/// @param output   vector to write to
/// @param output0  position handed to output->deviceWriteAt() / hostWriteAt()
/// @param size     number of elements to scan
/// @param type     inclusive or exclusive scan
/// @param forceCPU when true the GPU is not tried at all
///
/// The CPU implementation (CudaScanCPU) is used when @a forceCPU is set or
/// when CudaScanGPU() reports failure by returning false.
template<class TData>
static inline void CudaScan(const CudaVector<TData>* input, unsigned int input0, CudaVector<TData>* output, unsigned int output0, unsigned int size, ScanType type, bool forceCPU = false)
{
    // Short-circuit evaluation: the device pointers are only requested, and the
    // GPU scan only attempted, when the caller did not force the CPU path.
    const bool doneOnGPU = !forceCPU
        && CudaScanGPU(input->deviceReadAt(input0), output->deviceWriteAt(output0), size, type);
    if (doneOnGPU)
    {
        return;
    }

    CudaScanCPU(input->hostReadAt(input0), output->hostWriteAt(output0), size, type);
}
/// Convenience overload: same scan as the overload taking explicit positions,
/// with both the input and the output position set to 0.
template<class TData>
static inline void CudaScan(const CudaVector<TData>* input, CudaVector<TData>* output, unsigned int size, ScanType type, bool forceCPU = false)
{
    const unsigned int firstElement = 0;
    CudaScan(input, firstElement, output, firstElement, size, type, forceCPU);
}
#if defined(__cplusplus)
} // namespace sofa::gpu::cuda
#endif
#endif