-
Notifications
You must be signed in to change notification settings - Fork 298
/
CudaSort.h
120 lines (97 loc) · 4.11 KB
/
CudaSort.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/******************************************************************************
* SOFA, Simulation Open-Framework Architecture *
* (c) 2006 INRIA, USTL, UJF, CNRS, MGH *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU Lesser General Public License as published by *
* the Free Software Foundation; either version 2.1 of the License, or (at *
* your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, but WITHOUT *
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or *
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
* for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
*******************************************************************************
* Authors: The SOFA Team and external contributors (see Authors.txt) *
* *
* Contact information: contact@sofa-framework.org *
******************************************************************************/
#ifndef SOFA_GPU_CUDA_CUDASORT_H
#define SOFA_GPU_CUDA_CUDASORT_H
#include <sofa/gpu/cuda/mycuda.h>
#if defined(__cplusplus)
#include <sofa/gpu/cuda/CudaTypes.h>
#include <algorithm> // for std::sort
#include <vector>
namespace sofa::gpu::cuda
{
#endif
// C-linkage entry points of the GPU sort. They are only declared in this
// header; their definitions are not visible from here.
extern "C" {
// Queried by CudaSortPrepare(), which logs an error when this returns false.
// Presumably reports whether the GPU implementation can handle `size` elements
// (with or without an accompanying data array) -- TODO confirm against the
// implementation.
extern bool SOFA_GPU_CUDA_API CudaSortGPUAvailable(unsigned int size, bool withData = true);
// Sorts `size` keys on the GPU. CudaSort() passes pointers obtained from
// deviceWriteAt() for `keys` and `data`, and passes NULL for `data` when there
// is no data array to reorder. A false return value makes CudaSort() fall back
// to CudaSortCPU().
// NOTE(review): `bits` is presumably the number of significant key bits to
// sort on -- confirm against the implementation.
extern bool SOFA_GPU_CUDA_API CudaSortGPU(void* keys, void* data, unsigned int size, int bits);
} // "C"
#if defined(__cplusplus)
/// Comparison functor ordering std::pair values by their `first` member only.
/// The `second` member never takes part in the comparison, so two pairs with
/// equal `first` members compare as equivalent.
struct compare_pair_first
{
    /// @return true when lhs.first < rhs.first.
    template<class First, class Second>
    bool operator()(const std::pair<First,Second>& lhs, const std::pair<First,Second>& rhs) const
    {
        const First& lhsKey = lhs.first;
        const First& rhsKey = rhs.first;
        return lhsKey < rhsKey;
    }
};
/// Host-side sort of `size` keys into ascending order (operator<), optionally
/// applying the same permutation to a parallel `data` array.
///
/// @param keys  array of `size` keys, sorted in place.
/// @param data  array of `size` values reordered together with their keys, or
///              a null pointer to sort the keys alone.
/// @param size  number of elements in `keys` (and in `data` when non-null).
/// @param bits  (unnamed) ignored by this implementation; present so the
///              signature lines up with CudaSortGPU().
///
/// When `data` is given the sort is stable: entries with equal keys keep their
/// original relative order, which makes the result deterministic.
template<class TKey, class TData>
void CudaSortCPU(TKey* keys, TData* data, unsigned int size, int /*bits*/)
{
    // Zero or one element is already sorted.
    if (size < 2)
        return;

    if (!data)
    {
        std::sort(keys, keys + size);
        return;
    }

    typedef std::pair<TKey,TData> Entry;

    // Gather (key, data) pairs so that both arrays are permuted together.
    // reserve + emplace_back avoids default-constructing every pair first.
    std::vector<Entry> entries;
    entries.reserve(size);
    for (unsigned int i = 0; i < size; ++i)
        entries.emplace_back(keys[i], data[i]);

    // Only the key takes part in the comparison; stable_sort keeps the input
    // order of entries that share a key.
    std::stable_sort(entries.begin(), entries.end(),
                     [](const Entry& a, const Entry& b) { return a.first < b.first; });

    // Scatter the sorted pairs back into the caller's arrays.
    for (unsigned int i = 0; i < size; ++i)
    {
        keys[i] = entries[i].first;
        data[i] = entries[i].second;
    }
}
#endif
static inline void CudaSortPrepare(unsigned int size, bool withData = true)
{
if (!CudaSortGPUAvailable(size, withData))
msg_error("SofaCUDA") << "CUDA: GPU sort implementation not available (size=" << size << ")";
}
/// Sorts `size` keys of `keys` starting at index `key0`, reordering the
/// matching entries of `data` (starting at index `data0`) when a non-empty
/// data vector is supplied.
///
/// Unless `forceCPU` is set, CudaSortGPU() is tried first on the device
/// buffers; if it returns false the work is done by CudaSortCPU() on the host
/// buffers instead. CudaSortGPUAvailable() is not consulted here.
///
/// @param keys      vector holding the keys; dereferenced unconditionally.
/// @param key0      index of the first key to sort.
/// @param data      optional vector reordered with the keys; a null pointer or
///                  an empty vector means the keys are sorted alone.
/// @param data0     index of the first data entry.
/// @param size      number of elements to sort.
/// @param bits      forwarded to CudaSortGPU() / CudaSortCPU().
/// @param forceCPU  when true, the GPU path is skipped entirely.
template<class TKey, class TData>
static inline void CudaSort(CudaVector<TKey>* keys, unsigned int key0, CudaVector<TData>* data, unsigned int data0, unsigned int size, int bits = 32, bool forceCPU = false)
{
    const bool hasPayload = (data != nullptr) && !data->empty();

    // The device buffers are only requested when the GPU path is attempted.
    bool sortedOnGPU = false;
    if (!forceCPU)
    {
        sortedOnGPU = CudaSortGPU(keys->deviceWriteAt(key0),
                                  (hasPayload ? data->deviceWriteAt(data0) : nullptr),
                                  size, bits);
    }
    if (sortedOnGPU)
    {
        return;
    }

    // Either the CPU was requested explicitly or the GPU sort reported failure.
    CudaSortCPU(keys->hostWriteAt(key0),
                (hasPayload ? data->hostWriteAt(data0) : nullptr),
                size, bits);
}
/// Convenience overload: sorts the first `size` elements by forwarding to the
/// offset-taking CudaSort() with both start indices set to 0.
///
/// @param keys      vector holding the keys to sort.
/// @param data      optional vector reordered together with the keys.
/// @param size      number of elements to sort.
/// @param bits      forwarded unchanged.
/// @param forceCPU  forwarded unchanged.
template<class TKey, class TData>
static inline void CudaSort(CudaVector<TKey>* keys, CudaVector<TData>* data, unsigned int size, int bits = 32, bool forceCPU = false)
{
    const unsigned int firstKey = 0;
    const unsigned int firstData = 0;
    CudaSort(keys, firstKey, data, firstData, size, bits, forceCPU);
}
#if defined(__cplusplus)
} // namespace sofa::gpu::cuda
#endif
#endif