This repository has been archived by the owner on Mar 21, 2024. It is now read-only.
/
dot_products_with_zip.cu
128 lines (95 loc) · 5.31 KB
/
dot_products_with_zip.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/iterator/zip_iterator.h>
#include <cstdlib> // for rand()
// This example shows how thrust::zip_iterator can be used to create a
// 'virtual' array of structures. In this case the structure is a 3d
// vector type (Float3) whose (x,y,z) components will be stored in
// three separate float arrays. The zip_iterator "zips" these arrays
// into a single virtual Float3 array.
// We'll use a 3-tuple to store our 3d vector type
typedef thrust::tuple<float,float,float> Float3;
// This functor implements the dot product between 3d vectors
struct DotProduct
{
__host__ __device__
float operator()(const Float3& a, const Float3& b) const
{
return thrust::get<0>(a) * thrust::get<0>(b) + // x components
thrust::get<1>(a) * thrust::get<1>(b) + // y components
thrust::get<2>(a) * thrust::get<2>(b); // z components
}
};
// Return a host vector with random values in the range [0,1]
thrust::host_vector<float> random_vector(const size_t N)
{
thrust::host_vector<float> temp(N);
for(size_t i = 0; i < N; i++)
temp[i] = (float) rand() / (float) RAND_MAX;
return temp;
}
int main(void)
{
// number of vectors
const size_t N = 1000;
// We'll store the components of the 3d vectors in separate arrays. One set of
// arrays will store the 'A' vectors and another set will store the 'B' vectors.
// This 'structure of arrays' (SoA) approach is usually more efficient than the
// 'array of structures' (AoS) approach. The primary reason is that structures,
// like Float3, don't always obey the memory coalescing rules, so they are not
// efficiently transferred to and from memory. Another reason to prefer SoA to
// AoS is that we don't aways want to process all members of the structure. For
// example, if we only need to look at first element of the structure then it
// is wasteful to load the entire structure from memory. With the SoA approach,
// we can chose which elements of the structure we wish to read.
thrust::device_vector<float> A0 = random_vector(N); // x components of the 'A' vectors
thrust::device_vector<float> A1 = random_vector(N); // y components of the 'A' vectors
thrust::device_vector<float> A2 = random_vector(N); // z components of the 'A' vectors
thrust::device_vector<float> B0 = random_vector(N); // x components of the 'B' vectors
thrust::device_vector<float> B1 = random_vector(N); // y components of the 'B' vectors
thrust::device_vector<float> B2 = random_vector(N); // z components of the 'B' vectors
// Storage for result of each dot product
thrust::device_vector<float> result(N);
// We'll now illustrate two ways to use zip_iterator to compute the dot
// products. The first method is verbose but shows how the parts fit together.
// The second method hides these details and is more concise.
// METHOD #1
// Defining a zip_iterator type can be a little cumbersome ...
typedef typename thrust::device_vector<float>::iterator FloatIterator;
typedef typename thrust::tuple<FloatIterator, FloatIterator, FloatIterator> FloatIteratorTuple;
typedef typename thrust::zip_iterator<FloatIteratorTuple> Float3Iterator;
// Now we'll create some zip_iterators for A and B
Float3Iterator A_first = thrust::make_zip_iterator(make_tuple(A0.begin(), A1.begin(), A2.begin()));
Float3Iterator A_last = thrust::make_zip_iterator(make_tuple(A0.end(), A1.end(), A2.end()));
Float3Iterator B_first = thrust::make_zip_iterator(make_tuple(B0.begin(), B1.begin(), B2.begin()));
// Finally, we pass the zip_iterators into transform() as if they
// were 'normal' iterators for a device_vector<Float3>.
thrust::transform(A_first, A_last, B_first, result.begin(), DotProduct());
// METHOD #2
// Alternatively, we can avoid creating variables for X_first, X_last,
// and Y_first and invoke transform() directly.
thrust::transform( thrust::make_zip_iterator(make_tuple(A0.begin(), A1.begin(), A2.begin())),
thrust::make_zip_iterator(make_tuple(A0.end(), A1.end(), A2.end())),
thrust::make_zip_iterator(make_tuple(B0.begin(), B1.begin(), B2.begin())),
result.begin(),
DotProduct() );
// Finally, we'll print a few results
// Example output
// (0.840188,0.45724,0.0860517) * (0.0587587,0.456151,0.322409) = 0.285683
// (0.394383,0.640368,0.180886) * (0.0138811,0.24875,0.0221609) = 0.168775
// (0.783099,0.717092,0.426423) * (0.622212,0.0699601,0.234811) = 0.63755
// (0.79844,0.460067,0.0470658) * (0.0391351,0.742097,0.354747) = 0.389358
for(size_t i = 0; i < 4; i++)
{
Float3 a = A_first[i];
Float3 b = B_first[i];
float dot = result[i];
std::cout << "(" << thrust::get<0>(a) << "," << thrust::get<1>(a) << "," << thrust::get<2>(a) << ")";
std::cout << " * ";
std::cout << "(" << thrust::get<0>(b) << "," << thrust::get<1>(b) << "," << thrust::get<2>(b) << ")";
std::cout << " = ";
std::cout << dot << std::endl;
}
return 0;
}