Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 178 lines (145 sloc) 5.058 kB
51eb854 @stefan-k changed include to load
authored
1 load("cuda.jl")
2 load("cufft.jl")
81f33ac @pavanky Adding a new CuMatrix type in Julia
pavanky authored
3
901d661 @pavanky New functionality added
pavanky authored
4 # The CuMatrix class
81f33ac @pavanky Adding a new CuMatrix type in Julia
pavanky authored
# Matrix whose data is referenced through a device pointer.
#
# Fields:
#   T    - element type of the matrix
#   ptr  - pointer to the matrix storage
#   dims - (rows, cols)
#
# Every constructor registers CuFree as the finalizer of the new object.
type CuMatrix
    T::Type
    ptr::Ptr
    dims::(Integer,Integer)

    # Allocate a matrix of element type T with the given dims via cuda_malloc.
    # Only Float32, Float64, Complex64 and Complex128 are accepted.
    function CuMatrix(T::Type, dims::(Integer, Integer))
        supported = (T == Float32 || T == Float64 ||
                     T == Complex64 || T == Complex128)
        if !supported
            error("No integer or boolean support yet")
        end
        mat = new(T, cuda_malloc(T, dims), dims)
        finalizer(mat, CuFree)
        mat
    end

    # Build from a host Matrix: allocate storage of the same size and
    # element type, then hand the host data to mem_device.
    function CuMatrix{T}(in::Matrix{T})
        shape = size(in)
        dev = cuda_malloc(T, shape)
        mem_device(dev, in)
        mat = new(T, dev, shape)
        finalizer(mat, CuFree)
        mat
    end

    # Wrap an already existing device pointer; the element type is taken
    # from the pointer's type parameter.
    function CuMatrix{T}(ptr::Ptr{T}, dims::(Integer, Integer))
        mat = new(T, ptr, dims)
        finalizer(mat, CuFree)
        mat
    end

    # Convenience forms: separate row/column arguments, and a single
    # argument meaning a rows-by-1 matrix.
    CuMatrix(T::Type, rows::Integer, cols::Integer) = CuMatrix(T, (rows, cols))
    CuMatrix(T::Type, rows::Integer) = CuMatrix(T, (rows, 1))

    # When no element type is given, Float32 is used
    # (single precision, for compatibility with older cards).
    CuMatrix(rows::Integer, cols::Integer) = CuMatrix(Float32, rows, cols)
    CuMatrix(rows::Integer) = CuMatrix(Float32, rows, 1)
end
47
# Bring a CuMatrix back to the host.
# Allocates a host Array with the same element type and dims as `in`,
# passes it together with the device pointer to mem_host, and returns it.
function Array(in::CuMatrix)
    rows, cols = in.dims
    host = Array(eltype(in), rows, cols)
    mem_host(host, in.ptr)
    host
end
54
# Deep copy: allocate fresh storage with the same element type and dims,
# copy the contents over with mem_copy, and wrap the new pointer in a
# CuMatrix.
function copy(in::CuMatrix)
    elty = eltype(in)
    dst = cuda_malloc(elty, in.dims)
    # The byte count is handed to mem_copy as an Int32.
    nbytes = convert(Int32, numel(in) * sizeof(elty))
    mem_copy(dst, in.ptr, nbytes)
    CuMatrix(dst, in.dims)
end
5bd792b @pavanky Adding show() to cumatrix
pavanky authored
62
901d661 @pavanky New functionality added
pavanky authored
# Print a CuMatrix: converts it to a host Array first, then prints that
# array after an "On GPU" header line.
function print(in::CuMatrix)
    host = Array(in)
    print("On GPU\n", host, "\n")
end
67
# show for a CuMatrix simply delegates to print.
show(in::CuMatrix) = print(in)
4254773 @pavanky Converting jl_cuda_* to cuda_*
pavanky authored
72
c90058d @stefan-k eltype for CuMatrix, bugfix in copy
authored
# Number of elements in A (rows times columns).
function numel(A::CuMatrix)
    rows, cols = A.dims
    rows * cols
end

# Number of elements in A, converted to the requested type T.
function numel(T::Type, A::CuMatrix)
    convert(T, numel(A))
end
76
c90058d @stefan-k eltype for CuMatrix, bugfix in copy
authored
# Element type of A, as stored in its T field.
function eltype(A::CuMatrix)
    A.T
end
79
4254773 @pavanky Converting jl_cuda_* to cuda_*
pavanky authored
# Release the storage behind a CuMatrix by passing its pointer to cuda_free.
# Registered as the finalizer by the CuMatrix constructors.
CuFree(in::CuMatrix) = cuda_free(in.ptr)
84
85 # Random Number Generators
# Create a rows-by-cols CuMatrix of element type T and fill it by calling
# cuda_rand on its pointer with the total element count.
function curand(T::Type, rows::Integer, cols::Integer)
    mat = CuMatrix(T, rows, cols)
    total = rows * cols
    cuda_rand(mat.ptr, total)
    mat
end
91
# Create a rows-by-cols CuMatrix of element type T filled by cuda_randn.
#
# curand only supports randn for an even number of elements, so the
# allocation and the generation are done for the element count rounded up
# to the next even number; the returned matrix still reports (rows, cols).
function curandn(T::Type, rows::Integer, cols::Integer)
    count = rows * cols
    count += count & 1 # round an odd count up to the next even number
    ptr = cuda_malloc(T, count)
    cuda_randn(ptr, count)
    # Wrap the pointer with the (ptr, dims) constructor, the same one copy()
    # uses.  The previous call CuMatrix(T, ptr, (rows, cols)) matched none of
    # the constructors defined on CuMatrix.
    # NOTE(review): assumes cuda_malloc(T, count) returns a Ptr{T} -- confirm.
    CuMatrix(ptr, (rows, cols))
end
101
# Without an explicit element type both generators use Float32.
function curand(rows::Integer, cols::Integer)
    curand(Float32, rows, cols)
end

function curandn(rows::Integer, cols::Integer)
    curandn(Float32, rows, cols)
end
104
105 # BLAS Functions
# Matrix product C = A * B computed with cuda_gemm.
# Errors when the inner dimensions differ or when A and B have different
# element types.  Returns a newly allocated CuMatrix.
function (*)(A::CuMatrix, B::CuMatrix)
    if A.dims[2] != B.dims[1]
        error("Inner dimension mismatch in Matrix multiply")
    end
    elty = eltype(A)
    if elty != eltype(B)
        error("Precision mismatch in Matrix multiply")
    end

    C = CuMatrix(elty, (A.dims[1], B.dims[2]))

    # The sizes are passed to cuda_gemm as Int32 values.
    m = convert(Int32, C.dims[1])
    n = convert(Int32, C.dims[2])
    k = convert(Int32, B.dims[1])

    # No transposition on either operand, alpha = 1, beta = 0;
    # m, k and m are passed as the leading dimensions of A, B and C.
    alpha = one(elty)
    beta = zero(elty)
    cuda_gemm('N', 'N', m, n, k,
              alpha, A.ptr, m, B.ptr, k,
              beta, C.ptr, m)
    C
end
124
bfe8e6c @stefan-k better type handling, makes getptr useless
authored
# Thin wrappers: each forwards the Int32 element count of A and its device
# pointer to the corresponding cuda_* routine and returns that result.
function amax(A::CuMatrix)
    cuda_amax(numel(Int32, A), A.ptr)
end

function amin(A::CuMatrix)
    cuda_amin(numel(Int32, A), A.ptr)
end

function asum(A::CuMatrix)
    cuda_asum(numel(Int32, A), A.ptr)
end
4254773 @pavanky Converting jl_cuda_* to cuda_*
pavanky authored
128
e0abd51 @pavanky Fixed support for amax, amin, asum, dot
pavanky authored
# Matrix times scalar.  A itself is left untouched: a deep copy is made and
# cuda_scal is applied to the copy with the factor alpha.
function (*)(A::CuMatrix, alpha)
    count = numel(Int32, A)
    scaled = copy(A)
    cuda_scal(count, scaled.ptr, alpha)
    scaled
end
135
196d0f7 @stefan-k additional multiplication method
authored
# Scalar times matrix: same as matrix times scalar.
function (*)(alpha, A::CuMatrix)
    A * alpha
end
137
e0abd51 @pavanky Fixed support for amax, amin, asum, dot
pavanky authored
# Dot product of A and B.
# Errors when the element types differ or when the two matrices do not hold
# the same number of elements.  Otherwise passes the Int32 element count and
# both device pointers to cuda_dot and returns its result.
function dot(A::CuMatrix, B::CuMatrix)
    if eltype(A) != eltype(B)
        error("Precision mismatch in Dot product")
    end

    n = numel(Int32, A)
    # Count the elements of B here.  Both counts used to be taken from A,
    # so the size check below could never fire.
    m = numel(Int32, B)

    if m != n
        error("Size mismatch in Dot product")
    end

    cuda_dot(n, A.ptr, B.ptr)
end
6eee384 @stefan-k nrm2 function
authored
152
f916fb0 @pavanky Adding partial norm support
pavanky authored
# Euclidean norm: forwards the Int32 element count of A and its device
# pointer to cuda_nrm2.
function nrm2(A::CuMatrix)
    cuda_nrm2(numel(Int32, A), A.ptr)
end
f916fb0 @pavanky Adding partial norm support
pavanky authored
155
156 # Thrust functions
157
158
159 # lapack functions
# p-norm of A.  Only p == 2 is implemented (delegates to nrm2); any other
# value of p raises an error.
function norm(A::CuMatrix, p)
    if p != 2
        error("norm not supported for p == ", p)
    end
    nrm2(A)
end
d176d0e @stefan-k moved cufftExec, added cufftPlan methods for better usability
authored
167
168 # CUFFT functions
# cufftExec picks the cufftExec* routine from the pointer element types.

# Single precision: complex-to-complex, real-to-complex, complex-to-real.
function cufftExec(plan::Uint32, idata::Ptr{Complex64}, odata::Ptr{Complex64},
                   direction::Int32)
    cufftExecC2C(plan, idata, odata, direction)
end

function cufftExec(plan::Uint32, idata::Ptr{Float32}, odata::Ptr{Complex64})
    cufftExecR2C(plan, idata, odata)
end

function cufftExec(plan::Uint32, idata::Ptr{Complex64}, odata::Ptr{Float32})
    cufftExecC2R(plan, idata, odata)
end

# Double precision: complex-to-complex, real-to-complex, complex-to-real.
function cufftExec(plan::Uint32, idata::Ptr{Complex128}, odata::Ptr{Complex128},
                   direction::Int32)
    cufftExecZ2Z(plan, idata, odata, direction)
end

function cufftExec(plan::Uint32, idata::Ptr{Float64}, odata::Ptr{Complex128})
    cufftExecD2Z(plan, idata, odata)
end

function cufftExec(plan::Uint32, idata::Ptr{Complex128}, odata::Ptr{Float64})
    cufftExecZ2D(plan, idata, odata)
end

# CuMatrix arguments: unwrap the device pointers and dispatch on them.
function cufftExec(plan::Uint32, idata::CuMatrix, odata::CuMatrix,
                   direction::Int32)
    cufftExec(plan, idata.ptr, odata.ptr, direction)
end

function cufftExec(plan::Uint32, idata::CuMatrix, odata::CuMatrix)
    cufftExec(plan, idata.ptr, odata.ptr)
end
177
Something went wrong with that request. Please try again.