Add float64 support to matrix-vector multiplication
mratsim committed Feb 20, 2018
1 parent 00bbe67 commit a962205
Showing 3 changed files with 54 additions and 44 deletions.
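
A minimal usage sketch of what this commit enables (illustrative only, not part of the commit; assumes the arraymancer package and an available OpenCL device — unsuffixed float literals are float64 in Nim):

import arraymancer

let a = [[1.0, -1.0, 2.0],
         [0.0, -3.0, 1.0]].toTensor().opencl()  # 2x3 float64 matrix on the device
let x = [2.0, 1.0, 0.0].toTensor().opencl()     # float64 vector on the device

# Matrix-vector multiplication now works for float64 ClTensors as well as float32.
echo (a * x).cpu()  # copy the result back to host; expected [1.0, -3.0] per the new test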
2 changes: 1 addition & 1 deletion src/tensor/operators_blas_l1_opencl.nim
@@ -27,7 +27,7 @@ import ./backend/metadataArray,
# ####################################################################
# BLAS Level 1 (Vector dot product, Addition, Scalar to Vector/Matrix)

template dotImpl(T: typedesc, clblast_proc: untyped): untyped =
template dotImpl(T: typedesc[SomeReal], clblast_proc: untyped): untyped =
proc dot*(a, b: ClTensor[T]): T =
## Vector to Vector dot (scalar) product
when compileOption("boundChecks"):
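For context on the `typedesc[SomeReal]` constraint added above: it restricts the template to float32/float64 instantiations at compile time (`SomeReal` is the float type class in Nim of this era, later renamed `SomeFloat`). A minimal illustrative sketch, not from the commit:

import typetraits  # for `name` on a typedesc

template onlyFloats(T: typedesc[SomeReal]): untyped =
  echo T.name, " is accepted"

onlyFloats(float32)   # ok
onlyFloats(float64)   # ok
# onlyFloats(int)     # rejected at compile time: int is not in SomeReal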
53 changes: 27 additions & 26 deletions src/tensor/operators_blas_l2l3_opencl.nim
@@ -7,36 +7,37 @@ import ./data_structure,
./private/[p_init_opencl, p_checks]



proc openCL_MV_y_eq_aAx_p_by(
alpha: float32, a, x: ClTensor[float32],
beta: float32, y: var ClTensor[float32]) =
# Matrix-Vector: y = alpha A matvecmul x + beta y

# TODO: remove this contiguous layout constraint
if not a.isContiguous:
raise newException(ValueError, "NotImplemented: for now both tensors should be contiguous")

let
a_is_rowMajor = a.is_C_contiguous
layout = if a_is_rowMajor: CLBlastLayoutRowMajor
else: CLBlastLayoutColMajor
lda = if a_is_rowMajor: a.strides[0]
else: a.strides[1]

check clblastSgemv(layout, CLBlastTransposeNo, a.shape[0], a.shape[1],
alpha,
a.toClPointer, a.offset, lda,
x.toClpointer, x.offset, x.strides[0],
beta,
y.toClpointer, y.offset, y.strides[0],
unsafeAddr clQueue0, nil)

template l1l2_blas_Impl(T: typedesc[SomeReal], clblast_gemv_proc: untyped): untyped =
proc openCL_MV_y_eq_aAx_p_by(
alpha: T, a, x: ClTensor[T],
beta: T, y: var ClTensor[T]) =
# Matrix-Vector: y = alpha A matvecmul x + beta y

# TODO: remove this contiguous layout constraint
if not a.isContiguous:
raise newException(ValueError, "NotImplemented: for now both tensors should be contiguous")

let
a_is_rowMajor = a.is_C_contiguous
layout = if a_is_rowMajor: CLBlastLayoutRowMajor
else: CLBlastLayoutColMajor
lda = if a_is_rowMajor: a.strides[0]
else: a.strides[1]

check clblast_gemv_proc(layout, CLBlastTransposeNo, a.shape[0], a.shape[1],
alpha,
a.toClPointer, a.offset, lda,
x.toClpointer, x.offset, x.strides[0],
beta,
y.toClpointer, y.offset, y.strides[0],
unsafeAddr clQueue0, nil)

l1l2_blas_Impl(float32, clblastSgemv)
l1l2_blas_Impl(float64, clblastDgemv)

proc `*`*[T: SomeReal](a, b: ClTensor[T]): ClTensor[T] =
## Matrix multiplication (Matrix-Matrix and Matrix-Vector) on OpenCL

assert T is float32, "Only float32 is supported at the moment"
assert b.rank == 1, "Only Matrix-Vector product is supported at the moment"

if a.rank == 2 and b.rank == 1:
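The refactor above replaces a float32-only proc with a template instantiated once per element type, binding each generated proc to the matching CLBlast symbol (Sgemv for float32, Dgemv for float64). Nim injects procs declared inside templates into the instantiation scope, so the two instances become ordinary overloads. A toy sketch of that generate-per-type pattern (stand-in procs, not the real CLBlast bindings):

proc fakeSgemv(x: float32): float32 = 2 * x   # stands in for clblastSgemv
proc fakeDgemv(x: float64): float64 = 2 * x   # stands in for clblastDgemv

template genScale(T: typedesc[SomeReal], backend_proc: untyped): untyped =
  # each instantiation emits a concrete overload bound to one backend proc
  proc scale(x: T): T = backend_proc(x)

genScale(float32, fakeSgemv)
genScale(float64, fakeDgemv)

echo scale(3'f32)   # 6.0, dispatches to the float32 instance
echo scale(3'f64)   # 6.0, dispatches to the float64 instance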
43 changes: 26 additions & 17 deletions tests/tensor/test_operators_blas_opencl.nim
@@ -17,18 +17,27 @@ import ../../src/arraymancer
import unittest

suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
test "GEMV - General Matrix to Vector Multiplication":
test "GEMV - General Matrix to Vector Multiplication - float32":
## TODO: test with slices
## TODO: support and test non-contiguous tensors

let d = @[@[1.0'f32,-1,2],@[0.0'f32,-3,1]].toTensor().opencl()
let e = @[2.0'f32, 1, 0].toTensor().opencl()
let d = [[float32 1,-1,2], [float32 0.0,-3,1]].toTensor().opencl()
let e = [float32 2, 1, 0].toTensor().opencl()

check: (d * e).cpu == [1.0'f32, -3].toTensor()
check: (d * e).cpu == [float32 1, -3].toTensor()

test "GEMV - General Matrix to Vector Multiplication - float64":
## TODO: test with slices
## TODO: support and test non-contiguous tensors

let d = [[float64 1,-1,2], [float64 0.0,-3,1]].toTensor().opencl()
let e = [float64 2, 1, 0].toTensor().opencl()

check: (d * e).cpu == [float64 1, -3].toTensor()

test "Matrix and vector addition":
let u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl
let u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl

check: (u + v).cpu == @[2'f32, 4, -4].toTensor()

@@ -44,10 +53,10 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
discard a + b.cpu[0..1, 0..1].opencl

test "Matrix and vector substraction":
let u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl
let u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl

check: (u - v).cpu == @[0'f32, 2, -6].toTensor()
check: (u - v).cpu == @[float32 0, 2, -6].toTensor()

let a = @[7.0, 4.0, 3.0, 1.0, 8.0, 6.0, 8.0, 1.0, 6.0, 2.0].toTensor.reshape([5,2]).opencl
let b = @[6.0, 6.0, 2.0, 0.0, 4.0, 3.0, 2.0, 0.0, 0.0, 3.0].toTensor.reshape([5,2]).opencl
@@ -68,8 +77,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":


test "Matrix and Vector in-place addition":
var u = @[1'f64, 3, -5].toTensor().opencl()
let v = @[4'f64, -2, -1].toTensor().opencl()
var u = @[float64 1, 3, -5].toTensor().opencl()
let v = @[float64 4, -2, -1].toTensor().opencl()

u += v

@@ -107,8 +116,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
z += t2.cpu[0..1,0..1].opencl

test "Matrix and Vector in-place substraction":
var u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl
var u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl

u -= v

@@ -133,8 +142,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
a += b.cpu[0..1,0..1].opencl

test "Matrix and vector addition":
let u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl
let u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl

check: (u + v).cpu == @[2'f32, 4, -4].toTensor()

@@ -150,8 +159,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
discard a + b.cpu[0..1, 0..1].opencl

test "Matrix and vector substraction":
let u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl
let u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl

check: (u - v).cpu == @[0'f32, 2, -6].toTensor()

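One stylistic note on the test rewrite: it swaps the `@[1'f32, 3, -5]` seq literals for `[float32 1, 3, -5]` array literals. Both spell the same element type — `float32 1` converts the first element and the remaining integer literals unify with it, avoiding a repeated `'f32` suffix. A tiny sketch (hypothetical variable names, not from the commit):

let u = @[1'f32, 3, -5]      # seq[float32], suffix style (old)
let v = [float32 1, 3, -5]   # array[3, float32], conversion-prefix style (new)
echo u[0] == v[0]            # true: both elements are float32 1.0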
