Add float64 support to matrix-vector multiplication
mratsim committed Feb 20, 2018
1 parent 00bbe67 commit a962205
Showing 3 changed files with 54 additions and 44 deletions.
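
A minimal usage sketch of what this commit enables (illustrative only, not part of the commit; assumes the arraymancer package and an available OpenCL device — unsuffixed float literals are float64 in Nim):

import arraymancer

let a = [[1.0, -1.0, 2.0],
         [0.0, -3.0, 1.0]].toTensor().opencl()  # 2x3 float64 matrix on the device
let x = [2.0, 1.0, 0.0].toTensor().opencl()     # float64 vector on the device

# Matrix-vector multiplication now works for float64 ClTensors as well as float32.
echo (a * x).cpu()  # copy the result back to host; expected [1.0, -3.0] per the new test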
2 changes: 1 addition & 1 deletion src/tensor/operators_blas_l1_opencl.nim
@@ -27,7 +27,7 @@ import ./backend/metadataArray,
# ####################################################################
# BLAS Level 1 (Vector dot product, Addition, Scalar to Vector/Matrix)

template dotImpl(T: typedesc, clblast_proc: untyped): untyped =
template dotImpl(T: typedesc[SomeReal], clblast_proc: untyped): untyped =
proc dot*(a, b: ClTensor[T]): T =
## Vector to Vector dot (scalar) product
when compileOption("boundChecks"):
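For context on the `typedesc[SomeReal]` constraint added above: it restricts the template to float32/float64 instantiations at compile time (`SomeReal` is the float type class in Nim of this era, later renamed `SomeFloat`). A minimal illustrative sketch, not from the commit:

import typetraits  # for `name` on a typedesc

template onlyFloats(T: typedesc[SomeReal]): untyped =
  echo T.name, " is accepted"

onlyFloats(float32)   # ok
onlyFloats(float64)   # ok
# onlyFloats(int)     # rejected at compile time: int is not in SomeReal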
53 changes: 27 additions & 26 deletions src/tensor/operators_blas_l2l3_opencl.nim
@@ -7,36 +7,37 @@ import ./data_structure,
./private/[p_init_opencl, p_checks]



proc openCL_MV_y_eq_aAx_p_by(
alpha: float32, a, x: ClTensor[float32],
beta: float32, y: var ClTensor[float32]) =
# Matrix-Vector: y = alpha A matvecmul x + beta y

# TODO: remove this contiguous layout constraint
if not a.isContiguous:
raise newException(ValueError, "NotImplemented: for now both tensors should be contiguous")

let
a_is_rowMajor = a.is_C_contiguous
layout = if a_is_rowMajor: CLBlastLayoutRowMajor
else: CLBlastLayoutColMajor
lda = if a_is_rowMajor: a.strides[0]
else: a.strides[1]

check clblastSgemv(layout, CLBlastTransposeNo, a.shape[0], a.shape[1],
alpha,
a.toClPointer, a.offset, lda,
x.toClpointer, x.offset, x.strides[0],
beta,
y.toClpointer, y.offset, y.strides[0],
unsafeAddr clQueue0, nil)

template l1l2_blas_Impl(T: typedesc[SomeReal], clblast_gemv_proc: untyped): untyped =
proc openCL_MV_y_eq_aAx_p_by(
alpha: T, a, x: ClTensor[T],
beta: T, y: var ClTensor[T]) =
# Matrix-Vector: y = alpha A matvecmul x + beta y

# TODO: remove this contiguous layout constraint
if not a.isContiguous:
raise newException(ValueError, "NotImplemented: for now both tensors should be contiguous")

let
a_is_rowMajor = a.is_C_contiguous
layout = if a_is_rowMajor: CLBlastLayoutRowMajor
else: CLBlastLayoutColMajor
lda = if a_is_rowMajor: a.strides[0]
else: a.strides[1]

check clblast_gemv_proc(layout, CLBlastTransposeNo, a.shape[0], a.shape[1],
alpha,
a.toClPointer, a.offset, lda,
x.toClpointer, x.offset, x.strides[0],
beta,
y.toClpointer, y.offset, y.strides[0],
unsafeAddr clQueue0, nil)

l1l2_blas_Impl(float32, clblastSgemv)
l1l2_blas_Impl(float64, clblastDgemv)

proc `*`*[T: SomeReal](a, b: ClTensor[T]): ClTensor[T] =
## Matrix multiplication (Matrix-Matrix and Matrix-Vector) on OpenCL

assert T is float32, "Only float32 is supported at the moment"
assert b.rank == 1, "Only Matrix-Vector product is supported at the moment"

if a.rank == 2 and b.rank == 1:
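The refactor above replaces a float32-only proc with a template instantiated once per element type, binding each generated proc to the matching CLBlast symbol (Sgemv for float32, Dgemv for float64). Nim injects procs declared inside templates into the instantiation scope, so the two instances become ordinary overloads. A toy sketch of that generate-per-type pattern (stand-in procs, not the real CLBlast bindings):

proc fakeSgemv(x: float32): float32 = 2 * x   # stands in for clblastSgemv
proc fakeDgemv(x: float64): float64 = 2 * x   # stands in for clblastDgemv

template genScale(T: typedesc[SomeReal], backend_proc: untyped): untyped =
  # each instantiation emits a concrete overload bound to one backend proc
  proc scale(x: T): T = backend_proc(x)

genScale(float32, fakeSgemv)
genScale(float64, fakeDgemv)

echo scale(3'f32)   # 6.0, dispatches to the float32 instance
echo scale(3'f64)   # 6.0, dispatches to the float64 instance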
43 changes: 26 additions & 17 deletions tests/tensor/test_operators_blas_opencl.nim
@@ -17,18 +17,27 @@ import ../../src/arraymancer
import unittest

suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
test "GEMV - General Matrix to Vector Multiplication":
test "GEMV - General Matrix to Vector Multiplication - float32":
## TODO: test with slices
## TODO: support and test non-contiguous tensors

let d = @[@[1.0'f32,-1,2],@[0.0'f32,-3,1]].toTensor().opencl()
let e = @[2.0'f32, 1, 0].toTensor().opencl()
let d = [[float32 1,-1,2], [float32 0.0,-3,1]].toTensor().opencl()
let e = [float32 2, 1, 0].toTensor().opencl()

check: (d * e).cpu == [1.0'f32, -3].toTensor()
check: (d * e).cpu == [float32 1, -3].toTensor()

test "GEMV - General Matrix to Vector Multiplication - float64":
## TODO: test with slices
## TODO: support and test non-contiguous tensors

let d = [[float64 1,-1,2], [float64 0.0,-3,1]].toTensor().opencl()
let e = [float64 2, 1, 0].toTensor().opencl()

check: (d * e).cpu == [float64 1, -3].toTensor()

test "Matrix and vector addition":
let u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl
let u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl

check: (u + v).cpu == @[2'f32, 4, -4].toTensor()

@@ -44,10 +53,10 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
discard a + b.cpu[0..1, 0..1].opencl

test "Matrix and vector substraction":
let u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl
let u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl

check: (u - v).cpu == @[0'f32, 2, -6].toTensor()
check: (u - v).cpu == @[float32 0, 2, -6].toTensor()

let a = @[7.0, 4.0, 3.0, 1.0, 8.0, 6.0, 8.0, 1.0, 6.0, 2.0].toTensor.reshape([5,2]).opencl
let b = @[6.0, 6.0, 2.0, 0.0, 4.0, 3.0, 2.0, 0.0, 0.0, 3.0].toTensor.reshape([5,2]).opencl
@@ -68,8 +77,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":


test "Matrix and Vector in-place addition":
var u = @[1'f64, 3, -5].toTensor().opencl()
let v = @[4'f64, -2, -1].toTensor().opencl()
var u = @[float64 1, 3, -5].toTensor().opencl()
let v = @[float64 4, -2, -1].toTensor().opencl()

u += v

@@ -107,8 +116,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
z += t2.cpu[0..1,0..1].opencl

test "Matrix and Vector in-place substraction":
var u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl
var u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl

u -= v

@@ -133,8 +142,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
a += b.cpu[0..1,0..1].opencl

test "Matrix and vector addition":
let u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl
let u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl

check: (u + v).cpu == @[2'f32, 4, -4].toTensor()

@@ -150,8 +159,8 @@ suite "OpenCL BLAS operations (Basic Linear Algebra Subprograms)":
discard a + b.cpu[0..1, 0..1].opencl

test "Matrix and vector substraction":
let u = @[1'f32, 3, -5].toTensor.opencl
let v = @[1'f32, 1, 1].toTensor.opencl
let u = @[float32 1, 3, -5].toTensor.opencl
let v = @[float32 1, 1, 1].toTensor.opencl

check: (u - v).cpu == @[0'f32, 2, -6].toTensor()

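One stylistic note on the test rewrite: it swaps the `@[1'f32, 3, -5]` seq literals for `[float32 1, 3, -5]` array literals. Both spell the same element type — `float32 1` converts the first element and the remaining integer literals unify with it, avoiding a repeated `'f32` suffix. A tiny sketch (hypothetical variable names, not from the commit):

let u = @[1'f32, 3, -5]      # seq[float32], suffix style (old)
let v = [float32 1, 3, -5]   # array[3, float32], conversion-prefix style (new)
echo u[0] == v[0]            # true: both elements are float32 1.0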
