Linear regression (fixes #208) (#211)
* Add lapack support

* Wrapper done. TODO tests

* Least square working for 1d, not for 2d

* Fix residual, the first value wasn't squared

* Least squares solver fully implemented 🎆
mratsim committed Apr 1, 2018
1 parent 068075d commit bb8ad5c
Showing 13 changed files with 317 additions and 8 deletions.
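For orientation, here is a minimal usage sketch of the least_squares_solver proc this commit introduces. It is hedged: the data and expected values come from the NumPy-based test added below, and the top-level import is assumed to re-export the proc, as src/arraymancer.nim does when no_lapack is not defined.

import arraymancer

# Fit y = slope*x + intercept in the least-squares sense
let A = [[0f, 1f],
         [1f, 1f],
         [2f, 1f],
         [3f, 1f]].toTensor
let y = [-1f, 0.2, 0.9, 2.1].toTensor

let (solution, residuals, matrix_rank, singular_values) = least_squares_solver(A, y)
echo solution     # ~ [1.0, -0.95] -> slope 1, intercept -0.95
echo residuals    # ~ [0.05]       -> sum of squared errors
echo matrix_rank  # 2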
3 changes: 2 additions & 1 deletion .appveyor.yml
@@ -29,12 +29,13 @@ install:
- SET PATH=%CD%\tools_tmp\%NIM_DIR%\bin;%CD%\tools_tmp\%MINGW_DIR%\bin;%PATH%
- ps: nuget install OpenBLAS -o "${env:APPVEYOR_BUILD_FOLDER}"
- ps: cp OpenBLAS.0.2.14.1/lib/native/bin/x64/libopenblas.dll blas.dll
# - ps: Start-FileDownload http://icl.cs.utk.edu/lapack-for-windows/libraries/VisualStudio/3.7.0/Dynamic-MINGW/Win64/liblapack.dll lapack.dll # This does not work, why?
- SET PATH=%PATH%;%CD%

build_script:
- nimble.exe refresh

test_script:
- nimble.exe test
- nimble.exe test_no_lapack

deploy: off
1 change: 1 addition & 0 deletions .travis.yml
@@ -47,6 +47,7 @@ addons:
packages:
# On Linux we need OpenBLAS, on OSX Apple Accelerate is present by default
- libopenblas-dev
- liblapack-dev

before_install:
# On MacOS flame/blis can be tested as it is an homebrew package
11 changes: 9 additions & 2 deletions arraymancer.nimble
@@ -5,7 +5,7 @@ description = "A n-dimensional tensor (ndarray) library"
license = "Apache License 2.0"

### Dependencies
requires "nim >= 0.18.0", "nimblas >= 0.1.3", "nimcuda >= 0.1.4", "nimcl >= 0.1.2", "clblast"
requires "nim >= 0.18.0", "nimblas >= 0.1.3", "nimlapack >= 0.1.1", "nimcuda >= 0.1.4", "nimcl >= 0.1.2", "clblast"

## Install files
srcDir = "src"
@@ -54,6 +54,7 @@ template mkl_threadedSwitches() =
switch("define","openmp")
switch("stackTrace","off")
switch("define","blas=mkl_intel_lp64")
switch("define","lapack=mkl_intel_lp64")
switch("clibdir", "/opt/intel/mkl/lib/intel64")
switch("passl", "/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.a")
switch("passl", "-lmkl_core")
@@ -63,6 +64,7 @@ template mkl_threadedSwitches() =

template mkl_singleSwitches() =
switch("define","blas=mkl_intel_lp64")
switch("define","lapack=mkl_intel_lp64")
switch("clibdir", "/opt/intel/mkl/lib/intel64")
switch("passl", "/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.a")
switch("passl", "-lmkl_core")
@@ -110,7 +112,11 @@ task all_tests, "Run all tests - Intel MKL + OpenMP + Cuda + march=native + rele
cuda_mkl_openmp
test "full_test_suite", "cpp"

task test, "Run all tests - Default BLAS":
task test, "Run all tests - Default BLAS & Lapack":
test "tests_cpu"

task test_no_lapack, "Run all tests - Default BLAS without lapack":
switch("define", "no_lapack")
test "tests_cpu"

task test_cpp, "Run all tests - Cpp codegen":
@@ -133,6 +139,7 @@ task test_opencl, "Run all OpenCL backend tests":

task test_openblas, "Run all tests - OpenBLAS":
switch("define","blas=openblas")
switch("define","lapack=openblas")
when defined(macosx):
## Should work but somehow Nim doesn't find libopenblas.dylib on MacOS
switch("clibdir", "/usr/local/opt/openblas/lib")
6 changes: 4 additions & 2 deletions nim.cfg
@@ -1,6 +1,6 @@
# Arraymancer compilation flag config

####From default nim.cfg, somehow it's not taken into account with a custom nim.cfg
#### From default nim.cfg, somehow it's not taken into account with a custom nim.cfg
@if release or quick:
obj_checks:off
field_checks:off
@@ -57,6 +57,7 @@ clang.options.size = "-Os"

@if openblas:
define:"blas=openblas" # For nimblas
define:"blas=openblas" # For nimlapack
@if macosx:
clibdir:"/usr/local/opt/openblas/lib"
cincludes:"/usr/local/opt/openblas/include"
@@ -71,6 +72,7 @@ clang.options.size = "-Os"
@if mkl: # MKL multi_threaded
define:"openmp"
define:"blas=mkl_intel_lp64"
define:"lapack=mkl_intel_lp64"
clibdir:"/opt/intel/mkl/lib/intel64"
passl:"/opt/intel/mkl/lib/intel64/libmkl_intel_lp64.a"
passl:"-lmkl_core"
@@ -87,4 +89,4 @@ clang.options.size = "-Os"
gcc.exe:"/usr/local/bin/gcc-7"
gcc.linkerexe:"/usr/local/bin/gcc-7"
@end
@end
@end
4 changes: 4 additions & 0 deletions src/arraymancer.nim
@@ -43,3 +43,7 @@ export tensor,
mnist,
io_csv,
ml

when not defined(no_lapack):
import ./linear_algebra/linear_algebra
export linear_algebra
136 changes: 136 additions & 0 deletions src/linear_algebra/least_squares.nim
@@ -0,0 +1,136 @@
# Copyright (c) 2018 the Arraymancer contributors
# Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
# This file may not be copied, modified, or distributed except according to those terms.

import ../tensor/tensor, ../tensor/backend/metadataArray,
../tensor/private/p_init_cpu,
nimlapack, fenv, math


proc gelsd(m: ptr cint; n: ptr cint; nrhs: ptr cint; a: ptr cfloat; lda: ptr cint;
b: ptr cfloat; ldb: ptr cint; s: ptr cfloat; rcond: ptr cfloat; rank: ptr cint;
work: ptr cfloat; lwork: ptr cint; iwork: ptr cint; info: ptr cint) {.inline.}=
sgelsd(
m, n, nrhs,
a, m,
b, ldb,
s, rcond, rank,
work, lwork,
iwork, info
)

proc gelsd(m: ptr cint; n: ptr cint; nrhs: ptr cint; a: ptr cdouble; lda: ptr cint;
b: ptr cdouble; ldb: ptr cint; s: ptr cdouble; rcond: ptr cdouble;
rank: ptr cint; work: ptr cdouble; lwork: ptr cint; iwork: ptr cint;
info: ptr cint) {.inline.}=
dgelsd(
m, n, nrhs,
a, m,
b, ldb,
s, rcond, rank,
work, lwork,
iwork, info
)


proc least_squares_solver*[T: SomeReal](a, b: Tensor[T]):
tuple[
least_square_sol: Tensor[T],
residuals: Tensor[T],
matrix_rank: int,
singular_values: Tensor[T]
] {.noInit.}=

assert a.rank == 2 and b.rank in {1, 2} and a.shape[0] > 0 and a.shape[0] == b.shape[0]
# We need to copy the input as Lapack will destroy A and replace B with its result.
# Furthermore it expects a column major ordering.
# In the future with move operator we can consider letting moved tensors be destroyed.

# Note on dealing with row-major without copies:
# http://drsfenner.org/blog/2016/01/into-the-weeds-iii-hacking-lapack-calls-to-save-memory/

# LAPACK reference doc:
# http://www.netlib.org/lapack/explore-html/d7/d3b/group__double_g_esolve_ga94bd4a63a6dacf523e25ff617719f752.html
# Intel example for the Fortran API
# https://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_lapack_examples/dgelsd_ex.c.htm

let is1d = b.rank == 1 # Axis 1 will be squeezed at the end for the 1d case

var # Parameters
m = a.shape[0].cint
n = a.shape[1].cint
nrhs = if is1d: 1.cint
else: b.shape[1].cint
# A is copied to column-major below, so lda = m
ldb = max(m, n).cint

# Shadowing the inputs
var a = a.clone(colMajor) # Lapack destroys the A matrix during solving

# Lapack replaces the B matrix by the result during solving
# Furthermore, input B has shape M x NRHS while the output has shape N x NRHS,
# so if M < N we must zero the remainder of the tensor
var bstar: Tensor[T]
tensorCpu([ldb.int, nrhs.int], bstar, colMajor)
bstar.storage.Fdata = newSeq[T](bstar.size)

var bstar_slice = bstar[0 ..< b.shape[0], 0 ..< nrhs] # Workaround because slicing does not produce a var at the moment
apply2_inline(bstar_slice, b):
# paste b in bstar.
# if bstar is larger, the rest is zeros
y

# Temporary sizes
const smlsiz = 25.cint # equal to the maximum size of the subproblems at the bottom of the computation tree
let
minmn = min(m,n).int
nlvl = max(0, (minmn.T / (smlsiz+1).T).ln.cint + 1)
# Bug in some Lapack, liwork must be determined manually: http://icl.cs.utk.edu/lapack-forum/archives/lapack/msg00899.html
liwork = max(1, 3 * minmn * nlvl + 11 * minmn)

result.singular_values = newTensorUninit[T](minmn) # will hold the singular values of A

var # Temporary parameter values
# Condition for a float to be considered 0
rcond = epsilon(T) * a.shape.max.T * a.max
lwork = max(1, 12 * m + 2 * m * smlsiz + 8 * m * nlvl + m * nrhs + (smlsiz + 1) ^ 2)
work = newSeqUninitialized[T](lwork)
iwork = newSeqUninitialized[cint](liwork)
info, rank: cint

# Solve the equations A*X = B
gelsd(
m.addr, n.addr, nrhs.addr,
a.get_data_ptr, m.addr, # lda
bstar.get_data_ptr, ldb.addr,
result.singular_values[0].addr, rcond.addr, rank.addr,
work[0].addr, lwork.addr,
iwork[0].addr, info.addr
)

if info > 0:
# TODO: this should not be an exception; failing to converge can happen and should
# not interrupt the program. Alternative: fill the result with Inf?
raise newException(ValueError, "the algorithm for computing the SVD failed to converge")

if info < 0:
raise newException(ValueError, "Illegal parameter in linear square solver gelsd: " & $(-info))

result.matrix_rank = rank.int # This is the matrix rank, not the tensor rank
result.least_square_sol = bstar[0 ..< n, _].squeeze(axis = 1) # Correction for 1d case

if rank == n and m > n:
result.residuals = (bstar[n .. _, _].squeeze(axis = 1)).fold_axis_inline(Tensor[T], fold_axis = 0):
x = y .^ 2f # initial value
do:
x += y .^ 2f # core loop
do:
x += y # values were stored in a temporary array of size == nb of cores
# to avoid multithreading issues and must be reduced one last time
else:
result.residuals = newTensorUninit[T](0)
# Workaround as we can't create empty tensors for now:
result.residuals.shape[0] = 0
result.residuals.shape.len = 0
result.residuals.strides[0] = 0
result.residuals.strides.len = 0
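A hedged sanity check of the residual definition above (not part of the commit): when the system is over-determined and full-rank (rank == n and m > n), the returned residual should equal the explicit sum of squared errors |A*x - y|^2. The data is reused from the single-equation test added below.

import arraymancer, math

let A = [[0f, 1f], [1f, 1f], [2f, 1f], [3f, 1f]].toTensor
let y = [-1f, 0.2, 0.9, 2.1].toTensor
let (x, residuals, _, _) = least_squares_solver(A, y)

var sse = 0'f32                                  # explicit |A*x - y|^2
for i in 0 ..< A.shape[0]:
  sse += (A[i, 0]*x[0] + A[i, 1]*x[1] - y[i])^2
echo abs(sse - residuals[0]) < 1e-5'f32          # expected: true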
7 changes: 7 additions & 0 deletions src/linear_algebra/linear_algebra.nim
@@ -0,0 +1,7 @@
# Copyright (c) 2018 the Arraymancer contributors
# Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
# This file may not be copied, modified, or distributed except according to those terms.

import ./least_squares

export least_squares
4 changes: 4 additions & 0 deletions src/tensor/backend/metadataArray.nim
@@ -226,3 +226,7 @@ proc concat*[T](dsas: varargs[DynamicStackArray[T]]): DynamicStackArray[T] =
for val in dsa:
result[i] = val
inc(i)

proc max*[T](a: DynamicStackArray[T]): T {.noSideEffect, inline.} =
for val in a:
result = max(result, val)
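A small sketch of the new max overload on DynamicStackArray (hedged: assumed to be compiled from inside the repo, mirroring the import style of least_squares.nim above, where shape.max is used to scale rcond for gelsd):

import ../tensor/tensor, ../tensor/backend/metadataArray

let t = zeros[float32](3, 5)
echo t.shape.max   # 5 -- the largest dimension of the tensor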
7 changes: 4 additions & 3 deletions src/tensor/init_copy_cpu.nim
@@ -13,15 +13,16 @@
# limitations under the License.

import ./data_structure,
./higher_order_applymap
./higher_order_applymap,
./private/p_shapeshifting

# Unfortunately higher_order depends on init_cpu and "clone" depends on higher_order, so we need an extra file
# to deal with circular dependencies

proc clone*[T](t: Tensor[T]): Tensor[T] {.noInit.}=
proc clone*[T](t: Tensor[T], layout: OrderType = rowMajor): Tensor[T] {.noInit.}=
## Input:
## - A tensor
## Returns:
## - A full copy. The copy is recreated as a contiguous tensor.
## If the input was a slice, unused values are discarded.
result = t.map_inline(x)
contiguousImpl(t, layout, result)
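A brief sketch of the extended clone (hedged: mirrors how least_squares.nim above requests a column-major copy before handing the buffer to LAPACK; colMajor is assumed visible through the tensor import, otherwise it comes from nimblas's OrderType, which the new signature uses):

import ../tensor/tensor
# import nimblas   # if colMajor / OrderType is not re-exported, it lives here

let t = [[1f, 2f],
         [3f, 4f]].toTensor      # row-major by default
let tc = t.clone(colMajor)       # full contiguous copy, column-major layout
echo tc == t                     # true: same values, only the memory layout differs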
80 changes: 80 additions & 0 deletions tests/linear_algebra/test_linear_algebra.nim
@@ -0,0 +1,80 @@
# Copyright (c) 2018 the Arraymancer contributors
# Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).

import ../../src/arraymancer
import unittest, math, fenv

suite "Linear algebra":
test "Linear equation solver using least squares":
block: # "Single equation"
# Example from Numpy documentation
let A = [
[0f, 1f],
[1f, 1f],
[2f, 1f],
[3f, 1f]
].toTensor

let y = [-1f, 0.2, 0.9, 2.1].toTensor

let (solution, residuals, matrix_rank, singular_values) = least_squares_solver(A, y)

# From Numpy with double precision and `lstsq` function
let
expected_sol = [1f, -0.95f].toTensor
expected_residuals = [0.05f].toTensor
expected_matrix_rank = 2
expected_sv = [ 4.10003045f, 1.09075677].toTensor

check:
mean_relative_error(solution, expected_sol) < 1e-6
mean_relative_error(residuals, expected_residuals) < 1e-6
matrix_rank == expected_matrix_rank
mean_relative_error(singular_values, expected_sv) < 1e-6

block: # Example from Eigen
# https://eigen.tuxfamily.org/dox/group__LeastSquares.html
let A = [[ 0.68 , 0.597],
[-0.211, 0.823],
[ 0.566, -0.605]].toTensor
let b = [-0.33, 0.536, -0.444].toTensor

let (solution, _, _, _) = least_squares_solver(A, b)
let expected_sol = [-0.67, 0.314].toTensor

check: mean_relative_error(solution, expected_sol) < 1e-3

block: # "Multiple independant equations"
# Example from Intel documentation:
# https://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_lapack_examples/dgelsd_ex.c.htm
let a = [
[ 0.12, -8.19, 7.69, -2.26, -4.71],
[-6.91, 2.22, -5.12, -9.08, 9.96],
[-3.33, -8.94, -6.72, -4.40, -9.98],
[ 3.97, 3.33, -2.74, -7.92, -3.20]
].toTensor

let b = [
[ 7.30, 0.47, -6.28],
[ 1.33, 6.58, -3.42],
[ 2.68, -1.71, 3.46],
[-9.62, -0.79, 0.41]
].toTensor

let (solution, residuals, matrix_rank, singular_values) = least_squares_solver(a, b)

# From Intel minimum norm solution
let expected_sol = [[-0.69, -0.24, 0.06],
[-0.80, -0.08, 0.21],
[ 0.38, 0.12, -0.65],
[ 0.29, -0.24, 0.42],
[ 0.29, 0.35, -0.30]].toTensor
# No residuals
let expected_matrix_rank = 4
let expected_sv = [ 18.66, 15.99, 10.01, 8.51].toTensor

check:
mean_relative_error(solution, expected_sol) < 0.015
residuals.rank == 0 and residuals.shape[0] == 0 and residuals.strides[0] == 0
matrix_rank == expected_matrix_rank
mean_relative_error(singular_values, expected_sv) < 1e-03
33 changes: 33 additions & 0 deletions tests/manual_checks/least_squares.nim
@@ -0,0 +1,33 @@

import ../../src/arraymancer

block:
let a = [
[ 0.12f, -8.19, 7.69, -2.26, -4.71],
[-6.91f, 2.22, -5.12, -9.08, 9.96],
[-3.33f, -8.94, -6.72, -4.40, -9.98],
[ 3.97f, 3.33, -2.74, -7.92, -3.20]
].toTensor

let b = [
[ 7.30f, 0.47f, -6.28f],
[ 1.33f, 6.58f, -3.42f],
[ 2.68f, -1.71f, 3.46f],
[-9.62f, -0.79f, 0.41f]
].toTensor

let sol = least_squares_solver(a, b)
echo sol

block:
let A = [
[0f, 1f],
[1f, 1f],
[2f, 1f],
[3f, 1f]
].toTensor

let y = [-1f, 0.2, 0.9, 2.1].toTensor

let sol = least_squares_solver(A, y)
echo sol
