Skip to content

Commit

Permalink
Merge branch 'feature/solve'
Browse files Browse the repository at this point in the history
  • Loading branch information
mdekstrand committed Dec 7, 2018
2 parents c5898f3 + 78bf69b commit 79f7ee4
Show file tree
Hide file tree
Showing 10 changed files with 266 additions and 52 deletions.
4 changes: 2 additions & 2 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ variables:
pip.deps: >
invoke pytest coverage pytest-cov
pandas scipy pyarrow
numba cython
numba cython cffi
pip.extras: >
hpfrec==0.2.2.5 implicit
hpfrec implicit
jobs:

Expand Down
9 changes: 9 additions & 0 deletions doc/math.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Math utilities
==============

Solvers
-------

.. module:: lenskit.math.solve

.. autofunction:: solve_tri
30 changes: 30 additions & 0 deletions doc/matrix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
Matrix Utilities
----------------

.. module:: lenskit.matrix

We have some matrix-related utilities, since matrices are used so heavily in recommendation
algorithms.

Building Ratings Matrices
~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: sparse_ratings
.. autoclass:: RatingMatrix

Compressed Sparse Row Matrices
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

We use CSR-format sparse matrices in quite a few places. Since SciPy's sparse matrices are not
directly usable from Numba, we have implemented a Numba-compiled CSR representation that can
be used from accelerated algorithm implementations.

.. autofunction:: csr_from_coo
.. autofunction:: csr_from_scipy
.. autofunction:: csr_to_scipy
.. autofunction:: csr_rowinds
.. autofunction:: csr_save
.. autofunction:: csr_load

.. autoclass:: CSR
:members:
39 changes: 7 additions & 32 deletions doc/util.rst
Original file line number Diff line number Diff line change
@@ -1,37 +1,12 @@
Utility Functions
=================

.. automodule:: lenskit.util
:members:

Matrix Utilities
----------------

.. module:: lenskit.matrix

We have some matrix-related utilities, since matrices are used so heavily in recommendation
algorithms.

Building Ratings Matrices
~~~~~~~~~~~~~~~~~~~~~~~~~
.. toctree::
matrix
math

.. autofunction:: sparse_ratings
.. autoclass:: RatingMatrix

Compressed Sparse Row Matrices
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

We use CSR-format sparse matrices in quite a few places. Since SciPy's sparse matrices are not
directly usable from Numba, we have implemented a Numba-compiled CSR representation that can
be used from accelerated algorithm implementations.

.. autofunction:: csr_from_coo
.. autofunction:: csr_from_scipy
.. autofunction:: csr_to_scipy
.. autofunction:: csr_rowinds
.. autofunction:: csr_save
.. autofunction:: csr_load

.. autoclass:: CSR
:members:
Miscellaneous
-------------

.. automodule:: lenskit.util
:members:
29 changes: 13 additions & 16 deletions lenskit/algorithms/als.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .mf_common import BiasMFModel, MFModel
from ..matrix import sparse_ratings, CSR
from .. import util
from ..math.solve import _dposv

_logger = logging.getLogger(__name__)

Expand All @@ -18,6 +19,7 @@ def _train_matrix(mat: CSR, other: np.ndarray, reg: float):
"One half of an explicit ALS training round."
nr = mat.nrows
nf = other.shape[1]
regI = np.identity(nf) * reg
assert mat.ncols == other.shape[0]
result = np.zeros((nr, nf))
for i in prange(nr):
Expand All @@ -30,12 +32,11 @@ def _train_matrix(mat: CSR, other: np.ndarray, reg: float):
MMT = M.T @ M
# assert MMT.shape[0] == ctx.n_features
# assert MMT.shape[1] == ctx.n_features
A = MMT + np.identity(nf) * reg * len(cols)
Ainv = np.linalg.inv(A)
A = MMT + regI * len(cols)
V = M.T @ vals
uv = Ainv @ V
# assert len(uv) == ctx.n_features
result[i, :] = uv
# and solve
_dposv(A, V, True)
result[i, :] = V

return result

Expand Down Expand Up @@ -68,19 +69,15 @@ def _train_implicit_matrix(mat: CSR, other: np.ndarray, reg: float):
MMT = (M.T.copy() * rates) @ M
# assert MMT.shape[0] == ctx.n_features
# assert MMT.shape[1] == ctx.n_features
# Build and invert the matrix
# Build the matrix for solving
A = OtOr + MMT
Ainv = np.linalg.inv(A)
# And now we can compute the final piece of the update rule
AiYt = Ainv @ Ot
cu = np.ones(nc)
cu[cols] = rates + 1.0
AiYt *= cu
pu = np.zeros(nc)
pu[cols] = 1.0
uv = AiYt @ pu
# Compute RHS - only used columns (p_ui != 0) values needed
# Cu is rates + 1 for the cols, so just trim Ot
y = Ot[:, cols] @ (rates + 1.0)
# and solve
_dposv(A, y, True)
# assert len(uv) == ctx.n_features
result[i, :] = uv
result[i, :] = y

return result

Expand Down
3 changes: 3 additions & 0 deletions lenskit/math/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""
Mathematical helper routines.
"""
96 changes: 96 additions & 0 deletions lenskit/math/solve.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""
Efficient solver routines.
"""


import numpy as np

import cffi
import numba as n
from numba.extending import get_cython_function_address

__ffi = cffi.FFI()

__uplo_U = np.array(ord('U'), dtype=np.int8)
__uplo_L = np.array(ord('L'), dtype=np.int8)
__trans_N = np.array(ord('N'), dtype=np.int8)
__trans_T = np.array(ord('T'), dtype=np.int8)
__trans_C = np.array(ord('C'), dtype=np.int8)
__diag_U = np.array(ord('U'), dtype=np.int8)
__diag_N = np.array(ord('N'), dtype=np.int8)
__inc_1 = np.ones(1, dtype=np.int32)

__dtrsv = __ffi.cast("void (*) (char*, char*, char*, int*, double*, int*, double*, int*)",
get_cython_function_address("scipy.linalg.cython_blas", "dtrsv"))
__dposv = __ffi.cast("void (*) (char*, int*, int*, double*, int*, double*, int*, int*)",
get_cython_function_address("scipy.linalg.cython_lapack", "dposv"))


@n.njit(n.void(n.boolean, n.boolean, n.double[:, ::1], n.double[::1]), nogil=True)
def _dtrsv(lower, trans, a, x):
inc1 = __ffi.from_buffer(__inc_1)

# dtrsv uses Fortran-layout arrays. Because we use C-layout arrays, we will
# invert the meaning of 'lower' and 'trans', and the function will work fine.
# We also need to swap index orders
uplo = __uplo_U if lower else __uplo_L
tspec = __trans_N if trans else __trans_T

n_p = np.array([a.shape[0]], dtype=np.intc)
n_p = __ffi.from_buffer(n_p)
lda_p = np.array([a.shape[1]], dtype=np.intc)
lda_p = __ffi.from_buffer(lda_p)

__dtrsv(__ffi.from_buffer(uplo), __ffi.from_buffer(tspec), __ffi.from_buffer(__diag_N),
n_p, __ffi.from_buffer(a), lda_p,
__ffi.from_buffer(x), inc1)


def solve_tri(A, b, transpose=False, lower=True):
"""
Solve the system :math:`Ax = b`, where :math:`A` is triangular.
This is equivalent to :py:fun:`scipy.linalg.solve_triangular`, but does *not*
check for non-singularity. It is a thin wrapper around the BLAS ``dtrsv``
function.
Args:
A(ndarray): the matrix.
b(ndarray): the taget vector.
transpose(bool): whether to solve :math:`Ax = b` or :math:`A^T x = b`.
lower(bool): whether :math:`A` is lower- or upper-triangular.
"""
x = b.copy()
_dtrsv(lower, transpose, A, x)
return x


@n.njit(n.intc(n.float64[:, ::1], n.float64[::1], n.boolean), nogil=True)
def _dposv(A, b, lower):
if A.shape[0] != A.shape[1]:
return -11
if A.shape[0] != b.shape[0]:
return -12

# dposv uses Fortran-layout arrays. Because we use C-layout arrays, we will
# invert the meaning of 'lower' and 'trans', and the function will work fine.
# We also need to swap index orders
uplo = __uplo_U if lower else __uplo_L
n_p = __ffi.from_buffer(np.array([A.shape[0]], dtype=np.intc))
nrhs_p = __ffi.from_buffer(np.ones(1, dtype=np.intc))
info = np.zeros(1, dtype=np.intc)
info_p = __ffi.from_buffer(info)

__dposv(__ffi.from_buffer(uplo), n_p, nrhs_p,
__ffi.from_buffer(A), n_p,
__ffi.from_buffer(b), n_p,
info_p)

return info[0]


def dposv(A, b, lower=False):
info = _dposv(A, b, lower)
if info < 0:
raise ValueError('invalid args to dposv, code ' + str(info))
elif info > 0:
raise RuntimeError('error in dposv, code ' + str(info))
4 changes: 2 additions & 2 deletions tests/test_knn_user_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,10 +225,10 @@ def test_uu_batch_accuracy():
preds = [__batch_eval((algo, train, test)) for (train, test) in folds]
preds = pd.concat(preds)
mae = pm.mae(preds.prediction, preds.rating)
assert mae == approx(0.71, abs=0.025)
assert mae == approx(0.71, abs=0.027)

user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating))
assert user_rmse.mean() == approx(0.91, abs=0.05)
assert user_rmse.mean() == approx(0.91, abs=0.055)


@mark.slow
Expand Down
100 changes: 100 additions & 0 deletions tests/test_math_solve.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import numpy as np
import scipy.linalg as sla

from pytest import approx

from lenskit.math.solve import solve_tri, dposv


def test_solve_ltri():
for i in range(10):
size = np.random.randint(5, 50)
Af = np.random.randn(size, size)
b = np.random.randn(size)
A = np.tril(Af)

x = solve_tri(A, b)
assert len(x) == size

xexp = sla.solve_triangular(A, b, lower=True)
assert x == approx(xexp, rel=1.0e-6)


def test_solve_ltri_transpose():
for i in range(10):
size = np.random.randint(5, 50)
Af = np.random.randn(size, size)
b = np.random.randn(size)
A = np.tril(Af)

x = solve_tri(A, b, True)
assert len(x) == size

xexp = sla.solve_triangular(A.T, b, lower=False)
assert x == approx(xexp, rel=1.0e-6)


def test_solve_utri():
for i in range(10):
size = np.random.randint(5, 50)
Af = np.random.randn(size, size)
b = np.random.randn(size)
A = np.triu(Af)

x = solve_tri(A, b, lower=False)
assert len(x) == size
xexp = sla.solve_triangular(A, b, lower=False)
assert x == approx(xexp, rel=1.0e-6)


def test_solve_utri_transpose():
for i in range(10):
size = np.random.randint(5, 50)
Af = np.random.randn(size, size)
b = np.random.randn(size)
A = np.triu(Af)

x = solve_tri(A, b, True, lower=False)
assert len(x) == size
xexp = sla.solve_triangular(A.T, b, lower=True)
assert x == approx(xexp, rel=1.0e-6)


def test_solve_cholesky():
for i in range(10):
size = np.random.randint(5, 50)
A = np.random.randn(size, size)
b = np.random.randn(size)

# square values of A
A = A * A

# and solve
xexp, resid, rank, s = np.linalg.lstsq(A, b)

# chol solve
L = np.linalg.cholesky(A.T @ A)

w = solve_tri(L, A.T @ b)
x = solve_tri(L, w, transpose=True)

assert x == approx(xexp)


def test_solve_dposv():
for i in range(10):
size = np.random.randint(5, 50)
A = np.random.randn(size, size)
b = np.random.randn(size)

# square values of A
A = A * A

# and solve
xexp, resid, rank, s = np.linalg.lstsq(A, b)

F = A.T @ A
x = A.T @ b
dposv(F, x, True)

assert x == approx(xexp, rel=1.0e-4)
4 changes: 4 additions & 0 deletions tests/test_matrix.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import scipy.sparse as sps
import scipy.linalg as sla
import numpy as np

import lenskit.matrix as lm

import lk_test_utils as lktu

from pytest import approx


def test_sparse_matrix():
ratings = lktu.ml_pandas.renamed.ratings
Expand Down

0 comments on commit 79f7ee4

Please sign in to comment.