-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
/
nmf_pgd.pyx
62 lines (44 loc) · 1.8 KB
/
nmf_pgd.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Author: Timofey Yefimov
# cython: cdivision=True
# cython: boundscheck=False
# cython: wraparound=False
# cython: nonecheck=False
# cython: embedsignature=True
from libc.math cimport sqrt
from cython.parallel import prange
cdef double fmin(double x, double y) nogil:
return x if x < y else y
cdef double fmax(double x, double y) nogil:
return x if x > y else y
def solve_h(double[:, ::1] h, double[:, :] Wtv, double[:, ::1] WtW, int[::1] permutation, double kappa):
"""Find optimal dense vector representation for current W and r matrices.
Parameters
----------
h : matrix
Dense representation of documents in current batch.
Wtv : matrix
WtW : matrix
Returns
-------
float
Cumulative difference between previous and current h vectors.
"""
cdef Py_ssize_t n_components = h.shape[0]
cdef Py_ssize_t n_samples = h.shape[1]
cdef double violation = 0
cdef double grad, projected_grad, hessian
cdef Py_ssize_t sample_idx = 0
cdef Py_ssize_t component_idx_1 = 0
cdef Py_ssize_t component_idx_2 = 0
for sample_idx in prange(n_samples, nogil=True):
for component_idx_1 in range(n_components):
component_idx_1 = permutation[component_idx_1]
grad = -Wtv[component_idx_1, sample_idx]
for component_idx_2 in range(n_components):
grad += WtW[component_idx_1, component_idx_2] * h[component_idx_2, sample_idx]
hessian = WtW[component_idx_1, component_idx_1]
grad = grad * kappa / hessian
projected_grad = fmin(0, grad) if h[component_idx_1, sample_idx] == 0 else grad
violation += projected_grad * projected_grad
h[component_idx_1, sample_idx] = fmax(h[component_idx_1, sample_idx] - grad, 0.)
return sqrt(violation)