# nzhiltsov/Ext-RESCAL

Merge branch 'release-0.1.2'

nzhiltsov committed Feb 24, 2013
2 parents c7a26f3 + ebf1dcb commit f1d48dc9a774904994d02d6e420c3568553ffc29
Showing with 76 additions and 43 deletions.
1. +2 −2 README.md
2. +9 −2 commonFunctions.py
3. +21 −3 commonFunctionsTest.py
4. +16 −25 extrescal.py
5. +5 −0 extrescalFunctions.py
6. +22 −2 extrescalFunctionsTest.py
7. +1 −9 rescal.py
 @@ -4,11 +4,11 @@ Ext-RESCAL Scalable Tensor Factorization Scalable Tensor Factorization ------------------------------ ------------------------------ Ext-RESCAL is a memory efficient implementation of the [RESCAL algorithm](http://www.cip.ifi.lmu.de/~nickel/). It is written in Python and relies on the SciPy Sparse module. Ext-RESCAL is a memory efficient implementation of the [RESCAL algorithm](http://www.cip.ifi.lmu.de/~nickel/data/slides-icml2011.pdf). It is written in Python and relies on the SciPy Sparse module. Current Version Current Version ------------ ------------ [0.1.1](https://github.com/nzhiltsov/Ext-RESCAL/archive/0.1.1.zip) [0.1.2](https://github.com/nzhiltsov/Ext-RESCAL/archive/0.1.2.zip) Features Features ------------ ------------
 @@ -1,4 +1,4 @@ from numpy import dot def squareFrobeniusNormOfSparse(M): def squareFrobeniusNormOfSparse(M): """ """ @@ -8,4 +8,11 @@ def squareFrobeniusNormOfSparse(M): norm = 0 norm = 0 for i in range(len(rows)): for i in range(len(rows)): norm += M[rows[i],cols[i]] ** 2 norm += M[rows[i],cols[i]] ** 2 return norm return norm def fitNorm(row, col, Xi, ARk, A): """ Computes i,j element of the squared Frobenius norm of the fitting matrix """ ARAtValue = dot(ARk[row,:], A[col,:]) return (Xi[row, col] - ARAtValue)**2
 @@ -1,9 +1,11 @@ from numpy import ones from numpy import ones, dot import numpy as np import numpy as np from scipy.sparse import coo_matrix from scipy.sparse import coo_matrix from commonFunctions import squareFrobeniusNormOfSparse from commonFunctions import squareFrobeniusNormOfSparse, fitNorm from numpy.linalg.linalg import norm from nose.tools import assert_almost_equal def test(): def testSquareFrobeniusNorm(): zeroCount = 2 zeroCount = 2 rowIndices = np.array([1, 2]) rowIndices = np.array([1, 2]) colIndices = np.array([0, 0]) colIndices = np.array([0, 0]) @@ -12,4 +14,20 @@ def test(): M = coo_matrix((ones(zeroCount),(rowIndices, colIndices)), shape=(rowSize, colSize), dtype=np.uint8).tolil() M = coo_matrix((ones(zeroCount),(rowIndices, colIndices)), shape=(rowSize, colSize), dtype=np.uint8).tolil() assert squareFrobeniusNormOfSparse(M) == 2 assert squareFrobeniusNormOfSparse(M) == 2 def testFitNorm(): X = coo_matrix((ones(4),([0, 1, 2, 2], [1, 1, 0, 1])), shape=(3, 3), dtype=np.uint8).tolil() n = X.shape[0] A = np.array([[0.9, 0.1], [0.8, 0.2], [0.1, 0.9]]) R = np.array([[0.9, 0.1], [0.1, 0.9]]) expectedNorm = norm(X - dot(A,dot(R, A.T)))**2 ARk = dot(A, R) fits = [] for i in xrange(n): for j in xrange(n): fits.append(fitNorm(i, j, X, ARk, A)) assert_almost_equal(sum(fits), expectedNorm)
 @@ -7,26 +7,19 @@ import numpy as np import numpy as np import os import os import fnmatch import fnmatch from commonFunctions import squareFrobeniusNormOfSparse from commonFunctions import squareFrobeniusNormOfSparse, fitNorm from extrescalFunctions import updateA, updateV from extrescalFunctions import updateA, updateV, matrixFitNormElement __version__ = "0.1" __version__ = "0.1" __DEF_MAXITER = 50 __DEF_MAXITER = 50 __DEF_PREHEATNUM = 1 __DEF_PREHEATNUM = 1 __DEF_INIT = 'nvecs' __DEF_INIT = 'nvecs' __DEF_PROJ = True __DEF_PROJ = True __DEF_CONV = 1e-6 __DEF_CONV = 1e-5 __DEF_LMBDA = 0 __DEF_LMBDA = 0 __DEF_EXACT_FIT = False __DEF_EXACT_FIT = False def fitNorm(row, col, Xi, ARk, A): """ Computes i,j element of the squared Frobenius norm of the fitting matrix """ ARAtValue = dot(ARk[row,:], A[col,:]) return (Xi[row, col] - ARAtValue)**2 def rescal(X, D, rank, **kwargs): def rescal(X, D, rank, **kwargs): """ """ RESCAL RESCAL @@ -129,8 +122,8 @@ def rescal(X, D, rank, **kwargs): raise 'Projection via QR decomposition is required; pass proj=true' raise 'Projection via QR decomposition is required; pass proj=true' # initialize V # initialize V Drow, Dcol = D.shape DrowSize, DcolSize = D.shape V = array(rand(rank, Dcol), dtype=np.float64) V = array(rand(rank, DcolSize), dtype=np.float64) # compute factorization # compute factorization fit = fitchange = fitold = 0 fit = fitchange = fitold = 0 @@ -158,7 +151,7 @@ def rescal(X, D, rank, **kwargs): fitDAV = 0 fitDAV = 0 if iter > preheatnum: if iter > preheatnum: if lmbda != 0: if lmbda != 0: for i in range(len(R)): for i in xrange(len(R)): regRFit += norm(R[i])**2 regRFit += norm(R[i])**2 regularizedFit = lmbda*(norm(A)**2) + lmbda*regRFit regularizedFit = lmbda*(norm(A)**2) + lmbda*regRFit if lmbda != 0: if lmbda != 0: @@ -167,20 +160,18 @@ def rescal(X, D, rank, **kwargs): fitDAV = norm(D - dot(A,V))**2 fitDAV = norm(D - dot(A,V))**2 else : else : Drow, Dcol = D.nonzero() Drow, Dcol = D.nonzero() for ff in 
range(len(Drow)): for ff in xrange(len(Drow)): fitDAV += (D[Drow[ff],Dcol[ff]] - dot(A[Drow[ff],:], V[:, Dcol[ff]]))**2 fitDAV += matrixFitNormElement(Drow[ff], Dcol[ff], D, A, V) if exactfit: if exactfit: for i in range(len(R)): for i in xrange(len(R)): tensorFit = norm(X[i] - dot(A,dot(R[i], A.T)))**2 tensorFit += norm(X[i] - dot(A,dot(R[i], A.T)))**2 else : else : for i in range(len(R)): for i in xrange(len(R)): ARk = dot(A, R[i]) ARk = dot(A, R[i]) Xrow, Xcol = X[i].nonzero() Xrow, Xcol = X[i].nonzero() fits = [] for rr in xrange(len(Xrow)): for rr in range(len(Xrow)): tensorFit += fitNorm(Xrow[rr], Xcol[rr], X[i], ARk, A) fits.append(fitNorm(Xrow[rr], Xcol[rr], X[i], ARk, A)) tensorFit = sum(fits) fit = 0.5*tensorFit fit = 0.5*tensorFit fit += regularizedFit fit += regularizedFit @@ -207,12 +198,12 @@ def __updateR(X, A, lmbda): At = A.T At = A.T if lmbda == 0: if lmbda == 0: ainv = dot(pinv(dot(At, A)), At) ainv = dot(pinv(dot(At, A)), At) for i in range(len(X)): for i in xrange(len(X)): R.append( dot(ainv, X[i].dot(ainv.T)) ) R.append( dot(ainv, X[i].dot(ainv.T)) ) else : else : AtA = dot(At, A) AtA = dot(At, A) tmp = inv(kron(AtA, AtA) + lmbda * eye(r**2)) tmp = inv(kron(AtA, AtA) + lmbda * eye(r**2)) for i in range(len(X)): for i in xrange(len(X)): AtXA = dot(At, X[i].dot(A)) AtXA = dot(At, X[i].dot(A)) R.append( dot(AtXA.flatten(), tmp).reshape(r, r) ) R.append( dot(AtXA.flatten(), tmp).reshape(r, r) ) return R return R @@ -221,7 +212,7 @@ def __updateR(X, A, lmbda): def __projectSlices(X, Q): def __projectSlices(X, Q): q = Q.shape[1] q = Q.shape[1] X2 = [] X2 = [] for i in range(len(X)): for i in xrange(len(X)): X2.append( dot(Q.T, X[i].dot(Q)) ) X2.append( dot(Q.T, X[i].dot(Q)) ) return X2 return X2
 @@ -26,3 +26,8 @@ def updateV(A, D, lmbda): invPart = inv(dot(At, A) + lmbda * eye(rank)) invPart = inv(dot(At, A) + lmbda * eye(rank)) return dot(invPart, At) * D return dot(invPart, At) * D def matrixFitNormElement(i, j, D, A, V): """ Computes i,j element of the fitting matrix Frobenius norm ||D - A*V|| """ return (D[i,j] - dot(A[i,:], V[:, j]))**2
 @@ -1,9 +1,10 @@ from scipy.sparse import coo_matrix from scipy.sparse import coo_matrix from numpy import ones, dot, eye from numpy import ones, dot, eye import numpy as np import numpy as np from extrescalFunctions import updateA, updateV from extrescalFunctions import updateA, updateV, matrixFitNormElement from nose.tools import assert_almost_equal from nose.tools import assert_almost_equal from numpy.linalg import inv from numpy.linalg import inv from numpy.linalg.linalg import norm def testUpdateA(): def testUpdateA(): A = np.array([[0.1, 0.1, 0.1], A = np.array([[0.1, 0.1, 0.1], @@ -59,5 +60,24 @@ def testUpdateV(): for i in range(3): for i in range(3): for j in range(4): for j in range(4): assert_almost_equal(newV[i,j], expectedNewV[i, j]) assert_almost_equal(newV[i,j], expectedNewV[i, j]) def testMatrixFitNorm(): A = np.array([[0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.1, 0.1, 0.1], [0.1, 0.1, 0.1]]) V = np.array([[0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1], [0.1, 0.1, 0.1, 0.1]]) D = coo_matrix((ones(6),([0, 1, 2, 3, 4, 5], [0, 1, 1, 2, 3, 3])), shape=(6, 4), dtype=np.uint8).tocsr() DrowNum, DcolNum = D.shape expectedNorm = norm(D - dot(A,V))**2 fit = 0 for i in xrange(DrowNum): for j in xrange(DcolNum): fit += matrixFitNormElement(i, j, D, A, V) assert_almost_equal(fit, expectedNorm)
 @@ -7,7 +7,7 @@ import numpy as np import numpy as np import os import os import fnmatch import fnmatch from commonFunctions import squareFrobeniusNormOfSparse from commonFunctions import squareFrobeniusNormOfSparse, fitNorm __version__ = "0.1" __version__ = "0.1" @@ -19,14 +19,6 @@ __DEF_LMBDA = 0 __DEF_LMBDA = 0 __DEF_EXACT_FIT = False __DEF_EXACT_FIT = False def fitNorm(row, col, Xi, ARk, A): """ Computes i,j element of the squared Frobenius norm of the fitting matrix """ ARAtValue = dot(ARk[row,:], A[col,:]) return (Xi[row, col] - ARAtValue)**2 def rescal(X, rank, **kwargs): def rescal(X, rank, **kwargs): """ """ RESCAL RESCAL